1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 2013 Oracle and/or its affiliates. All rights reserved.
5 */
6 /*
7 * Copyright (c) 1990, 1993, 1994, 1995, 1996
8 * Keith Bostic. All rights reserved.
9 */
10 /*
11 * Copyright (c) 1990, 1993, 1994, 1995
12 * The Regents of the University of California. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * $Id$
39 */
40
41 #include "db_config.h"
42
43 #include "db_int.h"
44 #include "dbinc/db_page.h"
45 #include "dbinc/db_swap.h"
46 #include "dbinc/btree.h"
47 #include "dbinc/fop.h"
48 #include "dbinc/hash.h"
49 #include "dbinc/heap.h"
50 #include "dbinc/lock.h"
51 #include "dbinc/mp.h"
52 #include "dbinc/partition.h"
53 #include "dbinc/qam.h"
54 #include "dbinc/txn.h"
55
56 static int __db_disassociate __P((DB *));
57 static int __db_disassociate_foreign __P ((DB *));
58
59 #ifdef CONFIG_TEST
60 static int __db_makecopy __P((ENV *, const char *, const char *));
61 static int __qam_testdocopy __P((DB *, const char *));
62 #endif
63
64 /*
65 * DB.C --
66 * This file contains the utility functions for the DBP layer.
67 */
68
69 /*
70 * __db_master_open --
71 * Open up a handle on a master database.
72 *
73 * PUBLIC: int __db_master_open __P((DB *, DB_THREAD_INFO *,
74 * PUBLIC: DB_TXN *, const char *, u_int32_t, int, DB **));
75 */
76 int
__db_master_open(subdbp,ip,txn,name,flags,mode,dbpp)77 __db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
78 DB *subdbp;
79 DB_THREAD_INFO *ip;
80 DB_TXN *txn;
81 const char *name;
82 u_int32_t flags;
83 int mode;
84 DB **dbpp;
85 {
86 DB *dbp;
87 int ret;
88
89 *dbpp = NULL;
90
91 /* Open up a handle on the main database. */
92 if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0)
93 return (ret);
94
95 /* Set the creation directory. */
96 dbp->dirname = subdbp->dirname;
97
98 /*
99 * It's always a btree.
100 * Run in the transaction we've created.
101 * Set the pagesize in case we're creating a new database.
102 * Flag that we're creating a database with subdatabases.
103 */
104 dbp->pgsize = subdbp->pgsize;
105 F_SET(dbp, DB_AM_SUBDB);
106 F_SET(dbp, F_ISSET(subdbp,
107 DB_AM_RECOVER | DB_AM_SWAP |
108 DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE));
109
110 /*
111 * If there was a subdb specified, then we only want to apply
112 * DB_EXCL to the subdb, not the actual file. We only got here
113 * because there was a subdb specified.
114 */
115 LF_CLR(DB_EXCL);
116 LF_SET(DB_RDWRMASTER);
117 if ((ret = __db_open(dbp, ip, txn,
118 name, NULL, DB_BTREE, flags, mode, PGNO_BASE_MD)) != 0)
119 goto err;
120
121 /*
122 * The items in dbp are initialized from the master file's meta page.
123 * Other items such as checksum and encryption are checked when we
124 * read the meta-page, so we do not check those here. However, if
125 * the meta-page caused checksumming to be turned on and it wasn't
126 * already, set it here.
127 */
128 if (F_ISSET(dbp, DB_AM_CHKSUM))
129 F_SET(subdbp, DB_AM_CHKSUM);
130
131 /*
132 * The user may have specified a page size for an existing file,
133 * which we want to ignore.
134 */
135 subdbp->pgsize = dbp->pgsize;
136 *dbpp = dbp;
137
138 if (0) {
139 err: if (!F_ISSET(dbp, DB_AM_DISCARD))
140 (void)__db_close(dbp, txn, DB_NOSYNC);
141 }
142
143 return (ret);
144 }
145
146 /*
147 * __db_master_update --
148 * Add/Open/Remove a subdatabase from a master database.
149 *
150 * PUBLIC: int __db_master_update __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *,
151 * PUBLIC: const char *, DBTYPE, mu_action, const char *, u_int32_t));
152 */
153 int
__db_master_update(mdbp,sdbp,ip,txn,subdb,type,action,newname,flags)154 __db_master_update(mdbp, sdbp, ip, txn, subdb, type, action, newname, flags)
155 DB *mdbp, *sdbp;
156 DB_TXN *txn;
157 DB_THREAD_INFO *ip;
158 const char *subdb;
159 DBTYPE type;
160 mu_action action;
161 const char *newname;
162 u_int32_t flags;
163 {
164 DBC *dbc, *ndbc;
165 DBT key, data, ndata;
166 ENV *env;
167 PAGE *p, *r;
168 db_pgno_t t_pgno;
169 int modify, ret, t_ret;
170
171 env = mdbp->env;
172 dbc = ndbc = NULL;
173 p = NULL;
174
175 /*
176 * Open up a cursor. If this is CDB and we're creating the database,
177 * make it an update cursor.
178 *
179 * Might we modify the master database? If so, we'll need to lock.
180 */
181 modify = (!F_ISSET(mdbp, DB_AM_RDONLY) &&
182 (action != MU_OPEN || LF_ISSET(DB_CREATE))) ? 1 : 0;
183
184 if ((ret = __db_cursor(mdbp, ip, txn, &dbc,
185 (CDB_LOCKING(env) && modify) ? DB_WRITECURSOR : 0)) != 0)
186 return (ret);
187
188 /*
189 * Point the cursor at the record.
190 *
191 * If we're removing or potentially creating an entry, lock the page
192 * with DB_RMW.
193 *
194 * We do multiple cursor operations with the cursor in some cases and
195 * subsequently access the data DBT information. Set DB_DBT_MALLOC so
196 * we don't risk modification of the data between our uses of it.
197 *
198 * !!!
199 * We don't include the name's nul termination in the database.
200 */
201 DB_INIT_DBT(key, subdb, strlen(subdb));
202 memset(&data, 0, sizeof(data));
203 F_SET(&data, DB_DBT_MALLOC);
204
205 ret = __dbc_get(dbc, &key, &data,
206 DB_SET | ((STD_LOCKING(dbc) && modify) ? DB_RMW : 0));
207
208 /*
209 * What we do next--whether or not we found a record for the
210 * specified subdatabase--depends on what the specified action is.
211 * Handle ret appropriately as the first statement of each case.
212 */
213 switch (action) {
214 case MU_REMOVE:
215 /*
216 * We should have found something if we're removing it. Note
217 * that in the common case where the DB we're asking to remove
218 * doesn't exist, we won't get this far; __db_subdb_remove
219 * will already have returned an error from __db_open.
220 */
221 if (ret != 0)
222 goto err;
223
224 /*
225 * Delete the subdatabase entry first; if this fails,
226 * we don't want to touch the actual subdb pages.
227 */
228 if ((ret = __dbc_del(dbc, 0)) != 0)
229 goto err;
230
231 /*
232 * We're handling actual data, not on-page meta-data,
233 * so it hasn't been converted to/from opposite
234 * endian architectures. Do it explicitly, now.
235 */
236 memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
237 DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
238 if ((ret = __memp_fget(mdbp->mpf, &sdbp->meta_pgno,
239 ip, dbc->txn, DB_MPOOL_DIRTY, &p)) != 0)
240 goto err;
241
242 /* Free the root on the master db if it was created. */
243 if (TYPE(p) == P_BTREEMETA &&
244 ((BTMETA *)p)->root != PGNO_INVALID) {
245 if ((ret = __memp_fget(mdbp->mpf,
246 &((BTMETA *)p)->root, ip, dbc->txn,
247 DB_MPOOL_DIRTY, &r)) != 0)
248 goto err;
249
250 /* Free and put the page. */
251 if ((ret = __db_free(dbc, r, 0)) != 0) {
252 r = NULL;
253 goto err;
254 }
255 }
256 /* Free and put the page. */
257 if ((ret = __db_free(dbc, p, 0)) != 0) {
258 p = NULL;
259 goto err;
260 }
261 p = NULL;
262 break;
263 case MU_RENAME:
264 /* We should have found something if we're renaming it. */
265 if (ret != 0)
266 goto err;
267
268 /*
269 * Before we rename, we need to make sure we're not
270 * overwriting another subdatabase, or else this operation
271 * won't be undoable. Open a second cursor and check
272 * for the existence of newname; it shouldn't appear under
273 * us since we hold the metadata lock.
274 */
275 if ((ret = __db_cursor(mdbp, ip, txn, &ndbc,
276 CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0)
277 goto err;
278 DB_SET_DBT(key, newname, strlen(newname));
279
280 /*
281 * We don't actually care what the meta page of the potentially-
282 * overwritten DB is; we just care about existence.
283 */
284 memset(&ndata, 0, sizeof(ndata));
285 F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
286
287 if ((ret = __dbc_get(ndbc, &key, &ndata, DB_SET)) == 0) {
288 /* A subdb called newname exists. Bail. */
289 ret = EEXIST;
290 __db_errx(env, DB_STR_A("0673",
291 "rename: database %s exists", "%s"), newname);
292 goto err;
293 } else if (ret != DB_NOTFOUND)
294 goto err;
295
296 /*
297 * Now do the put first; we don't want to lose our only
298 * reference to the subdb. Use the second cursor so the
299 * first one continues to point to the old record.
300 */
301 if ((ret = __dbc_put(ndbc, &key, &data, DB_KEYFIRST)) != 0)
302 goto err;
303 if ((ret = __dbc_del(dbc, 0)) != 0) {
304 /*
305 * If the delete fails, try to delete the record
306 * we just put, in case we're not txn-protected.
307 */
308 (void)__dbc_del(ndbc, 0);
309 goto err;
310 }
311
312 break;
313 case MU_OPEN:
314 /*
315 * Get the subdatabase information. If it already exists,
316 * copy out the page number and we're done.
317 */
318 switch (ret) {
319 case 0:
320 if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) {
321 ret = EEXIST;
322 goto err;
323 }
324 memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
325 DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
326 goto done;
327 case DB_NOTFOUND:
328 if (LF_ISSET(DB_CREATE))
329 break;
330 /*
331 * No db_err, it is reasonable to remove a
332 * nonexistent db.
333 */
334 ret = ENOENT;
335 goto err;
336 default:
337 goto err;
338 }
339
340 /* Create a subdatabase. */
341 if (F_ISSET(mdbp, DB_AM_RDONLY)) {
342 ret = EBADF;
343 goto err;
344 }
345 if ((ret = __db_new(dbc,
346 type == DB_HASH ? P_HASHMETA : P_BTREEMETA, NULL, &p)) != 0)
347 goto err;
348 sdbp->meta_pgno = PGNO(p);
349
350 /*
351 * XXX
352 * We're handling actual data, not on-page meta-data, so it
353 * hasn't been converted to/from opposite endian architectures.
354 * Do it explicitly, now.
355 */
356 t_pgno = PGNO(p);
357 DB_HTONL_SWAP(env, &t_pgno);
358 memset(&ndata, 0, sizeof(ndata));
359 ndata.data = &t_pgno;
360 ndata.size = sizeof(db_pgno_t);
361 if ((ret = __dbc_put(dbc, &key, &ndata, 0)) != 0)
362 goto err;
363 F_SET(sdbp, DB_AM_CREATED);
364 break;
365
366 case MU_MOVE:
367 /* We should have found something if we're moving it. */
368 if (ret != 0)
369 goto err;
370 t_pgno = sdbp->meta_pgno;
371 DB_HTONL_SWAP(env, &t_pgno);
372 memset(&ndata, 0, sizeof(ndata));
373 ndata.data = &t_pgno;
374 ndata.size = sizeof(db_pgno_t);
375 if ((ret = __dbc_put(dbc, &key, &ndata, 0)) != 0)
376 goto err;
377 mdbp->mpf->mfp->revision++;
378 }
379
380 err:
381 done: /*
382 * If we allocated a page: if we're successful, mark the page dirty
383 * and return it to the cache, otherwise, discard/free it.
384 */
385 if (p != NULL && (t_ret = __memp_fput(mdbp->mpf,
386 dbc->thread_info, p, dbc->priority)) != 0 && ret == 0)
387 ret = t_ret;
388
389 /* Discard the cursor(s) and data. */
390 if (data.data != NULL)
391 __os_ufree(env, data.data);
392 if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
393 ret = t_ret;
394 if (ndbc != NULL && (t_ret = __dbc_close(ndbc)) != 0 && ret == 0)
395 ret = t_ret;
396
397 return (ret);
398 }
399
400 /*
401 * __env_dbreg_setup --
402 *
403 * PUBLIC: int __env_dbreg_setup __P((DB *,
404 * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t));
405 */
406 int
__env_dbreg_setup(dbp,txn,fname,dname,id)407 __env_dbreg_setup(dbp, txn, fname, dname, id)
408 DB *dbp;
409 DB_TXN *txn;
410 const char *fname, *dname;
411 u_int32_t id;
412 {
413 ENV *env;
414 int ret;
415
416 env = dbp->env;
417 if (dbp->log_filename == NULL
418 #if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC)
419 && (txn != NULL || F_ISSET(dbp, DB_AM_RECOVER))
420 #endif
421 #if !defined(DEBUG_ROP)
422 && !F_ISSET(dbp, DB_AM_RDONLY)
423 #endif
424 ) {
425 if ((ret = __dbreg_setup(dbp,
426 F_ISSET(dbp, DB_AM_INMEM) ? dname: fname,
427 F_ISSET(dbp, DB_AM_INMEM) ? NULL : dname, id)) != 0)
428 return (ret);
429
430 /*
431 * If we're actively logging and our caller isn't a
432 * recovery function that already did so, then assign
433 * this dbp a log fileid.
434 */
435 if (DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) &&
436 (ret = __dbreg_new_id(dbp, txn)) != 0)
437 return (ret);
438 }
439 return (0);
440 }
441
442 /*
443 * __env_setup --
444 * Set up the underlying environment during a db_open.
445 *
446 * PUBLIC: int __env_setup __P((DB *,
447 * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t, u_int32_t));
448 */
449 int
__env_setup(dbp,txn,fname,dname,id,flags)450 __env_setup(dbp, txn, fname, dname, id, flags)
451 DB *dbp;
452 DB_TXN *txn;
453 const char *fname, *dname;
454 u_int32_t id, flags;
455 {
456 DB *ldbp;
457 DB_ENV *dbenv;
458 ENV *env;
459 u_int32_t maxid;
460 int ret;
461
462 env = dbp->env;
463 dbenv = env->dbenv;
464
465 /*
466 * When verifying an in-memory db, we need to pass dname to
467 * __env_mpool. That is the only time fname will be used.
468 */
469 if (F_ISSET(dbp, DB_AM_INMEM) && F_ISSET(dbp, DB_AM_VERIFYING))
470 fname = dname;
471
472 /* If we don't yet have an environment, it's time to create it. */
473 if (!F_ISSET(env, ENV_OPEN_CALLED)) {
474 #if defined(HAVE_MIXED_SIZE_ADDRESSING) && (SIZEOF_CHAR_P == 8)
475 __db_errx(env, DB_STR("0701", "DB_PRIVATE is not supported by"
476 " 64-bit applications in mixed-size-addressing mode"));
477 return (EINVAL);
478 #endif
479 /* Make sure we have at least DB_MINCACHE pages in our cache. */
480 if (dbenv->mp_gbytes == 0 &&
481 dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE &&
482 (ret = __memp_set_cachesize(
483 dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0)
484 return (ret);
485
486 if ((ret = __env_open(dbenv, NULL, DB_CREATE |
487 DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0)
488 return (ret);
489 }
490
491 /* Join the underlying cache. */
492 if ((!F_ISSET(dbp, DB_AM_INMEM) || F_ISSET(dbp, DB_AM_VERIFYING) ||
493 dname == NULL) && (ret = __env_mpool(dbp, fname, flags)) != 0)
494 return (ret);
495
496 /* We may need a per-thread mutex. */
497 if (LF_ISSET(DB_THREAD) && (ret = __mutex_alloc(
498 env, MTX_DB_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbp->mutex)) != 0)
499 return (ret);
500
501 /*
502 * Set up a bookkeeping entry for this database in the log region,
503 * if such a region exists. Note that even if we're in recovery
504 * or a replication client, where we won't log registries, we'll
505 * still need an FNAME struct, so LOGGING_ON is the correct macro.
506 */
507 if (LOGGING_ON(env) &&
508 (!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) &&
509 (ret = __env_dbreg_setup(dbp, txn, fname, dname, id)) != 0)
510 return (ret);
511
512 /*
513 * Insert ourselves into the ENV's dblist. We allocate a
514 * unique ID to each {fileid, meta page number} pair, and to
515 * each temporary file (since they all have a zero fileid).
516 * This ID gives us something to use to tell which DB handles
517 * go with which databases in all the cursor adjustment
518 * routines, where we don't want to do a lot of ugly and
519 * expensive memcmps.
520 */
521 MUTEX_LOCK(env, env->mtx_dblist);
522 maxid = 0;
523 TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks) {
524 /*
525 * There are three cases: on-disk database (first clause),
526 * named in-memory database (second clause), temporary database
527 * (never matches; no clause).
528 */
529 if (!F_ISSET(dbp, DB_AM_INMEM)) {
530 if (memcmp(ldbp->fileid, dbp->fileid, DB_FILE_ID_LEN)
531 == 0 && ldbp->meta_pgno == dbp->meta_pgno)
532 break;
533 } else if (dname != NULL) {
534 if (F_ISSET(ldbp, DB_AM_INMEM) &&
535 ldbp->dname != NULL &&
536 strcmp(ldbp->dname, dname) == 0)
537 break;
538 }
539 if (ldbp->adj_fileid > maxid)
540 maxid = ldbp->adj_fileid;
541 }
542
543 /*
544 * If ldbp is NULL, we didn't find a match. Assign the dbp an
545 * adj_fileid one higher than the largest we found, and
546 * insert it at the head of the master dbp list.
547 *
548 * If ldbp is not NULL, it is a match for our dbp. Give dbp
549 * the same ID that ldbp has, and add it after ldbp so they're
550 * together in the list.
551 */
552 if (ldbp == NULL) {
553 dbp->adj_fileid = maxid + 1;
554 TAILQ_INSERT_HEAD(&env->dblist, dbp, dblistlinks);
555 } else {
556 dbp->adj_fileid = ldbp->adj_fileid;
557 TAILQ_INSERT_AFTER(&env->dblist, ldbp, dbp, dblistlinks);
558 }
559 MUTEX_UNLOCK(env, env->mtx_dblist);
560
561 return (0);
562 }
563
564 /*
565 * __env_mpool --
566 * Set up the underlying environment cache during a db_open.
567 *
568 * PUBLIC: int __env_mpool __P((DB *, const char *, u_int32_t));
569 */
570 int
__env_mpool(dbp,fname,flags)571 __env_mpool(dbp, fname, flags)
572 DB *dbp;
573 const char *fname;
574 u_int32_t flags;
575 {
576 DBT pgcookie;
577 DB_MPOOLFILE *mpf;
578 DB_PGINFO pginfo;
579 ENV *env;
580 int fidset, ftype, ret;
581 int32_t lsn_off;
582 u_int8_t nullfid[DB_FILE_ID_LEN];
583 u_int32_t clear_len;
584
585 env = dbp->env;
586
587 /* The LSN is the first entry on a DB page, byte offset 0. */
588 lsn_off = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LSN_OFF_NOTSET : 0;
589
590 /* It's possible that this database is already open. */
591 if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
592 return (0);
593
594 /*
595 * If we need to pre- or post-process a file's pages on I/O, set the
596 * file type. If it's a hash file, always call the pgin and pgout
597 * routines. This means that hash files can never be mapped into
598 * process memory. If it's a btree file and requires swapping, we
599 * need to page the file in and out. This has to be right -- we can't
600 * mmap files that are being paged in and out.
601 */
602 switch (dbp->type) {
603 case DB_BTREE:
604 case DB_HEAP:
605 case DB_RECNO:
606 ftype = F_ISSET(dbp, DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM)
607 ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
608 clear_len = CRYPTO_ON(env) ?
609 (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
610 DB_PAGE_DB_LEN;
611 break;
612 case DB_HASH:
613 ftype = DB_FTYPE_SET;
614 clear_len = CRYPTO_ON(env) ?
615 (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
616 DB_PAGE_DB_LEN;
617 break;
618 case DB_QUEUE:
619 ftype = F_ISSET(dbp,
620 DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) ?
621 DB_FTYPE_SET : DB_FTYPE_NOTSET;
622
623 /*
624 * If we came in here without a pagesize set, then we need
625 * to mark the in-memory handle as having clear_len not
626 * set, because we don't really know the clear length or
627 * the page size yet (since the file doesn't yet exist).
628 */
629 clear_len = dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET;
630 break;
631 case DB_UNKNOWN:
632 /*
633 * If we're running in the verifier, our database might
634 * be corrupt and we might not know its type--but we may
635 * still want to be able to verify and salvage.
636 *
637 * If we can't identify the type, it's not going to be safe
638 * to call __db_pgin--we pretty much have to give up all
639 * hope of salvaging cross-endianness. Proceed anyway;
640 * at worst, the database will just appear more corrupt
641 * than it actually is, but at best, we may be able
642 * to salvage some data even with no metadata page.
643 */
644 if (F_ISSET(dbp, DB_AM_VERIFYING)) {
645 ftype = DB_FTYPE_NOTSET;
646 clear_len = DB_PAGE_DB_LEN;
647 break;
648 }
649
650 /*
651 * This might be an in-memory file and we won't know its
652 * file type until after we open it and read the meta-data
653 * page.
654 */
655 if (F_ISSET(dbp, DB_AM_INMEM)) {
656 clear_len = DB_CLEARLEN_NOTSET;
657 ftype = DB_FTYPE_NOTSET;
658 lsn_off = DB_LSN_OFF_NOTSET;
659 break;
660 }
661 /* FALLTHROUGH */
662 default:
663 return (__db_unknown_type(env, "DB->open", dbp->type));
664 }
665
666 mpf = dbp->mpf;
667
668 memset(nullfid, 0, DB_FILE_ID_LEN);
669 fidset = memcmp(nullfid, dbp->fileid, DB_FILE_ID_LEN);
670 if (fidset)
671 (void)__memp_set_fileid(mpf, dbp->fileid);
672
673 (void)__memp_set_clear_len(mpf, clear_len);
674 (void)__memp_set_ftype(mpf, ftype);
675 (void)__memp_set_lsn_offset(mpf, lsn_off);
676
677 pginfo.db_pagesize = dbp->pgsize;
678 pginfo.flags =
679 F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
680 pginfo.type = dbp->type;
681 pgcookie.data = &pginfo;
682 pgcookie.size = sizeof(DB_PGINFO);
683 (void)__memp_set_pgcookie(mpf, &pgcookie);
684
685 #ifndef DIAG_MVCC
686 if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION))
687 #endif
688 if (F_ISSET(dbp, DB_AM_TXN) &&
689 dbp->type != DB_QUEUE && dbp->type != DB_UNKNOWN)
690 LF_SET(DB_MULTIVERSION);
691
692 if ((ret = __memp_fopen(mpf, NULL, fname, &dbp->dirname,
693 LF_ISSET(DB_CREATE | DB_DURABLE_UNKNOWN | DB_MULTIVERSION |
694 DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE) |
695 (F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ? DB_DIRECT : 0) |
696 (F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_TXN_NOT_DURABLE : 0),
697 0, dbp->pgsize)) != 0) {
698 /*
699 * The open didn't work; we need to reset the mpf,
700 * retaining the in-memory semantics (if any).
701 */
702 (void)__memp_fclose(dbp->mpf, 0);
703 (void)__memp_fcreate(env, &dbp->mpf);
704 if (F_ISSET(dbp, DB_AM_INMEM))
705 MAKE_INMEM(dbp);
706 return (ret);
707 }
708
709 /*
710 * Set the open flag. We use it to mean that the dbp has gone
711 * through mpf setup, including dbreg_register. Also, below,
712 * the underlying access method open functions may want to do
713 * things like acquire cursors, so the open flag has to be set
714 * before calling them.
715 */
716 F_SET(dbp, DB_AM_OPEN_CALLED);
717 if (!fidset && fname != NULL) {
718 (void)__memp_get_fileid(dbp->mpf, dbp->fileid);
719 dbp->preserve_fid = 1;
720 }
721
722 return (0);
723 }
724
725 /*
726 * __db_close --
727 * DB->close method.
728 *
729 * PUBLIC: int __db_close __P((DB *, DB_TXN *, u_int32_t));
730 */
731 int
__db_close(dbp,txn,flags)732 __db_close(dbp, txn, flags)
733 DB *dbp;
734 DB_TXN *txn;
735 u_int32_t flags;
736 {
737 ENV *env;
738 int db_ref, deferred_close, ret, t_ret;
739
740 env = dbp->env;
741 deferred_close = 0;
742
743 PERFMON4(env, db, close,
744 dbp->fname, dbp->dname, flags, &dbp->fileid[0]);
745
746 /* Refresh the structure and close any underlying resources. */
747 ret = __db_refresh(dbp, txn, flags, &deferred_close, 0);
748
749 /*
750 * If we've deferred the close because the logging of the close failed,
751 * return our failure right away without destroying the handle.
752 */
753 if (deferred_close)
754 return (ret);
755
756 /* !!!
757 * This code has an apparent race between the moment we read and
758 * decrement env->db_ref and the moment we check whether it's 0.
759 * However, if the environment is DBLOCAL, the user shouldn't have a
760 * reference to the env handle anyway; the only way we can get
761 * multiple dbps sharing a local env is if we open them internally
762 * during something like a subdatabase open. If any such thing is
763 * going on while the user is closing the original dbp with a local
764 * env, someone's already badly screwed up, so there's no reason
765 * to bother engineering around this possibility.
766 */
767 MUTEX_LOCK(env, env->mtx_dblist);
768 db_ref = --env->db_ref;
769 MUTEX_UNLOCK(env, env->mtx_dblist);
770 if (F_ISSET(env, ENV_DBLOCAL) && db_ref == 0 &&
771 (t_ret = __env_close(env->dbenv, 0)) != 0 && ret == 0)
772 ret = t_ret;
773
774 /* Free the database handle. */
775 memset(dbp, CLEAR_BYTE, sizeof(*dbp));
776 __os_free(env, dbp);
777
778 return (ret);
779 }
780
781 /*
782 * __db_refresh --
783 * Refresh the DB structure, releasing any allocated resources.
784 * This does most of the work of closing files now because refresh
785 * is what is used during abort processing (since we can't destroy
786 * the actual handle) and during abort processing, we may have a
787 * fully opened handle.
788 *
789 * PUBLIC: int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int));
790 */
791 int
__db_refresh(dbp,txn,flags,deferred_closep,reuse)792 __db_refresh(dbp, txn, flags, deferred_closep, reuse)
793 DB *dbp;
794 DB_TXN *txn;
795 u_int32_t flags;
796 int *deferred_closep, reuse;
797 {
798 DB *sdbp;
799 DBC *dbc;
800 DB_FOREIGN_INFO *f_info, *tmp;
801 DB_LOCKER *locker;
802 DB_LOCKREQ lreq;
803 ENV *env;
804 REGENV *renv;
805 REGINFO *infop;
806 u_int32_t save_flags;
807 int resync, ret, t_ret;
808
809 ret = 0;
810
811 env = dbp->env;
812 infop = env->reginfo;
813 if (infop != NULL)
814 renv = infop->primary;
815 else
816 renv = NULL;
817
818 /*
819 * If this dbp is not completely open, avoid trapping by trying to
820 * sync without an mpool file.
821 */
822 if (dbp->mpf == NULL)
823 LF_SET(DB_NOSYNC);
824
825 /* If never opened, or not currently open, it's easy. */
826 if (!F_ISSET(dbp, DB_AM_OPEN_CALLED))
827 goto never_opened;
828
829 /*
830 * If we have any secondary indices, disassociate them from us.
831 * We don't bother with the mutex here; it only protects some
832 * of the ops that will make us core-dump mid-close anyway, and
833 * if you're trying to do something with a secondary *while* you're
834 * closing the primary, you deserve what you get. The disassociation
835 * is mostly done just so we can close primaries and secondaries in
836 * any order--but within one thread of control.
837 */
838 LIST_FOREACH(sdbp, &dbp->s_secondaries, s_links) {
839 LIST_REMOVE(sdbp, s_links);
840 if ((t_ret = __db_disassociate(sdbp)) != 0 && ret == 0)
841 ret = t_ret;
842 }
843 if (F_ISSET(dbp, DB_AM_SECONDARY))
844 LIST_REMOVE(dbp, s_links);
845
846 /*
847 * Disassociate ourself from any databases using us as a foreign key
848 * database by clearing the referring db's pointer. Reclaim memory.
849 */
850 f_info = LIST_FIRST(&dbp->f_primaries);
851 while (f_info != NULL) {
852 tmp = LIST_NEXT(f_info, f_links);
853 LIST_REMOVE(f_info, f_links);
854 f_info->dbp->s_foreign = NULL;
855 __os_free(env, f_info);
856 f_info = tmp;
857 }
858
859 if (dbp->s_foreign != NULL &&
860 (t_ret = __db_disassociate_foreign(dbp)) != 0 && ret == 0)
861 ret = t_ret;
862
863 /*
864 * Sync the underlying access method. Do before closing the cursors
865 * because DB->sync allocates cursors in order to write Recno backing
866 * source text files.
867 *
868 * Sync is slow on some systems, notably Solaris filesystems where the
869 * entire buffer cache is searched. If we're in recovery, don't flush
870 * the file, it's not necessary.
871 */
872 if (!LF_ISSET(DB_NOSYNC) &&
873 !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) &&
874 (t_ret = __db_sync(dbp)) != 0 && ret == 0)
875 ret = t_ret;
876
877 /*
878 * Go through the active cursors, unregister each cursor from its
879 * transaction if any, and call the cursor recycle routine,
880 * which resolves pending operations and moves the cursors onto the
881 * free list. Then, walk the free list and call the cursor destroy
882 * routine. Note that any failure on a close is considered "really
883 * bad" and we just break out of the loop and force forward.
884 */
885 resync = TAILQ_FIRST(&dbp->active_queue) == NULL ? 0 : 1;
886 while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) {
887 if (dbc->txn != NULL)
888 TAILQ_REMOVE(&(dbc->txn->my_cursors), dbc, txn_cursors);
889
890 if ((t_ret = __dbc_close(dbc)) != 0) {
891 if (ret == 0)
892 ret = t_ret;
893 break;
894 }
895 }
896
897 while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
898 if ((t_ret = __dbc_destroy(dbc)) != 0) {
899 if (ret == 0)
900 ret = t_ret;
901 break;
902 }
903
904 /*
905 * Close any outstanding join cursors. Join cursors destroy themselves
906 * on close and have no separate destroy routine. We don't have to set
907 * the resync flag here, because join cursors aren't write cursors.
908 */
909 while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL)
910 if ((t_ret = __db_join_close(dbc)) != 0) {
911 if (ret == 0)
912 ret = t_ret;
913 break;
914 }
915
916 /*
917 * Sync the memory pool, even though we've already called DB->sync,
918 * because closing cursors can dirty pages by deleting items they
919 * referenced.
920 *
921 * Sync is slow on some systems, notably Solaris filesystems where the
922 * entire buffer cache is searched. If we're in recovery, don't flush
923 * the file, it's not necessary.
924 */
925 if (resync && !LF_ISSET(DB_NOSYNC) &&
926 !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) &&
927 (t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0)
928 ret = t_ret;
929
930 /*
931 * If there is a file extension watermark associated with this
932 * database, we don't need it any more.
933 */
934 __txn_remove_fe_watermark(txn, dbp);
935
936 never_opened:
937 MUTEX_LOCK(env, env->mtx_dblist);
938 /*
939 * At this point, we haven't done anything to render the DB handle
940 * unusable, at least by a transaction abort. Take the opportunity
941 * now to log the file close if we have initialized the logging
942 * information. If this log fails and we're in a transaction,
943 * we have to bail out of the attempted close; we'll need a dbp in
944 * order to successfully abort the transaction, and we can't conjure
945 * a new one up because we haven't gotten out the dbreg_register
946 * record that represents the close. In this case, we put off
947 * actually closing the dbp until we've performed the abort.
948 */
949 if (!reuse && LOGGING_ON(dbp->env) && dbp->log_filename != NULL) {
950 /*
951 * Discard the log file id, if any. We want to log the close
952 * if and only if this is not a recovery dbp or a client dbp,
953 * or a dead dbp handle.
954 */
955 DB_ASSERT(env, renv != NULL);
956 if (F_ISSET(dbp, DB_AM_RECOVER) || IS_REP_CLIENT(env) ||
957 dbp->timestamp != renv->rep_timestamp) {
958 if ((t_ret = __dbreg_revoke_id(dbp,
959 0, DB_LOGFILEID_INVALID)) == 0 && ret == 0)
960 ret = t_ret;
961 if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0)
962 ret = t_ret;
963 } else {
964 if ((t_ret = __dbreg_close_id(dbp,
965 txn, DBREG_CLOSE)) != 0 && txn != NULL) {
966 MUTEX_UNLOCK(env, env->mtx_dblist);
967 /*
968 * We're in a txn and the attempt to log the
969 * close failed; let the txn subsystem know
970 * that we need to destroy this dbp once we're
971 * done with the abort, then bail from the
972 * close.
973 *
974 * Note that if the attempt to put off the
975 * close -also- fails--which it won't unless
976 * we're out of heap memory--we're really
977 * screwed. Panic.
978 */
979 if ((ret =
980 __txn_closeevent(env, txn, dbp)) != 0)
981 return (__env_panic(env, ret));
982 if (deferred_closep != NULL)
983 *deferred_closep = 1;
984 return (t_ret);
985 }
986 /*
987 * If dbreg_close_id failed and we were not in a
988 * transaction, then we need to finish this close
989 * because the caller can't do anything with the
990 * handle after we return an error. We rely on
991 * dbreg_close_id to mark the entry in some manner
992 * so that we do not do a clean shutdown of this
993 * environment. If shutdown isn't clean, then the
994 * application *must* run recovery and that will
995 * generate the RCLOSE record.
996 */
997 }
998
999 }
1000
1001 /* Close any handle we've been holding since the open. */
1002 if (dbp->saved_open_fhp != NULL &&
1003 (t_ret = __os_closehandle(env, dbp->saved_open_fhp)) != 0 &&
1004 ret == 0)
1005 ret = t_ret;
1006
1007 /*
1008 * Remove this DB handle from the ENV's dblist, if it's been added.
1009 *
1010 * Close our reference to the underlying cache while locked, we don't
1011 * want to race with a thread searching for our underlying cache link
1012 * while opening a DB handle.
1013 *
1014 * The DB handle may not yet have been added to the ENV list, don't
1015 * blindly call the underlying TAILQ_REMOVE macro. Explicitly reset
1016 * the field values to NULL so that we can't call TAILQ_REMOVE twice.
1017 */
1018 if (!reuse &&
1019 (dbp->dblistlinks.tqe_next != NULL ||
1020 dbp->dblistlinks.tqe_prev != NULL)) {
1021 TAILQ_REMOVE(&env->dblist, dbp, dblistlinks);
1022 dbp->dblistlinks.tqe_next = NULL;
1023 dbp->dblistlinks.tqe_prev = NULL;
1024 }
1025
1026 /* Close the memory pool file handle. */
1027 if (dbp->mpf != NULL) {
1028 if ((t_ret = __memp_fclose(dbp->mpf,
1029 F_ISSET(dbp, DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) != 0 &&
1030 ret == 0)
1031 ret = t_ret;
1032 dbp->mpf = NULL;
1033 if (reuse &&
1034 (t_ret = __memp_fcreate(env, &dbp->mpf)) != 0 &&
1035 ret == 0)
1036 ret = t_ret;
1037 }
1038
1039 MUTEX_UNLOCK(env, env->mtx_dblist);
1040
1041 /*
1042 * Call the access specific close function.
1043 *
1044 * We do this here rather than in __db_close as we need to do this when
1045 * aborting an open so that file descriptors are closed and abort of
1046 * renames can succeed on platforms that lock open files (such as
1047 * Windows). In particular, we need to ensure that all the extents
1048 * associated with a queue are closed so that queue renames can be
1049 * aborted.
1050 *
1051 * It is also important that we do this before releasing the handle
1052 * lock, because dbremove and dbrename assume that once they have the
1053 * handle lock, it is safe to modify the underlying file(s).
1054 *
1055 * !!!
1056 * Because of where these functions are called in the DB handle close
1057 * process, these routines can't do anything that would dirty pages or
1058 * otherwise affect closing down the database. Specifically, we can't
1059 * abort and recover any of the information they control.
1060 */
1061 #ifdef HAVE_PARTITION
1062 if (dbp->p_internal != NULL &&
1063 (t_ret = __partition_close(dbp, txn, flags)) != 0 && ret == 0)
1064 ret = t_ret;
1065 #endif
1066 if ((t_ret = __bam_db_close(dbp)) != 0 && ret == 0)
1067 ret = t_ret;
1068 if ((t_ret = __ham_db_close(dbp)) != 0 && ret == 0)
1069 ret = t_ret;
1070 if ((t_ret = __heap_db_close(dbp)) != 0 && ret == 0)
1071 ret = t_ret;
1072 if ((t_ret = __qam_db_close(dbp, dbp->flags)) != 0 && ret == 0)
1073 ret = t_ret;
1074
1075 /*
1076 * !!!
1077 * At this point, the access-method specific information has been
1078 * freed. From now on, we can use the dbp, but not touch any
1079 * access-method specific data.
1080 */
1081
1082 if (!reuse && dbp->locker != NULL) {
1083 /* We may have pending trade operations on this dbp. */
1084 if (txn == NULL)
1085 txn = dbp->cur_txn;
1086 if (IS_REAL_TXN(txn))
1087 __txn_remlock(env,
1088 txn, &dbp->handle_lock, dbp->locker);
1089
1090 /* We may be holding the handle lock; release it. */
1091 lreq.op = DB_LOCK_PUT_ALL;
1092 lreq.obj = NULL;
1093 if ((t_ret = __lock_vec(env,
1094 dbp->locker, 0, &lreq, 1, NULL)) != 0 && ret == 0)
1095 ret = t_ret;
1096
1097 if ((t_ret =
1098 __lock_id_free(env, dbp->locker)) != 0 && ret == 0)
1099 ret = t_ret;
1100 dbp->locker = NULL;
1101 LOCK_INIT(dbp->handle_lock);
1102 }
1103
1104 /*
1105 * If this is a temporary file (un-named in-memory file), then
1106 * discard the locker ID allocated as the fileid.
1107 */
1108 if (LOCKING_ON(env) &&
1109 F_ISSET(dbp, DB_AM_INMEM) && !dbp->preserve_fid &&
1110 *(u_int32_t *)dbp->fileid != DB_LOCK_INVALIDID) {
1111 if ((t_ret = __lock_getlocker(env->lk_handle,
1112 *(u_int32_t *)dbp->fileid, 0, &locker)) == 0)
1113 t_ret = __lock_id_free(env, locker);
1114 if (ret == 0)
1115 ret = t_ret;
1116 }
1117
1118 if (reuse) {
1119 /*
1120 * If we are reusing this dbp, then we're done now. Re-init
1121 * the handle, preserving important flags, and then return.
1122 * This code is borrowed from __db_init, which does more
1123 * than we can do here.
1124 */
1125 save_flags = F_ISSET(dbp, DB_AM_INMEM |
1126 DB_AM_RDONLY | DB_AM_TXN);
1127
1128 if ((ret = __bam_db_create(dbp)) != 0)
1129 return (ret);
1130 if ((ret = __ham_db_create(dbp)) != 0)
1131 return (ret);
1132 if ((ret = __heap_db_create(dbp)) != 0)
1133 return (ret);
1134 if ((ret = __qam_db_create(dbp)) != 0)
1135 return (ret);
1136
1137 /* Restore flags */
1138 dbp->flags = dbp->orig_flags | save_flags;
1139
1140 if (FLD_ISSET(save_flags, DB_AM_INMEM)) {
1141 /*
1142 * If this is inmem, then it may have a fileid
1143 * even if it was never opened, and we need to
1144 * clear out that fileid.
1145 */
1146 memset(dbp->fileid, 0, sizeof(dbp->fileid));
1147 MAKE_INMEM(dbp);
1148 }
1149 return (ret);
1150 }
1151
1152 dbp->type = DB_UNKNOWN;
1153
1154 /*
1155 * The thread mutex may have been invalidated in __dbreg_close_id if the
1156 * fname refcount did not go to 0. If not, discard the thread mutex.
1157 */
1158 if ((t_ret = __mutex_free(env, &dbp->mutex)) != 0 && ret == 0)
1159 ret = t_ret;
1160
1161 /* Discard any memory allocated for the file and database names. */
1162 if (dbp->fname != NULL) {
1163 __os_free(dbp->env, dbp->fname);
1164 dbp->fname = NULL;
1165 }
1166 if (dbp->dname != NULL) {
1167 __os_free(dbp->env, dbp->dname);
1168 dbp->dname = NULL;
1169 }
1170
1171 /* Discard any memory used to store returned data. */
1172 if (dbp->my_rskey.data != NULL)
1173 __os_free(dbp->env, dbp->my_rskey.data);
1174 if (dbp->my_rkey.data != NULL)
1175 __os_free(dbp->env, dbp->my_rkey.data);
1176 if (dbp->my_rdata.data != NULL)
1177 __os_free(dbp->env, dbp->my_rdata.data);
1178
1179 /* For safety's sake; we may refresh twice. */
1180 memset(&dbp->my_rskey, 0, sizeof(DBT));
1181 memset(&dbp->my_rkey, 0, sizeof(DBT));
1182 memset(&dbp->my_rdata, 0, sizeof(DBT));
1183
1184 /* Clear out fields that normally get set during open. */
1185 memset(dbp->fileid, 0, sizeof(dbp->fileid));
1186 dbp->adj_fileid = 0;
1187 dbp->meta_pgno = 0;
1188 dbp->cur_locker = NULL;
1189 dbp->cur_txn = NULL;
1190 dbp->associate_locker = NULL;
1191 dbp->open_flags = 0;
1192
1193 /*
1194 * If we are being refreshed with a txn specified, then we need
1195 * to make sure that we clear out the lock handle field, because
1196 * releasing all the locks for this transaction will release this
1197 * lock and we don't want close to stumble upon this handle and
1198 * try to close it.
1199 */
1200 if (txn != NULL)
1201 LOCK_INIT(dbp->handle_lock);
1202
1203 /* Reset flags to whatever the user configured. */
1204 dbp->flags = dbp->orig_flags;
1205
1206 return (ret);
1207 }
1208
1209 /*
1210 * __db_disassociate --
1211 * Destroy the association between a given secondary and its primary.
1212 */
1213 static int
__db_disassociate(sdbp)1214 __db_disassociate(sdbp)
1215 DB *sdbp;
1216 {
1217 DBC *dbc;
1218 int ret, t_ret;
1219
1220 ret = 0;
1221
1222 sdbp->s_callback = NULL;
1223 sdbp->s_primary = NULL;
1224 sdbp->get = sdbp->stored_get;
1225 sdbp->close = sdbp->stored_close;
1226
1227 /*
1228 * Complain, but proceed, if we have any active cursors. (We're in
1229 * the middle of a close, so there's really no turning back.)
1230 */
1231 if (sdbp->s_refcnt != 1 ||
1232 TAILQ_FIRST(&sdbp->active_queue) != NULL ||
1233 TAILQ_FIRST(&sdbp->join_queue) != NULL) {
1234 __db_errx(sdbp->env, DB_STR("0674",
1235 "Closing a primary DB while a secondary DB has active cursors is unsafe"));
1236 ret = EINVAL;
1237 }
1238 sdbp->s_refcnt = 0;
1239
1240 while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
1241 if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0)
1242 ret = t_ret;
1243
1244 F_CLR(sdbp, DB_AM_SECONDARY);
1245 return (ret);
1246 }
1247
1248 /*
1249 * __db_disassociate_foreign --
1250 * Destroy the association between a given secondary and its foreign.
1251 */
1252 static int
__db_disassociate_foreign(sdbp)1253 __db_disassociate_foreign(sdbp)
1254 DB *sdbp;
1255 {
1256 DB *fdbp;
1257 DB_FOREIGN_INFO *f_info, *tmp;
1258 int ret;
1259
1260 if (sdbp->s_foreign == NULL)
1261 return (0);
1262 if ((ret = __os_malloc(sdbp->env, sizeof(DB_FOREIGN_INFO), &tmp)) != 0)
1263 return (ret);
1264
1265 fdbp = sdbp->s_foreign;
1266 ret = 0;
1267 f_info = LIST_FIRST(&fdbp->f_primaries);
1268 while (f_info != NULL) {
1269 tmp = LIST_NEXT(f_info, f_links);
1270 if (f_info ->dbp == sdbp) {
1271 LIST_REMOVE(f_info, f_links);
1272 __os_free(sdbp->env, f_info);
1273 }
1274 f_info = tmp;
1275 }
1276
1277 return (ret);
1278 }
1279
1280 /*
1281 * __db_log_page
1282 * Log a meta-data or root page during a subdatabase create operation.
1283 *
1284 * PUBLIC: int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *));
1285 */
1286 int
__db_log_page(dbp,txn,lsn,pgno,page)1287 __db_log_page(dbp, txn, lsn, pgno, page)
1288 DB *dbp;
1289 DB_TXN *txn;
1290 DB_LSN *lsn;
1291 db_pgno_t pgno;
1292 PAGE *page;
1293 {
1294 DBT page_dbt;
1295 DB_LSN new_lsn;
1296 int ret;
1297
1298 if (!LOGGING_ON(dbp->env) || txn == NULL)
1299 return (0);
1300
1301 memset(&page_dbt, 0, sizeof(page_dbt));
1302 page_dbt.size = dbp->pgsize;
1303 page_dbt.data = page;
1304
1305 ret = __crdel_metasub_log(dbp, txn, &new_lsn, F_ISSET(dbp,
1306 DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0, pgno, &page_dbt, lsn);
1307
1308 if (ret == 0)
1309 page->lsn = new_lsn;
1310 return (ret);
1311 }
1312
1313 /*
1314 * __db_walk_cursors
1315 * Walk all cursors for a database.
1316 *
1317 * PUBLIC: int __db_walk_cursors __P((DB *, DBC *,
1318 * PUBLIC: int (*) __P((DBC *, DBC *,
1319 * PUBLIC: u_int32_t *, db_pgno_t, u_int32_t, void *)),
1320 * PUBLIC: u_int32_t *, db_pgno_t, u_int32_t, void *));
1321 */
1322 int
__db_walk_cursors(dbp,my_dbc,func,countp,pgno,indx,args)1323 __db_walk_cursors(dbp, my_dbc, func, countp, pgno, indx, args)
1324 DB *dbp;
1325 DBC *my_dbc;
1326 int (*func)__P((DBC *, DBC *,
1327 u_int32_t *, db_pgno_t, u_int32_t, void *));
1328 u_int32_t *countp;
1329 db_pgno_t pgno;
1330 u_int32_t indx;
1331 void *args;
1332 {
1333 ENV *env;
1334 DB *ldbp;
1335 DBC *dbc;
1336 int ret;
1337
1338 env = dbp->env;
1339 ret = 0;
1340
1341 MUTEX_LOCK(env, env->mtx_dblist);
1342 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
1343 for (*countp = 0;
1344 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
1345 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
1346 loop: MUTEX_LOCK(env, ldbp->mutex);
1347 TAILQ_FOREACH(dbc, &ldbp->active_queue, links)
1348 if ((ret = (func)(dbc, my_dbc,
1349 countp, pgno, indx, args)) != 0)
1350 break;
1351 /*
1352 * We use the error to communicate that function
1353 * dropped the mutex.
1354 */
1355 if (ret == DB_LOCK_NOTGRANTED)
1356 goto loop;
1357 MUTEX_UNLOCK(env, ldbp->mutex);
1358 if (ret != 0)
1359 break;
1360 }
1361 MUTEX_UNLOCK(env, env->mtx_dblist);
1362 return (ret);
1363 }
1364
1365 /*
1366 * __db_backup_name
1367 * Create the backup file name for a given file.
1368 *
1369 * PUBLIC: int __db_backup_name __P((ENV *,
1370 * PUBLIC: const char *, DB_TXN *, char **));
1371 */
1372 #undef BACKUP_PREFIX
1373 #define BACKUP_PREFIX "__db."
1374
1375 #undef MAX_INT_TO_HEX
1376 #define MAX_INT_TO_HEX 8
1377
1378 int
__db_backup_name(env,name,txn,backup)1379 __db_backup_name(env, name, txn, backup)
1380 ENV *env;
1381 const char *name;
1382 DB_TXN *txn;
1383 char **backup;
1384 {
1385 u_int32_t id;
1386 size_t len;
1387 int ret;
1388 char *p, *retp;
1389
1390 *backup = NULL;
1391
1392 /*
1393 * Part of the name may be a full path, so we need to make sure that
1394 * we allocate enough space for it, even in the case where we don't
1395 * use the entire filename for the backup name.
1396 */
1397 len = strlen(name) + strlen(BACKUP_PREFIX) + 2 * MAX_INT_TO_HEX + 1;
1398 if ((ret = __os_malloc(env, len, &retp)) != 0)
1399 return (ret);
1400
1401 /*
1402 * Create the name. Backup file names are in one of 2 forms: in a
1403 * transactional env "__db.TXNID.ID", where ID is a random number,
1404 * and in any other env "__db.FILENAME".
1405 *
1406 * In addition, the name passed may contain an env-relative path.
1407 * In that case, put the "__db." in the right place (in the last
1408 * component of the pathname).
1409 *
1410 * There are four cases here:
1411 * 1. simple path w/out transaction
1412 * 2. simple path + transaction
1413 * 3. multi-component path w/out transaction
1414 * 4. multi-component path + transaction
1415 */
1416 p = __db_rpath(name);
1417 if (IS_REAL_TXN(txn)) {
1418 __os_unique_id(env, &id);
1419 if (p == NULL) /* Case 2. */
1420 snprintf(retp, len, "%s%x.%x",
1421 BACKUP_PREFIX, txn->txnid, id);
1422 else /* Case 4. */
1423 snprintf(retp, len, "%.*s%x.%x",
1424 (int)(p - name) + 1, name, txn->txnid, id);
1425 } else {
1426 if (p == NULL) /* Case 1. */
1427 snprintf(retp, len, "%s%s", BACKUP_PREFIX, name);
1428 else /* Case 3. */
1429 snprintf(retp, len, "%.*s%s%s",
1430 (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1);
1431 }
1432
1433 *backup = retp;
1434 return (0);
1435 }
1436
1437 #ifdef CONFIG_TEST
1438 /*
1439 * __db_testcopy
1440 * Create a copy of all backup files and our "main" DB.
1441 *
1442 * PUBLIC: #ifdef CONFIG_TEST
1443 * PUBLIC: int __db_testcopy __P((ENV *, DB *, const char *));
1444 * PUBLIC: #endif
1445 */
1446 int
__db_testcopy(env,dbp,name)1447 __db_testcopy(env, dbp, name)
1448 ENV *env;
1449 DB *dbp;
1450 const char *name;
1451 {
1452 DB_MPOOL *dbmp;
1453 DB_MPOOLFILE *mpf;
1454
1455 DB_ASSERT(env, dbp != NULL || name != NULL);
1456
1457 if (name == NULL) {
1458 dbmp = env->mp_handle;
1459 mpf = dbp->mpf;
1460 name = R_ADDR(dbmp->reginfo, mpf->mfp->path_off);
1461 }
1462
1463 if (dbp != NULL && dbp->type == DB_QUEUE)
1464 return (__qam_testdocopy(dbp, name));
1465 else
1466 #ifdef HAVE_PARTITION
1467 if (dbp != NULL && DB_IS_PARTITIONED(dbp))
1468 return (__part_testdocopy(dbp, name));
1469 else
1470 #endif
1471 return (__db_testdocopy(env, name));
1472 }
1473
1474 static int
__qam_testdocopy(dbp,name)1475 __qam_testdocopy(dbp, name)
1476 DB *dbp;
1477 const char *name;
1478 {
1479 DB_THREAD_INFO *ip;
1480 QUEUE_FILELIST *filelist, *fp;
1481 int ret;
1482 char buf[DB_MAXPATHLEN], *dir;
1483
1484 filelist = NULL;
1485 if ((ret = __db_testdocopy(dbp->env, name)) != 0)
1486 return (ret);
1487
1488 /* Call ENV_GET_THREAD_INFO to get a valid DB_THREAD_INFO */
1489 ENV_GET_THREAD_INFO(dbp->env, ip);
1490 if (dbp->mpf != NULL &&
1491 (ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0)
1492 goto done;
1493
1494 if (filelist == NULL)
1495 return (0);
1496 dir = ((QUEUE *)dbp->q_internal)->dir;
1497 for (fp = filelist; fp->mpf != NULL; fp++) {
1498 snprintf(buf, sizeof(buf),
1499 QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id);
1500 if ((ret = __db_testdocopy(dbp->env, buf)) != 0)
1501 return (ret);
1502 }
1503
1504 done: __os_free(dbp->env, filelist);
1505 return (0);
1506 }
1507
1508 /*
1509 * __db_testdocopy
1510 * Create a copy of all backup files and our "main" DB.
1511 * PUBLIC: int __db_testdocopy __P((ENV *, const char *));
1512 */
1513 int
__db_testdocopy(env,name)1514 __db_testdocopy(env, name)
1515 ENV *env;
1516 const char *name;
1517 {
1518 size_t len;
1519 int dircnt, i, ret;
1520 char *copy, **namesp, *p, *real_name;
1521
1522 dircnt = 0;
1523 copy = NULL;
1524 namesp = NULL;
1525
1526 /* Create the real backing file name. */
1527 if ((ret = __db_appname(env,
1528 DB_APP_DATA, name, NULL, &real_name)) != 0)
1529 return (ret);
1530
1531 /*
1532 * !!!
1533 * There are tests that attempt to copy non-existent files. I'd guess
1534 * it's a testing bug, but I don't have time to figure it out. Block
1535 * the case here.
1536 */
1537 if (__os_exists(env, real_name, NULL) != 0) {
1538 __os_free(env, real_name);
1539 return (0);
1540 }
1541
1542 /*
1543 * Copy the file itself.
1544 *
1545 * Allocate space for the file name, including adding an ".afterop" and
1546 * trailing nul byte.
1547 */
1548 len = strlen(real_name) + sizeof(".afterop");
1549 if ((ret = __os_malloc(env, len, ©)) != 0)
1550 goto err;
1551 snprintf(copy, len, "%s.afterop", real_name);
1552 if ((ret = __db_makecopy(env, real_name, copy)) != 0)
1553 goto err;
1554
1555 /*
1556 * Get the directory path to call __os_dirlist().
1557 */
1558 if ((p = __db_rpath(real_name)) != NULL)
1559 *p = '\0';
1560 if ((ret = __os_dirlist(env, real_name, 0, &namesp, &dircnt)) != 0)
1561 goto err;
1562
1563 /*
1564 * Walk the directory looking for backup files. Backup file names in
1565 * transactional environments are of the form:
1566 *
1567 * BACKUP_PREFIX.TXNID.ID
1568 */
1569 for (i = 0; i < dircnt; i++) {
1570 /* Check for a related backup file name. */
1571 if (strncmp(
1572 namesp[i], BACKUP_PREFIX, sizeof(BACKUP_PREFIX) - 1) != 0)
1573 continue;
1574 p = namesp[i] + sizeof(BACKUP_PREFIX);
1575 p += strspn(p, "0123456789ABCDEFabcdef");
1576 if (*p != '.')
1577 continue;
1578 ++p;
1579 p += strspn(p, "0123456789ABCDEFabcdef");
1580 if (*p != '\0')
1581 continue;
1582
1583 /*
1584 * Copy the backup file.
1585 *
1586 * Allocate space for the file name, including adding a
1587 * ".afterop" and trailing nul byte.
1588 */
1589 if (real_name != NULL) {
1590 __os_free(env, real_name);
1591 real_name = NULL;
1592 }
1593 if ((ret = __db_appname(env,
1594 DB_APP_DATA, namesp[i], NULL, &real_name)) != 0)
1595 goto err;
1596 if (copy != NULL) {
1597 __os_free(env, copy);
1598 copy = NULL;
1599 }
1600 len = strlen(real_name) + sizeof(".afterop");
1601 if ((ret = __os_malloc(env, len, ©)) != 0)
1602 goto err;
1603 snprintf(copy, len, "%s.afterop", real_name);
1604 if ((ret = __db_makecopy(env, real_name, copy)) != 0)
1605 goto err;
1606 }
1607
1608 err: if (namesp != NULL)
1609 __os_dirfree(env, namesp, dircnt);
1610 if (copy != NULL)
1611 __os_free(env, copy);
1612 if (real_name != NULL)
1613 __os_free(env, real_name);
1614 return (ret);
1615 }
1616
1617 static int
__db_makecopy(env,src,dest)1618 __db_makecopy(env, src, dest)
1619 ENV *env;
1620 const char *src, *dest;
1621 {
1622 DB_FH *rfhp, *wfhp;
1623 size_t rcnt, wcnt;
1624 int ret;
1625 char *buf;
1626
1627 rfhp = wfhp = NULL;
1628
1629 if ((ret = __os_malloc(env, 64 * 1024, &buf)) != 0)
1630 goto err;
1631
1632 if ((ret = __os_open(env, src, 0,
1633 DB_OSO_RDONLY, DB_MODE_600, &rfhp)) != 0)
1634 goto err;
1635 if ((ret = __os_open(env, dest, 0,
1636 DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &wfhp)) != 0)
1637 goto err;
1638
1639 for (;;) {
1640 if ((ret =
1641 __os_read(env, rfhp, buf, sizeof(buf), &rcnt)) != 0)
1642 goto err;
1643 if (rcnt == 0)
1644 break;
1645 if ((ret =
1646 __os_write(env, wfhp, buf, sizeof(buf), &wcnt)) != 0)
1647 goto err;
1648 }
1649
1650 if (0) {
1651 err: __db_err(env, ret, "__db_makecopy: %s -> %s", src, dest);
1652 }
1653
1654 if (buf != NULL)
1655 __os_free(env, buf);
1656 if (rfhp != NULL)
1657 (void)__os_closehandle(env, rfhp);
1658 if (wfhp != NULL)
1659 (void)__os_closehandle(env, wfhp);
1660 return (ret);
1661 }
1662 #endif
1663