1 /*-
2 * Copyright (c) 1996, 2020 Oracle and/or its affiliates. All rights reserved.
3 *
4 * See the file LICENSE for license information.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/hash.h"
15 #ifndef HAVE_QUEUE
16 #include "dbinc/qam.h" /* For __db_no_queue_am(). */
17 #endif
18 #include "dbinc/lock.h"
19 #include "dbinc/mp.h"
20 #include "dbinc/partition.h"
21 #include "dbinc/txn.h"
22
23 static int __db_associate_arg __P((DB *, DB *,
24 int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
25 static int __dbc_del_arg __P((DBC *, u_int32_t));
26 static int __dbc_pget_arg __P((DBC *, DBT *, u_int32_t));
27 static int __dbc_put_arg __P((DBC *, DBT *, DBT *, u_int32_t));
28 static int __db_curinval __P((const ENV *));
29 static int __db_cursor_arg __P((DB *, u_int32_t));
30 static int __db_del_arg __P((DB *, DBT *, u_int32_t, int));
31 static int __db_get_arg __P((const DB *, DBT *, DBT *, u_int32_t));
32 static int __db_join_arg __P((DB *, DBC **, u_int32_t));
33 static int __db_pget_arg __P((DB *, DBT *, u_int32_t));
34 static int __db_put_arg __P((DB *, DBT *, DBT *, u_int32_t, int));
35 static int __dbt_ferr __P((const DB *, const char *, const DBT *, int));
36 static int __db_compact_func
37 __P((DBC *, DBC *, u_int32_t *, db_pgno_t, u_int32_t, void *));
38 static int __db_associate_foreign_arg __P((DB *, DB *,
39 int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
40 u_int32_t));
41
42 /*
43 * These functions implement the Berkeley DB API. They are organized in a
44 * layered fashion. The interface functions (XXX_pp) perform all generic
45 * error checks (for example, PANIC'd region, replication state change
46 * in progress, inconsistent transaction usage), call function-specific
47 * check routines (_arg) to check for proper flag usage, etc., do pre-amble
48 * processing (incrementing handle counts, handling local transactions),
49 * call the function and then do post-amble processing (local transactions,
50 * decrement handle counts).
51 *
52 * The basic structure is:
53 * Check for simple/generic errors (PANIC'd region)
54 * Check if replication is changing state (increment handle count).
55 * Call function-specific argument checking routine
56 * Create internal transaction if necessary
57 * Call underlying worker function
58 * Commit/abort internal transaction if necessary
59 * Decrement handle count
60 */
61
62 /*
63 * __db_associate_pp --
64 * DB->associate pre/post processing.
65 *
66 * PUBLIC: int __db_associate_pp __P((DB *, DB_TXN *, DB *,
67 * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
68 */
69 int
__db_associate_pp(dbp,txn,sdbp,callback,flags)70 __db_associate_pp(dbp, txn, sdbp, callback, flags)
71 DB *dbp, *sdbp;
72 DB_TXN *txn;
73 int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
74 u_int32_t flags;
75 {
76 DBC *sdbc;
77 DB_THREAD_INFO *ip;
78 ENV *env;
79 int handle_check, ret, t_ret, txn_local;
80
81 env = dbp->env;
82 txn_local = 0;
83
84 STRIP_AUTO_COMMIT(flags);
85
86 ENV_ENTER(env, ip);
87 XA_CHECK_TXN(ip, txn);
88
89 /* Check for replication block. */
90 handle_check = IS_ENV_REPLICATED(env);
91 if (handle_check &&
92 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
93 handle_check = 0;
94 goto err;
95 }
96
97 /*
98 * Secondary cursors may have the primary's lock file ID, so we need
99 * to make sure that no older cursors are lying around when we make
100 * the transition.
101 */
102 if (TAILQ_FIRST(&sdbp->active_queue) != NULL ||
103 TAILQ_FIRST(&sdbp->join_queue) != NULL) {
104 ret = USR_ERR(env, EINVAL);
105 __db_errx(env, DB_STR("0572",
106 "Databases may not become secondary indices while cursors are open"));
107 goto err;
108 }
109
110 if ((ret = __db_associate_arg(dbp, sdbp, callback, flags)) != 0)
111 goto err;
112
113 /*
114 * Create a local transaction as necessary, check for consistent
115 * transaction usage, and, if we have no transaction but do have
116 * locking on, acquire a locker id for the handle lock acquisition.
117 */
118 if (IS_DB_AUTO_COMMIT(dbp, txn)) {
119 if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
120 goto err;
121 txn_local = 1;
122 }
123
124 /* Check for consistent transaction usage. */
125 if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
126 goto err;
127
128 while ((sdbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
129 if ((ret = __dbc_destroy(sdbc)) != 0)
130 goto err;
131
132 #ifdef HAVE_SLICES
133 if (FLD_ISSET(dbp->open_flags, DB_SLICED) !=
134 FLD_ISSET(sdbp->open_flags, DB_SLICED)) {
135 ret = USR_ERR(dbp->env, EINVAL);
136 __db_errx(dbp->env,
137 "DB->associate() does not support mixing sliced and non-sliced databases");
138 } else if (FLD_ISSET(dbp->open_flags, DB_SLICED))
139 ret = __db_slice_associate(dbp, txn, sdbp, callback, flags);
140 else
141 #endif
142 /*lint -e{539} Did not expect positive indentation. */
143 ret = __db_associate(dbp, ip, txn, sdbp, callback, flags);
144
145 err: if (txn_local &&
146 (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
147 ret = t_ret;
148
149 /* Release replication block. */
150 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
151 ret = t_ret;
152 ENV_LEAVE(env, ip);
153 return (ret);
154 }
155
156 /*
157 * __db_associate_arg --
158 * Check DB->associate arguments.
159 */
160 static int
__db_associate_arg(dbp,sdbp,callback,flags)161 __db_associate_arg(dbp, sdbp, callback, flags)
162 DB *dbp, *sdbp;
163 int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
164 u_int32_t flags;
165 {
166 ENV *env;
167 int ret;
168
169 env = dbp->env;
170
171 if (dbp->blob_threshold || sdbp->blob_threshold) {
172 __db_errx(env, DB_STR("0751",
173 "Secondary and primary databases cannot support external files."));
174 return (EINVAL);
175 }
176
177 if (sdbp->type == DB_HEAP) {
178 __db_errx(env, DB_STR("0752",
179 "Heap databases may not be used as secondary databases"));
180 return (EINVAL);
181 }
182
183 if (F_ISSET(sdbp, DB_AM_SECONDARY)) {
184 __db_errx(env, DB_STR("0573",
185 "Secondary index handles may not be re-associated"));
186 return (EINVAL);
187 }
188 if (F_ISSET(dbp, DB_AM_SECONDARY)) {
189 __db_errx(env, DB_STR("0574",
190 "Secondary indices may not be used as primary databases"));
191 return (EINVAL);
192 }
193 if (F_ISSET(dbp, DB_AM_DUP)) {
194 __db_errx(env, DB_STR("0575",
195 "Primary databases may not be configured with duplicates"));
196 return (EINVAL);
197 }
198 if (F_ISSET(dbp, DB_AM_RENUMBER)) {
199 __db_errx(env, DB_STR("0576",
200 "Renumbering recno databases may not be used as primary databases"));
201 return (EINVAL);
202 }
203
204 /*
205 * It's OK for the primary and secondary to not share an environment IFF
206 * the environments are local to the DB handle. (Specifically, cursor
207 * adjustment will work correctly in this case.) The environment being
208 * local implies the environment is not configured for either locking or
209 * transactions, as neither of those could work correctly.
210 */
211 if (dbp->env != sdbp->env &&
212 (!F_ISSET(dbp->env, ENV_DBLOCAL) ||
213 !F_ISSET(sdbp->env, ENV_DBLOCAL))) {
214 __db_errx(env, DB_STR("0577",
215 "The primary and secondary must be opened in the same environment"));
216 return (EINVAL);
217 }
218 if ((DB_IS_THREADED(dbp) && !DB_IS_THREADED(sdbp)) ||
219 (!DB_IS_THREADED(dbp) && DB_IS_THREADED(sdbp))) {
220 __db_errx(env, DB_STR("0578",
221 "The DB_THREAD setting must be the same for primary and secondary"));
222 return (EINVAL);
223 }
224 if (callback == NULL &&
225 (!F_ISSET(dbp, DB_AM_RDONLY) || !F_ISSET(sdbp, DB_AM_RDONLY))) {
226 __db_errx(env, DB_STR("0579",
227 "Callback function may be NULL only when database handles are read-only"));
228 return (EINVAL);
229 }
230
231 if ((ret = __db_fchk(env, "DB->associate", flags, DB_CREATE |
232 DB_IMMUTABLE_KEY)) != 0)
233 return (ret);
234
235 return (0);
236 }
237
238 /*
239 * __db_close_pp --
240 * DB->close pre/post processing.
241 *
242 * PUBLIC: int __db_close_pp __P((DB *, u_int32_t));
243 */
244 int
__db_close_pp(dbp,flags)245 __db_close_pp(dbp, flags)
246 DB *dbp;
247 u_int32_t flags;
248 {
249 DB_THREAD_INFO *ip;
250 ENV *env;
251 int handle_check, ret, t_ret;
252
253 env = dbp->env;
254 ret = 0;
255
256 /*
257 * Close a DB handle -- as a handle destructor, we can't fail.
258 *
259 * !!!
260 * The actual argument checking is simple, do it inline, outside of
261 * the replication block.
262 */
263 if (flags != 0 && flags != DB_NOSYNC)
264 ret = __db_ferr(env, "DB->close", 0);
265
266 ENV_ENTER(env, ip);
267
268 /* Check for replication block. */
269 handle_check = IS_ENV_REPLICATED(env);
270 if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
271 handle_check = 0;
272 if (ret == 0)
273 ret = t_ret;
274 }
275
276 if ((t_ret = __db_close(dbp, NULL, flags)) != 0 && ret == 0)
277 ret = t_ret;
278
279 /* Release replication block. */
280 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
281 ret = t_ret;
282
283 ENV_LEAVE(env, ip);
284 return (ret);
285 }
286
287 /*
288 * __db_cursor_pp --
289 * DB->cursor pre/post processing.
290 *
291 * PUBLIC: int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t));
292 */
293 int
__db_cursor_pp(dbp,txn,dbcp,flags)294 __db_cursor_pp(dbp, txn, dbcp, flags)
295 DB *dbp;
296 DB_TXN *txn;
297 DBC **dbcp;
298 u_int32_t flags;
299 {
300 DB_THREAD_INFO *ip;
301 DBC *dbc;
302 ENV *env;
303 REGENV *renv;
304 int rep_blocked, ret;
305
306 env = dbp->env;
307 (*dbcp) = NULL;
308
309 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
310
311 ENV_ENTER(env, ip);
312 XA_CHECK_TXN(ip, txn);
313
314 /* Check for replication block. */
315 rep_blocked = 0;
316 if (IS_ENV_REPLICATED(env)) {
317 if (!IS_REAL_TXN(txn)) {
318 if ((ret = __op_rep_enter(env, 0, 1)) != 0)
319 goto err;
320 rep_blocked = 1;
321 }
322 renv = env->reginfo->primary;
323 if (dbp->timestamp != renv->rep_timestamp) {
324 __db_errx(env, DB_STR("0580",
325 "replication recovery unrolled committed transactions;"
326 "open DB and DBcursor handles must be closed"));
327 ret = DB_REP_HANDLE_DEAD;
328 goto err;
329 }
330 }
331 if ((ret = __db_cursor_arg(dbp, flags)) != 0)
332 goto err;
333
334 /*
335 * Check for consistent transaction usage. For now, assume this
336 * cursor might be used for read operations only (in which case
337 * it may not require a txn). We'll check more stringently in
338 * c_del and c_put. (Note this means the read-op txn tests have
339 * to be a subset of the write-op ones.)
340 */
341 if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
342 goto err;
343
344 #ifdef HAVE_SLICES
345 if (FLD_ISSET(dbp->open_flags, DB_SLICED))
346 LF_SET(DB_SLICED);
347 #endif
348 ret = __db_cursor(dbp, ip, txn, dbcp, flags);
349
350 /*
351 * Register externally created cursors into the valid transaction.
352 * If a family transaction was passed in, the transaction handle in
353 * the cursor may not match.
354 */
355 if ((dbc = *dbcp) != NULL) {
356 txn = dbc->txn;
357 dbc->open_flags = flags;
358 }
359 if (txn != NULL && ret == 0)
360 TAILQ_INSERT_HEAD(&(txn->my_cursors), dbc, txn_cursors);
361
362 err: /* Release replication block on error. */
363 if (ret != 0 && rep_blocked)
364 (void)__op_rep_exit(env);
365
366 ENV_LEAVE(env, ip);
367 return (ret);
368 }
369
370 /*
371 * __db_cursor --
372 * DB->cursor.
373 *
374 * PUBLIC: int __db_cursor __P((DB *,
375 * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t));
376 */
377 int
__db_cursor(dbp,ip,txn,dbcp,flags)378 __db_cursor(dbp, ip, txn, dbcp, flags)
379 DB *dbp;
380 DB_THREAD_INFO *ip;
381 DB_TXN *txn;
382 DBC **dbcp;
383 u_int32_t flags;
384 {
385 DBC *dbc;
386 ENV *env;
387 db_lockmode_t mode;
388 int ret;
389
390 env = dbp->env;
391
392 if (MULTIVERSION(dbp) && txn == NULL && (LF_ISSET(DB_TXN_SNAPSHOT) ||
393 F_ISSET(env->dbenv, DB_ENV_TXN_SNAPSHOT))) {
394 if ((ret =
395 __txn_begin(env, ip, NULL, &txn, DB_TXN_SNAPSHOT)) != 0)
396 return (ret);
397 F_SET(txn, TXN_PRIVATE);
398 }
399
400 PERFMON5(env, db, cursor, dbp->fname,
401 dbp->dname, txn == NULL ? 0 : txn->txnid, flags, &dbp->fileid[0]);
402
403 if ((ret = __db_cursor_int(dbp, ip, txn, dbp->type, PGNO_INVALID,
404 LF_ISSET(DB_CURSOR_BULK | DB_CURSOR_TRANSIENT | DB_RECOVER),
405 NULL, &dbc)) != 0)
406 return (ret);
407
408 /*
409 * If this is CDB, do all the locking in the interface, which is
410 * right here.
411 */
412 if (CDB_LOCKING(env)) {
413 mode = (LF_ISSET(DB_WRITELOCK)) ? DB_LOCK_WRITE :
414 ((LF_ISSET(DB_WRITECURSOR) || txn != NULL) ?
415 DB_LOCK_IWRITE : DB_LOCK_READ);
416 if ((ret = __lock_get(env, dbc->locker, 0,
417 &dbc->lock_dbt, mode, &dbc->mylock)) != 0)
418 goto err;
419 if (LF_ISSET(DB_WRITECURSOR))
420 F_SET(dbc, DBC_WRITECURSOR);
421 if (LF_ISSET(DB_WRITELOCK))
422 F_SET(dbc, DBC_WRITER);
423 }
424
425 if (LF_ISSET(DB_READ_UNCOMMITTED) ||
426 (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED)))
427 F_SET(dbc, DBC_READ_UNCOMMITTED);
428
429 if (LF_ISSET(DB_READ_COMMITTED) ||
430 (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED)))
431 F_SET(dbc, DBC_READ_COMMITTED);
432
433 #ifdef HAVE_SLICES
434 if (LF_ISSET(DB_SLICED))
435 ret = __dbc_slice_init(dbc);
436 #endif
437
438 *dbcp = dbc;
439 return (0);
440
441 err: (void)__dbc_close(dbc);
442 return (ret);
443 }
444
445 /*
446 * __db_cursor_arg --
447 * Check DB->cursor arguments.
448 */
449 static int
__db_cursor_arg(dbp,flags)450 __db_cursor_arg(dbp, flags)
451 DB *dbp;
452 u_int32_t flags;
453 {
454 ENV *env;
455
456 env = dbp->env;
457
458 /*
459 * DB_READ_COMMITTED and DB_READ_UNCOMMITTED require locking.
460 */
461 if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) {
462 if (!LOCKING_ON(env))
463 return (__db_fnl(env, "DB->cursor"));
464 }
465
466 if (dbp->blob_threshold &&
467 LF_ISSET(DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT)) {
468 __db_errx(dbp->env, DB_STR("0753",
469 "External file enabled databases do not support READ_UNCOMMITTED and TXN_SNAPSHOT."));
470 return (EINVAL);
471 }
472
473 LF_CLR(DB_CURSOR_BULK |
474 DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT);
475
476 /* Check for invalid function flags. */
477 if (LF_ISSET(DB_WRITECURSOR)) {
478 if (DB_IS_READONLY(dbp))
479 return (__db_rdonly(env, "DB->cursor"));
480 if (!CDB_LOCKING(env))
481 return (__db_ferr(env, "DB->cursor", 0));
482 LF_CLR(DB_WRITECURSOR);
483 } else if (LF_ISSET(DB_WRITELOCK)) {
484 if (DB_IS_READONLY(dbp))
485 return (__db_rdonly(env, "DB->cursor"));
486 LF_CLR(DB_WRITELOCK);
487 }
488
489 if (flags != 0)
490 return (__db_ferr(env, "DB->cursor", 0));
491
492 return (0);
493 }
494
495 /*
496 * __db_del_pp --
497 * DB->del pre/post processing.
498 *
499 * PUBLIC: int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t));
500 */
501 int
__db_del_pp(dbp,txn,key,flags)502 __db_del_pp(dbp, txn, key, flags)
503 DB *dbp;
504 DB_TXN *txn;
505 DBT *key;
506 u_int32_t flags;
507 {
508 DB_THREAD_INFO *ip;
509 ENV *env;
510 int forward_op, handle_check, ret, t_ret, txn_local;
511
512 env = dbp->env;
513 txn_local = 0;
514 forward_op = 0;
515 #ifdef HAVE_REPLICATION_THREADS
516 forward_op = IS_REP_CLIENT(env) &&
517 IS_USING_WRITE_FORWARDING(env) && txn == NULL;
518 #endif
519
520 STRIP_AUTO_COMMIT(flags);
521 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");
522
523 #ifdef CONFIG_TEST
524 if (IS_REP_MASTER(env))
525 DB_TEST_WAIT(env, env->test_check);
526 #endif
527 ENV_ENTER(env, ip);
528 XA_CHECK_TXN(ip, txn);
529
530 /* Check for replication block. */
531 handle_check = IS_ENV_REPLICATED(env);
532 if (handle_check &&
533 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
534 handle_check = 0;
535 goto err;
536 }
537
538 if ((ret = __db_del_arg(dbp, key, flags, forward_op)) != 0)
539 goto err;
540
541 /* Forward singleton del operation to replication master if needed. */
542 #ifdef HAVE_REPLICATION_THREADS
543 if (forward_op) {
544 ret = __repmgr_forward_single_write(
545 REPMGR_WF_SINGLE_DEL, dbp, key, NULL, flags);
546 /* Always skip regular del processing for forwarded del. */
547 goto rep_exit;
548 }
549 #endif
550
551 /* Create local transaction as necessary. */
552 if (IS_DB_AUTO_COMMIT(dbp, txn)) {
553 if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
554 goto err;
555 txn_local = 1;
556 }
557
558 /* Check for consistent transaction usage. */
559 if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
560 goto err;
561
562 ret = __db_del(dbp, ip, txn, key, flags);
563
564 err: if (txn_local &&
565 (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
566 ret = t_ret;
567
568 /* Release replication block. */
569 #ifdef HAVE_REPLICATION_THREADS
570 rep_exit:
571 #endif
572 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
573 ret = t_ret;
574 ENV_LEAVE(env, ip);
575 __dbt_userfree(env, key, NULL, NULL);
576 return (ret);
577 }
578
579 /*
580 * __db_del_arg --
581 * Check DB->delete arguments.
582 */
583 static int
__db_del_arg(dbp,key,flags,forward_op)584 __db_del_arg(dbp, key, flags, forward_op)
585 DB *dbp;
586 DBT *key;
587 u_int32_t flags;
588 int forward_op;
589 {
590 ENV *env;
591 int ret;
592
593 env = dbp->env;
594
595 /*
596 * Check for changes to a read-only tree unless this is a
597 * replication client write operation to be forwarded.
598 */
599 if (!forward_op && DB_IS_READONLY(dbp))
600 return (__db_rdonly(env, "DB->del"));
601
602 /* Check for invalid function flags. */
603 switch (flags) {
604 case DB_CONSUME:
605 if (dbp->type != DB_QUEUE)
606 return (__db_ferr(env, "DB->del", 0));
607 goto copy;
608 case DB_MULTIPLE:
609 case DB_MULTIPLE_KEY:
610 if (!F_ISSET(key, DB_DBT_BULK)) {
611 __db_errx(env, DB_STR("0581",
612 "DB->del with DB_MULTIPLE(_KEY) requires multiple key records"));
613 return (EINVAL);
614 }
615 /* FALL THROUGH */
616 case 0:
617 copy: if ((ret = __dbt_usercopy(env, key)) != 0)
618 return (ret);
619 break;
620 default:
621 return (__db_ferr(env, "DB->del", 0));
622 }
623
624 return (0);
625 }
626
627 /*
628 * __db_exists --
629 * DB->exists implementation.
630 *
631 * PUBLIC: int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t));
632 */
633 int
__db_exists(dbp,txn,key,flags)634 __db_exists(dbp, txn, key, flags)
635 DB *dbp;
636 DB_TXN *txn;
637 DBT *key;
638 u_int32_t flags;
639 {
640 DBT data;
641 int ret;
642
643 /*
644 * Most flag checking is done in the DB->get call, we only check for
645 * specific incompatibilities here. This saves making __get_arg
646 * aware of the exist method's API constraints.
647 */
648 STRIP_AUTO_COMMIT(flags);
649
650 if ((ret = __db_fchk(dbp->env, "DB->exists", flags,
651 DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) != 0)
652 return (ret);
653
654 /*
655 * Configure a data DBT that returns no bytes so there's no copy
656 * of the data.
657 */
658 memset(&data, 0, sizeof(data));
659 data.dlen = 0;
660 data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
661
662 return (dbp->get(dbp, txn, key, &data, flags));
663 }
664
665 /*
666 * db_fd_pp --
667 * DB->fd pre/post processing.
668 *
669 * PUBLIC: int __db_fd_pp __P((DB *, int *));
670 */
671 int
__db_fd_pp(dbp,fdp)672 __db_fd_pp(dbp, fdp)
673 DB *dbp;
674 int *fdp;
675 {
676 DB_FH *fhp;
677 DB_THREAD_INFO *ip;
678 ENV *env;
679 int handle_check, ret, t_ret;
680
681 env = dbp->env;
682
683 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");
684
685 ENV_ENTER(env, ip);
686
687 /* Check for replication block. */
688 handle_check = IS_ENV_REPLICATED(env);
689 if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0)
690 goto err;
691
692 /*
693 * !!!
694 * There's no argument checking to be done.
695 *
696 * !!!
697 * The actual method call is simple, do it inline.
698 *
699 * This is a truly spectacular layering violation.
700 */
701 if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) == 0) {
702 if (fhp == NULL) {
703 *fdp = -1;
704 ret = USR_ERR(env, ENOENT);
705 __db_errx(env, DB_STR("0582",
706 "Database does not have a valid file handle"));
707 } else
708 *fdp = fhp->fd;
709 }
710
711 /* Release replication block. */
712 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
713 ret = t_ret;
714
715 err: ENV_LEAVE(env, ip);
716 return (ret);
717 }
718
719 /*
720 * __db_get_pp --
721 * DB->get pre/post processing.
722 *
723 * PUBLIC: int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
724 */
725 int
__db_get_pp(dbp,txn,key,data,flags)726 __db_get_pp(dbp, txn, key, data, flags)
727 DB *dbp;
728 DB_TXN *txn;
729 DBT *key, *data;
730 u_int32_t flags;
731 {
732 DB_THREAD_INFO *ip;
733 ENV *env;
734 u_int32_t mode;
735 int handle_check, ignore_lease, ret, t_ret, txn_local;
736
737 env = dbp->env;
738 mode = 0;
739 txn_local = 0;
740
741 STRIP_AUTO_COMMIT(flags);
742 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");
743
744 ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
745 LF_CLR(DB_IGNORE_LEASE);
746
747 if ((ret = __db_get_arg(dbp, key, data, flags)) != 0) {
748 __dbt_userfree(env, key, NULL, data);
749 return (ret);
750 }
751
752 ENV_ENTER(env, ip);
753 XA_CHECK_TXN(ip, txn);
754
755 /* Check for replication block. */
756 handle_check = IS_ENV_REPLICATED(env);
757 if (handle_check &&
758 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
759 handle_check = 0;
760 goto err;
761 }
762
763 if (LF_ISSET(DB_READ_UNCOMMITTED))
764 mode = DB_READ_UNCOMMITTED;
765 else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
766 (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) {
767 mode = DB_WRITELOCK;
768 if (IS_DB_AUTO_COMMIT(dbp, txn)) {
769 if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
770 goto err;
771 txn_local = 1;
772 }
773 }
774
775 /* Check for consistent transaction usage. */
776 if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID,
777 mode == DB_WRITELOCK || LF_ISSET(DB_RMW) ? 0 : 1)) != 0)
778 goto err;
779
780 ret = __db_get(dbp, ip, txn, key, data, flags);
781 /*
782 * Check for master leases.
783 */
784 if (ret == 0 &&
785 IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
786 ret = __rep_lease_check(env, 1);
787
788 err: if (txn_local &&
789 (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
790 ret = t_ret;
791
792 /* Release replication block. */
793 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
794 ret = t_ret;
795
796 DB_TEST_CRASH(env->test_abort, DB_TEST_NO_MUTEX);
797 ENV_LEAVE(env, ip);
798 __dbt_userfree(env, key, NULL, data);
799 return (ret);
800 }
801
802 /*
803 * __db_get --
804 * DB->get.
805 *
806 * PUBLIC: int __db_get __P((DB *,
807 * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t));
808 */
809 int
__db_get(dbp,ip,txn,key,data,flags)810 __db_get(dbp, ip, txn, key, data, flags)
811 DB *dbp;
812 DB_THREAD_INFO *ip;
813 DB_TXN *txn;
814 DBT *key, *data;
815 u_int32_t flags;
816 {
817 DBC *dbc;
818 u_int32_t mode;
819 int ret, t_ret;
820
821 /*
822 * The DB_CURSOR_TRANSIENT flag indicates that we're just doing a single
823 * operation with this cursor, and that in case of error we don't need
824 * to restore it to its old position. Thus, we can perform the get
825 * without duplicating the cursor, saving some cycles in this common
826 * case.
827 */
828 mode = DB_CURSOR_TRANSIENT;
829 if (LF_ISSET(DB_READ_UNCOMMITTED)) {
830 mode |= DB_READ_UNCOMMITTED;
831 LF_CLR(DB_READ_UNCOMMITTED);
832 } else if (LF_ISSET(DB_READ_COMMITTED)) {
833 mode |= DB_READ_COMMITTED;
834 LF_CLR(DB_READ_COMMITTED);
835 } else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
836 (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT)
837 mode |= DB_WRITELOCK;
838
839 if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
840 return (ret);
841
842 DEBUG_LREAD(dbc, txn, "DB->get", key, NULL, flags);
843
844 /*
845 * The semantics of bulk gets are different for DB->get vs DBC->get.
846 * Mark the cursor so the low-level bulk get routines know which
847 * behavior we want.
848 */
849 F_SET(dbc, DBC_FROM_DB_GET);
850
851 /*
852 * SET_RET_MEM indicates that if key and/or data have no DBT
853 * flags set and DB manages the returned-data memory, that memory
854 * will belong to this handle, not to the underlying cursor.
855 */
856 SET_RET_MEM(dbc, dbp);
857
858 if (LF_ISSET(~(DB_RMW | DB_MULTIPLE)) == 0)
859 LF_SET(DB_SET);
860
861 #ifdef HAVE_PARTITION
862 if (F_ISSET(dbc, DBC_PARTITIONED))
863 ret = __partc_get(dbc, key, data, flags);
864 else
865 #endif
866 ret = __dbc_get(dbc, key, data, flags);
867
868 if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
869 ret = t_ret;
870
871 return (ret);
872 }
873
874 /*
875 * __db_get_arg --
876 * DB->get argument checking, used by both DB->get and DB->pget.
877 */
878 static int
__db_get_arg(dbp,key,data,flags)879 __db_get_arg(dbp, key, data, flags)
880 const DB *dbp;
881 DBT *key, *data;
882 u_int32_t flags;
883 {
884 ENV *env;
885 int dirty, multi, ret;
886
887 env = dbp->env;
888
889 if (dbp->blob_threshold && LF_ISSET(DB_READ_UNCOMMITTED)) {
890 __db_errx(env, DB_STR("0754",
891 "External file enabled databases do not support DB_READ_UNCOMMITTED."));
892 return (EINVAL);
893 }
894
895 /*
896 * Check for read-modify-write validity. DB_RMW doesn't make sense
897 * with CDB cursors since if you're going to write the cursor, you
898 * had to create it with DB_WRITECURSOR. Regardless, we check for
899 * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
900 * If this changes, confirm that DB does not itself set the DB_RMW
901 * flag in a path where CDB may have been configured.
902 */
903 dirty = 0;
904 if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
905 if (!LOCKING_ON(env))
906 return (__db_fnl(env, "DB->get"));
907 if ((ret = __db_fcchk(env, "DB->get",
908 flags, DB_READ_UNCOMMITTED, DB_READ_COMMITTED)) != 0)
909 return (ret);
910 if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED))
911 dirty = 1;
912 LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
913 }
914
915 multi = 0;
916 if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
917 if (LF_ISSET(DB_MULTIPLE_KEY))
918 goto multi_err;
919 multi = LF_ISSET(DB_MULTIPLE) ? 1 : 0;
920 LF_CLR(DB_MULTIPLE);
921 }
922
923 /* Check for invalid function flags. */
924 switch (flags) {
925 case DB_GET_BOTH:
926 if ((ret = __dbt_usercopy(env, data)) != 0)
927 return (ret);
928 /* FALLTHROUGH */
929 case 0:
930 if ((ret = __dbt_usercopy(env, key)) != 0) {
931 __dbt_userfree(env, key, NULL, data);
932 return (ret);
933 }
934 break;
935 case DB_SET_RECNO:
936 if (!F_ISSET(dbp, DB_AM_RECNUM))
937 goto err;
938 if ((ret = __dbt_usercopy(env, key)) != 0)
939 return (ret);
940 break;
941 case DB_CONSUME:
942 case DB_CONSUME_WAIT:
943 if (DB_IS_READONLY(dbp))
944 return (__db_rdonly(env,
945 "DB->get CONSUME/CONSUME_WAIT"));
946 if (dirty) {
947 __db_errx(env, DB_STR_A("0583",
948 "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT",
949 "%s"), LF_ISSET(DB_READ_UNCOMMITTED) ?
950 "DB_READ_UNCOMMITTED" : "DB_READ_COMMITTED");
951 return (EINVAL);
952 }
953 if (multi)
954 multi_err: return (__db_ferr(env, "DB->get", 1));
955 if (dbp->type == DB_QUEUE)
956 break;
957 /* FALLTHROUGH */
958 default:
959 err: return (__db_ferr(env, "DB->get", 0));
960 }
961
962 /*
963 * Check for invalid key/data flags.
964 */
965 if ((ret =
966 __dbt_ferr(dbp, "key", key, DB_RETURNS_A_KEY(dbp, flags))) != 0)
967 return (ret);
968
969 if (F_ISSET(data, DB_DBT_READONLY)) {
970 __db_errx(env, DB_STR("0584",
971 "DB_DBT_READONLY should not be set on data DBT."));
972 return (EINVAL);
973 }
974 if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0)
975 return (ret);
976
977 if (multi) {
978 if (!F_ISSET(data, DB_DBT_USERMEM)) {
979 __db_errx(env, DB_STR("0585",
980 "DB_MULTIPLE requires DB_DBT_USERMEM be set"));
981 return (EINVAL);
982 }
983 if (F_ISSET(key, DB_DBT_PARTIAL) ||
984 F_ISSET(data, DB_DBT_PARTIAL)) {
985 __db_errx(env, DB_STR("0586",
986 "DB_MULTIPLE does not support DB_DBT_PARTIAL"));
987 return (EINVAL);
988 }
989 if (data->ulen < 1024 ||
990 data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
991 __db_errx(env, DB_STR("0587",
992 "DB_MULTIPLE buffers must be aligned, "
993 "at least page size and multiples of 1KB"));
994 return (EINVAL);
995 }
996 }
997
998 /* Check invalid partial key. */
999 if (F_ISSET(key, DB_DBT_PARTIAL) && !(LF_ISSET(DB_CONSUME) &&
1000 LF_ISSET(DB_CONSUME_WAIT) && LF_ISSET(DB_SET_RECNO))) {
1001 __db_errx(env, DB_STR("0708",
1002 "Invalid positioning flag combined with DB_DBT_PARTIAL"));
1003 return (EINVAL);
1004 }
1005
1006 return (0);
1007 }
1008
1009 /*
1010 * __db_join_pp --
1011 * DB->join pre/post processing.
1012 *
1013 * PUBLIC: int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t));
1014 */
1015 int
__db_join_pp(primary,curslist,dbcp,flags)1016 __db_join_pp(primary, curslist, dbcp, flags)
1017 DB *primary;
1018 DBC **curslist, **dbcp;
1019 u_int32_t flags;
1020 {
1021 DB_THREAD_INFO *ip;
1022 ENV *env;
1023 int handle_check, ret, t_ret;
1024
1025 env = primary->env;
1026
1027 ENV_ENTER(env, ip);
1028
1029 /* Check for replication block. */
1030 handle_check = IS_ENV_REPLICATED(env);
1031 if (handle_check && (ret = __db_rep_enter(
1032 primary, 1, 0, IS_REAL_TXN(curslist[0]->txn))) != 0) {
1033 handle_check = 0;
1034 goto err;
1035 }
1036
1037 if ((ret = __db_join_arg(primary, curslist, flags)) == 0)
1038 ret = __db_join(primary, curslist, dbcp, flags);
1039
1040 /* Release replication block. */
1041 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1042 ret = t_ret;
1043
1044 err: ENV_LEAVE(env, ip);
1045 return (ret);
1046 }
1047
1048 /*
1049 * __db_join_arg --
1050 * Check DB->join arguments.
1051 */
1052 static int
__db_join_arg(primary,curslist,flags)1053 __db_join_arg(primary, curslist, flags)
1054 DB *primary;
1055 DBC **curslist;
1056 u_int32_t flags;
1057 {
1058 DB_TXN *txn;
1059 ENV *env;
1060 int i;
1061
1062 env = primary->env;
1063
1064 switch (flags) {
1065 case 0:
1066 case DB_JOIN_NOSORT:
1067 break;
1068 default:
1069 return (__db_ferr(env, "DB->join", 0));
1070 }
1071
1072 if (curslist == NULL || curslist[0] == NULL) {
1073 __db_errx(env, DB_STR("0588",
1074 "At least one secondary cursor must be specified to DB->join"));
1075 return (EINVAL);
1076 }
1077
1078 txn = curslist[0]->txn;
1079 for (i = 1; curslist[i] != NULL; i++)
1080 if (curslist[i]->txn != txn) {
1081 __db_errx(env, DB_STR("0589",
1082 "All secondary cursors must share the same transaction"));
1083 return (EINVAL);
1084 }
1085
1086 return (0);
1087 }
1088
1089 /*
1090 * __db_key_range_pp --
1091 * DB->key_range pre/post processing.
1092 *
1093 * PUBLIC: int __db_key_range_pp
1094 * PUBLIC: __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
1095 */
1096 int
__db_key_range_pp(dbp,txn,key,kr,flags)1097 __db_key_range_pp(dbp, txn, key, kr, flags)
1098 DB *dbp;
1099 DB_TXN *txn;
1100 DBT *key;
1101 DB_KEY_RANGE *kr;
1102 u_int32_t flags;
1103 {
1104 DBC *dbc;
1105 DB_THREAD_INFO *ip;
1106 ENV *env;
1107 int handle_check, ret, t_ret;
1108
1109 env = dbp->env;
1110
1111 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range");
1112
1113 /*
1114 * !!!
1115 * The actual argument checking is simple, do it inline, outside of
1116 * the replication block.
1117 */
1118 if (flags != 0)
1119 return (__db_ferr(env, "DB->key_range", 0));
1120
1121 ENV_ENTER(env, ip);
1122 XA_CHECK_TXN(ip, txn);
1123
1124 /* Check for replication block. */
1125 handle_check = IS_ENV_REPLICATED(env);
1126 if (handle_check &&
1127 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
1128 handle_check = 0;
1129 goto err;
1130 }
1131
1132 /* Check for consistent transaction usage. */
1133 if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
1134 goto err;
1135
1136 /*
1137 * !!!
1138 * The actual method call is simple, do it inline.
1139 */
1140 switch (dbp->type) {
1141 case DB_BTREE:
1142 if ((ret = __dbt_usercopy(env, key)) != 0)
1143 goto err;
1144
1145 /* Acquire a cursor. */
1146 if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) {
1147 __dbt_userfree(env, key, NULL, NULL);
1148 break;
1149 }
1150
1151 DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0);
1152 #ifdef HAVE_PARTITION
1153 if (DB_IS_PARTITIONED(dbp))
1154 ret = __part_key_range(dbc, key, kr, flags);
1155 else
1156 #endif
1157 ret = __bam_key_range(dbc, key, kr, flags);
1158
1159 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1160 ret = t_ret;
1161 __dbt_userfree(env, key, NULL, NULL);
1162 break;
1163 case DB_HASH:
1164 case DB_QUEUE:
1165 case DB_RECNO:
1166 ret = __dbh_am_chk(dbp, DB_OK_BTREE);
1167 break;
1168 case DB_UNKNOWN:
1169 default:
1170 ret = __db_unknown_type(env, "DB->key_range", dbp->type);
1171 break;
1172 }
1173
1174 err: /* Release replication block. */
1175 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1176 ret = t_ret;
1177
1178 ENV_LEAVE(env, ip);
1179 return (ret);
1180 }
1181
1182 /*
1183 * __db_open_pp --
1184 * DB->open pre/post processing.
1185 *
1186 * PUBLIC: int __db_open_pp __P((DB *, DB_TXN *,
1187 * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int));
1188 */
1189 int
__db_open_pp(dbp,txn,fname,dname,type,flags,mode)1190 __db_open_pp(dbp, txn, fname, dname, type, flags, mode)
1191 DB *dbp;
1192 DB_TXN *txn;
1193 const char *fname, *dname;
1194 DBTYPE type;
1195 u_int32_t flags;
1196 int mode;
1197 {
1198 DB_THREAD_INFO *ip;
1199 ENV *env;
1200 int handle_check, nosync, remove_me, ret, t_ret, txn_local;
1201
1202 env = dbp->env;
1203 nosync = 1;
1204 handle_check = remove_me = txn_local = 0;
1205
1206 ENV_ENTER(env, ip);
1207
1208 /*
1209 * Save the flags. We do this here because we don't pass all of the
1210 * flags down into the actual DB->open method call, we strip
1211 * DB_AUTO_COMMIT at this layer.
1212 */
1213 dbp->open_flags = flags;
1214
1215 /* Save the current DB handle flags for refresh. */
1216 dbp->orig_flags = dbp->flags;
1217
1218 if (fname == NULL && PREFMAS_IS_SET(env)) {
1219 ret = USR_ERR(env, EINVAL);
1220 __db_errx(env, DB_STR("0783", "In-memory databases are not "
1221 "supported in Replication Manager preferred master mode"));
1222 goto err;
1223 }
1224
1225 /* Check for replication block. */
1226 handle_check = IS_ENV_REPLICATED(env);
1227 if (handle_check &&
1228 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
1229 handle_check = 0;
1230 goto err;
1231 }
1232
1233 /*
1234 * A replication client can't create a database, but it's convenient to
1235 * allow a repmgr application to specify DB_CREATE anyway. Thus for
1236 * such an application the meaning of DB_CREATE becomes "create it if
1237 * I'm a master, and otherwise ignore the flag". A repmgr application
1238 * running as master can't be sure that it won't spontaneously become a
1239 * client, so there's a race condition.
1240 */
1241 if (IS_REP_CLIENT(env) && !F_ISSET(dbp, DB_AM_NOT_DURABLE))
1242 LF_CLR(DB_CREATE);
1243
1244 /*
1245 * Create local transaction as necessary, check for consistent
1246 * transaction usage.
1247 */
1248 if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
1249 if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
1250 goto err;
1251 txn_local = 1;
1252 } else if (txn != NULL && !TXN_ON(env) &&
1253 (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_FAMILY))) {
1254 ret = __db_not_txn_env(env);
1255 goto err;
1256 }
1257 LF_CLR(DB_AUTO_COMMIT);
1258
1259 /*
1260 * We check arguments after possibly creating a local transaction,
1261 * which is unusual -- the reason is some flags are illegal if any
1262 * kind of transaction is in effect.
1263 */
1264 if ((ret = __db_open_arg(dbp, txn, fname, dname, type, flags)) == 0) {
1265 if ((ret = __db_open(dbp, ip, txn, fname, dname, type,
1266 flags, mode, PGNO_BASE_MD)) != 0)
1267 goto txnerr;
1268 #ifdef HAVE_SLICES
1269 if (FLD_ISSET(dbp->open_flags, DB_SLICED) &&
1270 (ret = __db_slice_open(dbp,
1271 ip, txn, fname, type, flags, mode)) != 0)
1272 goto txnerr;
1273 #endif
1274 }
1275
1276 /*
1277 * You can open the database that describes the subdatabases in the
1278 * rest of the file read-only. The content of each key's data is
1279 * unspecified and applications should never be adding new records
1280 * or updating existing records. However, during recovery, we need
1281 * to open these databases R/W so we can redo/undo changes in them.
1282 * Likewise, we need to open master databases read/write during
1283 * rename and remove so we can be sure they're fully sync'ed, so
1284 * we provide an override flag for the purpose.
1285 */
1286 if (dname == NULL && !IS_RECOVERING(env) && !LF_ISSET(DB_RDONLY) &&
1287 !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) {
1288 ret = USR_ERR(env, EINVAL);
1289 __db_errx(env, DB_STR("0590",
1290 "files containing multiple databases may only be opened read-only"));
1291 goto txnerr;
1292 }
1293
1294 /*
1295 * Success: file creations have to be synchronous, otherwise we don't
1296 * care.
1297 */
1298 if (F_ISSET(dbp, DB_AM_CREATED | DB_AM_CREATED_MSTR))
1299 nosync = 0;
1300
1301 /* Success: don't discard the file on close. */
1302 F_CLR(dbp, DB_AM_DISCARD | DB_AM_CREATED | DB_AM_CREATED_MSTR);
1303
1304 /*
1305 * If not transactional, remove the databases/subdatabases if it is
1306 * persistent. If we're transactional, the child transaction abort
1307 * cleans up.
1308 */
1309 txnerr: if (ret != 0 && !IS_REAL_TXN(txn)) {
1310 remove_me = (F_ISSET(dbp, DB_AM_CREATED) &&
1311 (fname != NULL || dname != NULL)) ? 1 : 0;
1312 if (F_ISSET(dbp, DB_AM_CREATED_MSTR) ||
1313 (dname == NULL && remove_me))
1314 /* Remove file. */
1315 (void)__db_remove_int(dbp,
1316 ip, txn, fname, NULL, DB_FORCE);
1317 else if (remove_me)
1318 /* Remove subdatabase. */
1319 (void)__db_remove_int(dbp,
1320 ip, txn, fname, dname, DB_FORCE);
1321 }
1322
1323 if (txn_local && (t_ret =
1324 __db_txn_auto_resolve(env, txn, nosync, ret)) && ret == 0)
1325 ret = t_ret;
1326
1327 err: /* Release replication block. */
1328 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1329 ret = t_ret;
1330
1331 ENV_LEAVE(env, ip);
1332 return (ret);
1333 }
1334
1335 /*
1336 * __db_open_arg --
1337 * Check DB->open arguments.
1338 * PUBLIC: int __db_open_arg __P((DB *,
1339 * PUBLIC: DB_TXN *, const char *, const char *, DBTYPE, u_int32_t));
1340 */
1341 int
__db_open_arg(dbp,txn,fname,dname,type,flags)1342 __db_open_arg(dbp, txn, fname, dname, type, flags)
1343 DB *dbp;
1344 DB_TXN *txn;
1345 const char *fname, *dname;
1346 DBTYPE type;
1347 u_int32_t flags;
1348 {
1349 ENV *env;
1350 u_int32_t ok_flags;
1351 int ret;
1352
1353 env = dbp->env;
1354
1355 /* Validate arguments. */
1356 #undef OKFLAGS
1357 #define OKFLAGS \
1358 (DB_AUTO_COMMIT | DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING | \
1359 DB_MULTIVERSION | DB_NOMMAP | DB_NO_AUTO_COMMIT | DB_RDONLY | \
1360 DB_RDWRMASTER | DB_READ_UNCOMMITTED | DB_SLICED | DB_THREAD | \
1361 DB_TRUNCATE)
1362 if ((ret = __db_fchk(env, "DB->open", flags, OKFLAGS)) != 0)
1363 return (ret);
1364 if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE))
1365 return (__db_ferr(env, "DB->open", 1));
1366 if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE))
1367 return (__db_ferr(env, "DB->open", 1));
1368
1369 if (LF_ISSET(DB_SLICED)) {
1370 #ifdef HAVE_SLICES
1371 if (env->dbenv->slice_cnt == 0)
1372 return (__env_not_sliced(env));
1373 #else
1374 return (__env_no_slices(env));
1375 #endif
1376 }
1377
1378 #ifdef HAVE_VXWORKS
1379 if (LF_ISSET(DB_TRUNCATE)) {
1380 __db_errx(env, DB_STR("0591",
1381 "DB_TRUNCATE not supported on VxWorks"));
1382 return (DB_OPNOTSUP);
1383 }
1384 #endif
1385 switch (type) {
1386 case DB_UNKNOWN:
1387 if (LF_ISSET(DB_CREATE | DB_TRUNCATE)) {
1388 __db_errx(env, DB_STR("0592",
1389 "DB_UNKNOWN type specified with DB_CREATE or DB_TRUNCATE"));
1390 return (EINVAL);
1391 }
1392 ok_flags = 0;
1393 break;
1394 case DB_BTREE:
1395 ok_flags = DB_OK_BTREE;
1396 break;
1397 case DB_HASH:
1398 #ifndef HAVE_HASH
1399 return (__db_no_hash_am(env));
1400 #endif
1401 ok_flags = DB_OK_HASH;
1402 break;
1403 case DB_HEAP:
1404 ok_flags = DB_OK_HEAP;
1405 break;
1406 case DB_QUEUE:
1407 #ifndef HAVE_QUEUE
1408 return (__db_no_queue_am(env));
1409 #endif
1410 ok_flags = DB_OK_QUEUE;
1411 break;
1412 case DB_RECNO:
1413 ok_flags = DB_OK_RECNO;
1414 break;
1415 default:
1416 __db_errx(env, DB_STR_A("0593",
1417 "unknown type: %lu", "%lu"), (u_long)type);
1418 return (EINVAL);
1419 }
1420 if (ok_flags)
1421 DB_ILLEGAL_METHOD(dbp, ok_flags);
1422
1423 /* The environment may have been created, but never opened. */
1424 if (!F_ISSET(env, ENV_DBLOCAL | ENV_OPEN_CALLED)) {
1425 __db_errx(env, DB_STR("0594",
1426 "database environment not yet opened"));
1427 return (EINVAL);
1428 }
1429
1430 /*
1431 * Historically, you could pass in an environment that didn't have a
1432 * mpool, and DB would create a private one behind the scenes. This
1433 * no longer works.
1434 */
1435 if (!F_ISSET(env, ENV_DBLOCAL) && !MPOOL_ON(env)) {
1436 __db_errx(env, DB_STR("0595",
1437 "environment did not include a memory pool"));
1438 return (EINVAL);
1439 }
1440
1441 /*
1442 * You can't specify threads during DB->open if subsystems in the
1443 * environment weren't configured with them.
1444 */
1445 if (LF_ISSET(DB_THREAD) && !F_ISSET(env, ENV_DBLOCAL | ENV_THREAD)) {
1446 __db_errx(env, DB_STR("0596",
1447 "environment not created using DB_THREAD"));
1448 return (EINVAL);
1449 }
1450
1451 /* Exclusive database handles cannot be threaded.*/
1452 if (LF_ISSET(DB_THREAD) && F2_ISSET(dbp, DB2_AM_EXCL)) {
1453 __db_errx(env, DB_STR("0744",
1454 "Exclusive database handles cannot be threaded."));
1455 return (EINVAL);
1456 }
1457
1458 /* Exclusive database handles require transactional environments. */
1459 if (F2_ISSET(dbp, DB2_AM_EXCL) && !TXN_ON(env)) {
1460 __db_errx(env, DB_STR("0745",
1461 "Exclusive database handles require transactional environments."));
1462 return (EINVAL);
1463 }
1464
1465 /* Replication clients cannot open exclusive database handles. */
1466 if (F2_ISSET(dbp, DB2_AM_EXCL) && IS_REP_CLIENT(env)) {
1467 __db_errx(env, DB_STR("0746",
1468 "Exclusive database handles cannot be opened on replication clients."));
1469 return (EINVAL);
1470 }
1471
1472 /* DB_MULTIVERSION requires a database configured for transactions. */
1473 if (LF_ISSET(DB_MULTIVERSION) && !IS_REAL_TXN(txn)) {
1474 __db_errx(env, DB_STR("0597",
1475 "DB_MULTIVERSION illegal without a transaction specified"));
1476 return (EINVAL);
1477 }
1478
1479 if (LF_ISSET(DB_MULTIVERSION) && type == DB_QUEUE) {
1480 __db_errx(env, DB_STR("0598",
1481 "DB_MULTIVERSION illegal with queue databases"));
1482 return (EINVAL);
1483 }
1484
1485 if (LF_ISSET(DB_MULTIVERSION) && dbp->blob_threshold) {
1486 __db_errx(env, DB_STR("0755",
1487 "DB_MULTIVERSION illegal with external file enabled databases"));
1488 return (EINVAL);
1489 }
1490
1491 if (LF_ISSET(DB_READ_UNCOMMITTED) && dbp->blob_threshold) {
1492 __db_errx(env, DB_STR("0756",
1493 "DB_READ_UNCOMMITTED illegal with external file enabled databases"));
1494 return (EINVAL);
1495 }
1496
1497 /* DB_TRUNCATE is neither transaction recoverable nor lockable. */
1498 if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) {
1499 __db_errx(env, DB_STR_A("0599",
1500 "DB_TRUNCATE illegal with %s specified", "%s"),
1501 LOCKING_ON(env) ? "locking" : "transactions");
1502 return (EINVAL);
1503 }
1504
1505 /* Subdatabase checks. */
1506 if (dname != NULL) {
1507 /* QAM can only be done on in-memory subdatabases. */
1508 if (type == DB_QUEUE && fname != NULL) {
1509 __db_errx(env, DB_STR("0600",
1510 "Queue databases must be one-per-file"));
1511 return (EINVAL);
1512 }
1513
1514 /*
1515 * Named in-memory databases can't support certain flags,
1516 * so check here.
1517 */
1518 if (fname == NULL)
1519 F_CLR(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT);
1520 }
1521
1522 return (0);
1523 }
1524
1525 /*
1526 * __db_pget_pp --
1527 * DB->pget pre/post processing.
1528 *
1529 * PUBLIC: int __db_pget_pp
1530 * PUBLIC: __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
1531 */
1532 int
__db_pget_pp(dbp,txn,skey,pkey,data,flags)1533 __db_pget_pp(dbp, txn, skey, pkey, data, flags)
1534 DB *dbp;
1535 DB_TXN *txn;
1536 DBT *skey, *pkey, *data;
1537 u_int32_t flags;
1538 {
1539 DB_THREAD_INFO *ip;
1540 ENV *env;
1541 int handle_check, ignore_lease, ret, t_ret;
1542
1543 env = dbp->env;
1544
1545 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->pget");
1546
1547 ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
1548 LF_CLR(DB_IGNORE_LEASE);
1549
1550 if ((ret = __db_pget_arg(dbp, pkey, flags)) != 0 ||
1551 (ret = __db_get_arg(dbp, skey, data, flags)) != 0) {
1552 __dbt_userfree(env, skey, pkey, data);
1553 return (ret);
1554 }
1555
1556 ENV_ENTER(env, ip);
1557 XA_CHECK_TXN(ip, txn);
1558
1559 /* Check for replication block. */
1560 handle_check = IS_ENV_REPLICATED(env);
1561 if (handle_check &&
1562 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
1563 handle_check = 0;
1564 goto err;
1565 }
1566
1567 ret = __db_pget(dbp, ip, txn, skey, pkey, data, flags);
1568 /*
1569 * Check for master leases.
1570 */
1571 if (ret == 0 &&
1572 IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
1573 ret = __rep_lease_check(env, 1);
1574
1575 err: /* Release replication block. */
1576 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1577 ret = t_ret;
1578
1579 ENV_LEAVE(env, ip);
1580 __dbt_userfree(env, skey, pkey, data);
1581 return (ret);
1582 }
1583
1584 /*
1585 * __db_pget --
1586 * DB->pget.
1587 *
1588 * PUBLIC: int __db_pget __P((DB *,
1589 * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
1590 */
1591 int
__db_pget(dbp,ip,txn,skey,pkey,data,flags)1592 __db_pget(dbp, ip, txn, skey, pkey, data, flags)
1593 DB *dbp;
1594 DB_THREAD_INFO *ip;
1595 DB_TXN *txn;
1596 DBT *skey, *pkey, *data;
1597 u_int32_t flags;
1598 {
1599 DBC *dbc;
1600 u_int32_t mode;
1601 int ret, t_ret;
1602
1603 mode = DB_CURSOR_TRANSIENT;
1604 if (LF_ISSET(DB_READ_UNCOMMITTED)) {
1605 mode |= DB_READ_UNCOMMITTED;
1606 LF_CLR(DB_READ_UNCOMMITTED);
1607 } else if (LF_ISSET(DB_READ_COMMITTED)) {
1608 mode |= DB_READ_COMMITTED;
1609 LF_CLR(DB_READ_COMMITTED);
1610 }
1611
1612 if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
1613 return (ret);
1614
1615 SET_RET_MEM(dbc, dbp);
1616
1617 DEBUG_LREAD(dbc, txn, "__db_pget", skey, NULL, flags);
1618
1619 /*
1620 * !!!
1621 * The actual method call is simple, do it inline.
1622 *
1623 * The underlying cursor pget will fill in a default DBT for null
1624 * pkeys, and use the cursor's returned-key memory internally to
1625 * store any intermediate primary keys. However, we've just set
1626 * the returned-key memory to the DB handle's key memory, which
1627 * is unsafe to use if the DB handle is threaded. If the pkey
1628 * argument is NULL, use the DBC-owned returned-key memory
1629 * instead; it'll go away when we close the cursor before we
1630 * return, but in this case that's just fine, as we're not
1631 * returning the primary key.
1632 */
1633 if (pkey == NULL)
1634 dbc->rkey = &dbc->my_rkey;
1635
1636 /*
1637 * The cursor is just a perfectly ordinary secondary database cursor.
1638 * Call its c_pget() method to do the dirty work.
1639 */
1640 if (flags == 0 || flags == DB_RMW)
1641 flags |= DB_SET;
1642
1643 ret = __dbc_pget(dbc, skey, pkey, data, flags);
1644
1645 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1646 ret = t_ret;
1647
1648 return (ret);
1649 }
1650
1651 /*
1652 * __db_pget_arg --
1653 * Check DB->pget arguments.
1654 */
1655 static int
__db_pget_arg(dbp,pkey,flags)1656 __db_pget_arg(dbp, pkey, flags)
1657 DB *dbp;
1658 DBT *pkey;
1659 u_int32_t flags;
1660 {
1661 ENV *env;
1662 int ret;
1663
1664 env = dbp->env;
1665
1666 if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
1667 ret = USR_ERR(env, EINVAL);
1668 __db_errx(env, DB_STR("0601",
1669 "DB->pget may only be used on secondary indices"));
1670 return (ret);
1671 }
1672
1673 if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
1674 ret = USR_ERR(env, EINVAL);
1675 __db_errx(env,DB_STR("0602",
1676 "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices"));
1677 return (ret);
1678 }
1679
1680 /* DB_CONSUME makes no sense on a secondary index. */
1681 LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
1682 switch (flags) {
1683 case DB_CONSUME:
1684 case DB_CONSUME_WAIT:
1685 return (__db_ferr(env, "DB->pget", 0));
1686 default:
1687 /* __db_get_arg will catch the rest. */
1688 break;
1689 }
1690
1691 /*
1692 * We allow the pkey field to be NULL, so that we can make the
1693 * two-DBT get calls into wrappers for the three-DBT ones.
1694 */
1695 if (pkey != NULL &&
1696 (ret = __dbt_ferr(dbp, "primary key", pkey, 1)) != 0)
1697 return (ret);
1698
1699 /* Check invalid partial pkey. */
1700 if (pkey != NULL && F_ISSET(pkey, DB_DBT_PARTIAL)) {
1701 ret = USR_ERR(env, EINVAL);
1702 __db_errx(env, DB_STR("0709",
1703 "The primary key returned by pget can't be partial"));
1704 return (ret);
1705 }
1706
1707 if (flags == DB_GET_BOTH) {
1708 /* The pkey field can't be NULL if we're doing a DB_GET_BOTH. */
1709 if (pkey == NULL) {
1710 ret = USR_ERR(env, EINVAL);
1711 __db_errx(env, DB_STR("0603",
1712 "DB_GET_BOTH on a secondary index requires a primary key"));
1713 return (ret);
1714 }
1715 if ((ret = __dbt_usercopy(env, pkey)) != 0)
1716 return (ret);
1717 }
1718
1719 return (0);
1720 }
1721
1722 /*
1723 * __db_put_pp --
1724 * DB->put pre/post processing.
1725 *
1726 * PUBLIC: int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
1727 */
1728 int
__db_put_pp(dbp,txn,key,data,flags)1729 __db_put_pp(dbp, txn, key, data, flags)
1730 DB *dbp;
1731 DB_TXN *txn;
1732 DBT *key, *data;
1733 u_int32_t flags;
1734 {
1735 DB_THREAD_INFO *ip;
1736 ENV *env;
1737 int forward_op, handle_check, ret, txn_local, t_ret;
1738
1739 env = dbp->env;
1740 txn_local = 0;
1741 forward_op = 0;
1742 #ifdef HAVE_REPLICATION_THREADS
1743 forward_op = IS_REP_CLIENT(env) &&
1744 IS_USING_WRITE_FORWARDING(env) && txn == NULL;
1745 #endif
1746
1747 STRIP_AUTO_COMMIT(flags);
1748 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");
1749
1750 if ((ret = __db_put_arg(dbp, key, data, flags, forward_op)) != 0)
1751 return (ret);
1752
1753 ENV_ENTER(env, ip);
1754 XA_CHECK_TXN(ip, txn);
1755
1756 /* Check for replication block. */
1757 handle_check = IS_ENV_REPLICATED(env);
1758 if (handle_check &&
1759 (ret = __db_rep_enter(dbp, 1, 0, IS_REAL_TXN(txn))) != 0) {
1760 handle_check = 0;
1761 goto err;
1762 }
1763
1764 /* Forward singleton put operation to replication master if needed. */
1765 #ifdef HAVE_REPLICATION_THREADS
1766 if (forward_op) {
1767 ret = __repmgr_forward_single_write(
1768 REPMGR_WF_SINGLE_PUT, dbp, key, data, flags);
1769 /* Always skip regular put processing for forwarded put. */
1770 goto rep_exit;
1771 }
1772 #endif
1773
1774 /* Create local transaction as necessary. */
1775 if (IS_DB_AUTO_COMMIT(dbp, txn)) {
1776 if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
1777 goto err;
1778 txn_local = 1;
1779 }
1780
1781 /* Check for consistent transaction usage. */
1782 if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
1783 goto err;
1784
1785 ret = __db_put(dbp, ip, txn, key, data, flags);
1786
1787 err: if (txn_local &&
1788 (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
1789 ret = t_ret;
1790
1791 /* Release replication block. */
1792 #ifdef HAVE_REPLICATION_THREADS
1793 rep_exit:
1794 #endif
1795 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1796 ret = t_ret;
1797
1798 DB_TEST_CRASH(env->test_abort, DB_TEST_NO_MUTEX);
1799 ENV_LEAVE(env, ip);
1800 __dbt_userfree(env, key, NULL, data);
1801 return (ret);
1802 }
1803
1804 /*
1805 * __db_put_arg --
1806 * Check DB->put arguments.
1807 */
1808 static int
__db_put_arg(dbp,key,data,flags,forward_op)1809 __db_put_arg(dbp, key, data, flags, forward_op)
1810 DB *dbp;
1811 DBT *key, *data;
1812 u_int32_t flags;
1813 int forward_op;
1814 {
1815 ENV *env;
1816 int ret, returnkey;
1817
1818 env = dbp->env;
1819 returnkey = 0;
1820
1821 /*
1822 * Check for changes to a read-only tree unless this is a
1823 * replication client write operation to be forwarded.
1824 */
1825 if (!forward_op && DB_IS_READONLY(dbp))
1826 return (__db_rdonly(env, "DB->put"));
1827
1828 /* Check for puts on a secondary. */
1829 if (F_ISSET(dbp, DB_AM_SECONDARY)) {
1830 __db_errx(env, DB_STR("0604",
1831 "DB->put forbidden on secondary indices"));
1832 return (EINVAL);
1833 }
1834
1835 if (LF_ISSET(DB_MULTIPLE_KEY | DB_MULTIPLE)) {
1836 if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY))
1837 goto err;
1838
1839 switch (LF_ISSET(DB_OPFLAGS_MASK)) {
1840 case 0:
1841 case DB_OVERWRITE_DUP:
1842 break;
1843 default:
1844 __db_errx(env, DB_STR("0605",
1845 "DB->put: DB_MULTIPLE(_KEY) can only be combined with DB_OVERWRITE_DUP"));
1846 return (EINVAL);
1847 }
1848
1849 if (!F_ISSET(key, DB_DBT_BULK)) {
1850 __db_errx(env, DB_STR("0606",
1851 "DB->put with DB_MULTIPLE(_KEY) requires a bulk key buffer"));
1852 return (EINVAL);
1853 }
1854 }
1855 if (LF_ISSET(DB_MULTIPLE)) {
1856 if (!F_ISSET(data, DB_DBT_BULK)) {
1857 __db_errx(env, DB_STR("0607",
1858 "DB->put with DB_MULTIPLE requires a bulk data buffer"));
1859 return (EINVAL);
1860 }
1861 }
1862
1863 /* Check for invalid function flags. */
1864 switch (LF_ISSET(DB_OPFLAGS_MASK)) {
1865 case 0:
1866 case DB_NOOVERWRITE:
1867 case DB_OVERWRITE_DUP:
1868 break;
1869 case DB_APPEND:
1870 if (dbp->type != DB_RECNO &&
1871 dbp->type != DB_QUEUE && dbp->type != DB_HEAP)
1872 goto err;
1873 returnkey = 1;
1874 break;
1875 case DB_NODUPDATA:
1876 if (F_ISSET(dbp, DB_AM_DUPSORT))
1877 break;
1878 /* FALLTHROUGH */
1879 default:
1880 err: return (__db_ferr(env, "DB->put", 0));
1881 }
1882
1883 /*
1884 * Check for invalid key/data flags. The key may reasonably be NULL
1885 * if DB_APPEND is set and the application doesn't care about the
1886 * returned key.
1887 */
1888 if (((returnkey && key != NULL) || !returnkey) &&
1889 (ret = __dbt_ferr(dbp, "key", key, returnkey)) != 0)
1890 return (ret);
1891 if (!LF_ISSET(DB_MULTIPLE_KEY) &&
1892 (ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
1893 return (ret);
1894
1895 /*
1896 * The key parameter should not be NULL or have the "partial" flag set
1897 * in a put call unless the user doesn't care about a key value we'd
1898 * return. The user tells us they don't care about the returned key by
1899 * setting the key parameter to NULL or configuring the key DBT to not
1900 * return any information. (Returned keys from a put are always record
1901 * numbers, and returning part of a record number doesn't make sense:
1902 * only accept a partial return if the length returned is 0.)
1903 */
1904 if ((returnkey &&
1905 key != NULL && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) ||
1906 (!returnkey && F_ISSET(key, DB_DBT_PARTIAL)))
1907 return (__db_ferr(env, "key DBT", 0));
1908
1909 /* Check for partial puts in the presence of duplicates. */
1910 if (data != NULL && F_ISSET(data, DB_DBT_PARTIAL) &&
1911 (F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) {
1912 __db_errx(env, DB_STR("0608",
1913 "a partial put in the presence of duplicates requires a cursor operation"));
1914 return (EINVAL);
1915 }
1916
1917 if ((flags != DB_APPEND && (ret = __dbt_usercopy(env, key)) != 0) ||
1918 (!LF_ISSET(DB_MULTIPLE_KEY) &&
1919 (ret = __dbt_usercopy(env, data)) != 0))
1920 return (ret);
1921
1922 return (0);
1923 }
1924
1925 /*
1926 * __db_compact_func
1927 * Callback routine to report if the txn has open cursors.
1928 */
1929 static int
__db_compact_func(dbc,my_dbc,countp,pgno,indx,args)1930 __db_compact_func(dbc, my_dbc, countp, pgno, indx, args)
1931 DBC *dbc, *my_dbc;
1932 u_int32_t *countp;
1933 db_pgno_t pgno;
1934 u_int32_t indx;
1935 void *args;
1936 {
1937 DB_TXN *txn;
1938
1939 COMPQUIET(my_dbc, NULL);
1940 COMPQUIET(countp, NULL);
1941 COMPQUIET(pgno, 0);
1942 COMPQUIET(indx, 0);
1943
1944 txn = (DB_TXN *)args;
1945
1946 if (txn == dbc->txn)
1947 return (EEXIST);
1948 return (0);
1949 }
1950 /*
1951 * __db_compact_pp --
1952 * DB->compact pre/post processing.
1953 *
1954 * PUBLIC: int __db_compact_pp __P((DB *, DB_TXN *,
1955 * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
1956 */
1957 int
__db_compact_pp(dbp,txn,start,stop,c_data,flags,end)1958 __db_compact_pp(dbp, txn, start, stop, c_data, flags, end)
1959 DB *dbp;
1960 DB_TXN *txn;
1961 DBT *start, *stop;
1962 DB_COMPACT *c_data;
1963 u_int32_t flags;
1964 DBT *end;
1965 {
1966 DB_COMPACT *dp, l_data;
1967 DB_THREAD_INFO *ip;
1968 ENV *env;
1969 int handle_check, ret, t_ret;
1970 u_int32_t count;
1971
1972 env = dbp->env;
1973
1974 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->compact");
1975
1976 /*
1977 * !!!
1978 * The actual argument checking is simple, do it inline, outside of
1979 * the replication block.
1980 */
1981 if ((ret = __db_fchk(
1982 env, "DB->compact", flags, DB_FREELIST_ONLY | DB_FREE_SPACE)) != 0)
1983 return (ret);
1984
1985 /* Check for changes to a read-only database. */
1986 if (DB_IS_READONLY(dbp))
1987 return (__db_rdonly(env, "DB->compact"));
1988
1989 if (start != NULL && (ret = __dbt_usercopy(env, start)) != 0)
1990 return (ret);
1991 if (stop != NULL && (ret = __dbt_usercopy(env, stop)) != 0) {
1992 __dbt_userfree(env, start, NULL, NULL);
1993 return (ret);
1994 }
1995
1996 ENV_ENTER(env, ip);
1997 XA_CHECK_TXN(ip, txn);
1998
1999 /* Check for replication block. */
2000 handle_check = IS_ENV_REPLICATED(env);
2001 if (handle_check && (ret = __db_rep_enter(dbp, 1, 0,
2002 IS_REAL_TXN(txn))) != 0) {
2003 handle_check = 0;
2004 goto err;
2005 }
2006
2007 if (txn != NULL) {
2008 if ((ret = __db_walk_cursors(dbp,
2009 NULL, __db_compact_func, &count, 0, 0, txn)) != 0) {
2010 if (ret == EEXIST) {
2011 ret = USR_ERR(env, EINVAL);
2012 __db_errx(env, DB_STR("0609",
2013 "DB->compact may not be called with active cursors in the transaction."));
2014 }
2015 goto err;
2016 }
2017 }
2018
2019 if (c_data == NULL) {
2020 dp = &l_data;
2021 memset(dp, 0, sizeof(*dp));
2022 } else
2023 dp = c_data;
2024 #ifdef HAVE_PARTITION
2025 if (DB_IS_PARTITIONED(dbp))
2026 ret = __part_compact(dbp, ip, txn, start, stop, dp, flags, end);
2027 else
2028 #endif
2029 switch (dbp->type) {
2030 case DB_HASH:
2031 case DB_BTREE:
2032 case DB_RECNO:
2033 ret = __db_compact_int(dbp, ip,
2034 txn, start, stop, dp, flags, end);
2035 break;
2036 default:
2037 ret = __dbh_am_chk(dbp, DB_OK_BTREE);
2038 break;
2039 }
2040
2041 #ifdef HAVE_SLICES
2042 if (ret == 0 && FLD_ISSET(dbp->open_flags, DB_SLICED))
2043 ret = __db_slice_compact(dbp, txn, start, stop, dp, flags, end);
2044 #endif
2045
2046 /* Release replication block. */
2047 err: if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
2048 ret = t_ret;
2049
2050 ENV_LEAVE(env, ip);
2051 __dbt_userfree(env, start, stop, NULL);
2052 return (ret);
2053 }
2054
2055 /*
2056 * __db_associate_foreign_pp --
2057 * DB->associate_foreign pre/post processing.
2058 *
2059 * PUBLIC: int __db_associate_foreign_pp __P((DB *, DB *,
2060 * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
2061 * PUBLIC: u_int32_t));
2062 */
2063 int
__db_associate_foreign_pp(fdbp,dbp,callback,flags)2064 __db_associate_foreign_pp(fdbp, dbp, callback, flags)
2065 DB *dbp, *fdbp;
2066 int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
2067 u_int32_t flags;
2068 {
2069 /* Most of this is based on the implementation of associate */
2070 DB_THREAD_INFO *ip;
2071 ENV *env;
2072 int handle_check, ret, t_ret;
2073
2074 env = dbp->env;
2075
2076 PANIC_CHECK(env);
2077 STRIP_AUTO_COMMIT(flags);
2078
2079 ENV_ENTER(env, ip);
2080
2081 /* Check for replication block. */
2082 handle_check = IS_ENV_REPLICATED(env);
2083 if (handle_check &&
2084 (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
2085 handle_check = 0;
2086 goto err;
2087 }
2088
2089 if ((ret = __db_associate_foreign_arg(fdbp, dbp, callback, flags)) != 0)
2090 goto err;
2091
2092 ret = __db_associate_foreign(fdbp, dbp, callback, flags);
2093
2094 err: /* Release replication block. */
2095 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
2096 ret = t_ret;
2097 ENV_LEAVE(env, ip);
2098 return (ret);
2099 }
2100
2101 /*
2102 * __db_associate_foreign_arg --
2103 * DB->associate_foreign argument checking.
2104 */
2105 static int
__db_associate_foreign_arg(fdbp,dbp,callback,flags)2106 __db_associate_foreign_arg(fdbp, dbp, callback, flags)
2107 DB *dbp, *fdbp;
2108 int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
2109 u_int32_t flags;
2110 {
2111 ENV *env;
2112
2113 env = fdbp->env;
2114
2115 if (F_ISSET(fdbp, DB_AM_SECONDARY)) {
2116 __db_errx(env, DB_STR("0610",
2117 "Secondary indices may not be used as foreign databases"));
2118 return (EINVAL);
2119 }
2120 if (F_ISSET(fdbp, DB_AM_DUP)) {
2121 __db_errx(env, DB_STR("0611",
2122 "Foreign databases may not be configured with duplicates"));
2123 return (EINVAL);
2124 }
2125 if (F_ISSET(fdbp, DB_AM_RENUMBER)) {
2126 __db_errx(env, DB_STR("0612",
2127 "Renumbering recno databases may not be used as foreign databases"));
2128 return (EINVAL);
2129 }
2130 if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
2131 __db_errx(env, DB_STR("0613",
2132 "The associating database must be a secondary index."));
2133 return (EINVAL);
2134 }
2135 if (LF_ISSET(DB_FOREIGN_NULLIFY) && callback == NULL) {
2136 __db_errx(env, DB_STR("0614",
2137 "When specifying a delete action of nullify, a callback "
2138 "function needs to be configured"));
2139 return (EINVAL);
2140 } else if (!LF_ISSET(DB_FOREIGN_NULLIFY) && callback != NULL) {
2141 __db_errx(env, DB_STR("0615",
2142 "When not specifying a delete action of nullify, a "
2143 "callback function cannot be configured"));
2144 return (EINVAL);
2145 }
2146 if (FLD_ISSET(dbp->open_flags, DB_SLICED) ||
2147 FLD_ISSET(fdbp->open_flags, DB_SLICED)) {
2148 __db_errx(env,
2149 "DB->associate_foreign does not support sliced databases.");
2150 return (EINVAL);
2151 }
2152
2153 return (0);
2154 }
2155
2156 /*
2157 * __db_sync_pp --
2158 * DB->sync pre/post processing.
2159 *
2160 * PUBLIC: int __db_sync_pp __P((DB *, u_int32_t));
2161 */
2162 int
__db_sync_pp(dbp,flags)2163 __db_sync_pp(dbp, flags)
2164 DB *dbp;
2165 u_int32_t flags;
2166 {
2167 DB_THREAD_INFO *ip;
2168 ENV *env;
2169 int handle_check, ret, t_ret;
2170
2171 env = dbp->env;
2172
2173 DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");
2174
2175 /*
2176 * !!!
2177 * The actual argument checking is simple, do it inline, outside of
2178 * the replication block.
2179 */
2180 if (flags != 0)
2181 return (__db_ferr(env, "DB->sync", 0));
2182
2183 ENV_ENTER(env, ip);
2184
2185 /* Check for replication block. */
2186 handle_check = IS_ENV_REPLICATED(env);
2187 if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
2188 handle_check = 0;
2189 goto err;
2190 }
2191
2192 ret = __db_sync(dbp);
2193
2194 /* Release replication block. */
2195 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
2196 ret = t_ret;
2197
2198 err: ENV_LEAVE(env, ip);
2199 return (ret);
2200 }
2201
2202 /*
2203 * __dbc_close_pp --
2204 * DBC->close pre/post processing.
2205 *
2206 * PUBLIC: int __dbc_close_pp __P((DBC *));
2207 */
2208 int
__dbc_close_pp(dbc)2209 __dbc_close_pp(dbc)
2210 DBC *dbc;
2211 {
2212 DB *dbp;
2213 DB_THREAD_INFO *ip;
2214 ENV *env;
2215 DB_TXN *txn;
2216 int handle_check, ret, t_ret;
2217
2218 dbp = dbc->dbp;
2219 env = dbp->env;
2220 txn = dbc->txn;
2221 ret = 0;
2222
2223 /*
2224 * If the cursor is already closed we have a serious problem, and we
2225 * assume that the cursor isn't on the active queue. Don't do any of
2226 * the remaining cursor close processing.
2227 */
2228 if (!F_ISSET(dbc, DBC_ACTIVE)) {
2229 __db_errx(env, DB_STR("0616",
2230 "Closing already-closed cursor"));
2231 return (EINVAL);
2232 }
2233
2234 ENV_ENTER(env, ip);
2235 dbc->thread_info = ip;
2236
2237 /* Check for replication block. */
2238 handle_check = !IS_REAL_TXN(dbc->txn) && IS_ENV_REPLICATED(env);
2239
2240 /* Unregister the cursor from its transaction, regardless of ret. */
2241 if (txn != NULL) {
2242 TAILQ_REMOVE(&(txn->my_cursors), dbc, txn_cursors);
2243 dbc->txn_cursors.tqe_next = NULL;
2244 dbc->txn_cursors.tqe_prev = NULL;
2245 } else {
2246 DB_ASSERT(env, dbc->txn_cursors.tqe_next == NULL &&
2247 dbc->txn_cursors.tqe_prev == NULL);
2248 }
2249
2250 #ifdef HAVE_SLICES
2251 if (FLD_ISSET(dbp->open_flags, DB_SLICED))
2252 ret = __dbc_slice_close(dbc);
2253 #endif
2254
2255 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
2256 ret = t_ret;
2257
2258 /* Release replication block. */
2259 if (handle_check &&
2260 (t_ret = __op_rep_exit(env)) != 0 && ret == 0)
2261 ret = t_ret;
2262
2263 ENV_LEAVE(env, ip);
2264 return (ret);
2265 }
2266
2267 /*
2268 * __dbc_cmp_pp --
2269 * DBC->cmp pre/post processing.
2270 *
2271 * PUBLIC: int __dbc_cmp_pp __P((DBC *, DBC *, int*, u_int32_t));
2272 */
2273 int
__dbc_cmp_pp(dbc,other_cursor,result,flags)2274 __dbc_cmp_pp(dbc, other_cursor, result, flags)
2275 DBC *dbc, *other_cursor;
2276 int *result;
2277 u_int32_t flags;
2278 {
2279 DB *dbp, *odbp;
2280 DB_THREAD_INFO *ip;
2281 ENV *env;
2282 int ret;
2283
2284 dbp = dbc->dbp;
2285 odbp = other_cursor->dbp;
2286 env = dbp->env;
2287
2288 if (flags != 0)
2289 return (__db_ferr(env, "DBcursor->cmp", 0));
2290
2291 if (other_cursor == NULL) {
2292 __db_errx(env, DB_STR("0617",
2293 "DBcursor->cmp dbc pointer must not be null"));
2294 return (EINVAL);
2295 }
2296
2297 if (dbp != odbp) {
2298 __db_errx(env, DB_STR("0618",
2299 "DBcursor->cmp both cursors must refer to the same database."));
2300 return (EINVAL);
2301 }
2302
2303 ENV_ENTER(env, ip);
2304 dbc->thread_info = ip;
2305 ret = __dbc_cmp(dbc, other_cursor, result);
2306 ENV_LEAVE(env, ip);
2307 return (ret);
2308 }
2309
2310 /*
2311 * __dbc_count_pp --
2312 * DBC->count pre/post processing.
2313 *
2314 * PUBLIC: int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t));
2315 */
2316 int
__dbc_count_pp(dbc,recnop,flags)2317 __dbc_count_pp(dbc, recnop, flags)
2318 DBC *dbc;
2319 db_recno_t *recnop;
2320 u_int32_t flags;
2321 {
2322 DB *dbp;
2323 DB_THREAD_INFO *ip;
2324 ENV *env;
2325 int ret;
2326
2327 dbp = dbc->dbp;
2328 env = dbp->env;
2329
2330 /*
2331 * !!!
2332 * The actual argument checking is simple, do it inline, outside of
2333 * the replication block.
2334 *
2335 * The cursor must be initialized, return EINVAL for an invalid cursor.
2336 */
2337 if (flags != 0)
2338 return (__db_ferr(env, "DBcursor->count", 0));
2339
2340 if (!IS_INITIALIZED(dbc))
2341 return (__db_curinval(env));
2342
2343 ENV_ENTER(env, ip);
2344 dbc->thread_info = ip;
2345 ret = __dbc_count(dbc, recnop);
2346 ENV_LEAVE(env, ip);
2347 return (ret);
2348 }
2349
2350 /*
2351 * __dbc_del_pp --
2352 * DBC->del pre/post processing.
2353 *
2354 * PUBLIC: int __dbc_del_pp __P((DBC *, u_int32_t));
2355 */
2356 int
__dbc_del_pp(dbc,flags)2357 __dbc_del_pp(dbc, flags)
2358 DBC *dbc;
2359 u_int32_t flags;
2360 {
2361 DB *dbp;
2362 DB_THREAD_INFO *ip;
2363 ENV *env;
2364 int ret;
2365
2366 dbp = dbc->dbp;
2367 env = dbp->env;
2368
2369 if ((ret = __dbc_del_arg(dbc, flags)) != 0)
2370 return (ret);
2371
2372 ENV_ENTER(env, ip);
2373 dbc->thread_info = ip;
2374
2375 /* Check for consistent transaction usage. */
2376 if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
2377 goto err;
2378
2379 DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->del", NULL, NULL, flags);
2380 ret = __dbc_del(dbc, flags);
2381
2382 err: ENV_LEAVE(env, ip);
2383 return (ret);
2384 }
2385
2386 /*
2387 * __dbc_del_arg --
2388 * Check DBC->del arguments.
2389 */
2390 static int
__dbc_del_arg(dbc,flags)2391 __dbc_del_arg(dbc, flags)
2392 DBC *dbc;
2393 u_int32_t flags;
2394 {
2395 DB *dbp;
2396 ENV *env;
2397
2398 dbp = dbc->dbp;
2399 env = dbp->env;
2400
2401 /* Check for changes to a read-only tree. */
2402 if (DB_IS_READONLY(dbp))
2403 return (__db_rdonly(env, "DBcursor->del"));
2404
2405 /* Check for invalid function flags. */
2406 switch (flags) {
2407 case 0:
2408 break;
2409 case DB_CONSUME:
2410 if (dbp->type != DB_QUEUE)
2411 return (__db_ferr(env, "DBC->del", 0));
2412 break;
2413 case DB_UPDATE_SECONDARY:
2414 DB_ASSERT(env, F_ISSET(dbp, DB_AM_SECONDARY));
2415 break;
2416 default:
2417 return (__db_ferr(env, "DBcursor->del", 0));
2418 }
2419
2420 /*
2421 * The cursor must be initialized, return EINVAL for an invalid cursor,
2422 * otherwise 0.
2423 */
2424 if (!IS_INITIALIZED(dbc))
2425 return (__db_curinval(env));
2426
2427 return (0);
2428 }
2429
2430 /*
2431 * __dbc_dup_pp --
2432 * DBC->dup pre/post processing.
2433 *
2434 * PUBLIC: int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t));
2435 */
2436 int
__dbc_dup_pp(dbc,dbcp,flags)2437 __dbc_dup_pp(dbc, dbcp, flags)
2438 DBC *dbc, **dbcp;
2439 u_int32_t flags;
2440 {
2441 DB *dbp;
2442 DB_THREAD_INFO *ip;
2443 ENV *env;
2444 int rep_blocked, ret;
2445
2446 dbp = dbc->dbp;
2447 env = dbp->env;
2448
2449 /*
2450 * !!!
2451 * The actual argument checking is simple, do it inline, outside of
2452 * the replication block.
2453 */
2454 if (flags != 0 && flags != DB_POSITION)
2455 return (__db_ferr(env, "DBcursor->dup", 0));
2456
2457 ENV_ENTER(env, ip);
2458 dbc->thread_info = ip;
2459 rep_blocked = 0;
2460 if (dbc->txn == NULL && IS_ENV_REPLICATED(env)) {
2461 if ((ret = __op_rep_enter(env, 1, 1)) != 0)
2462 goto err;
2463 rep_blocked = 1;
2464 }
2465 ret = __dbc_dup(dbc, dbcp, flags);
2466
2467 /* Register externally created cursors into the valid transaction. */
2468 DB_ASSERT(env, (*dbcp)->txn == dbc->txn);
2469 if ((*dbcp)->txn != NULL && ret == 0)
2470 TAILQ_INSERT_HEAD(&((*dbcp)->txn->my_cursors), *dbcp,
2471 txn_cursors);
2472 err:
2473 if (ret != 0 && rep_blocked)
2474 (void)__op_rep_exit(env);
2475
2476 ENV_LEAVE(env, ip);
2477
2478 return (ret);
2479 }
2480
2481 /*
2482 * __dbc_get_pp --
2483 * DBC->get pre/post processing.
2484 *
2485 * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t));
2486 */
2487 int
__dbc_get_pp(dbc,key,data,flags)2488 __dbc_get_pp(dbc, key, data, flags)
2489 DBC *dbc;
2490 DBT *key, *data;
2491 u_int32_t flags;
2492 {
2493 DB *dbp;
2494 DB_THREAD_INFO *ip;
2495 ENV *env;
2496 int ignore_lease, ret;
2497
2498 dbp = dbc->dbp;
2499 env = dbp->env;
2500
2501 ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
2502 LF_CLR(DB_IGNORE_LEASE);
2503 if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0) {
2504 __dbt_userfree(env, key, NULL, data);
2505 return (ret);
2506 }
2507
2508 ENV_ENTER(env, ip);
2509 dbc->thread_info = ip;
2510
2511 DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get",
2512 flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
2513 ret = __dbc_get(dbc, key, data, flags);
2514
2515 /*
2516 * Check for master leases.
2517 */
2518 if (ret == 0 &&
2519 IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
2520 ret = __rep_lease_check(env, 1);
2521
2522 DB_TEST_CRASH(env->test_abort, DB_TEST_NO_MUTEX);
2523 ENV_LEAVE(env, ip);
2524 __dbt_userfree(env, key, NULL, data);
2525 return (ret);
2526 }
2527
2528 /*
2529 * __dbc_get_arg --
2530 * Common DBC->get argument checking, used by both DBC->get and DBC->pget.
2531 * PUBLIC: int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t));
2532 */
2533 int
__dbc_get_arg(dbc,key,data,flags)2534 __dbc_get_arg(dbc, key, data, flags)
2535 DBC *dbc;
2536 DBT *key, *data;
2537 u_int32_t flags;
2538 {
2539 DB *dbp;
2540 ENV *env;
2541 int dirty, multi, ret;
2542
2543 dbp = dbc->dbp;
2544 env = dbp->env;
2545
2546 /*
2547 * Typically in checking routines that modify the flags, we have
2548 * to save them and restore them, because the checking routine
2549 * calls the work routine. However, this is a pure-checking
2550 * routine which returns to a function that calls the work routine,
2551 * so it's OK that we do not save and restore the flags, even though
2552 * we modify them.
2553 *
2554 * Check for read-modify-write validity. DB_RMW doesn't make sense
2555 * with CDB cursors since if you're going to write the cursor, you
2556 * had to create it with DB_WRITECURSOR. Regardless, we check for
2557 * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
2558 * If this changes, confirm that DB does not itself set the DB_RMW
2559 * flag in a path where CDB may have been configured.
2560 */
2561 dirty = 0;
2562 if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
2563 if (!LOCKING_ON(env))
2564 return (__db_fnl(env, "DBcursor->get"));
2565 if (LF_ISSET(DB_READ_UNCOMMITTED))
2566 dirty = 1;
2567 LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
2568 }
2569
2570 multi = 0;
2571 if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
2572 multi = 1;
2573 if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY))
2574 goto multi_err;
2575 LF_CLR(DB_MULTIPLE | DB_MULTIPLE_KEY);
2576 }
2577
2578 /* Check for invalid function flags. */
2579 switch (flags) {
2580 case DB_CONSUME:
2581 case DB_CONSUME_WAIT:
2582 if (dirty) {
2583 __db_errx(env, DB_STR("0619",
2584 "DB_READ_UNCOMMITTED is not supported with DB_CONSUME or DB_CONSUME_WAIT"));
2585 return (EINVAL);
2586 }
2587 if (dbp->type != DB_QUEUE)
2588 goto err;
2589 break;
2590 case DB_CURRENT:
2591 case DB_FIRST:
2592 case DB_NEXT:
2593 case DB_NEXT_DUP:
2594 case DB_NEXT_NODUP:
2595 break;
2596 case DB_LAST:
2597 case DB_PREV:
2598 case DB_PREV_DUP:
2599 case DB_PREV_NODUP:
2600 if (multi)
2601 multi_err: return (__db_ferr(env, "DBcursor->get", 1));
2602 break;
2603 case DB_GET_BOTHC:
2604 if (dbp->type == DB_QUEUE)
2605 goto err;
2606 /* FALLTHROUGH */
2607 case DB_GET_BOTH:
2608 case DB_GET_BOTH_RANGE:
2609 if ((ret = __dbt_usercopy(env, data)) != 0)
2610 goto err;
2611 /* FALLTHROUGH */
2612 case DB_SET:
2613 case DB_SET_RANGE:
2614 if ((ret = __dbt_usercopy(env, key)) != 0)
2615 goto err;
2616 break;
2617 case DB_GET_RECNO:
2618 /*
2619 * The one situation in which this might be legal with a
2620 * non-RECNUM dbp is if dbp is a secondary and its primary is
2621 * DB_AM_RECNUM.
2622 */
2623 if (!F_ISSET(dbp, DB_AM_RECNUM) &&
2624 (!F_ISSET(dbp, DB_AM_SECONDARY) ||
2625 !F_ISSET(dbp->s_primary, DB_AM_RECNUM)))
2626 goto err;
2627 break;
2628 case DB_SET_RECNO:
2629 if (!F_ISSET(dbp, DB_AM_RECNUM))
2630 goto err;
2631 if ((ret = __dbt_usercopy(env, key)) != 0)
2632 goto err;
2633 break;
2634 default:
2635 err: __dbt_userfree(env, key, NULL, data);
2636 return (__db_ferr(env, "DBcursor->get", 0));
2637 }
2638
2639 /* Check for invalid key/data flags. */
2640 if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
2641 return (ret);
2642 if (F_ISSET(data, DB_DBT_READONLY)) {
2643 __db_errx(env, DB_STR("0584",
2644 "DB_DBT_READONLY should not be set on data DBT."));
2645 return (EINVAL);
2646 }
2647 if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
2648 return (ret);
2649
2650 if (multi) {
2651 if (!F_ISSET(data, DB_DBT_USERMEM)) {
2652 __db_errx(env, DB_STR("0621",
2653 "DB_MULTIPLE/DB_MULTIPLE_KEY require DB_DBT_USERMEM be set"));
2654 return (EINVAL);
2655 }
2656 if (F_ISSET(key, DB_DBT_PARTIAL) ||
2657 F_ISSET(data, DB_DBT_PARTIAL)) {
2658 __db_errx(env, DB_STR("0622",
2659 "DB_MULTIPLE/DB_MULTIPLE_KEY do not support DB_DBT_PARTIAL"));
2660 return (EINVAL);
2661 }
2662 if (data->ulen < 1024 ||
2663 data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
2664 __db_errx(env, DB_STR("0623",
2665 "DB_MULTIPLE/DB_MULTIPLE_KEY buffers must be "
2666 "aligned, at least page size and multiples of 1KB"));
2667 return (EINVAL);
2668 }
2669 }
2670
2671 /* Check compatible flags for partial key. */
2672 if (F_ISSET(key, DB_DBT_PARTIAL) && (flags == DB_GET_BOTH ||
2673 flags == DB_GET_BOTH_RANGE || flags == DB_SET)) {
2674 __db_errx(env, DB_STR("0708",
2675 "Invalid positioning flag combined with DB_DBT_PARTIAL"));
2676 return (EINVAL);
2677 }
2678
2679 /*
2680 * The cursor must be initialized for DB_CURRENT, DB_GET_RECNO,
2681 * DB_PREV_DUP and DB_NEXT_DUP. Return EINVAL for an invalid
2682 * cursor, otherwise 0.
2683 */
2684 if (!IS_INITIALIZED(dbc) && (flags == DB_CURRENT ||
2685 flags == DB_GET_RECNO ||
2686 flags == DB_NEXT_DUP || flags == DB_PREV_DUP))
2687 return (__db_curinval(env));
2688
2689 /* Check for consistent transaction usage. */
2690 if (LF_ISSET(DB_RMW) &&
2691 (ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
2692 return (ret);
2693
2694 return (0);
2695 }
2696
2697 /*
2698 * __db_secondary_close_pp --
2699 * DB->close for secondaries
2700 *
2701 * PUBLIC: int __db_secondary_close_pp __P((DB *, u_int32_t));
2702 */
2703 int
__db_secondary_close_pp(dbp,flags)2704 __db_secondary_close_pp(dbp, flags)
2705 DB *dbp;
2706 u_int32_t flags;
2707 {
2708 DB_THREAD_INFO *ip;
2709 ENV *env;
2710 int handle_check, ret, t_ret;
2711
2712 env = dbp->env;
2713 ret = 0;
2714
2715 /*
2716 * As a DB handle destructor, we can't fail.
2717 *
2718 * !!!
2719 * The actual argument checking is simple, do it inline, outside of
2720 * the replication block.
2721 */
2722 if (flags != 0 && flags != DB_NOSYNC)
2723 ret = __db_ferr(env, "DB->close", 0);
2724
2725 ENV_ENTER(env, ip);
2726
2727 /* Check for replication block. */
2728 handle_check = IS_ENV_REPLICATED(env);
2729 if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
2730 handle_check = 0;
2731 if (ret == 0)
2732 ret = t_ret;
2733 }
2734
2735 if ((t_ret = __db_secondary_close(dbp, flags)) != 0 && ret == 0)
2736 ret = t_ret;
2737
2738 /* Release replication block. */
2739 if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
2740 ret = t_ret;
2741
2742 ENV_LEAVE(env, ip);
2743 return (ret);
2744 }
2745
2746 /*
2747 * __dbc_pget_pp --
2748 * DBC->pget pre/post processing.
2749 *
2750 * PUBLIC: int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
2751 */
2752 int
__dbc_pget_pp(dbc,skey,pkey,data,flags)2753 __dbc_pget_pp(dbc, skey, pkey, data, flags)
2754 DBC *dbc;
2755 DBT *skey, *pkey, *data;
2756 u_int32_t flags;
2757 {
2758 DB *dbp;
2759 DB_THREAD_INFO *ip;
2760 ENV *env;
2761 int ignore_lease, ret;
2762
2763 dbp = dbc->dbp;
2764 env = dbp->env;
2765
2766 ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
2767 LF_CLR(DB_IGNORE_LEASE);
2768 if ((ret = __dbc_pget_arg(dbc, pkey, flags)) != 0 ||
2769 (ret = __dbc_get_arg(dbc, skey, data, flags)) != 0) {
2770 __dbt_userfree(env, skey, pkey, data);
2771 return (ret);
2772 }
2773
2774 ENV_ENTER(env, ip);
2775 dbc->thread_info = ip;
2776 DEBUG_LREAD(dbc, dbc->txn, "DBcursor->pget",
2777 flags == DB_SET ||
2778 flags == DB_SET_RANGE ? skey : NULL, NULL, flags);
2779 ret = __dbc_pget(dbc, skey, pkey, data, flags);
2780 /*
2781 * Check for master leases.
2782 */
2783 if (ret == 0 &&
2784 IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
2785 ret = __rep_lease_check(env, 1);
2786
2787 ENV_LEAVE(env, ip);
2788
2789 __dbt_userfree(env, skey, pkey, data);
2790 return (ret);
2791 }
2792
2793 /*
2794 * __dbc_pget_arg --
2795 * Check DBC->pget arguments.
2796 */
2797 static int
__dbc_pget_arg(dbc,pkey,flags)2798 __dbc_pget_arg(dbc, pkey, flags)
2799 DBC *dbc;
2800 DBT *pkey;
2801 u_int32_t flags;
2802 {
2803 DB *dbp;
2804 ENV *env;
2805 int ret;
2806
2807 dbp = dbc->dbp;
2808 env = dbp->env;
2809
2810 if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
2811 __db_errx(env, DB_STR("0624",
2812 "DBcursor->pget may only be used on secondary indices"));
2813 return (EINVAL);
2814 }
2815
2816 if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
2817 __db_errx(env, DB_STR("0602",
2818 "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices"));
2819 return (EINVAL);
2820 }
2821
2822 switch (LF_ISSET(DB_OPFLAGS_MASK)) {
2823 case DB_CONSUME:
2824 case DB_CONSUME_WAIT:
2825 /* These flags make no sense on a secondary index. */
2826 return (__db_ferr(env, "DBcursor->pget", 0));
2827 case DB_GET_BOTH:
2828 case DB_GET_BOTH_RANGE:
2829 /* BOTH is "get both the primary and the secondary". */
2830 if (pkey == NULL) {
2831 __db_errx(env, DB_STR_A("0626",
2832 "%s requires both a secondary and a primary key",
2833 "%s"), LF_ISSET(DB_GET_BOTH) ?
2834 "DB_GET_BOTH" : "DB_GET_BOTH_RANGE");
2835 return (EINVAL);
2836 }
2837 if ((ret = __dbt_usercopy(env, pkey)) != 0)
2838 return (ret);
2839 break;
2840 default:
2841 /* __dbc_get_arg will catch the rest. */
2842 break;
2843 }
2844
2845 /*
2846 * We allow the pkey field to be NULL, so that we can make the
2847 * two-DBT get calls into wrappers for the three-DBT ones.
2848 */
2849 if (pkey != NULL &&
2850 (ret = __dbt_ferr(dbp, "primary key", pkey, 0)) != 0)
2851 return (ret);
2852
2853 /* Check invalid partial pkey. */
2854 if (pkey != NULL && F_ISSET(pkey, DB_DBT_PARTIAL)) {
2855 __db_errx(env, DB_STR("0711",
2856 "The primary key returned by pget can't be partial."));
2857 return (EINVAL);
2858 }
2859
2860 /* But the pkey field can't be NULL if we're doing a DB_GET_BOTH. */
2861 if (pkey == NULL && (flags & DB_OPFLAGS_MASK) == DB_GET_BOTH) {
2862 __db_errx(env, DB_STR("0603",
2863 "DB_GET_BOTH on a secondary index requires a primary key"));
2864 return (EINVAL);
2865 }
2866
2867 return (0);
2868 }
2869
2870 /*
2871 * __dbc_put_pp --
2872 * DBC->put pre/post processing.
2873 *
2874 * PUBLIC: int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t));
2875 */
2876 int
__dbc_put_pp(dbc,key,data,flags)2877 __dbc_put_pp(dbc, key, data, flags)
2878 DBC *dbc;
2879 DBT *key, *data;
2880 u_int32_t flags;
2881 {
2882 DB *dbp;
2883 DB_THREAD_INFO *ip;
2884 ENV *env;
2885 int ret;
2886
2887 dbp = dbc->dbp;
2888 env = dbp->env;
2889
2890 if ((ret = __dbc_put_arg(dbc, key, data, flags)) != 0) {
2891 __dbt_userfree(env, key, NULL, data);
2892 return (ret);
2893 }
2894
2895 ENV_ENTER(env, ip);
2896 dbc->thread_info = ip;
2897
2898 /* Check for consistent transaction usage. */
2899 if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
2900 goto err;
2901
2902 DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->put",
2903 flags == DB_KEYFIRST || flags == DB_KEYLAST ||
2904 flags == DB_NODUPDATA || flags == DB_UPDATE_SECONDARY ?
2905 key : NULL, data, flags);
2906 ret = __dbc_put(dbc, key, data, flags);
2907
2908 err: ENV_LEAVE(env, ip);
2909 __dbt_userfree(env, key, NULL, data);
2910 return (ret);
2911 }
2912
2913 /*
2914 * __dbc_put_arg --
2915 * Check DBC->put arguments.
2916 */
2917 static int
__dbc_put_arg(dbc,key,data,flags)2918 __dbc_put_arg(dbc, key, data, flags)
2919 DBC *dbc;
2920 DBT *key, *data;
2921 u_int32_t flags;
2922 {
2923 DB *dbp;
2924 ENV *env;
2925 int key_flags, ret;
2926
2927 dbp = dbc->dbp;
2928 env = dbp->env;
2929 key_flags = 0;
2930
2931 /* Check for changes to a read-only tree. */
2932 if (DB_IS_READONLY(dbp))
2933 return (__db_rdonly(env, "DBcursor->put"));
2934
2935 /* Check for puts on a secondary. */
2936 if (F_ISSET(dbp, DB_AM_SECONDARY)) {
2937 if (flags == DB_UPDATE_SECONDARY)
2938 flags = 0;
2939 else {
2940 __db_errx(env, DB_STR("0628",
2941 "DBcursor->put forbidden on secondary indices"));
2942 return (EINVAL);
2943 }
2944 }
2945
2946 if ((ret = __dbt_usercopy(env, data)) != 0)
2947 return (ret);
2948
2949 /* Check for invalid function flags. */
2950 switch (flags) {
2951 case DB_AFTER:
2952 case DB_BEFORE:
2953 switch (dbp->type) {
2954 case DB_BTREE:
2955 case DB_HASH: /* Only with unsorted duplicates. */
2956 if (!F_ISSET(dbp, DB_AM_DUP))
2957 goto err;
2958 if (dbp->dup_compare != NULL)
2959 goto err;
2960 break;
2961 case DB_QUEUE: /* Not permitted. */
2962 goto err;
2963 case DB_RECNO: /* Only with mutable record numbers. */
2964 if (!F_ISSET(dbp, DB_AM_RENUMBER))
2965 goto err;
2966 key_flags = key == NULL ? 0 : 1;
2967 break;
2968 case DB_UNKNOWN:
2969 default:
2970 goto err;
2971 }
2972 break;
2973 case DB_CURRENT:
2974 /*
2975 * If there is a comparison function, doing a DB_CURRENT
2976 * must not change the part of the data item that is used
2977 * for the comparison.
2978 */
2979 break;
2980 case DB_NODUPDATA:
2981 if (!F_ISSET(dbp, DB_AM_DUPSORT))
2982 goto err;
2983 /* FALLTHROUGH */
2984 case DB_KEYFIRST:
2985 case DB_KEYLAST:
2986 case DB_OVERWRITE_DUP:
2987 key_flags = 1;
2988 if ((ret = __dbt_usercopy(env, key)) != 0)
2989 return (ret);
2990 break;
2991 default:
2992 err: return (__db_ferr(env, "DBcursor->put", 0));
2993 }
2994
2995 /*
2996 * Check for invalid key/data flags. The key may reasonably be NULL
2997 * if DB_AFTER or DB_BEFORE is set and the application doesn't care
2998 * about the returned key, or if the DB_CURRENT flag is set.
2999 */
3000 if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
3001 return (ret);
3002 if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
3003 return (ret);
3004
3005 /*
3006 * The key parameter should not be NULL or have the "partial" flag set
3007 * in a put call unless the user doesn't care about a key value we'd
3008 * return. The user tells us they don't care about the returned key by
3009 * setting the key parameter to NULL or configuring the key DBT to not
3010 * return any information. (Returned keys from a put are always record
3011 * numbers, and returning part of a record number doesn't make sense:
3012 * only accept a partial return if the length returned is 0.)
3013 */
3014 if (key_flags && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0)
3015 return (__db_ferr(env, "key DBT", 0));
3016
3017 /*
3018 * The cursor must be initialized for anything other than DB_KEYFIRST,
3019 * DB_KEYLAST or zero: return EINVAL for an invalid cursor, otherwise 0.
3020 */
3021 if (!IS_INITIALIZED(dbc) && flags != 0 && flags != DB_KEYFIRST &&
3022 flags != DB_KEYLAST && flags != DB_NODUPDATA &&
3023 flags != DB_OVERWRITE_DUP)
3024 return (__db_curinval(env));
3025
3026 return (0);
3027 }
3028
3029 /*
3030 * __dbt_ferr --
3031 * Check a DBT for flag errors.
3032 */
3033 static int
__dbt_ferr(dbp,name,dbt,check_thread)3034 __dbt_ferr(dbp, name, dbt, check_thread)
3035 const DB *dbp;
3036 const char *name;
3037 const DBT *dbt;
3038 int check_thread;
3039 {
3040 ENV *env;
3041 int ret;
3042
3043 env = dbp->env;
3044
3045 /*
3046 * Check for invalid DBT flags. We allow any of the flags to be
3047 * specified to any DB or DBcursor call so that applications can
3048 * set DB_DBT_MALLOC when retrieving a data item from a secondary
3049 * database and then specify that same DBT as a key to a primary
3050 * database, without having to clear flags.
3051 */
3052 if ((ret = __db_fchk(env, name, dbt->flags,
3053 DB_DBT_APPMALLOC | DB_DBT_BLOB | DB_DBT_BULK | DB_DBT_DUPOK |
3054 DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERCOPY |
3055 DB_DBT_USERMEM | DB_DBT_PARTIAL | DB_DBT_READONLY)) != 0)
3056 return (ret);
3057 switch (F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC |
3058 DB_DBT_USERCOPY | DB_DBT_USERMEM)) {
3059 case 0:
3060 case DB_DBT_MALLOC:
3061 case DB_DBT_REALLOC:
3062 case DB_DBT_USERCOPY:
3063 case DB_DBT_USERMEM:
3064 break;
3065 default:
3066 return (__db_ferr(env, name, 1));
3067 }
3068
3069 if (F_ISSET(dbt, DB_DBT_BULK) && F_ISSET(dbt, DB_DBT_PARTIAL)) {
3070 __db_errx(env, DB_STR_A("0629",
3071 "Bulk and partial operations cannot be combined on %s DBT",
3072 "%s"), name);
3073 return (EINVAL);
3074 }
3075
3076 if (check_thread && DB_IS_THREADED(dbp) &&
3077 !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC |
3078 DB_DBT_USERCOPY | DB_DBT_USERMEM | DB_DBT_READONLY)) {
3079 __db_errx(env, DB_STR_A("0630",
3080 "DB_THREAD mandates memory allocation flag on %s DBT",
3081 "%s"), name);
3082 return (EINVAL);
3083 }
3084 return (0);
3085 }
3086
3087 /*
3088 * __db_curinval
3089 * Report that a cursor is in an invalid state.
3090 */
3091 static int
__db_curinval(env)3092 __db_curinval(env)
3093 const ENV *env;
3094 {
3095 __db_errx(env, DB_STR("0631",
3096 "Cursor position must be set before performing this operation"));
3097 return (EINVAL);
3098 }
3099
3100 /*
3101 * __db_txn_auto_init --
3102 * Handle DB_AUTO_COMMIT initialization.
3103 *
3104 * PUBLIC: int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **));
3105 */
3106 int
__db_txn_auto_init(env,ip,txnidp)3107 __db_txn_auto_init(env, ip, txnidp)
3108 ENV *env;
3109 DB_THREAD_INFO *ip;
3110 DB_TXN **txnidp;
3111 {
3112 /*
3113 * Method calls where applications explicitly specify DB_AUTO_COMMIT
3114 * require additional validation: the DB_AUTO_COMMIT flag cannot be
3115 * specified if a transaction cookie is also specified, nor can the
3116 * flag be specified in a non-transactional environment.
3117 */
3118 if (*txnidp != NULL && !F_ISSET(*txnidp, TXN_FAMILY)) {
3119 __db_errx(env, DB_STR("0632",
3120 "DB_AUTO_COMMIT may not be specified along with a transaction handle"));
3121 return (EINVAL);
3122 }
3123
3124 if (!TXN_ON(env)) {
3125 __db_errx(env, DB_STR("0633",
3126 "DB_AUTO_COMMIT may not be specified in non-transactional environment"));
3127 return (EINVAL);
3128 }
3129
3130 /*
3131 * Our caller checked to see if replication is making a state change.
3132 * Don't call the user-level API (which would repeat that check).
3133 */
3134 return (__txn_begin(env, ip, *txnidp, txnidp, 0));
3135 }
3136
3137 /*
3138 * __db_txn_auto_resolve --
3139 * Resolve local transactions.
3140 *
3141 * PUBLIC: int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int));
3142 */
3143 int
__db_txn_auto_resolve(env,txn,nosync,ret)3144 __db_txn_auto_resolve(env, txn, nosync, ret)
3145 ENV *env;
3146 DB_TXN *txn;
3147 int nosync, ret;
3148 {
3149 int t_ret;
3150
3151 if (ret == 0)
3152 return (__txn_commit(txn, nosync ? DB_TXN_NOSYNC : 0));
3153
3154 if ((t_ret = __txn_abort(txn)) != 0)
3155 return (__env_panic(env, t_ret));
3156
3157 return (ret);
3158 }
3159