1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998, 1999
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1995, 1996
9  *	The President and Fellows of Harvard University.  All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include "db_config.h"
37 
38 #ifndef lint
39 static const char sccsid[] = "@(#)log_rec.c	11.16 (Sleepycat) 10/19/99";
40 #endif /* not lint */
41 
42 #ifndef NO_SYSTEM_INCLUDES
43 #include <sys/types.h>
44 
45 #include <assert.h>
46 #include <errno.h>
47 #include <string.h>
48 #endif
49 
50 #include "db_int.h"
51 #include "log.h"
52 #include "db_dispatch.h"
53 #include "db_page.h"
54 #include "db_ext.h"
55 
56 static int CDB___log_do_open __P((DB_LOG *, u_int8_t *, char *, DBTYPE, u_int32_t));
57 static int CDB___log_lid_to_fname __P((DB_LOG *, int32_t, FNAME **));
58 static int CDB___log_open_file __P((DB_LOG *, __log_register_args *));
59 
60 /*
61  * PUBLIC: int CDB___log_register_recover
62  * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, int, void *));
63  */
64 int
CDB___log_register_recover(dbenv,dbtp,lsnp,redo,info)65 CDB___log_register_recover(dbenv, dbtp, lsnp, redo, info)
66 	DB_ENV *dbenv;
67 	DBT *dbtp;
68 	DB_LSN *lsnp;
69 	int redo;
70 	void *info;
71 {
72 	DB_ENTRY *dbe;
73 	DB_LOG *logp;
74 	__log_register_args *argp;
75 	int do_rem, ret, t_ret;
76 
77 	logp = dbenv->lg_handle;
78 
79 #ifdef DEBUG_RECOVER
80 	CDB___log_register_print(logp, dbtp, lsnp, redo, info);
81 #endif
82 	COMPQUIET(lsnp, NULL);
83 
84 	F_SET(logp, DBC_RECOVER);
85 
86 	if ((ret = CDB___log_register_read(dbtp->data, &argp)) != 0)
87 		goto out;
88 
89 	if ((argp->opcode == LOG_OPEN &&
90 	    (redo == TXN_REDO || redo == TXN_OPENFILES ||
91 	     redo == TXN_FORWARD_ROLL)) ||
92 	    (argp->opcode == LOG_CLOSE &&
93 	    (redo == TXN_UNDO || redo == TXN_BACKWARD_ROLL))) {
94 		/*
95 		 * If we are redoing an open or undoing a close, then we need
96 		 * to open a file.
97 		 */
98 		ret = CDB___log_open_file(logp, argp);
99 		if (ret == ENOENT || ret == EINVAL) {
100 			if (redo == TXN_OPENFILES && argp->name.size != 0 &&
101 			    (ret = CDB___db_txnlist_delete(info,
102 			        argp->name.data, argp->id, 0)) != 0)
103 				goto out;
104 			ret = 0;
105 		}
106 	} else if (argp->opcode != LOG_CHECKPOINT) {
107 		/*
108 		 * If we are undoing an open, then we need to close the file.
109 		 *
110   		 * If the file is deleted, then we can just ignore this close.
111  		 * Otherwise, we should usually have a valid dbp we should
112   		 * close or whose reference count should be decremented.
113  		 * However, if we shut down without closing a file, we may, in
114  		 * fact, not have the file open, and that's OK.
115 		 */
116 		do_rem = 0;
117 		MUTEX_THREAD_LOCK(logp->mutexp);
118 		if (argp->id < logp->dbentry_cnt) {
119 			dbe = &logp->dbentry[argp->id];
120 #ifdef DIAGNOSTIC
121 			assert(dbe->refcount == 1);
122 #endif
123 			ret = CDB___db_txnlist_close(info, argp->id, dbe->count);
124 			if (dbe->dbp != NULL &&
125 			    (t_ret = dbe->dbp->close(dbe->dbp, 0)) != 0
126 			    && ret == 0)
127 				ret = t_ret;
128 			do_rem = 1;
129 		}
130 		MUTEX_THREAD_UNLOCK(logp->mutexp);
131 		if (do_rem)
132 			(void)CDB___log_rem_logid(logp, argp->id);
133  	} else if ((redo == TXN_UNDO || redo == TXN_OPENFILES) &&
134 	    (argp->id >= logp->dbentry_cnt ||
135  	    (!logp->dbentry[argp->id].deleted &&
136  	    logp->dbentry[argp->id].dbp == NULL))) {
137  		/*
138  		 * It's a checkpoint and we are rolling backward.  It
139  		 * is possible that the system was shut down and thus
140  		 * ended with a stable checkpoint; this file was never
141  		 * closed and has therefore not been reopened yet.  If
142  		 * so, we need to try to open it.
143  		 */
144  		ret = CDB___log_open_file(logp, argp);
145  		if (ret == ENOENT || ret == EINVAL) {
146 			if (argp->name.size != 0 && (ret =
147 			    CDB___db_txnlist_delete(info,
148 			        argp->name.data, argp->id, 0)) != 0)
149 				goto out;
150  			ret = 0;
151  		}
152 	}
153 
154 out:	F_CLR(logp, DBC_RECOVER);
155 	if (argp != NULL)
156 		CDB___os_free(argp, 0);
157 	return (ret);
158 }
159 
160 /*
161  * CDB___log_open_file --
162  *	Called during CDB_log_register recovery.  Make sure that we have an
163  *	entry in the dbentry table for this ndx.  Returns 0 on success,
164  *	non-zero on error.
165  */
166 static int
CDB___log_open_file(lp,argp)167 CDB___log_open_file(lp, argp)
168 	DB_LOG *lp;
169 	__log_register_args *argp;
170 {
171 	DB_ENTRY *dbe;
172 
173 	/*
174 	 * We never re-open temporary files.  Temp files are only
175 	 * useful during aborts in which case the dbp was entered
176 	 * when the file was registered.  During recovery, we treat
177 	 * temp files as properly deleted files, allowing the open to
178 	 * fail and not reporting any errors when recovery fails to
179 	 * get a valid dbp from db_fileid_to_db.
180 	 */
181 	if (argp->name.size == 0) {
182 		(void)CDB___log_add_logid(lp, NULL, argp->id);
183 		return (ENOENT);
184 	}
185 
186 	/*
187 	 * Because of reference counting, we cannot automatically close files
188 	 * during recovery, so when we're opening, we have to check that the
189 	 * name we are opening is what we expect.  If it's not, then we close
190 	 * the old file and open the new one.
191 	 */
192 	MUTEX_THREAD_LOCK(lp->mutexp);
193 	if (argp->id < lp->dbentry_cnt)
194 		dbe = &lp->dbentry[argp->id];
195 	else
196 		dbe = NULL;
197 
198 	if (dbe != NULL && (dbe->deleted == 1 || dbe->dbp != NULL)) {
199 		dbe->refcount++;
200 		MUTEX_THREAD_UNLOCK(lp->mutexp);
201 		return (0);
202 	}
203 
204 	MUTEX_THREAD_UNLOCK(lp->mutexp);
205 
206 	return (CDB___log_do_open(lp,
207 	    argp->uid.data, argp->name.data, argp->ftype, argp->id));
208 }
209 
210 /*
211  * CDB___log_do_open --
212  * 	Open files referenced in the log.  This is the part of the open that
213  * is not protected by the thread mutex.
214  */
215 static int
CDB___log_do_open(lp,uid,name,ftype,ndx)216 CDB___log_do_open(lp, uid, name, ftype, ndx)
217 	DB_LOG *lp;
218 	u_int8_t *uid;
219 	char *name;
220 	DBTYPE ftype;
221 	u_int32_t ndx;
222 {
223 	DB *dbp;
224 	int ret;
225 	u_int8_t zeroid[DB_FILE_ID_LEN];
226 
227 	if ((ret = CDB_db_create(&dbp, lp->dbenv, 0)) != 0)
228 		return (ret);
229 	if ((ret = dbp->open(dbp, name, NULL, ftype, 0, 0600)) == 0) {
230 		/*
231 		 * Verify that we are opening the same file that we were
232 		 * referring to when we wrote this log record.
233 		 */
234 		memset(zeroid, 0, DB_FILE_ID_LEN);
235 		if (memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) == 0 ||
236 		    memcmp(dbp->fileid, zeroid, DB_FILE_ID_LEN) == 0) {
237 			(void)CDB___log_add_logid(lp, dbp, ndx);
238 			return (0);
239 		}
240 	}
241 	(void)dbp->close(dbp, 0);
242 	(void)CDB___log_add_logid(lp, NULL, ndx);
243 
244 	return (ENOENT);
245 }
246 
247 /*
248  * CDB___log_add_logid --
249  *	Adds a DB entry to the log's DB entry table.
250  *
251  * PUBLIC: int CDB___log_add_logid __P((DB_LOG *, DB *, u_int32_t));
252  */
253 int
CDB___log_add_logid(logp,dbp,ndx)254 CDB___log_add_logid(logp, dbp, ndx)
255 	DB_LOG *logp;
256 	DB *dbp;
257 	u_int32_t ndx;
258 {
259 	u_int32_t i;
260 	int ret;
261 
262 	ret = 0;
263 
264 	MUTEX_THREAD_LOCK(logp->mutexp);
265 
266 	/*
267 	 * Check if we need to grow the table.  Note, ndx is 0-based (the
268 	 * index into the DB entry table) an dbentry_cnt is 1-based, the
269 	 * number of available slots.
270 	 */
271 	if (logp->dbentry_cnt <= ndx) {
272 		if ((ret = CDB___os_realloc((ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY),
273 		    NULL, &logp->dbentry)) != 0)
274 			goto err;
275 
276 		/* Initialize the new entries. */
277 		for (i = logp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) {
278 			logp->dbentry[i].count = 0;
279 			logp->dbentry[i].dbp = NULL;
280 			logp->dbentry[i].deleted = 0;
281 			logp->dbentry[i].refcount = 0;
282 		}
283 
284 		logp->dbentry_cnt = i;
285 	}
286 
287 	if (logp->dbentry[ndx].deleted == 0 &&
288 	    logp->dbentry[ndx].dbp == NULL) {
289 		logp->dbentry[ndx].count = 0;
290 		logp->dbentry[ndx].dbp = dbp;
291 		logp->dbentry[ndx].deleted = dbp == NULL;
292 		logp->dbentry[ndx].refcount = 1;
293 	} else
294 		logp->dbentry[ndx].refcount++;
295 
296 
297 err:	MUTEX_THREAD_UNLOCK(logp->mutexp);
298 	return (ret);
299 }
300 
301 /*
302  * CDB___db_fileid_to_db --
303  *	Return the DB corresponding to the specified fileid.
304  *
305  * PUBLIC: int CDB___db_fileid_to_db __P((DB_ENV *, DB **, int32_t, int));
306  */
307 int
CDB___db_fileid_to_db(dbenv,dbpp,ndx,inc)308 CDB___db_fileid_to_db(dbenv, dbpp, ndx, inc)
309 	DB_ENV *dbenv;
310 	DB **dbpp;
311 	int32_t ndx;
312 	int inc;
313 {
314 	DB_LOG *logp;
315 	FNAME *fname;
316 	int ret;
317 	char *name;
318 
319 	ret = 0;
320 	logp = dbenv->lg_handle;
321 
322 	MUTEX_THREAD_LOCK(logp->mutexp);
323 
324 	/*
325 	 * Under XA, a process different than the one issuing DB operations
326 	 * may abort a transaction.  In this case, recovery routines are run
327 	 * by a process that does not necessarily have the file open, so we
328 	 * we must open the file explicitly.
329 	 */
330 	if ((u_int32_t)ndx >= logp->dbentry_cnt ||
331 	    (!logp->dbentry[ndx].deleted && logp->dbentry[ndx].dbp == NULL)) {
332 		if (CDB___log_lid_to_fname(logp, ndx, &fname) != 0) {
333 			/* Couldn't find entry; this is a fatal error. */
334 			ret = EINVAL;
335 			goto err;
336 		}
337 		name = R_ADDR(&logp->reginfo, fname->name_off);
338 
339 		/*
340 		 * CDB___log_do_open is called without protection of the
341 		 * log thread lock.
342 		 */
343 		MUTEX_THREAD_UNLOCK(logp->mutexp);
344 
345 		/*
346 		 * At this point, we are not holding the thread lock, so exit
347 		 * directly instead of going through the exit code at the
348 		 * bottom.  If the CDB___log_do_open succeeded, then we don't need
349 		 * to do any of the remaining error checking at the end of this
350 		 * routine.
351 		 */
352 		if ((ret = CDB___log_do_open(logp,
353 		    fname->ufid, name, fname->s_type, ndx)) != 0)
354 			return (ret);
355 
356 		*dbpp = logp->dbentry[ndx].dbp;
357 		return (0);
358 	}
359 
360 	/*
361 	 * Return DB_DELETED if the file has been deleted (it's not an error).
362 	 */
363 	if (logp->dbentry[ndx].deleted) {
364 		ret = DB_DELETED;
365 		if (inc)
366 			logp->dbentry[ndx].count++;
367 		goto err;
368 	}
369 
370 	/*
371 	 * Otherwise return 0, but if we don't have a corresponding DB, it's
372 	 * an error.
373 	 */
374 	if ((*dbpp = logp->dbentry[ndx].dbp) == NULL)
375 		ret = ENOENT;
376 
377 err:	MUTEX_THREAD_UNLOCK(logp->mutexp);
378 	return (ret);
379 }
380 
381 /*
382  * Close files that were opened by the recovery daemon.
383  *
384  * PUBLIC: void CDB___log_close_files __P((DB_ENV *));
385  */
386 void
CDB___log_close_files(dbenv)387 CDB___log_close_files(dbenv)
388 	DB_ENV *dbenv;
389 {
390 	DB_ENTRY *dbe;
391 	DB_LOG *logp;
392 	u_int32_t i;
393 
394 	logp = dbenv->lg_handle;
395 	MUTEX_THREAD_LOCK(logp->mutexp);
396 	F_SET(logp, DBC_RECOVER);
397 	for (i = 0; i < logp->dbentry_cnt; i++) {
398 		dbe = &logp->dbentry[i];
399 		if (dbe->dbp != NULL) {
400 			(void)dbe->dbp->close(dbe->dbp, 0);
401 			dbe->dbp = NULL;
402 		}
403 		dbe->deleted = 0;
404 		dbe->refcount = 0;
405 	}
406 	F_CLR(logp, DBC_RECOVER);
407 	MUTEX_THREAD_UNLOCK(logp->mutexp);
408 }
409 
410 /*
411  * PUBLIC: void CDB___log_rem_logid __P((DB_LOG *, u_int32_t));
412  */
413 void
CDB___log_rem_logid(logp,ndx)414 CDB___log_rem_logid(logp, ndx)
415 	DB_LOG *logp;
416 	u_int32_t ndx;
417 {
418 	MUTEX_THREAD_LOCK(logp->mutexp);
419 	if (--logp->dbentry[ndx].refcount == 0) {
420 		logp->dbentry[ndx].dbp = NULL;
421 		logp->dbentry[ndx].deleted = 0;
422 	}
423 	MUTEX_THREAD_UNLOCK(logp->mutexp);
424 }
425 
426 /*
427  * CDB___log_lid_to_fname --
428  * 	Traverse the shared-memory region looking for the entry that
429  *	matches the passed log fileid.  Returns 0 on success; -1 on error.
430  */
431 static int
CDB___log_lid_to_fname(dblp,lid,fnamep)432 CDB___log_lid_to_fname(dblp, lid, fnamep)
433 	DB_LOG *dblp;
434 	int32_t lid;
435 	FNAME **fnamep;
436 {
437 	FNAME *fnp;
438 	LOG *lp;
439 
440 	lp = dblp->reginfo.primary;
441 
442 	for (fnp = SH_TAILQ_FIRST(&lp->fq, __fname);
443 	    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
444 		if (fnp->ref == 0)	/* Entry not in use. */
445 			continue;
446 		if (fnp->id == lid) {
447 			*fnamep = fnp;
448 			return (0);
449 		}
450 	}
451 	return (-1);
452 }
453