1  /*
2    Unix SMB/CIFS implementation.
3 
4    trivial database library
5 
6    Copyright (C) Andrew Tridgell              1999-2005
7    Copyright (C) Paul `Rusty' Russell		   2000
8    Copyright (C) Jeremy Allison			   2000-2003
9 
10      ** NOTE! The following LGPL license applies to the tdb
11      ** library. This does NOT imply that all of Samba is released
12      ** under the LGPL
13 
14    This library is free software; you can redistribute it and/or
15    modify it under the terms of the GNU Lesser General Public
16    License as published by the Free Software Foundation; either
17    version 3 of the License, or (at your option) any later version.
18 
19    This library is distributed in the hope that it will be useful,
20    but WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    Lesser General Public License for more details.
23 
24    You should have received a copy of the GNU Lesser General Public
25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 */
27 
28 #include "tdb_private.h"
29 
/* Head of the list of every context open in this process.  It is consulted
   to refuse double-opens of the same file (fcntl locks don't nest!). */
static struct tdb_context *tdbs;
32 
33 
34 /* This is based on the hash algorithm from gdbm */
default_tdb_hash(TDB_DATA * key)35 static unsigned int default_tdb_hash(TDB_DATA *key)
36 {
37 	uint32_t value;	/* Used to compute the hash value.  */
38 	uint32_t   i;	/* Used to cycle through random values. */
39 
40 	/* Set the initial value from the key size. */
41 	for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++)
42 		value = (value + (key->dptr[i] << (i*5 % 24)));
43 
44 	return (1103515243 * value + 12345);
45 }
46 
47 
48 /* initialise a new database with a specified hash size */
tdb_new_database(struct tdb_context * tdb,int hash_size)49 static int tdb_new_database(struct tdb_context *tdb, int hash_size)
50 {
51 	struct tdb_header *newdb;
52 	size_t size;
53 	int ret = -1;
54 	ssize_t written;
55 
56 	/* We make it up in memory, then write it out if not internal */
57 	size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off_t);
58 	if (!(newdb = (struct tdb_header *)calloc(size, 1))) {
59 		tdb->ecode = TDB_ERR_OOM;
60 		return -1;
61 	}
62 
63 	/* Fill in the header */
64 	newdb->version = TDB_VERSION;
65 	newdb->hash_size = hash_size;
66 	if (tdb->flags & TDB_INTERNAL) {
67 		tdb->map_size = size;
68 		tdb->map_ptr = (char *)newdb;
69 		memcpy(&tdb->header, newdb, sizeof(tdb->header));
70 		/* Convert the `ondisk' version if asked. */
71 		CONVERT(*newdb);
72 		return 0;
73 	}
74 	if (lseek(tdb->fd, 0, SEEK_SET) == -1)
75 		goto fail;
76 
77 	if (ftruncate(tdb->fd, 0) == -1)
78 		goto fail;
79 
80 	/* This creates an endian-converted header, as if read from disk */
81 	CONVERT(*newdb);
82 	memcpy(&tdb->header, newdb, sizeof(tdb->header));
83 	/* Don't endian-convert the magic food! */
84 	memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
85 	/* we still have "ret == -1" here */
86 	written = write(tdb->fd, newdb, size);
87 	if (written == size) {
88 		ret = 0;
89 	} else if (written != -1) {
90 		/* call write once again, this usually should return -1 and
91 		 * set errno appropriately */
92 		size -= written;
93 		written = write(tdb->fd, newdb+written, size);
94 		if (written == size) {
95 		ret = 0;
96 		} else if (written >= 0) {
97 			/* a second incomplete write - we give up.
98 			 * guessing the errno... */
99 			errno = ENOSPC;
100 		}
101 	}
102 
103   fail:
104 	SAFE_FREE(newdb);
105 	return ret;
106 }
107 
108 
109 
tdb_already_open(dev_t device,ino_t ino)110 static int tdb_already_open(dev_t device,
111 			    ino_t ino)
112 {
113 	struct tdb_context *i;
114 
115 	for (i = tdbs; i; i = i->next) {
116 		if (i->device == device && i->inode == ino) {
117 			return 1;
118 		}
119 	}
120 
121 	return 0;
122 }
123 
124 /* open the database, creating it if necessary
125 
126    The open_flags and mode are passed straight to the open call on the
127    database file. A flags value of O_WRONLY is invalid. The hash size
128    is advisory, use zero for a default value.
129 
130    Return is NULL on error, in which case errno is also set.  Don't
131    try to call tdb_error or tdb_errname, just do strerror(errno).
132 
133    @param name may be NULL for internal databases. */
tdb_open(const char * name,int hash_size,int tdb_flags,int open_flags,mode_t mode)134 struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags,
135 		      int open_flags, mode_t mode)
136 {
137 	return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL);
138 }
139 
140 /* a default logging function */
141 static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4);
null_log_fn(struct tdb_context * tdb,enum tdb_debug_level level,const char * fmt,...)142 static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...)
143 {
144 }
145 
146 
/*
 * Open a database with an optional logging context and hash function,
 * creating the file if open_flags asks for it.
 *
 * name        path of the database file; may be NULL only together with
 *             TDB_INTERNAL
 * hash_size   hash size used when a new database is initialised; 0
 *             selects DEFAULT_HASH_SIZE
 * tdb_flags   TDB_* flags controlling behaviour
 * open_flags  passed to open(2); O_WRONLY is rejected with EINVAL
 * mode        passed to open(2)
 * log_ctx     logging context to copy, or NULL to discard log messages
 * hash_fn     key hash function, or NULL for the built-in default
 *
 * Returns the new context, which is also linked into the list of open
 * databases, or NULL on error, in which case errno is set.
 */
struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
				int open_flags, mode_t mode,
				const struct tdb_logging_context *log_ctx,
				tdb_hash_func hash_fn)
{
	struct tdb_context *tdb;
	struct stat st;
	int rev = 0, locked = 0;
	unsigned char *vp;
	uint32_t vertest;
	int fd_flags;

	if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) {
		/* Can't log this */
		errno = ENOMEM;
		goto fail;
	}
	tdb_io_init(tdb);
	tdb->fd = -1;
#ifdef TDB_TRACE
	tdb->tracefd = -1;
#endif
	tdb->name = NULL;
	tdb->map_ptr = NULL;
	tdb->flags = tdb_flags;
	tdb->open_flags = open_flags;
	if (log_ctx) {
		tdb->log = *log_ctx;
	} else {
		tdb->log.log_fn = null_log_fn;
		tdb->log.log_private = NULL;
	}
	tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash;

	/* Only internal (memory-only) databases may go without a name. */
	if (name == NULL) {
		if (!(tdb_flags & TDB_INTERNAL)) {
			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: called with name == NULL\n"));
			errno = EINVAL;
			goto fail;
		}
		/* Placeholder used for log messages only, so that a NULL
		 * pointer is never formatted with %s; tdb->name stays NULL
		 * for internal databases. */
		name = "__NULL__";
	}

	/* cache the page size */
	tdb->page_size = getpagesize();
	if (tdb->page_size <= 0) {
		tdb->page_size = 0x2000;
	}

	tdb->max_dead_records = (tdb_flags & TDB_VOLATILE) ? 5 : 0;

	if ((open_flags & O_ACCMODE) == O_WRONLY) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n",
			 name));
		errno = EINVAL;
		goto fail;
	}

	if (hash_size == 0)
		hash_size = DEFAULT_HASH_SIZE;
	if ((open_flags & O_ACCMODE) == O_RDONLY) {
		tdb->read_only = 1;
		/* read only databases don't do locking or clear if first */
		tdb->flags |= TDB_NOLOCK;
		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
	}

	if ((tdb->flags & TDB_ALLOW_NESTING) &&
	    (tdb->flags & TDB_DISALLOW_NESTING)) {
		tdb->ecode = TDB_ERR_NESTING;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
			"allow_nesting and disallow_nesting are not allowed together!"));
		errno = EINVAL;
		goto fail;
	}

	/*
	 * TDB_ALLOW_NESTING is the default behavior.
	 * Note: this may change in future versions!
	 */
	if (!(tdb->flags & TDB_DISALLOW_NESTING)) {
		tdb->flags |= TDB_ALLOW_NESTING;
	}

	/* internal databases don't mmap or lock, and start off cleared */
	if (tdb->flags & TDB_INTERNAL) {
		tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
		if (tdb_new_database(tdb, hash_size) != 0) {
			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: tdb_new_database failed!"));
			goto fail;
		}
		goto internal;
	}

	if ((tdb->fd = open(name, open_flags, mode)) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_open_ex: could not open file %s: %s\n",
			 name, strerror(errno)));
		goto fail;	/* errno set by open(2) */
	}

	/* on exec, don't inherit the fd.  This stays best-effort, but a
	 * failed F_GETFD (-1) must not be OR-ed into the new flag set. */
	fd_flags = fcntl(tdb->fd, F_GETFD, 0);
	if (fd_flags == -1) {
		fd_flags = 0;
	}
	fcntl(tdb->fd, F_SETFD, fd_flags | FD_CLOEXEC);

	/* ensure there is only one process initialising at once */
	if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n",
			 name, strerror(errno)));
		goto fail;	/* errno set by tdb_brlock */
	}

	/* we need to zero database if we are the only one with it open */
	if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
	    (!tdb->read_only) &&
	    (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) {
		open_flags |= O_CREAT;
		if (ftruncate(tdb->fd, 0) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
				 "failed to truncate %s: %s\n",
				 name, strerror(errno)));
			goto fail; /* errno set by ftruncate */
		}
	}

	/* Read the header; if it is missing or not a tdb, initialise a
	 * fresh database, but only when creation was requested. */
	errno = 0;
	if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
	    || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) {
		if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) {
			if (errno == 0) {
				errno = EIO; /* ie bad format or something */
			}
			goto fail;
		}
		rev = (tdb->flags & TDB_CONVERT);
	} else if (tdb->header.version != TDB_VERSION
		   && !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION)))) {
		/* wrong version */
		errno = EIO;
		goto fail;
	}

	/* Reassemble the version from its stored bytes, most significant
	 * first, to see whether the file is big-endian. */
	vp = (unsigned char *)&tdb->header.version;
	vertest = (((uint32_t)vp[0]) << 24) | (((uint32_t)vp[1]) << 16) |
		  (((uint32_t)vp[2]) << 8) | (uint32_t)vp[3];
	tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0;
	if (!rev)
		tdb->flags &= ~TDB_CONVERT;
	else {
		tdb->flags |= TDB_CONVERT;
		tdb_convert(&tdb->header, sizeof(tdb->header));
	}
	if (fstat(tdb->fd, &st) == -1)
		goto fail;

	if (tdb->header.rwlocks != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n"));
		/* errno may still be 0 here; callers rely on it being set */
		errno = ENOSYS;
		goto fail;
	}

	/* Is it already in the open list?  If so, fail. */
	if (tdb_already_open(st.st_dev, st.st_ino)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
			 "%s (%d,%d) is already open in this process\n",
			 name, (int)st.st_dev, (int)st.st_ino));
		errno = EBUSY;
		goto fail;
	}

	if (!(tdb->name = (char *)strdup(name))) {
		errno = ENOMEM;
		goto fail;
	}

	tdb->map_size = st.st_size;
	tdb->device = st.st_dev;
	tdb->inode = st.st_ino;
	tdb_mmap(tdb);
	if (locked) {
		/* drop the exclusive ACTIVE_LOCK taken for the clear above */
		if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
				 "failed to release ACTIVE_LOCK on %s: %s\n",
				 name, strerror(errno)));
			goto fail;
		}

	}

	/* We always need to do this if the CLEAR_IF_FIRST flag is set, even if
	   we didn't get the initial exclusive lock as we need to let all other
	   users know we're using it. */

	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
		/* leave this lock in place to indicate it's in use */
		if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)
			goto fail;
	}

	/* if needed, run recovery */
	if (tdb_transaction_recover(tdb) == -1) {
		goto fail;
	}

#ifdef TDB_TRACE
	{
		char tracefile[strlen(name) + 32];

		snprintf(tracefile, sizeof(tracefile),
			 "%s.trace.%li", name, (long)getpid());
		tdb->tracefd = open(tracefile, O_WRONLY|O_CREAT|O_EXCL, 0600);
		if (tdb->tracefd >= 0) {
			tdb_enable_seqnum(tdb);
			tdb_trace_open(tdb, "tdb_open", hash_size, tdb_flags,
				       open_flags);
		} else
			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to open trace file %s!\n", tracefile));
	}
#endif

 internal:
	/* Internal (memory-only) databases skip all the code above to
	 * do with disk files, and resume here by releasing their
	 * global lock and hooking into the active list. */
	if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1) == -1)
		goto fail;
	tdb->next = tdbs;
	tdbs = tdb;
	return tdb;

 fail:
	/* Undo everything done so far, preserving the errno that describes
	 * the original failure across the cleanup calls. */
	{ int save_errno = errno;

	if (!tdb)
		return NULL;

#ifdef TDB_TRACE
	close(tdb->tracefd);
#endif
	if (tdb->map_ptr) {
		if (tdb->flags & TDB_INTERNAL)
			SAFE_FREE(tdb->map_ptr);
		else
			tdb_munmap(tdb);
	}
	SAFE_FREE(tdb->name);
	if (tdb->fd != -1)
		if (close(tdb->fd) != 0)
			TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to close tdb->fd on error!\n"));
	SAFE_FREE(tdb);
	errno = save_errno;
	return NULL;
	}
}
390 
391 /*
392  * Set the maximum number of dead records per hash chain
393  */
394 
tdb_set_max_dead(struct tdb_context * tdb,int max_dead)395 void tdb_set_max_dead(struct tdb_context *tdb, int max_dead)
396 {
397 	tdb->max_dead_records = max_dead;
398 }
399 
400 /**
401  * Close a database.
402  *
403  * @returns -1 for error; 0 for success.
404  **/
tdb_close(struct tdb_context * tdb)405 int tdb_close(struct tdb_context *tdb)
406 {
407 	struct tdb_context **i;
408 	int ret = 0;
409 
410 	tdb_trace(tdb, "tdb_close");
411 	if (tdb->transaction) {
412 		_tdb_transaction_cancel(tdb);
413 	}
414 
415 	if (tdb->map_ptr) {
416 		if (tdb->flags & TDB_INTERNAL)
417 			SAFE_FREE(tdb->map_ptr);
418 		else
419 			tdb_munmap(tdb);
420 	}
421 	SAFE_FREE(tdb->name);
422 	if (tdb->fd != -1) {
423 		ret = close(tdb->fd);
424 		tdb->fd = -1;
425 	}
426 	SAFE_FREE(tdb->lockrecs);
427 
428 	/* Remove from contexts list */
429 	for (i = &tdbs; *i; i = &(*i)->next) {
430 		if (*i == tdb) {
431 			*i = tdb->next;
432 			break;
433 		}
434 	}
435 
436 #ifdef TDB_TRACE
437 	close(tdb->tracefd);
438 #endif
439 	memset(tdb, 0, sizeof(*tdb));
440 	SAFE_FREE(tdb);
441 
442 	return ret;
443 }
444 
445 /* register a loging function */
tdb_set_logging_function(struct tdb_context * tdb,const struct tdb_logging_context * log_ctx)446 void tdb_set_logging_function(struct tdb_context *tdb,
447                               const struct tdb_logging_context *log_ctx)
448 {
449         tdb->log = *log_ctx;
450 }
451 
tdb_get_logging_private(struct tdb_context * tdb)452 void *tdb_get_logging_private(struct tdb_context *tdb)
453 {
454 	return tdb->log.log_private;
455 }
456 
tdb_reopen_internal(struct tdb_context * tdb,bool active_lock)457 static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
458 {
459 #if !defined(LIBREPLACE_PREAD_NOT_REPLACED) || \
460 	!defined(LIBREPLACE_PWRITE_NOT_REPLACED)
461 	struct stat st;
462 #endif
463 
464 	if (tdb->flags & TDB_INTERNAL) {
465 		return 0; /* Nothing to do. */
466 	}
467 
468 	if (tdb->num_locks != 0 || tdb->global_lock.count) {
469 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed with locks held\n"));
470 		goto fail;
471 	}
472 
473 	if (tdb->transaction != 0) {
474 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed inside a transaction\n"));
475 		goto fail;
476 	}
477 
478 /* If we have real pread & pwrite, we can skip reopen. */
479 #if !defined(LIBREPLACE_PREAD_NOT_REPLACED) || \
480 	!defined(LIBREPLACE_PWRITE_NOT_REPLACED)
481 	if (tdb_munmap(tdb) != 0) {
482 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: munmap failed (%s)\n", strerror(errno)));
483 		goto fail;
484 	}
485 	if (close(tdb->fd) != 0)
486 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: WARNING closing tdb->fd failed!\n"));
487 	tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0);
488 	if (tdb->fd == -1) {
489 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno)));
490 		goto fail;
491 	}
492 	if (fstat(tdb->fd, &st) != 0) {
493 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno)));
494 		goto fail;
495 	}
496 	if (st.st_ino != tdb->inode || st.st_dev != tdb->device) {
497 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n"));
498 		goto fail;
499 	}
500 	tdb_mmap(tdb);
501 #endif /* fake pread or pwrite */
502 
503 	if (active_lock &&
504 	    (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)) {
505 		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n"));
506 		goto fail;
507 	}
508 
509 	return 0;
510 
511 fail:
512 	tdb_close(tdb);
513 	return -1;
514 }
515 
516 /* reopen a tdb - this can be used after a fork to ensure that we have an independent
517    seek pointer from our parent and to re-establish locks */
tdb_reopen(struct tdb_context * tdb)518 int tdb_reopen(struct tdb_context *tdb)
519 {
520 	return tdb_reopen_internal(tdb, tdb->flags & TDB_CLEAR_IF_FIRST);
521 }
522 
523 /* reopen all tdb's */
tdb_reopen_all(int parent_longlived)524 int tdb_reopen_all(int parent_longlived)
525 {
526 	struct tdb_context *tdb;
527 
528 	for (tdb=tdbs; tdb; tdb = tdb->next) {
529 		bool active_lock = (tdb->flags & TDB_CLEAR_IF_FIRST);
530 
531 		/*
532 		 * If the parent is longlived (ie. a
533 		 * parent daemon architecture), we know
534 		 * it will keep it's active lock on a
535 		 * tdb opened with CLEAR_IF_FIRST. Thus
536 		 * for child processes we don't have to
537 		 * add an active lock. This is essential
538 		 * to improve performance on systems that
539 		 * keep POSIX locks as a non-scalable data
540 		 * structure in the kernel.
541 		 */
542 		if (parent_longlived) {
543 			/* Ensure no clear-if-first. */
544 			active_lock = false;
545 		}
546 
547 		if (tdb_reopen_internal(tdb, active_lock) != 0)
548 			return -1;
549 	}
550 
551 	return 0;
552 }
553