1  /*
2    Unix SMB/CIFS implementation.
3 
4    trivial database library
5 
6    Copyright (C) Rusty Russell		   2009
7 
8      ** NOTE! The following LGPL license applies to the tdb
9      ** library. This does NOT imply that all of Samba is released
10      ** under the LGPL
11 
12    This library is free software; you can redistribute it and/or
13    modify it under the terms of the GNU Lesser General Public
14    License as published by the Free Software Foundation; either
15    version 3 of the License, or (at your option) any later version.
16 
17    This library is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    Lesser General Public License for more details.
21 
22    You should have received a copy of the GNU Lesser General Public
23    License along with this library; if not, see <http://www.gnu.org/licenses/>.
24 */
25 #include "tdb_private.h"
26 
27 /* Since we opened it, these shouldn't fail unless it's recent corruption. */
28 static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
29 {
30 	struct tdb_header hdr;
31 	uint32_t h1, h2;
32 
33 	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
34 		return false;
35 	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
36 		goto corrupt;
37 
38 	CONVERT(hdr);
39 	if (hdr.version != TDB_VERSION)
40 		goto corrupt;
41 
42 	if (hdr.rwlocks != 0 &&
43 	    hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC &&
44 	    hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
45 		goto corrupt;
46 
47 	tdb_header_hash(tdb, &h1, &h2);
48 	if (hdr.magic1_hash && hdr.magic2_hash &&
49 	    (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
50 		goto corrupt;
51 
52 	if (hdr.hash_size == 0)
53 		goto corrupt;
54 
55 	if (hdr.hash_size != tdb->hash_size)
56 		goto corrupt;
57 
58 	if (hdr.recovery_start != 0 &&
59 	    hdr.recovery_start < TDB_DATA_START(tdb->hash_size))
60 		goto corrupt;
61 
62 	*recovery = hdr.recovery_start;
63 	return true;
64 
65 corrupt:
66 	tdb->ecode = TDB_ERR_CORRUPT;
67 	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
68 	return false;
69 }
70 
71 /* Generic record header check. */
72 static bool tdb_check_record(struct tdb_context *tdb,
73 			     tdb_off_t off,
74 			     const struct tdb_record *rec)
75 {
76 	tdb_off_t tailer;
77 
78 	/* Check rec->next: 0 or points to record offset, aligned. */
79 	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)){
80 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
81 			 "Record offset %u too small next %u\n",
82 			 off, rec->next));
83 		goto corrupt;
84 	}
85 	if (rec->next + sizeof(*rec) < rec->next) {
_add_wrapper(name)86 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
87 			 "Record offset %u too large next %u\n",
88 			 off, rec->next));
wrapper(self, *args, **kwargs)89 		goto corrupt;
90 	}
91 	if ((rec->next % TDB_ALIGNMENT) != 0) {
92 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
93 			 "Record offset %u misaligned next %u\n",
94 			 off, rec->next));
95 		goto corrupt;
96 	}
97 	if (tdb_oob(tdb, rec->next, sizeof(*rec), 0))
98 		goto corrupt;
99 
100 	/* Check rec_len: similar to rec->next, implies next record. */
101 	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
102 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
103 			 "Record offset %u misaligned length %u\n",
104 			 off, rec->rec_len));
105 		goto corrupt;
106 	}
107 	/* Must fit tailer. */
108 	if (rec->rec_len < sizeof(tailer)) {
109 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
110 			 "Record offset %u too short length %u\n",
111 			 off, rec->rec_len));
112 		goto corrupt;
113 	}
114 	/* OOB allows "right at the end" access, so this works for last rec. */
115 	if (tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
_add_getter(name)116 		goto corrupt;
117 
118 	/* Check tailer. */
119 	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
getter(self)120 			 &tailer) == -1)
121 		goto corrupt;
122 	if (tailer != sizeof(*rec) + rec->rec_len) {
setter(self, value)123 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
124 			 "Record offset %u invalid tailer\n", off));
125 		goto corrupt;
126 	}
127 
128 	return true;
129 
130 corrupt:
131 	tdb->ecode = TDB_ERR_CORRUPT;
132 	return false;
133 }
134 
135 /* Grab some bytes: may copy if can't use mmap.
136    Caller has already done bounds check. */
137 static TDB_DATA get_bytes(struct tdb_context *tdb,
138 			  tdb_off_t off, tdb_len_t len)
139 {
140 	TDB_DATA d;
141 
142 	d.dsize = len;
143 
144 	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
145 		d.dptr = (unsigned char *)tdb->map_ptr + off;
146 	else
147 		d.dptr = tdb_alloc_read(tdb, off, d.dsize);
148 	return d;
149 }
150 
151 /* Frees data if we're not able to simply use mmap. */
152 static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
153 {
154 	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
155 		return;
156 	free(d.dptr);
157 }
158 
159 /* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
160  * See: http://burtleburtle.net/bob/c/lookup3.c
161  */
162 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
163 static void hash(uint32_t key, uint32_t *pc, uint32_t *pb)
164 {
165 	uint32_t a,b,c;
166 
167 	/* Set up the internal state */
168 	a = b = c = 0xdeadbeef + *pc;
169 	c += *pb;
170 	a += key;
171 	c ^= b; c -= rot(b,14);
172 	a ^= c; a -= rot(c,11);
173 	b ^= a; b -= rot(a,25);
174 	c ^= b; c -= rot(b,16);
175 	a ^= c; a -= rot(c,4);
176 	b ^= a; b -= rot(a,14);
177 	c ^= b; c -= rot(b,24);
178 	*pc=c; *pb=b;
179 }
180 
181 /*
182   We want to check that all free records are in the free list
183   (only once), and all free list entries are free records.  Similarly
184   for each hash chain of used records.
185 
186   Doing that naively (without walking hash chains, since we want to be
187   linear) means keeping a list of records which have been seen in each
188   hash chain, and another of records pointed to (ie. next pointers
189   from records and the initial hash chain heads).  These two lists
190   should be equal.  This will take 8 bytes per record, and require
191   sorting at the end.
192 
193   So instead, we record each offset in a bitmap such a way that
194   recording it twice will cancel out.  Since each offset should appear
195   exactly twice, the bitmap should be zero at the end.
196 
197   The approach was inspired by Bloom Filters (see Wikipedia).  For
198   each value, we flip K bits in a bitmap of size N.  The number of
199   distinct arrangements is:
200 
201 	N! / (K! * (N-K)!)
202 
203   Of course, not all arrangements are actually distinct, but testing
204   shows this formula to be close enough.
205 
206   So, if K == 8 and N == 256, the probability of two things flipping the same
207   bits is 1 in 409,663,695,276,000.
208 
209   Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
210   (320k) seems reasonable.
211 */
212 #define NUM_HASHES 8
213 #define BITMAP_BITS 256
214 
215 static void bit_flip(unsigned char bits[], unsigned int idx)
216 {
217 	bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
218 }
219 
220 /* We record offsets in a bitmap for the particular chain it should be in.  */
221 static void record_offset(unsigned char bits[], tdb_off_t off)
222 {
223 	uint32_t h1 = off, h2 = 0;
224 	unsigned int i;
225 
226 	/* We get two good hash values out of jhash2, so we use both.  Then
227 	 * we keep going to produce further hash values. */
228 	for (i = 0; i < NUM_HASHES / 2; i++) {
229 		hash(off, &h1, &h2);
230 		bit_flip(bits, h1 % BITMAP_BITS);
231 		bit_flip(bits, h2 % BITMAP_BITS);
232 		h2++;
233 	}
234 }
235 
236 /* Check that an in-use record is valid. */
237 static bool tdb_check_used_record(struct tdb_context *tdb,
238 				  tdb_off_t off,
239 				  const struct tdb_record *rec,
240 				  unsigned char **hashes,
241 				  int (*check)(TDB_DATA, TDB_DATA, void *),
242 				  void *private_data)
243 {
244 	TDB_DATA key, data;
245 	tdb_len_t len;
246 
247 	if (!tdb_check_record(tdb, off, rec))
248 		return false;
249 
250 	/* key + data + tailer must fit in record */
251 	len = rec->key_len;
252 	len += rec->data_len;
253 	if (len < rec->data_len) {
254 		/* overflow */
255 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "Record lengths overflow\n"));
256 		return false;
257 	}
258 	len += sizeof(tdb_off_t);
259 	if (len < sizeof(tdb_off_t)) {
260 		/* overflow */
261 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "Record lengths overflow\n"));
262 		return false;
263 	}
264 
265 	if (len > rec->rec_len) {
266 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
267 			 "Record offset %u too short for contents\n", off));
268 		return false;
269 	}
270 
271 	key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
272 	if (!key.dptr)
273 		return false;
274 
275 	if (tdb->hash_fn(&key) != rec->full_hash) {
276 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
277 			 "Record offset %u has incorrect hash\n", off));
278 		goto fail_put_key;
279 	}
280 
281 	/* Mark this offset as a known value for this hash bucket. */
282 	record_offset(hashes[BUCKET(rec->full_hash)+1], off);
283 	/* And similarly if the next pointer is valid. */
284 	if (rec->next)
285 		record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);
286 
287 	/* If they supply a check function and this record isn't dead,
288 	   get data and feed it. */
289 	if (check && rec->magic != TDB_DEAD_MAGIC) {
290 		data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
291 				 rec->data_len);
292 		if (!data.dptr)
293 			goto fail_put_key;
294 
295 		if (check(key, data, private_data) == -1)
296 			goto fail_put_data;
297 		put_bytes(tdb, data);
298 	}
299 
300 	put_bytes(tdb, key);
301 	return true;
302 
303 fail_put_data:
304 	put_bytes(tdb, data);
305 fail_put_key:
306 	put_bytes(tdb, key);
307 	return false;
308 }
309 
310 /* Check that an unused record is valid. */
311 static bool tdb_check_free_record(struct tdb_context *tdb,
312 				  tdb_off_t off,
313 				  const struct tdb_record *rec,
314 				  unsigned char **hashes)
315 {
316 	if (!tdb_check_record(tdb, off, rec))
317 		return false;
318 
319 	/* Mark this offset as a known value for the free list. */
320 	record_offset(hashes[0], off);
321 	/* And similarly if the next pointer is valid. */
322 	if (rec->next)
323 		record_offset(hashes[0], rec->next);
324 	return true;
325 }
326 
327 /* Slow, but should be very rare. */
328 size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off)
329 {
330 	size_t len;
331 
332 	for (len = 0; off + len < tdb->map_size; len++) {
333 		char c;
334 		if (tdb->methods->tdb_read(tdb, off, &c, 1, 0))
335 			return 0;
336 		if (c != 0 && c != 0x42)
337 			break;
338 	}
339 	return len;
340 }
341 
342 _PUBLIC_ int tdb_check(struct tdb_context *tdb,
343 	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
344 	      void *private_data)
345 {
346 	unsigned int h;
347 	unsigned char **hashes;
348 	tdb_off_t off, recovery_start;
349 	struct tdb_record rec;
350 	bool found_recovery = false;
351 	tdb_len_t dead;
352 	bool locked;
353 
354 	/* Read-only databases use no locking at all: it's best-effort.
355 	 * We may have a write lock already, so skip that case too. */
356 	if (tdb->read_only || tdb->allrecord_lock.count != 0) {
357 		locked = false;
358 	} else {
359 		if (tdb_lockall_read(tdb) == -1)
360 			return -1;
361 		locked = true;
362 	}
363 
364 	/* Make sure we know true size of the underlying file. */
365 	tdb_oob(tdb, tdb->map_size, 1, 1);
366 
367 	/* Header must be OK: also gets us the recovery ptr, if any. */
368 	if (!tdb_check_header(tdb, &recovery_start))
369 		goto unlock;
370 
371 	/* We should have the whole header, too. */
372 	if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) {
373 		tdb->ecode = TDB_ERR_CORRUPT;
374 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
375 		goto unlock;
376 	}
377 
378 	/* One big malloc: pointers then bit arrays. */
379 	hashes = (unsigned char **)calloc(
380 			1, sizeof(hashes[0]) * (1+tdb->hash_size)
381 			+ BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size));
382 	if (!hashes) {
383 		tdb->ecode = TDB_ERR_OOM;
384 		goto unlock;
385 	}
386 
387 	/* Initialize pointers */
388 	hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]);
389 	for (h = 1; h < 1+tdb->hash_size; h++)
390 		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;
391 
392 	/* Freelist and hash headers are all in a row: read them. */
393 	for (h = 0; h < 1+tdb->hash_size; h++) {
394 		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
395 				 &off) == -1)
396 			goto free;
397 		if (off)
398 			record_offset(hashes[h], off);
399 	}
400 
401 	/* For each record, read it in and check it's ok. */
402 	for (off = TDB_DATA_START(tdb->hash_size);
403 	     off < tdb->map_size;
404 	     off += sizeof(rec) + rec.rec_len) {
405 		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
406 					   DOCONV()) == -1)
407 			goto free;
408 		switch (rec.magic) {
409 		case TDB_MAGIC:
410 		case TDB_DEAD_MAGIC:
411 			if (!tdb_check_used_record(tdb, off, &rec, hashes,
412 						   check, private_data))
413 				goto free;
414 			break;
415 		case TDB_FREE_MAGIC:
416 			if (!tdb_check_free_record(tdb, off, &rec, hashes))
417 				goto free;
418 			break;
419 		/* If we crash after ftruncate, we can get zeroes or fill. */
420 		case TDB_RECOVERY_INVALID_MAGIC:
421 		case 0x42424242:
422 			if (recovery_start == off) {
423 				found_recovery = true;
424 				break;
425 			}
426 			dead = tdb_dead_space(tdb, off);
427 			if (dead < sizeof(rec))
428 				goto corrupt;
429 
430 			TDB_LOG((tdb, TDB_DEBUG_ERROR,
431 				 "Dead space at %u-%u (of %u)\n",
432 				 off, off + dead, tdb->map_size));
433 			rec.rec_len = dead - sizeof(rec);
434 			break;
435 		case TDB_RECOVERY_MAGIC:
436 			if (recovery_start != off) {
437 				TDB_LOG((tdb, TDB_DEBUG_ERROR,
438 					 "Unexpected recovery record at offset %u\n",
439 					 off));
440 				goto free;
441 			}
442 			found_recovery = true;
443 			break;
444 		default: ;
445 		corrupt:
446 			tdb->ecode = TDB_ERR_CORRUPT;
447 			TDB_LOG((tdb, TDB_DEBUG_ERROR,
448 				 "Bad magic 0x%x at offset %u\n",
449 				 rec.magic, off));
450 			goto free;
451 		}
452 	}
453 
454 	/* Now, hashes should all be empty: each record exists and is referred
455 	 * to by one other. */
456 	for (h = 0; h < 1+tdb->hash_size; h++) {
457 		unsigned int i;
458 		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
459 			if (hashes[h][i] != 0) {
460 				tdb->ecode = TDB_ERR_CORRUPT;
461 				TDB_LOG((tdb, TDB_DEBUG_ERROR,
462 					 "Hashes do not match records\n"));
463 				goto free;
464 			}
465 		}
466 	}
467 
468 	/* We must have found recovery area if there was one. */
469 	if (recovery_start != 0 && !found_recovery) {
470 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
471 			 "Expected a recovery area at %u\n",
472 			 recovery_start));
473 		goto free;
474 	}
475 
476 	free(hashes);
477 	if (locked) {
478 		tdb_unlockall_read(tdb);
479 	}
480 	return 0;
481 
482 free:
483 	free(hashes);
484 unlock:
485 	if (locked) {
486 		tdb_unlockall_read(tdb);
487 	}
488 	return -1;
489 }
490