/*-
 * Copyright (c) 1996, 2020 Oracle and/or its affiliates.  All rights reserved.
 *
 * See the file LICENSE for license information.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/hash.h"
#include "dbinc/mp.h"

#ifdef HAVE_STATISTICS
static int __ham_stat_callback __P((DBC *, PAGE *, void *, int *));

/*
 * __ham_stat --
 *	Gather/print the hash statistics
 *
 * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t));
 */
int
__ham_stat(dbc, spp, flags)
	DBC *dbc;
	void *spp;
	u_int32_t flags;
{
	DB *dbp;
	DB_HASH_STAT *sp;
	DB_MPOOLFILE *mpf;
	ENV *env;
	HASH_CURSOR *hcp;
	PAGE *h;
	db_pgno_t pgno;
	int ret;

	dbp = dbc->dbp;
	env = dbp->env;

	mpf = dbp->mpf;
	sp = NULL;

	hcp = (HASH_CURSOR *)dbc->internal;

	if ((ret = __ham_get_meta(dbc)) != 0)
		goto err;

	/* Allocate and clear the structure. */
	if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0)
		goto err;
	memset(sp, 0, sizeof(*sp));
	/* Copy the fields that we have. */
	sp->hash_nkeys = hcp->hdr->dbmeta.key_count;
	sp->hash_ndata = hcp->hdr->dbmeta.record_count;
	/*
	 * Don't take the page number from the meta-data page -- that value
	 * is only maintained in the primary database; we may have been
	 * called on a subdatabase.
	 */
	if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0)
		goto err;
	sp->hash_pagecnt = pgno + 1;
	sp->hash_pagesize = dbp->pgsize;
	sp->hash_buckets = hcp->hdr->max_bucket + 1;
	sp->hash_magic = hcp->hdr->dbmeta.magic;
	sp->hash_version = hcp->hdr->dbmeta.version;
	sp->hash_metaflags = hcp->hdr->dbmeta.flags;
	sp->hash_ffactor = hcp->hdr->ffactor;

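	/*
	 * For DB_FAST_STAT, return just the fields filled in above and
	 * skip the free-list walk and full traversal below.
	 */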
	if (flags == DB_FAST_STAT)
		goto done;

	/* Walk the free list, counting pages. */
	for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free;
	    pgno != PGNO_INVALID;) {
		++sp->hash_free;

		if ((ret = __memp_fget(mpf,
		     &pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0)
			goto err;

		pgno = h->next_pgno;
		(void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority);
	}

	/* Now traverse the rest of the table, recomputing exact counts. */
	sp->hash_nkeys = 0;
	sp->hash_ndata = 0;
	if ((ret = __ham_traverse(dbc,
	    DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0)
		goto err;

	if (!F_ISSET(dbp, DB_AM_RDONLY)) {
		/*
		 * A transaction is not required for DB->stat, so this update
		 * can't safely make a copy of the meta page.  We have to
		 * update in place.
		 */
		if ((ret = __ham_dirty_meta(dbc,
		    (dbc->txn == NULL) ? DB_MPOOL_EDIT : 0)) != 0)
			goto err;
		hcp->hdr->dbmeta.key_count = sp->hash_nkeys;
		hcp->hdr->dbmeta.record_count = sp->hash_ndata;
	}

done:	if ((ret = __ham_release_meta(dbc)) != 0)
		goto err;

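	/*
	 * Hand the filled-in structure back through spp; the caller is
	 * responsible for freeing it (as __ham_stat_print does below).
	 */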
	*(DB_HASH_STAT **)spp = sp;
	return (0);

err:	if (sp != NULL)
		__os_ufree(env, sp);

	if (hcp->hdr != NULL)
		(void)__ham_release_meta(dbc);

	return (ret);
}

/*
 * __ham_stat_print --
 *	Display hash statistics.
 *
 * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t));
 */
int
__ham_stat_print(dbc, flags)
	DBC *dbc;
	u_int32_t flags;
{
	DB *dbp;
	ENV *env;
	DB_HASH_STAT *sp;
	int lorder, ret;
	const char *s;

	dbp = dbc->dbp;
	env = dbp->env;

	if ((ret = __ham_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0)
		return (ret);

	if (LF_ISSET(DB_STAT_ALL)) {
		__db_msg(env, "%s", DB_GLOBAL(db_line));
		__db_msg(env, "Default Hash database information:");
	}
	__db_msg(env, "%lx\tHash magic number", (u_long)sp->hash_magic);
	__db_msg(env,
	    "%lu\tHash version number", (u_long)sp->hash_version);
	(void)__db_get_lorder(dbp, &lorder);
	switch (lorder) {
	case 1234:
		s = "Little-endian";
		break;
	case 4321:
		s = "Big-endian";
		break;
	default:
		s = "Unrecognized byte order";
		break;
	}
	__db_msg(env, "%s\tByte order", s);
	__db_prflags(env,
	    NULL, sp->hash_metaflags, __db_get_hmeta_fn(), NULL, "\tFlags");
	__db_dl(env,
	    "Number of pages in the database", (u_long)sp->hash_pagecnt);
	__db_dl(env,
	    "Underlying database page size", (u_long)sp->hash_pagesize);
	__db_dl(env, "Specified fill factor", (u_long)sp->hash_ffactor);
	__db_dl(env,
	    "Number of keys in the database", (u_long)sp->hash_nkeys);
	__db_dl(env,
	    "Number of data items in the database", (u_long)sp->hash_ndata);

	__db_dl(env, "Number of hash buckets", (u_long)sp->hash_buckets);
	__db_dl_pct(env, "Number of bytes free on bucket pages",
	    (u_long)sp->hash_bfree, DB_PCT_PG(
	    sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff");

	__db_dl(env,
	    "Number of external files", (u_long)sp->hash_ext_files);
	__db_dl(env,
	    "Number of hash overflow (big item) pages",
	    (u_long)sp->hash_bigpages);
	__db_dl_pct(env,
	    "Number of bytes free in hash overflow (big item) pages",
	    (u_long)sp->hash_big_bfree, DB_PCT_PG(
	    sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff");

	__db_dl(env,
	    "Number of bucket overflow pages", (u_long)sp->hash_overflows);
	__db_dl_pct(env,
	    "Number of bytes free on bucket overflow pages",
	    (u_long)sp->hash_ovfl_free, DB_PCT_PG(
	    sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff");

	__db_dl(env, "Number of duplicate pages", (u_long)sp->hash_dup);
	__db_dl_pct(env, "Number of bytes free in duplicate pages",
	    (u_long)sp->hash_dup_free, DB_PCT_PG(
	    sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff");

	__db_dl(env,
	    "Number of pages on the free list", (u_long)sp->hash_free);

	__os_ufree(env, sp);

	return (0);
}

static int
__ham_stat_callback(dbc, pagep, cookie, putp)
	DBC *dbc;
	PAGE *pagep;
	void *cookie;
	int *putp;
{
	DB *dbp;
	DB_BTREE_STAT bstat;
	DB_HASH_STAT *sp;
	db_indx_t indx, len, off, tlen, top;
	u_int8_t *hk;
	int ret;

	*putp = 0;
	sp = cookie;
	dbp = dbc->dbp;

	switch (pagep->type) {
	case P_INVALID:
		/*
		 * Hash pages may be wholly zeroed;  this is not a bug.
		 * Obviously such pages have no data, so we can just proceed.
		 */
		break;
	case P_HASH_UNSORTED:
	case P_HASH:
		/*
		 * We count the buckets and the overflow pages
		 * separately and tally their bytes separately
		 * as well.  We need to figure out if this page
		 * is a bucket.
		 */
		if (PREV_PGNO(pagep) == PGNO_INVALID)
			sp->hash_bfree += P_FREESPACE(dbp, pagep);
		else {
			sp->hash_overflows++;
			sp->hash_ovfl_free += P_FREESPACE(dbp, pagep);
		}
		top = NUM_ENT(pagep);
		/* Correct for on-page duplicates and deleted items. */
		for (indx = 0; indx < top; indx += P_INDX) {
			switch (*H_PAIRDATA(dbp, pagep, indx)) {
			case H_OFFDUP:
				break;
			case H_BLOB:
				sp->hash_nblobs++;
				sp->hash_ext_files++;
				/* fall through */
			case H_OFFPAGE:
			case H_KEYDATA:
				sp->hash_ndata++;
				break;
			case H_DUPLICATE:
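				/*
				 * Each on-page duplicate element is stored
				 * as a length, the data, and a trailing
				 * length, so step through the set by
				 * len + 2 * sizeof(db_indx_t), counting
				 * each element as a data item.
				 */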
				tlen = LEN_HDATA(dbp, pagep, 0, indx);
				hk = H_PAIRDATA(dbp, pagep, indx);
				for (off = 0; off < tlen;
				    off += len + 2 * sizeof(db_indx_t)) {
					sp->hash_ndata++;
					memcpy(&len,
					    HKEYDATA_DATA(hk)
					    + off, sizeof(db_indx_t));
				}
				break;
			default:
				return (__db_pgfmt(dbp->env, PGNO(pagep)));
			}
		}
		sp->hash_nkeys += H_NUMPAIRS(pagep);
		break;
	case P_IBTREE:
	case P_IRECNO:
	case P_LBTREE:
	case P_LRECNO:
	case P_LDUP:
		/*
		 * These are all btree pages; pass them to the btree stat
		 * callback with a fresh cookie, then fold the relevant
		 * fields into our own stat structure.
		 */
		memset(&bstat, 0, sizeof(bstat));
		if ((ret = __bam_stat_callback(dbc, pagep, &bstat, putp)) != 0)
			return (ret);
		sp->hash_dup++;
		sp->hash_dup_free += bstat.bt_leaf_pgfree +
		    bstat.bt_dup_pgfree + bstat.bt_int_pgfree;
		sp->hash_ndata += bstat.bt_ndata;
		break;
	case P_OVERFLOW:
		sp->hash_bigpages++;
		sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep);
		break;
	default:
		return (__db_pgfmt(dbp->env, PGNO(pagep)));
	}

	return (0);
}

/*
 * __ham_print_cursor --
 *	Display the current cursor.
 *
 * PUBLIC: void __ham_print_cursor __P((DBC *));
 */
void
__ham_print_cursor(dbc)
	DBC *dbc;
{
	static const FN fn[] = {
		{ H_CONTINUE,	"H_CONTINUE" },
		{ H_DELETED,	"H_DELETED" },
		{ H_DUPONLY,	"H_DUPONLY" },
		{ H_EXPAND,	"H_EXPAND" },
		{ H_ISDUP,	"H_ISDUP" },
		{ H_NEXT_NODUP,	"H_NEXT_NODUP" },
		{ H_NOMORE,	"H_NOMORE" },
		{ H_OK,		"H_OK" },
		{ 0,		NULL }
	};
	ENV *env;
	HASH_CURSOR *cp;

	env = dbc->env;
	cp = (HASH_CURSOR *)dbc->internal;

	STAT_ULONG("Bucket traversing", cp->bucket);
	STAT_ULONG("Bucket locked", cp->lbucket);
	STAT_ULONG("Duplicate set offset", cp->dup_off);
	STAT_ULONG("Current duplicate length", cp->dup_len);
	STAT_ULONG("Total duplicate set length", cp->dup_tlen);
	STAT_ULONG("Bytes needed for add", cp->seek_size);
	STAT_ULONG("Page on which we can insert", cp->seek_found_page);
	STAT_ULONG("Order", cp->order);
	__db_prflags(env, NULL, cp->flags, fn, NULL, "\tInternal Flags");
}

#else /* !HAVE_STATISTICS */

int
__ham_stat(dbc, spp, flags)
	DBC *dbc;
	void *spp;
	u_int32_t flags;
{
	COMPQUIET(spp, NULL);
	COMPQUIET(flags, 0);

	return (__db_stat_not_built(dbc->env));
}
#endif

/*
 * __ham_traverse
 *	 Traverse an entire hash table.  We use the callback so that we
 * can use this both for stat collection and for deallocation.
 *
 * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t,
 * PUBLIC:     int (*)(DBC *, PAGE *, void *, int *), void *, int));
 */
int
__ham_traverse(dbc, mode, callback, cookie, look_past_max)
	DBC *dbc;
	db_lockmode_t mode;
	int (*callback) __P((DBC *, PAGE *, void *, int *));
	void *cookie;
	int look_past_max;
{
	DB *dbp;
	DBC *opd;
	DB_MPOOLFILE *mpf;
	HASH_CURSOR *hcp;
	HKEYDATA *hk;
	db_pgno_t pgno, opgno;
	int did_put, i, ret, t_ret;
	u_int32_t bucket, spares_entry;

	dbp = dbc->dbp;
	opd = NULL;
	mpf = dbp->mpf;
	hcp = (HASH_CURSOR *)dbc->internal;
	ret = 0;

	/*
	 * In a perfect world, we could simply read each page in the file
	 * and look at its page type to tally the information necessary.
	 * Unfortunately, the bucket locking that hash tables do to make
	 * locking easy makes this a pain in the butt.  We have to traverse
	 * the duplicate, overflow and big pages from the bucket so that we
	 * don't access anything that isn't properly locked.
	 */
	for (bucket = 0;; bucket++) {
		/*
		 * We put the loop exit condition check here, because
		 * it made for a really vile extended ?: that made SCO's
		 * compiler drop core.
		 *
		 * If look_past_max is not set, we can stop at max_bucket;
		 * if it is set, we need to include pages that are part of
		 * the current doubling but beyond the highest bucket we've
		 * split into, as well as pages from a "future" doubling
		 * that may have been created within an aborted
		 * transaction.  To do this, keep looping (and incrementing
		 * bucket) until the corresponding spares array entries
		 * cease to be defined.
		 */
		if (look_past_max) {
			spares_entry = __db_log2(bucket + 1);
			if (spares_entry >= NCACHED ||
			    hcp->hdr->spares[spares_entry] == 0)
				break;
		} else {
			if (bucket > hcp->hdr->max_bucket)
				break;
		}

		hcp->bucket = bucket;
		hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket);
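		/*
		 * Walk the bucket's chain of pages, following next_pgno
		 * links until we reach the end of the chain.
		 */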
		for (ret = __ham_get_cpage(dbc, mode); ret == 0;
		    ret = __ham_next_cpage(dbc, pgno)) {

			/*
			 * If we are cleaning up pages past the max_bucket,
			 * then they may be on the free list and have their
			 * next pointers set, but they should be ignored.  In
			 * fact, we really ought to just skip anybody who is
			 * not a valid page.
			 */
			if (TYPE(hcp->page) == P_INVALID)
				break;
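			/*
			 * Remember the next page in the chain now; the
			 * callback below may release hcp->page.
			 */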
			pgno = NEXT_PGNO(hcp->page);

			/*
			 * Go through each item on the page checking for
			 * duplicates (in which case we have to count the
			 * duplicate pages) or big key/data items (in which
			 * case we have to count those pages).
			 */
			for (i = 0; i < NUM_ENT(hcp->page); i++) {
				hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i);
				switch (HPAGE_PTYPE(hk)) {
				case H_OFFDUP:
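					/*
					 * Off-page duplicates live in their
					 * own btree; open an off-page
					 * duplicate cursor and walk that
					 * tree with the same callback.
					 */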
					memcpy(&opgno, HOFFDUP_PGNO(hk),
					    sizeof(db_pgno_t));
					if ((ret = __dbc_newopd(dbc,
					    opgno, NULL, &opd)) != 0)
						return (ret);
					if ((ret = __bam_traverse(opd,
					    DB_LOCK_READ, opgno,
					    callback, cookie))
					    != 0)
						goto err;
					if ((ret = __dbc_close(opd)) != 0)
						return (ret);
					opd = NULL;
					break;
				case H_OFFPAGE:
					/*
					 * We are about to get a big page
					 * which will use the same spot that
					 * the current page uses, so we need
					 * to restore the current page before
					 * looking at it again.
					 */
					memcpy(&opgno, HOFFPAGE_PGNO(hk),
					    sizeof(db_pgno_t));
					if ((ret = __db_traverse_big(dbc,
					    opgno, callback, cookie)) != 0)
						goto err;
					break;
				case H_BLOB:
				case H_KEYDATA:
				case H_DUPLICATE:
					break;
				default:
					ret = __db_unknown_path(
					    dbp->env, "__ham_traverse");
					goto err;
				}
			}

			/* Call the callback on main pages. */
			if ((ret = callback(dbc,
			    hcp->page, cookie, &did_put)) != 0)
				goto err;

			if (did_put)
				hcp->page = NULL;
			if (pgno == PGNO_INVALID)
				break;
		}
		if (ret != 0)
			goto err;

		if (hcp->page != NULL) {
			if ((ret = __memp_fput(mpf,
			    dbc->thread_info, hcp->page, dbc->priority)) != 0)
				return (ret);
			hcp->page = NULL;
		}

	}
err:	if (opd != NULL &&
	    (t_ret = __dbc_close(opd)) != 0 && ret == 0)
		ret = t_ret;
	return (ret);
}