1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 2013 Oracle and/or its affiliates. All rights reserved.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/hash.h"
15 #include "dbinc/mp.h"
16
17 #ifdef HAVE_STATISTICS
18 static int __ham_stat_callback __P((DBC *, PAGE *, void *, int *));
19
20 /*
21 * __ham_stat --
22 * Gather/print the hash statistics
23 *
24 * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t));
25 */
26 int
__ham_stat(dbc,spp,flags)27 __ham_stat(dbc, spp, flags)
28 DBC *dbc;
29 void *spp;
30 u_int32_t flags;
31 {
32 DB *dbp;
33 DB_HASH_STAT *sp;
34 DB_MPOOLFILE *mpf;
35 ENV *env;
36 HASH_CURSOR *hcp;
37 PAGE *h;
38 db_pgno_t pgno;
39 int ret;
40
41 dbp = dbc->dbp;
42 env = dbp->env;
43
44 mpf = dbp->mpf;
45 sp = NULL;
46
47 hcp = (HASH_CURSOR *)dbc->internal;
48
49 if ((ret = __ham_get_meta(dbc)) != 0)
50 goto err;
51
52 /* Allocate and clear the structure. */
53 if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0)
54 goto err;
55 memset(sp, 0, sizeof(*sp));
56 /* Copy the fields that we have. */
57 sp->hash_nkeys = hcp->hdr->dbmeta.key_count;
58 sp->hash_ndata = hcp->hdr->dbmeta.record_count;
59 /*
60 * Don't take the page number from the meta-data page -- that value is
61 * only maintained in the primary database, we may have been called on
62 * a subdatabase.
63 */
64 if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0)
65 goto err;
66 sp->hash_pagecnt = pgno + 1;
67 sp->hash_pagesize = dbp->pgsize;
68 sp->hash_buckets = hcp->hdr->max_bucket + 1;
69 sp->hash_magic = hcp->hdr->dbmeta.magic;
70 sp->hash_version = hcp->hdr->dbmeta.version;
71 sp->hash_metaflags = hcp->hdr->dbmeta.flags;
72 sp->hash_ffactor = hcp->hdr->ffactor;
73
74 if (flags == DB_FAST_STAT)
75 goto done;
76
77 /* Walk the free list, counting pages. */
78 for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free;
79 pgno != PGNO_INVALID;) {
80 ++sp->hash_free;
81
82 if ((ret = __memp_fget(mpf,
83 &pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0)
84 goto err;
85
86 pgno = h->next_pgno;
87 (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority);
88 }
89
90 /* Now traverse the rest of the table. */
91 sp->hash_nkeys = 0;
92 sp->hash_ndata = 0;
93 if ((ret = __ham_traverse(dbc,
94 DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0)
95 goto err;
96
97 if (!F_ISSET(dbp, DB_AM_RDONLY)) {
98 /*
99 * A transaction is not required for DB->stat, so this update
100 * can't safely make a copy of the meta page. We have to
101 * update in place.
102 */
103 if ((ret = __ham_dirty_meta(dbc,
104 (dbc->txn == NULL) ? DB_MPOOL_EDIT : 0)) != 0)
105 goto err;
106 hcp->hdr->dbmeta.key_count = sp->hash_nkeys;
107 hcp->hdr->dbmeta.record_count = sp->hash_ndata;
108 }
109
110 done: if ((ret = __ham_release_meta(dbc)) != 0)
111 goto err;
112
113 *(DB_HASH_STAT **)spp = sp;
114 return (0);
115
116 err: if (sp != NULL)
117 __os_ufree(env, sp);
118
119 if (hcp->hdr != NULL)
120 (void)__ham_release_meta(dbc);
121
122 return (ret);
123 }
124
125 /*
126 * __ham_stat_print --
127 * Display hash statistics.
128 *
129 * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t));
130 */
131 int
__ham_stat_print(dbc,flags)132 __ham_stat_print(dbc, flags)
133 DBC *dbc;
134 u_int32_t flags;
135 {
136 static const FN fn[] = {
137 { DB_HASH_DUP, "duplicates" },
138 { DB_HASH_SUBDB, "multiple-databases" },
139 { DB_HASH_DUPSORT, "sorted duplicates" },
140 { 0, NULL }
141 };
142 DB *dbp;
143 ENV *env;
144 DB_HASH_STAT *sp;
145 int lorder, ret;
146 const char *s;
147
148 dbp = dbc->dbp;
149 env = dbp->env;
150
151 if ((ret = __ham_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0)
152 return (ret);
153
154 if (LF_ISSET(DB_STAT_ALL)) {
155 __db_msg(env, "%s", DB_GLOBAL(db_line));
156 __db_msg(env, "Default Hash database information:");
157 }
158 __db_msg(env, "%lx\tHash magic number", (u_long)sp->hash_magic);
159 __db_msg(env,
160 "%lu\tHash version number", (u_long)sp->hash_version);
161 (void)__db_get_lorder(dbp, &lorder);
162 switch (lorder) {
163 case 1234:
164 s = "Little-endian";
165 break;
166 case 4321:
167 s = "Big-endian";
168 break;
169 default:
170 s = "Unrecognized byte order";
171 break;
172 }
173 __db_msg(env, "%s\tByte order", s);
174 __db_prflags(env, NULL, sp->hash_metaflags, fn, NULL, "\tFlags");
175 __db_dl(env,
176 "Number of pages in the database", (u_long)sp->hash_pagecnt);
177 __db_dl(env,
178 "Underlying database page size", (u_long)sp->hash_pagesize);
179 __db_dl(env, "Specified fill factor", (u_long)sp->hash_ffactor);
180 __db_dl(env,
181 "Number of keys in the database", (u_long)sp->hash_nkeys);
182 __db_dl(env,
183 "Number of data items in the database", (u_long)sp->hash_ndata);
184
185 __db_dl(env, "Number of hash buckets", (u_long)sp->hash_buckets);
186 __db_dl_pct(env, "Number of bytes free on bucket pages",
187 (u_long)sp->hash_bfree, DB_PCT_PG(
188 sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff");
189
190 __db_dl(env,
191 "Number of overflow pages", (u_long)sp->hash_bigpages);
192 __db_dl_pct(env, "Number of bytes free in overflow pages",
193 (u_long)sp->hash_big_bfree, DB_PCT_PG(
194 sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff");
195
196 __db_dl(env,
197 "Number of bucket overflow pages", (u_long)sp->hash_overflows);
198 __db_dl_pct(env,
199 "Number of bytes free in bucket overflow pages",
200 (u_long)sp->hash_ovfl_free, DB_PCT_PG(
201 sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff");
202
203 __db_dl(env, "Number of duplicate pages", (u_long)sp->hash_dup);
204 __db_dl_pct(env, "Number of bytes free in duplicate pages",
205 (u_long)sp->hash_dup_free, DB_PCT_PG(
206 sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff");
207
208 __db_dl(env,
209 "Number of pages on the free list", (u_long)sp->hash_free);
210
211 __os_ufree(env, sp);
212
213 return (0);
214 }
215
216 static int
__ham_stat_callback(dbc,pagep,cookie,putp)217 __ham_stat_callback(dbc, pagep, cookie, putp)
218 DBC *dbc;
219 PAGE *pagep;
220 void *cookie;
221 int *putp;
222 {
223 DB *dbp;
224 DB_BTREE_STAT bstat;
225 DB_HASH_STAT *sp;
226 db_indx_t indx, len, off, tlen, top;
227 u_int8_t *hk;
228 int ret;
229
230 *putp = 0;
231 sp = cookie;
232 dbp = dbc->dbp;
233
234 switch (pagep->type) {
235 case P_INVALID:
236 /*
237 * Hash pages may be wholly zeroed; this is not a bug.
238 * Obviously such pages have no data, so we can just proceed.
239 */
240 break;
241 case P_HASH_UNSORTED:
242 case P_HASH:
243 /*
244 * We count the buckets and the overflow pages
245 * separately and tally their bytes separately
246 * as well. We need to figure out if this page
247 * is a bucket.
248 */
249 if (PREV_PGNO(pagep) == PGNO_INVALID)
250 sp->hash_bfree += P_FREESPACE(dbp, pagep);
251 else {
252 sp->hash_overflows++;
253 sp->hash_ovfl_free += P_FREESPACE(dbp, pagep);
254 }
255 top = NUM_ENT(pagep);
256 /* Correct for on-page duplicates and deleted items. */
257 for (indx = 0; indx < top; indx += P_INDX) {
258 switch (*H_PAIRDATA(dbp, pagep, indx)) {
259 case H_OFFDUP:
260 break;
261 case H_OFFPAGE:
262 case H_KEYDATA:
263 sp->hash_ndata++;
264 break;
265 case H_DUPLICATE:
266 tlen = LEN_HDATA(dbp, pagep, 0, indx);
267 hk = H_PAIRDATA(dbp, pagep, indx);
268 for (off = 0; off < tlen;
269 off += len + 2 * sizeof(db_indx_t)) {
270 sp->hash_ndata++;
271 memcpy(&len,
272 HKEYDATA_DATA(hk)
273 + off, sizeof(db_indx_t));
274 }
275 break;
276 default:
277 return (__db_pgfmt(dbp->env, PGNO(pagep)));
278 }
279 }
280 sp->hash_nkeys += H_NUMPAIRS(pagep);
281 break;
282 case P_IBTREE:
283 case P_IRECNO:
284 case P_LBTREE:
285 case P_LRECNO:
286 case P_LDUP:
287 /*
288 * These are all btree pages; get a correct
289 * cookie and call them. Then add appropriate
290 * fields into our stat structure.
291 */
292 memset(&bstat, 0, sizeof(bstat));
293 if ((ret = __bam_stat_callback(dbc, pagep, &bstat, putp)) != 0)
294 return (ret);
295 sp->hash_dup++;
296 sp->hash_dup_free += bstat.bt_leaf_pgfree +
297 bstat.bt_dup_pgfree + bstat.bt_int_pgfree;
298 sp->hash_ndata += bstat.bt_ndata;
299 break;
300 case P_OVERFLOW:
301 sp->hash_bigpages++;
302 sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep);
303 break;
304 default:
305 return (__db_pgfmt(dbp->env, PGNO(pagep)));
306 }
307
308 return (0);
309 }
310
311 /*
312 * __ham_print_cursor --
313 * Display the current cursor.
314 *
315 * PUBLIC: void __ham_print_cursor __P((DBC *));
316 */
317 void
__ham_print_cursor(dbc)318 __ham_print_cursor(dbc)
319 DBC *dbc;
320 {
321 static const FN fn[] = {
322 { H_CONTINUE, "H_CONTINUE" },
323 { H_DELETED, "H_DELETED" },
324 { H_DUPONLY, "H_DUPONLY" },
325 { H_EXPAND, "H_EXPAND" },
326 { H_ISDUP, "H_ISDUP" },
327 { H_NEXT_NODUP, "H_NEXT_NODUP" },
328 { H_NOMORE, "H_NOMORE" },
329 { H_OK, "H_OK" },
330 { 0, NULL }
331 };
332 ENV *env;
333 HASH_CURSOR *cp;
334
335 env = dbc->env;
336 cp = (HASH_CURSOR *)dbc->internal;
337
338 STAT_ULONG("Bucket traversing", cp->bucket);
339 STAT_ULONG("Bucket locked", cp->lbucket);
340 STAT_ULONG("Duplicate set offset", cp->dup_off);
341 STAT_ULONG("Current duplicate length", cp->dup_len);
342 STAT_ULONG("Total duplicate set length", cp->dup_tlen);
343 STAT_ULONG("Bytes needed for add", cp->seek_size);
344 STAT_ULONG("Page on which we can insert", cp->seek_found_page);
345 STAT_ULONG("Order", cp->order);
346 __db_prflags(env, NULL, cp->flags, fn, NULL, "\tInternal Flags");
347 }
348
349 #else /* !HAVE_STATISTICS */
350
351 int
__ham_stat(dbc,spp,flags)352 __ham_stat(dbc, spp, flags)
353 DBC *dbc;
354 void *spp;
355 u_int32_t flags;
356 {
357 COMPQUIET(spp, NULL);
358 COMPQUIET(flags, 0);
359
360 return (__db_stat_not_built(dbc->env));
361 }
362 #endif
363
364 /*
365 * __ham_traverse
366 * Traverse an entire hash table. We use the callback so that we
367 * can use this both for stat collection and for deallocation.
368 *
369 * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t,
370 * PUBLIC: int (*)(DBC *, PAGE *, void *, int *), void *, int));
371 */
372 int
__ham_traverse(dbc,mode,callback,cookie,look_past_max)373 __ham_traverse(dbc, mode, callback, cookie, look_past_max)
374 DBC *dbc;
375 db_lockmode_t mode;
376 int (*callback) __P((DBC *, PAGE *, void *, int *));
377 void *cookie;
378 int look_past_max;
379 {
380 DB *dbp;
381 DBC *opd;
382 DB_MPOOLFILE *mpf;
383 HASH_CURSOR *hcp;
384 HKEYDATA *hk;
385 db_pgno_t pgno, opgno;
386 int did_put, i, ret, t_ret;
387 u_int32_t bucket, spares_entry;
388
389 dbp = dbc->dbp;
390 opd = NULL;
391 mpf = dbp->mpf;
392 hcp = (HASH_CURSOR *)dbc->internal;
393 ret = 0;
394
395 /*
396 * In a perfect world, we could simply read each page in the file
397 * and look at its page type to tally the information necessary.
398 * Unfortunately, the bucket locking that hash tables do to make
399 * locking easy, makes this a pain in the butt. We have to traverse
400 * duplicate, overflow and big pages from the bucket so that we
401 * don't access anything that isn't properly locked.
402 *
403 */
404 for (bucket = 0;; bucket++) {
405 /*
406 * We put the loop exit condition check here, because
407 * it made for a really vile extended ?: that made SCO's
408 * compiler drop core.
409 *
410 * If look_past_max is not set, we can stop at max_bucket;
411 * if it is set, we need to include pages that are part of
412 * the current doubling but beyond the highest bucket we've
413 * split into, as well as pages from a "future" doubling
414 * that may have been created within an aborted
415 * transaction. To do this, keep looping (and incrementing
416 * bucket) until the corresponding spares array entries
417 * cease to be defined.
418 */
419 if (look_past_max) {
420 spares_entry = __db_log2(bucket + 1);
421 if (spares_entry >= NCACHED ||
422 hcp->hdr->spares[spares_entry] == 0)
423 break;
424 } else {
425 if (bucket > hcp->hdr->max_bucket)
426 break;
427 }
428
429 hcp->bucket = bucket;
430 hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket);
431 for (ret = __ham_get_cpage(dbc, mode); ret == 0;
432 ret = __ham_next_cpage(dbc, pgno)) {
433
434 /*
435 * If we are cleaning up pages past the max_bucket,
436 * then they may be on the free list and have their
437 * next pointers set, but they should be ignored. In
438 * fact, we really ought to just skip anybody who is
439 * not a valid page.
440 */
441 if (TYPE(hcp->page) == P_INVALID)
442 break;
443 pgno = NEXT_PGNO(hcp->page);
444
445 /*
446 * Go through each item on the page checking for
447 * duplicates (in which case we have to count the
448 * duplicate pages) or big key/data items (in which
449 * case we have to count those pages).
450 */
451 for (i = 0; i < NUM_ENT(hcp->page); i++) {
452 hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i);
453 switch (HPAGE_PTYPE(hk)) {
454 case H_OFFDUP:
455 memcpy(&opgno, HOFFDUP_PGNO(hk),
456 sizeof(db_pgno_t));
457 if ((ret = __dbc_newopd(dbc,
458 opgno, NULL, &opd)) != 0)
459 return (ret);
460 if ((ret = __bam_traverse(opd,
461 DB_LOCK_READ, opgno,
462 callback, cookie))
463 != 0)
464 goto err;
465 if ((ret = __dbc_close(opd)) != 0)
466 return (ret);
467 opd = NULL;
468 break;
469 case H_OFFPAGE:
470 /*
471 * We are about to get a big page
472 * which will use the same spot that
473 * the current page uses, so we need
474 * to restore the current page before
475 * looking at it again.
476 */
477 memcpy(&opgno, HOFFPAGE_PGNO(hk),
478 sizeof(db_pgno_t));
479 if ((ret = __db_traverse_big(dbc,
480 opgno, callback, cookie)) != 0)
481 goto err;
482 break;
483 case H_KEYDATA:
484 case H_DUPLICATE:
485 break;
486 default:
487 ret = __db_unknown_path(
488 dbp->env, "__ham_traverse");
489 goto err;
490 }
491 }
492
493 /* Call the callback on main pages. */
494 if ((ret = callback(dbc,
495 hcp->page, cookie, &did_put)) != 0)
496 goto err;
497
498 if (did_put)
499 hcp->page = NULL;
500 if (pgno == PGNO_INVALID)
501 break;
502 }
503 if (ret != 0)
504 goto err;
505
506 if (hcp->page != NULL) {
507 if ((ret = __memp_fput(mpf,
508 dbc->thread_info, hcp->page, dbc->priority)) != 0)
509 return (ret);
510 hcp->page = NULL;
511 }
512
513 }
514 err: if (opd != NULL &&
515 (t_ret = __dbc_close(opd)) != 0 && ret == 0)
516 ret = t_ret;
517 return (ret);
518 }
519