1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
/*
 * __ref_index_slot --
 *	Return the page's index and slot for a reference.
 *
 * Takes the WT_REF whose position we want; returns the parent page's
 * page-index and the slot within it through pindexp/slotp. Spins (with
 * backoff) until the reference is found: during an internal-page split the
 * WT_REF's home pointer may briefly reference the wrong page.
 */
static inline void
__ref_index_slot(WT_SESSION_IMPL *session,
    WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
	WT_PAGE_INDEX *pindex;
	WT_REF **start, **stop, **p, **t;
	uint64_t sleep_usecs, yield_count;
	uint32_t entries, slot;

	/*
	 * If we don't find our reference, the page split and our home
	 * pointer references the wrong page. When internal pages
	 * split, their WT_REF structure home values are updated; yield
	 * and wait for that to happen.
	 */
	for (sleep_usecs = yield_count = 0;;) {
		/*
		 * Copy the parent page's index value: the page can split at
		 * any time, but the index's value is always valid, even if
		 * it's not up-to-date.
		 */
		WT_INTL_INDEX_GET(session, ref->home, pindex);
		entries = pindex->entries;

		/*
		 * Use the page's reference hint: it should be correct unless
		 * there was a split or delete in the parent before our slot.
		 * If the hint is wrong, it can be either too big or too small,
		 * but often only by a small amount. Search up and down the
		 * index starting from the hint.
		 *
		 * It's not an error for the reference hint to be wrong, it
		 * just means the first retrieval (which sets the hint for
		 * subsequent retrievals), is slower.
		 */
		slot = ref->pindex_hint;
		if (slot >= entries)
			slot = entries - 1;
		if (pindex->index[slot] == ref)
			goto found;
		/*
		 * Scan outward in both directions from the hint slot,
		 * alternating one step down (p) and one step up (t), until
		 * both cursors hit the array bounds.
		 */
		for (start = &pindex->index[0],
		    stop = &pindex->index[entries - 1],
		    p = t = &pindex->index[slot];
		    p > start || t < stop;) {
			if (p > start && *--p == ref) {
				slot = (uint32_t)(p - start);
				goto found;
			}
			if (t < stop && *++t == ref) {
				slot = (uint32_t)(t - start);
				goto found;
			}
		}
		/*
		 * We failed to get the page index and slot reference, yield
		 * before retrying, and if we've yielded enough times, start
		 * sleeping so we don't burn CPU to no purpose.
		 */
		__wt_spin_backoff(&yield_count, &sleep_usecs);
		WT_STAT_CONN_INCRV(session, page_index_slot_ref_blocked,
		    sleep_usecs);
	}

found:	WT_ASSERT(session, pindex->index[slot] == ref);
	*pindexp = pindex;
	*slotp = slot;
}
82
83 /*
84 * __ref_is_leaf --
85 * Check if a reference is for a leaf page.
86 */
87 static inline bool
__ref_is_leaf(WT_REF * ref)88 __ref_is_leaf(WT_REF *ref)
89 {
90 size_t addr_size;
91 const uint8_t *addr;
92 u_int type;
93
94 /*
95 * If the page has a disk address, we can crack it to figure out if
96 * this page is a leaf page or not. If there's no address, the page
97 * isn't on disk and we don't know the page type.
98 */
99 __wt_ref_info(ref, &addr, &addr_size, &type);
100 return (addr == NULL ?
101 false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO);
102 }
103
/*
 * __ref_ascend --
 *	Ascend the tree one level.
 *
 * On entry *refp is the WT_REF of the internal page we're leaving; on
 * return it's the parent's WT_REF, and pindexp/slotp (when the parent
 * isn't the root) are the parent-level page-index and slot.
 */
static inline void
__ref_ascend(WT_SESSION_IMPL *session,
    WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
	WT_REF *parent_ref, *ref;

	/*
	 * Ref points to the first/last slot on an internal page from which we
	 * are ascending the tree, moving to the parent page. This is tricky
	 * because the internal page we're on may be splitting into its parent.
	 * Find a stable configuration where the page we start from and the
	 * page we're moving to are connected. The tree eventually stabilizes
	 * into that configuration, keep trying until we succeed.
	 */
	for (ref = *refp;;) {
		/*
		 * Find our parent slot on the next higher internal page, the
		 * slot from which we move to a next/prev slot, checking that
		 * we haven't reached the root.
		 */
		parent_ref = ref->home->pg_intl_parent_ref;
		if (__wt_ref_is_root(parent_ref))
			break;
		__ref_index_slot(session, parent_ref, pindexp, slotp);

		/*
		 * There's a split race when a cursor moving forwards through
		 * the tree ascends the tree. If we're splitting an internal
		 * page into its parent, we move the WT_REF structures and
		 * then update the parent's page index before updating the split
		 * page's page index, and it's not an atomic update. A thread
		 * can read the split page's original page index and then read
		 * the parent page's replacement index.
		 *
		 * This can create a race for next-cursor movements.
		 *
		 * For example, imagine an internal page with 3 child pages,
		 * with the namespaces a-f, g-h and i-j; the first child page
		 * splits. The parent starts out with the following page-index:
		 *
		 *	| ... | a | g | i | ... |
		 *
		 * which changes to this:
		 *
		 *	| ... | a | c | e | g | i | ... |
		 *
		 * The split page starts out with the following page-index:
		 *
		 *	| a | b | c | d | e | f |
		 *
		 * Imagine a cursor finishing the 'f' part of the namespace that
		 * starts its ascent to the parent's 'a' slot. Then the page
		 * splits and the parent page's page index is replaced. If the
		 * cursor then searches the parent's replacement page index for
		 * the 'a' slot, it finds it and then increments to the slot
		 * after the 'a' slot, the 'c' slot, and then it incorrectly
		 * repeats its traversal of part of the namespace.
		 *
		 * This function takes a WT_REF argument which is the page from
		 * which we start our ascent. If the parent's slot we find in
		 * our search doesn't point to the same page as that initial
		 * WT_REF, there's a race and we start over again.
		 */
		if (ref->home == parent_ref->page)
			break;
	}

	*refp = parent_ref;
}
177
/*
 * __split_prev_race --
 *	Check for races when descending the tree during a previous-cursor walk.
 *
 * Returns true if a split race was detected and the walk must restart;
 * otherwise returns false and updates *pindexp to the child's page-index.
 */
static inline bool
__split_prev_race(
    WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
	WT_PAGE_INDEX *pindex;

	/*
	 * Handle a cursor moving backwards through the tree or setting up at
	 * the end of the tree. We're passed the child page into which we're
	 * descending, and the parent page's page-index we used to find that
	 * child page.
	 *
	 * When splitting an internal page into its parent, we move the split
	 * pages WT_REF structures, then update the parent's page index, then
	 * update the split page's page index, and nothing is atomic. A thread
	 * can read the parent page's replacement page index and then the split
	 * page's original index, or vice-versa, and either change can cause a
	 * cursor moving backwards through the tree to skip pages.
	 *
	 * This isn't a problem for a cursor setting up at the start of the tree
	 * or moving forward through the tree because we do right-hand splits on
	 * internal pages and the initial part of the split page's namespace
	 * won't change as part of a split (in other words, a thread reading the
	 * parent page's and split page's indexes will move to the same slot no
	 * matter what order of indexes are read.
	 *
	 * Acquire the child's page index, then confirm the parent's page index
	 * hasn't changed, to check for reading an old version of the parent's
	 * page index and then reading a new version of the child's page index.
	 * (The order of these two reads is deliberate.)
	 */
	WT_INTL_INDEX_GET(session, ref->page, pindex);
	if (__wt_split_descent_race(session, ref, *pindexp))
		return (true);

	/*
	 * That doesn't check if we read a new version of parent's page index
	 * and then an old version of the child's page index. For example, if
	 * a thread were in a newly created split page subtree, the split
	 * completes into the parent before the thread reads it and descends
	 * into the child (where the split hasn't yet completed).
	 *
	 * Imagine an internal page with 3 child pages, with the namespaces a-f,
	 * g-h and i-j; the first child page splits. The parent starts out with
	 * the following page-index:
	 *
	 *	| ... | a | g | i | ... |
	 *
	 * The split page starts out with the following page-index:
	 *
	 *	| a | b | c | d | e | f |
	 *
	 * The first step is to move the c-f ranges into a new subtree, so, for
	 * example we might have two new internal pages 'c' and 'e', where the
	 * new 'c' page references the c-d namespace and the new 'e' page
	 * references the e-f namespace. The top of the subtree references the
	 * parent page, but until the parent's page index is updated, threads in
	 * the subtree won't be able to ascend out of the subtree. However, once
	 * the parent page's page index is updated to this:
	 *
	 *	| ... | a | c | e | g | i | ... |
	 *
	 * threads in the subtree can ascend into the parent. Imagine a cursor
	 * in the c-d part of the namespace that ascends to the parent's 'c'
	 * slot. It would then decrement to the slot before the 'c' slot, the
	 * 'a' slot.
	 *
	 * The previous-cursor movement selects the last slot in the 'a' page;
	 * if the split page's page-index hasn't been updated yet, it selects
	 * the 'f' slot, which is incorrect. Once the split page's page index is
	 * updated to this:
	 *
	 *	| a | b |
	 *
	 * the previous-cursor movement will select the 'b' slot, which is
	 * correct.
	 *
	 * If the last slot on the page no longer points to the current page as
	 * its "home", the page is being split and part of its namespace moved,
	 * restart. (We probably don't have to restart, I think we could spin
	 * until the page-index is updated, but I'm not willing to debug that
	 * one if I'm wrong.)
	 */
	if (pindex->index[pindex->entries - 1]->home != ref->page)
		return (true);

	*pindexp = pindex;
	return (false);
}
270
/*
 * __tree_walk_internal --
 *	Move to the next/previous page in the tree.
 *
 * On entry *refp is the current position (NULL to start a new walk); on
 * return it's the next/previous page, or NULL at the end of the walk. The
 * caller's hazard pointer on *refp is released and one is acquired on the
 * returned page. If walkcntp is non-NULL it's incremented for each slot
 * visited. skip_func (with func_cookie) optionally decides whether a
 * reference should be skipped. flags are the WT_READ_* walk flags.
 */
static inline int
__tree_walk_internal(WT_SESSION_IMPL *session,
    WT_REF **refp, uint64_t *walkcntp,
    int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
    void *func_cookie, uint32_t flags)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_PAGE_INDEX *pindex;
	WT_REF *couple, *ref, *ref_orig;
	uint64_t restart_sleep, restart_yield, swap_sleep, swap_yield;
	uint32_t current_state, slot;
	bool empty_internal, prev, skip;

	btree = S2BT(session);
	pindex = NULL;
	restart_sleep = restart_yield = swap_sleep = swap_yield = 0;
	empty_internal = false;

	/*
	 * We're not supposed to walk trees without root pages. As this has not
	 * always been the case, assert to debug that change.
	 */
	WT_ASSERT(session, btree->root.page != NULL);

	/* Check whether deleted pages can be skipped. */
	if (!LF_ISSET(WT_READ_DELETED_SKIP))
		LF_SET(WT_READ_DELETED_CHECK);

	/*
	 * !!!
	 * Fast-truncate currently only works on row-store trees.
	 */
	if (btree->type != BTREE_ROW)
		LF_CLR(WT_READ_TRUNCATE);

	prev = LF_ISSET(WT_READ_PREV) ? 1 : 0;

	/*
	 * There are multiple reasons and approaches to walking the in-memory
	 * tree:
	 *
	 * (1) finding pages to evict (the eviction server);
	 * (2) writing just dirty leaves or internal nodes (checkpoint);
	 * (3) discarding pages (close);
	 * (4) truncating pages in a range (fast truncate);
	 * (5) skipping pages based on outside information (compaction);
	 * (6) cursor scans (applications).
	 *
	 * Except for cursor scans and compaction, the walk is limited to the
	 * cache, no pages are read. In all cases, hazard pointers protect the
	 * walked pages from eviction.
	 *
	 * Walks use hazard-pointer coupling through the tree and that's OK
	 * (hazard pointers can't deadlock, so there's none of the usual
	 * problems found when logically locking up a btree). If the eviction
	 * thread tries to evict the active page, it fails because of our
	 * hazard pointer. If eviction tries to evict our parent, that fails
	 * because the parent has a child page that can't be discarded. We do
	 * play one game: don't couple up to our parent and then back down to a
	 * new leaf, couple to the next page to which we're descending, it
	 * saves a hazard-pointer swap for each cursor page movement.
	 *
	 * The hazard pointer on the original location is held until the end of
	 * the movement, in case we have to restart the movement. Take a copy
	 * of any held page and clear the return value (it makes future error
	 * handling easier).
	 */
	couple = NULL;
	ref_orig = *refp;
	*refp = NULL;

	/*
	 * Tree walks are special: they look inside page structures that splits
	 * may want to free. Publish the tree is active during this window.
	 */
	WT_ENTER_PAGE_INDEX(session);

	/* If no page is active, begin a walk from the start/end of the tree. */
	if ((ref = ref_orig) == NULL) {
		/*
		 * The "if (0)" wrapper makes the restart label reachable only
		 * via goto; a restart releases any coupled hazard pointer and
		 * resumes from the original position (or the tree's root).
		 */
		if (0) {
restart:		/*
			 * Yield before retrying, and if we've yielded enough
			 * times, start sleeping so we don't burn CPU to no
			 * purpose.
			 */
			__wt_spin_backoff(&restart_yield, &restart_sleep);

			WT_ERR(__wt_page_release(session, couple, flags));
			couple = NULL;
		}

		if ((ref = ref_orig) == NULL) {
			ref = &btree->root;
			WT_INTL_INDEX_GET(session, ref->page, pindex);
			slot = prev ? pindex->entries - 1 : 0;
			goto descend;
		}
	}

	/*
	 * If the active page was the root, we've reached the walk's end; we
	 * only get here if we've returned the root to our caller, so we're
	 * holding no hazard pointers.
	 */
	if (__wt_ref_is_root(ref))
		goto done;

	/* Figure out the current slot in the WT_REF array. */
	__ref_index_slot(session, ref, &pindex, &slot);

	for (;;) {
		/*
		 * If we're at the last/first slot on the internal page, return
		 * it in post-order traversal. Otherwise move to the next/prev
		 * slot and left/right-most element in that subtree.
		 */
		while ((prev && slot == 0) ||
		    (!prev && slot == pindex->entries - 1)) {
			/* Ascend to the parent. */
			__ref_ascend(session, &ref, &pindex, &slot);

			/*
			 * If at the root and returning internal pages, return
			 * the root page, otherwise we're done.
			 */
			if (__wt_ref_is_root(ref)) {
				if (!LF_ISSET(WT_READ_SKIP_INTL))
					*refp = ref;
				goto done;
			}

			/*
			 * If we got all the way through an internal page and
			 * all of the child pages were deleted, mark it for
			 * eviction.
			 */
			if (empty_internal) {
				__wt_page_evict_soon(session, ref);
				empty_internal = false;
			}

			/* Encourage races. */
			__wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_8);

			/* Optionally return internal pages. */
			if (LF_ISSET(WT_READ_SKIP_INTL))
				continue;

			for (;;) {
				/*
				 * Swap our previous hazard pointer for the page
				 * we'll return.
				 *
				 * Not-found is an expected return, as eviction
				 * might have been attempted. The page can't be
				 * evicted, we're holding a hazard pointer on a
				 * child, spin until we're successful.
				 *
				 * Restart is not expected, our parent WT_REF
				 * should not have split.
				 */
				ret = __wt_page_swap(session,
				    couple, ref, WT_READ_NOTFOUND_OK | flags);
				if (ret == 0) {
					/* Success, "couple" released. */
					couple = NULL;
					*refp = ref;
					goto done;
				}

				WT_ASSERT(session, ret == WT_NOTFOUND);
				WT_ERR_NOTFOUND_OK(ret);

				__wt_spin_backoff(&swap_yield, &swap_sleep);
			}
			/* NOTREACHED */
		}

		/* Move to the next/previous slot on this internal page. */
		if (prev)
			--slot;
		else
			++slot;

		if (walkcntp != NULL)
			++*walkcntp;

		for (;;) {
descend:		/*
			 * Get a reference, setting the reference hint if it's
			 * wrong (used when we continue the walk). We don't
			 * always update the hints when splitting, it's expected
			 * for them to be incorrect in some workloads.
			 */
			ref = pindex->index[slot];
			if (ref->pindex_hint != slot)
				ref->pindex_hint = slot;

			/*
			 * If we see any child states other than deleted, the
			 * page isn't empty.
			 */
			current_state = ref->state;
			if (current_state != WT_REF_DELETED &&
			    !LF_ISSET(WT_READ_TRUNCATE))
				empty_internal = false;

			if (LF_ISSET(WT_READ_CACHE)) {
				/*
				 * Only look at unlocked pages in memory:
				 * fast-path some common cases.
				 */
				if (LF_ISSET(WT_READ_NO_WAIT) &&
				    current_state != WT_REF_MEM &&
				    current_state != WT_REF_LIMBO)
					break;

				/* Skip lookaside pages if not requested. */
				if (current_state == WT_REF_LOOKASIDE &&
				    !LF_ISSET(WT_READ_LOOKASIDE))
					break;
			} else if (LF_ISSET(WT_READ_TRUNCATE)) {
				/*
				 * Avoid pulling a deleted page back in to try
				 * to delete it again.
				 */
				if (current_state == WT_REF_DELETED &&
				    __wt_delete_page_skip(session, ref, false))
					break;
				/*
				 * If deleting a range, try to delete the page
				 * without instantiating it.
				 */
				WT_ERR(__wt_delete_page(session, ref, &skip));
				if (skip)
					break;
				empty_internal = false;
			} else if (skip_func != NULL) {
				WT_ERR(skip_func(session,
				    ref, func_cookie, &skip));
				if (skip)
					break;
			} else {
				/*
				 * Try to skip deleted pages visible to us.
				 */
				if (current_state == WT_REF_DELETED &&
				    __wt_delete_page_skip(session, ref, false))
					break;
			}

			ret = __wt_page_swap(session, couple, ref,
			    WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
			if (ret == 0) {
				/* Success, so "couple" has been released. */
				couple = NULL;

				/* Return leaf pages to our caller. */
				if (!WT_PAGE_IS_INTERNAL(ref->page)) {
					*refp = ref;
					goto done;
				}

				/* Set the new "couple" value. */
				couple = ref;

				/* Configure traversal of any internal page. */
				empty_internal = true;
				if (prev) {
					if (__split_prev_race(
					    session, ref, &pindex))
						goto restart;
					slot = pindex->entries - 1;
				} else {
					WT_INTL_INDEX_GET(
					    session, ref->page, pindex);
					slot = 0;
				}
				continue;
			}

			/*
			 * Not-found is an expected return when walking only
			 * in-cache pages, or if we see a deleted page.
			 *
			 * An expected error, so "couple" is unchanged.
			 */
			if (ret == WT_NOTFOUND) {
				WT_NOT_READ(ret, 0);
				break;
			}

			/*
			 * The page we're moving to might have split, in which
			 * case restart the movement.
			 *
			 * An expected error, so "couple" is unchanged.
			 */
			if (ret == WT_RESTART)
				goto restart;

			/* Unexpected error, so "couple" was released. */
			couple = NULL;
			goto err;
		}
	}

done:
err:	/* Release any held pages on all return paths. */
	WT_TRET(__wt_page_release(session, couple, flags));
	WT_TRET(__wt_page_release(session, ref_orig, flags));
	WT_LEAVE_PAGE_INDEX(session);
	return (ret);
}
589
590 /*
591 * __wt_tree_walk --
592 * Move to the next/previous page in the tree.
593 */
594 int
__wt_tree_walk(WT_SESSION_IMPL * session,WT_REF ** refp,uint32_t flags)595 __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
596 {
597 return (__tree_walk_internal(session, refp, NULL, NULL, NULL, flags));
598 }
599
600 /*
601 * __wt_tree_walk_count --
602 * Move to the next/previous page in the tree, tracking how many
603 * references were visited to get there.
604 */
605 int
__wt_tree_walk_count(WT_SESSION_IMPL * session,WT_REF ** refp,uint64_t * walkcntp,uint32_t flags)606 __wt_tree_walk_count(WT_SESSION_IMPL *session,
607 WT_REF **refp, uint64_t *walkcntp, uint32_t flags)
608 {
609 return (__tree_walk_internal(
610 session, refp, walkcntp, NULL, NULL, flags));
611 }
612
613 /*
614 * __wt_tree_walk_custom_skip --
615 * Walk the tree calling a custom function to decide whether to skip refs.
616 */
617 int
__wt_tree_walk_custom_skip(WT_SESSION_IMPL * session,WT_REF ** refp,int (* skip_func)(WT_SESSION_IMPL *,WT_REF *,void *,bool *),void * func_cookie,uint32_t flags)618 __wt_tree_walk_custom_skip(
619 WT_SESSION_IMPL *session, WT_REF **refp,
620 int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
621 void *func_cookie, uint32_t flags)
622 {
623 return (__tree_walk_internal(
624 session, refp, NULL, skip_func, func_cookie, flags));
625 }
626
627 /*
628 * __tree_walk_skip_count_callback --
629 * Optionally skip leaf pages.
630 * When the skip-leaf-count variable is non-zero, skip some count of leaf
631 * pages, then take the next leaf page we can.
632 *
633 * The reason to do some of this work here, is because we can look at the cell
634 * and know it's a leaf page without reading it into memory. If this page is
635 * disk-based, crack the cell to figure out it's a leaf page without reading
636 * it.
637 */
638 static int
__tree_walk_skip_count_callback(WT_SESSION_IMPL * session,WT_REF * ref,void * context,bool * skipp)639 __tree_walk_skip_count_callback(
640 WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
641 {
642 uint64_t *skipleafcntp;
643
644 skipleafcntp = (uint64_t *)context;
645 WT_ASSERT(session, skipleafcntp != NULL);
646
647 /*
648 * Skip deleted pages visible to us.
649 */
650 if (ref->state == WT_REF_DELETED &&
651 __wt_delete_page_skip(session, ref, false))
652 *skipp = true;
653 else if (*skipleafcntp > 0 && __ref_is_leaf(ref)) {
654 --*skipleafcntp;
655 *skipp = true;
656 } else
657 *skipp = false;
658 return (0);
659 }
660
661 /*
662 * __wt_tree_walk_skip --
663 * Move to the next/previous page in the tree, skipping a certain number
664 * of leaf pages before returning.
665 */
666 int
__wt_tree_walk_skip(WT_SESSION_IMPL * session,WT_REF ** refp,uint64_t * skipleafcntp)667 __wt_tree_walk_skip(
668 WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp)
669 {
670 /*
671 * Optionally skip leaf pages, the second half. The tree-walk function
672 * didn't have an on-page cell it could use to figure out if the page
673 * was a leaf page or not, it had to acquire the hazard pointer and look
674 * at the page. The tree-walk code never acquires a hazard pointer on a
675 * leaf page without returning it, and it's not trivial to change that.
676 * So, the tree-walk code returns all leaf pages here and we deal with
677 * decrementing the count.
678 */
679 do {
680 WT_RET(__tree_walk_internal(session, refp, NULL,
681 __tree_walk_skip_count_callback, skipleafcntp,
682 WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
683
684 /*
685 * The walk skipped internal pages, any page returned must be a
686 * leaf page.
687 */
688 if (*skipleafcntp > 0)
689 --*skipleafcntp;
690 } while (*skipleafcntp > 0);
691
692 return (0);
693 }
694