1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * This module implements the cluster synchronizer. Basically the way 36 * it works is that a thread is created for each cluster node in a PFS. 
 * This thread is responsible for synchronizing the current node using
 * data from other nodes.
 *
 * Any out of sync master or slave can get back into synchronization as
 * long as a quorum of masters agree on the update_tid.  If a quorum is
 * not available it may still be possible to synchronize to the highest
 * available update_tid as a way of trying to catch up as much as possible
 * until a quorum is available.
 *
 * If no quorum is possible (which can happen even if all masters are
 * available, if the update_tid does not match), then manual intervention
 * may be required to resolve discrepancies.
 */
#include "hammer2.h"

/*
 * One deferred inode.  When the synchronization scan hits an inode that
 * requires recursion it records the inode here instead of recursing
 * immediately (the XOP for the current scan is still running), and the
 * caller processes the list afterwards.
 */
typedef struct hammer2_deferred_ip {
	struct hammer2_deferred_ip *next;	/* next (older) deferral */
	hammer2_inode_t	*ip;			/* referenced inode */
} hammer2_deferred_ip_t;

/*
 * LIFO list of deferred inodes.  count bounds the growth of the list
 * (see the >1000 check in hammer2_sync_slaves()).
 */
typedef struct hammer2_deferred_list {
	hammer2_deferred_ip_t	*base;		/* top of LIFO */
	int			count;		/* number of deferrals */
} hammer2_deferred_list_t;


#define HAMMER2_SYNCHRO_DEBUG 1		/* enables debug kprintf()s below */

static int hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
				hammer2_deferred_list_t *list);
#if 0
/*
 * NOTE(review): dead block -- an unused prototype plus a leftover code
 * fragment; not compiled.
 */
static void hammer2_update_pfs_status(hammer2_thread_t *thr, uint32_t flags);
nerror = hammer2_sync_insert(
		thr, &parent, &chain,
		focus->bref.modify_tid,
		idx, focus);
#endif
static int hammer2_sync_insert(hammer2_thread_t *thr,
			hammer2_chain_t **parentp, hammer2_chain_t **chainp,
			hammer2_tid_t modify_tid, int idx,
			hammer2_chain_t *focus);
static int hammer2_sync_destroy(hammer2_thread_t *thr,
			hammer2_chain_t **parentp, hammer2_chain_t **chainp,
			hammer2_tid_t mtid, int idx);
static int hammer2_sync_replace(hammer2_thread_t *thr,
			hammer2_chain_t *parent, hammer2_chain_t *chain,
			hammer2_tid_t mtid, int idx,
			hammer2_chain_t *focus);

/****************************************************************************
 *			    HAMMER2 SYNC THREADS			    *
 ****************************************************************************/
/*
 * Primary management
thread for an element of a node.  A thread will exist
 * for each element requiring management.
 *
 * No management threads are needed for the SPMP or for any PMP with only
 * a single MASTER.
 *
 * On the SPMP - handles bulkfree and dedup operations
 * On a PFS    - handles remastering and synchronization
 *
 * arg is the hammer2_thread_t for this element.  Runs until the
 * HAMMER2_THREAD_STOP flag is set, then wakes up the requester and
 * releases thr->lk (after which thr may be freed by the requester).
 */
void
hammer2_primary_sync_thread(void *arg)
{
	hammer2_thread_t *thr = arg;
	hammer2_pfs_t *pmp;
	hammer2_deferred_list_t list;	/* inodes deferred by the scan */
	hammer2_deferred_ip_t *defer;
	int error;

	pmp = thr->pmp;
	bzero(&list, sizeof(list));

	/* thr->lk is held for the thread's lifetime, dropped in lksleep */
	lockmgr(&thr->lk, LK_EXCLUSIVE);
	while ((thr->flags & HAMMER2_THREAD_STOP) == 0) {
		/*
		 * Handle freeze request
		 */
		if (thr->flags & HAMMER2_THREAD_FREEZE) {
			atomic_set_int(&thr->flags, HAMMER2_THREAD_FROZEN);
			atomic_clear_int(&thr->flags, HAMMER2_THREAD_FREEZE);
		}

		/*
		 * Force idle if frozen until unfrozen or stopped.
		 */
		if (thr->flags & HAMMER2_THREAD_FROZEN) {
			lksleep(thr->xopq, &thr->lk, 0, "frozen", 0);
			continue;
		}

		/*
		 * Reset state on REMASTER request
		 */
		if (thr->flags & HAMMER2_THREAD_REMASTER) {
			atomic_clear_int(&thr->flags, HAMMER2_THREAD_REMASTER);
			/* reset state */
		}

		/*
		 * Synchronization scan.  Sync the root inode first; any
		 * deferred inodes it pushes are then synced bottom-up
		 * (the scan returns EAGAIN when deferrals were added).
		 */
		kprintf("sync_slaves pfs %s clindex %d\n",
			pmp->pfs_names[thr->clindex], thr->clindex);
		hammer2_trans_init(pmp, 0);

		hammer2_inode_ref(pmp->iroot);

		for (;;) {
			int didbreak = 0;
			/* XXX lock synchronize pmp->modify_tid */
			error = hammer2_sync_slaves(thr, pmp->iroot, &list);
			if (error != EAGAIN)
				break;
			while ((defer = list.base) != NULL) {
				hammer2_inode_t *nip;

				nip = defer->ip;
				error = hammer2_sync_slaves(thr, nip, &list);
				if (error && error != EAGAIN)
					break;
				if (hammer2_thr_break(thr)) {
					didbreak = 1;
					break;
				}

				/*
				 * If no additional defers occurred we can
				 * remove this one, otherwise keep it on
				 * the list and retry once the additional
				 * defers have completed.
				 */
				if (defer == list.base) {
					--list.count;
					list.base = defer->next;
					kfree(defer, M_HAMMER2);
					defer = NULL;	/* safety */
					hammer2_inode_drop(nip);
				}
			}

			/*
			 * If the thread is being remastered, frozen, or
			 * stopped, clean up any left-over deferrals
			 * (drop the inode refs taken when they were queued).
			 */
			if (didbreak || (error && error != EAGAIN)) {
				kprintf("didbreak\n");
				while ((defer = list.base) != NULL) {
					--list.count;
					hammer2_inode_drop(defer->ip);
					list.base = defer->next;
					kfree(defer, M_HAMMER2);
				}
				if (error == 0 || error == EAGAIN)
					error = EINPROGRESS;
				break;
			}
		}

		hammer2_inode_drop(pmp->iroot);
		hammer2_trans_done(pmp);

		if (error)
			kprintf("hammer2_sync_slaves: error %d\n", error);

		/*
		 * Wait for event, or 5-second poll.
		 */
		lksleep(thr->xopq, &thr->lk, 0, "h2idle", hz * 5);
	}
	thr->td = NULL;
	wakeup(thr);
	lockmgr(&thr->lk, LK_RELEASE);
	/* thr structure can go invalid after this point */
}

#if 0
/*
 * Given a locked cluster created from pmp->iroot, update the PFS's
 * reporting status.
 */
static
void
hammer2_update_pfs_status(hammer2_thread_t *thr, uint32_t flags)
{
	hammer2_pfs_t *pmp = thr->pmp;

	flags &= HAMMER2_CLUSTER_ZFLAGS;
	if (pmp->cluster_flags == flags)
		return;
	pmp->cluster_flags = flags;

	kprintf("pfs %p", pmp);
	if (flags & HAMMER2_CLUSTER_MSYNCED)
		kprintf(" masters-all-good");
	if (flags & HAMMER2_CLUSTER_SSYNCED)
		kprintf(" slaves-all-good");

	if (flags & HAMMER2_CLUSTER_WRHARD)
		kprintf(" quorum/rw");
	else if (flags & HAMMER2_CLUSTER_RDHARD)
		kprintf(" quorum/ro");

	if (flags & HAMMER2_CLUSTER_UNHARD)
		kprintf(" out-of-sync-masters");
	else if (flags & HAMMER2_CLUSTER_NOHARD)
		kprintf(" no-masters-visible");

	if (flags & HAMMER2_CLUSTER_WRSOFT)
		kprintf(" soft/rw");
	else if (flags & HAMMER2_CLUSTER_RDSOFT)
		kprintf(" soft/ro");

	if (flags & HAMMER2_CLUSTER_UNSOFT)
		kprintf(" out-of-sync-slaves");
	else if (flags & HAMMER2_CLUSTER_NOSOFT)
		kprintf(" no-slaves-visible");
	kprintf("\n");
}
#endif

#if 0
/*
 * Debug helper: dump the chains of a cluster and its parent side-by-side
 * (only when hammer2_debug bit 0 is set).  Disabled code.
 */
static
void
dumpcluster(const char *label,
	    hammer2_cluster_t *cparent, hammer2_cluster_t *cluster)
{
	hammer2_chain_t *chain;
	int i;

	if ((hammer2_debug & 1) == 0)
		return;

	kprintf("%s\t", label);
	KKASSERT(cparent->nchains == cluster->nchains);
	for (i = 0; i < cparent->nchains; ++i) {
		if (i)
			kprintf("\t");
		kprintf("%d ", i);
		if ((chain = cparent->array[i].chain) != NULL) {
			kprintf("%016jx%s ",
				chain->bref.key,
				((cparent->array[i].flags &
				  HAMMER2_CITEM_INVALID) ? "(I)" : " ")
			);
		} else {
			kprintf(" NULL %s ", " ");
		}
		if ((chain = cluster->array[i].chain) != NULL) {
			kprintf("%016jx%s ",
				chain->bref.key,
				((cluster->array[i].flags &
				  HAMMER2_CITEM_INVALID) ?
				 "(I)" : " ")
			);
		} else {
			kprintf(" NULL %s ", " ");
		}
		kprintf("\n");
	}
}
#endif

/*
 * Each out of sync node sync-thread must issue an all-nodes XOP scan of
 * the inode.  This creates a multiplication effect since the XOP scan itself
 * issues to all nodes.  However, this is the only way we can safely
 * synchronize nodes which might have disparate I/O bandwidths and the only
 * way we can safely deal with stalled nodes.
 *
 * thr  - sync thread; thr->clindex selects the cluster element we repair.
 * ip   - inode to synchronize (left unlocked during the scan).
 * list - deferral list; inodes requiring recursion are pushed here (LIFO)
 *	  and the function returns EAGAIN so the caller processes them.
 *
 * Returns 0 on success, EAGAIN when deferrals were queued, EINPROGRESS
 * when no focus is available, or another errno on failure.
 */
static
int
hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
		    hammer2_deferred_list_t *list)
{
	hammer2_xop_scanall_t *xop;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_pfs_t *pmp;
	hammer2_key_t key_next;
	hammer2_tid_t sync_tid;
	int cache_index = -1;
	int needrescan;
	int wantupdate;
	int error;
	int nerror;
	int idx;
	int n;

	pmp = ip->pmp;
	idx = thr->clindex;	/* cluster node we are responsible for */
	needrescan = 0;
	wantupdate = 0;

	if (ip->cluster.focus == NULL)
		return (EINPROGRESS);
	sync_tid = ip->cluster.focus->bref.modify_tid;

#if 0
	/*
	 * Nothing to do if all slaves are synchronized.
	 * Nothing to do if cluster not authoritatively readable.
	 */
	if (pmp->cluster_flags & HAMMER2_CLUSTER_SSYNCED)
		return(0);
	if ((pmp->cluster_flags & HAMMER2_CLUSTER_RDHARD) == 0)
		return(HAMMER2_ERROR_INCOMPLETE);
#endif

	error = 0;

	/*
	 * The inode is left unlocked during the scan.  Issue a XOP
	 * that does *not* include our cluster index to iterate
	 * properly synchronized elements and resolve our cluster index
	 * against it.
	 */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
	xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
	xop->key_beg = HAMMER2_KEY_MIN;
	xop->key_end = HAMMER2_KEY_MAX;
	hammer2_xop_start_except(&xop->head, hammer2_xop_scanall, idx);
	parent = hammer2_inode_chain(ip, idx,
				     HAMMER2_RESOLVE_ALWAYS |
				     HAMMER2_RESOLVE_SHARED);
	/* our element is out of date if its modify_tid differs from focus */
	if (parent->bref.modify_tid != sync_tid)
		wantupdate = 1;

	hammer2_inode_unlock(ip);

	/*
	 * Local iteration under our element, run in parallel with the
	 * XOP collection from the other (authoritative) nodes.
	 */
	chain = hammer2_chain_lookup(&parent, &key_next,
				     HAMMER2_KEY_MIN, HAMMER2_KEY_MAX,
				     &cache_index,
				     HAMMER2_LOOKUP_SHARED |
				     HAMMER2_LOOKUP_NODIRECT |
				     HAMMER2_LOOKUP_NODATA);
	error = hammer2_xop_collect(&xop->head, 0);
	kprintf("XOP_INITIAL xop=%p clindex %d on %s\n", xop, thr->clindex,
		pmp->pfs_names[thr->clindex]);

	for (;;) {
		/*
		 * We are done if our scan is done and the XOP scan is done.
		 * We are done if the XOP scan failed (that is, we don't
		 * have authoritative data to synchronize with).
		 */
		int advance_local = 0;
		int advance_xop = 0;
		int dodefer = 0;
		hammer2_chain_t *focus;

		/* debug: dump cluster element 1 state each iteration */
		kprintf("loop xop=%p chain[1]=%p lockcnt=%d\n",
			xop, xop->head.cluster.array[1].chain,
			(xop->head.cluster.array[1].chain ?
			 xop->head.cluster.array[1].chain->lockcnt : -1)
			);

		if (chain == NULL && error == ENOENT)
			break;
		if (error && error != ENOENT)
			break;

		/*
		 * Compare.  n < 0 means our local element is ahead (delete),
		 * n > 0 means it is missing an element (insert), n == 0
		 * means keys match (possibly requiring a content update).
		 */
		if (chain && error == ENOENT) {
			/*
			 * If we have local chains but the XOP scan is done,
			 * the chains need to be deleted.
			 */
			n = -1;
			focus = NULL;
		} else if (chain == NULL) {
			/*
			 * If our local scan is done but the XOP scan is not,
			 * we need to create the missing chain(s).
			 */
			n = 1;
			focus = xop->head.cluster.focus;
		} else {
			/*
			 * Otherwise compare to determine the action
			 * needed.
			 */
			focus = xop->head.cluster.focus;
			n = hammer2_chain_cmp(chain, focus);
		}

		/*
		 * Take action based on comparison results.
		 */
		if (n < 0) {
			/*
			 * Delete extraneous local data.  This will
			 * automatically advance the chain.
			 */
			nerror = hammer2_sync_destroy(thr, &parent, &chain,
						      0, idx);
		} else if (n == 0 && chain->bref.modify_tid !=
				     focus->bref.modify_tid) {
			/*
			 * Matching key but local data or meta-data requires
			 * updating.  If we will recurse, we still need to
			 * update to compatible content first but we do not
			 * synchronize modify_tid until the entire recursion
			 * has completed successfully.
			 */
			if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
				nerror = hammer2_sync_replace(
						thr, parent, chain,
						0,
						idx, focus);
				dodefer = 1;
			} else {
				nerror = hammer2_sync_replace(
						thr, parent, chain,
						focus->bref.modify_tid,
						idx, focus);
			}
		} else if (n == 0) {
			/*
			 * 100% match, advance both
			 */
			advance_local = 1;
			advance_xop = 1;
			nerror = 0;
		} else if (n > 0) {
			/*
			 * Insert missing local data.
			 *
			 * If we will recurse, we still need to update to
			 * compatible content first but we do not synchronize
			 * modify_tid until the entire recursion has
			 * completed successfully.
			 */
			if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
				nerror = hammer2_sync_insert(
						thr, &parent, &chain,
						0,
						idx, focus);
				dodefer = 2;
			} else {
				nerror = hammer2_sync_insert(
						thr, &parent, &chain,
						focus->bref.modify_tid,
						idx, focus);
			}
			advance_local = 1;
			advance_xop = 1;
		}

		/*
		 * We cannot recurse depth-first because the XOP is still
		 * running in node threads for this scan.  Create a placemarker
		 * by obtaining and recording the hammer2_inode.
		 *
		 * We excluded our node from the XOP so we must temporarily
		 * add it to xop->head.cluster so it is properly incorporated
		 * into the inode.
		 *
		 * The deferral is pushed onto a LIFO list for bottom-up
		 * synchronization.
		 */
		if (error == 0 && dodefer) {
			hammer2_inode_t *nip;
			hammer2_deferred_ip_t *defer;

			KKASSERT(focus->bref.type == HAMMER2_BREF_TYPE_INODE);

			defer = kmalloc(sizeof(*defer), M_HAMMER2,
					M_WAITOK | M_ZERO);
			KKASSERT(xop->head.cluster.array[idx].chain == NULL);
			xop->head.cluster.array[idx].flags =
					HAMMER2_CITEM_INVALID;
			xop->head.cluster.array[idx].chain = chain;
			nip = hammer2_inode_get(pmp, ip,
						&xop->head.cluster, idx);
			xop->head.cluster.array[idx].chain = NULL;

			hammer2_inode_ref(nip);
			hammer2_inode_unlock(nip);

			defer->next = list->base;
			defer->ip = nip;
			list->base = defer;
			++list->count;
			needrescan = 1;
		}

		/*
		 * If at least one deferral was added and the deferral
		 * list has grown too large, stop adding more.  This
		 * will trigger an EAGAIN return.
		 */
		if (needrescan && list->count > 1000)
			break;

		/*
		 * Advancements for iteration.
		 */
		if (advance_xop) {
			error = hammer2_xop_collect(&xop->head, 0);
		}
		if (advance_local) {
			chain = hammer2_chain_next(&parent, chain, &key_next,
						   key_next, HAMMER2_KEY_MAX,
						   &cache_index,
						   HAMMER2_LOOKUP_SHARED |
						   HAMMER2_LOOKUP_NODIRECT |
						   HAMMER2_LOOKUP_NODATA);
		}
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}

	/*
	 * If we added deferrals we want the caller to synchronize them
	 * and then call us again.
	 *
	 * NOTE: In this situation we do not yet want to synchronize our
	 *	 inode, setting the error code also has that effect.
	 */
	if (error == 0 && needrescan)
		error = EAGAIN;

	/*
	 * If no error occurred and work was performed, synchronize the
	 * inode meta-data itself.
	 *
	 * XXX inode lock was lost
	 */
	if (error == 0 && wantupdate) {
		hammer2_xop_ipcluster_t *xop2;
		hammer2_chain_t *focus;

		xop2 = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		hammer2_xop_start_except(&xop2->head, hammer2_xop_ipcluster,
					 idx);
		error = hammer2_xop_collect(&xop2->head, 0);
		if (error == 0) {
			focus = xop2->head.cluster.focus;
			kprintf("syncthr: update inode %p (%s)\n",
				focus,
				(focus ?
				 (char *)focus->data->ipdata.filename : "?"));
			chain = hammer2_inode_chain_and_parent(ip, idx,
						    &parent,
						    HAMMER2_RESOLVE_ALWAYS |
						    HAMMER2_RESOLVE_SHARED);

			KKASSERT(parent != NULL);
			nerror = hammer2_sync_replace(
					thr, parent, chain,
					sync_tid,
					idx, focus);
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
			/* XXX */
		}
		hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
	}

	return error;
}

/*
 * Create a missing chain by copying the focus from another device.
 *
 * On entry *parentp and focus are both locked shared.  The chain will be
 * created and returned in *chainp also locked shared.
616 */ 617 static 618 int 619 hammer2_sync_insert(hammer2_thread_t *thr, 620 hammer2_chain_t **parentp, hammer2_chain_t **chainp, 621 hammer2_tid_t mtid, int idx, hammer2_chain_t *focus) 622 { 623 hammer2_chain_t *chain; 624 625 #if HAMMER2_SYNCHRO_DEBUG 626 if (hammer2_debug & 1) 627 kprintf("insert rec par=%p/%d.%016jx slave %d %d.%016jx mod=%016jx\n", 628 *parentp, 629 (*parentp)->bref.type, 630 (*parentp)->bref.key, 631 idx, 632 focus->bref.type, focus->bref.key, mtid); 633 #endif 634 635 /* 636 * Create the missing chain. Exclusive locks are needed. 637 * 638 * Have to be careful to avoid deadlocks. 639 */ 640 if (*chainp) 641 hammer2_chain_unlock(*chainp); 642 hammer2_chain_unlock(*parentp); 643 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS); 644 /* reissue lookup? */ 645 646 chain = NULL; 647 hammer2_chain_create(parentp, &chain, thr->pmp, 648 focus->bref.key, focus->bref.keybits, 649 focus->bref.type, focus->bytes, 650 mtid, 0, 0); 651 hammer2_chain_modify(chain, mtid, 0, 0); 652 653 /* 654 * Copy focus to new chain 655 */ 656 657 /* type already set */ 658 chain->bref.methods = focus->bref.methods; 659 /* keybits already set */ 660 chain->bref.vradix = focus->bref.vradix; 661 /* mirror_tid set by flush */ 662 KKASSERT(chain->bref.modify_tid == mtid); 663 chain->bref.flags = focus->bref.flags; 664 /* key already present */ 665 /* check code will be recalculated */ 666 667 /* 668 * Copy data body. 
669 */ 670 switch(chain->bref.type) { 671 case HAMMER2_BREF_TYPE_INODE: 672 if ((focus->data->ipdata.meta.op_flags & 673 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 674 bcopy(focus->data, chain->data, 675 offsetof(hammer2_inode_data_t, u)); 676 break; 677 } 678 /* fall through */ 679 case HAMMER2_BREF_TYPE_DATA: 680 bcopy(focus->data, chain->data, chain->bytes); 681 hammer2_chain_setcheck(chain, chain->data); 682 break; 683 default: 684 KKASSERT(0); 685 break; 686 } 687 688 hammer2_chain_unlock(chain); /* unlock, leave ref */ 689 if (*chainp) 690 hammer2_chain_drop(*chainp); 691 *chainp = chain; /* will be returned locked */ 692 693 /* 694 * Avoid ordering deadlock when relocking. 695 */ 696 hammer2_chain_unlock(*parentp); 697 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_SHARED | 698 HAMMER2_RESOLVE_ALWAYS); 699 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED | 700 HAMMER2_RESOLVE_ALWAYS); 701 702 return 0; 703 } 704 705 /* 706 * Destroy an extranious chain. 707 * 708 * Both *parentp and *chainp are locked shared. 709 * 710 * On return, *chainp will be adjusted to point to the next element in the 711 * iteration and locked shared. 712 */ 713 static 714 int 715 hammer2_sync_destroy(hammer2_thread_t *thr, 716 hammer2_chain_t **parentp, hammer2_chain_t **chainp, 717 hammer2_tid_t mtid, int idx) 718 { 719 hammer2_chain_t *chain; 720 hammer2_chain_t *parent; 721 hammer2_key_t key_next; 722 hammer2_key_t save_key; 723 int cache_index = -1; 724 725 chain = *chainp; 726 727 #if HAMMER2_SYNCHRO_DEBUG 728 if (hammer2_debug & 1) 729 kprintf("destroy rec %p/%p slave %d %d.%016jx\n", 730 *parentp, chain, 731 idx, chain->bref.type, chain->bref.key); 732 #endif 733 734 save_key = chain->bref.key; 735 if (save_key != HAMMER2_KEY_MAX) 736 ++save_key; 737 738 /* 739 * Try to avoid unnecessary I/O. 740 * 741 * XXX accounting not propagated up properly. We might have to do 742 * a RESOLVE_MAYBE here and pass 0 for the flags. 
743 */ 744 hammer2_chain_unlock(chain); /* relock exclusive */ 745 hammer2_chain_unlock(*parentp); 746 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS); 747 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 748 749 hammer2_chain_delete(*parentp, chain, mtid, HAMMER2_DELETE_PERMANENT); 750 hammer2_chain_unlock(chain); 751 hammer2_chain_drop(chain); 752 chain = NULL; /* safety */ 753 754 hammer2_chain_unlock(*parentp); /* relock shared */ 755 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_SHARED | 756 HAMMER2_RESOLVE_ALWAYS); 757 *chainp = hammer2_chain_lookup(&parent, &key_next, 758 save_key, HAMMER2_KEY_MAX, 759 &cache_index, 760 HAMMER2_LOOKUP_SHARED | 761 HAMMER2_LOOKUP_NODIRECT | 762 HAMMER2_LOOKUP_NODATA); 763 return 0; 764 } 765 766 /* 767 * cparent is locked exclusively, with an extra ref, cluster is not locked. 768 * Replace element [i] in the cluster. 769 */ 770 static 771 int 772 hammer2_sync_replace(hammer2_thread_t *thr, 773 hammer2_chain_t *parent, hammer2_chain_t *chain, 774 hammer2_tid_t mtid, int idx, 775 hammer2_chain_t *focus) 776 { 777 int nradix; 778 uint8_t otype; 779 780 #if HAMMER2_SYNCHRO_DEBUG 781 if (hammer2_debug & 1) 782 kprintf("replace rec %p slave %d %d.%016jx mod=%016jx\n", 783 chain, 784 idx, 785 focus->bref.type, focus->bref.key, mtid); 786 #endif 787 hammer2_chain_unlock(chain); 788 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 789 if (chain->bytes != focus->bytes) { 790 /* XXX what if compressed? 
*/ 791 nradix = hammer2_getradix(chain->bytes); 792 hammer2_chain_resize(NULL, parent, chain, 793 mtid, 0, 794 nradix, 0); 795 } 796 hammer2_chain_modify(chain, mtid, 0, 0); 797 otype = chain->bref.type; 798 chain->bref.type = focus->bref.type; 799 chain->bref.methods = focus->bref.methods; 800 chain->bref.keybits = focus->bref.keybits; 801 chain->bref.vradix = focus->bref.vradix; 802 /* mirror_tid updated by flush */ 803 KKASSERT(chain->bref.modify_tid == mtid); 804 chain->bref.flags = focus->bref.flags; 805 /* key already present */ 806 /* check code will be recalculated */ 807 chain->error = 0; 808 809 /* 810 * Copy data body. 811 */ 812 switch(chain->bref.type) { 813 case HAMMER2_BREF_TYPE_INODE: 814 if ((focus->data->ipdata.meta.op_flags & 815 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 816 /* 817 * If DIRECTDATA is transitioning to 0 or the old 818 * chain is not an inode we have to initialize 819 * the block table. 820 */ 821 if (otype != HAMMER2_BREF_TYPE_INODE || 822 (chain->data->ipdata.meta.op_flags & 823 HAMMER2_OPFLAG_DIRECTDATA)) { 824 kprintf("chain inode trans away from dd\n"); 825 bzero(&chain->data->ipdata.u, 826 sizeof(chain->data->ipdata.u)); 827 } 828 bcopy(focus->data, chain->data, 829 offsetof(hammer2_inode_data_t, u)); 830 /* XXX setcheck on inode should not be needed */ 831 hammer2_chain_setcheck(chain, chain->data); 832 break; 833 } 834 /* fall through */ 835 case HAMMER2_BREF_TYPE_DATA: 836 bcopy(focus->data, chain->data, chain->bytes); 837 hammer2_chain_setcheck(chain, chain->data); 838 break; 839 default: 840 KKASSERT(0); 841 break; 842 } 843 844 hammer2_chain_unlock(chain); 845 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED | 846 HAMMER2_RESOLVE_MAYBE); 847 848 return 0; 849 } 850