1 /*
2 * Copyright (c) 2013-2018 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34 /*
35 * The cluster module collects multiple chains representing the same
36 * information from different nodes into a single entity. It allows direct
37 * access to media data as long as it is not blockref array data (which
38 * will obviously have to be different at each node).
39 *
40 * This module also handles I/O dispatch, status rollup, and various
41 * mastership arrangements including quorum operations. It effectively
42 * presents one topology to the vnops layer.
43 *
44 * Many of the API calls mimic chain API calls but operate on clusters
45 * instead of chains. Please see hammer2_chain.c for more complete code
46 * documentation of the API functions.
47 *
48 * WARNING! This module is *extremely* complex. It must issue asynchronous
49 * locks and I/O, do quorum and/or master-slave processing, and
50 * it must operate properly even if some nodes are broken (which
51 * can also mean indefinite locks).
52 *
53 * CLUSTER OPERATIONS
54 *
55 * Cluster operations can be broken down into three pieces:
56 *
57 * (1) Chain locking and data retrieval.
58 *
59 * - Most complex functions, quorum management on transaction ids.
60 *
61 * - Locking and data accesses must be internally asynchronous.
62 *
63 * - Validate and manage cache coherency primitives (cache state
64 * is stored in chain topologies but must be validated by these
65 * functions).
66 *
67 * (2) Lookups and Scans
68 * hammer2_cluster_lookup()
69 * hammer2_cluster_next()
70 *
71 * - Depend on locking & data retrieval functions, but still complex.
72 *
73 * - Must do quorum management on transaction ids.
74 *
75 * - Lookup and Iteration ops Must be internally asynchronous.
76 *
77 * (3) Modifying Operations
78 * hammer2_cluster_create()
79 *
80 * - Can usually punt on failures, operation continues unless quorum
81 * is lost. If quorum is lost, must wait for resynchronization
82 * (depending on the management mode).
83 *
84 * - Must disconnect node on failures (also not flush), remount, and
85 * resynchronize.
86 *
87 * - Network links (via kdmsg) are relatively easy to issue as the
 *	    complex underworkings of hammer2_chain.c don't have to be messed
89 * with (the protocol is at a higher level than block-level).
90 *
91 * - Multiple local disk nodes (i.e. block devices) are another matter.
92 * Chain operations have to be dispatched to per-node threads (xN)
93 * because we can't asynchronize potentially very complex chain
94 * operations in hammer2_chain.c (it would be a huge mess).
95 *
96 * (these threads are also used to terminate incoming kdmsg ops from
97 * other machines).
98 *
99 * - Single-node filesystems do not use threads and will simply call
100 * hammer2_chain.c functions directly. This short-cut is handled
101 * at the base of each cluster function.
102 */
103 #include <sys/cdefs.h>
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/types.h>
107
108 #include "hammer2.h"
109
110 /*
111 * Returns the bref type of the cluster's foucs.
112 *
113 * If the cluster is errored, returns HAMMER2_BREF_TYPE_EMPTY (0).
114 * The cluster must be locked.
115 */
116 uint8_t
hammer2_cluster_type(hammer2_cluster_t * cluster)117 hammer2_cluster_type(hammer2_cluster_t *cluster)
118 {
119 if (cluster->error == 0) {
120 KKASSERT(cluster->focus != NULL);
121 return(cluster->focus->bref.type);
122 }
123 return 0;
124 }
125
126 /*
127 * Returns the bref of the cluster's focus, sans any data-offset information
128 * (since offset information is per-node and wouldn't be useful).
129 *
130 * Callers use this function to access modify_tid, mirror_tid, type,
131 * key, and keybits.
132 *
133 * If the cluster is errored, returns an empty bref.
134 * The cluster must be locked.
135 */
136 void
hammer2_cluster_bref(hammer2_cluster_t * cluster,hammer2_blockref_t * bref)137 hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
138 {
139 if (cluster->error == 0) {
140 KKASSERT(cluster->focus != NULL);
141 *bref = cluster->focus->bref;
142 bref->data_off = 0;
143 } else {
144 bzero(bref, sizeof(*bref));
145 }
146 }
147
148 /*
149 * Create a degenerate cluster with one ref from a single locked chain.
150 * The returned cluster will be focused on the chain and inherit its
151 * error state.
152 *
153 * The chain's lock and reference are transfered to the new cluster, so
154 * the caller should not try to unlock the chain separately.
155 *
156 * We fake the flags.
157 */
158 void
hammer2_dummy_xop_from_chain(hammer2_xop_head_t * xop,hammer2_chain_t * chain)159 hammer2_dummy_xop_from_chain(hammer2_xop_head_t *xop, hammer2_chain_t *chain)
160 {
161 hammer2_cluster_t *cluster;
162
163 bzero(xop, sizeof(*xop));
164
165 cluster = &xop->cluster;
166 cluster->array[0].chain = chain;
167 cluster->array[0].flags = HAMMER2_CITEM_FEMOD;
168 cluster->nchains = 1;
169 cluster->focus = chain;
170 cluster->focus_index = 0;
171 cluster->pmp = chain->pmp;
172 cluster->refs = 1;
173 cluster->error = chain->error;
174 cluster->flags = HAMMER2_CLUSTER_LOCKED |
175 HAMMER2_CLUSTER_WRHARD |
176 HAMMER2_CLUSTER_RDHARD |
177 HAMMER2_CLUSTER_MSYNCED |
178 HAMMER2_CLUSTER_SSYNCED;
179 }
180
/*
 * Add a reference to a cluster and its underlying chains.
 *
 * We must also ref the underlying chains in order to allow ref/unlock
 * sequences to later re-lock.
 *
 * NOTE(review): only the cluster-level refcount is bumped here; the
 * underlying chains are not individually ref'd by this function.
 * Presumably their refs are taken when the chains are installed into
 * the cluster array — confirm against the cluster assembly code.
 */
void
hammer2_cluster_ref(hammer2_cluster_t *cluster)
{
	atomic_add_int(&cluster->refs, 1);
}
192
193 /*
194 * Drop the caller's reference to the cluster. When the ref count drops to
195 * zero this function frees the cluster and drops all underlying chains.
196 *
197 * In-progress read I/Os are typically detached from the cluster once the
198 * first one returns (the remaining stay attached to the DIOs but are then
199 * ignored and drop naturally).
200 */
201 void
hammer2_cluster_drop(hammer2_cluster_t * cluster)202 hammer2_cluster_drop(hammer2_cluster_t *cluster)
203 {
204 hammer2_chain_t *chain;
205 int i;
206
207 KKASSERT(cluster->refs > 0);
208 if (atomic_fetchadd_int(&cluster->refs, -1) == 1) {
209 cluster->focus = NULL; /* safety XXX chg to assert */
210 cluster->focus_index = 0;
211
212 for (i = 0; i < cluster->nchains; ++i) {
213 chain = cluster->array[i].chain;
214 if (chain) {
215 hammer2_chain_drop(chain);
216 cluster->array[i].chain = NULL; /* safety */
217 }
218 }
219 cluster->nchains = 0; /* safety */
220
221 kfree(cluster, M_HAMMER2);
222 /* cluster is invalid */
223 }
224 }
225
226 /*
227 * Lock a cluster. Cluster must already be referenced. Focus is maintained.
228 *
229 * WARNING! This function expects the caller to handle resolution of the
230 * cluster. We never re-resolve the cluster in this function,
231 * because it might be used to temporarily unlock/relock a cparent
232 * in an iteration or recursrion, and the cparents elements do not
233 * necessarily match.
234 */
235 void
hammer2_cluster_lock(hammer2_cluster_t * cluster,int how)236 hammer2_cluster_lock(hammer2_cluster_t *cluster, int how)
237 {
238 hammer2_chain_t *chain;
239 int i;
240
241 /* cannot be on inode-embedded cluster template, must be on copy */
242 KKASSERT(cluster->refs > 0);
243 KKASSERT((cluster->flags & HAMMER2_CLUSTER_INODE) == 0);
244 if (cluster->flags & HAMMER2_CLUSTER_LOCKED) {
245 panic("hammer2_cluster_lock: cluster %p already locked!\n",
246 cluster);
247 }
248 atomic_set_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);
249
250 /*
251 * Lock chains and resolve state.
252 */
253 for (i = 0; i < cluster->nchains; ++i) {
254 chain = cluster->array[i].chain;
255 if (chain == NULL)
256 continue;
257 hammer2_chain_lock(chain, how);
258 }
259 }
260
261 void
hammer2_cluster_unhold(hammer2_cluster_t * cluster)262 hammer2_cluster_unhold(hammer2_cluster_t *cluster)
263 {
264 hammer2_chain_t *chain;
265 int i;
266
267 for (i = 0; i < cluster->nchains; ++i) {
268 chain = cluster->array[i].chain;
269 if (chain == NULL)
270 continue;
271 hammer2_chain_unhold(chain);
272 }
273 }
274
275 void
hammer2_cluster_rehold(hammer2_cluster_t * cluster)276 hammer2_cluster_rehold(hammer2_cluster_t *cluster)
277 {
278 hammer2_chain_t *chain;
279 int i;
280
281 for (i = 0; i < cluster->nchains; ++i) {
282 chain = cluster->array[i].chain;
283 if (chain == NULL)
284 continue;
285 hammer2_chain_rehold(chain);
286 }
287 }
288
/*
 * This is used by the XOPS subsystem to calculate the state of
 * the collection and tell hammer2_xop_collect() what to do with it.
 * The collection can be in various states of desynchronization, the
 * caller specifically wants to resolve the passed-in key.
 *
 * Return values (HAMMER2_ERROR_*):
 *
 *	0		- Quorum agreement, key is valid
 *
 *	ENOENT		- Quorum agreement, end of scan
 *
 *	ESRCH		- Quorum agreement, key is INVALID (caller should
 *			  skip key).
 *
 *	EIO		- Quorum agreement but all elements had errors.
 *
 *	EDEADLK		- No quorum agreement possible for key, a repair
 *			  may be needed.  Caller has to decide what to do,
 *			  possibly iterating the key or generating an EIO.
 *
 *	EINPROGRESS	- No quorum agreement yet, but agreement is still
 *			  possible if caller waits for more responses.  Caller
 *			  should not iterate key.
 *
 *	CHECK		- CRC check error
 *
 * NOTE! If the pmp is in HMNT2_LOCAL mode, the cluster check always succeeds.
 *
 * XXX needs to handle SOFT_MASTER and SOFT_SLAVE
 */
int
hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *focus;
	hammer2_pfs_t *pmp;
	hammer2_tid_t quorum_tid;
	hammer2_tid_t last_best_quorum_tid;
	uint32_t nflags;
	int ttlmasters;		/* total masters (including errored ones) */
	int ttlslaves;		/* total slaves (including errored ones) */
	int nmasters;		/* masters matching the quorum tid */
	int nmasters_keymatch;	/* masters matching the key (any tid) */
	int nslaves;		/* slaves matching the quorum tid */
	int nquorum;		/* number of masters needed for quorum */
	int umasters;		/* unknown masters (still in progress) */
	int error;
	int i;

	/* Start from a clean slate; focus is re-elected below. */
	cluster->error = 0;
	cluster->focus = NULL;

	pmp = cluster->pmp;
	KKASSERT(pmp != NULL || cluster->nchains == 0);

	/*
	 * Calculate quorum: strictly more than half of the configured
	 * masters must agree.
	 */
	nquorum = pmp ? pmp->pfs_nmasters / 2 + 1 : 0;
	nflags = 0;
	ttlmasters = 0;
	ttlslaves = 0;

	/*
	 * Pass 1
	 *
	 * Count master/slave totals, reset per-element FEMOD/INVALID
	 * flags, and handle the degenerate SUPROOT case.
	 *
	 * NOTE: A NULL chain is not necessarily an error, it could be
	 *	 e.g. a lookup failure or the end of an iteration.
	 *	 Process normally.
	 */
	for (i = 0; i < cluster->nchains; ++i) {
		/* Every element starts invalid until proven in pass 3. */
		cluster->array[i].flags &= ~HAMMER2_CITEM_FEMOD;
		cluster->array[i].flags |= HAMMER2_CITEM_INVALID;

		chain = cluster->array[i].chain;
		error = cluster->array[i].error;
		if (chain && error) {
			if (cluster->focus == NULL || cluster->focus == chain) {
				/* error will be overridden by valid focus */
				/* XXX */
			}

			/*
			 * Must count total masters and slaves whether the
			 * chain is errored or not.
			 */
			switch (cluster->pmp->pfs_types[i]) {
			case HAMMER2_PFSTYPE_SUPROOT:
			case HAMMER2_PFSTYPE_MASTER:
				++ttlmasters;
				break;
			case HAMMER2_PFSTYPE_SLAVE:
				++ttlslaves;
				break;
			}
			continue;
		}
		switch (cluster->pmp->pfs_types[i]) {
		case HAMMER2_PFSTYPE_MASTER:
			++ttlmasters;
			break;
		case HAMMER2_PFSTYPE_SLAVE:
			++ttlslaves;
			break;
		case HAMMER2_PFSTYPE_SOFT_MASTER:
			nflags |= HAMMER2_CLUSTER_WRSOFT;
			nflags |= HAMMER2_CLUSTER_RDSOFT;
			break;
		case HAMMER2_PFSTYPE_SOFT_SLAVE:
			nflags |= HAMMER2_CLUSTER_RDSOFT;
			break;
		case HAMMER2_PFSTYPE_SUPROOT:
			/*
			 * Degenerate cluster representing the super-root
			 * topology on a single device.  Fake stuff so
			 * cluster ops work as expected.
			 */
			++ttlmasters;
			nflags |= HAMMER2_CLUSTER_WRHARD;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			cluster->focus_index = i;
			cluster->focus = chain;
			cluster->error = error;
			break;
		default:
			break;
		}
	}

	/*
	 * Pass 2
	 *
	 * Resolve nmasters		- master nodes fully match
	 *
	 * Resolve umasters		- master nodes operation still
	 *				  in progress
	 *
	 * Resolve nmasters_keymatch	- master nodes match the passed-in
	 *				  key and may or may not match
	 *				  the quorum-agreed tid.
	 *
	 * The quorum-agreed TID is the highest matching TID.  The outer
	 * while loop re-votes with progressively lower TID ceilings
	 * (last_best_quorum_tid) until a quorum is found or the TID space
	 * is exhausted.
	 */
	last_best_quorum_tid = HAMMER2_TID_MAX;
	umasters = 0;
	nmasters = 0;
	nmasters_keymatch = 0;
	quorum_tid = 0;		/* fix gcc warning */

	while (nmasters < nquorum && last_best_quorum_tid != 0) {
		umasters = 0;
		nmasters = 0;
		nmasters_keymatch = 0;
		quorum_tid = 0;

		for (i = 0; i < cluster->nchains; ++i) {
			/* XXX SOFT smpresent handling */
			switch(cluster->pmp->pfs_types[i]) {
			case HAMMER2_PFSTYPE_MASTER:
			case HAMMER2_PFSTYPE_SUPROOT:
				break;
			default:
				continue;
			}

			chain = cluster->array[i].chain;
			error = cluster->array[i].error;

			/*
			 * Skip elements still in progress.  umasters keeps
			 * track of masters that might still be in-progress.
			 */
			if (chain == NULL && (cluster->array[i].flags &
					      HAMMER2_CITEM_NULL) == 0) {
				++umasters;
				continue;
			}

			/*
			 * Key match?  A key of (hammer2_key_t)-1 matches
			 * any element.
			 */
			if (flags & HAMMER2_CHECK_NULL) {
				if (chain == NULL) {
					++nmasters;
					++nmasters_keymatch;
					if (cluster->error == 0)
						cluster->error = error;
				}
			} else if (chain &&
				   (key == (hammer2_key_t)-1 ||
				    chain->bref.key == key)) {
				++nmasters_keymatch;

				if (chain->bref.modify_tid <
				     last_best_quorum_tid &&
				    quorum_tid < chain->bref.modify_tid) {
					/*
					 * Select new TID as master if better
					 * than any found so far in this loop,
					 * as long as it does not reach the
					 * best tid found in the previous loop.
					 */
					nmasters = 0;
					quorum_tid = chain->bref.modify_tid;
				}
				if (quorum_tid == chain->bref.modify_tid) {
					/*
					 * TID matches current collection.
					 *
					 * (error handled in next pass)
					 */
					++nmasters;
					if (chain->error == 0) {
						cluster->focus = chain;
						cluster->focus_index = i;
					}
				}
			}
		}
		if (nmasters >= nquorum)
			break;
		last_best_quorum_tid = quorum_tid;
	}

	/*
	kprintf("nmasters %d/%d nmaster_keymatch=%d umasters=%d\n",
		nmasters, nquorum, nmasters_keymatch, umasters);
	*/

	/*
	 * Early return if we do not have enough masters.  EINPROGRESS
	 * means quorum might still be reached once the in-progress
	 * masters respond.
	 */
	if (nmasters < nquorum) {
		if (nmasters + umasters >= nquorum)
			return HAMMER2_ERROR_EINPROGRESS;
		if (nmasters_keymatch < nquorum)
			return HAMMER2_ERROR_ESRCH;
		return HAMMER2_ERROR_EDEADLK;
	}

	/*
	 * Validated end of scan.
	 */
	if (flags & HAMMER2_CHECK_NULL) {
		if (cluster->error == 0)
			cluster->error = HAMMER2_ERROR_ENOENT;
		return cluster->error;
	}

	/*
	 * If we have a NULL focus at this point the agreeing quorum all
	 * had chain errors.
	 */
	if (cluster->focus == NULL)
		return HAMMER2_ERROR_EIO;

	/*
	 * Pass 3
	 *
	 * We have quorum agreement, validate elements, not end of scan.
	 * Elements whose key and modify_tid match the quorum are marked
	 * valid (INVALID cleared); masters additionally get FEMOD.
	 */
	nslaves = 0;
	cluster->error = 0;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i].chain;
		error = cluster->array[i].error;
		if (chain == NULL ||
		    chain->bref.key != key ||
		    chain->bref.modify_tid != quorum_tid) {
			continue;
		}

		/*
		 * Quorum Match
		 *
		 * XXX for now, cumulative error.
		 */
		if (cluster->error == 0)
			cluster->error = error;

		switch (cluster->pmp->pfs_types[i]) {
		case HAMMER2_PFSTYPE_MASTER:
			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			nflags |= HAMMER2_CLUSTER_WRHARD;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			break;
		case HAMMER2_PFSTYPE_SLAVE:
			/*
			 * We must have enough up-to-date masters to reach
			 * a quorum and the slave modify_tid must match the
			 * quorum's modify_tid.
			 *
			 * Do not select an errored slave.
			 */
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			++nslaves;
			break;
		case HAMMER2_PFSTYPE_SOFT_MASTER:
			/*
			 * Directly mounted soft master always wins.  There
			 * should be only one.
			 */
			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			break;
		case HAMMER2_PFSTYPE_SOFT_SLAVE:
			/*
			 * Directly mounted soft slave always wins.  There
			 * should be only one.
			 *
			 * XXX
			 */
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			break;
		case HAMMER2_PFSTYPE_SUPROOT:
			/*
			 * spmp (degenerate case)
			 */
			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			nflags |= HAMMER2_CLUSTER_WRHARD;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			break;
		default:
			break;
		}
	}

	/*
	 * Focus now set, adjust ddflag.  Skip this pass if the focus
	 * is bad or if we are at the PFS root (the bref won't match at
	 * the PFS root, obviously).
	 *
	 * focus is probably not locked and it isn't safe to test its
	 * content (e.g. focus->data, focus->dio, other content).  We
	 * do not synchronize the dio to the cpu here.  In fact, in numerous
	 * situations the frontend doesn't even need to access its dio/data,
	 * so synchronizing it here would be wasteful.
	 */
	focus = cluster->focus;
	if (focus) {
		cluster->ddflag =
			(cluster->focus->bref.type == HAMMER2_BREF_TYPE_INODE);
	} else {
		cluster->ddflag = 0;
		goto skip4;
	}
	if (cluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
		goto skip4;

	/*
	 * Pass 4
	 *
	 * Validate the elements that were not marked invalid.  They should
	 * match the focus bref exactly; any mismatch invalidates the
	 * element and (optionally, under debug) reports it.
	 */
	for (i = 0; i < cluster->nchains; ++i) {
		int ddflag;

		chain = cluster->array[i].chain;

		if (chain == NULL)
			continue;
		if (chain == focus)
			continue;
		if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
			continue;

		ddflag = (chain->bref.type == HAMMER2_BREF_TYPE_INODE);
		if (chain->bref.type != focus->bref.type ||
		    chain->bref.key != focus->bref.key ||
		    chain->bref.keybits != focus->bref.keybits ||
		    chain->bref.modify_tid != focus->bref.modify_tid ||
		    chain->bytes != focus->bytes ||
		    ddflag != cluster->ddflag) {
			cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
			if (hammer2_debug & 1)
			kprintf("cluster_check: matching modify_tid failed "
				"bref test: idx=%d type=%02x/%02x "
				"key=%016jx/%d-%016jx/%d "
				"mod=%016jx/%016jx bytes=%u/%u\n",
				i,
				chain->bref.type, focus->bref.type,
				chain->bref.key, chain->bref.keybits,
				focus->bref.key, focus->bref.keybits,
				chain->bref.modify_tid, focus->bref.modify_tid,
				chain->bytes, focus->bytes);
			if (hammer2_debug & 0x4000)
				panic("cluster_check");
			/* flag issue and force resync? */
		}
	}
skip4:

	if (ttlslaves == 0)
		nflags |= HAMMER2_CLUSTER_NOSOFT;
	if (ttlmasters == 0)
		nflags |= HAMMER2_CLUSTER_NOHARD;

	/*
	 * Set SSYNCED or MSYNCED for slaves and masters respectively if
	 * all available nodes (even if 0 are available) are fully
	 * synchronized.  This is used by the synchronization thread to
	 * determine if there is work it could potentially accomplish.
	 */
	if (nslaves == ttlslaves)
		nflags |= HAMMER2_CLUSTER_SSYNCED;
	if (nmasters == ttlmasters)
		nflags |= HAMMER2_CLUSTER_MSYNCED;

	/*
	 * Determine if the cluster was successfully locked for the
	 * requested operation and generate an error code.  The cluster
	 * will not be locked (or ref'd) if an error is returned.
	 */
	atomic_set_int(&cluster->flags, nflags);
	atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_ZFLAGS & ~nflags);

	return cluster->error;
}
713
714 /*
715 * Unlock a cluster. Refcount and focus is maintained.
716 */
717 void
hammer2_cluster_unlock(hammer2_cluster_t * cluster)718 hammer2_cluster_unlock(hammer2_cluster_t *cluster)
719 {
720 hammer2_chain_t *chain;
721 int i;
722
723 if ((cluster->flags & HAMMER2_CLUSTER_LOCKED) == 0) {
724 kprintf("hammer2_cluster_unlock: cluster %p not locked\n",
725 cluster);
726 }
727 KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED);
728 KKASSERT(cluster->refs > 0);
729 atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);
730
731 for (i = 0; i < cluster->nchains; ++i) {
732 chain = cluster->array[i].chain;
733 if (chain)
734 hammer2_chain_unlock(chain);
735 }
736 }
737