xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd_case.c (revision f00e6aa6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * FMD Case Subsystem
31  *
32  * Diagnosis engines are expected to group telemetry events related to the
33  * diagnosis of a particular problem on the system into a set of cases.  The
34  * diagnosis engine may have any number of cases open at a given point in time.
35  * Some cases may eventually be *solved* by associating a suspect list of one
36  * or more problems with the case, at which point fmd publishes a list.suspect
37  * event for the case and it becomes visible to administrators and agents.
38  *
39  * Every case is named using a UUID, and is globally visible in the case hash.
40  * Cases are reference-counted, except for the reference from the case hash
41  * itself.  Consumers of case references include modules, which store active
42  * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
43  *
44  * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
45  * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
46  * or transport) and the case is referenced by the mod_cases list.  Once the
47  * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
48  * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
49  *
50  *			+------------+
51  *	     +----------|  UNSOLVED  |
52  *	     |		+------------+
53  *	   1 |	             4 |
54  *           |                 |
55  *	+----v---+ /-2->+------v-----+	  3	+--------+
56  *      | SOLVED |<     | CLOSE_WAIT |--------->| CLOSED |
57  *	+--------+ \-5->+------------+		+--------+
58  *	                       |                    |
59  *                           6 |                    | 7
60  *      		+------v-----+              |
61  *	                |  REPAIRED  |<-------------+
62  *			+------------+
63  *
64  * The state machine changes are triggered by calls to fmd_case_transition()
65  * from various locations inside of fmd, as described below:
66  *
67  * [1] Called by: fmd_case_solve()
68  *       Actions: FMD_CF_SOLVED flag is set in ci_flags
69  *                conviction policy is applied to suspect list
70  *                suspects convicted are marked faulty (F) in R$
71  *                list.suspect event logged and dispatched
72  *
73  * [2] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
74  *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
75  *                suspects convicted (F) are marked unusable (U) in R$
76  *                diagnosis engine fmdo_close() entry point scheduled
77  *                case transitions to CLOSED [3] upon exit from CLOSE_WAIT
78  *
79  * [3] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
80  *       Actions: list.isolated event dispatched
81  *                case deleted from module's list of open cases
82  *
83  * [4] Called by: fmd_case_close(), fmd_case_uuclose()
84  *       Actions: diagnosis engine fmdo_close() entry point scheduled
85  *                case is subsequently discarded by fmd_case_delete()
86  *
87  * [5] Called by: fmd_case_repair(), fmd_case_update()
88  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
89  *                diagnosis engine fmdo_close() entry point scheduled
90  *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
91  *
92  * [6] Called by: fmd_case_repair(), fmd_case_update()
93  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
94  *                suspects convicted are marked non faulty (!F) in R$
95  *                list.repaired event dispatched
96  *
97  * [7] Called by: fmd_case_repair(), fmd_case_update()
98  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
99  *                suspects convicted are marked non faulty (!F) in R$
100  *                list.repaired event dispatched
101  */
102 
103 #include <sys/fm/protocol.h>
104 #include <uuid/uuid.h>
105 #include <alloca.h>
106 
107 #include <fmd_alloc.h>
108 #include <fmd_module.h>
109 #include <fmd_error.h>
110 #include <fmd_conf.h>
111 #include <fmd_case.h>
112 #include <fmd_string.h>
113 #include <fmd_subr.h>
114 #include <fmd_protocol.h>
115 #include <fmd_event.h>
116 #include <fmd_eventq.h>
117 #include <fmd_dispq.h>
118 #include <fmd_buf.h>
119 #include <fmd_log.h>
120 #include <fmd_asru.h>
121 #include <fmd_xprt.h>
122 
123 #include <fmd.h>
124 
/*
 * Printable names of the case states.  The table is indexed by a case's
 * ci_state value when tracing state transitions, so the entries must stay in
 * the same order as the FMD_CASE_* state constants named beside them.
 */
static const char *const _fmd_case_snames[] = {
	/* FMD_CASE_UNSOLVED */		"UNSOLVED",
	/* FMD_CASE_SOLVED */		"SOLVED",
	/* FMD_CASE_CLOSE_WAIT */	"CLOSE_WAIT",
	/* FMD_CASE_CLOSED */		"CLOSED",
	/* FMD_CASE_REPAIRED */		"REPAIRED"
};
132 
133 fmd_case_hash_t *
134 fmd_case_hash_create(void)
135 {
136 	fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
137 
138 	(void) pthread_rwlock_init(&chp->ch_lock, NULL);
139 	chp->ch_hashlen = fmd.d_str_buckets;
140 	chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
141 	chp->ch_count = 0;
142 
143 	return (chp);
144 }
145 
146 /*
147  * Destroy the case hash.  Unlike most of our hash tables, no active references
148  * are kept by the case hash itself; all references come from other subsystems.
149  * The hash must be destroyed after all modules are unloaded; if anything was
150  * present in the hash it would be by definition a reference count leak.
151  */
152 void
153 fmd_case_hash_destroy(fmd_case_hash_t *chp)
154 {
155 	fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
156 	fmd_free(chp, sizeof (fmd_case_hash_t));
157 }
158 
159 /*
160  * Take a snapshot of the case hash by placing an additional hold on each
161  * member in an auxiliary array, and then call 'func' for each case.
162  */
163 void
164 fmd_case_hash_apply(fmd_case_hash_t *chp,
165     void (*func)(fmd_case_t *, void *), void *arg)
166 {
167 	fmd_case_impl_t *cp, **cps, **cpp;
168 	uint_t cpc, i;
169 
170 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
171 
172 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
173 	cpc = chp->ch_count;
174 
175 	for (i = 0; i < chp->ch_hashlen; i++) {
176 		for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next) {
177 			fmd_case_hold((fmd_case_t *)cp);
178 			*cpp++ = cp;
179 		}
180 	}
181 
182 	ASSERT(cpp == cps + cpc);
183 	(void) pthread_rwlock_unlock(&chp->ch_lock);
184 
185 	for (i = 0; i < cpc; i++) {
186 		func((fmd_case_t *)cps[i], arg);
187 		fmd_case_rele((fmd_case_t *)cps[i]);
188 	}
189 
190 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
191 }
192 
193 /*
194  * Look up the diagcode for this case and cache it in ci_code.  If no suspects
195  * were defined for this case or if the lookup fails, the event dictionary or
196  * module code is broken, and we set the event code to a precomputed default.
197  */
198 static const char *
199 fmd_case_mkcode(fmd_case_t *cp)
200 {
201 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
202 	fmd_case_susp_t *cis;
203 
204 	char **keys, **keyp;
205 	const char *s;
206 
207 	ASSERT(MUTEX_HELD(&cip->ci_lock));
208 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
209 
210 	fmd_free(cip->ci_code, cip->ci_codelen);
211 	cip->ci_codelen = cip->ci_mod->mod_codelen;
212 	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
213 	keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
214 
215 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
216 		if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
217 			keyp++;
218 	}
219 
220 	*keyp = NULL; /* mark end of keys[] array for libdiagcode */
221 
222 	if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
223 	    cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
224 		(void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
225 		fmd_free(cip->ci_code, cip->ci_codelen);
226 		cip->ci_codelen = strlen(s) + 1;
227 		cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
228 		(void) strcpy(cip->ci_code, s);
229 	}
230 
231 	return (cip->ci_code);
232 }
233 
234 nvlist_t *
235 fmd_case_mkevent(fmd_case_t *cp, const char *class)
236 {
237 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
238 	fmd_case_susp_t *cis;
239 
240 	fmd_asru_hash_t *ahp = fmd.d_asrus;
241 	fmd_asru_t *asru;
242 
243 	nvlist_t **nva, **nvp, *nvl, *fmri;
244 	uint8_t *ba, *bp;
245 
246 	int msg = B_TRUE;
247 	boolean_t b;
248 
249 	(void) pthread_mutex_lock(&cip->ci_lock);
250 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
251 
252 	nva = nvp = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
253 	ba = bp = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
254 
255 	/*
256 	 * For each suspect associated with the case, store its fault event
257 	 * nvlist in 'nva'.  We also look to see if any of the suspect faults
258 	 * have asked not to be messaged.  If any of them have made such a
259 	 * request, propagate that attribute to the composite list.* event.
260 	 * Finally, store each suspect's faulty status into the bitmap 'ba'.
261 	 */
262 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
263 		if (nvlist_lookup_boolean_value(cis->cis_nvl,
264 		    FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
265 			msg = B_FALSE;
266 
267 		if (nvlist_lookup_nvlist(cis->cis_nvl,
268 		    FM_FAULT_ASRU, &fmri) == 0 && (asru =
269 		    fmd_asru_hash_lookup_nvl(ahp, fmri, FMD_B_FALSE)) != NULL) {
270 			*bp++ = (asru->asru_flags & FMD_ASRU_FAULTY) != 0;
271 			fmd_asru_hash_release(ahp, asru);
272 		} else
273 			*bp++ = 0;
274 
275 		*nvp++ = cis->cis_nvl;
276 	}
277 
278 	if (cip->ci_code == NULL)
279 		(void) fmd_case_mkcode(cp);
280 
281 	nvl = fmd_protocol_list(class, cip->ci_mod->mod_fmri,
282 	    cip->ci_uuid, cip->ci_code, cip->ci_nsuspects, nva, ba, msg);
283 
284 	(void) pthread_mutex_unlock(&cip->ci_lock);
285 	return (nvl);
286 }
287 
288 /*
289  * Convict suspects in a case by applying a conviction policy and updating the
290  * resource cache prior to emitting the list.suspect event for the given case.
291  * At present, our policy is very simple: convict every suspect in the case.
292  * In the future, this policy can be extended and made configurable to permit:
293  *
294  * - convicting the suspect with the highest FIT rate
295  * - convicting the suspect with the cheapest FRU
296  * - convicting the suspect with the FRU that is in a depot's inventory
297  * - convicting the suspect with the longest lifetime
298  *
299  * and so forth.  A word to the wise: this problem is significantly harder that
300  * it seems at first glance.  Future work should heed the following advice:
301  *
302  * Hacking the policy into C code here is a very bad idea.  The policy needs to
303  * be decided upon very carefully and fundamentally encodes knowledge of what
304  * suspect list combinations can be emitted by what diagnosis engines.  As such
305  * fmd's code is the wrong location, because that would require fmd itself to
306  * be updated for every diagnosis engine change, defeating the entire design.
307  * The FMA Event Registry knows the suspect list combinations: policy inputs
308  * can be derived from it and used to produce per-module policy configuration.
309  *
310  * If the policy needs to be dynamic and not statically fixed at either fmd
311  * startup or module load time, any implementation of dynamic policy retrieval
312  * must employ some kind of caching mechanism or be part of a built-in module.
313  * The fmd_case_convict() function is called with locks held inside of fmd and
314  * is not a place where unbounded blocking on some inter-process or inter-
315  * system communication to another service (e.g. another daemon) can occur.
316  */
317 static void
318 fmd_case_convict(fmd_case_t *cp)
319 {
320 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
321 	fmd_asru_hash_t *ahp = fmd.d_asrus;
322 
323 	fmd_case_susp_t *cis;
324 	fmd_asru_t *asru;
325 	nvlist_t *fmri;
326 
327 	(void) pthread_mutex_lock(&cip->ci_lock);
328 	(void) fmd_case_mkcode(cp);
329 
330 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
331 		if (nvlist_lookup_nvlist(cis->cis_nvl, FM_FAULT_ASRU, &fmri))
332 			continue; /* no ASRU provided by diagnosis engine */
333 
334 		if ((asru = fmd_asru_hash_lookup_nvl(ahp,
335 		    fmri, FMD_B_TRUE)) == NULL) {
336 			fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
337 			    "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
338 			continue;
339 		}
340 
341 		(void) fmd_asru_clrflags(asru,
342 		    FMD_ASRU_UNUSABLE, cp, cis->cis_nvl);
343 		(void) fmd_asru_setflags(asru,
344 		    FMD_ASRU_FAULTY, cp, cis->cis_nvl);
345 
346 		fmd_asru_hash_release(ahp, asru);
347 	}
348 
349 	(void) pthread_mutex_unlock(&cip->ci_lock);
350 }
351 
352 void
353 fmd_case_publish(fmd_case_t *cp, uint_t state)
354 {
355 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
356 	fmd_event_t *e;
357 	nvlist_t *nvl;
358 	char *class;
359 
360 	if (state == FMD_CASE_CURRENT)
361 		state = cip->ci_state; /* use current state */
362 
363 	switch (state) {
364 	case FMD_CASE_SOLVED:
365 		fmd_case_convict(cp);
366 		nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
367 		(void) pthread_mutex_lock(&cip->ci_lock);
368 		if (cip->ci_diag == NULL)
369 			(void) nvlist_xdup(nvl, &cip->ci_diag, &fmd.d_nva);
370 		(void) pthread_mutex_unlock(&cip->ci_lock);
371 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
372 
373 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
374 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
375 		fmd_log_append(fmd.d_fltlog, e, cp);
376 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
377 		fmd_dispq_dispatch(fmd.d_disp, e, class);
378 
379 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
380 		cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
381 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
382 
383 		break;
384 
385 	case FMD_CASE_CLOSE_WAIT:
386 		fmd_case_hold(cp);
387 		e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
388 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
389 
390 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
391 		cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
392 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
393 
394 		break;
395 
396 	case FMD_CASE_CLOSED:
397 		nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
398 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
399 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
400 		fmd_dispq_dispatch(fmd.d_disp, e, class);
401 		break;
402 
403 	case FMD_CASE_REPAIRED:
404 		nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
405 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
406 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
407 		fmd_dispq_dispatch(fmd.d_disp, e, class);
408 		break;
409 	}
410 }
411 
412 fmd_case_t *
413 fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
414 {
415 	fmd_case_impl_t *cip;
416 	uint_t h;
417 
418 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
419 	h = fmd_strhash(uuid) % chp->ch_hashlen;
420 
421 	for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
422 		if (strcmp(cip->ci_uuid, uuid) == 0)
423 			break;
424 	}
425 
426 	if (cip != NULL)
427 		fmd_case_hold((fmd_case_t *)cip);
428 	else
429 		(void) fmd_set_errno(EFMD_CASE_INVAL);
430 
431 	(void) pthread_rwlock_unlock(&chp->ch_lock);
432 	return ((fmd_case_t *)cip);
433 }
434 
435 static fmd_case_impl_t *
436 fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
437 {
438 	fmd_case_impl_t *eip;
439 	uint_t h;
440 
441 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
442 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
443 
444 	for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
445 		if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0) {
446 			fmd_case_hold((fmd_case_t *)eip);
447 			(void) pthread_rwlock_unlock(&chp->ch_lock);
448 			return (eip); /* uuid already present */
449 		}
450 	}
451 
452 	cip->ci_next = chp->ch_hash[h];
453 	chp->ch_hash[h] = cip;
454 
455 	chp->ch_count++;
456 	ASSERT(chp->ch_count != 0);
457 
458 	(void) pthread_rwlock_unlock(&chp->ch_lock);
459 	return (cip);
460 }
461 
462 static void
463 fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
464 {
465 	fmd_case_impl_t *cp, **pp;
466 	uint_t h;
467 
468 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
469 
470 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
471 	pp = &chp->ch_hash[h];
472 
473 	for (cp = *pp; cp != NULL; cp = cp->ci_next) {
474 		if (cp != cip)
475 			pp = &cp->ci_next;
476 		else
477 			break;
478 	}
479 
480 	if (cp == NULL) {
481 		fmd_panic("case %p (%s) not found on hash chain %u\n",
482 		    (void *)cip, cip->ci_uuid, h);
483 	}
484 
485 	*pp = cp->ci_next;
486 	cp->ci_next = NULL;
487 
488 	ASSERT(chp->ch_count != 0);
489 	chp->ch_count--;
490 
491 	(void) pthread_rwlock_unlock(&chp->ch_lock);
492 }
493 
494 fmd_case_t *
495 fmd_case_create(fmd_module_t *mp, void *data)
496 {
497 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
498 	fmd_case_impl_t *eip = NULL;
499 	uuid_t uuid;
500 
501 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
502 	fmd_buf_hash_create(&cip->ci_bufs);
503 
504 	fmd_module_hold(mp);
505 	cip->ci_mod = mp;
506 	cip->ci_refs = 1;
507 	cip->ci_state = FMD_CASE_UNSOLVED;
508 	cip->ci_flags = FMD_CF_DIRTY;
509 	cip->ci_data = data;
510 
511 	/*
512 	 * Calling libuuid: get a clue.  The library interfaces cleverly do not
513 	 * define any constant for the length of an unparse string, and do not
514 	 * permit the caller to specify a buffer length for safety.  The spec
515 	 * says it will be 36 bytes, but we make it tunable just in case.
516 	 */
517 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
518 	cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
519 
520 	/*
521 	 * We expect this loop to execute only once, but code it defensively
522 	 * against the possibility of libuuid bugs.  Keep generating uuids and
523 	 * attempting to do a hash insert until we get a unique one.
524 	 */
525 	do {
526 		if (eip != NULL)
527 			fmd_case_rele((fmd_case_t *)eip);
528 		uuid_generate(uuid);
529 		uuid_unparse(uuid, cip->ci_uuid);
530 	} while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
531 
532 	ASSERT(fmd_module_locked(mp));
533 	fmd_list_append(&mp->mod_cases, cip);
534 	fmd_module_setcdirty(mp);
535 
536 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
537 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
538 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
539 
540 	return ((fmd_case_t *)cip);
541 }
542 
543 fmd_case_t *
544 fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
545     uint_t state, const char *uuid, const char *code)
546 {
547 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
548 	fmd_case_impl_t *eip;
549 
550 	ASSERT(state < FMD_CASE_REPAIRED);
551 
552 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
553 	fmd_buf_hash_create(&cip->ci_bufs);
554 
555 	fmd_module_hold(mp);
556 	cip->ci_mod = mp;
557 	cip->ci_xprt = xp;
558 	cip->ci_refs = 1;
559 	cip->ci_state = state;
560 	cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
561 	cip->ci_uuidlen = strlen(cip->ci_uuid);
562 	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
563 	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
564 
565 	if (state > FMD_CASE_CLOSE_WAIT)
566 		cip->ci_flags |= FMD_CF_SOLVED;
567 
568 	/*
569 	 * Insert the case into the global case hash.  If the specified UUID is
570 	 * already present, check to see if it is an orphan: if so, reclaim it;
571 	 * otherwise if it is owned by a different module then return NULL.
572 	 */
573 	if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
574 		(void) pthread_mutex_lock(&cip->ci_lock);
575 		cip->ci_refs--; /* decrement to zero */
576 		fmd_case_destroy((fmd_case_t *)cip, B_FALSE);
577 
578 		cip = eip; /* switch 'cip' to the existing case */
579 		(void) pthread_mutex_lock(&cip->ci_lock);
580 
581 		/*
582 		 * If the ASRU cache is trying to recreate an orphan, then just
583 		 * return the existing case that we found without changing it.
584 		 */
585 		if (mp == fmd.d_rmod) {
586 			(void) pthread_mutex_unlock(&cip->ci_lock);
587 			fmd_case_rele((fmd_case_t *)cip);
588 			return ((fmd_case_t *)cip);
589 		}
590 
591 		/*
592 		 * If the existing case isn't an orphan or is being proxied,
593 		 * then we have a UUID conflict: return failure to the caller.
594 		 */
595 		if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
596 			(void) pthread_mutex_unlock(&cip->ci_lock);
597 			fmd_case_rele((fmd_case_t *)cip);
598 			return (NULL);
599 		}
600 
601 		/*
602 		 * If the new module is reclaiming an orphaned case, remove
603 		 * the case from the root module, switch ci_mod, and then fall
604 		 * through to adding the case to the new owner module 'mp'.
605 		 */
606 		fmd_module_lock(cip->ci_mod);
607 		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
608 		fmd_module_unlock(cip->ci_mod);
609 
610 		fmd_module_rele(cip->ci_mod);
611 		cip->ci_mod = mp;
612 		fmd_module_hold(mp);
613 
614 		(void) pthread_mutex_unlock(&cip->ci_lock);
615 		fmd_case_rele((fmd_case_t *)cip);
616 	}
617 
618 	ASSERT(fmd_module_locked(mp));
619 	fmd_list_append(&mp->mod_cases, cip);
620 
621 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
622 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
623 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
624 
625 	return ((fmd_case_t *)cip);
626 }
627 
628 void
629 fmd_case_destroy(fmd_case_t *cp, int visible)
630 {
631 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
632 	fmd_case_item_t *cit, *ncit;
633 	fmd_case_susp_t *cis, *ncis;
634 
635 	ASSERT(MUTEX_HELD(&cip->ci_lock));
636 	ASSERT(cip->ci_refs == 0);
637 
638 	if (visible) {
639 		TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
640 		fmd_case_hash_delete(fmd.d_cases, cip);
641 	}
642 
643 	for (cit = cip->ci_items; cit != NULL; cit = ncit) {
644 		ncit = cit->cit_next;
645 		fmd_event_rele(cit->cit_event);
646 		fmd_free(cit, sizeof (fmd_case_item_t));
647 	}
648 
649 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
650 		ncis = cis->cis_next;
651 		nvlist_free(cis->cis_nvl);
652 		fmd_free(cis, sizeof (fmd_case_susp_t));
653 	}
654 
655 	if (cip->ci_principal != NULL)
656 		fmd_event_rele(cip->ci_principal);
657 
658 	fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
659 	fmd_free(cip->ci_code, cip->ci_codelen);
660 	fmd_buf_hash_destroy(&cip->ci_bufs);
661 
662 	if (cip->ci_diag != NULL)
663 		nvlist_free(cip->ci_diag);
664 
665 	fmd_module_rele(cip->ci_mod);
666 	fmd_free(cip, sizeof (fmd_case_impl_t));
667 }
668 
669 void
670 fmd_case_hold(fmd_case_t *cp)
671 {
672 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
673 
674 	(void) pthread_mutex_lock(&cip->ci_lock);
675 	cip->ci_refs++;
676 	ASSERT(cip->ci_refs != 0);
677 	(void) pthread_mutex_unlock(&cip->ci_lock);
678 }
679 
680 void
681 fmd_case_hold_locked(fmd_case_t *cp)
682 {
683 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
684 
685 	ASSERT(MUTEX_HELD(&cip->ci_lock));
686 	cip->ci_refs++;
687 	ASSERT(cip->ci_refs != 0);
688 }
689 
690 void
691 fmd_case_rele(fmd_case_t *cp)
692 {
693 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
694 
695 	(void) pthread_mutex_lock(&cip->ci_lock);
696 	ASSERT(cip->ci_refs != 0);
697 
698 	if (--cip->ci_refs == 0)
699 		fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
700 	else
701 		(void) pthread_mutex_unlock(&cip->ci_lock);
702 }
703 
704 int
705 fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
706 {
707 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
708 	fmd_case_item_t *cit;
709 	fmd_event_t *oep;
710 	uint_t state;
711 	int new;
712 
713 	fmd_event_hold(ep);
714 	(void) pthread_mutex_lock(&cip->ci_lock);
715 
716 	if (cip->ci_flags & FMD_CF_SOLVED)
717 		state = FMD_EVS_DIAGNOSED;
718 	else
719 		state = FMD_EVS_ACCEPTED;
720 
721 	oep = cip->ci_principal;
722 	cip->ci_principal = ep;
723 
724 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
725 		if (cit->cit_event == ep)
726 			break;
727 	}
728 
729 	cip->ci_flags |= FMD_CF_DIRTY;
730 	new = cit == NULL && ep != oep;
731 
732 	(void) pthread_mutex_unlock(&cip->ci_lock);
733 
734 	fmd_module_setcdirty(cip->ci_mod);
735 	fmd_event_transition(ep, state);
736 
737 	if (oep != NULL)
738 		fmd_event_rele(oep);
739 
740 	return (new);
741 }
742 
743 int
744 fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
745 {
746 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
747 	fmd_case_item_t *cit;
748 	uint_t state;
749 	int new;
750 
751 	(void) pthread_mutex_lock(&cip->ci_lock);
752 
753 	if (cip->ci_flags & FMD_CF_SOLVED)
754 		state = FMD_EVS_DIAGNOSED;
755 	else
756 		state = FMD_EVS_ACCEPTED;
757 
758 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
759 		if (cit->cit_event == ep)
760 			break;
761 	}
762 
763 	new = cit == NULL && ep != cip->ci_principal;
764 
765 	/*
766 	 * If the event is already in the case or the case is already solved,
767 	 * there is no reason to save it: just transition it appropriately.
768 	 */
769 	if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
770 		(void) pthread_mutex_unlock(&cip->ci_lock);
771 		fmd_event_transition(ep, state);
772 		return (new);
773 	}
774 
775 	cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
776 	fmd_event_hold(ep);
777 
778 	cit->cit_next = cip->ci_items;
779 	cit->cit_event = ep;
780 
781 	cip->ci_items = cit;
782 	cip->ci_nitems++;
783 
784 	cip->ci_flags |= FMD_CF_DIRTY;
785 	(void) pthread_mutex_unlock(&cip->ci_lock);
786 
787 	fmd_module_setcdirty(cip->ci_mod);
788 	fmd_event_transition(ep, state);
789 
790 	return (new);
791 }
792 
793 void
794 fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
795 {
796 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
797 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
798 
799 	(void) pthread_mutex_lock(&cip->ci_lock);
800 	ASSERT(cip->ci_state < FMD_CASE_SOLVED);
801 	cip->ci_flags |= FMD_CF_DIRTY;
802 
803 	cis->cis_next = cip->ci_suspects;
804 	cis->cis_nvl = nvl;
805 
806 	cip->ci_suspects = cis;
807 	cip->ci_nsuspects++;
808 
809 	(void) pthread_mutex_unlock(&cip->ci_lock);
810 	fmd_module_setcdirty(cip->ci_mod);
811 }
812 
813 void
814 fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
815 {
816 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
817 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
818 
819 	(void) pthread_mutex_lock(&cip->ci_lock);
820 	ASSERT(cip->ci_state == FMD_CASE_CLOSED);
821 	ASSERT(cip->ci_mod == fmd.d_rmod);
822 
823 	cis->cis_next = cip->ci_suspects;
824 	cis->cis_nvl = nvl;
825 
826 	cip->ci_suspects = cis;
827 	cip->ci_nsuspects++;
828 
829 	(void) pthread_mutex_unlock(&cip->ci_lock);
830 }
831 
832 void
833 fmd_case_reset_suspects(fmd_case_t *cp)
834 {
835 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
836 	fmd_case_susp_t *cis, *ncis;
837 
838 	(void) pthread_mutex_lock(&cip->ci_lock);
839 	ASSERT(cip->ci_state < FMD_CASE_SOLVED);
840 
841 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
842 		ncis = cis->cis_next;
843 		nvlist_free(cis->cis_nvl);
844 		fmd_free(cis, sizeof (fmd_case_susp_t));
845 	}
846 
847 	cip->ci_flags |= FMD_CF_DIRTY;
848 	cip->ci_suspects = NULL;
849 	cip->ci_nsuspects = 0;
850 
851 	(void) pthread_mutex_unlock(&cip->ci_lock);
852 	fmd_module_setcdirty(cip->ci_mod);
853 }
854 
855 /*
856  * Grab ci_lock and update the case state and set the dirty bit.  Then perform
857  * whatever actions and emit whatever events are appropriate for the state.
858  * Refer to the topmost block comment explaining the state machine for details.
859  */
860 void
861 fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
862 {
863 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
864 
865 	uint_t old_state;
866 	fmd_case_susp_t *cis;
867 	fmd_case_item_t *cit;
868 	fmd_asru_t *asru;
869 	fmd_event_t *e;
870 	nvlist_t *nvl;
871 
872 	ASSERT(state <= FMD_CASE_REPAIRED);
873 	(void) pthread_mutex_lock(&cip->ci_lock);
874 	cip->ci_flags |= flags;
875 
876 	if (cip->ci_state >= state) {
877 		(void) pthread_mutex_unlock(&cip->ci_lock);
878 		return; /* already in specified state */
879 	}
880 
881 	TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
882 	    _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
883 
884 	old_state = cip->ci_state;
885 	cip->ci_state = state;
886 	cip->ci_flags |= FMD_CF_DIRTY;
887 
888 	if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
889 		fmd_module_setcdirty(cip->ci_mod);
890 
891 	switch (state) {
892 	case FMD_CASE_SOLVED:
893 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
894 			fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);
895 
896 		if (cip->ci_principal != NULL) {
897 			fmd_event_transition(cip->ci_principal,
898 			    FMD_EVS_DIAGNOSED);
899 		}
900 		break;
901 
902 	case FMD_CASE_CLOSE_WAIT:
903 		/*
904 		 * If the case was never solved, do not change ASRUs.
905 		 * If the case was never fmd_case_closed, do not change ASRUs.
906 		 * If the case was repaired, do not change ASRUs.
907 		 */
908 		if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
909 		    FMD_CF_REPAIRED)) != (FMD_CF_SOLVED | FMD_CF_ISOLATED))
910 			goto close_wait_finish;
911 
912 		/*
913 		 * For each fault event in the suspect list, attempt to look up
914 		 * the corresponding ASRU in the ASRU dictionary.  If the ASRU
915 		 * is found there and is marked faulty, we now mark it unusable
916 		 * and record the case meta-data and fault event with the ASRU.
917 		 */
918 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
919 			if (nvlist_lookup_nvlist(cis->cis_nvl, FM_FAULT_ASRU,
920 			    &nvl) == 0 && (asru = fmd_asru_hash_lookup_nvl(
921 			    fmd.d_asrus, nvl, FMD_B_FALSE)) != NULL) {
922 				(void) fmd_asru_setflags(asru,
923 				    FMD_ASRU_UNUSABLE, cp, cis->cis_nvl);
924 				fmd_asru_hash_release(fmd.d_asrus, asru);
925 			}
926 		}
927 
928 	close_wait_finish:
929 		if (!fmd_case_orphaned(cp))
930 			break; /* state transition complete */
931 
932 		/*
933 		 * If an orphaned case transitions to CLOSE_WAIT, the owning
934 		 * module is no longer loaded: continue on to CASE_CLOSED.
935 		 */
936 		state = cip->ci_state = FMD_CASE_CLOSED;
937 		/*FALLTHRU*/
938 
939 	case FMD_CASE_CLOSED:
940 		ASSERT(fmd_case_orphaned(cp));
941 		fmd_module_lock(cip->ci_mod);
942 		fmd_list_append(&cip->ci_mod->mod_cases, cip);
943 		fmd_module_unlock(cip->ci_mod);
944 		break;
945 
946 	case FMD_CASE_REPAIRED:
947 		ASSERT(fmd_case_orphaned(cp));
948 
949 		if (old_state == FMD_CASE_CLOSE_WAIT)
950 			break; /* case was never closed (transition 6 above) */
951 
952 		fmd_module_lock(cip->ci_mod);
953 		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
954 		fmd_module_unlock(cip->ci_mod);
955 		break;
956 	}
957 
958 	(void) pthread_mutex_unlock(&cip->ci_lock);
959 
960 	/*
961 	 * If the module has initialized, then publish the appropriate event
962 	 * for the new case state.  If not, we are being called from the
963 	 * checkpoint code during module load, in which case the module's
964 	 * _fmd_init() routine hasn't finished yet, and our event dictionaries
965 	 * may not be open yet, which will prevent us from computing the event
966 	 * code.  Defer the call to fmd_case_publish() by enqueuing a PUBLISH
967 	 * event in our queue: this won't be processed until _fmd_init is done.
968 	 */
969 	if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
970 		fmd_case_publish(cp, state);
971 	else {
972 		fmd_case_hold(cp);
973 		e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
974 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
975 	}
976 
977 	/*
978 	 * If we transitioned to CLOSED or REPAIRED, adjust the reference count
979 	 * to reflect our addition to or removal from fmd.d_rmod->mod_cases.
980 	 */
981 	if (state == FMD_CASE_CLOSED)
982 		fmd_case_hold(cp);
983 	else if (state == FMD_CASE_REPAIRED && old_state != FMD_CASE_CLOSE_WAIT)
984 		fmd_case_rele(cp);
985 }
986 
987 void
988 fmd_case_setdirty(fmd_case_t *cp)
989 {
990 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
991 
992 	(void) pthread_mutex_lock(&cip->ci_lock);
993 	cip->ci_flags |= FMD_CF_DIRTY;
994 	(void) pthread_mutex_unlock(&cip->ci_lock);
995 
996 	fmd_module_setcdirty(cip->ci_mod);
997 }
998 
999 void
1000 fmd_case_clrdirty(fmd_case_t *cp)
1001 {
1002 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1003 
1004 	(void) pthread_mutex_lock(&cip->ci_lock);
1005 	cip->ci_flags &= ~FMD_CF_DIRTY;
1006 	(void) pthread_mutex_unlock(&cip->ci_lock);
1007 }
1008 
1009 void
1010 fmd_case_commit(fmd_case_t *cp)
1011 {
1012 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1013 	fmd_case_item_t *cit;
1014 
1015 	(void) pthread_mutex_lock(&cip->ci_lock);
1016 
1017 	if (cip->ci_flags & FMD_CF_DIRTY) {
1018 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
1019 			fmd_event_commit(cit->cit_event);
1020 
1021 		if (cip->ci_principal != NULL)
1022 			fmd_event_commit(cip->ci_principal);
1023 
1024 		fmd_buf_hash_commit(&cip->ci_bufs);
1025 		cip->ci_flags &= ~FMD_CF_DIRTY;
1026 	}
1027 
1028 	(void) pthread_mutex_unlock(&cip->ci_lock);
1029 }
1030 
1031 /*
1032  * Indicate that the case may need to change state because one or more of the
1033  * ASRUs named as a suspect has changed state.  We examine all the suspects
1034  * and if none are still faulty, we initiate a case close transition.
1035  */
1036 void
1037 fmd_case_update(fmd_case_t *cp)
1038 {
1039 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1040 	fmd_case_susp_t *cis;
1041 	fmd_asru_t *asru;
1042 	nvlist_t *nvl;
1043 
1044 	int astate = 0;
1045 	uint_t cstate;
1046 
1047 	(void) pthread_mutex_lock(&cip->ci_lock);
1048 	cstate = cip->ci_state;
1049 
1050 	if (cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) {
1051 		(void) pthread_mutex_unlock(&cip->ci_lock);
1052 		return; /* update is not appropriate */
1053 	}
1054 
1055 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1056 		if (nvlist_lookup_nvlist(cis->cis_nvl, FM_FAULT_ASRU,
1057 		    &nvl) == 0 && (asru = fmd_asru_hash_lookup_nvl(
1058 		    fmd.d_asrus, nvl, FMD_B_FALSE)) != NULL) {
1059 			astate |= fmd_asru_getstate(asru);
1060 			fmd_asru_hash_release(fmd.d_asrus, asru);
1061 		}
1062 	}
1063 
1064 	(void) pthread_mutex_unlock(&cip->ci_lock);
1065 
1066 	if (astate & FMD_ASRU_FAULTY)
1067 		return; /* one or more suspects are still marked faulty */
1068 
1069 	if (cstate == FMD_CASE_CLOSED)
1070 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1071 	else
1072 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1073 }
1074 
1075 /*
1076  * Delete a closed case from the module's case list once the fmdo_close() entry
1077  * point has run to completion.  If the case is owned by a transport module,
1078  * tell the transport to proxy a case close on the other end of the transport.
1079  * If not, transition to the appropriate next state based on ci_flags.  This
1080  * function represents the end of CLOSE_WAIT and transitions the case to either
1081  * CLOSED or REPAIRED or discards it entirely because it was never solved;
1082  * refer to the topmost block comment explaining the state machine for details.
1083  */
1084 void
1085 fmd_case_delete(fmd_case_t *cp)
1086 {
1087 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1088 
1089 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1090 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
1091 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1092 
1093 	ASSERT(fmd_module_locked(cip->ci_mod));
1094 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1095 
1096 	if (cip->ci_xprt == NULL)
1097 		fmd_module_setcdirty(cip->ci_mod);
1098 
1099 	fmd_module_rele(cip->ci_mod);
1100 	cip->ci_mod = fmd.d_rmod;
1101 	fmd_module_hold(cip->ci_mod);
1102 
1103 	/*
1104 	 * If a proxied case finishes CLOSE_WAIT, then it can be discarded
1105 	 * rather than orphaned because by definition it can have no entries
1106 	 * in the resource cache of the current fault manager.
1107 	 */
1108 	if (cip->ci_xprt != NULL)
1109 		fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
1110 	else if (cip->ci_flags & FMD_CF_REPAIRED)
1111 		fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
1112 	else if (cip->ci_flags & FMD_CF_ISOLATED)
1113 		fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
1114 
1115 	fmd_case_rele(cp);
1116 }
1117 
1118 void
1119 fmd_case_discard(fmd_case_t *cp)
1120 {
1121 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1122 
1123 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1124 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
1125 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1126 
1127 	ASSERT(fmd_module_locked(cip->ci_mod));
1128 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1129 	fmd_case_rele(cp);
1130 }
1131 
1132 /*
1133  * Indicate that the problem corresponding to a case has been repaired by
1134  * clearing the faulty bit on each ASRU named as a suspect.  If the case hasn't
1135  * already been closed, this function initiates the transition to CLOSE_WAIT.
1136  * The caller must have the case held from fmd_case_hash_lookup(), so we can
1137  * grab and drop ci_lock without the case being able to be freed in between.
1138  */
1139 int
1140 fmd_case_repair(fmd_case_t *cp)
1141 {
1142 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1143 	fmd_case_susp_t *cis;
1144 	nvlist_t *nvl;
1145 	uint_t cstate;
1146 
1147 	fmd_asru_hash_t *ahp = fmd.d_asrus;
1148 	fmd_asru_t **aa;
1149 	uint_t i, an;
1150 
1151 	(void) pthread_mutex_lock(&cip->ci_lock);
1152 	cstate = cip->ci_state;
1153 
1154 	if (cip->ci_xprt != NULL) {
1155 		(void) pthread_mutex_unlock(&cip->ci_lock);
1156 		return (fmd_set_errno(EFMD_CASE_OWNER));
1157 	}
1158 
1159 	if (cstate < FMD_CASE_SOLVED) {
1160 		(void) pthread_mutex_unlock(&cip->ci_lock);
1161 		return (fmd_set_errno(EFMD_CASE_STATE));
1162 	}
1163 
1164 	/*
1165 	 * Take a snapshot of any ASRUs referenced by the case that are present
1166 	 * in the resource cache.  Then drop ci_lock and clear the faulty bit
1167 	 * on each ASRU (we can't call fmd_asru_clrflags() with ci_lock held).
1168 	 */
1169 	an = cip->ci_nsuspects;
1170 	aa = alloca(sizeof (fmd_asru_t *) * an);
1171 	bzero(aa, sizeof (fmd_asru_t *) * an);
1172 
1173 	for (i = 0, cis = cip->ci_suspects;
1174 	    cis != NULL; cis = cis->cis_next, i++) {
1175 		if (nvlist_lookup_nvlist(cis->cis_nvl,
1176 		    FM_FAULT_ASRU, &nvl) == 0)
1177 			aa[i] = fmd_asru_hash_lookup_nvl(ahp, nvl, FMD_B_FALSE);
1178 	}
1179 
1180 	(void) pthread_mutex_unlock(&cip->ci_lock);
1181 
1182 	for (i = 0; i < an; i++) {
1183 		if (aa[i] == NULL)
1184 			continue; /* no asru was found */
1185 		(void) fmd_asru_clrflags(aa[i], FMD_ASRU_FAULTY, NULL, NULL);
1186 		fmd_asru_hash_release(ahp, aa[i]);
1187 	}
1188 
1189 	if (cstate == FMD_CASE_CLOSED)
1190 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1191 	else
1192 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1193 
1194 	return (0);
1195 }
1196 
1197 int
1198 fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
1199 {
1200 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1201 	fmd_case_item_t *cit;
1202 	uint_t state;
1203 	int rv = 0;
1204 
1205 	(void) pthread_mutex_lock(&cip->ci_lock);
1206 
1207 	if (cip->ci_state >= FMD_CASE_SOLVED)
1208 		state = FMD_EVS_DIAGNOSED;
1209 	else
1210 		state = FMD_EVS_ACCEPTED;
1211 
1212 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1213 		if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
1214 			break;
1215 	}
1216 
1217 	if (rv == 0 && cip->ci_principal != NULL)
1218 		rv = fmd_event_equal(ep, cip->ci_principal);
1219 
1220 	(void) pthread_mutex_unlock(&cip->ci_lock);
1221 
1222 	if (rv != 0)
1223 		fmd_event_transition(ep, state);
1224 
1225 	return (rv);
1226 }
1227 
1228 int
1229 fmd_case_orphaned(fmd_case_t *cp)
1230 {
1231 	return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
1232 }
1233