xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/utsname.h>
31 #include <sys/param.h>
32 #include <sys/systeminfo.h>
33 #include <sys/fm/util.h>
34 
35 #include <limits.h>
36 #include <unistd.h>
37 #include <signal.h>
38 #include <stdlib.h>
39 #include <stdio.h>
40 
41 #include <fmd_conf.h>
42 #include <fmd_dispq.h>
43 #include <fmd_timerq.h>
44 #include <fmd_subr.h>
45 #include <fmd_error.h>
46 #include <fmd_module.h>
47 #include <fmd_thread.h>
48 #include <fmd_alloc.h>
49 #include <fmd_string.h>
50 #include <fmd_transport.h>
51 #include <fmd_builtin.h>
52 #include <fmd_ustat.h>
53 #include <fmd_protocol.h>
54 #include <fmd_scheme.h>
55 #include <fmd_asru.h>
56 #include <fmd_case.h>
57 #include <fmd_log.h>
58 #include <fmd_rpc.h>
59 #include <fmd_dr.h>
60 
61 #include <fmd.h>
62 
extern const nv_alloc_ops_t fmd_nv_alloc_ops;	/* see fmd_nv.c */

/*
 * Daemon version string, plus the buffers that hold the identity of the
 * native system.  The three static buffers are filled in by fmd_create() using
 * sysinfo(2) and uname(2), and are referenced by _fmd_conf[] as the values for
 * the "platform", "isaname", "machine", "osrelease", "osversion" and "server"
 * properties.
 */
const char _fmd_version[] = "1.0";		/* daemon version string */
static char _fmd_plat[MAXNAMELEN];		/* native platform string */
static char _fmd_isa[MAXNAMELEN];		/* native instruction set */
static struct utsname _fmd_uts;			/* native uname(2) info */
69 
/*
 * Note: the configuration file path is ordered from most common to most host-
 * specific because each new conf file is merged into, and overrides, the ones
 * processed before it.  The module paths are in the opposite order, from most
 * specific to most common, because once a module is loaded fmd will not try
 * to load another module with the same name.
 */
76 
/*
 * Default search path for the root configuration file ("conf_path"), listed
 * from most common to most host-specific.
 * NOTE(review): the %r, %m and %i tokens are presumably expanded by fmd_conf.c
 * to the root directory, machine name and platform name -- confirm there.
 */
static const char _fmd_conf_path[] =
	"%r/usr/lib/fm/fmd:"
	"%r/usr/platform/%m/lib/fm/fmd:"
	"%r/usr/platform/%i/lib/fm/fmd:"
	"%r/etc/fm/fmd";

/* Default "agent.path": most specific directory first. */
static const char _fmd_agent_path[] =
	"%r/usr/platform/%i/lib/fm/fmd/agents:"
	"%r/usr/platform/%m/lib/fm/fmd/agents:"
	"%r/usr/lib/fm/fmd/agents";

/* Default "plugin.path": most specific directory first. */
static const char _fmd_plugin_path[] =
	"%r/usr/platform/%i/lib/fm/fmd/plugins:"
	"%r/usr/platform/%m/lib/fm/fmd/plugins:"
	"%r/usr/lib/fm/fmd/plugins";

/*
 * Default "schemedir".  This is a single directory with no %r token; fmd_run()
 * hands it to fmd_scheme_hash_create() together with d_rootdir.
 */
static const char _fmd_scheme_path[] =
	"usr/lib/fm/fmd/schemes";
95 
/*
 * Mode tables used by the fmd_*_set() routines below.  Each entry is a mode
 * name, a human-readable description, and the value associated with the mode;
 * each table is terminated by an entry whose name is NULL.
 */

/* Modes accepted by fmd_cerror_set() (the "client.error" property). */
static const fmd_conf_mode_t _fmd_cerror_modes[] = {
	{ "unload", "unload offending client module", FMD_CERROR_UNLOAD },
	{ "stop", "stop daemon for debugger attach", FMD_CERROR_STOP },
	{ "abort", "abort daemon and force core dump", FMD_CERROR_ABORT },
	{ NULL, NULL, 0 }
};

/* Modes accepted by fmd_dbout_set() ("dbout" and "client.dbout"). */
static const fmd_conf_mode_t _fmd_dbout_modes[] = {
	{ "stderr", "send debug messages to stderr", FMD_DBOUT_STDERR },
	{ "syslog", "send debug messages to syslog", FMD_DBOUT_SYSLOG },
	{ NULL, NULL, 0 }
};

/*
 * Modes accepted by fmd_debug_set() (the "debug" property).  fmd_help() prints
 * the name and description of every entry in this table.
 */
static const fmd_conf_mode_t _fmd_debug_modes[] = {
	{ "help", "display debugging modes and exit", FMD_DBG_HELP },
	{ "mod", "debug module load/unload/locking", FMD_DBG_MOD },
	{ "disp", "debug dispatch queue processing", FMD_DBG_DISP },
	{ "xprt", "debug transport-specific routines", FMD_DBG_XPRT },
	{ "evt", "debug event subsystem routines", FMD_DBG_EVT },
	{ "log", "debug log subsystem routines", FMD_DBG_LOG },
	{ "tmr", "debug timer subsystem routines", FMD_DBG_TMR },
	{ "fmri", "debug fmri subsystem routines", FMD_DBG_FMRI },
	{ "asru", "debug asru subsystem routines", FMD_DBG_ASRU },
	{ "case", "debug case subsystem routines", FMD_DBG_CASE },
	{ "ckpt", "debug checkpoint routines", FMD_DBG_CKPT },
	{ "rpc", "debug rpc service routines", FMD_DBG_RPC },
	{ "all", "enable all available debug modes", FMD_DBG_ALL },
	{ NULL, NULL, 0 }
};
125 
126 static int
127 fmd_cerror_set(fmd_conf_param_t *pp, const char *value)
128 {
129 	return (fmd_conf_mode_set(_fmd_cerror_modes, pp, value));
130 }
131 
132 static int
133 fmd_dbout_set(fmd_conf_param_t *pp, const char *value)
134 {
135 	return (fmd_conf_mode_set(_fmd_dbout_modes, pp, value));
136 }
137 
138 static int
139 fmd_debug_set(fmd_conf_param_t *pp, const char *value)
140 {
141 	int err = fmd_conf_mode_set(_fmd_debug_modes, pp, value);
142 
143 	if (err == 0)
144 		fmd.d_fmd_debug = pp->cp_value.cpv_num;
145 
146 	return (err);
147 }
148 
149 static int
150 fmd_trmode_set(fmd_conf_param_t *pp, const char *value)
151 {
152 	fmd_tracebuf_f *func;
153 
154 	if (strcasecmp(value, "none") == 0)
155 		func = fmd_trace_none;
156 	else if (strcasecmp(value, "lite") == 0)
157 		func = fmd_trace_lite;
158 	else if (strcasecmp(value, "full") == 0)
159 		func = fmd_trace_full;
160 	else
161 		return (fmd_set_errno(EFMD_CONF_INVAL));
162 
163 	fmd.d_thr_trace = (void (*)())func;
164 	pp->cp_value.cpv_ptr = (void *)func;
165 	return (0);
166 }
167 
168 static void
169 fmd_trmode_get(const fmd_conf_param_t *pp, void *ptr)
170 {
171 	*((void **)ptr) = pp->cp_value.cpv_ptr;
172 }
173 
174 static int
175 fmd_clkmode_set(fmd_conf_param_t *pp, const char *value)
176 {
177 	const fmd_timeops_t *ops;
178 
179 	if (strcasecmp(value, "native") == 0)
180 		ops = &fmd_timeops_native;
181 	else if (strcasecmp(value, "simulated") == 0)
182 		ops = &fmd_timeops_simulated;
183 	else
184 		return (fmd_set_errno(EFMD_CONF_INVAL));
185 
186 	fmd.d_clockops = ops;
187 	pp->cp_value.cpv_ptr = (void *)ops;
188 	return (0);
189 }
190 
191 static void
192 fmd_clkmode_get(const fmd_conf_param_t *pp, void *ptr)
193 {
194 	*((void **)ptr) = pp->cp_value.cpv_ptr;
195 }
196 
/*
 * Ops vectors for the property types that are private to this file.  Each one
 * pairs a custom set routine with a get routine, followed by fmd_conf_notsup
 * and fmd_conf_nop.
 * NOTE(review): the roles of the last two slots are defined by fmd_conf_ops_t,
 * which is not visible here -- confirm against fmd_conf.h.
 */
static const fmd_conf_ops_t fmd_cerror_ops = {
	fmd_cerror_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop
};

static const fmd_conf_ops_t fmd_dbout_ops = {
	fmd_dbout_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop
};

static const fmd_conf_ops_t fmd_debug_ops = {
	fmd_debug_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop
};

static const fmd_conf_ops_t fmd_trmode_ops = {
	fmd_trmode_set, fmd_trmode_get, fmd_conf_notsup, fmd_conf_nop
};

static const fmd_conf_ops_t fmd_clkmode_ops = {
	fmd_clkmode_set, fmd_clkmode_get, fmd_conf_notsup, fmd_conf_nop
};
216 
/*
 * Formal definitions of the properties owned by the daemon itself.  Each entry
 * gives, in order, the property name, the ops vector implementing its type,
 * and a value string (NULL where none is supplied), which appears to be the
 * property's default.  fmd_create() passes this table and its element count to
 * fmd_conf_open().
 */
static const fmd_conf_formal_t _fmd_conf[] = {
{ "agent.path", &fmd_conf_path, _fmd_agent_path }, /* path for agents */
{ "alloc_msecs", &fmd_conf_uint32, "10" },	/* msecs before alloc retry */
{ "alloc_tries", &fmd_conf_uint32, "3" },	/* max # of alloc retries */
{ "chassis", &fmd_conf_string, NULL },		/* chassis serial number */
{ "ckpt.dir", &fmd_conf_string, "var/fm/fmd/ckpt" }, /* ckpt directory path */
{ "ckpt.dirmode", &fmd_conf_int32, "0700" },	/* ckpt directory perm mode */
{ "ckpt.mode", &fmd_conf_int32, "0400" },	/* ckpt file perm mode */
{ "ckpt.restore", &fmd_conf_bool, "true" },	/* restore checkpoints? */
{ "ckpt.save", &fmd_conf_bool, "true" },	/* save checkpoints? */
{ "ckpt.zero", &fmd_conf_bool, "false" },	/* zero checkpoints on start? */
{ "client.buflim", &fmd_conf_size, "10m" },	/* client buffer space limit */
{ "client.dbout", &fmd_dbout_ops, NULL },	/* client debug output sinks */
{ "client.debug", &fmd_conf_bool, NULL },	/* client debug enable */
{ "client.error", &fmd_cerror_ops, "unload" },	/* client error policy */
{ "client.memlim", &fmd_conf_size, "10m" },	/* client allocation limit */
{ "client.evqlim", &fmd_conf_uint32, "256" },	/* client event queue limit */
{ "client.thrlim", &fmd_conf_uint32, "8" },	/* client aux thread limit */
{ "client.thrsig", &fmd_conf_signal, "SIGUSR1" }, /* fmd_thr_signal() value */
{ "client.tmrlim", &fmd_conf_uint32, "1024" },	/* client pending timer limit */
{ "clock", &fmd_clkmode_ops, "native" },	/* clock operation mode */
{ "conf_path", &fmd_conf_path, _fmd_conf_path }, /* root config file path */
{ "conf_file", &fmd_conf_string, "fmd.conf" },	/* root config file name */
{ "core", &fmd_conf_bool, "false" },		/* force core dump on quit */
{ "dbout", &fmd_dbout_ops, NULL },		/* daemon debug output sinks */
{ "debug", &fmd_debug_ops, NULL },		/* daemon debugging flags */
{ "dictdir", &fmd_conf_string, "usr/lib/fm/dict" }, /* default diagcode dir */
{ "domain", &fmd_conf_string, NULL },		/* domain id for de auth */
{ "errchan", &fmd_conf_string, FM_ERROR_CHAN }, /* error event channel name */
{ "fg", &fmd_conf_bool, "false" },		/* run daemon in foreground */
{ "gc_interval", &fmd_conf_time, "1d" },	/* garbage collection intvl */
{ "ids.avg", &fmd_conf_uint32, "4" },		/* desired idspace chain len */
{ "ids.max", &fmd_conf_uint32, "1024" },	/* maximum idspace buckets */
{ "isaname", &fmd_conf_string, _fmd_isa },	/* instruction set (uname -p) */
{ "log.rsrc", &fmd_conf_string, "var/fm/fmd/rsrc" }, /* asru log dir path */
{ "log.creator", &fmd_conf_string, "fmd" },	/* exacct log creator string */
{ "log.error", &fmd_conf_string, "var/fm/fmd/errlog" }, /* error log path */
{ "log.fault", &fmd_conf_string, "var/fm/fmd/fltlog" }, /* fault log path */
{ "log.minfree", &fmd_conf_size, "2m" },	/* min log fsys free space */
{ "log.tryrotate", &fmd_conf_uint32, "10" },	/* max log rotation attempts */
{ "log.waitrotate", &fmd_conf_time, "200ms" },	/* log rotation retry delay */
{ "machine", &fmd_conf_string, _fmd_uts.machine }, /* machine name (uname -m) */
{ "nodiagcode", &fmd_conf_string, "-" },	/* diagcode to use if error */
{ "osrelease", &fmd_conf_string, _fmd_uts.release }, /* release (uname -r) */
{ "osversion", &fmd_conf_string, _fmd_uts.version }, /* version (uname -v) */
{ "platform", &fmd_conf_string, _fmd_plat },	/* platform string (uname -i) */
{ "plugin.close", &fmd_conf_bool, "true" },	/* dlclose plugins on fini */
{ "plugin.path", &fmd_conf_path, _fmd_plugin_path }, /* path for plugin mods */
{ "rootdir", &fmd_conf_string, "" },		/* root directory for paths */
{ "rpc.adm.path", &fmd_conf_string, NULL },	/* FMD_ADM rendezvous file */
{ "rpc.adm.prog", &fmd_conf_uint32, "100169" },	/* FMD_ADM rpc program num */
{ "rpc.api.path", &fmd_conf_string, NULL },	/* FMD_API rendezvous file */
{ "rpc.api.prog", &fmd_conf_uint32, "100170" },	/* FMD_API rpc program num */
{ "rpc.rcvsize", &fmd_conf_size, "128k" },	/* rpc receive buffer size */
{ "rpc.sndsize", &fmd_conf_size, "128k" },	/* rpc send buffer size */
{ "rsrc.age", &fmd_conf_time, "30d" },		/* max age of old rsrc log */
{ "rsrc.zero", &fmd_conf_bool, "false" },	/* zero rsrc cache on start? */
{ "schemedir", &fmd_conf_string, _fmd_scheme_path }, /* path for scheme mods */
{ "self.name", &fmd_conf_string, "fmd-self-diagnosis" }, /* self-diag module */
{ "self.dict", &fmd_conf_list, "FMD.dict" },	/* self-diag dictionary list */
{ "server", &fmd_conf_string, _fmd_uts.nodename }, /* server id for de auth */
{ "strbuckets", &fmd_conf_uint32, "211" },	/* size of string hashes */
#ifdef DEBUG
{ "trace.mode", &fmd_trmode_ops, "full" },	/* trace mode: none/lite/full */
#else
{ "trace.mode", &fmd_trmode_ops, "lite" },	/* trace mode: none/lite/full */
#endif
{ "trace.recs", &fmd_conf_uint32, "128" },	/* trace records per thread */
{ "trace.frames", &fmd_conf_uint32, "16" },	/* max trace rec stack frames */
{ "uuidlen", &fmd_conf_uint32, "36" },		/* UUID ASCII string length */
{ "xprt.class", &fmd_conf_string, NULL },	/* transport event class */
{ "xprt.device", &fmd_conf_string, NULL },	/* transport replay device */
{ "xprt.sid", &fmd_conf_string, "fmd" },	/* transport subscriber id */
};
291 
/*
 * Statistics maintained by fmd itself on behalf of various global subsystems.
 * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
 * required in the future, the FMD_ADM_MODGSTAT service routine must change.
 *
 * Each entry is a statistic name, its type, and a description.  fmd_create()
 * inserts this collection into the root module's ustat set using the
 * FMD_USTAT_NOALLOC flag and saves the returned pointer in d_stats.
 */
static fmd_statistics_t _fmd_stats = {
{ "transport.received", FMD_TYPE_UINT64, "events received by transport" },
{ "transport.discarded", FMD_TYPE_UINT64, "bad events discarded by transport" },
{ "transport.retried", FMD_TYPE_UINT64, "retries requested of transport" },
{ "transport.replayed", FMD_TYPE_UINT64, "events replayed by transport" },
{ "transport.lost", FMD_TYPE_UINT64, "events lost by transport" },
{ "errlog.replayed", FMD_TYPE_UINT64, "total events replayed from errlog" },
{ "errlog.partials", FMD_TYPE_UINT64, "events partially committed in errlog" },
{ "errlog.enospc", FMD_TYPE_UINT64, "events not appended to errlog (ENOSPC)" },
{ "fltlog.enospc", FMD_TYPE_UINT64, "events not appended to fltlog (ENOSPC)" },
{ "log.enospc", FMD_TYPE_UINT64, "events not appended to other logs (ENOSPC)" },
{ "dr.gen", FMD_TYPE_UINT64, "dynamic reconfiguration generation" },
};
310 
311 void
312 fmd_create(fmd_t *dp, const char *arg0, const char *root, const char *conf)
313 {
314 	fmd_conf_path_t *pap;
315 	char file[PATH_MAX];
316 	const char *name;
317 	fmd_stat_t *sp;
318 	int i;
319 
320 	(void) sysinfo(SI_PLATFORM, _fmd_plat, sizeof (_fmd_plat));
321 	(void) sysinfo(SI_ARCHITECTURE, _fmd_isa, sizeof (_fmd_isa));
322 	(void) uname(&_fmd_uts);
323 
324 	bzero(dp, sizeof (fmd_t));
325 
326 	dp->d_version = _fmd_version;
327 	dp->d_pname = fmd_strbasename(arg0);
328 	dp->d_pid = getpid();
329 
330 	if (pthread_key_create(&dp->d_key, NULL) != 0)
331 		fmd_error(EFMD_EXIT, "failed to create pthread key");
332 
333 	(void) pthread_mutex_init(&dp->d_xprt_lock, NULL);
334 	(void) pthread_cond_init(&dp->d_xprt_cv, NULL);
335 	dp->d_xprt_wait++; /* pause transport threads */
336 
337 	(void) pthread_mutex_init(&dp->d_err_lock, NULL);
338 	(void) pthread_mutex_init(&dp->d_thr_lock, NULL);
339 	(void) pthread_mutex_init(&dp->d_mod_lock, NULL);
340 	(void) pthread_mutex_init(&dp->d_stats_lock, NULL);
341 	(void) pthread_rwlock_init(&dp->d_log_lock, NULL);
342 
343 	/*
344 	 * A small number of properties must be set manually before we open
345 	 * the root configuration file.  These include any settings for our
346 	 * memory allocator and path expansion token values, because these
347 	 * values are needed by the routines in fmd_conf.c itself.  After
348 	 * the root configuration file is processed, we reset these properties
349 	 * based upon the latest values from the configuration file.
350 	 */
351 	dp->d_alloc_msecs = 10;
352 	dp->d_alloc_tries = 3;
353 	dp->d_str_buckets = 211;
354 
355 	dp->d_rootdir = root ? root : "";
356 	dp->d_platform = _fmd_plat;
357 	dp->d_machine = _fmd_uts.machine;
358 	dp->d_isaname = _fmd_isa;
359 
360 	dp->d_conf = fmd_conf_open(conf,
361 	    sizeof (_fmd_conf) / sizeof (_fmd_conf[0]), _fmd_conf);
362 
363 	if (dp->d_conf == NULL) {
364 		fmd_error(EFMD_EXIT,
365 		    "failed to load required configuration properties\n");
366 	}
367 
368 	(void) fmd_conf_getprop(dp->d_conf, "alloc.msecs", &dp->d_alloc_msecs);
369 	(void) fmd_conf_getprop(dp->d_conf, "alloc.tries", &dp->d_alloc_tries);
370 	(void) fmd_conf_getprop(dp->d_conf, "strbuckets", &dp->d_str_buckets);
371 
372 	(void) fmd_conf_getprop(dp->d_conf, "platform", &dp->d_platform);
373 	(void) fmd_conf_getprop(dp->d_conf, "machine", &dp->d_machine);
374 	(void) fmd_conf_getprop(dp->d_conf, "isaname", &dp->d_isaname);
375 
376 	/*
377 	 * Manually specified rootdirs override config files, so only update
378 	 * d_rootdir based on the config files we parsed if no 'root' was set.
379 	 */
380 	if (root == NULL)
381 		(void) fmd_conf_getprop(dp->d_conf, "rootdir", &dp->d_rootdir);
382 	else
383 		(void) fmd_conf_setprop(dp->d_conf, "rootdir", dp->d_rootdir);
384 
385 	/*
386 	 * Once the base conf file properties are loaded, lookup the values
387 	 * of $conf_path and $conf_file and merge in any other conf files.
388 	 */
389 	(void) fmd_conf_getprop(dp->d_conf, "conf_path", &pap);
390 	(void) fmd_conf_getprop(dp->d_conf, "conf_file", &name);
391 
392 	for (i = 0; i < pap->cpa_argc; i++) {
393 		(void) snprintf(file, sizeof (file),
394 		    "%s/%s", pap->cpa_argv[i], name);
395 		if (access(file, F_OK) == 0)
396 			fmd_conf_merge(dp->d_conf, file);
397 	}
398 
399 	/*
400 	 * Update the value of fmd.d_fg based on "fg".  We cache this property
401 	 * because it must be accessed deep within fmd at fmd_verror() time.
402 	 */
403 	(void) fmd_conf_getprop(fmd.d_conf, "fg", &fmd.d_fg);
404 
405 	/*
406 	 * Initialize our custom libnvpair allocator and create an nvlist for
407 	 * authority elements corresponding to this instance of the daemon.
408 	 */
409 	(void) nv_alloc_init(&dp->d_nva, &fmd_nv_alloc_ops);
410 	dp->d_auth = fmd_protocol_authority();
411 
412 	/*
413 	 * The fmd_module_t for the root module must be created manually.  Most
414 	 * of it remains unused and zero, except for the few things we fill in.
415 	 */
416 	dp->d_rmod = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
417 	dp->d_rmod->mod_name = fmd_strdup(dp->d_pname, FMD_SLEEP);
418 	fmd_list_append(&dp->d_mod_list, dp->d_rmod);
419 
420 	(void) pthread_mutex_init(&dp->d_rmod->mod_lock, NULL);
421 	(void) pthread_cond_init(&dp->d_rmod->mod_cv, NULL);
422 
423 	dp->d_rmod->mod_thread = fmd_thread_xcreate(dp->d_rmod, pthread_self());
424 	dp->d_rmod->mod_ustat = fmd_ustat_create();
425 
426 	if (pthread_setspecific(dp->d_key, dp->d_rmod->mod_thread) != 0)
427 		fmd_error(EFMD_EXIT, "failed to attach main thread key");
428 
429 	if ((dp->d_stats = (fmd_statistics_t *)fmd_ustat_insert(
430 	    dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC, sizeof (_fmd_stats) /
431 	    sizeof (fmd_stat_t), (fmd_stat_t *)&_fmd_stats, NULL)) == NULL)
432 		fmd_error(EFMD_EXIT, "failed to initialize statistics");
433 
434 	/*
435 	 * In addition to inserting the _fmd_stats collection of program-wide
436 	 * statistics, we also insert a statistic named after each of our
437 	 * errors and update these counts in fmd_verror() (see fmd_subr.c).
438 	 */
439 	dp->d_errstats = sp = fmd_zalloc(sizeof (fmd_stat_t) *
440 	    (EFMD_END - EFMD_UNKNOWN), FMD_SLEEP);
441 
442 	for (i = 0; i < EFMD_END - EFMD_UNKNOWN; i++, sp++) {
443 		(void) snprintf(sp->fmds_name, sizeof (sp->fmds_name), "err.%s",
444 		    strrchr(fmd_errclass(EFMD_UNKNOWN + i), '.') + 1);
445 		sp->fmds_type = FMD_TYPE_UINT64;
446 	}
447 
448 	(void) fmd_ustat_insert(dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC,
449 	    EFMD_END - EFMD_UNKNOWN, dp->d_errstats, NULL);
450 }
451 
/*
 * Tear down the daemon state 'dp'.  Subsystems are shut down first, then the
 * self-diagnosis module and all other non-root modules are unloaded, the logs
 * are released, the root module is destroyed, and the remaining global data
 * structures are freed.  'dp' is zeroed before returning.  If the "core"
 * property was set, fmd_panic() is called as the final step.
 *
 * The order of the steps below is significant; see the individual comments.
 */
void
fmd_destroy(fmd_t *dp)
{
	fmd_module_t *mp;
	int core;

	/* read "core" now: the configuration is closed further below */
	(void) fmd_conf_getprop(fmd.d_conf, "core", &core);

	fmd_rpc_fini();
	fmd_transport_fini();
	fmd_dr_fini();

	/*
	 * Unload the self-diagnosis module first.  This ensures that it does
	 * not get confused as we start unloading other modules, etc.  We must
	 * hold the dispq lock as a writer while doing so since it uses d_self.
	 */
	if (dp->d_self != NULL) {
		(void) pthread_rwlock_wrlock(&dp->d_disp->dq_lock);
		fmd_module_unload(dp->d_self);
		fmd_module_rele(dp->d_self);
		dp->d_self = NULL;
		(void) pthread_rwlock_unlock(&dp->d_disp->dq_lock);
	}

	/*
	 * Unload modules in reverse order *except* for the root module, which
	 * is first in the list.  This allows it to keep its thread and trace.
	 */
	for (mp = fmd_list_prev(&dp->d_mod_list); mp != dp->d_rmod; ) {
		fmd_module_unload(mp);
		mp = fmd_list_prev(mp);
	}

	if (dp->d_mod_hash != NULL) {
		fmd_modhash_destroy(dp->d_mod_hash);
		dp->d_mod_hash = NULL;
	}

	/*
	 * Close both log files now that modules are no longer active.  We must
	 * set these pointers to NULL in case any subsequent errors occur.
	 */
	if (dp->d_errlog != NULL) {
		fmd_log_rele(dp->d_errlog);
		dp->d_errlog = NULL;
	}

	if (dp->d_fltlog != NULL) {
		fmd_log_rele(dp->d_fltlog);
		dp->d_fltlog = NULL;
	}

	/*
	 * Now that all data structures that refer to modules are torn down,
	 * no modules should be remaining on the module list except for d_rmod.
	 * If we trip one of these assertions, we're missing a rele somewhere.
	 */
	ASSERT(fmd_list_prev(&dp->d_mod_list) == dp->d_rmod);
	ASSERT(fmd_list_next(&dp->d_mod_list) == dp->d_rmod);

	/*
	 * Now destroy the root module.  We clear its thread key first so any
	 * calls to fmd_trace() inside of the module code will be ignored.
	 */
	(void) pthread_setspecific(dp->d_key, NULL);
	(void) pthread_mutex_lock(&dp->d_rmod->mod_lock);
	fmd_module_destroy(dp->d_rmod);

	/*
	 * The structures below are created by fmd_run(), so any of them may
	 * still be NULL here.
	 */
	if (dp->d_timers != NULL)
		fmd_timerq_destroy(dp->d_timers);
	if (dp->d_disp != NULL)
		fmd_dispq_destroy(dp->d_disp);
	if (dp->d_asrus != NULL)
		fmd_asru_hash_destroy(dp->d_asrus);
	if (dp->d_schemes != NULL)
		fmd_scheme_hash_destroy(dp->d_schemes);
	if (dp->d_cases != NULL)
		fmd_case_hash_destroy(dp->d_cases);

	/* the size must match the allocation made in fmd_create() */
	if (dp->d_errstats != NULL) {
		fmd_free(dp->d_errstats,
		    sizeof (fmd_stat_t) * (EFMD_END - EFMD_UNKNOWN));
	}

	if (dp->d_conf != NULL)
		fmd_conf_close(dp->d_conf);

	nvlist_free(dp->d_auth);
	(void) nv_alloc_fini(&dp->d_nva);
	dp->d_clockops->fto_fini(dp->d_clockptr);

	(void) pthread_key_delete(dp->d_key);
	bzero(dp, sizeof (fmd_t));

	/* 'core' is a local copy, so it is still valid after the bzero() */
	if (core)
		fmd_panic("forcing core dump at user request\n");
}
550 
551 /*ARGSUSED*/
552 static void
553 fmd_gc(fmd_t *dp, id_t id, hrtime_t hrt)
554 {
555 	hrtime_t delta;
556 
557 	if (id != 0) {
558 		TRACE((FMD_DBG_MOD, "garbage collect start"));
559 		fmd_modhash_apply(dp->d_mod_hash, fmd_module_gc);
560 		TRACE((FMD_DBG_MOD, "garbage collect end"));
561 
562 		(void) pthread_rwlock_rdlock(&dp->d_log_lock);
563 		fmd_log_update(dp->d_errlog);
564 		(void) pthread_rwlock_unlock(&dp->d_log_lock);
565 	}
566 
567 	(void) fmd_conf_getprop(dp->d_conf, "gc_interval", &delta);
568 	(void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids,
569 	    (fmd_timer_f *)fmd_gc, dp, NULL, delta);
570 }
571 
572 /*
573  * Events are committed to the errlog after cases are checkpointed.  If fmd
574  * crashes before an event is ever associated with a module, this function will
575  * be called to replay it to all subscribers.  If fmd crashes in between the
576  * subscriber checkpointing and committing the event in the error log, the
577  * module will have seen the event and we don't want to replay it.  So we look
578  * for the event in all modules and transition it to the proper state.  If
579  * it is found, we commit it to the error log and do not replay it.  The in-
580  * memory case search used by fmd_module_contains() et al isn't particularly
581  * efficient, but it is faster than doing read i/o's on every case event to
582  * check their status or write i/o's on every event to replay to update states.
583  * We can improve the efficiency of this lookup algorithm later if necessary.
584  */
585 /*ARGSUSED*/
586 static void
587 fmd_err_replay(fmd_log_t *lp, fmd_event_t *ep, fmd_t *dp)
588 {
589 	fmd_module_t *mp;
590 	fmd_stat_t *sp;
591 
592 	(void) pthread_mutex_lock(&dp->d_mod_lock);
593 
594 	for (mp = fmd_list_next(&dp->d_mod_list);
595 	    mp != NULL; mp = fmd_list_next(mp)) {
596 		if (fmd_module_contains(mp, ep)) {
597 			fmd_module_hold(mp);
598 			break;
599 		}
600 	}
601 
602 	(void) pthread_mutex_unlock(&dp->d_mod_lock);
603 
604 	if (mp != NULL) {
605 		fmd_event_commit(ep);
606 		fmd_module_rele(mp);
607 		sp = &dp->d_stats->ds_log_partials;
608 	} else {
609 		fmd_dispq_dispatch(dp->d_disp, ep,
610 		    ((fmd_event_impl_t *)ep)->ev_data);
611 		sp = &dp->d_stats->ds_log_replayed;
612 	}
613 
614 	(void) pthread_mutex_lock(&dp->d_stats_lock);
615 	sp->fmds_value.ui64++;
616 	(void) pthread_mutex_unlock(&dp->d_stats_lock);
617 }
618 
/*
 * This signal handler is installed for the client.thrsig signal to be used to
 * force an auxiliary thread to wake up from a system call and return EINTR in
 * response to a module's use of fmd_thr_signal().  We also trace the event.
 *
 * The handler deliberately does nothing beyond tracing: delivery of the signal
 * is the entire point.  fmd_run() installs it with sa_flags set to zero and an
 * empty signal mask.
 */
static void
fmd_signal(int sig)
{
	TRACE((FMD_DBG_MOD, "module thread received sig #%d", sig));
}
629 
630 void
631 fmd_run(fmd_t *dp, int pfd)
632 {
633 	char *nodc_key[] = { FMD_FLT_NODC, NULL };
634 	char nodc_str[128];
635 	struct sigaction act;
636 
637 	int status = FMD_EXIT_SUCCESS;
638 	const char *name;
639 	fmd_conf_path_t *pap;
640 	int dbout;
641 
642 	/*
643 	 * Cache all the current debug property settings in d_fmd_debug,
644 	 * d_fmd_dbout, d_hdl_debug, and d_hdl_dbout.  If a given debug mask
645 	 * is non-zero and the corresponding dbout mask is zero, set dbout
646 	 * to a sensible default value based on whether we have daemonized.
647 	 */
648 	(void) fmd_conf_getprop(dp->d_conf, "dbout", &dbout);
649 
650 	if (dp->d_fmd_debug != 0 && dbout == 0)
651 		dp->d_fmd_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG;
652 	else
653 		dp->d_fmd_dbout = dbout;
654 
655 	(void) fmd_conf_getprop(dp->d_conf, "client.debug", &dp->d_hdl_debug);
656 	(void) fmd_conf_getprop(dp->d_conf, "client.dbout", &dbout);
657 
658 	if (dp->d_hdl_debug != 0 && dbout == 0)
659 		dp->d_hdl_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG;
660 	else
661 		dp->d_hdl_dbout = dbout;
662 
663 	/*
664 	 * Initialize remaining major program data structures such as the event
665 	 * transport, dispatch queues, log files, module hash collections, etc.
666 	 * This work is done here rather than in fmd_create() to permit the -o
667 	 * command-line option to modify properties after fmd_create() is done.
668 	 * Note that our event transport will remain blocked until we broadcast
669 	 * to threads blocked on d_xprt_cv at the end of this function.
670 	 */
671 	dp->d_clockptr = dp->d_clockops->fto_init();
672 	fmd_transport_init();
673 	fmd_rpc_init();
674 	fmd_dr_init();
675 
676 	dp->d_rmod->mod_timerids = fmd_idspace_create(dp->d_pname, 1, 16);
677 	dp->d_timers = fmd_timerq_create();
678 	dp->d_disp = fmd_dispq_create();
679 	dp->d_cases = fmd_case_hash_create();
680 
681 	/*
682 	 * Once our subsystems that use signals have been set up, install the
683 	 * signal handler for the fmd_thr_signal() API.  Verify that the signal
684 	 * being used for this purpose doesn't conflict with something else.
685 	 */
686 	(void) fmd_conf_getprop(dp->d_conf, "client.thrsig", &dp->d_thr_sig);
687 
688 	if (sigaction(dp->d_thr_sig, NULL, &act) != 0) {
689 		fmd_error(EFMD_EXIT, "invalid signal selected for "
690 		    "client.thrsig property: %d\n", dp->d_thr_sig);
691 	}
692 
693 	if (act.sa_handler != SIG_IGN && act.sa_handler != SIG_DFL) {
694 		fmd_error(EFMD_EXIT, "signal selected for client.thrsig "
695 		    "property is already in use: %d\n", dp->d_thr_sig);
696 	}
697 
698 	act.sa_handler = fmd_signal;
699 	act.sa_flags = 0;
700 
701 	(void) sigemptyset(&act.sa_mask);
702 	(void) sigaction(dp->d_thr_sig, &act, NULL);
703 
704 	(void) fmd_conf_getprop(dp->d_conf, "schemedir", &name);
705 	dp->d_schemes = fmd_scheme_hash_create(dp->d_rootdir, name);
706 
707 	(void) fmd_conf_getprop(dp->d_conf, "log.rsrc", &name);
708 	dp->d_asrus = fmd_asru_hash_create(dp->d_rootdir, name);
709 
710 	(void) fmd_conf_getprop(dp->d_conf, "log.error", &name);
711 	dp->d_errlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_ERROR);
712 
713 	(void) fmd_conf_getprop(dp->d_conf, "log.fault", &name);
714 	dp->d_fltlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_FAULT);
715 
716 	if (dp->d_asrus == NULL || dp->d_errlog == NULL || dp->d_fltlog == NULL)
717 		fmd_error(EFMD_EXIT, "failed to initialize log files\n");
718 
719 	dp->d_mod_hash = fmd_modhash_create();
720 	dp->d_running = 1; /* we are now officially an active fmd */
721 
722 	/*
723 	 * Now that we're running, if a pipe fd was specified, write an exit
724 	 * status to it to indicate that our parent process can safely detach.
725 	 */
726 	if (pfd >= 0)
727 		(void) write(pfd, &status, sizeof (status));
728 
729 	/*
730 	 * Once all data structures are initialized, we load all of our modules
731 	 * in order according to class in order to load up any subscriptions.
732 	 */
733 	fmd_builtin_loadall(dp->d_mod_hash);
734 	(void) fmd_conf_getprop(dp->d_conf, "self.name", &name);
735 	dp->d_self = fmd_modhash_lookup(dp->d_mod_hash, name);
736 
737 	if (fmd_module_dc_key2code(dp->d_self,
738 	    nodc_key, nodc_str, sizeof (nodc_str)) == 0)
739 		(void) fmd_conf_setprop(dp->d_conf, "nodiagcode", nodc_str);
740 
741 	(void) fmd_conf_getprop(dp->d_conf, "plugin.path", &pap);
742 	fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_rtld_ops);
743 
744 	(void) fmd_conf_getprop(dp->d_conf, "agent.path", &pap);
745 	fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_proc_ops);
746 
747 	/*
748 	 * Before activating the inbound event transport, we first replay any
749 	 * fault events from the ASRU cache, any case events from the case hash
750 	 * associated with restored case checkpoints, and any error events from
751 	 * the errlog that did not finish processing the last time we ran. Then
752 	 * we replay any pending events from the event transport itself.
753 	 */
754 	fmd_asru_hash_refresh(dp->d_asrus);
755 	fmd_case_hash_refresh(dp->d_cases);
756 
757 	(void) pthread_rwlock_rdlock(&dp->d_log_lock);
758 	fmd_log_replay(dp->d_errlog, (fmd_log_f *)fmd_err_replay, dp);
759 	fmd_log_update(dp->d_errlog);
760 	(void) pthread_rwlock_unlock(&dp->d_log_lock);
761 
762 	fmd_transport_replay();
763 
764 	/*
765 	 * Finally, awaken any threads associated with receiving events from
766 	 * our main ereport event transport that are sleeping on d_xprt_wait.
767 	 */
768 	(void) pthread_mutex_lock(&dp->d_xprt_lock);
769 	ASSERT(dp->d_xprt_wait != 0);
770 	dp->d_xprt_wait--;
771 	(void) pthread_mutex_unlock(&dp->d_xprt_lock);
772 	(void) pthread_cond_broadcast(&dp->d_xprt_cv);
773 
774 	fmd_gc(dp, 0, 0);
775 }
776 
777 void
778 fmd_help(fmd_t *dp)
779 {
780 	const fmd_conf_mode_t *cmp;
781 
782 	(void) printf("Usage: %s -o debug=mode[,mode]\n", dp->d_pname);
783 
784 	for (cmp = _fmd_debug_modes; cmp->cm_name != NULL; cmp++)
785 		(void) printf("\t%s\t%s\n", cmp->cm_name, cmp->cm_desc);
786 }
787