xref: /illumos-gate/usr/src/cmd/svc/startd/restarter.c (revision 609a0c4b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25  */
26 
27 /*
28  * restarter.c - service manipulation
29  *
30  * This component manages services whose restarter is svc.startd, the standard
31  * restarter.  It translates restarter protocol events from the graph engine
32  * into actions on processes, as a delegated restarter would do.
33  *
34  * The master restarter manages a number of always-running threads:
35  *   - restarter event thread: events from the graph engine
36  *   - timeout thread: thread to fire queued timeouts
37  *   - contract thread: thread to handle contract events
38  *   - wait thread: thread to handle wait-based services
39  *
40  * The other threads are created as-needed:
41  *   - per-instance method threads
42  *   - per-instance event processing threads
43  *
44  * The interaction of all threads must result in the following conditions
45  * being satisfied (on a per-instance basis):
46  *   - restarter events must be processed in order
47  *   - method execution must be serialized
48  *   - instance delete must be held until outstanding methods are complete
49  *   - contract events shouldn't be processed while a method is running
50  *   - timeouts should fire even when a method is running
51  *
52  * Service instances are represented by restarter_inst_t's and are kept in the
53  * instance_list list.
54  *
55  * Service States
56  *   The current state of a service instance is kept in
57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58  *   some time, then before we effect the transition we set
59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60  *   rotate i_next_state to i_state and set i_next_state to
61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62  *   held.  The exception is when we launch methods, which are done with
63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
64  *   method_thread() does, we set ri_method_thread to the thread id of the
65  *   method thread, and when it is nonzero any thread with a different thread id
66  *   waits on ri_method_cv.
67  *
68  * Method execution is serialized by blocking on ri_method_cv in
69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70  * also prevents the instance structure from being deleted until all
71  * outstanding operations such as method_thread() have finished.
72  *
73  * Lock ordering:
74  *
75  * dgraph_lock [can be held when taking:]
76  *   utmpx_lock
77  *   dictionary->dict_lock
78  *   st->st_load_lock
79  *   wait_info_lock
80  *   ru->restarter_update_lock
81  *     restarter_queue->rpeq_lock
82  *   instance_list.ril_lock
83  *     inst->ri_lock
84  *   st->st_configd_live_lock
85  *
86  * instance_list.ril_lock
87  *   graph_queue->gpeq_lock
88  *   gu->gu_lock
89  *   st->st_configd_live_lock
90  *   dictionary->dict_lock
91  *   inst->ri_lock
92  *     graph_queue->gpeq_lock
93  *     gu->gu_lock
94  *     tu->tu_lock
95  *     tq->tq_lock
96  *     inst->ri_queue_lock
97  *       wait_info_lock
98  *       bp->cb_lock
99  *     utmpx_lock
100  *
101  * single_user_thread_lock
102  *   wait_info_lock
103  *   utmpx_lock
104  *
105  * gu_freeze_lock
106  *
107  * logbuf_mutex nests inside pretty much everything.
108  */
109 
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134 
135 #include "startd.h"
136 #include "protocol.h"
137 
/* List-node pool used to initialize each instance's ri_link. */
static uu_list_pool_t *restarter_instance_pool;
/* The set of known instances (ril_instance_list), guarded by ril_lock. */
static restarter_instance_list_t instance_list;

/* Pool from which each instance's ri_queue list is created. */
static uu_list_pool_t *restarter_queue_pool;
142 
143 /*
144  * Function used to reset the restart times for an instance, when
145  * an administrative task comes along and essentially makes the times
146  * in this array ineffective.
147  */
148 static void
149 reset_start_times(restarter_inst_t *inst)
150 {
151 	inst->ri_start_index = 0;
152 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
153 }
154 
155 /*ARGSUSED*/
156 static int
157 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
158     void *private)
159 {
160 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
161 	int rc_id = *(int *)rc_arg;
162 
163 	if (lc_id > rc_id)
164 		return (1);
165 	if (lc_id < rc_id)
166 		return (-1);
167 	return (0);
168 }
169 
170 static restarter_inst_t *
171 inst_lookup_by_name(const char *name)
172 {
173 	int id;
174 
175 	id = dict_lookup_byname(name);
176 	if (id == -1)
177 		return (NULL);
178 
179 	return (inst_lookup_by_id(id));
180 }
181 
182 restarter_inst_t *
183 inst_lookup_by_id(int id)
184 {
185 	restarter_inst_t *inst;
186 
187 	MUTEX_LOCK(&instance_list.ril_lock);
188 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
189 	if (inst != NULL)
190 		MUTEX_LOCK(&inst->ri_lock);
191 	MUTEX_UNLOCK(&instance_list.ril_lock);
192 
193 	if (inst != NULL) {
194 		while (inst->ri_method_thread != 0 &&
195 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
196 			++inst->ri_method_waiters;
197 			(void) pthread_cond_wait(&inst->ri_method_cv,
198 			    &inst->ri_lock);
199 			assert(inst->ri_method_waiters > 0);
200 			--inst->ri_method_waiters;
201 		}
202 	}
203 
204 	return (inst);
205 }
206 
207 static restarter_inst_t *
208 inst_lookup_queue(const char *name)
209 {
210 	int id;
211 	restarter_inst_t *inst;
212 
213 	id = dict_lookup_byname(name);
214 	if (id == -1)
215 		return (NULL);
216 
217 	MUTEX_LOCK(&instance_list.ril_lock);
218 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
219 	if (inst != NULL)
220 		MUTEX_LOCK(&inst->ri_queue_lock);
221 	MUTEX_UNLOCK(&instance_list.ril_lock);
222 
223 	return (inst);
224 }
225 
226 const char *
227 service_style(int flags)
228 {
229 	switch (flags & RINST_STYLE_MASK) {
230 	case RINST_CONTRACT:	return ("contract");
231 	case RINST_TRANSIENT:	return ("transient");
232 	case RINST_WAIT:	return ("wait");
233 
234 	default:
235 #ifndef NDEBUG
236 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
237 #endif
238 		abort();
239 		/* NOTREACHED */
240 	}
241 }
242 
243 /*
244  * Fails with ECONNABORTED or ECANCELED.
245  */
246 static int
247 check_contract(restarter_inst_t *inst, boolean_t primary,
248     scf_instance_t *scf_inst)
249 {
250 	ctid_t *ctidp;
251 	int fd, r;
252 
253 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
254 	    &inst->ri_i.i_transient_ctid;
255 
256 	assert(*ctidp >= 1);
257 
258 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
259 	if (fd >= 0) {
260 		r = close(fd);
261 		assert(r == 0);
262 		return (0);
263 	}
264 
265 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
266 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
267 	switch (r) {
268 	case 0:
269 	case ECONNABORTED:
270 	case ECANCELED:
271 		*ctidp = 0;
272 		return (r);
273 
274 	case ENOMEM:
275 		uu_die("Out of memory\n");
276 		/* NOTREACHED */
277 
278 	case EPERM:
279 		uu_die("Insufficient privilege.\n");
280 		/* NOTREACHED */
281 
282 	case EACCES:
283 		uu_die("Repository backend access denied.\n");
284 		/* NOTREACHED */
285 
286 	case EROFS:
287 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
288 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
289 		return (0);
290 
291 	case EINVAL:
292 	case EBADF:
293 	default:
294 		assert(0);
295 		abort();
296 		/* NOTREACHED */
297 	}
298 }
299 
300 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
301 
302 /*
303  * int restarter_insert_inst(scf_handle_t *, char *)
304  *   If the inst is already in the restarter list, return its id.  If the inst
305  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
306  *   states, insert it into the list, and return 0.
307  *
308  *   Fails with
309  *     ENOENT - name is not in the repository
310  */
311 static int
312 restarter_insert_inst(scf_handle_t *h, const char *name)
313 {
314 	int id, r;
315 	restarter_inst_t *inst;
316 	uu_list_index_t idx;
317 	scf_service_t *scf_svc;
318 	scf_instance_t *scf_inst;
319 	scf_snapshot_t *snap = NULL;
320 	scf_propertygroup_t *pg;
321 	char *svc_name, *inst_name;
322 	char logfilebuf[PATH_MAX];
323 	char *c;
324 	boolean_t do_commit_states;
325 	restarter_instance_state_t state, next_state;
326 	protocol_states_t *ps;
327 	pid_t start_pid;
328 	restarter_str_t reason = restarter_str_insert_in_graph;
329 
330 	MUTEX_LOCK(&instance_list.ril_lock);
331 
332 	/*
333 	 * We don't use inst_lookup_by_name() here because we want the lookup
334 	 * & insert to be atomic.
335 	 */
336 	id = dict_lookup_byname(name);
337 	if (id != -1) {
338 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
339 		    &idx);
340 		if (inst != NULL) {
341 			MUTEX_UNLOCK(&instance_list.ril_lock);
342 			return (0);
343 		}
344 	}
345 
346 	/* Allocate an instance */
347 	inst = startd_zalloc(sizeof (restarter_inst_t));
348 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
349 	inst->ri_utmpx_prefix[0] = '\0';
350 
351 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
352 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
353 
354 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
355 
356 	/*
357 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
358 	 * just in case.
359 	 */
360 	inst->ri_id = (id != -1 ? id : dict_insert(name));
361 
362 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
363 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
364 
365 	scf_svc = safe_scf_service_create(h);
366 	scf_inst = safe_scf_instance_create(h);
367 	pg = safe_scf_pg_create(h);
368 	svc_name = startd_alloc(max_scf_name_size);
369 	inst_name = startd_alloc(max_scf_name_size);
370 
371 rep_retry:
372 	if (snap != NULL)
373 		scf_snapshot_destroy(snap);
374 	if (inst->ri_logstem != NULL)
375 		startd_free(inst->ri_logstem, PATH_MAX);
376 	if (inst->ri_common_name != NULL)
377 		startd_free(inst->ri_common_name,
378 		    strlen(inst->ri_common_name) + 1);
379 	if (inst->ri_C_common_name != NULL)
380 		startd_free(inst->ri_C_common_name,
381 		    strlen(inst->ri_C_common_name) + 1);
382 	snap = NULL;
383 	inst->ri_logstem = NULL;
384 	inst->ri_common_name = NULL;
385 	inst->ri_C_common_name = NULL;
386 
387 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
388 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
389 		switch (scf_error()) {
390 		case SCF_ERROR_CONNECTION_BROKEN:
391 			libscf_handle_rebind(h);
392 			goto rep_retry;
393 
394 		case SCF_ERROR_NOT_FOUND:
395 			goto deleted;
396 		}
397 
398 		uu_die("Can't decode FMRI %s: %s\n", name,
399 		    scf_strerror(scf_error()));
400 	}
401 
402 	/*
403 	 * If there's no running snapshot, then we execute using the editing
404 	 * snapshot.  Pending snapshots will be taken later.
405 	 */
406 	snap = libscf_get_running_snapshot(scf_inst);
407 
408 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
409 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
410 	    0)) {
411 		switch (scf_error()) {
412 		case SCF_ERROR_NOT_SET:
413 			break;
414 
415 		case SCF_ERROR_CONNECTION_BROKEN:
416 			libscf_handle_rebind(h);
417 			goto rep_retry;
418 
419 		default:
420 			assert(0);
421 			abort();
422 		}
423 
424 		goto deleted;
425 	}
426 
427 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
428 	for (c = logfilebuf; *c != '\0'; c++)
429 		if (*c == '/')
430 			*c = '-';
431 
432 	inst->ri_logstem = startd_alloc(PATH_MAX);
433 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
434 	    LOG_SUFFIX);
435 
436 	/*
437 	 * If the restarter group is missing, use uninit/none.  Otherwise,
438 	 * we're probably being restarted & don't want to mess up the states
439 	 * that are there.
440 	 */
441 	state = RESTARTER_STATE_UNINIT;
442 	next_state = RESTARTER_STATE_NONE;
443 
444 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
445 	if (r != 0) {
446 		switch (scf_error()) {
447 		case SCF_ERROR_CONNECTION_BROKEN:
448 			libscf_handle_rebind(h);
449 			goto rep_retry;
450 
451 		case SCF_ERROR_NOT_SET:
452 			goto deleted;
453 
454 		case SCF_ERROR_NOT_FOUND:
455 			/*
456 			 * This shouldn't happen since the graph engine should
457 			 * have initialized the state to uninitialized/none if
458 			 * there was no restarter pg.  In case somebody
459 			 * deleted it, though....
460 			 */
461 			do_commit_states = B_TRUE;
462 			break;
463 
464 		default:
465 			assert(0);
466 			abort();
467 		}
468 	} else {
469 		r = libscf_read_states(pg, &state, &next_state);
470 		if (r != 0) {
471 			do_commit_states = B_TRUE;
472 		} else {
473 			if (next_state != RESTARTER_STATE_NONE) {
474 				/*
475 				 * Force next_state to _NONE since we
476 				 * don't look for method processes.
477 				 */
478 				next_state = RESTARTER_STATE_NONE;
479 				do_commit_states = B_TRUE;
480 			} else {
481 				/*
482 				 * The reason for transition will depend on
483 				 * state.
484 				 */
485 				if (st->st_initial == 0)
486 					reason = restarter_str_startd_restart;
487 				else if (state == RESTARTER_STATE_MAINT)
488 					reason = restarter_str_bad_repo_state;
489 				/*
490 				 * Inform the restarter of our state without
491 				 * changing the STIME in the repository.
492 				 */
493 				ps = startd_alloc(sizeof (*ps));
494 				inst->ri_i.i_state = ps->ps_state = state;
495 				inst->ri_i.i_next_state = ps->ps_state_next =
496 				    next_state;
497 				ps->ps_reason = reason;
498 
499 				graph_protocol_send_event(inst->ri_i.i_fmri,
500 				    GRAPH_UPDATE_STATE_CHANGE, ps);
501 
502 				do_commit_states = B_FALSE;
503 			}
504 		}
505 	}
506 
507 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
508 	    &inst->ri_utmpx_prefix)) {
509 	case 0:
510 		break;
511 
512 	case ECONNABORTED:
513 		libscf_handle_rebind(h);
514 		goto rep_retry;
515 
516 	case ECANCELED:
517 		goto deleted;
518 
519 	case ENOENT:
520 		/*
521 		 * This is odd, because the graph engine should have required
522 		 * the general property group.  So we'll just use default
523 		 * flags in anticipation of the graph engine sending us
524 		 * REMOVE_INSTANCE when it finds out that the general property
525 		 * group has been deleted.
526 		 */
527 		inst->ri_flags = RINST_CONTRACT;
528 		break;
529 
530 	default:
531 		assert(0);
532 		abort();
533 	}
534 
535 	r = libscf_get_template_values(scf_inst, snap,
536 	    &inst->ri_common_name, &inst->ri_C_common_name);
537 
538 	/*
539 	 * Copy our names to smaller buffers to reduce our memory footprint.
540 	 */
541 	if (inst->ri_common_name != NULL) {
542 		char *tmp = safe_strdup(inst->ri_common_name);
543 		startd_free(inst->ri_common_name, max_scf_value_size);
544 		inst->ri_common_name = tmp;
545 	}
546 
547 	if (inst->ri_C_common_name != NULL) {
548 		char *tmp = safe_strdup(inst->ri_C_common_name);
549 		startd_free(inst->ri_C_common_name, max_scf_value_size);
550 		inst->ri_C_common_name = tmp;
551 	}
552 
553 	switch (r) {
554 	case 0:
555 		break;
556 
557 	case ECONNABORTED:
558 		libscf_handle_rebind(h);
559 		goto rep_retry;
560 
561 	case ECANCELED:
562 		goto deleted;
563 
564 	case ECHILD:
565 	case ENOENT:
566 		break;
567 
568 	default:
569 		assert(0);
570 		abort();
571 	}
572 
573 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
574 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
575 	    &start_pid)) {
576 	case 0:
577 		break;
578 
579 	case ECONNABORTED:
580 		libscf_handle_rebind(h);
581 		goto rep_retry;
582 
583 	case ECANCELED:
584 		goto deleted;
585 
586 	default:
587 		assert(0);
588 		abort();
589 	}
590 
591 	if (inst->ri_i.i_primary_ctid >= 1) {
592 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
593 
594 		switch (check_contract(inst, B_TRUE, scf_inst)) {
595 		case 0:
596 			break;
597 
598 		case ECONNABORTED:
599 			libscf_handle_rebind(h);
600 			goto rep_retry;
601 
602 		case ECANCELED:
603 			goto deleted;
604 
605 		default:
606 			assert(0);
607 			abort();
608 		}
609 	}
610 
611 	if (inst->ri_i.i_transient_ctid >= 1) {
612 		switch (check_contract(inst, B_FALSE, scf_inst)) {
613 		case 0:
614 			break;
615 
616 		case ECONNABORTED:
617 			libscf_handle_rebind(h);
618 			goto rep_retry;
619 
620 		case ECANCELED:
621 			goto deleted;
622 
623 		default:
624 			assert(0);
625 			abort();
626 		}
627 	}
628 
629 	/* No more failures we live through, so add it to the list. */
630 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
631 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
632 	MUTEX_LOCK(&inst->ri_lock);
633 	MUTEX_LOCK(&inst->ri_queue_lock);
634 
635 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
636 
637 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
638 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
639 	MUTEX_UNLOCK(&instance_list.ril_lock);
640 
641 	if (start_pid != -1 &&
642 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
643 		int ret;
644 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
645 		if (ret == -1) {
646 			/*
647 			 * Implication:  if we can't reregister the
648 			 * instance, we will start another one.  Two
649 			 * instances may or may not result in a resource
650 			 * conflict.
651 			 */
652 			log_error(LOG_WARNING,
653 			    "%s: couldn't reregister %ld for wait\n",
654 			    inst->ri_i.i_fmri, start_pid);
655 		} else if (ret == 1) {
656 			/*
657 			 * Leading PID has exited.
658 			 */
659 			(void) stop_instance(h, inst, RSTOP_EXIT);
660 		}
661 	}
662 
663 
664 	scf_pg_destroy(pg);
665 
666 	if (do_commit_states)
667 		(void) restarter_instance_update_states(h, inst, state,
668 		    next_state, RERR_NONE, reason);
669 
670 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
671 	    service_style(inst->ri_flags));
672 
673 	MUTEX_UNLOCK(&inst->ri_queue_lock);
674 	MUTEX_UNLOCK(&inst->ri_lock);
675 
676 	startd_free(svc_name, max_scf_name_size);
677 	startd_free(inst_name, max_scf_name_size);
678 	scf_snapshot_destroy(snap);
679 	scf_instance_destroy(scf_inst);
680 	scf_service_destroy(scf_svc);
681 
682 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
683 	    name);
684 
685 	return (0);
686 
687 deleted:
688 	MUTEX_UNLOCK(&instance_list.ril_lock);
689 	startd_free(inst_name, max_scf_name_size);
690 	startd_free(svc_name, max_scf_name_size);
691 	if (snap != NULL)
692 		scf_snapshot_destroy(snap);
693 	scf_pg_destroy(pg);
694 	scf_instance_destroy(scf_inst);
695 	scf_service_destroy(scf_svc);
696 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
697 	uu_list_destroy(inst->ri_queue);
698 	if (inst->ri_logstem != NULL)
699 		startd_free(inst->ri_logstem, PATH_MAX);
700 	if (inst->ri_common_name != NULL)
701 		startd_free(inst->ri_common_name,
702 		    strlen(inst->ri_common_name) + 1);
703 	if (inst->ri_C_common_name != NULL)
704 		startd_free(inst->ri_C_common_name,
705 		    strlen(inst->ri_C_common_name) + 1);
706 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
707 	startd_free(inst, sizeof (restarter_inst_t));
708 	return (ENOENT);
709 }
710 
711 static void
712 restarter_delete_inst(restarter_inst_t *ri)
713 {
714 	int id;
715 	restarter_inst_t *rip;
716 	void *cookie = NULL;
717 	restarter_instance_qentry_t *e;
718 
719 	assert(MUTEX_HELD(&ri->ri_lock));
720 
721 	/*
722 	 * Must drop the instance lock so we can pick up the instance_list
723 	 * lock & remove the instance.
724 	 */
725 	id = ri->ri_id;
726 	MUTEX_UNLOCK(&ri->ri_lock);
727 
728 	MUTEX_LOCK(&instance_list.ril_lock);
729 
730 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
731 	if (rip == NULL) {
732 		MUTEX_UNLOCK(&instance_list.ril_lock);
733 		return;
734 	}
735 
736 	assert(ri == rip);
737 
738 	uu_list_remove(instance_list.ril_instance_list, ri);
739 
740 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
741 	    ri->ri_i.i_fmri);
742 
743 	MUTEX_UNLOCK(&instance_list.ril_lock);
744 
745 	/*
746 	 * We can lock the instance without holding the instance_list lock
747 	 * since we removed the instance from the list.
748 	 */
749 	MUTEX_LOCK(&ri->ri_lock);
750 	MUTEX_LOCK(&ri->ri_queue_lock);
751 
752 	if (ri->ri_i.i_primary_ctid >= 1)
753 		contract_hash_remove(ri->ri_i.i_primary_ctid);
754 
755 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
756 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
757 
758 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
759 		startd_free(e, sizeof (*e));
760 	uu_list_destroy(ri->ri_queue);
761 
762 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
763 	startd_free(ri->ri_logstem, PATH_MAX);
764 	if (ri->ri_common_name != NULL)
765 		startd_free(ri->ri_common_name,
766 		    strlen(ri->ri_common_name) + 1);
767 	if (ri->ri_C_common_name != NULL)
768 		startd_free(ri->ri_C_common_name,
769 		    strlen(ri->ri_C_common_name) + 1);
770 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
771 	(void) pthread_mutex_destroy(&ri->ri_lock);
772 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
773 	startd_free(ri, sizeof (restarter_inst_t));
774 }
775 
776 /*
777  * instance_is_wait_style()
778  *
779  *   Returns 1 if the given instance is a "wait-style" service instance.
780  */
781 int
782 instance_is_wait_style(restarter_inst_t *inst)
783 {
784 	assert(MUTEX_HELD(&inst->ri_lock));
785 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
786 }
787 
788 /*
789  * instance_is_transient_style()
790  *
791  *   Returns 1 if the given instance is a transient service instance.
792  */
793 int
794 instance_is_transient_style(restarter_inst_t *inst)
795 {
796 	assert(MUTEX_HELD(&inst->ri_lock));
797 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
798 }
799 
800 /*
801  * instance_in_transition()
802  * Returns 1 if instance is in transition, 0 if not
803  */
804 int
805 instance_in_transition(restarter_inst_t *inst)
806 {
807 	assert(MUTEX_HELD(&inst->ri_lock));
808 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
809 		return (0);
810 	return (1);
811 }
812 
813 /*
814  * returns 1 if instance is already started, 0 if not
815  */
816 static int
817 instance_started(restarter_inst_t *inst)
818 {
819 	int ret;
820 
821 	assert(MUTEX_HELD(&inst->ri_lock));
822 
823 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
824 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
825 		ret = 1;
826 	else
827 		ret = 0;
828 
829 	return (ret);
830 }
831 
832 /*
833  * Returns
834  *   0 - success
835  *   ECONNRESET - success, but h was rebound
836  */
837 int
838 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
839     restarter_instance_state_t new_state,
840     restarter_instance_state_t new_state_next, restarter_error_t err,
841     restarter_str_t reason)
842 {
843 	protocol_states_t *states;
844 	int e;
845 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
846 	boolean_t rebound = B_FALSE;
847 	int prev_state_online;
848 	int state_online;
849 
850 	assert(MUTEX_HELD(&ri->ri_lock));
851 
852 	prev_state_online = instance_started(ri);
853 
854 retry:
855 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
856 	    restarter_get_str_short(reason));
857 	switch (e) {
858 	case 0:
859 		break;
860 
861 	case ENOMEM:
862 		++retry_count;
863 		if (retry_count < ALLOC_RETRY) {
864 			(void) poll(NULL, 0, msecs);
865 			msecs *= ALLOC_DELAY_MULT;
866 			goto retry;
867 		}
868 
869 		/* Like startd_alloc(). */
870 		uu_die("Insufficient memory.\n");
871 		/* NOTREACHED */
872 
873 	case ECONNABORTED:
874 		libscf_handle_rebind(h);
875 		rebound = B_TRUE;
876 		goto retry;
877 
878 	case EPERM:
879 	case EACCES:
880 	case EROFS:
881 		log_error(LOG_NOTICE, "Could not commit state change for %s "
882 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
883 		/* FALLTHROUGH */
884 
885 	case ENOENT:
886 		ri->ri_i.i_state = new_state;
887 		ri->ri_i.i_next_state = new_state_next;
888 		break;
889 
890 	case EINVAL:
891 	default:
892 		bad_error("_restarter_commit_states", e);
893 	}
894 
895 	states = startd_alloc(sizeof (protocol_states_t));
896 	states->ps_state = new_state;
897 	states->ps_state_next = new_state_next;
898 	states->ps_err = err;
899 	states->ps_reason = reason;
900 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
901 	    (void *)states);
902 
903 	state_online = instance_started(ri);
904 
905 	if (prev_state_online && !state_online)
906 		ri->ri_post_offline_hook();
907 	else if (!prev_state_online && state_online)
908 		ri->ri_post_online_hook();
909 
910 	return (rebound ? ECONNRESET : 0);
911 }
912 
913 void
914 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
915 {
916 	restarter_inst_t *inst;
917 
918 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
919 
920 	inst = inst_lookup_by_name(fmri);
921 	if (inst == NULL)
922 		return;
923 
924 	inst->ri_flags |= flag;
925 
926 	MUTEX_UNLOCK(&inst->ri_lock);
927 }
928 
929 static void
930 restarter_take_pending_snapshots(scf_handle_t *h)
931 {
932 	restarter_inst_t *inst;
933 	int r;
934 
935 	MUTEX_LOCK(&instance_list.ril_lock);
936 
937 	for (inst = uu_list_first(instance_list.ril_instance_list);
938 	    inst != NULL;
939 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
940 		const char *fmri;
941 		scf_instance_t *sinst = NULL;
942 
943 		MUTEX_LOCK(&inst->ri_lock);
944 
945 		/*
946 		 * This is where we'd check inst->ri_method_thread and if it
947 		 * were nonzero we'd wait in anticipation of another thread
948 		 * executing a method for inst.  Doing so with the instance_list
949 		 * locked, though, leads to deadlock.  Since taking a snapshot
950 		 * during that window won't hurt anything, we'll just continue.
951 		 */
952 
953 		fmri = inst->ri_i.i_fmri;
954 
955 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
956 			scf_snapshot_t *rsnap;
957 
958 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
959 
960 			rsnap = libscf_get_or_make_running_snapshot(sinst,
961 			    fmri, B_FALSE);
962 
963 			scf_instance_destroy(sinst);
964 
965 			if (rsnap != NULL)
966 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
967 
968 			scf_snapshot_destroy(rsnap);
969 		}
970 
971 		if (inst->ri_flags & RINST_RETAKE_START) {
972 			switch (r = libscf_snapshots_poststart(h, fmri,
973 			    B_FALSE)) {
974 			case 0:
975 			case ENOENT:
976 				inst->ri_flags &= ~RINST_RETAKE_START;
977 				break;
978 
979 			case ECONNABORTED:
980 				break;
981 
982 			case EACCES:
983 			default:
984 				bad_error("libscf_snapshots_poststart", r);
985 			}
986 		}
987 
988 		MUTEX_UNLOCK(&inst->ri_lock);
989 	}
990 
991 	MUTEX_UNLOCK(&instance_list.ril_lock);
992 }
993 
994 /* ARGSUSED */
995 void *
996 restarter_post_fsminimal_thread(void *unused)
997 {
998 	scf_handle_t *h;
999 	int r;
1000 
1001 	h = libscf_handle_create_bound_loop();
1002 
1003 	for (;;) {
1004 		r = libscf_create_self(h);
1005 		if (r == 0)
1006 			break;
1007 
1008 		assert(r == ECONNABORTED);
1009 		libscf_handle_rebind(h);
1010 	}
1011 
1012 	restarter_take_pending_snapshots(h);
1013 
1014 	(void) scf_handle_unbind(h);
1015 	scf_handle_destroy(h);
1016 
1017 	return (NULL);
1018 }
1019 
1020 /*
1021  * int stop_instance()
1022  *
1023  *   Stop the instance identified by the instance given as the second argument,
1024  *   for the cause stated.
1025  *
1026  *   Returns
1027  *     0 - success
1028  *     -1 - inst is in transition
1029  */
1030 static int
1031 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1032     stop_cause_t cause)
1033 {
1034 	fork_info_t *info;
1035 	const char *cp;
1036 	int err;
1037 	restarter_error_t re;
1038 	restarter_str_t	reason;
1039 
1040 	assert(MUTEX_HELD(&inst->ri_lock));
1041 	assert(inst->ri_method_thread == 0);
1042 
1043 	switch (cause) {
1044 	case RSTOP_EXIT:
1045 		re = RERR_RESTART;
1046 		reason = restarter_str_ct_ev_exit;
1047 		cp = "all processes in service exited";
1048 		break;
1049 	case RSTOP_CORE:
1050 		re = RERR_FAULT;
1051 		reason = restarter_str_ct_ev_core;
1052 		cp = "process dumped core";
1053 		break;
1054 	case RSTOP_SIGNAL:
1055 		re = RERR_FAULT;
1056 		reason = restarter_str_ct_ev_signal;
1057 		cp = "process received fatal signal from outside the service";
1058 		break;
1059 	case RSTOP_HWERR:
1060 		re = RERR_FAULT;
1061 		reason = restarter_str_ct_ev_hwerr;
1062 		cp = "process killed due to uncorrectable hardware error";
1063 		break;
1064 	case RSTOP_DEPENDENCY:
1065 		re = RERR_RESTART;
1066 		reason = restarter_str_dependency_activity;
1067 		cp = "dependency activity requires stop";
1068 		break;
1069 	case RSTOP_DISABLE:
1070 		re = RERR_RESTART;
1071 		reason = restarter_str_disable_request;
1072 		cp = "service disabled";
1073 		break;
1074 	case RSTOP_RESTART:
1075 		re = RERR_RESTART;
1076 		reason = restarter_str_restart_request;
1077 		cp = "service restarting";
1078 		break;
1079 	default:
1080 #ifndef NDEBUG
1081 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1082 		    cause, __FILE__, __LINE__);
1083 #endif
1084 		abort();
1085 	}
1086 
1087 	/* Services in the disabled and maintenance state are ignored */
1088 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1089 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1090 		log_framework(LOG_DEBUG,
1091 		    "%s: stop_instance -> is maint/disabled\n",
1092 		    inst->ri_i.i_fmri);
1093 		return (0);
1094 	}
1095 
1096 	/* Already stopped instances are left alone */
1097 	if (instance_started(inst) == 0) {
1098 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1099 		    inst->ri_i.i_fmri);
1100 		return (0);
1101 	}
1102 
1103 	if (instance_in_transition(inst)) {
1104 		/* requeue event by returning -1 */
1105 		log_framework(LOG_DEBUG,
1106 		    "Restarter: Not stopping %s, in transition.\n",
1107 		    inst->ri_i.i_fmri);
1108 		return (-1);
1109 	}
1110 
1111 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1112 
1113 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1114 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1115 
1116 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1117 		/*
1118 		 * No need to stop instance, as child has exited; remove
1119 		 * contract and move the instance to the offline state.
1120 		 */
1121 		switch (err = restarter_instance_update_states(local_handle,
1122 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1123 		    reason)) {
1124 		case 0:
1125 		case ECONNRESET:
1126 			break;
1127 
1128 		default:
1129 			bad_error("restarter_instance_update_states", err);
1130 		}
1131 
1132 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1133 		reset_start_times(inst);
1134 
1135 		if (inst->ri_i.i_primary_ctid != 0) {
1136 			inst->ri_m_inst =
1137 			    safe_scf_instance_create(local_handle);
1138 			inst->ri_mi_deleted = B_FALSE;
1139 
1140 			libscf_reget_instance(inst);
1141 			method_remove_contract(inst, B_TRUE, B_TRUE);
1142 
1143 			scf_instance_destroy(inst->ri_m_inst);
1144 			inst->ri_m_inst = NULL;
1145 		}
1146 
1147 		switch (err = restarter_instance_update_states(local_handle,
1148 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1149 		    reason)) {
1150 		case 0:
1151 		case ECONNRESET:
1152 			break;
1153 
1154 		default:
1155 			bad_error("restarter_instance_update_states", err);
1156 		}
1157 
1158 		return (0);
1159 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1160 		/*
1161 		 * Stopping a wait service through means other than the pid
1162 		 * exiting should keep wait_thread() from restarting the
1163 		 * service, by removing it from the wait list.
1164 		 * We cannot remove it right now otherwise the process will
1165 		 * end up <defunct> so mark it to be ignored.
1166 		 */
1167 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1168 	}
1169 
1170 	switch (err = restarter_instance_update_states(local_handle, inst,
1171 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1172 	    RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
1173 	case 0:
1174 	case ECONNRESET:
1175 		break;
1176 
1177 	default:
1178 		bad_error("restarter_instance_update_states", err);
1179 	}
1180 
1181 	info = startd_zalloc(sizeof (fork_info_t));
1182 
1183 	info->sf_id = inst->ri_id;
1184 	info->sf_method_type = METHOD_STOP;
1185 	info->sf_event_type = re;
1186 	info->sf_reason = reason;
1187 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1188 
1189 	return (0);
1190 }
1191 
1192 /*
1193  * Returns
1194  *   ENOENT - fmri is not in instance_list
1195  *   0 - success
1196  *   ECONNRESET - success, though handle was rebound
1197  *   -1 - instance is in transition
1198  */
1199 int
1200 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1201 {
1202 	restarter_inst_t *rip;
1203 	int r;
1204 
1205 	rip = inst_lookup_by_name(fmri);
1206 	if (rip == NULL)
1207 		return (ENOENT);
1208 
1209 	r = stop_instance(h, rip, flags);
1210 
1211 	MUTEX_UNLOCK(&rip->ri_lock);
1212 
1213 	return (r);
1214 }
1215 
1216 static void
1217 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1218     unmaint_cause_t cause)
1219 {
1220 	ctid_t ctid;
1221 	scf_instance_t *inst;
1222 	int r;
1223 	uint_t tries = 0, msecs = ALLOC_DELAY;
1224 	const char *cp;
1225 	restarter_str_t	reason;
1226 
1227 	assert(MUTEX_HELD(&rip->ri_lock));
1228 
1229 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1230 		log_error(LOG_DEBUG, "Restarter: "
1231 		    "Ignoring maintenance off command because %s is not in the "
1232 		    "maintenance state.\n", rip->ri_i.i_fmri);
1233 		return;
1234 	}
1235 
1236 	switch (cause) {
1237 	case RUNMAINT_CLEAR:
1238 		cp = "clear requested";
1239 		reason = restarter_str_clear_request;
1240 		break;
1241 	case RUNMAINT_DISABLE:
1242 		cp = "disable requested";
1243 		reason = restarter_str_disable_request;
1244 		break;
1245 	default:
1246 #ifndef NDEBUG
1247 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1248 		    cause, __FILE__, __LINE__);
1249 #endif
1250 		abort();
1251 	}
1252 
1253 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1254 	    cp);
1255 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1256 	    "%s.\n", rip->ri_i.i_fmri, cp);
1257 
1258 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1259 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
1260 
1261 	/*
1262 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1263 	 * a primary contract.
1264 	 */
1265 	if (rip->ri_i.i_primary_ctid == 0)
1266 		return;
1267 
1268 	ctid = rip->ri_i.i_primary_ctid;
1269 	contract_abandon(ctid);
1270 	rip->ri_i.i_primary_ctid = 0;
1271 
1272 rep_retry:
1273 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1274 	case 0:
1275 		break;
1276 
1277 	case ECONNABORTED:
1278 		libscf_handle_rebind(h);
1279 		goto rep_retry;
1280 
1281 	case ENOENT:
1282 		/* Must have been deleted. */
1283 		return;
1284 
1285 	case EINVAL:
1286 	case ENOTSUP:
1287 	default:
1288 		bad_error("libscf_handle_rebind", r);
1289 	}
1290 
1291 again:
1292 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1293 	switch (r) {
1294 	case 0:
1295 		break;
1296 
1297 	case ENOMEM:
1298 		++tries;
1299 		if (tries < ALLOC_RETRY) {
1300 			(void) poll(NULL, 0, msecs);
1301 			msecs *= ALLOC_DELAY_MULT;
1302 			goto again;
1303 		}
1304 
1305 		uu_die("Insufficient memory.\n");
1306 		/* NOTREACHED */
1307 
1308 	case ECONNABORTED:
1309 		scf_instance_destroy(inst);
1310 		libscf_handle_rebind(h);
1311 		goto rep_retry;
1312 
1313 	case ECANCELED:
1314 		break;
1315 
1316 	case EPERM:
1317 	case EACCES:
1318 	case EROFS:
1319 		log_error(LOG_INFO,
1320 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1321 		    rip->ri_i.i_fmri, strerror(r));
1322 		break;
1323 
1324 	case EINVAL:
1325 	case EBADF:
1326 	default:
1327 		bad_error("restarter_remove_contract", r);
1328 	}
1329 
1330 	scf_instance_destroy(inst);
1331 }
1332 
1333 /*
1334  * enable_inst()
1335  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1336  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1337  *   disabled, move it to offline.  If the event is _DISABLE or
1338  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1339  *
1340  *   Returns
1341  *     0 - success
1342  *     ECONNRESET - h was rebound
1343  */
1344 static int
1345 enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1346     restarter_instance_qentry_t *riq)
1347 {
1348 	restarter_instance_state_t state;
1349 	restarter_event_type_t e = riq->riq_type;
1350 	restarter_str_t reason = restarter_str_per_configuration;
1351 	int r;
1352 
1353 	assert(MUTEX_HELD(&inst->ri_lock));
1354 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1355 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1356 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1357 	assert(instance_in_transition(inst) == 0);
1358 
1359 	state = inst->ri_i.i_state;
1360 
1361 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1362 		inst->ri_i.i_enabled = 1;
1363 
1364 		if (state == RESTARTER_STATE_UNINIT ||
1365 		    state == RESTARTER_STATE_DISABLED) {
1366 			/*
1367 			 * B_FALSE: Don't log an error if the log_instance()
1368 			 * fails because it will fail on the miniroot before
1369 			 * install-discovery runs.
1370 			 */
1371 			log_instance(inst, B_FALSE, "Enabled.");
1372 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1373 			    inst->ri_i.i_fmri);
1374 
1375 			/*
1376 			 * If we are coming from DISABLED, it was obviously an
1377 			 * enable request. If we are coming from UNINIT, it may
1378 			 * have been a sevice in MAINT that was cleared.
1379 			 */
1380 			if (riq->riq_reason == restarter_str_clear_request)
1381 				reason = restarter_str_clear_request;
1382 			else if (state == RESTARTER_STATE_DISABLED)
1383 				reason = restarter_str_enable_request;
1384 			(void) restarter_instance_update_states(h, inst,
1385 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1386 			    RERR_NONE, reason);
1387 		} else {
1388 			log_framework(LOG_DEBUG, "Restarter: "
1389 			    "Not changing state of %s for enable command.\n",
1390 			    inst->ri_i.i_fmri);
1391 		}
1392 	} else {
1393 		inst->ri_i.i_enabled = 0;
1394 
1395 		switch (state) {
1396 		case RESTARTER_STATE_ONLINE:
1397 		case RESTARTER_STATE_DEGRADED:
1398 			r = stop_instance(h, inst, RSTOP_DISABLE);
1399 			return (r == ECONNRESET ? 0 : r);
1400 
1401 		case RESTARTER_STATE_OFFLINE:
1402 		case RESTARTER_STATE_UNINIT:
1403 			if (inst->ri_i.i_primary_ctid != 0) {
1404 				inst->ri_m_inst = safe_scf_instance_create(h);
1405 				inst->ri_mi_deleted = B_FALSE;
1406 
1407 				libscf_reget_instance(inst);
1408 				method_remove_contract(inst, B_TRUE, B_TRUE);
1409 
1410 				scf_instance_destroy(inst->ri_m_inst);
1411 			}
1412 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1413 			log_instance(inst, B_FALSE, "Disabled.");
1414 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1415 			    inst->ri_i.i_fmri);
1416 
1417 			/*
1418 			 * If we are coming from OFFLINE, it was obviously a
1419 			 * disable request. But if we are coming from
1420 			 * UNINIT, it may have been a disable request for a
1421 			 * service in MAINT.
1422 			 */
1423 			if (riq->riq_reason == restarter_str_disable_request ||
1424 			    state == RESTARTER_STATE_OFFLINE)
1425 				reason = restarter_str_disable_request;
1426 			(void) restarter_instance_update_states(h, inst,
1427 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1428 			    RERR_RESTART, reason);
1429 			return (0);
1430 
1431 		case RESTARTER_STATE_DISABLED:
1432 			break;
1433 
1434 		case RESTARTER_STATE_MAINT:
1435 			/*
1436 			 * We only want to pull the instance out of maintenance
1437 			 * if the disable is on adminstrative request.  The
1438 			 * graph engine sends _DISABLE events whenever a
1439 			 * service isn't in the disabled state, and we don't
1440 			 * want to pull the service out of maintenance if,
1441 			 * for example, it is there due to a dependency cycle.
1442 			 */
1443 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1444 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1445 			break;
1446 
1447 		default:
1448 #ifndef NDEBUG
1449 			(void) fprintf(stderr, "Restarter instance %s has "
1450 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1451 #endif
1452 			abort();
1453 		}
1454 	}
1455 
1456 	return (0);
1457 }
1458 
1459 static void
1460 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1461     int32_t reason)
1462 {
1463 	fork_info_t *info;
1464 	restarter_str_t	new_reason;
1465 
1466 	assert(MUTEX_HELD(&inst->ri_lock));
1467 	assert(instance_in_transition(inst) == 0);
1468 	assert(inst->ri_method_thread == 0);
1469 
1470 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1471 	    inst->ri_i.i_fmri);
1472 
1473 	/*
1474 	 * We want to keep the original reason for restarts and clear actions
1475 	 */
1476 	switch (reason) {
1477 	case restarter_str_restart_request:
1478 	case restarter_str_clear_request:
1479 		new_reason = reason;
1480 		break;
1481 	default:
1482 		new_reason = restarter_str_dependencies_satisfied;
1483 	}
1484 
1485 	/* Services in the disabled and maintenance state are ignored */
1486 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1487 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1488 	    inst->ri_i.i_enabled == 0) {
1489 		log_framework(LOG_DEBUG,
1490 		    "%s: start_instance -> is maint/disabled\n",
1491 		    inst->ri_i.i_fmri);
1492 		return;
1493 	}
1494 
1495 	/* Already started instances are left alone */
1496 	if (instance_started(inst) == 1) {
1497 		log_framework(LOG_DEBUG,
1498 		    "%s: start_instance -> is already started\n",
1499 		    inst->ri_i.i_fmri);
1500 		return;
1501 	}
1502 
1503 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1504 
1505 	(void) restarter_instance_update_states(local_handle, inst,
1506 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
1507 
1508 	info = startd_zalloc(sizeof (fork_info_t));
1509 
1510 	info->sf_id = inst->ri_id;
1511 	info->sf_method_type = METHOD_START;
1512 	info->sf_event_type = RERR_NONE;
1513 	info->sf_reason = new_reason;
1514 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1515 }
1516 
1517 static int
1518 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1519 {
1520 	scf_instance_t *inst;
1521 	int ret = 0;
1522 
1523 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1524 		return (-1);
1525 
1526 	ret = restarter_inst_ractions_from_tty(inst);
1527 
1528 	scf_instance_destroy(inst);
1529 	return (ret);
1530 }
1531 
1532 static void
1533 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1534     restarter_str_t reason)
1535 {
1536 	fork_info_t *info;
1537 	scf_instance_t *scf_inst = NULL;
1538 
1539 	assert(MUTEX_HELD(&rip->ri_lock));
1540 	assert(reason != restarter_str_none);
1541 	assert(rip->ri_method_thread == 0);
1542 
1543 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1544 	    restarter_get_str_short(reason));
1545 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1546 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
1547 
1548 	/* Services in the maintenance state are ignored */
1549 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1550 		log_framework(LOG_DEBUG,
1551 		    "%s: maintain_instance -> is already in maintenance\n",
1552 		    rip->ri_i.i_fmri);
1553 		return;
1554 	}
1555 
1556 	/*
1557 	 * If reason state is restarter_str_service_request and
1558 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1559 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
1560 	 */
1561 	if (reason == restarter_str_service_request &&
1562 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
1563 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1564 			if (restarter_inst_set_aux_fmri(scf_inst))
1565 				log_framework(LOG_DEBUG, "%s: "
1566 				    "restarter_inst_set_aux_fmri failed: ",
1567 				    rip->ri_i.i_fmri);
1568 		} else {
1569 			log_framework(LOG_DEBUG, "%s: "
1570 			    "restarter_inst_validate_ractions_aux_fmri "
1571 			    "failed: ", rip->ri_i.i_fmri);
1572 
1573 			if (restarter_inst_reset_aux_fmri(scf_inst))
1574 				log_framework(LOG_DEBUG, "%s: "
1575 				    "restarter_inst_reset_aux_fmri failed: ",
1576 				    rip->ri_i.i_fmri);
1577 		}
1578 		scf_instance_destroy(scf_inst);
1579 	}
1580 
1581 	if (immediate || !instance_started(rip)) {
1582 		if (rip->ri_i.i_primary_ctid != 0) {
1583 			rip->ri_m_inst = safe_scf_instance_create(h);
1584 			rip->ri_mi_deleted = B_FALSE;
1585 
1586 			libscf_reget_instance(rip);
1587 			method_remove_contract(rip, B_TRUE, B_TRUE);
1588 
1589 			scf_instance_destroy(rip->ri_m_inst);
1590 		}
1591 
1592 		(void) restarter_instance_update_states(h, rip,
1593 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1594 		    reason);
1595 		return;
1596 	}
1597 
1598 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1599 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
1600 
1601 	log_transition(rip, MAINT_REQUESTED);
1602 
1603 	info = startd_zalloc(sizeof (*info));
1604 	info->sf_id = rip->ri_id;
1605 	info->sf_method_type = METHOD_STOP;
1606 	info->sf_event_type = RERR_RESTART;
1607 	info->sf_reason = reason;
1608 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1609 }
1610 
1611 static void
1612 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1613 {
1614 	scf_instance_t *inst;
1615 	scf_snapshot_t *snap;
1616 	fork_info_t *info;
1617 	int r;
1618 
1619 	assert(MUTEX_HELD(&rip->ri_lock));
1620 
1621 	log_instance(rip, B_TRUE, "Rereading configuration.");
1622 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1623 	    rip->ri_i.i_fmri);
1624 
1625 rep_retry:
1626 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1627 	switch (r) {
1628 	case 0:
1629 		break;
1630 
1631 	case ECONNABORTED:
1632 		libscf_handle_rebind(h);
1633 		goto rep_retry;
1634 
1635 	case ENOENT:
1636 		/* Must have been deleted. */
1637 		return;
1638 
1639 	case EINVAL:
1640 	case ENOTSUP:
1641 	default:
1642 		bad_error("libscf_fmri_get_instance", r);
1643 	}
1644 
1645 	snap = libscf_get_running_snapshot(inst);
1646 
1647 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1648 	    &rip->ri_utmpx_prefix);
1649 	switch (r) {
1650 	case 0:
1651 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1652 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1653 		break;
1654 
1655 	case ECONNABORTED:
1656 		scf_instance_destroy(inst);
1657 		scf_snapshot_destroy(snap);
1658 		libscf_handle_rebind(h);
1659 		goto rep_retry;
1660 
1661 	case ECANCELED:
1662 	case ENOENT:
1663 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1664 		break;
1665 
1666 	default:
1667 		bad_error("libscf_get_startd_properties", r);
1668 	}
1669 
1670 	if (instance_started(rip)) {
1671 		/* Refresh does not change the state. */
1672 		(void) restarter_instance_update_states(h, rip,
1673 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1674 		    restarter_str_refresh);
1675 
1676 		info = startd_zalloc(sizeof (*info));
1677 		info->sf_id = rip->ri_id;
1678 		info->sf_method_type = METHOD_REFRESH;
1679 		info->sf_event_type = RERR_REFRESH;
1680 		info->sf_reason = NULL;
1681 
1682 		assert(rip->ri_method_thread == 0);
1683 		rip->ri_method_thread =
1684 		    startd_thread_create(method_thread, info);
1685 	}
1686 
1687 	scf_snapshot_destroy(snap);
1688 	scf_instance_destroy(inst);
1689 }
1690 
/*
 * Printable names for restarter events.  The table is indexed by event
 * type when logging, so the order of the entries matters.
 */
const char *event_names[] = {
	"INVALID",			/* 0 */
	"ADD_INSTANCE",			/* 1 */
	"REMOVE_INSTANCE",		/* 2 */
	"ENABLE",			/* 3 */
	"DISABLE",			/* 4 */
	"ADMIN_DEGRADED",		/* 5 */
	"ADMIN_REFRESH",		/* 6 */
	"ADMIN_RESTART",		/* 7 */
	"ADMIN_MAINT_OFF",		/* 8 */
	"ADMIN_MAINT_ON",		/* 9 */
	"ADMIN_MAINT_ON_IMMEDIATE",	/* 10 */
	"STOP",				/* 11 */
	"START",			/* 12 */
	"DEPENDENCY_CYCLE",		/* 13 */
	"INVALID_DEPENDENCY",		/* 14 */
	"ADMIN_DISABLE",		/* 15 */
	"STOP_RESET"			/* 16 */
};
1697 
/*
 * void *restarter_process_events()
 *
 *   Called in a separate thread to process the events on an instance's
 *   queue.  Empties the queue completely, and tries to keep the thread
 *   around for a little while after the queue is empty to save on
 *   startup costs.
 *
 *   arg is the FMRI string of the instance to service; it is released
 *   with free() before the thread returns.  Always returns NULL.
 *
 *   Two locks are juggled here: ri_queue_lock protects the event queue and
 *   is held while the queue is inspected or modified; ri_lock protects the
 *   rest of the instance and is held only while a single event is acted
 *   upon.  The queue lock is dropped before the instance lock is taken.
 */
static void *
restarter_process_events(void *arg)
{
	scf_handle_t *h;
	restarter_instance_qentry_t *event;
	restarter_inst_t *rip;
	char *fmri = (char *)arg;
	struct timespec to;

	assert(fmri != NULL);

	/* This thread works with its own repository handle. */
	h = libscf_handle_create_bound_loop();

	/* grab the queue lock */
	rip = inst_lookup_queue(fmri);
	if (rip == NULL)
		goto out;

again:

	/*
	 * The event stays on the queue while it is being processed; it is
	 * only removed and freed at the bottom of the loop.
	 */
	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
		restarter_inst_t *inst;

		/* drop the queue lock */
		MUTEX_UNLOCK(&rip->ri_queue_lock);

		/*
		 * Grab the inst lock -- this waits until any outstanding
		 * method finishes running.
		 */
		inst = inst_lookup_by_name(fmri);
		if (inst == NULL) {
			/* Getting deleted in the middle isn't an error. */
			goto cont;
		}

		assert(instance_in_transition(inst) == 0);

		/* process the event */
		switch (event->riq_type) {
		case RESTARTER_EVENT_TYPE_ENABLE:
		case RESTARTER_EVENT_TYPE_DISABLE:
			(void) enable_inst(h, inst, event);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			/* Only forget the start history if the disable took. */
			if (enable_inst(h, inst, event) == 0)
				reset_start_times(inst);
			break;

		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
			/*
			 * inst must not be touched (or unlocked) after this
			 * call, hence the jump past the unlock below.
			 */
			restarter_delete_inst(inst);
			inst = NULL;
			goto cont;

		case RESTARTER_EVENT_TYPE_STOP_RESET:
			reset_start_times(inst);
			/* FALLTHROUGH */
		case RESTARTER_EVENT_TYPE_STOP:
			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
			break;

		case RESTARTER_EVENT_TYPE_START:
			start_instance(h, inst, event->riq_reason);
			break;

		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
			maintain_instance(h, inst, 0,
			    restarter_str_dependency_cycle);
			break;

		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			maintain_instance(h, inst, 0,
			    restarter_str_invalid_dependency);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
			/*
			 * The result of event_from_tty() selects the reason
			 * recorded for the maintenance request.
			 */
			if (event_from_tty(h, inst) == 0)
				maintain_instance(h, inst, 0,
				    restarter_str_service_request);
			else
				maintain_instance(h, inst, 0,
				    restarter_str_administrative_request);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
			/* As above, but with the immediate flag set. */
			if (event_from_tty(h, inst) == 0)
				maintain_instance(h, inst, 1,
				    restarter_str_service_request);
			else
				maintain_instance(h, inst, 1,
				    restarter_str_administrative_request);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
			reset_start_times(inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
			refresh_instance(h, inst);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
			log_framework(LOG_WARNING, "Restarter: "
			    "%s command (for %s) unimplemented.\n",
			    event_names[event->riq_type], inst->ri_i.i_fmri);
			break;

		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
			if (!instance_started(inst)) {
				log_framework(LOG_DEBUG, "Restarter: "
				    "Not restarting %s; not running.\n",
				    inst->ri_i.i_fmri);
			} else {
				/*
				 * Stop the instance.  If it can be restarted,
				 * the graph engine will send a new event.
				 */
				if (stop_instance(h, inst, RSTOP_RESTART) == 0)
					reset_start_times(inst);
			}
			break;

		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
		default:
			/*
			 * ADD_INSTANCE is handled synchronously by the event
			 * thread and should never appear on a queue.
			 */
#ifndef NDEBUG
			uu_warn("%s:%d: Bad restarter event %d.  "
			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
			abort();
		}

		assert(inst != NULL);
		MUTEX_UNLOCK(&inst->ri_lock);

cont:
		/* grab the queue lock */
		rip = inst_lookup_queue(fmri);
		if (rip == NULL)
			goto out;

		/* delete the event */
		uu_list_remove(rip->ri_queue, event);
		startd_free(event, sizeof (restarter_instance_qentry_t));
	}

	assert(rip != NULL);

	/*
	 * Try to preserve the thread for a little while for future use.
	 * The wait gives up the queue lock, so new events can be queued (and
	 * signalled through ri_queue_cv) in the meantime; the result of the
	 * wait is ignored and the queue is simply rechecked.
	 */
	to.tv_sec = 3;
	to.tv_nsec = 0;
	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
	    &rip->ri_queue_lock, &to);

	if (uu_list_first(rip->ri_queue) != NULL)
		goto again;

	/*
	 * Nothing arrived.  Clear ri_queue_thread while still holding the
	 * queue lock to mark that no processing thread is running.
	 */
	rip->ri_queue_thread = 0;
	MUTEX_UNLOCK(&rip->ri_queue_lock);

out:
	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
	free(fmri);
	return (NULL);
}
1875 
1876 static int
1877 is_admin_event(restarter_event_type_t t) {
1878 
1879 	switch (t) {
1880 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1881 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1882 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1883 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1884 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1885 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1886 		return (1);
1887 	default:
1888 		return (0);
1889 	}
1890 }
1891 
1892 static void
1893 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1894 {
1895 	restarter_instance_qentry_t *qe;
1896 	int r;
1897 
1898 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1899 	assert(!MUTEX_HELD(&ri->ri_lock));
1900 
1901 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1902 	qe->riq_type = e->rpe_type;
1903 	qe->riq_reason = e->rpe_reason;
1904 
1905 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1906 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1907 	assert(r == 0);
1908 }
1909 
1910 /*
1911  * void *restarter_event_thread()
1912  *
1913  *  Handle incoming graph events by placing them on a per-instance
1914  *  queue.  We can't lock the main part of the instance structure, so
1915  *  just modify the seprarately locked event queue portion.
1916  */
1917 /*ARGSUSED*/
1918 static void *
1919 restarter_event_thread(void *unused)
1920 {
1921 	scf_handle_t *h;
1922 
1923 	/*
1924 	 * This is a new thread, and thus, gets its own handle
1925 	 * to the repository.
1926 	 */
1927 	h = libscf_handle_create_bound_loop();
1928 
1929 	MUTEX_LOCK(&ru->restarter_update_lock);
1930 
1931 	/*CONSTCOND*/
1932 	while (1) {
1933 		restarter_protocol_event_t *e;
1934 
1935 		while (ru->restarter_update_wakeup == 0)
1936 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1937 			    &ru->restarter_update_lock);
1938 
1939 		ru->restarter_update_wakeup = 0;
1940 
1941 		while ((e = restarter_event_dequeue()) != NULL) {
1942 			restarter_inst_t *rip;
1943 			char *fmri;
1944 
1945 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1946 
1947 			/*
1948 			 * ADD_INSTANCE is special: there's likely no
1949 			 * instance structure yet, so we need to handle the
1950 			 * addition synchronously.
1951 			 */
1952 			switch (e->rpe_type) {
1953 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1954 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1955 					log_error(LOG_INFO, "Restarter: "
1956 					    "Could not add %s.\n", e->rpe_inst);
1957 
1958 				MUTEX_LOCK(&st->st_load_lock);
1959 				if (--st->st_load_instances == 0)
1960 					(void) pthread_cond_broadcast(
1961 					    &st->st_load_cv);
1962 				MUTEX_UNLOCK(&st->st_load_lock);
1963 
1964 				goto nolookup;
1965 			}
1966 
1967 			/*
1968 			 * Lookup the instance, locking only the event queue.
1969 			 * Can't grab ri_lock here because it might be held
1970 			 * by a long-running method.
1971 			 */
1972 			rip = inst_lookup_queue(e->rpe_inst);
1973 			if (rip == NULL) {
1974 				log_error(LOG_INFO, "Restarter: "
1975 				    "Ignoring %s command for unknown service "
1976 				    "%s.\n", event_names[e->rpe_type],
1977 				    e->rpe_inst);
1978 				goto nolookup;
1979 			}
1980 
1981 			/* Keep ADMIN events from filling up the queue. */
1982 			if (is_admin_event(e->rpe_type) &&
1983 			    uu_list_numnodes(rip->ri_queue) >
1984 			    RINST_QUEUE_THRESHOLD) {
1985 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1986 				log_instance(rip, B_TRUE, "Instance event "
1987 				    "queue overflow.  Dropping administrative "
1988 				    "request.");
1989 				log_framework(LOG_DEBUG, "%s: Instance event "
1990 				    "queue overflow.  Dropping administrative "
1991 				    "request.\n", rip->ri_i.i_fmri);
1992 				goto nolookup;
1993 			}
1994 
1995 			/* Now add the event to the instance queue. */
1996 			restarter_queue_event(rip, e);
1997 
1998 			if (rip->ri_queue_thread == 0) {
1999 				/*
2000 				 * Start a thread if one isn't already
2001 				 * running.
2002 				 */
2003 				fmri = safe_strdup(e->rpe_inst);
2004 				rip->ri_queue_thread =  startd_thread_create(
2005 				    restarter_process_events, (void *)fmri);
2006 			} else {
2007 				/*
2008 				 * Signal the existing thread that there's
2009 				 * a new event.
2010 				 */
2011 				(void) pthread_cond_broadcast(
2012 				    &rip->ri_queue_cv);
2013 			}
2014 
2015 			MUTEX_UNLOCK(&rip->ri_queue_lock);
2016 nolookup:
2017 			restarter_event_release(e);
2018 
2019 			MUTEX_LOCK(&ru->restarter_update_lock);
2020 		}
2021 	}
2022 
2023 	/*
2024 	 * Unreachable for now -- there's currently no graceful cleanup
2025 	 * called on exit().
2026 	 */
2027 	(void) scf_handle_unbind(h);
2028 	scf_handle_destroy(h);
2029 	return (NULL);
2030 }
2031 
2032 static restarter_inst_t *
2033 contract_to_inst(ctid_t ctid)
2034 {
2035 	restarter_inst_t *inst;
2036 	int id;
2037 
2038 	id = lookup_inst_by_contract(ctid);
2039 	if (id == -1)
2040 		return (NULL);
2041 
2042 	inst = inst_lookup_by_id(id);
2043 	if (inst != NULL) {
2044 		/*
2045 		 * Since ri_lock isn't held by the contract id lookup, this
2046 		 * instance may have been restarted and now be in a new
2047 		 * contract, making the old contract no longer valid for this
2048 		 * instance.
2049 		 */
2050 		if (ctid != inst->ri_i.i_primary_ctid) {
2051 			MUTEX_UNLOCK(&inst->ri_lock);
2052 			inst = NULL;
2053 		}
2054 	}
2055 	return (inst);
2056 }
2057 
2058 /*
2059  * void contract_action()
2060  *   Take action on contract events.
2061  */
2062 static void
2063 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
2064     uint32_t type)
2065 {
2066 	const char *fmri = inst->ri_i.i_fmri;
2067 
2068 	assert(MUTEX_HELD(&inst->ri_lock));
2069 
2070 	/*
2071 	 * If startd has stopped this contract, there is no need to
2072 	 * stop it again.
2073 	 */
2074 	if (inst->ri_i.i_primary_ctid > 0 &&
2075 	    inst->ri_i.i_primary_ctid_stopped)
2076 		return;
2077 
2078 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
2079 	    | CT_PR_EV_HWERR)) == 0) {
2080 		/*
2081 		 * There shouldn't be other events, since that's not how we set
2082 		 * the terms. Thus, just log an error and drive on.
2083 		 */
2084 		log_framework(LOG_NOTICE,
2085 		    "%s: contract %ld received unexpected critical event "
2086 		    "(%d)\n", fmri, id, type);
2087 		return;
2088 	}
2089 
2090 	assert(instance_in_transition(inst) == 0);
2091 
2092 	if (instance_is_wait_style(inst)) {
2093 		/*
2094 		 * We ignore all events; if they impact the
2095 		 * process we're monitoring, then the
2096 		 * wait_thread will stop the instance.
2097 		 */
2098 		log_framework(LOG_DEBUG,
2099 		    "%s: ignoring contract event on wait-style service\n",
2100 		    fmri);
2101 	} else {
2102 		/*
2103 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
2104 		 */
2105 		switch (type) {
2106 		case CT_PR_EV_EMPTY:
2107 			(void) stop_instance(h, inst, RSTOP_EXIT);
2108 			break;
2109 		case CT_PR_EV_CORE:
2110 			(void) stop_instance(h, inst, RSTOP_CORE);
2111 			break;
2112 		case CT_PR_EV_SIGNAL:
2113 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
2114 			break;
2115 		case CT_PR_EV_HWERR:
2116 			(void) stop_instance(h, inst, RSTOP_HWERR);
2117 			break;
2118 		}
2119 	}
2120 }
2121 
2122 /*
 * void *restarter_contracts_event_thread(void *)
2124  *   Listens to the process contract bundle for critical events, taking action
2125  *   on events from contracts we know we are responsible for.
2126  */
2127 /*ARGSUSED*/
2128 static void *
2129 restarter_contracts_event_thread(void *unused)
2130 {
2131 	int fd, err;
2132 	scf_handle_t *local_handle;
2133 
2134 	/*
2135 	 * Await graph load completion.  That is, stop here, until we've scanned
2136 	 * the repository for contract - instance associations.
2137 	 */
2138 	MUTEX_LOCK(&st->st_load_lock);
2139 	while (!(st->st_load_complete && st->st_load_instances == 0))
2140 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2141 	MUTEX_UNLOCK(&st->st_load_lock);
2142 
2143 	/*
2144 	 * This is a new thread, and thus, gets its own handle
2145 	 * to the repository.
2146 	 */
2147 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2148 		uu_die("Unable to bind a new repository handle: %s\n",
2149 		    scf_strerror(scf_error()));
2150 
2151 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2152 	if (fd == -1)
2153 		uu_die("process bundle open failed");
2154 
2155 	/*
2156 	 * Make sure we get all events (including those generated by configd
2157 	 * before this thread was started).
2158 	 */
2159 	err = ct_event_reset(fd);
2160 	assert(err == 0);
2161 
2162 	for (;;) {
2163 		int efd, sfd;
2164 		ct_evthdl_t ev;
2165 		uint32_t type;
2166 		ctevid_t evid;
2167 		ct_stathdl_t status;
2168 		ctid_t ctid;
2169 		restarter_inst_t *inst;
2170 		uint64_t cookie;
2171 
2172 		if (err = ct_event_read_critical(fd, &ev)) {
2173 			log_error(LOG_WARNING,
2174 			    "Error reading next contract event: %s",
2175 			    strerror(err));
2176 			continue;
2177 		}
2178 
2179 		evid = ct_event_get_evid(ev);
2180 		ctid = ct_event_get_ctid(ev);
2181 		type = ct_event_get_type(ev);
2182 
2183 		/* Fetch cookie. */
2184 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2185 		    < 0) {
2186 			ct_event_free(ev);
2187 			continue;
2188 		}
2189 
2190 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2191 			log_framework(LOG_WARNING, "Could not get status for "
2192 			    "contract %ld: %s\n", ctid, strerror(err));
2193 
2194 			startd_close(sfd);
2195 			ct_event_free(ev);
2196 			continue;
2197 		}
2198 
2199 		cookie = ct_status_get_cookie(status);
2200 
2201 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2202 		    "cookie %lld\n", type, ctid, cookie);
2203 
2204 		ct_status_free(status);
2205 
2206 		startd_close(sfd);
2207 
2208 		/*
2209 		 * svc.configd(1M) restart handling performed by the
2210 		 * fork_configd_thread.  We don't acknowledge, as that thread
2211 		 * will do so.
2212 		 */
2213 		if (cookie == CONFIGD_COOKIE) {
2214 			ct_event_free(ev);
2215 			continue;
2216 		}
2217 
2218 		inst = NULL;
2219 		if (storing_contract != 0 &&
2220 		    (inst = contract_to_inst(ctid)) == NULL) {
2221 			/*
2222 			 * This can happen for two reasons:
2223 			 * - method_run() has not yet stored the
2224 			 *    the contract into the internal hash table.
2225 			 * - we receive an EMPTY event for an abandoned
2226 			 *    contract.
2227 			 * If there is any contract in the process of
2228 			 * being stored into the hash table then re-read
2229 			 * the event later.
2230 			 */
2231 			log_framework(LOG_DEBUG,
2232 			    "Reset event %d for unknown "
2233 			    "contract id %ld\n", type, ctid);
2234 
2235 			/* don't go too fast */
2236 			(void) poll(NULL, 0, 100);
2237 
2238 			(void) ct_event_reset(fd);
2239 			ct_event_free(ev);
2240 			continue;
2241 		}
2242 
2243 		/*
2244 		 * Do not call contract_to_inst() again if first
2245 		 * call succeeded.
2246 		 */
2247 		if (inst == NULL)
2248 			inst = contract_to_inst(ctid);
2249 		if (inst == NULL) {
2250 			/*
2251 			 * This can happen if we receive an EMPTY
2252 			 * event for an abandoned contract.
2253 			 */
2254 			log_framework(LOG_DEBUG,
2255 			    "Received event %d for unknown contract id "
2256 			    "%ld\n", type, ctid);
2257 		} else {
2258 			log_framework(LOG_DEBUG,
2259 			    "Received event %d for contract id "
2260 			    "%ld (%s)\n", type, ctid,
2261 			    inst->ri_i.i_fmri);
2262 
2263 			contract_action(local_handle, inst, ctid, type);
2264 
2265 			MUTEX_UNLOCK(&inst->ri_lock);
2266 		}
2267 
2268 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2269 		    O_WRONLY);
2270 		if (efd != -1) {
2271 			(void) ct_ctl_ack(efd, evid);
2272 			startd_close(efd);
2273 		}
2274 
2275 		ct_event_free(ev);
2276 
2277 	}
2278 
2279 	/*NOTREACHED*/
2280 	return (NULL);
2281 }
2282 
2283 /*
2284  * Timeout queue, processed by restarter_timeouts_event_thread().
2285  */
2286 timeout_queue_t *timeouts;
2287 static uu_list_pool_t *timeout_pool;
2288 
2289 typedef struct timeout_update {
2290 	pthread_mutex_t		tu_lock;
2291 	pthread_cond_t		tu_cv;
2292 	int			tu_wakeup;
2293 } timeout_update_t;
2294 
2295 timeout_update_t *tu;
2296 
/*
 * NULL-terminated list of instance FMRIs that is_timeout_ovr() matches
 * against.  Both the strings and the table slots are const so the whole
 * table is read-only.
 */
static const char *const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
2311 
2312 int
2313 is_timeout_ovr(restarter_inst_t *inst)
2314 {
2315 	int i;
2316 
2317 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2318 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2319 			log_instance(inst, B_TRUE, "Timeout override by "
2320 			    "svc.startd.  Using infinite timeout.");
2321 			return (1);
2322 		}
2323 	}
2324 
2325 	return (0);
2326 }
2327 
2328 /*ARGSUSED*/
2329 static int
2330 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2331 {
2332 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2333 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2334 
2335 	if (t1 > t2)
2336 		return (1);
2337 	else if (t1 < t2)
2338 		return (-1);
2339 	return (0);
2340 }
2341 
2342 void
2343 timeout_init()
2344 {
2345 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2346 
2347 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2348 
2349 	timeout_pool = startd_list_pool_create("timeouts",
2350 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2351 	    timeout_compare, UU_LIST_POOL_DEBUG);
2352 	assert(timeout_pool != NULL);
2353 
2354 	timeouts->tq_list = startd_list_create(timeout_pool,
2355 	    timeouts, UU_LIST_SORTED);
2356 	assert(timeouts->tq_list != NULL);
2357 
2358 	tu = startd_zalloc(sizeof (timeout_update_t));
2359 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2360 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2361 }
2362 
2363 void
2364 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2365 {
2366 	hrtime_t now, timeout;
2367 	timeout_entry_t *entry;
2368 	uu_list_index_t idx;
2369 
2370 	assert(MUTEX_HELD(&inst->ri_lock));
2371 
2372 	now = gethrtime();
2373 
2374 	/*
2375 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
2376 	 * just return.
2377 	 */
2378 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2379 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2380 		    "treating as infinite.");
2381 		return;
2382 	}
2383 
2384 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2385 	timeout = now + (timeout_sec * 1000000000LL);
2386 
2387 	entry = startd_alloc(sizeof (timeout_entry_t));
2388 	entry->te_timeout = timeout;
2389 	entry->te_ctid = cid;
2390 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2391 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2392 	entry->te_fired = 0;
2393 	/* Insert the calculated timeout time onto the queue. */
2394 	MUTEX_LOCK(&timeouts->tq_lock);
2395 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2396 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2397 	uu_list_insert(timeouts->tq_list, entry, idx);
2398 	MUTEX_UNLOCK(&timeouts->tq_lock);
2399 
2400 	assert(inst->ri_timeout == NULL);
2401 	inst->ri_timeout = entry;
2402 
2403 	MUTEX_LOCK(&tu->tu_lock);
2404 	tu->tu_wakeup = 1;
2405 	(void) pthread_cond_broadcast(&tu->tu_cv);
2406 	MUTEX_UNLOCK(&tu->tu_lock);
2407 }
2408 
2409 
2410 void
2411 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2412 {
2413 	assert(MUTEX_HELD(&inst->ri_lock));
2414 
2415 	if (inst->ri_timeout == NULL)
2416 		return;
2417 
2418 	assert(inst->ri_timeout->te_ctid == cid);
2419 
2420 	MUTEX_LOCK(&timeouts->tq_lock);
2421 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2422 	MUTEX_UNLOCK(&timeouts->tq_lock);
2423 
2424 	free(inst->ri_timeout->te_fmri);
2425 	free(inst->ri_timeout->te_logstem);
2426 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2427 	inst->ri_timeout = NULL;
2428 }
2429 
2430 static int
2431 timeout_now()
2432 {
2433 	timeout_entry_t *e;
2434 	hrtime_t now;
2435 	int ret;
2436 
2437 	now = gethrtime();
2438 
2439 	/*
2440 	 * Walk through the (sorted) timeouts list.  While the timeout
2441 	 * at the head of the list is <= the current time, kill the
2442 	 * method.
2443 	 */
2444 	MUTEX_LOCK(&timeouts->tq_lock);
2445 
2446 	for (e = uu_list_first(timeouts->tq_list);
2447 	    e != NULL && e->te_timeout <= now;
2448 	    e = uu_list_next(timeouts->tq_list, e)) {
2449 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2450 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2451 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2452 		    "Method or service exit timed out.  Killing contract %ld.",
2453 		    e->te_ctid);
2454 		e->te_fired = 1;
2455 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2456 	}
2457 
2458 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2459 		ret = 0;
2460 	else
2461 		ret = -1;
2462 
2463 	MUTEX_UNLOCK(&timeouts->tq_lock);
2464 
2465 	return (ret);
2466 }
2467 
2468 /*
2469  * void *restarter_timeouts_event_thread(void *)
2470  *   Responsible for monitoring the method timeouts.  This thread must
2471  *   be started before any methods are called.
2472  */
2473 /*ARGSUSED*/
2474 static void *
2475 restarter_timeouts_event_thread(void *unused)
2476 {
2477 	/*
2478 	 * Timeouts are entered on a priority queue, which is processed by
2479 	 * this thread.  As timeouts are specified in seconds, we'll do
2480 	 * the necessary processing every second, as long as the queue
2481 	 * is not empty.
2482 	 */
2483 
2484 	/*CONSTCOND*/
2485 	while (1) {
2486 		/*
2487 		 * As long as the timeout list isn't empty, process it
2488 		 * every second.
2489 		 */
2490 		if (timeout_now() == 0) {
2491 			(void) sleep(1);
2492 			continue;
2493 		}
2494 
2495 		/* The list is empty, wait until we have more timeouts. */
2496 		MUTEX_LOCK(&tu->tu_lock);
2497 
2498 		while (tu->tu_wakeup == 0)
2499 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2500 
2501 		tu->tu_wakeup = 0;
2502 		MUTEX_UNLOCK(&tu->tu_lock);
2503 	}
2504 
2505 	return (NULL);
2506 }
2507 
2508 void
2509 restarter_start()
2510 {
2511 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2512 	(void) startd_thread_create(restarter_event_thread, NULL);
2513 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2514 	(void) startd_thread_create(wait_thread, NULL);
2515 }
2516 
2517 
2518 void
2519 restarter_init()
2520 {
2521 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2522 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2523 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2524 	(void) memset(&instance_list, 0, sizeof (instance_list));
2525 
2526 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2527 	instance_list.ril_instance_list = startd_list_create(
2528 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2529 
2530 	restarter_queue_pool = startd_list_pool_create(
2531 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2532 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
2533 	    UU_LIST_POOL_DEBUG);
2534 
2535 	contract_list_pool = startd_list_pool_create(
2536 	    "contract_list", sizeof (contract_entry_t),
2537 	    offsetof(contract_entry_t,  ce_link), NULL,
2538 	    UU_LIST_POOL_DEBUG);
2539 	contract_hash_init();
2540 
2541 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2542 }
2543