xref: /illumos-gate/usr/src/cmd/svc/startd/restarter.c (revision 53f3aea0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * restarter.c - service manipulation
29  *
30  * This component manages services whose restarter is svc.startd, the standard
31  * restarter.  It translates restarter protocol events from the graph engine
32  * into actions on processes, as a delegated restarter would do.
33  *
34  * The master restarter manages a number of always-running threads:
35  *   - restarter event thread: events from the graph engine
36  *   - timeout thread: thread to fire queued timeouts
37  *   - contract thread: thread to handle contract events
38  *   - wait thread: thread to handle wait-based services
39  *
40  * The other threads are created as needed:
41  *   - per-instance method threads
42  *   - per-instance event processing threads
43  *
44  * The interaction of all threads must result in the following conditions
45  * being satisfied (on a per-instance basis):
46  *   - restarter events must be processed in order
47  *   - method execution must be serialized
48  *   - instance delete must be held until outstanding methods are complete
49  *   - contract events shouldn't be processed while a method is running
50  *   - timeouts should fire even when a method is running
51  *
52  * Service instances are represented by restarter_inst_t's and are kept in the
53  * instance_list list.
54  *
55  * Service States
56  *   The current state of a service instance is kept in
57  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
58  *   some time, then before we effect the transition we set
59  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
60  *   rotate i_next_state to i_state and set i_next_state to
61  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
62  *   held.  The exception is when we launch methods, which are done with
63  *   a separate thread.  To keep any other threads from grabbing ri_lock before
64  *   method_thread() does, we set ri_method_thread to the thread id of the
65  *   method thread, and when it is nonzero any thread with a different thread id
66  *   waits on ri_method_cv.
67  *
68  * Method execution is serialized by blocking on ri_method_cv in
69  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
70  * also prevents the instance structure from being deleted until all
71  * outstanding operations such as method_thread() have finished.
72  *
73  * Lock ordering:
74  *
75  * dgraph_lock [can be held when taking:]
76  *   utmpx_lock
77  *   dictionary->dict_lock
78  *   st->st_load_lock
79  *   wait_info_lock
80  *   ru->restarter_update_lock
81  *     restarter_queue->rpeq_lock
82  *   instance_list.ril_lock
83  *     inst->ri_lock
84  *   st->st_configd_live_lock
85  *
86  * instance_list.ril_lock
87  *   graph_queue->gpeq_lock
88  *   gu->gu_lock
89  *   st->st_configd_live_lock
90  *   dictionary->dict_lock
91  *   inst->ri_lock
92  *     graph_queue->gpeq_lock
93  *     gu->gu_lock
94  *     tu->tu_lock
95  *     tq->tq_lock
96  *     inst->ri_queue_lock
97  *       wait_info_lock
98  *       bp->cb_lock
99  *     utmpx_lock
100  *
101  * single_user_thread_lock
102  *   wait_info_lock
103  *   utmpx_lock
104  *
105  * gu_freeze_lock
106  *
107  * logbuf_mutex nests inside pretty much everything.
108  */
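/*
 * As an illustration of the state hand-off described above (a sketch of the
 * protocol only, not code from this file), a hypothetical offline -> online
 * transition is recorded roughly as:
 *
 *	ri_i.i_state      = RESTARTER_STATE_OFFLINE;
 *	ri_i.i_next_state = RESTARTER_STATE_ONLINE;	(start method launched;
 *							 ri_method_thread set)
 *	... start method completes ...
 *	ri_i.i_state      = RESTARTER_STATE_ONLINE;	(i_next_state rotated
 *	ri_i.i_next_state = RESTARTER_STATE_NONE;	 into i_state)
 */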
109 
110 #include <sys/contract/process.h>
111 #include <sys/ctfs.h>
112 #include <sys/stat.h>
113 #include <sys/time.h>
114 #include <sys/types.h>
115 #include <sys/uio.h>
116 #include <sys/wait.h>
117 #include <assert.h>
118 #include <errno.h>
119 #include <fcntl.h>
120 #include <libcontract.h>
121 #include <libcontract_priv.h>
122 #include <libintl.h>
123 #include <librestart.h>
124 #include <librestart_priv.h>
125 #include <libuutil.h>
126 #include <limits.h>
127 #include <poll.h>
128 #include <port.h>
129 #include <pthread.h>
130 #include <stdarg.h>
131 #include <stdio.h>
132 #include <strings.h>
133 #include <unistd.h>
134 
135 #include "startd.h"
136 #include "protocol.h"
137 
138 static uu_list_pool_t *restarter_instance_pool;
139 static restarter_instance_list_t instance_list;
140 
141 static uu_list_pool_t *restarter_queue_pool;
142 
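/*
 * uu_list comparator for instance_list.  Note the asymmetry in the
 * arguments: the left-hand side is a restarter_inst_t taken from the list,
 * while the right-hand side is a bare instance id, matching the &id
 * arguments passed to uu_list_find() by the lookup routines below.
 */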
143 /*ARGSUSED*/
144 static int
145 restarter_instance_compare(const void *lc_arg, const void *rc_arg,
146     void *private)
147 {
148 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
149 	int rc_id = *(int *)rc_arg;
150 
151 	if (lc_id > rc_id)
152 		return (1);
153 	if (lc_id < rc_id)
154 		return (-1);
155 	return (0);
156 }
157 
158 static restarter_inst_t *
159 inst_lookup_by_name(const char *name)
160 {
161 	int id;
162 
163 	id = dict_lookup_byname(name);
164 	if (id == -1)
165 		return (NULL);
166 
167 	return (inst_lookup_by_id(id));
168 }
169 
170 restarter_inst_t *
171 inst_lookup_by_id(int id)
172 {
173 	restarter_inst_t *inst;
174 
175 	MUTEX_LOCK(&instance_list.ril_lock);
176 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
177 	if (inst != NULL)
178 		MUTEX_LOCK(&inst->ri_lock);
179 	MUTEX_UNLOCK(&instance_list.ril_lock);
180 
181 	if (inst != NULL) {
182 		while (inst->ri_method_thread != 0 &&
183 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
184 			++inst->ri_method_waiters;
185 			(void) pthread_cond_wait(&inst->ri_method_cv,
186 			    &inst->ri_lock);
187 			assert(inst->ri_method_waiters > 0);
188 			--inst->ri_method_waiters;
189 		}
190 	}
191 
192 	return (inst);
193 }
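/*
 * Callers of inst_lookup_by_id() and inst_lookup_by_name() receive the
 * instance with ri_lock held and must drop it themselves; for example (see
 * stop_instance_fmri() below):
 *
 *	rip = inst_lookup_by_name(fmri);
 *	if (rip == NULL)
 *		return (ENOENT);
 *	...
 *	MUTEX_UNLOCK(&rip->ri_lock);
 */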
194 
195 static restarter_inst_t *
196 inst_lookup_queue(const char *name)
197 {
198 	int id;
199 	restarter_inst_t *inst;
200 
201 	id = dict_lookup_byname(name);
202 	if (id == -1)
203 		return (NULL);
204 
205 	MUTEX_LOCK(&instance_list.ril_lock);
206 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
207 	if (inst != NULL)
208 		MUTEX_LOCK(&inst->ri_queue_lock);
209 	MUTEX_UNLOCK(&instance_list.ril_lock);
210 
211 	return (inst);
212 }
213 
214 const char *
215 service_style(int flags)
216 {
217 	switch (flags & RINST_STYLE_MASK) {
218 	case RINST_CONTRACT:	return ("contract");
219 	case RINST_TRANSIENT:	return ("transient");
220 	case RINST_WAIT:	return ("wait");
221 
222 	default:
223 #ifndef NDEBUG
224 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
225 #endif
226 		abort();
227 		/* NOTREACHED */
228 	}
229 }
230 
231 /*
232  * Fails with ECONNABORTED or ECANCELED.
233  */
234 static int
235 check_contract(restarter_inst_t *inst, boolean_t primary,
236     scf_instance_t *scf_inst)
237 {
238 	ctid_t *ctidp;
239 	int fd, r;
240 
241 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
242 	    &inst->ri_i.i_transient_ctid;
243 
244 	assert(*ctidp >= 1);
245 
246 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
247 	if (fd >= 0) {
248 		r = close(fd);
249 		assert(r == 0);
250 		return (0);
251 	}
252 
253 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
254 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
255 	switch (r) {
256 	case 0:
257 	case ECONNABORTED:
258 	case ECANCELED:
259 		*ctidp = 0;
260 		return (r);
261 
262 	case ENOMEM:
263 		uu_die("Out of memory\n");
264 		/* NOTREACHED */
265 
266 	case EPERM:
267 		uu_die("Insufficient privilege.\n");
268 		/* NOTREACHED */
269 
270 	case EACCES:
271 		uu_die("Repository backend access denied.\n");
272 		/* NOTREACHED */
273 
274 	case EROFS:
275 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
276 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
277 		return (0);
278 
279 	case EINVAL:
280 	case EBADF:
281 	default:
282 		assert(0);
283 		abort();
284 		/* NOTREACHED */
285 	}
286 }
287 
288 static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
289 
290 /*
291  * int restarter_insert_inst(scf_handle_t *, char *)
292  *   If the inst is already in the restarter list, do nothing and return 0.
293  *   Otherwise, initialize a restarter_inst_t, initialize its states, insert
294  *   it into the list, and return 0.
295  *
296  *   Fails with
297  *     ENOENT - name is not in the repository
298  */
299 static int
300 restarter_insert_inst(scf_handle_t *h, const char *name)
301 {
302 	int id, r;
303 	restarter_inst_t *inst;
304 	uu_list_index_t idx;
305 	scf_service_t *scf_svc;
306 	scf_instance_t *scf_inst;
307 	scf_snapshot_t *snap = NULL;
308 	scf_propertygroup_t *pg;
309 	char *svc_name, *inst_name;
310 	char logfilebuf[PATH_MAX];
311 	char *c;
312 	boolean_t do_commit_states;
313 	restarter_instance_state_t state, next_state;
314 	protocol_states_t *ps;
315 	pid_t start_pid;
316 
317 	MUTEX_LOCK(&instance_list.ril_lock);
318 
319 	/*
320 	 * We don't use inst_lookup_by_name() here because we want the lookup
321 	 * & insert to be atomic.
322 	 */
323 	id = dict_lookup_byname(name);
324 	if (id != -1) {
325 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
326 		    &idx);
327 		if (inst != NULL) {
328 			MUTEX_UNLOCK(&instance_list.ril_lock);
329 			return (0);
330 		}
331 	}
332 
333 	/* Allocate an instance */
334 	inst = startd_zalloc(sizeof (restarter_inst_t));
335 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
336 	inst->ri_utmpx_prefix[0] = '\0';
337 
338 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
339 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
340 
341 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
342 
343 	/*
344 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
345 	 * just in case.
346 	 */
347 	inst->ri_id = (id != -1 ? id : dict_insert(name));
348 
349 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
350 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
351 
352 	scf_svc = safe_scf_service_create(h);
353 	scf_inst = safe_scf_instance_create(h);
354 	pg = safe_scf_pg_create(h);
355 	svc_name = startd_alloc(max_scf_name_size);
356 	inst_name = startd_alloc(max_scf_name_size);
357 
358 rep_retry:
359 	if (snap != NULL)
360 		scf_snapshot_destroy(snap);
361 	if (inst->ri_logstem != NULL)
362 		startd_free(inst->ri_logstem, PATH_MAX);
363 	if (inst->ri_common_name != NULL)
364 		startd_free(inst->ri_common_name, max_scf_value_size);
365 	if (inst->ri_C_common_name != NULL)
366 		startd_free(inst->ri_C_common_name, max_scf_value_size);
367 	snap = NULL;
368 	inst->ri_logstem = NULL;
369 	inst->ri_common_name = NULL;
370 	inst->ri_C_common_name = NULL;
371 
372 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
373 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
374 		switch (scf_error()) {
375 		case SCF_ERROR_CONNECTION_BROKEN:
376 			libscf_handle_rebind(h);
377 			goto rep_retry;
378 
379 		case SCF_ERROR_NOT_FOUND:
380 			goto deleted;
381 		}
382 
383 		uu_die("Can't decode FMRI %s: %s\n", name,
384 		    scf_strerror(scf_error()));
385 	}
386 
387 	/*
388 	 * If there's no running snapshot, then we execute using the editing
389 	 * snapshot.  Pending snapshots will be taken later.
390 	 */
391 	snap = libscf_get_running_snapshot(scf_inst);
392 
393 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
394 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
395 	    0)) {
396 		switch (scf_error()) {
397 		case SCF_ERROR_NOT_SET:
398 			break;
399 
400 		case SCF_ERROR_CONNECTION_BROKEN:
401 			libscf_handle_rebind(h);
402 			goto rep_retry;
403 
404 		default:
405 			assert(0);
406 			abort();
407 		}
408 
409 		goto deleted;
410 	}
411 
412 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
413 	for (c = logfilebuf; *c != '\0'; c++)
414 		if (*c == '/')
415 			*c = '-';
416 
417 	inst->ri_logstem = startd_alloc(PATH_MAX);
418 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
419 	    LOG_SUFFIX);
420 
421 	/*
422 	 * If the restarter group is missing, use uninit/none.  Otherwise,
423 	 * we're probably being restarted & don't want to mess up the states
424 	 * that are there.
425 	 */
426 	state = RESTARTER_STATE_UNINIT;
427 	next_state = RESTARTER_STATE_NONE;
428 
429 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
430 	if (r != 0) {
431 		switch (scf_error()) {
432 		case SCF_ERROR_CONNECTION_BROKEN:
433 			libscf_handle_rebind(h);
434 			goto rep_retry;
435 
436 		case SCF_ERROR_NOT_SET:
437 			goto deleted;
438 
439 		case SCF_ERROR_NOT_FOUND:
440 			/*
441 			 * This shouldn't happen since the graph engine should
442 			 * have initialized the state to uninitialized/none if
443 			 * there was no restarter pg.  In case somebody
444 			 * deleted it, though....
445 			 */
446 			do_commit_states = B_TRUE;
447 			break;
448 
449 		default:
450 			assert(0);
451 			abort();
452 		}
453 	} else {
454 		r = libscf_read_states(pg, &state, &next_state);
455 		if (r != 0) {
456 			do_commit_states = B_TRUE;
457 		} else {
458 			if (next_state != RESTARTER_STATE_NONE) {
459 				/*
460 				 * Force next_state to _NONE since we
461 				 * don't look for method processes.
462 				 */
463 				next_state = RESTARTER_STATE_NONE;
464 				do_commit_states = B_TRUE;
465 			} else {
466 				/*
467 				 * Inform the restarter of our state without
468 				 * changing the STIME in the repository.
469 				 */
470 				ps = startd_alloc(sizeof (*ps));
471 				inst->ri_i.i_state = ps->ps_state = state;
472 				inst->ri_i.i_next_state = ps->ps_state_next =
473 				    next_state;
474 
475 				graph_protocol_send_event(inst->ri_i.i_fmri,
476 				    GRAPH_UPDATE_STATE_CHANGE, ps);
477 
478 				do_commit_states = B_FALSE;
479 			}
480 		}
481 	}
482 
483 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
484 	    &inst->ri_utmpx_prefix)) {
485 	case 0:
486 		break;
487 
488 	case ECONNABORTED:
489 		libscf_handle_rebind(h);
490 		goto rep_retry;
491 
492 	case ECANCELED:
493 		goto deleted;
494 
495 	case ENOENT:
496 		/*
497 		 * This is odd, because the graph engine should have required
498 		 * the general property group.  So we'll just use default
499 		 * flags in anticipation of the graph engine sending us
500 		 * REMOVE_INSTANCE when it finds out that the general property
501 		 * group has been deleted.
502 		 */
503 		inst->ri_flags = RINST_CONTRACT;
504 		break;
505 
506 	default:
507 		assert(0);
508 		abort();
509 	}
510 
511 	switch (libscf_get_template_values(scf_inst, snap,
512 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
513 	case 0:
514 		break;
515 
516 	case ECONNABORTED:
517 		libscf_handle_rebind(h);
518 		goto rep_retry;
519 
520 	case ECANCELED:
521 		goto deleted;
522 
523 	case ECHILD:
524 	case ENOENT:
525 		break;
526 
527 	default:
528 		assert(0);
529 		abort();
530 	}
531 
532 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
533 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
534 	    &start_pid)) {
535 	case 0:
536 		break;
537 
538 	case ECONNABORTED:
539 		libscf_handle_rebind(h);
540 		goto rep_retry;
541 
542 	case ECANCELED:
543 		goto deleted;
544 
545 	default:
546 		assert(0);
547 		abort();
548 	}
549 
550 	if (inst->ri_i.i_primary_ctid >= 1) {
551 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
552 
553 		switch (check_contract(inst, B_TRUE, scf_inst)) {
554 		case 0:
555 			break;
556 
557 		case ECONNABORTED:
558 			libscf_handle_rebind(h);
559 			goto rep_retry;
560 
561 		case ECANCELED:
562 			goto deleted;
563 
564 		default:
565 			assert(0);
566 			abort();
567 		}
568 	}
569 
570 	if (inst->ri_i.i_transient_ctid >= 1) {
571 		switch (check_contract(inst, B_FALSE, scf_inst)) {
572 		case 0:
573 			break;
574 
575 		case ECONNABORTED:
576 			libscf_handle_rebind(h);
577 			goto rep_retry;
578 
579 		case ECANCELED:
580 			goto deleted;
581 
582 		default:
583 			assert(0);
584 			abort();
585 		}
586 	}
587 
588 	/* No more failures we live through, so add it to the list. */
589 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
590 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
591 	MUTEX_LOCK(&inst->ri_lock);
592 	MUTEX_LOCK(&inst->ri_queue_lock);
593 
594 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
595 
596 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
597 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
598 	MUTEX_UNLOCK(&instance_list.ril_lock);
599 
600 	if (start_pid != -1 &&
601 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
602 		int ret;
603 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
604 		if (ret == -1) {
605 			/*
606 			 * Implication:  if we can't reregister the
607 			 * instance, we will start another one.  Two
608 			 * instances may or may not result in a resource
609 			 * conflict.
610 			 */
611 			log_error(LOG_WARNING,
612 			    "%s: couldn't reregister %ld for wait\n",
613 			    inst->ri_i.i_fmri, start_pid);
614 		} else if (ret == 1) {
615 			/*
616 			 * Leading PID has exited.
617 			 */
618 			(void) stop_instance(h, inst, RSTOP_EXIT);
619 		}
620 	}
621 
622 
623 	scf_pg_destroy(pg);
624 
625 	if (do_commit_states)
626 		(void) restarter_instance_update_states(h, inst, state,
627 		    next_state, RERR_NONE, NULL);
628 
629 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
630 	    service_style(inst->ri_flags));
631 
632 	MUTEX_UNLOCK(&inst->ri_queue_lock);
633 	MUTEX_UNLOCK(&inst->ri_lock);
634 
635 	startd_free(svc_name, max_scf_name_size);
636 	startd_free(inst_name, max_scf_name_size);
637 	scf_snapshot_destroy(snap);
638 	scf_instance_destroy(scf_inst);
639 	scf_service_destroy(scf_svc);
640 
641 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
642 	    name);
643 
644 	return (0);
645 
646 deleted:
647 	MUTEX_UNLOCK(&instance_list.ril_lock);
648 	startd_free(inst_name, max_scf_name_size);
649 	startd_free(svc_name, max_scf_name_size);
650 	if (snap != NULL)
651 		scf_snapshot_destroy(snap);
652 	scf_pg_destroy(pg);
653 	scf_instance_destroy(scf_inst);
654 	scf_service_destroy(scf_svc);
655 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
656 	uu_list_destroy(inst->ri_queue);
657 	if (inst->ri_logstem != NULL)
658 		startd_free(inst->ri_logstem, PATH_MAX);
659 	if (inst->ri_common_name != NULL)
660 		startd_free(inst->ri_common_name, max_scf_value_size);
661 	if (inst->ri_C_common_name != NULL)
662 		startd_free(inst->ri_C_common_name, max_scf_value_size);
663 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
664 	startd_free(inst, sizeof (restarter_inst_t));
665 	return (ENOENT);
666 }
667 
668 static void
669 restarter_delete_inst(restarter_inst_t *ri)
670 {
671 	int id;
672 	restarter_inst_t *rip;
673 	void *cookie = NULL;
674 	restarter_instance_qentry_t *e;
675 
676 	assert(MUTEX_HELD(&ri->ri_lock));
677 
678 	/*
679 	 * Must drop the instance lock so we can pick up the instance_list
680 	 * lock & remove the instance.
681 	 */
682 	id = ri->ri_id;
683 	MUTEX_UNLOCK(&ri->ri_lock);
684 
685 	MUTEX_LOCK(&instance_list.ril_lock);
686 
687 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
688 	if (rip == NULL) {
689 		MUTEX_UNLOCK(&instance_list.ril_lock);
690 		return;
691 	}
692 
693 	assert(ri == rip);
694 
695 	uu_list_remove(instance_list.ril_instance_list, ri);
696 
697 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
698 	    ri->ri_i.i_fmri);
699 
700 	MUTEX_UNLOCK(&instance_list.ril_lock);
701 
702 	/*
703 	 * We can lock the instance without holding the instance_list lock
704 	 * since we removed the instance from the list.
705 	 */
706 	MUTEX_LOCK(&ri->ri_lock);
707 	MUTEX_LOCK(&ri->ri_queue_lock);
708 
709 	if (ri->ri_i.i_primary_ctid >= 1)
710 		contract_hash_remove(ri->ri_i.i_primary_ctid);
711 
712 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
713 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
714 
715 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
716 		startd_free(e, sizeof (*e));
717 	uu_list_destroy(ri->ri_queue);
718 
719 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
720 	startd_free(ri->ri_logstem, PATH_MAX);
721 	if (ri->ri_common_name != NULL)
722 		startd_free(ri->ri_common_name, max_scf_value_size);
723 	if (ri->ri_C_common_name != NULL)
724 		startd_free(ri->ri_C_common_name, max_scf_value_size);
725 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
726 	(void) pthread_mutex_destroy(&ri->ri_lock);
727 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
728 	startd_free(ri, sizeof (restarter_inst_t));
729 }
730 
731 /*
732  * instance_is_wait_style()
733  *
734  *   Returns 1 if the given instance is a "wait-style" service instance.
735  */
736 int
737 instance_is_wait_style(restarter_inst_t *inst)
738 {
739 	assert(MUTEX_HELD(&inst->ri_lock));
740 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
741 }
742 
743 /*
744  * instance_is_transient_style()
745  *
746  *   Returns 1 if the given instance is a transient service instance.
747  */
748 int
749 instance_is_transient_style(restarter_inst_t *inst)
750 {
751 	assert(MUTEX_HELD(&inst->ri_lock));
752 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
753 }
754 
755 /*
756  * instance_in_transition()
757  * Returns 1 if instance is in transition, 0 if not
758  */
759 int
760 instance_in_transition(restarter_inst_t *inst)
761 {
762 	assert(MUTEX_HELD(&inst->ri_lock));
763 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
764 		return (0);
765 	return (1);
766 }
767 
768 /*
769  * returns 1 if instance is already started, 0 if not
770  */
771 static int
772 instance_started(restarter_inst_t *inst)
773 {
774 	int ret;
775 
776 	assert(MUTEX_HELD(&inst->ri_lock));
777 
778 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
779 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
780 		ret = 1;
781 	else
782 		ret = 0;
783 
784 	return (ret);
785 }
786 
787 /*
788  * Returns
789  *   0 - success
790  *   ECONNRESET - success, but h was rebound
791  */
792 int
793 restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
794     restarter_instance_state_t new_state,
795     restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
796 {
797 	protocol_states_t *states;
798 	int e;
799 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
800 	boolean_t rebound = B_FALSE;
801 	int prev_state_online;
802 	int state_online;
803 
804 	assert(MUTEX_HELD(&ri->ri_lock));
805 
806 	prev_state_online = instance_started(ri);
807 
808 retry:
809 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
810 	    aux);
811 	switch (e) {
812 	case 0:
813 		break;
814 
815 	case ENOMEM:
816 		++retry_count;
817 		if (retry_count < ALLOC_RETRY) {
818 			(void) poll(NULL, 0, msecs);
819 			msecs *= ALLOC_DELAY_MULT;
820 			goto retry;
821 		}
822 
823 		/* Like startd_alloc(). */
824 		uu_die("Insufficient memory.\n");
825 		/* NOTREACHED */
826 
827 	case ECONNABORTED:
828 		libscf_handle_rebind(h);
829 		rebound = B_TRUE;
830 		goto retry;
831 
832 	case EPERM:
833 	case EACCES:
834 	case EROFS:
835 		log_error(LOG_NOTICE, "Could not commit state change for %s "
836 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
837 		/* FALLTHROUGH */
838 
839 	case ENOENT:
840 		ri->ri_i.i_state = new_state;
841 		ri->ri_i.i_next_state = new_state_next;
842 		break;
843 
844 	case EINVAL:
845 	default:
846 		bad_error("_restarter_commit_states", e);
847 	}
848 
849 	states = startd_alloc(sizeof (protocol_states_t));
850 	states->ps_state = new_state;
851 	states->ps_state_next = new_state_next;
852 	states->ps_err = err;
853 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
854 	    (void *)states);
855 
856 	state_online = instance_started(ri);
857 
858 	if (prev_state_online && !state_online)
859 		ri->ri_post_offline_hook();
860 	else if (!prev_state_online && state_online)
861 		ri->ri_post_online_hook();
862 
863 	return (rebound ? ECONNRESET : 0);
864 }
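/*
 * Callers that need to distinguish a rebind typically dispatch on the
 * return value as follows (see stop_instance() below):
 *
 *	switch (err = restarter_instance_update_states(...)) {
 *	case 0:
 *	case ECONNRESET:
 *		break;
 *
 *	default:
 *		bad_error("restarter_instance_update_states", err);
 *	}
 */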
865 
866 void
867 restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
868 {
869 	restarter_inst_t *inst;
870 
871 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
872 
873 	inst = inst_lookup_by_name(fmri);
874 	if (inst == NULL)
875 		return;
876 
877 	inst->ri_flags |= flag;
878 
879 	MUTEX_UNLOCK(&inst->ri_lock);
880 }
881 
882 static void
883 restarter_take_pending_snapshots(scf_handle_t *h)
884 {
885 	restarter_inst_t *inst;
886 	int r;
887 
888 	MUTEX_LOCK(&instance_list.ril_lock);
889 
890 	for (inst = uu_list_first(instance_list.ril_instance_list);
891 	    inst != NULL;
892 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
893 		const char *fmri;
894 		scf_instance_t *sinst = NULL;
895 
896 		MUTEX_LOCK(&inst->ri_lock);
897 
898 		/*
899 		 * This is where we'd check inst->ri_method_thread and if it
900 		 * were nonzero we'd wait in anticipation of another thread
901 		 * executing a method for inst.  Doing so with the instance_list
902 		 * locked, though, leads to deadlock.  Since taking a snapshot
903 		 * during that window won't hurt anything, we'll just continue.
904 		 */
905 
906 		fmri = inst->ri_i.i_fmri;
907 
908 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
909 			scf_snapshot_t *rsnap;
910 
911 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
912 
913 			rsnap = libscf_get_or_make_running_snapshot(sinst,
914 			    fmri, B_FALSE);
915 
916 			scf_instance_destroy(sinst);
917 
918 			if (rsnap != NULL)
919 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
920 
921 			scf_snapshot_destroy(rsnap);
922 		}
923 
924 		if (inst->ri_flags & RINST_RETAKE_START) {
925 			switch (r = libscf_snapshots_poststart(h, fmri,
926 			    B_FALSE)) {
927 			case 0:
928 			case ENOENT:
929 				inst->ri_flags &= ~RINST_RETAKE_START;
930 				break;
931 
932 			case ECONNABORTED:
933 				break;
934 
935 			case EACCES:
936 			default:
937 				bad_error("libscf_snapshots_poststart", r);
938 			}
939 		}
940 
941 		MUTEX_UNLOCK(&inst->ri_lock);
942 	}
943 
944 	MUTEX_UNLOCK(&instance_list.ril_lock);
945 }
946 
947 /* ARGSUSED */
948 void *
949 restarter_post_fsminimal_thread(void *unused)
950 {
951 	scf_handle_t *h;
952 	int r;
953 
954 	h = libscf_handle_create_bound_loop();
955 
956 	for (;;) {
957 		r = libscf_create_self(h);
958 		if (r == 0)
959 			break;
960 
961 		assert(r == ECONNABORTED);
962 		libscf_handle_rebind(h);
963 	}
964 
965 	restarter_take_pending_snapshots(h);
966 
967 	(void) scf_handle_unbind(h);
968 	scf_handle_destroy(h);
969 
970 	return (NULL);
971 }
972 
973 /*
974  * int stop_instance()
975  *
976  *   Stop the instance given as the second argument, for the cause
977  *   stated.
978  *
979  *   Returns
980  *     0 - success
981  *     -1 - inst is in transition
982  */
983 static int
984 stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
985     stop_cause_t cause)
986 {
987 	fork_info_t *info;
988 	const char *cp;
989 	int err;
990 	restarter_error_t re;
991 
992 	assert(MUTEX_HELD(&inst->ri_lock));
993 	assert(inst->ri_method_thread == 0);
994 
995 	switch (cause) {
996 	case RSTOP_EXIT:
997 		re = RERR_RESTART;
998 		cp = "all processes in service exited";
999 		break;
1000 	case RSTOP_CORE:
1001 		re = RERR_FAULT;
1002 		cp = "process dumped core";
1003 		break;
1004 	case RSTOP_SIGNAL:
1005 		re = RERR_FAULT;
1006 		cp = "process received fatal signal from outside the service";
1007 		break;
1008 	case RSTOP_HWERR:
1009 		re = RERR_FAULT;
1010 		cp = "process killed due to uncorrectable hardware error";
1011 		break;
1012 	case RSTOP_DEPENDENCY:
1013 		re = RERR_RESTART;
1014 		cp = "dependency activity requires stop";
1015 		break;
1016 	case RSTOP_DISABLE:
1017 		re = RERR_RESTART;
1018 		cp = "service disabled";
1019 		break;
1020 	case RSTOP_RESTART:
1021 		re = RERR_RESTART;
1022 		cp = "service restarting";
1023 		break;
1024 	default:
1025 #ifndef NDEBUG
1026 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1027 		    cause, __FILE__, __LINE__);
1028 #endif
1029 		abort();
1030 	}
1031 
1032 	/* Services in the disabled and maintenance state are ignored */
1033 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1034 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1035 		log_framework(LOG_DEBUG,
1036 		    "%s: stop_instance -> is maint/disabled\n",
1037 		    inst->ri_i.i_fmri);
1038 		return (0);
1039 	}
1040 
1041 	/* Already stopped instances are left alone */
1042 	if (instance_started(inst) == 0) {
1043 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1044 		    inst->ri_i.i_fmri);
1045 		return (0);
1046 	}
1047 
1048 	if (instance_in_transition(inst)) {
1049 		/* requeue event by returning -1 */
1050 		log_framework(LOG_DEBUG,
1051 		    "Restarter: Not stopping %s, in transition.\n",
1052 		    inst->ri_i.i_fmri);
1053 		return (-1);
1054 	}
1055 
1056 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1057 
1058 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1059 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1060 
1061 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1062 		/*
1063 		 * No need to stop instance, as child has exited; remove
1064 		 * contract and move the instance to the offline state.
1065 		 */
1066 		switch (err = restarter_instance_update_states(local_handle,
1067 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1068 		    NULL)) {
1069 		case 0:
1070 		case ECONNRESET:
1071 			break;
1072 
1073 		default:
1074 			bad_error("restarter_instance_update_states", err);
1075 		}
1076 
1077 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1078 
1079 		if (inst->ri_i.i_primary_ctid != 0) {
1080 			inst->ri_m_inst =
1081 			    safe_scf_instance_create(local_handle);
1082 			inst->ri_mi_deleted = B_FALSE;
1083 
1084 			libscf_reget_instance(inst);
1085 			method_remove_contract(inst, B_TRUE, B_TRUE);
1086 
1087 			scf_instance_destroy(inst->ri_m_inst);
1088 			inst->ri_m_inst = NULL;
1089 		}
1090 
1091 		switch (err = restarter_instance_update_states(local_handle,
1092 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1093 		    NULL)) {
1094 		case 0:
1095 		case ECONNRESET:
1096 			break;
1097 
1098 		default:
1099 			bad_error("restarter_instance_update_states", err);
1100 		}
1101 
1102 		return (0);
1103 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1104 		/*
1105 		 * Stopping a wait service through means other than the pid
1106 		 * exiting should keep wait_thread() from restarting the
1107 		 * service, by removing it from the wait list.
1108 		 * We cannot remove it right now, otherwise the process will
1109 		 * end up <defunct>, so we mark it to be ignored instead.
1110 		 */
1111 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1112 	}
1113 
1114 	switch (err = restarter_instance_update_states(local_handle, inst,
1115 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1116 	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1117 	case 0:
1118 	case ECONNRESET:
1119 		break;
1120 
1121 	default:
1122 		bad_error("restarter_instance_update_states", err);
1123 	}
1124 
1125 	info = startd_zalloc(sizeof (fork_info_t));
1126 
1127 	info->sf_id = inst->ri_id;
1128 	info->sf_method_type = METHOD_STOP;
1129 	info->sf_event_type = re;
1130 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1131 
1132 	return (0);
1133 }
1134 
1135 /*
1136  * Returns
1137  *   ENOENT - fmri is not in instance_list
1138  *   0 - success
1139  *   ECONNRESET - success, though handle was rebound
1140  *   -1 - instance is in transition
1141  */
1142 int
1143 stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1144 {
1145 	restarter_inst_t *rip;
1146 	int r;
1147 
1148 	rip = inst_lookup_by_name(fmri);
1149 	if (rip == NULL)
1150 		return (ENOENT);
1151 
1152 	r = stop_instance(h, rip, flags);
1153 
1154 	MUTEX_UNLOCK(&rip->ri_lock);
1155 
1156 	return (r);
1157 }
1158 
1159 static void
1160 unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1161     unmaint_cause_t cause)
1162 {
1163 	ctid_t ctid;
1164 	scf_instance_t *inst;
1165 	int r;
1166 	uint_t tries = 0, msecs = ALLOC_DELAY;
1167 	const char *cp;
1168 
1169 	assert(MUTEX_HELD(&rip->ri_lock));
1170 
1171 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1172 		log_error(LOG_DEBUG, "Restarter: "
1173 		    "Ignoring maintenance off command because %s is not in the "
1174 		    "maintenance state.\n", rip->ri_i.i_fmri);
1175 		return;
1176 	}
1177 
1178 	switch (cause) {
1179 	case RUNMAINT_CLEAR:
1180 		cp = "clear requested";
1181 		break;
1182 	case RUNMAINT_DISABLE:
1183 		cp = "disable requested";
1184 		break;
1185 	default:
1186 #ifndef NDEBUG
1187 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1188 		    cause, __FILE__, __LINE__);
1189 #endif
1190 		abort();
1191 	}
1192 
1193 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1194 	    cp);
1195 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1196 	    "%s.\n", rip->ri_i.i_fmri, cp);
1197 
1198 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1199 	    RESTARTER_STATE_NONE, RERR_RESTART, "none");
1200 
1201 	/*
1202 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1203 	 * a primary contract.
1204 	 */
1205 	if (rip->ri_i.i_primary_ctid == 0)
1206 		return;
1207 
1208 	ctid = rip->ri_i.i_primary_ctid;
1209 	contract_abandon(ctid);
1210 	rip->ri_i.i_primary_ctid = 0;
1211 
1212 rep_retry:
1213 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1214 	case 0:
1215 		break;
1216 
1217 	case ECONNABORTED:
1218 		libscf_handle_rebind(h);
1219 		goto rep_retry;
1220 
1221 	case ENOENT:
1222 		/* Must have been deleted. */
1223 		return;
1224 
1225 	case EINVAL:
1226 	case ENOTSUP:
1227 	default:
1228 		bad_error("libscf_fmri_get_instance", r);
1229 	}
1230 
1231 again:
1232 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1233 	switch (r) {
1234 	case 0:
1235 		break;
1236 
1237 	case ENOMEM:
1238 		++tries;
1239 		if (tries < ALLOC_RETRY) {
1240 			(void) poll(NULL, 0, msecs);
1241 			msecs *= ALLOC_DELAY_MULT;
1242 			goto again;
1243 		}
1244 
1245 		uu_die("Insufficient memory.\n");
1246 		/* NOTREACHED */
1247 
1248 	case ECONNABORTED:
1249 		scf_instance_destroy(inst);
1250 		libscf_handle_rebind(h);
1251 		goto rep_retry;
1252 
1253 	case ECANCELED:
1254 		break;
1255 
1256 	case EPERM:
1257 	case EACCES:
1258 	case EROFS:
1259 		log_error(LOG_INFO,
1260 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1261 		    rip->ri_i.i_fmri, strerror(r));
1262 		break;
1263 
1264 	case EINVAL:
1265 	case EBADF:
1266 	default:
1267 		bad_error("restarter_remove_contract", r);
1268 	}
1269 
1270 	scf_instance_destroy(inst);
1271 }
1272 
1273 /*
1274  * enable_inst()
1275  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1276  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1277  *   disabled, move it to offline.  If the event is _DISABLE or
1278  *   _ADMIN_DISABLE, make sure inst will move to disabled.
1279  *
1280  *   Returns
1281  *     0 - success
1282  *     ECONNRESET - h was rebound
1283  */
1284 static int
1285 enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1286 {
1287 	restarter_instance_state_t state;
1288 	int r;
1289 
1290 	assert(MUTEX_HELD(&inst->ri_lock));
1291 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1292 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1293 	    e == RESTARTER_EVENT_TYPE_ENABLE);
1294 	assert(instance_in_transition(inst) == 0);
1295 
1296 	state = inst->ri_i.i_state;
1297 
1298 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1299 		inst->ri_i.i_enabled = 1;
1300 
1301 		if (state == RESTARTER_STATE_UNINIT ||
1302 		    state == RESTARTER_STATE_DISABLED) {
1303 			/*
1304 			 * B_FALSE: Don't log an error if the log_instance()
1305 			 * fails because it will fail on the miniroot before
1306 			 * install-discovery runs.
1307 			 */
1308 			log_instance(inst, B_FALSE, "Enabled.");
1309 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1310 			    inst->ri_i.i_fmri);
1311 			(void) restarter_instance_update_states(h, inst,
1312 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1313 			    RERR_NONE, NULL);
1314 		} else {
1315 			log_framework(LOG_DEBUG, "Restarter: "
1316 			    "Not changing state of %s for enable command.\n",
1317 			    inst->ri_i.i_fmri);
1318 		}
1319 	} else {
1320 		inst->ri_i.i_enabled = 0;
1321 
1322 		switch (state) {
1323 		case RESTARTER_STATE_ONLINE:
1324 		case RESTARTER_STATE_DEGRADED:
1325 			r = stop_instance(h, inst, RSTOP_DISABLE);
1326 			return (r == ECONNRESET ? 0 : r);
1327 
1328 		case RESTARTER_STATE_OFFLINE:
1329 		case RESTARTER_STATE_UNINIT:
1330 			if (inst->ri_i.i_primary_ctid != 0) {
1331 				inst->ri_m_inst = safe_scf_instance_create(h);
1332 				inst->ri_mi_deleted = B_FALSE;
1333 
1334 				libscf_reget_instance(inst);
1335 				method_remove_contract(inst, B_TRUE, B_TRUE);
1336 
1337 				scf_instance_destroy(inst->ri_m_inst);
1338 			}
1339 			/* B_FALSE: See log_instance(..., "Enabled."); above */
1340 			log_instance(inst, B_FALSE, "Disabled.");
1341 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1342 			    inst->ri_i.i_fmri);
1343 			(void) restarter_instance_update_states(h, inst,
1344 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1345 			    RERR_RESTART, NULL);
1346 			return (0);
1347 
1348 		case RESTARTER_STATE_DISABLED:
1349 			break;
1350 
1351 		case RESTARTER_STATE_MAINT:
1352 			/*
1353 			 * We only want to pull the instance out of maintenance
1354 			 * if the disable is an administrative request.  The
1355 			 * graph engine sends _DISABLE events whenever a
1356 			 * service isn't in the disabled state, and we don't
1357 			 * want to pull the service out of maintenance if,
1358 			 * for example, it is there due to a dependency cycle.
1359 			 */
1360 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1361 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1362 			break;
1363 
1364 		default:
1365 #ifndef NDEBUG
1366 			(void) fprintf(stderr, "Restarter instance %s has "
1367 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1368 #endif
1369 			abort();
1370 		}
1371 	}
1372 
1373 	return (0);
1374 }
1375 
1376 static void
1377 start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1378 {
1379 	fork_info_t *info;
1380 
1381 	assert(MUTEX_HELD(&inst->ri_lock));
1382 	assert(instance_in_transition(inst) == 0);
1383 	assert(inst->ri_method_thread == 0);
1384 
1385 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1386 	    inst->ri_i.i_fmri);
1387 
1388 	/* Services in the disabled and maintenance state are ignored */
1389 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1390 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1391 	    inst->ri_i.i_enabled == 0) {
1392 		log_framework(LOG_DEBUG,
1393 		    "%s: start_instance -> is maint/disabled\n",
1394 		    inst->ri_i.i_fmri);
1395 		return;
1396 	}
1397 
1398 	/* Already started instances are left alone */
1399 	if (instance_started(inst) == 1) {
1400 		log_framework(LOG_DEBUG,
1401 		    "%s: start_instance -> is already started\n",
1402 		    inst->ri_i.i_fmri);
1403 		return;
1404 	}
1405 
1406 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1407 
1408 	(void) restarter_instance_update_states(local_handle, inst,
1409 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, "none");
1410 
1411 	info = startd_zalloc(sizeof (fork_info_t));
1412 
1413 	info->sf_id = inst->ri_id;
1414 	info->sf_method_type = METHOD_START;
1415 	info->sf_event_type = RERR_NONE;
1416 	inst->ri_method_thread = startd_thread_create(method_thread, info);
1417 }
1418 
1419 static int
1420 event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1421 {
1422 	scf_instance_t *inst;
1423 	int ret = 0;
1424 
1425 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1426 		return (-1);
1427 
1428 	ret = restarter_inst_ractions_from_tty(inst);
1429 
1430 	scf_instance_destroy(inst);
1431 	return (ret);
1432 }
1433 
1434 static void
1435 maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1436     const char *aux)
1437 {
1438 	fork_info_t *info;
1439 	scf_instance_t *scf_inst = NULL;
1440 
1441 	assert(MUTEX_HELD(&rip->ri_lock));
1442 	assert(aux != NULL);
1443 	assert(rip->ri_method_thread == 0);
1444 
1445 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1446 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1447 	    rip->ri_i.i_fmri, aux);
1448 
1449 	/* Services in the maintenance state are ignored */
1450 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1451 		log_framework(LOG_DEBUG,
1452 		    "%s: maintain_instance -> is already in maintenance\n",
1453 		    rip->ri_i.i_fmri);
1454 		return;
1455 	}
1456 
1457 	/*
1458 	 * If the aux state is "service_request" and the
1459 	 * restarter_actions/auxiliary_fmri property is set to a valid fmri,
1460 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use it.
1461 	 */
1462 	if (strcmp(aux, "service_request") == 0 && libscf_fmri_get_instance(h,
1463 	    rip->ri_i.i_fmri, &scf_inst) == 0) {
1464 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1465 			if (restarter_inst_set_aux_fmri(scf_inst))
1466 				log_framework(LOG_DEBUG, "%s: "
1467 				    "restarter_inst_set_aux_fmri failed: ",
1468 				    rip->ri_i.i_fmri);
1469 		} else {
1470 			log_framework(LOG_DEBUG, "%s: "
1471 			    "restarter_inst_validate_ractions_aux_fmri "
1472 			    "failed: ", rip->ri_i.i_fmri);
1473 
1474 			if (restarter_inst_reset_aux_fmri(scf_inst))
1475 				log_framework(LOG_DEBUG, "%s: "
1476 				    "restarter_inst_reset_aux_fmri failed: ",
1477 				    rip->ri_i.i_fmri);
1478 		}
1479 		scf_instance_destroy(scf_inst);
1480 	}
1481 
1482 	if (immediate || !instance_started(rip)) {
1483 		if (rip->ri_i.i_primary_ctid != 0) {
1484 			rip->ri_m_inst = safe_scf_instance_create(h);
1485 			rip->ri_mi_deleted = B_FALSE;
1486 
1487 			libscf_reget_instance(rip);
1488 			method_remove_contract(rip, B_TRUE, B_TRUE);
1489 
1490 			scf_instance_destroy(rip->ri_m_inst);
1491 		}
1492 
1493 		(void) restarter_instance_update_states(h, rip,
1494 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1495 		    (char *)aux);
1496 		return;
1497 	}
1498 
1499 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1500 	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1501 
1502 	log_transition(rip, MAINT_REQUESTED);
1503 
1504 	info = startd_zalloc(sizeof (*info));
1505 	info->sf_id = rip->ri_id;
1506 	info->sf_method_type = METHOD_STOP;
1507 	info->sf_event_type = RERR_RESTART;
1508 	rip->ri_method_thread = startd_thread_create(method_thread, info);
1509 }
1510 
1511 static void
1512 refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1513 {
1514 	scf_instance_t *inst;
1515 	scf_snapshot_t *snap;
1516 	fork_info_t *info;
1517 	int r;
1518 
1519 	assert(MUTEX_HELD(&rip->ri_lock));
1520 
1521 	log_instance(rip, B_TRUE, "Rereading configuration.");
1522 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1523 	    rip->ri_i.i_fmri);
1524 
1525 rep_retry:
1526 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1527 	switch (r) {
1528 	case 0:
1529 		break;
1530 
1531 	case ECONNABORTED:
1532 		libscf_handle_rebind(h);
1533 		goto rep_retry;
1534 
1535 	case ENOENT:
1536 		/* Must have been deleted. */
1537 		return;
1538 
1539 	case EINVAL:
1540 	case ENOTSUP:
1541 	default:
1542 		bad_error("libscf_fmri_get_instance", r);
1543 	}
1544 
1545 	snap = libscf_get_running_snapshot(inst);
1546 
1547 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1548 	    &rip->ri_utmpx_prefix);
1549 	switch (r) {
1550 	case 0:
1551 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1552 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1553 		break;
1554 
1555 	case ECONNABORTED:
1556 		scf_instance_destroy(inst);
1557 		scf_snapshot_destroy(snap);
1558 		libscf_handle_rebind(h);
1559 		goto rep_retry;
1560 
1561 	case ECANCELED:
1562 	case ENOENT:
1563 		/* Succeed in anticipation of REMOVE_INSTANCE. */
1564 		break;
1565 
1566 	default:
1567 		bad_error("libscf_get_startd_properties", r);
1568 	}
1569 
1570 	if (instance_started(rip)) {
1571 		/* Refresh does not change the state. */
1572 		(void) restarter_instance_update_states(h, rip,
1573 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1574 
1575 		info = startd_zalloc(sizeof (*info));
1576 		info->sf_id = rip->ri_id;
1577 		info->sf_method_type = METHOD_REFRESH;
1578 		info->sf_event_type = RERR_REFRESH;
1579 
1580 		assert(rip->ri_method_thread == 0);
1581 		rip->ri_method_thread =
1582 		    startd_thread_create(method_thread, info);
1583 	}
1584 
1585 	scf_snapshot_destroy(snap);
1586 	scf_instance_destroy(inst);
1587 }
1588 
1589 const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
1590 	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
1591 	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
1592 	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
1593 	"INVALID_DEPENDENCY", "ADMIN_DISABLE"
1594 };
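/*
 * event_names[] is indexed directly by restarter_event_type_t values (see
 * the event_names[event->riq_type] and event_names[e->rpe_type] uses
 * below), so its ordering must track the event type enumeration.
 */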
1595 
1596 /*
1597  * void *restarter_process_events()
1598  *
1599  *   Called in a separate thread to process the events on an instance's
1600  *   queue.  Empties the queue completely, and tries to keep the thread
1601  *   around for a little while after the queue is empty to save on
1602  *   startup costs.
1603  */
1604 static void *
1605 restarter_process_events(void *arg)
1606 {
1607 	scf_handle_t *h;
1608 	restarter_instance_qentry_t *event;
1609 	restarter_inst_t *rip;
1610 	char *fmri = (char *)arg;
1611 	struct timespec to;
1612 
1613 	assert(fmri != NULL);
1614 
1615 	h = libscf_handle_create_bound_loop();
1616 
1617 	/* grab the queue lock */
1618 	rip = inst_lookup_queue(fmri);
1619 	if (rip == NULL)
1620 		goto out;
1621 
1622 again:
1623 
1624 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1625 		restarter_inst_t *inst;
1626 
1627 		/* drop the queue lock */
1628 		MUTEX_UNLOCK(&rip->ri_queue_lock);
1629 
1630 		/*
1631 		 * Grab the inst lock -- this waits until any outstanding
1632 		 * method finishes running.
1633 		 */
1634 		inst = inst_lookup_by_name(fmri);
1635 		if (inst == NULL) {
1636 			/* Getting deleted in the middle isn't an error. */
1637 			goto cont;
1638 		}
1639 
1640 		assert(instance_in_transition(inst) == 0);
1641 
1642 		/* process the event */
1643 		switch (event->riq_type) {
1644 		case RESTARTER_EVENT_TYPE_ENABLE:
1645 		case RESTARTER_EVENT_TYPE_DISABLE:
1646 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1647 			(void) enable_inst(h, inst, event->riq_type);
1648 			break;
1649 
1650 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1651 			restarter_delete_inst(inst);
1652 			inst = NULL;
1653 			goto cont;
1654 
1655 		case RESTARTER_EVENT_TYPE_STOP:
1656 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1657 			break;
1658 
1659 		case RESTARTER_EVENT_TYPE_START:
1660 			start_instance(h, inst);
1661 			break;
1662 
1663 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1664 			maintain_instance(h, inst, 0, "dependency_cycle");
1665 			break;
1666 
1667 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1668 			maintain_instance(h, inst, 0, "invalid_dependency");
1669 			break;
1670 
1671 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1672 			if (event_from_tty(h, inst) == 0)
1673 				maintain_instance(h, inst, 0,
1674 				    "service_request");
1675 			else
1676 				maintain_instance(h, inst, 0,
1677 				    "administrative_request");
1678 			break;
1679 
1680 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1681 			if (event_from_tty(h, inst) == 0)
1682 				maintain_instance(h, inst, 1,
1683 				    "service_request");
1684 			else
1685 				maintain_instance(h, inst, 1,
1686 				    "administrative_request");
1687 			break;
1688 
1689 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1690 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1691 			break;
1692 
1693 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1694 			refresh_instance(h, inst);
1695 			break;
1696 
1697 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1698 			log_framework(LOG_WARNING, "Restarter: "
1699 			    "%s command (for %s) unimplemented.\n",
1700 			    event_names[event->riq_type], inst->ri_i.i_fmri);
1701 			break;
1702 
1703 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1704 			if (!instance_started(inst)) {
1705 				log_framework(LOG_DEBUG, "Restarter: "
1706 				    "Not restarting %s; not running.\n",
1707 				    inst->ri_i.i_fmri);
1708 			} else {
1709 				/*
1710 				 * Stop the instance.  If it can be restarted,
1711 				 * the graph engine will send a new event.
1712 				 */
1713 				(void) stop_instance(h, inst, RSTOP_RESTART);
1714 			}
1715 			break;
1716 
1717 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1718 		default:
1719 #ifndef NDEBUG
1720 			uu_warn("%s:%d: Bad restarter event %d.  "
1721 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1722 #endif
1723 			abort();
1724 		}
1725 
1726 		assert(inst != NULL);
1727 		MUTEX_UNLOCK(&inst->ri_lock);
1728 
1729 cont:
1730 		/* grab the queue lock */
1731 		rip = inst_lookup_queue(fmri);
1732 		if (rip == NULL)
1733 			goto out;
1734 
1735 		/* delete the event */
1736 		uu_list_remove(rip->ri_queue, event);
1737 		startd_free(event, sizeof (restarter_instance_qentry_t));
1738 	}
1739 
1740 	assert(rip != NULL);
1741 
1742 	/*
1743 	 * Try to preserve the thread for a little while for future use.
1744 	 */
1745 	to.tv_sec = 3;
1746 	to.tv_nsec = 0;
1747 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1748 	    &rip->ri_queue_lock, &to);
1749 
1750 	if (uu_list_first(rip->ri_queue) != NULL)
1751 		goto again;
1752 
1753 	rip->ri_queue_thread = 0;
1754 	MUTEX_UNLOCK(&rip->ri_queue_lock);
1755 out:
1756 	(void) scf_handle_unbind(h);
1757 	scf_handle_destroy(h);
1758 	free(fmri);
1759 	return (NULL);
1760 }
1761 
1762 static int
1763 is_admin_event(restarter_event_type_t t)
1764 {
1765 	switch (t) {
1766 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1767 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1768 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1769 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1770 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1771 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1772 		return (1);
1773 	default:
1774 		return (0);
1775 	}
1776 }
1777 
1778 static void
1779 restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1780 {
1781 	restarter_instance_qentry_t *qe;
1782 	int r;
1783 
1784 	assert(MUTEX_HELD(&ri->ri_queue_lock));
1785 	assert(!MUTEX_HELD(&ri->ri_lock));
1786 
1787 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1788 	qe->riq_type = e->rpe_type;
1789 
1790 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1791 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1792 	assert(r == 0);
1793 }
1794 
1795 /*
1796  * void *restarter_event_thread()
1797  *
1798  *  Handle incoming graph events by placing them on a per-instance
1799  *  queue.  We can't lock the main part of the instance structure, so
1800  *  just modify the separately locked event queue portion.
1801  */
1802 /*ARGSUSED*/
1803 static void *
1804 restarter_event_thread(void *unused)
1805 {
1806 	scf_handle_t *h;
1807 
1808 	/*
1809 	 * This is a new thread, and thus, gets its own handle
1810 	 * to the repository.
1811 	 */
1812 	h = libscf_handle_create_bound_loop();
1813 
1814 	MUTEX_LOCK(&ru->restarter_update_lock);
1815 
1816 	/*CONSTCOND*/
1817 	while (1) {
1818 		restarter_protocol_event_t *e;
1819 
1820 		while (ru->restarter_update_wakeup == 0)
1821 			(void) pthread_cond_wait(&ru->restarter_update_cv,
1822 			    &ru->restarter_update_lock);
1823 
1824 		ru->restarter_update_wakeup = 0;
1825 
1826 		while ((e = restarter_event_dequeue()) != NULL) {
1827 			restarter_inst_t *rip;
1828 			char *fmri;
1829 
1830 			MUTEX_UNLOCK(&ru->restarter_update_lock);
1831 
1832 			/*
1833 			 * ADD_INSTANCE is special: there's likely no
1834 			 * instance structure yet, so we need to handle the
1835 			 * addition synchronously.
1836 			 */
1837 			switch (e->rpe_type) {
1838 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1839 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1840 					log_error(LOG_INFO, "Restarter: "
1841 					    "Could not add %s.\n", e->rpe_inst);
1842 
1843 				MUTEX_LOCK(&st->st_load_lock);
1844 				if (--st->st_load_instances == 0)
1845 					(void) pthread_cond_broadcast(
1846 					    &st->st_load_cv);
1847 				MUTEX_UNLOCK(&st->st_load_lock);
1848 
1849 				goto nolookup;
1850 			}
1851 
1852 			/*
1853 			 * Lookup the instance, locking only the event queue.
1854 			 * Can't grab ri_lock here because it might be held
1855 			 * by a long-running method.
1856 			 */
1857 			rip = inst_lookup_queue(e->rpe_inst);
1858 			if (rip == NULL) {
1859 				log_error(LOG_INFO, "Restarter: "
1860 				    "Ignoring %s command for unknown service "
1861 				    "%s.\n", event_names[e->rpe_type],
1862 				    e->rpe_inst);
1863 				goto nolookup;
1864 			}
1865 
1866 			/* Keep ADMIN events from filling up the queue. */
1867 			if (is_admin_event(e->rpe_type) &&
1868 			    uu_list_numnodes(rip->ri_queue) >
1869 			    RINST_QUEUE_THRESHOLD) {
1870 				MUTEX_UNLOCK(&rip->ri_queue_lock);
1871 				log_instance(rip, B_TRUE, "Instance event "
1872 				    "queue overflow.  Dropping administrative "
1873 				    "request.");
1874 				log_framework(LOG_DEBUG, "%s: Instance event "
1875 				    "queue overflow.  Dropping administrative "
1876 				    "request.\n", rip->ri_i.i_fmri);
1877 				goto nolookup;
1878 			}
1879 
1880 			/* Now add the event to the instance queue. */
1881 			restarter_queue_event(rip, e);
1882 
1883 			if (rip->ri_queue_thread == 0) {
1884 				/*
1885 				 * Start a thread if one isn't already
1886 				 * running.
1887 				 */
1888 				fmri = safe_strdup(e->rpe_inst);
1889 				rip->ri_queue_thread = startd_thread_create(
1890 				    restarter_process_events, (void *)fmri);
1891 			} else {
1892 				/*
1893 				 * Signal the existing thread that there's
1894 				 * a new event.
1895 				 */
1896 				(void) pthread_cond_broadcast(
1897 				    &rip->ri_queue_cv);
1898 			}
1899 
1900 			MUTEX_UNLOCK(&rip->ri_queue_lock);
1901 nolookup:
1902 			restarter_event_release(e);
1903 
1904 			MUTEX_LOCK(&ru->restarter_update_lock);
1905 		}
1906 	}
1907 
1908 	/*
1909 	 * Unreachable for now -- there's currently no graceful cleanup
1910 	 * called on exit().
1911 	 */
1912 	(void) scf_handle_unbind(h);
1913 	scf_handle_destroy(h);
1914 	return (NULL);
1915 }
1916 
1917 static restarter_inst_t *
1918 contract_to_inst(ctid_t ctid)
1919 {
1920 	restarter_inst_t *inst;
1921 	int id;
1922 
1923 	id = lookup_inst_by_contract(ctid);
1924 	if (id == -1)
1925 		return (NULL);
1926 
1927 	inst = inst_lookup_by_id(id);
1928 	if (inst != NULL) {
1929 		/*
1930 		 * Since ri_lock isn't held by the contract id lookup, this
1931 		 * instance may have been restarted and now be in a new
1932 		 * contract, making the old contract no longer valid for this
1933 		 * instance.
1934 		 */
1935 		if (ctid != inst->ri_i.i_primary_ctid) {
1936 			MUTEX_UNLOCK(&inst->ri_lock);
1937 			inst = NULL;
1938 		}
1939 	}
1940 	return (inst);
1941 }
1942 
1943 /*
1944  * void contract_action()
1945  *   Take action on contract events.
1946  */
1947 static void
1948 contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1949     uint32_t type)
1950 {
1951 	const char *fmri = inst->ri_i.i_fmri;
1952 
1953 	assert(MUTEX_HELD(&inst->ri_lock));
1954 
1955 	/*
1956 	 * If startd has stopped this contract, there is no need to
1957 	 * stop it again.
1958 	 */
1959 	if (inst->ri_i.i_primary_ctid > 0 &&
1960 	    inst->ri_i.i_primary_ctid_stopped)
1961 		return;
1962 
1963 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1964 	    | CT_PR_EV_HWERR)) == 0) {
1965 		/*
1966 		 * There shouldn't be other events, since that's not how we set
1967 		 * the terms. Thus, just log an error and drive on.
1968 		 */
1969 		log_framework(LOG_NOTICE,
1970 		    "%s: contract %ld received unexpected critical event "
1971 		    "(%d)\n", fmri, id, type);
1972 		return;
1973 	}
1974 
1975 	assert(instance_in_transition(inst) == 0);
1976 
1977 	if (instance_is_wait_style(inst)) {
1978 		/*
1979 		 * We ignore all events; if they impact the
1980 		 * process we're monitoring, then the
1981 		 * wait_thread will stop the instance.
1982 		 */
1983 		log_framework(LOG_DEBUG,
1984 		    "%s: ignoring contract event on wait-style service\n",
1985 		    fmri);
1986 	} else {
1987 		/*
1988 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1989 		 */
1990 		switch (type) {
1991 		case CT_PR_EV_EMPTY:
1992 			(void) stop_instance(h, inst, RSTOP_EXIT);
1993 			break;
1994 		case CT_PR_EV_CORE:
1995 			(void) stop_instance(h, inst, RSTOP_CORE);
1996 			break;
1997 		case CT_PR_EV_SIGNAL:
1998 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1999 			break;
2000 		case CT_PR_EV_HWERR:
2001 			(void) stop_instance(h, inst, RSTOP_HWERR);
2002 			break;
2003 		}
2004 	}
2005 }
2006 
2007 /*
2008  * void *restarter_contracts_event_thread(void *)
2009  *   Listens to the process contract bundle for critical events, taking action
2010  *   on events from contracts we know we are responsible for.
2011  */
2012 /*ARGSUSED*/
2013 static void *
2014 restarter_contracts_event_thread(void *unused)
2015 {
2016 	int fd, err;
2017 	scf_handle_t *local_handle;
2018 
2019 	/*
2020 	 * Await graph load completion.  That is, stop here until we've scanned
2021 	 * the repository for contract-instance associations.
2022 	 */
2023 	MUTEX_LOCK(&st->st_load_lock);
2024 	while (!(st->st_load_complete && st->st_load_instances == 0))
2025 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2026 	MUTEX_UNLOCK(&st->st_load_lock);
2027 
2028 	/*
2029 	 * This is a new thread and thus gets its own handle
2030 	 * to the repository.
2031 	 */
2032 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2033 		uu_die("Unable to bind a new repository handle: %s\n",
2034 		    scf_strerror(scf_error()));
2035 
2036 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2037 	if (fd == -1)
2038 		uu_die("process bundle open failed");
2039 
2040 	/*
2041 	 * Make sure we get all events (including those generated by configd
2042 	 * before this thread was started).
2043 	 */
2044 	err = ct_event_reset(fd);
2045 	assert(err == 0);
2046 
2047 	for (;;) {
2048 		int efd, sfd;
2049 		ct_evthdl_t ev;
2050 		uint32_t type;
2051 		ctevid_t evid;
2052 		ct_stathdl_t status;
2053 		ctid_t ctid;
2054 		restarter_inst_t *inst;
2055 		uint64_t cookie;
2056 
2057 		if (err = ct_event_read_critical(fd, &ev)) {
2058 			log_error(LOG_WARNING,
2059 			    "Error reading next contract event: %s",
2060 			    strerror(err));
2061 			continue;
2062 		}
2063 
2064 		evid = ct_event_get_evid(ev);
2065 		ctid = ct_event_get_ctid(ev);
2066 		type = ct_event_get_type(ev);
2067 
2068 		/* Fetch cookie. */
2069 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2070 		    < 0) {
2071 			ct_event_free(ev);
2072 			continue;
2073 		}
2074 
2075 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2076 			log_framework(LOG_WARNING, "Could not get status for "
2077 			    "contract %ld: %s\n", ctid, strerror(err));
2078 
2079 			startd_close(sfd);
2080 			ct_event_free(ev);
2081 			continue;
2082 		}
2083 
2084 		cookie = ct_status_get_cookie(status);
2085 
2086 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2087 		    "cookie %lld\n", type, ctid, cookie);
2088 
2089 		ct_status_free(status);
2090 
2091 		startd_close(sfd);
2092 
2093 		/*
2094 		 * svc.configd(1M) restart handling is performed by the
2095 		 * fork_configd_thread.  We don't acknowledge, as that thread
2096 		 * will do so.
2097 		 */
2098 		if (cookie == CONFIGD_COOKIE) {
2099 			ct_event_free(ev);
2100 			continue;
2101 		}
2102 
2103 		inst = NULL;
2104 		if (storing_contract != 0 &&
2105 		    (inst = contract_to_inst(ctid)) == NULL) {
2106 			/*
2107 			 * This can happen for two reasons:
2108 			 * - method_run() has not yet stored the
2109 			 *    contract into the internal hash table.
2110 			 * - we receive an EMPTY event for an abandoned
2111 			 *    contract.
2112 			 * If any contract is in the process of being
2113 			 * stored into the hash table, re-read the event
2114 			 * later.
2115 			 */
2116 			log_framework(LOG_DEBUG,
2117 			    "Reset event %d for unknown "
2118 			    "contract id %ld\n", type, ctid);
2119 
2120 			/* Don't spin; wait briefly before re-reading. */
2121 			(void) poll(NULL, 0, 100);
2122 
2123 			(void) ct_event_reset(fd);
2124 			ct_event_free(ev);
2125 			continue;
2126 		}
2127 
2128 		/*
2129 		 * Do not call contract_to_inst() again if the first
2130 		 * call succeeded.
2131 		 */
2132 		if (inst == NULL)
2133 			inst = contract_to_inst(ctid);
2134 		if (inst == NULL) {
2135 			/*
2136 			 * This can happen if we receive an EMPTY
2137 			 * event for an abandoned contract.
2138 			 */
2139 			log_framework(LOG_DEBUG,
2140 			    "Received event %d for unknown contract id "
2141 			    "%ld\n", type, ctid);
2142 		} else {
2143 			log_framework(LOG_DEBUG,
2144 			    "Received event %d for contract id "
2145 			    "%ld (%s)\n", type, ctid,
2146 			    inst->ri_i.i_fmri);
2147 
2148 			contract_action(local_handle, inst, ctid, type);
2149 
2150 			MUTEX_UNLOCK(&inst->ri_lock);
2151 		}
2152 
2153 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2154 		    O_WRONLY);
2155 		if (efd != -1) {
2156 			(void) ct_ctl_ack(efd, evid);
2157 			startd_close(efd);
2158 		}
2159 
2160 		ct_event_free(ev);
2161 
2162 	}
2163 
2164 	/*NOTREACHED*/
2165 	return (NULL);
2166 }
2167 
2168 /*
2169  * Timeout queue, processed by restarter_timeouts_event_thread().
2170  */
2171 timeout_queue_t *timeouts;
2172 static uu_list_pool_t *timeout_pool;
2173 
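/*
 * Wakeup mechanism used by timeout_insert() to notify
 * restarter_timeouts_event_thread() that the timeout queue is no longer
 * empty.
 */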
2174 typedef struct timeout_update {
2175 	pthread_mutex_t		tu_lock;
2176 	pthread_cond_t		tu_cv;
2177 	int			tu_wakeup;
2178 } timeout_update_t;
2179 
2180 timeout_update_t *tu;
2181 
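/*
 * Services whose configured method timeouts svc.startd overrides with an
 * infinite timeout (see is_timeout_ovr() below).
 */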
2182 static const char *timeout_ovr_svcs[] = {
2183 	"svc:/system/manifest-import:default",
2184 	"svc:/network/initial:default",
2185 	"svc:/network/service:default",
2186 	"svc:/system/rmtmpfiles:default",
2187 	"svc:/network/loopback:default",
2188 	"svc:/network/physical:default",
2189 	"svc:/system/device/local:default",
2190 	"svc:/system/metainit:default",
2191 	"svc:/system/filesystem/usr:default",
2192 	"svc:/system/filesystem/minimal:default",
2193 	"svc:/system/filesystem/local:default",
2194 	NULL
2195 };
2196 
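/*
 * int is_timeout_ovr(restarter_inst_t *)
 *   Returns 1 if the instance's method timeout is overridden by svc.startd
 *   (treated as infinite), 0 otherwise.
 */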
2197 int
2198 is_timeout_ovr(restarter_inst_t *inst)
2199 {
2200 	int i;
2201 
2202 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2203 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2204 			log_instance(inst, B_TRUE, "Timeout override by "
2205 			    "svc.startd.  Using infinite timeout.");
2206 			return (1);
2207 		}
2208 	}
2209 
2210 	return (0);
2211 }
2212 
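/*
 * Comparator for the sorted timeout list: orders entries by ascending
 * expiry time.
 */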
2213 /*ARGSUSED*/
2214 static int
2215 timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2216 {
2217 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2218 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2219 
2220 	if (t1 > t2)
2221 		return (1);
2222 	else if (t1 < t2)
2223 		return (-1);
2224 	return (0);
2225 }
2226 
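/*
 * void timeout_init()
 *   Initialize the timeout queue and the wakeup structure used to signal
 *   the timeout thread.
 */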
2227 void
2228 timeout_init()
2229 {
2230 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2231 
2232 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2233 
2234 	timeout_pool = startd_list_pool_create("timeouts",
2235 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2236 	    timeout_compare, UU_LIST_POOL_DEBUG);
2237 	assert(timeout_pool != NULL);
2238 
2239 	timeouts->tq_list = startd_list_create(timeout_pool,
2240 	    timeouts, UU_LIST_SORTED);
2241 	assert(timeouts->tq_list != NULL);
2242 
2243 	tu = startd_zalloc(sizeof (timeout_update_t));
2244 	(void) pthread_cond_init(&tu->tu_cv, NULL);
2245 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2246 }
2247 
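/*
 * void timeout_insert(restarter_inst_t *, ctid_t, uint64_t)
 *   Queue a timeout for the given instance and contract, expiring
 *   timeout_sec seconds from now, and wake the timeout thread.
 */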
2248 void
2249 timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2250 {
2251 	hrtime_t now, timeout;
2252 	timeout_entry_t *entry;
2253 	uu_list_index_t idx;
2254 
2255 	assert(MUTEX_HELD(&inst->ri_lock));
2256 
2257 	now = gethrtime();
2258 
2259 	/*
2260 	 * If we overflow LLONG_MAX, we're never timing out anyway, so
2261 	 * just return.
2262 	 */
2263 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2264 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2265 		    "treating as infinite.");
2266 		return;
2267 	}
2268 
2269 	/* hrtime is in nanoseconds. Convert timeout_sec. */
2270 	timeout = now + (timeout_sec * 1000000000LL);
2271 
2272 	entry = startd_alloc(sizeof (timeout_entry_t));
2273 	entry->te_timeout = timeout;
2274 	entry->te_ctid = cid;
2275 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2276 	entry->te_logstem = safe_strdup(inst->ri_logstem);
2277 	entry->te_fired = 0;
2278 	/* Insert the calculated timeout time onto the queue. */
2279 	MUTEX_LOCK(&timeouts->tq_lock);
2280 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2281 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2282 	uu_list_insert(timeouts->tq_list, entry, idx);
2283 	MUTEX_UNLOCK(&timeouts->tq_lock);
2284 
2285 	assert(inst->ri_timeout == NULL);
2286 	inst->ri_timeout = entry;
2287 
2288 	MUTEX_LOCK(&tu->tu_lock);
2289 	tu->tu_wakeup = 1;
2290 	(void) pthread_cond_broadcast(&tu->tu_cv);
2291 	MUTEX_UNLOCK(&tu->tu_lock);
2292 }
2293 
2294 
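/*
 * void timeout_remove(restarter_inst_t *, ctid_t)
 *   Remove and free the instance's pending timeout entry, if any.
 */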
2295 void
2296 timeout_remove(restarter_inst_t *inst, ctid_t cid)
2297 {
2298 	assert(MUTEX_HELD(&inst->ri_lock));
2299 
2300 	if (inst->ri_timeout == NULL)
2301 		return;
2302 
2303 	assert(inst->ri_timeout->te_ctid == cid);
2304 
2305 	MUTEX_LOCK(&timeouts->tq_lock);
2306 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2307 	MUTEX_UNLOCK(&timeouts->tq_lock);
2308 
2309 	free(inst->ri_timeout->te_fmri);
2310 	free(inst->ri_timeout->te_logstem);
2311 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2312 	inst->ri_timeout = NULL;
2313 }
2314 
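/*
 * static int timeout_now()
 *   Kill the contract of every timeout that has expired.  Returns 0 if
 *   entries remain on the timeout queue, -1 if the queue is empty.
 */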
2315 static int
2316 timeout_now()
2317 {
2318 	timeout_entry_t *e;
2319 	hrtime_t now;
2320 	int ret;
2321 
2322 	now = gethrtime();
2323 
2324 	/*
2325 	 * Walk through the (sorted) timeouts list.  While the timeout
2326 	 * at the head of the list is <= the current time, kill the
2327 	 * associated contract.
2328 	 */
2329 	MUTEX_LOCK(&timeouts->tq_lock);
2330 
2331 	for (e = uu_list_first(timeouts->tq_list);
2332 	    e != NULL && e->te_timeout <= now;
2333 	    e = uu_list_next(timeouts->tq_list, e)) {
2334 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2335 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2336 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2337 		    "Method or service exit timed out.  Killing contract %ld.",
2338 		    e->te_ctid);
2339 		e->te_fired = 1;
2340 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2341 	}
2342 
2343 	if (uu_list_numnodes(timeouts->tq_list) > 0)
2344 		ret = 0;
2345 	else
2346 		ret = -1;
2347 
2348 	MUTEX_UNLOCK(&timeouts->tq_lock);
2349 
2350 	return (ret);
2351 }
2352 
2353 /*
2354  * void *restarter_timeouts_event_thread(void *)
2355  *   Responsible for monitoring the method timeouts.  This thread must
2356  *   be started before any methods are called.
2357  */
2358 /*ARGSUSED*/
2359 static void *
2360 restarter_timeouts_event_thread(void *unused)
2361 {
2362 	/*
2363 	 * Timeouts are entered on a priority queue, which is processed by
2364 	 * this thread.  As timeouts are specified in seconds, we'll do
2365 	 * the necessary processing every second, as long as the queue
2366 	 * is not empty.
2367 	 */
2368 
2369 	/*CONSTCOND*/
2370 	while (1) {
2371 		/*
2372 		 * As long as the timeout list isn't empty, process it
2373 		 * every second.
2374 		 */
2375 		if (timeout_now() == 0) {
2376 			(void) sleep(1);
2377 			continue;
2378 		}
2379 
2380 		/* The list is empty, wait until we have more timeouts. */
2381 		MUTEX_LOCK(&tu->tu_lock);
2382 
2383 		while (tu->tu_wakeup == 0)
2384 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2385 
2386 		tu->tu_wakeup = 0;
2387 		MUTEX_UNLOCK(&tu->tu_lock);
2388 	}
2389 
2390 	return (NULL);
2391 }
2392 
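/*
 * void restarter_start()
 *   Create the always-running restarter threads: the timeout thread, the
 *   restarter event thread, the contract event thread, and the wait thread.
 */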
2393 void
2394 restarter_start()
2395 {
2396 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2397 	(void) startd_thread_create(restarter_event_thread, NULL);
2398 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2399 	(void) startd_thread_create(wait_thread, NULL);
2400 }
2401 
2402 
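/*
 * void restarter_init()
 *   Initialize the instance list and its pool, the per-instance event queue
 *   pool, the contract list pool, and the contract hash table.
 */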
2403 void
2404 restarter_init()
2405 {
2406 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2407 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2408 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2409 	(void) memset(&instance_list, 0, sizeof (instance_list));
2410 
2411 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2412 	instance_list.ril_instance_list = startd_list_create(
2413 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2414 
2415 	restarter_queue_pool = startd_list_pool_create(
2416 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2417 	    offsetof(restarter_instance_qentry_t, riq_link), NULL,
2418 	    UU_LIST_POOL_DEBUG);
2419 
2420 	contract_list_pool = startd_list_pool_create(
2421 	    "contract_list", sizeof (contract_entry_t),
2422 	    offsetof(contract_entry_t, ce_link), NULL,
2423 	    UU_LIST_POOL_DEBUG);
2424 	contract_hash_init();
2425 
2426 	log_framework(LOG_DEBUG, "Initialized restarter\n");
2427 }
2428