xref: /illumos-gate/usr/src/cmd/svc/configd/rc_node.c (revision 8a8d276f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * rc_node.c - object management primitives
30  *
31  * This layer manages entities, their data structure, its locking, iterators,
32  * transactions, and change notification requests.  Entities (scopes,
33  * services, instances, snapshots, snaplevels, property groups, "composed"
34  * property groups (see composition below), and properties) are represented by
35  * rc_node_t's and are kept in the cache_hash hash table.  (Property values
36  * are kept in the rn_values member of the respective property -- not as
37  * separate objects.)  Iterators are represented by rc_node_iter_t's.
38  * Transactions are represented by rc_node_tx_t's and are only allocated as
39  * part of repcache_tx_t's in the client layer (client.c).  Change
40  * notification requests are represented by rc_notify_t structures and are
41  * described below.
42  *
43  * The entity tree is rooted at rc_scope, which rc_node_init() initializes to
44  * the "localhost" scope.  The tree is filled in from the database on-demand
45  * by rc_node_fill_children(), usually from rc_iter_create() since iterators
46  * are the only way to find the children of an entity.
47  *
48  * Each rc_node_t is protected by its rn_lock member.  Operations which can
49  * take too long, however, should serialize on an RC_NODE_WAITING_FLAGS bit in
50  * rn_flags with the rc_node_{hold,rele}_flag() functions.  And since pointers
51  * to rc_node_t's are allowed, rn_refs is a reference count maintained by
52  * rc_node_{hold,rele}().  See configd.h for locking order information.
53  *
54  * When a node (property group or snapshot) is updated, a new node takes the
55  * place of the old node in the global hash, and the old node is hung off of
56  * the rn_former list of the new node.  At the same time, all of its children
57  * have their rn_parent_ref pointer set, and any holds they have are reflected
58  * in the old node's rn_other_refs count.  This is automatically kept up
59  * to date, until the final reference to the subgraph is dropped, at which
60  * point the node is unrefed and destroyed, along with all of its children.
61  *
62  * Locking rules: To dereference an rc_node_t * (usually to lock it), you must
63  * have a hold (rc_node_hold()) on it or otherwise be sure that it hasn't been
64  * rc_node_destroy()ed (hold a lock on its parent or child, hold a flag,
65  * etc.).  Once you have locked an rc_node_t you must check its rn_flags for
66  * RC_NODE_DEAD before you can use it.  This is usually done with the
67  * rc_node_{wait,hold}_flag() functions (often via the rc_node_check_*()
68  * functions & RC_NODE_*() macros), which fail if the object has died.
69  *
70  * An ITER_START for a non-ENTITY_VALUE induces an rc_node_fill_children()
71  * call via rc_node_setup_iter() to populate the rn_children uu_list of the
72  * rc_node_t * in question and a call to uu_list_walk_start() on that list.  For
73  * ITER_READ, rc_iter_next() uses uu_list_walk_next() to find the next
 * appropriate child.
75  *
76  * An ITER_START for an ENTITY_VALUE makes sure the node has its values
77  * filled, and sets up the iterator.  An ITER_READ_VALUE just copies out
78  * the proper values and updates the offset information.
79  *
80  * When a property group gets changed by a transaction, it sticks around as
81  * a child of its replacement property group, but is removed from the parent.
82  *
83  * To allow aliases, snapshots are implemented with a level of indirection.
84  * A snapshot rc_node_t has a snapid which refers to an rc_snapshot_t in
85  * snapshot.c which contains the authoritative snaplevel information.  The
86  * snapid is "assigned" by rc_attach_snapshot().
87  *
88  * We provide the client layer with rc_node_ptr_t's to reference objects.
89  * Objects referred to by them are automatically held & released by
90  * rc_node_assign() & rc_node_clear().  The RC_NODE_PTR_*() macros are used at
91  * client.c entry points to read the pointers.  They fetch the pointer to the
92  * object, return (from the function) if it is dead, and lock, hold, or hold
93  * a flag of the object.
94  */
95 
96 /*
97  * Permission checking is authorization-based: some operations may only
98  * proceed if the user has been assigned at least one of a set of
99  * authorization strings.  The set of enabling authorizations depends on the
100  * operation and the target object.  The set of authorizations assigned to
101  * a user is determined by reading /etc/security/policy.conf, querying the
102  * user_attr database, and possibly querying the prof_attr database, as per
103  * chkauthattr() in libsecdb.
104  *
105  * The fastest way to decide whether the two sets intersect is by entering the
106  * strings into a hash table and detecting collisions, which takes linear time
107  * in the total size of the sets.  Except for the authorization patterns which
108  * may be assigned to users, which without advanced pattern-matching
109  * algorithms will take O(n) in the number of enabling authorizations, per
110  * pattern.
111  *
112  * We can achieve some practical speed-ups by noting that if we enter all of
113  * the authorizations from one of the sets into the hash table we can merely
114  * check the elements of the second set for existence without adding them.
115  * This reduces memory requirements and hash table clutter.  The enabling set
116  * is well suited for this because it is internal to configd (for now, at
117  * least).  Combine this with short-circuiting and we can even minimize the
118  * number of queries to the security databases (user_attr & prof_attr).
119  *
120  * To force this usage onto clients we provide functions for adding
121  * authorizations to the enabling set of a permission context structure
 * (perm_add_*()) and one to decide whether the user associated with the
123  * current door call client possesses any of them (perm_granted()).
124  *
125  * At some point, a generic version of this should move to libsecdb.
126  */
127 
128 /*
129  * Composition is the combination of sets of properties.  The sets are ordered
130  * and properties in higher sets obscure properties of the same name in lower
131  * sets.  Here we present a composed view of an instance's properties as the
132  * union of its properties and its service's properties.  Similarly the
133  * properties of snaplevels are combined to form a composed view of the
134  * properties of a snapshot (which should match the composed view of the
135  * properties of the instance when the snapshot was taken).
136  *
137  * In terms of the client interface, the client may request that a property
138  * group iterator for an instance or snapshot be composed.  Property groups
139  * traversed by such an iterator may not have the target entity as a parent.
140  * Similarly, the properties traversed by a property iterator for those
141  * property groups may not have the property groups iterated as parents.
142  *
143  * Implementation requires that iterators for instances and snapshots be
144  * composition-savvy, and that we have a "composed property group" entity
145  * which represents the composition of a number of property groups.  Iteration
146  * over "composed property groups" yields properties which may have different
147  * parents, but for all other operations a composed property group behaves
148  * like the top-most property group it represents.
149  *
150  * The implementation is based on the rn_cchain[] array of rc_node_t pointers
151  * in rc_node_t.  For instances, the pointers point to the instance and its
152  * parent service.  For snapshots they point to the child snaplevels, and for
153  * composed property groups they point to property groups.  A composed
154  * iterator carries an index into rn_cchain[].  Thus most of the magic ends up
 * in the rc_iter_*() code.
156  */
157 
158 #include <assert.h>
159 #include <atomic.h>
160 #include <errno.h>
161 #include <libuutil.h>
162 #include <libscf.h>
163 #include <libscf_priv.h>
164 #include <prof_attr.h>
165 #include <pthread.h>
166 #include <stdio.h>
167 #include <stdlib.h>
168 #include <strings.h>
169 #include <sys/types.h>
170 #include <unistd.h>
171 #include <user_attr.h>
172 
173 #include "configd.h"
174 
/*
 * Authorization strings, plus the names and types of the property groups and
 * properties that carry authorization information.  Only AUTH_MODIFY_PREFIX
 * and RBAC_AUTH_SEP are referenced in the code immediately following; the
 * rest are presumably consumed by the permission checks later in the file.
 */
#define	AUTH_PREFIX		"solaris.smf."
#define	AUTH_MANAGE		AUTH_PREFIX "manage"
#define	AUTH_MODIFY		AUTH_PREFIX "modify"
/* Prefix of the per-pgtype authorizations; see perm_auth_for_pgtype(). */
#define	AUTH_MODIFY_PREFIX	AUTH_MODIFY "."
#define	AUTH_PG_ACTIONS		SCF_PG_RESTARTER_ACTIONS
#define	AUTH_PG_ACTIONS_TYPE	SCF_PG_RESTARTER_ACTIONS_TYPE
#define	AUTH_PG_GENERAL		SCF_PG_GENERAL
#define	AUTH_PG_GENERAL_TYPE	SCF_PG_GENERAL_TYPE
#define	AUTH_PG_GENERAL_OVR	SCF_PG_GENERAL_OVR
#define	AUTH_PG_GENERAL_OVR_TYPE  SCF_PG_GENERAL_OVR_TYPE
#define	AUTH_PROP_ACTION	"action_authorization"
#define	AUTH_PROP_ENABLED	"enabled"
#define	AUTH_PROP_MODIFY	"modify_authorization"
#define	AUTH_PROP_VALUE		"value_authorization"
/* libsecdb should take care of this. */
/* Delimiter given to strtok_r() when splitting auth and profile lists. */
#define	RBAC_AUTH_SEP		","

/* Capacity of the rt_valid_children[] array in rc_type_info_t. */
#define	MAX_VALID_CHILDREN 3
193 
/*
 * Static description of one entity type.  The rc_types[] table holds one of
 * these per entity type and is indexed by the type value.
 */
typedef struct rc_type_info {
	uint32_t	rt_type;		/* matches array index */
	/* number of leading rl_ids[] slots hashed and compared for the type */
	uint32_t	rt_num_ids;
	/* flags handed to uu_check_name(), or RT_NO_NAME */
	uint32_t	rt_name_flags;
	/* entity types accepted as children; unused slots are left zero */
	uint32_t	rt_valid_children[MAX_VALID_CHILDREN];
} rc_type_info_t;
200 
201 #define	RT_NO_NAME	-1U
202 
203 static rc_type_info_t rc_types[] = {
204 	{REP_PROTOCOL_ENTITY_NONE, 0, RT_NO_NAME},
205 	{REP_PROTOCOL_ENTITY_SCOPE, 0, 0,
206 	    {REP_PROTOCOL_ENTITY_SERVICE, REP_PROTOCOL_ENTITY_SCOPE}},
207 	{REP_PROTOCOL_ENTITY_SERVICE, 0, UU_NAME_DOMAIN | UU_NAME_PATH,
208 	    {REP_PROTOCOL_ENTITY_INSTANCE, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
209 	{REP_PROTOCOL_ENTITY_INSTANCE, 1, UU_NAME_DOMAIN,
210 	    {REP_PROTOCOL_ENTITY_SNAPSHOT, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
211 	{REP_PROTOCOL_ENTITY_SNAPSHOT, 2, UU_NAME_DOMAIN,
212 	    {REP_PROTOCOL_ENTITY_SNAPLEVEL, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
213 	{REP_PROTOCOL_ENTITY_SNAPLEVEL, 4, RT_NO_NAME,
214 	    {REP_PROTOCOL_ENTITY_PROPERTYGRP}},
215 	{REP_PROTOCOL_ENTITY_PROPERTYGRP, 5, UU_NAME_DOMAIN,
216 	    {REP_PROTOCOL_ENTITY_PROPERTY}},
217 	{REP_PROTOCOL_ENTITY_CPROPERTYGRP, 0, UU_NAME_DOMAIN,
218 	    {REP_PROTOCOL_ENTITY_PROPERTY}},
219 	{REP_PROTOCOL_ENTITY_PROPERTY, 7, UU_NAME_DOMAIN},
220 	{-1UL}
221 };
222 #define	NUM_TYPES	((sizeof (rc_types) / sizeof (*rc_types)))
223 
/*
 * Element of a permcheck_t hash table.  pc_add() allocates each element with
 * room for the complete authorization string, so the storage behind
 * pce_auth[] is strlen(auth) + 1 bytes rather than the declared single byte.
 */
struct pc_elt {
	struct pc_elt	*pce_next;	/* next element in the same bucket */
	char		pce_auth[1];	/* NUL-terminated authorization */
};
229 
/*
 * An authorization set hash table.  Buckets are chosen with
 * hash & (pc_bnum - 1); pc_create() starts pc_bnum at 8 and pc_grow() only
 * ever doubles it.
 */
typedef struct {
	struct pc_elt	**pc_buckets;		/* array of pc_bnum chains */
	uint_t		pc_bnum;		/* number of buckets */
	uint_t		pc_enum;		/* number of elements */
} permcheck_t;
236 
/* libuutil list pools; presumably created during initialization. */
static uu_list_pool_t *rc_children_pool;
static uu_list_pool_t *rc_pg_notify_pool;
static uu_list_pool_t *rc_notify_pool;
static uu_list_pool_t *rc_notify_info_pool;

/* Root of the entity tree (see the comment at the top of this file). */
static rc_node_t *rc_scope;

/* Notification state; rc_pg_notify_lock is held around all accesses to it. */
static pthread_mutex_t	rc_pg_notify_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	rc_pg_notify_cv = PTHREAD_COND_INITIALIZER;
static uint_t		rc_notify_in_use;	/* blocks removals */

/* Serializes calls to _get_auth_policy() in perm_granted(). */
static pthread_mutex_t	perm_lock = PTHREAD_MUTEX_INITIALIZER;

/* Defined later in the file; needed by the reference-release routines. */
static void rc_node_unrefed(rc_node_t *np);
251 
252 /*
253  * We support an arbitrary number of clients interested in events for certain
254  * types of changes.  Each client is represented by an rc_notify_info_t, and
255  * all clients are chained onto the rc_notify_info_list.
256  *
257  * The rc_notify_list is the global notification list.  Each entry is of
258  * type rc_notify_t, which is embedded in one of three other structures:
259  *
260  *	rc_node_t		property group update notification
261  *	rc_notify_delete_t	object deletion notification
262  *	rc_notify_info_t	notification clients
263  *
264  * Which type of object is determined by which pointer in the rc_notify_t is
265  * non-NULL.
266  *
267  * New notifications and clients are added to the end of the list.
268  * Notifications no-one is interested in are never added to the list.
269  *
270  * Clients use their position in the list to track which notifications they
271  * have not yet reported.  As they process notifications, they move forward
272  * in the list past them.  There is always a client at the beginning of the
273  * list -- as he moves past notifications, he removes them from the list and
274  * cleans them up.
275  *
276  * The rc_pg_notify_lock protects all notification state.  The rc_pg_notify_cv
277  * is used for global signalling, and each client has a cv which he waits for
278  * events of interest on.
279  */
static uu_list_t	*rc_notify_info_list;	/* notification clients */
static uu_list_t	*rc_notify_list;	/* global notification list */

/* Bucket count of cache_hash[]; HASH_MASK reduces a hash to an index. */
#define	HASH_SIZE	512
#define	HASH_MASK	(HASH_SIZE - 1)

/* The node cache: an array of buckets, each with its own cb_lock. */
#pragma align 64(cache_hash)
static cache_bucket_t cache_hash[HASH_SIZE];

/* Map a hash value to the address of its bucket in cache_hash[]. */
#define	CACHE_BUCKET(h)		(&cache_hash[(h) & HASH_MASK])
290 
291 static uint32_t
292 rc_node_hash(rc_node_lookup_t *lp)
293 {
294 	uint32_t type = lp->rl_type;
295 	uint32_t backend = lp->rl_backend;
296 	uint32_t mainid = lp->rl_main_id;
297 	uint32_t *ids = lp->rl_ids;
298 
299 	rc_type_info_t *tp = &rc_types[type];
300 	uint32_t num_ids;
301 	uint32_t left;
302 	uint32_t hash;
303 
304 	assert(backend == BACKEND_TYPE_NORMAL ||
305 	    backend == BACKEND_TYPE_NONPERSIST);
306 
307 	assert(type > 0 && type < NUM_TYPES);
308 	num_ids = tp->rt_num_ids;
309 
310 	left = MAX_IDS - num_ids;
311 	assert(num_ids <= MAX_IDS);
312 
313 	hash = type * 7 + mainid * 5 + backend;
314 
315 	while (num_ids-- > 0)
316 		hash = hash * 11 + *ids++ * 7;
317 
318 	/*
319 	 * the rest should be zeroed
320 	 */
321 	while (left-- > 0)
322 		assert(*ids++ == 0);
323 
324 	return (hash);
325 }
326 
327 static int
328 rc_node_match(rc_node_t *np, rc_node_lookup_t *l)
329 {
330 	rc_node_lookup_t *r = &np->rn_id;
331 	rc_type_info_t *tp;
332 	uint32_t type;
333 	uint32_t num_ids;
334 
335 	if (r->rl_main_id != l->rl_main_id)
336 		return (0);
337 
338 	type = r->rl_type;
339 	if (type != l->rl_type)
340 		return (0);
341 
342 	assert(type > 0 && type < NUM_TYPES);
343 
344 	tp = &rc_types[r->rl_type];
345 	num_ids = tp->rt_num_ids;
346 
347 	assert(num_ids <= MAX_IDS);
348 	while (num_ids-- > 0)
349 		if (r->rl_ids[num_ids] != l->rl_ids[num_ids])
350 			return (0);
351 
352 	return (1);
353 }
354 
/*
 * the "other" references on a node are maintained in an atomically
 * updated refcount, rn_other_refs.  This can be bumped from arbitrary
 * context, and tracks references to a possibly out-of-date node's children.
 *
 * To prevent the node from disappearing between the final drop of
 * rn_other_refs and the unref handling, rn_other_refs_held is bumped on
 * 0->1 transitions and decremented (with the node lock held) on 1->0
 * transitions.
 *
 * rc_node_hold_other() takes one "other" reference on np.
 */
static void
rc_node_hold_other(rc_node_t *np)
{
	/* 0->1 transition of rn_other_refs: also bump the guard count. */
	if (atomic_add_32_nv(&np->rn_other_refs, 1) == 1) {
		atomic_add_32(&np->rn_other_refs_held, 1);
		assert(np->rn_other_refs_held > 0);
	}
	assert(np->rn_other_refs > 0);
}
374 
/*
 * Drop one "other" reference on np.  When rn_other_refs reaches zero, the
 * node lock is taken and rn_other_refs_held is decremented; if that also
 * reaches zero, rn_refs is zero, and the node is flagged RC_NODE_OLD, the
 * node is passed to rc_node_unrefed().  Otherwise the lock is simply dropped.
 *
 * NOTE(review): rc_node_unrefed() is entered with rn_lock held and nothing
 * here unlocks afterwards, so it presumably releases the lock -- confirm.
 *
 * No node locks may be held
 */
static void
rc_node_rele_other(rc_node_t *np)
{
	assert(np->rn_other_refs > 0);
	if (atomic_add_32_nv(&np->rn_other_refs, -1) == 0) {
		(void) pthread_mutex_lock(&np->rn_lock);
		assert(np->rn_other_refs_held > 0);
		/* 1->0 transition of the guard count, done under rn_lock. */
		if (atomic_add_32_nv(&np->rn_other_refs_held, -1) == 0 &&
		    np->rn_refs == 0 && (np->rn_flags & RC_NODE_OLD))
			rc_node_unrefed(np);
		else
			(void) pthread_mutex_unlock(&np->rn_lock);
	}
}
392 
/*
 * Take a reference on np; the caller must already hold np->rn_lock.  On the
 * 0->1 transition of rn_refs, a node flagged RC_NODE_PARENT_REF also takes an
 * "other" reference on the node named by rn_parent_ref.
 */
static void
rc_node_hold_locked(rc_node_t *np)
{
	assert(MUTEX_HELD(&np->rn_lock));

	if (np->rn_refs == 0 && (np->rn_flags & RC_NODE_PARENT_REF))
		rc_node_hold_other(np->rn_parent_ref);
	np->rn_refs++;
	assert(np->rn_refs > 0);
}
403 
/*
 * Take a reference on np, acquiring and releasing np->rn_lock around the
 * update.
 */
static void
rc_node_hold(rc_node_t *np)
{
	(void) pthread_mutex_lock(&np->rn_lock);
	rc_node_hold_locked(np);
	(void) pthread_mutex_unlock(&np->rn_lock);
}
411 
/*
 * Drop a reference on np.  The caller must hold np->rn_lock; this function
 * does not return with it still held by the caller's acquisition: it either
 * unlocks it or passes the locked node to rc_node_unrefed().
 *
 * When the last reference goes away:
 *   - a composed property group is marked RC_NODE_DEAD;
 *   - a node that is DEAD or OLD and has no outstanding "other" references
 *     (and no guard hold) is passed to rc_node_unrefed();
 *   - if the node had RC_NODE_PARENT_REF set, the "other" reference held on
 *     rn_parent_ref is released, after np itself has been dealt with.
 *
 * NOTE(review): rc_node_unrefed() presumably releases rn_lock -- confirm.
 */
static void
rc_node_rele_locked(rc_node_t *np)
{
	int unref = 0;
	rc_node_t *par_ref = NULL;

	assert(MUTEX_HELD(&np->rn_lock));
	assert(np->rn_refs > 0);

	if (--np->rn_refs == 0) {
		/* Remember the parent now; np may be gone by the time we use it. */
		if (np->rn_flags & RC_NODE_PARENT_REF)
			par_ref = np->rn_parent_ref;

		/*
		 * Composed property groups are only as good as their
		 * references.
		 */
		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
			np->rn_flags |= RC_NODE_DEAD;

		if ((np->rn_flags & (RC_NODE_DEAD|RC_NODE_OLD)) &&
		    np->rn_other_refs == 0 && np->rn_other_refs_held == 0)
			unref = 1;
	}

	if (unref)
		rc_node_unrefed(np);
	else
		(void) pthread_mutex_unlock(&np->rn_lock);

	/* rc_node_rele_other() requires that no node locks be held. */
	if (par_ref != NULL)
		rc_node_rele_other(par_ref);
}
445 
/*
 * Drop a reference on np.  Takes np->rn_lock and hands off to
 * rc_node_rele_locked(), which is responsible for releasing it.
 */
void
rc_node_rele(rc_node_t *np)
{
	(void) pthread_mutex_lock(&np->rn_lock);
	rc_node_rele_locked(np);
}
452 
/*
 * Lock and return the cache bucket that hash value h maps to.  Pair with
 * cache_release().
 */
static cache_bucket_t *
cache_hold(uint32_t h)
{
	cache_bucket_t *bp = CACHE_BUCKET(h);
	(void) pthread_mutex_lock(&bp->cb_lock);
	return (bp);
}
460 
/*
 * Unlock a cache bucket previously locked by cache_hold().
 */
static void
cache_release(cache_bucket_t *bp)
{
	(void) pthread_mutex_unlock(&bp->cb_lock);
}
466 
467 static rc_node_t *
468 cache_lookup_unlocked(cache_bucket_t *bp, rc_node_lookup_t *lp)
469 {
470 	uint32_t h = rc_node_hash(lp);
471 	rc_node_t *np;
472 
473 	assert(MUTEX_HELD(&bp->cb_lock));
474 	assert(bp == CACHE_BUCKET(h));
475 
476 	for (np = bp->cb_head; np != NULL; np = np->rn_hash_next) {
477 		if (np->rn_hash == h && rc_node_match(np, lp)) {
478 			rc_node_hold(np);
479 			return (np);
480 		}
481 	}
482 
483 	return (NULL);
484 }
485 
486 static rc_node_t *
487 cache_lookup(rc_node_lookup_t *lp)
488 {
489 	uint32_t h;
490 	cache_bucket_t *bp;
491 	rc_node_t *np;
492 
493 	h = rc_node_hash(lp);
494 	bp = cache_hold(h);
495 
496 	np = cache_lookup_unlocked(bp, lp);
497 
498 	cache_release(bp);
499 
500 	return (np);
501 }
502 
/*
 * Push np onto the front of bucket bp's chain.  The caller must hold
 * bp->cb_lock, np->rn_hash must already be the hash of np->rn_id and map to
 * bp, and np must not currently be linked into a chain.
 */
static void
cache_insert_unlocked(cache_bucket_t *bp, rc_node_t *np)
{
	assert(MUTEX_HELD(&bp->cb_lock));
	assert(np->rn_hash == rc_node_hash(&np->rn_id));
	assert(bp == CACHE_BUCKET(np->rn_hash));

	assert(np->rn_hash_next == NULL);

	np->rn_hash_next = bp->cb_head;
	bp->cb_head = np;
}
515 
516 static void
517 cache_remove_unlocked(cache_bucket_t *bp, rc_node_t *np)
518 {
519 	rc_node_t **npp;
520 
521 	assert(MUTEX_HELD(&bp->cb_lock));
522 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
523 	assert(bp == CACHE_BUCKET(np->rn_hash));
524 
525 	for (npp = &bp->cb_head; *npp != NULL; npp = &(*npp)->rn_hash_next)
526 		if (*npp == np)
527 			break;
528 
529 	assert(*npp == np);
530 	*npp = np->rn_hash_next;
531 	np->rn_hash_next = NULL;
532 }
533 
534 /*
535  * verify that the 'parent' type can have a child typed 'child'
536  * Fails with
537  *   _INVALID_TYPE - argument is invalid
538  *   _TYPE_MISMATCH - parent type cannot have children of type child
539  */
540 static int
541 rc_check_parent_child(uint32_t parent, uint32_t child)
542 {
543 	int idx;
544 	uint32_t type;
545 
546 	if (parent == 0 || parent >= NUM_TYPES ||
547 	    child == 0 || child >= NUM_TYPES)
548 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
549 
550 	for (idx = 0; idx < MAX_VALID_CHILDREN; idx++) {
551 		type = rc_types[parent].rt_valid_children[idx];
552 		if (type == child)
553 			return (REP_PROTOCOL_SUCCESS);
554 	}
555 
556 	return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
557 }
558 
559 /*
560  * Fails with
561  *   _INVALID_TYPE - type is invalid
562  *   _BAD_REQUEST - name is an invalid name for a node of type type
563  */
564 int
565 rc_check_type_name(uint32_t type, const char *name)
566 {
567 	if (type == 0 || type >= NUM_TYPES)
568 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
569 
570 	if (uu_check_name(name, rc_types[type].rt_name_flags) == -1)
571 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
572 
573 	return (REP_PROTOCOL_SUCCESS);
574 }
575 
576 static int
577 rc_check_pgtype_name(const char *name)
578 {
579 	if (uu_check_name(name, UU_NAME_DOMAIN) == -1)
580 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
581 
582 	return (REP_PROTOCOL_SUCCESS);
583 }
584 
585 static int
586 rc_notify_info_interested(rc_notify_info_t *rnip, rc_notify_t *np)
587 {
588 	rc_node_t *nnp = np->rcn_node;
589 	int i;
590 
591 	assert(MUTEX_HELD(&rc_pg_notify_lock));
592 
593 	if (np->rcn_delete != NULL) {
594 		assert(np->rcn_info == NULL && np->rcn_node == NULL);
595 		return (1);		/* everyone likes deletes */
596 	}
597 	if (np->rcn_node == NULL) {
598 		assert(np->rcn_info != NULL || np->rcn_delete != NULL);
599 		return (0);
600 	}
601 	assert(np->rcn_info == NULL);
602 
603 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
604 		if (rnip->rni_namelist[i] != NULL) {
605 			if (strcmp(nnp->rn_name, rnip->rni_namelist[i]) == 0)
606 				return (1);
607 		}
608 		if (rnip->rni_typelist[i] != NULL) {
609 			if (strcmp(nnp->rn_type, rnip->rni_typelist[i]) == 0)
610 				return (1);
611 		}
612 	}
613 	return (0);
614 }
615 
616 static void
617 rc_notify_insert_node(rc_node_t *nnp)
618 {
619 	rc_notify_t *np = &nnp->rn_notify;
620 	rc_notify_info_t *nip;
621 	int found = 0;
622 
623 	assert(np->rcn_info == NULL);
624 
625 	if (nnp->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
626 		return;
627 
628 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
629 	np->rcn_node = nnp;
630 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
631 	    nip = uu_list_next(rc_notify_info_list, nip)) {
632 		if (rc_notify_info_interested(nip, np)) {
633 			(void) pthread_cond_broadcast(&nip->rni_cv);
634 			found++;
635 		}
636 	}
637 	if (found)
638 		(void) uu_list_insert_before(rc_notify_list, NULL, np);
639 	else
640 		np->rcn_node = NULL;
641 
642 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
643 }
644 
645 static void
646 rc_notify_deletion(rc_notify_delete_t *ndp, const char *service,
647     const char *instance, const char *pg)
648 {
649 	rc_notify_info_t *nip;
650 
651 	uu_list_node_init(&ndp->rnd_notify, &ndp->rnd_notify.rcn_list_node,
652 	    rc_notify_pool);
653 	ndp->rnd_notify.rcn_delete = ndp;
654 
655 	(void) snprintf(ndp->rnd_fmri, sizeof (ndp->rnd_fmri),
656 	    "svc:/%s%s%s%s%s", service,
657 	    (instance != NULL)? ":" : "", (instance != NULL)? instance : "",
658 	    (pg != NULL)? "/:properties/" : "", (pg != NULL)? pg : "");
659 
660 	/*
661 	 * add to notification list, notify watchers
662 	 */
663 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
664 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
665 	    nip = uu_list_next(rc_notify_info_list, nip))
666 		(void) pthread_cond_broadcast(&nip->rni_cv);
667 	(void) uu_list_insert_before(rc_notify_list, NULL, ndp);
668 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
669 }
670 
671 static void
672 rc_notify_remove_node(rc_node_t *nnp)
673 {
674 	rc_notify_t *np = &nnp->rn_notify;
675 
676 	assert(np->rcn_info == NULL);
677 	assert(!MUTEX_HELD(&nnp->rn_lock));
678 
679 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
680 	while (np->rcn_node != NULL) {
681 		if (rc_notify_in_use) {
682 			(void) pthread_cond_wait(&rc_pg_notify_cv,
683 			    &rc_pg_notify_lock);
684 			continue;
685 		}
686 		(void) uu_list_remove(rc_notify_list, np);
687 		np->rcn_node = NULL;
688 		break;
689 	}
690 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
691 }
692 
693 static void
694 rc_notify_remove_locked(rc_notify_t *np)
695 {
696 	assert(MUTEX_HELD(&rc_pg_notify_lock));
697 	assert(rc_notify_in_use == 0);
698 
699 	(void) uu_list_remove(rc_notify_list, np);
700 	if (np->rcn_node) {
701 		np->rcn_node = NULL;
702 	} else if (np->rcn_delete) {
703 		uu_free(np->rcn_delete);
704 	} else {
705 		assert(0);	/* CAN'T HAPPEN */
706 	}
707 }
708 
709 /*
710  * Permission checking functions.  See comment atop this file.
711  */
712 #ifndef NATIVE_BUILD
713 static permcheck_t *
714 pc_create()
715 {
716 	permcheck_t *p;
717 
718 	p = uu_zalloc(sizeof (*p));
719 	if (p == NULL)
720 		return (NULL);
721 	p->pc_bnum = 8;			/* Normal case will only have 2 elts. */
722 	p->pc_buckets = uu_zalloc(sizeof (*p->pc_buckets) * p->pc_bnum);
723 	if (p->pc_buckets == NULL) {
724 		uu_free(p);
725 		return (NULL);
726 	}
727 
728 	p->pc_enum = 0;
729 	return (p);
730 }
731 
732 static void
733 pc_free(permcheck_t *pcp)
734 {
735 	uint_t i;
736 	struct pc_elt *ep, *next;
737 
738 	for (i = 0; i < pcp->pc_bnum; ++i) {
739 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
740 			next = ep->pce_next;
741 			free(ep);
742 		}
743 	}
744 
745 	free(pcp->pc_buckets);
746 	free(pcp);
747 }
748 
/*
 * Hash an authorization string.  Each character shifts the running value
 * left by four bits and is added in; whenever the top nibble becomes
 * nonzero it is folded back into bits 4-7 and then cleared.
 * (Generic hash function from uts/common/os/modhash.c.)
 */
static uint32_t
pc_hash(const char *auth)
{
	uint32_t hash = 0;

	while (*auth != '\0') {
		uint32_t top;

		hash = (hash << 4) + *auth++;
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
	}

	return (hash);
}
769 
770 static int
771 pc_exists(const permcheck_t *pcp, const char *auth)
772 {
773 	uint32_t h;
774 	struct pc_elt *ep;
775 
776 	h = pc_hash(auth);
777 	for (ep = pcp->pc_buckets[h & (pcp->pc_bnum - 1)];
778 	    ep != NULL;
779 	    ep = ep->pce_next) {
780 		if (strcmp(auth, ep->pce_auth) == 0)
781 			return (1);
782 	}
783 
784 	return (0);
785 }
786 
787 static int
788 pc_match(const permcheck_t *pcp, const char *pattern)
789 {
790 	uint_t i;
791 	struct pc_elt *ep;
792 
793 	for (i = 0; i < pcp->pc_bnum; ++i) {
794 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = ep->pce_next) {
795 			if (_auth_match(pattern, ep->pce_auth))
796 				return (1);
797 		}
798 	}
799 
800 	return (0);
801 }
802 
803 static int
804 pc_grow(permcheck_t *pcp)
805 {
806 	uint_t new_bnum, i, j;
807 	struct pc_elt **new_buckets;
808 	struct pc_elt *ep, *next;
809 
810 	new_bnum = pcp->pc_bnum * 2;
811 	if (new_bnum < pcp->pc_bnum)
812 		/* Homey don't play that. */
813 		return (-1);
814 
815 	new_buckets = uu_zalloc(sizeof (*new_buckets) * new_bnum);
816 	if (new_buckets == NULL)
817 		return (-1);
818 
819 	for (i = 0; i < pcp->pc_bnum; ++i) {
820 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
821 			next = ep->pce_next;
822 			j = pc_hash(ep->pce_auth) & (new_bnum - 1);
823 			ep->pce_next = new_buckets[j];
824 			new_buckets[j] = ep;
825 		}
826 	}
827 
828 	uu_free(pcp->pc_buckets);
829 	pcp->pc_buckets = new_buckets;
830 	pcp->pc_bnum = new_bnum;
831 
832 	return (0);
833 }
834 
835 static int
836 pc_add(permcheck_t *pcp, const char *auth)
837 {
838 	struct pc_elt *ep;
839 	uint_t i;
840 
841 	ep = uu_zalloc(offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1);
842 	if (ep == NULL)
843 		return (-1);
844 
845 	/* Grow if pc_enum / pc_bnum > 3/4. */
846 	if (pcp->pc_enum * 4 > 3 * pcp->pc_bnum)
847 		/* Failure is not a stopper; we'll try again next time. */
848 		(void) pc_grow(pcp);
849 
850 	(void) strcpy(ep->pce_auth, auth);
851 
852 	i = pc_hash(auth) & (pcp->pc_bnum - 1);
853 	ep->pce_next = pcp->pc_buckets[i];
854 	pcp->pc_buckets[i] = ep;
855 
856 	++pcp->pc_enum;
857 
858 	return (0);
859 }
860 
861 /*
862  * For the type of a property group, return the authorization which may be
863  * used to modify it.
864  */
865 static const char *
866 perm_auth_for_pgtype(const char *pgtype)
867 {
868 	if (strcmp(pgtype, SCF_GROUP_METHOD) == 0)
869 		return (AUTH_MODIFY_PREFIX "method");
870 	else if (strcmp(pgtype, SCF_GROUP_DEPENDENCY) == 0)
871 		return (AUTH_MODIFY_PREFIX "dependency");
872 	else if (strcmp(pgtype, SCF_GROUP_APPLICATION) == 0)
873 		return (AUTH_MODIFY_PREFIX "application");
874 	else if (strcmp(pgtype, SCF_GROUP_FRAMEWORK) == 0)
875 		return (AUTH_MODIFY_PREFIX "framework");
876 	else
877 		return (NULL);
878 }
879 
880 /*
881  * Fails with
882  *   _NO_RESOURCES - out of memory
883  */
884 static int
885 perm_add_enabling(permcheck_t *pcp, const char *auth)
886 {
887 	return (pc_add(pcp, auth) == 0 ? REP_PROTOCOL_SUCCESS :
888 	    REP_PROTOCOL_FAIL_NO_RESOURCES);
889 }
890 
891 /* Note that perm_add_enabling_values() is defined below. */
892 
893 /*
894  * perm_granted() returns 1 if the current door caller has one of the enabling
895  * authorizations in pcp, 0 if it doesn't, and -1 if an error (usually lack of
896  * memory) occurs.  check_auth_list() checks an RBAC_AUTH_SEP-separated list
897  * of authorizations for existance in pcp, and check_prof_list() checks the
898  * authorizations granted to an RBAC_AUTH_SEP-separated list of profiles.
899  */
900 static int
901 check_auth_list(const permcheck_t *pcp, char *authlist)
902 {
903 	char *auth, *lasts;
904 	int ret;
905 
906 	for (auth = (char *)strtok_r(authlist, RBAC_AUTH_SEP, &lasts);
907 	    auth != NULL;
908 	    auth = (char *)strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
909 		if (strchr(auth, KV_WILDCHAR) == NULL)
910 			ret = pc_exists(pcp, auth);
911 		else
912 			ret = pc_match(pcp, auth);
913 
914 		if (ret)
915 			return (ret);
916 	}
917 
918 	return (0);
919 }
920 
921 static int
922 check_prof_list(const permcheck_t *pcp, char *proflist)
923 {
924 	char *prof, *lasts, *authlist, *subproflist;
925 	profattr_t *pap;
926 	int ret = 0;
927 
928 	for (prof = strtok_r(proflist, RBAC_AUTH_SEP, &lasts);
929 	    prof != NULL;
930 	    prof = strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
931 		pap = getprofnam(prof);
932 		if (pap == NULL)
933 			continue;
934 
935 		authlist = kva_match(pap->attr, PROFATTR_AUTHS_KW);
936 		if (authlist != NULL)
937 			ret = check_auth_list(pcp, authlist);
938 
939 		if (!ret) {
940 			subproflist = kva_match(pap->attr, PROFATTR_PROFS_KW);
941 			if (subproflist != NULL)
942 				/* depth check to avoid invinite recursion? */
943 				ret = check_prof_list(pcp, subproflist);
944 		}
945 
946 		free_profattr(pap);
947 		if (ret)
948 			return (ret);
949 	}
950 
951 	return (ret);
952 }
953 
/*
 * Decide whether the current door caller holds any authorization in pcp.
 * Sources are consulted in this order, stopping at the first that grants:
 *   1. the authorization list returned by _get_auth_policy();
 *   2. the caller's USERATTR_AUTHS_KW authorizations;
 *   3. the default profile list returned by _get_auth_policy();
 *   4. the caller's USERATTR_PROFILES_KW profiles.
 *
 * Returns the nonzero result of the granting check, 0 if nothing granted
 * (or if get_ucred() failed with EINVAL), and -1 if _get_auth_policy()
 * failed.  Any other get_ucred() failure aborts the process.
 */
static int
perm_granted(const permcheck_t *pcp)
{
	ucred_t *uc;

	int ret = 0;
	uid_t uid;
	userattr_t *uap;
	char *authlist, *userattr_authlist, *proflist, *def_prof = NULL;

	/*
	 * Get generic authorizations from policy.conf
	 *
	 * Note that _get_auth_policy is not threadsafe, so we single-thread
	 * access to it.
	 */
	(void) pthread_mutex_lock(&perm_lock);
	ret = _get_auth_policy(&authlist, &def_prof);
	(void) pthread_mutex_unlock(&perm_lock);

	if (ret != 0)
		return (-1);

	if (authlist != NULL) {
		ret = check_auth_list(pcp, authlist);

		if (ret) {
			_free_auth_policy(authlist, def_prof);
			return (ret);
		}
	}

	/*
	 * Put off checking def_prof for later in an attempt to consolidate
	 * prof_attr accesses.
	 */

	/* Get the uid */
	if ((uc = get_ucred()) == NULL) {
		_free_auth_policy(authlist, def_prof);

		if (errno == EINVAL) {
			/*
			 * Client is no longer waiting for our response (e.g.,
			 * it received a signal & resumed with EINTR).
			 * Punting with door_return() would be nice but we
			 * need to release all of the locks & references we
			 * hold.  And we must report failure to the client
			 * layer to keep it from ignoring retries as
			 * already-done (idempotency & all that).  None of the
			 * error codes fit very well, so we might as well
			 * force the return of _PERMISSION_DENIED since we
			 * couldn't determine the user.
			 */
			return (0);
		}
		assert(0);
		abort();
	}

	uid = ucred_geteuid(uc);
	assert(uid != (uid_t)-1);

	/* A NULL uap just means the user-specific checks are skipped. */
	uap = getuseruid(uid);
	if (uap != NULL) {
		/* Get the authorizations from user_attr. */
		userattr_authlist = kva_match(uap->attr, USERATTR_AUTHS_KW);
		if (userattr_authlist != NULL)
			ret = check_auth_list(pcp, userattr_authlist);
	}

	if (!ret && def_prof != NULL) {
		/* Check generic profiles. */
		ret = check_prof_list(pcp, def_prof);
	}

	if (!ret && uap != NULL) {
		/* Check the profiles assigned to the user. */
		proflist = kva_match(uap->attr, USERATTR_PROFILES_KW);
		if (proflist != NULL)
			ret = check_prof_list(pcp, proflist);
	}

	/* Release the policy strings and the user_attr entry. */
	_free_auth_policy(authlist, def_prof);
	if (uap != NULL)
		free_userattr(uap);

	return (ret);
}
1042 #endif /* NATIVE_BUILD */
1043 
1044 /*
1045  * flags in RC_NODE_WAITING_FLAGS are broadcast when unset, and are used to
1046  * serialize certain actions, and to wait for certain operations to complete
1047  *
1048  * The waiting flags are:
1049  *	RC_NODE_CHILDREN_CHANGING
1050  *		The child list is being built or changed (due to creation
1051  *		or deletion).  All iterators pause.
1052  *
1053  *	RC_NODE_USING_PARENT
1054  *		Someone is actively using the parent pointer, so we can't
1055  *		be removed from the parent list.
1056  *
1057  *	RC_NODE_CREATING_CHILD
1058  *		A child is being created -- locks out other creations, to
1059  *		prevent insert-insert races.
1060  *
1061  *	RC_NODE_IN_TX
1062  *		This object is running a transaction.
1063  *
1064  *	RC_NODE_DYING
1065  *		This node might be dying.  Always set as a set, using
1066  *		RC_NODE_DYING_FLAGS (which is everything but
1067  *		RC_NODE_USING_PARENT)
1068  */
1069 static int
1070 rc_node_hold_flag(rc_node_t *np, uint32_t flag)
1071 {
1072 	assert(MUTEX_HELD(&np->rn_lock));
1073 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1074 
1075 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag)) {
1076 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1077 	}
1078 	if (np->rn_flags & RC_NODE_DEAD)
1079 		return (0);
1080 
1081 	np->rn_flags |= flag;
1082 	return (1);
1083 }
1084 
1085 static void
1086 rc_node_rele_flag(rc_node_t *np, uint32_t flag)
1087 {
1088 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1089 	assert(MUTEX_HELD(&np->rn_lock));
1090 	assert((np->rn_flags & flag) == flag);
1091 	np->rn_flags &= ~flag;
1092 	(void) pthread_cond_broadcast(&np->rn_cv);
1093 }
1094 
1095 /*
1096  * wait until a particular flag has cleared.  Fails if the object dies.
1097  */
1098 static int
1099 rc_node_wait_flag(rc_node_t *np, uint32_t flag)
1100 {
1101 	assert(MUTEX_HELD(&np->rn_lock));
1102 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag))
1103 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1104 
1105 	return (!(np->rn_flags & RC_NODE_DEAD));
1106 }
1107 
1108 /*
1109  * On entry, np's lock must be held, and this thread must be holding
1110  * RC_NODE_USING_PARENT.  On return, both of them are released.
1111  *
1112  * If the return value is NULL, np either does not have a parent, or
1113  * the parent has been marked DEAD.
1114  *
1115  * If the return value is non-NULL, it is the parent of np, and both
1116  * its lock and the requested flags are held.
1117  */
1118 static rc_node_t *
1119 rc_node_hold_parent_flag(rc_node_t *np, uint32_t flag)
1120 {
1121 	rc_node_t *pp;
1122 
1123 	assert(MUTEX_HELD(&np->rn_lock));
1124 	assert(np->rn_flags & RC_NODE_USING_PARENT);
1125 
1126 	if ((pp = np->rn_parent) == NULL) {
1127 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1128 		(void) pthread_mutex_unlock(&np->rn_lock);
1129 		return (NULL);
1130 	}
1131 	(void) pthread_mutex_unlock(&np->rn_lock);
1132 
1133 	(void) pthread_mutex_lock(&pp->rn_lock);
1134 	(void) pthread_mutex_lock(&np->rn_lock);
1135 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1136 	(void) pthread_mutex_unlock(&np->rn_lock);
1137 
1138 	if (!rc_node_hold_flag(pp, flag)) {
1139 		(void) pthread_mutex_unlock(&pp->rn_lock);
1140 		return (NULL);
1141 	}
1142 	return (pp);
1143 }
1144 
1145 rc_node_t *
1146 rc_node_alloc(void)
1147 {
1148 	rc_node_t *np = uu_zalloc(sizeof (*np));
1149 
1150 	if (np == NULL)
1151 		return (NULL);
1152 
1153 	(void) pthread_mutex_init(&np->rn_lock, NULL);
1154 	(void) pthread_cond_init(&np->rn_cv, NULL);
1155 
1156 	np->rn_children = uu_list_create(rc_children_pool, np, 0);
1157 	np->rn_pg_notify_list = uu_list_create(rc_pg_notify_pool, np, 0);
1158 
1159 	uu_list_node_init(np, &np->rn_sibling_node, rc_children_pool);
1160 
1161 	uu_list_node_init(&np->rn_notify, &np->rn_notify.rcn_list_node,
1162 	    rc_notify_pool);
1163 
1164 	return (np);
1165 }
1166 
1167 void
1168 rc_node_destroy(rc_node_t *np)
1169 {
1170 	int i;
1171 
1172 	if (np->rn_flags & RC_NODE_UNREFED)
1173 		return;				/* being handled elsewhere */
1174 
1175 	assert(np->rn_refs == 0 && np->rn_other_refs == 0);
1176 	assert(np->rn_former == NULL);
1177 
1178 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
1179 		/* Release the holds from rc_iter_next(). */
1180 		for (i = 0; i < COMPOSITION_DEPTH; ++i) {
1181 			/* rn_cchain[i] may be NULL for empty snapshots. */
1182 			if (np->rn_cchain[i] != NULL)
1183 				rc_node_rele(np->rn_cchain[i]);
1184 		}
1185 	}
1186 
1187 	if (np->rn_name != NULL)
1188 		free((void *)np->rn_name);
1189 	np->rn_name = NULL;
1190 	if (np->rn_type != NULL)
1191 		free((void *)np->rn_type);
1192 	np->rn_type = NULL;
1193 	if (np->rn_values != NULL)
1194 		object_free_values(np->rn_values, np->rn_valtype,
1195 		    np->rn_values_count, np->rn_values_size);
1196 	np->rn_values = NULL;
1197 
1198 	if (np->rn_snaplevel != NULL)
1199 		rc_snaplevel_rele(np->rn_snaplevel);
1200 	np->rn_snaplevel = NULL;
1201 
1202 	uu_list_node_fini(np, &np->rn_sibling_node, rc_children_pool);
1203 
1204 	uu_list_node_fini(&np->rn_notify, &np->rn_notify.rcn_list_node,
1205 	    rc_notify_pool);
1206 
1207 	assert(uu_list_first(np->rn_children) == NULL);
1208 	uu_list_destroy(np->rn_children);
1209 	uu_list_destroy(np->rn_pg_notify_list);
1210 
1211 	(void) pthread_mutex_destroy(&np->rn_lock);
1212 	(void) pthread_cond_destroy(&np->rn_cv);
1213 
1214 	uu_free(np);
1215 }
1216 
1217 /*
1218  * Link in a child node.
1219  *
1220  * Because of the lock ordering, cp has to already be in the hash table with
1221  * its lock dropped before we get it.  To prevent anyone from noticing that
1222  * it is parentless, the creation code sets the RC_NODE_USING_PARENT.  Once
1223  * we've linked it in, we release the flag.
1224  */
1225 static void
1226 rc_node_link_child(rc_node_t *np, rc_node_t *cp)
1227 {
1228 	assert(!MUTEX_HELD(&np->rn_lock));
1229 	assert(!MUTEX_HELD(&cp->rn_lock));
1230 
1231 	(void) pthread_mutex_lock(&np->rn_lock);
1232 	(void) pthread_mutex_lock(&cp->rn_lock);
1233 	assert(!(cp->rn_flags & RC_NODE_IN_PARENT) &&
1234 	    (cp->rn_flags & RC_NODE_USING_PARENT));
1235 
1236 	assert(rc_check_parent_child(np->rn_id.rl_type, cp->rn_id.rl_type) ==
1237 	    REP_PROTOCOL_SUCCESS);
1238 
1239 	cp->rn_parent = np;
1240 	cp->rn_flags |= RC_NODE_IN_PARENT;
1241 	(void) uu_list_insert_before(np->rn_children, NULL, cp);
1242 
1243 	(void) pthread_mutex_unlock(&np->rn_lock);
1244 
1245 	rc_node_rele_flag(cp, RC_NODE_USING_PARENT);
1246 	(void) pthread_mutex_unlock(&cp->rn_lock);
1247 }
1248 
1249 /*
1250  * Sets the rn_parent_ref field of all the children of np to pp -- always
1251  * initially invoked as rc_node_setup_parent_ref(np, np), we then recurse.
1252  *
1253  * This is used when we mark a node RC_NODE_OLD, so that when the object and
1254  * its children are no longer referenced, they will all be deleted as a unit.
1255  */
1256 static void
1257 rc_node_setup_parent_ref(rc_node_t *np, rc_node_t *pp)
1258 {
1259 	rc_node_t *cp;
1260 
1261 	assert(MUTEX_HELD(&np->rn_lock));
1262 
1263 	for (cp = uu_list_first(np->rn_children); cp != NULL;
1264 	    cp = uu_list_next(np->rn_children, cp)) {
1265 		(void) pthread_mutex_lock(&cp->rn_lock);
1266 		if (cp->rn_flags & RC_NODE_PARENT_REF) {
1267 			assert(cp->rn_parent_ref == pp);
1268 		} else {
1269 			assert(cp->rn_parent_ref == NULL);
1270 
1271 			cp->rn_flags |= RC_NODE_PARENT_REF;
1272 			cp->rn_parent_ref = pp;
1273 			if (cp->rn_refs != 0)
1274 				rc_node_hold_other(pp);
1275 		}
1276 		rc_node_setup_parent_ref(cp, pp);		/* recurse */
1277 		(void) pthread_mutex_unlock(&cp->rn_lock);
1278 	}
1279 }
1280 
1281 /*
1282  * Atomically replace 'np' with 'newp', with a parent of 'pp'.
1283  *
1284  * Requirements:
1285  *	*no* node locks may be held.
1286  *	pp must be held with RC_NODE_CHILDREN_CHANGING
1287  *	newp and np must be held with RC_NODE_IN_TX
1288  *	np must be marked RC_NODE_IN_PARENT, newp must not be
1289  *	np must be marked RC_NODE_OLD
1290  *
1291  * Afterwards:
1292  *	pp's RC_NODE_CHILDREN_CHANGING is dropped
1293  *	newp and np's RC_NODE_IN_TX is dropped
1294  *	newp->rn_former = np;
1295  *	newp is RC_NODE_IN_PARENT, np is not.
1296  *	interested notify subscribers have been notified of newp's new status.
1297  */
1298 static void
1299 rc_node_relink_child(rc_node_t *pp, rc_node_t *np, rc_node_t *newp)
1300 {
1301 	cache_bucket_t *bp;
1302 	/*
1303 	 * First, swap np and nnp in the cache.  newp's RC_NODE_IN_TX flag
1304 	 * keeps rc_node_update() from seeing it until we are done.
1305 	 */
1306 	bp = cache_hold(newp->rn_hash);
1307 	cache_remove_unlocked(bp, np);
1308 	cache_insert_unlocked(bp, newp);
1309 	cache_release(bp);
1310 
1311 	/*
1312 	 * replace np with newp in pp's list, and attach it to newp's rn_former
1313 	 * link.
1314 	 */
1315 	(void) pthread_mutex_lock(&pp->rn_lock);
1316 	assert(pp->rn_flags & RC_NODE_CHILDREN_CHANGING);
1317 
1318 	(void) pthread_mutex_lock(&newp->rn_lock);
1319 	assert(!(newp->rn_flags & RC_NODE_IN_PARENT));
1320 	assert(newp->rn_flags & RC_NODE_IN_TX);
1321 
1322 	(void) pthread_mutex_lock(&np->rn_lock);
1323 	assert(np->rn_flags & RC_NODE_IN_PARENT);
1324 	assert(np->rn_flags & RC_NODE_OLD);
1325 	assert(np->rn_flags & RC_NODE_IN_TX);
1326 
1327 	newp->rn_parent = pp;
1328 	newp->rn_flags |= RC_NODE_IN_PARENT;
1329 
1330 	/*
1331 	 * Note that we carefully add newp before removing np -- this
1332 	 * keeps iterators on the list from missing us.
1333 	 */
1334 	(void) uu_list_insert_after(pp->rn_children, np, newp);
1335 	(void) uu_list_remove(pp->rn_children, np);
1336 
1337 	/*
1338 	 * re-set np
1339 	 */
1340 	newp->rn_former = np;
1341 	np->rn_parent = NULL;
1342 	np->rn_flags &= ~RC_NODE_IN_PARENT;
1343 	np->rn_flags |= RC_NODE_ON_FORMER;
1344 
1345 	rc_notify_insert_node(newp);
1346 
1347 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
1348 	(void) pthread_mutex_unlock(&pp->rn_lock);
1349 	rc_node_rele_flag(newp, RC_NODE_USING_PARENT | RC_NODE_IN_TX);
1350 	(void) pthread_mutex_unlock(&newp->rn_lock);
1351 	rc_node_setup_parent_ref(np, np);
1352 	rc_node_rele_flag(np, RC_NODE_IN_TX);
1353 	(void) pthread_mutex_unlock(&np->rn_lock);
1354 }
1355 
1356 /*
1357  * makes sure a node with lookup 'nip', name 'name', and parent 'pp' exists.
1358  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1359  * exist, 'cp' is freed, and the existent node is returned instead.
1360  */
1361 rc_node_t *
1362 rc_node_setup(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1363     rc_node_t *pp)
1364 {
1365 	rc_node_t *np;
1366 	cache_bucket_t *bp;
1367 	uint32_t h = rc_node_hash(nip);
1368 
1369 	assert(cp->rn_refs == 0);
1370 
1371 	bp = cache_hold(h);
1372 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1373 		cache_release(bp);
1374 
1375 		/*
1376 		 * make sure it matches our expectations
1377 		 */
1378 		(void) pthread_mutex_lock(&np->rn_lock);
1379 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1380 			assert(np->rn_parent == pp);
1381 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1382 			assert(strcmp(np->rn_name, name) == 0);
1383 			assert(np->rn_type == NULL);
1384 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1385 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1386 		}
1387 		(void) pthread_mutex_unlock(&np->rn_lock);
1388 
1389 		rc_node_destroy(cp);
1390 		return (np);
1391 	}
1392 
1393 	/*
1394 	 * No one is there -- create a new node.
1395 	 */
1396 	np = cp;
1397 	rc_node_hold(np);
1398 	np->rn_id = *nip;
1399 	np->rn_hash = h;
1400 	np->rn_name = strdup(name);
1401 
1402 	np->rn_flags |= RC_NODE_USING_PARENT;
1403 
1404 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE) {
1405 #if COMPOSITION_DEPTH == 2
1406 		np->rn_cchain[0] = np;
1407 		np->rn_cchain[1] = pp;
1408 #else
1409 #error This code must be updated.
1410 #endif
1411 	}
1412 
1413 	cache_insert_unlocked(bp, np);
1414 	cache_release(bp);		/* we are now visible */
1415 
1416 	rc_node_link_child(pp, np);
1417 
1418 	return (np);
1419 }
1420 
1421 /*
1422  * makes sure a snapshot with lookup 'nip', name 'name', and parent 'pp' exists.
1423  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1424  * exist, 'cp' is freed, and the existent node is returned instead.
1425  */
1426 rc_node_t *
1427 rc_node_setup_snapshot(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1428     uint32_t snap_id, rc_node_t *pp)
1429 {
1430 	rc_node_t *np;
1431 	cache_bucket_t *bp;
1432 	uint32_t h = rc_node_hash(nip);
1433 
1434 	assert(cp->rn_refs == 0);
1435 
1436 	bp = cache_hold(h);
1437 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1438 		cache_release(bp);
1439 
1440 		/*
1441 		 * make sure it matches our expectations
1442 		 */
1443 		(void) pthread_mutex_lock(&np->rn_lock);
1444 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1445 			assert(np->rn_parent == pp);
1446 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1447 			assert(strcmp(np->rn_name, name) == 0);
1448 			assert(np->rn_type == NULL);
1449 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1450 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1451 		}
1452 		(void) pthread_mutex_unlock(&np->rn_lock);
1453 
1454 		rc_node_destroy(cp);
1455 		return (np);
1456 	}
1457 
1458 	/*
1459 	 * No one is there -- create a new node.
1460 	 */
1461 	np = cp;
1462 	rc_node_hold(np);
1463 	np->rn_id = *nip;
1464 	np->rn_hash = h;
1465 	np->rn_name = strdup(name);
1466 	np->rn_snapshot_id = snap_id;
1467 
1468 	np->rn_flags |= RC_NODE_USING_PARENT;
1469 
1470 	cache_insert_unlocked(bp, np);
1471 	cache_release(bp);		/* we are now visible */
1472 
1473 	rc_node_link_child(pp, np);
1474 
1475 	return (np);
1476 }
1477 
1478 /*
1479  * makes sure a snaplevel with lookup 'nip' and parent 'pp' exists.  'cp' is
1480  * used (and returned) if the node does not yet exist.  If it does exist, 'cp'
1481  * is freed, and the existent node is returned instead.
1482  */
1483 rc_node_t *
1484 rc_node_setup_snaplevel(rc_node_t *cp, rc_node_lookup_t *nip,
1485     rc_snaplevel_t *lvl, rc_node_t *pp)
1486 {
1487 	rc_node_t *np;
1488 	cache_bucket_t *bp;
1489 	uint32_t h = rc_node_hash(nip);
1490 
1491 	assert(cp->rn_refs == 0);
1492 
1493 	bp = cache_hold(h);
1494 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1495 		cache_release(bp);
1496 
1497 		/*
1498 		 * make sure it matches our expectations
1499 		 */
1500 		(void) pthread_mutex_lock(&np->rn_lock);
1501 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1502 			assert(np->rn_parent == pp);
1503 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1504 			assert(np->rn_name == NULL);
1505 			assert(np->rn_type == NULL);
1506 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1507 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1508 		}
1509 		(void) pthread_mutex_unlock(&np->rn_lock);
1510 
1511 		rc_node_destroy(cp);
1512 		return (np);
1513 	}
1514 
1515 	/*
1516 	 * No one is there -- create a new node.
1517 	 */
1518 	np = cp;
1519 	rc_node_hold(np);	/* released in snapshot_fill_children() */
1520 	np->rn_id = *nip;
1521 	np->rn_hash = h;
1522 
1523 	rc_snaplevel_hold(lvl);
1524 	np->rn_snaplevel = lvl;
1525 
1526 	np->rn_flags |= RC_NODE_USING_PARENT;
1527 
1528 	cache_insert_unlocked(bp, np);
1529 	cache_release(bp);		/* we are now visible */
1530 
1531 	/* Add this snaplevel to the snapshot's composition chain. */
1532 	assert(pp->rn_cchain[lvl->rsl_level_num - 1] == NULL);
1533 	pp->rn_cchain[lvl->rsl_level_num - 1] = np;
1534 
1535 	rc_node_link_child(pp, np);
1536 
1537 	return (np);
1538 }
1539 
1540 /*
1541  * Returns NULL if strdup() fails.
1542  */
1543 rc_node_t *
1544 rc_node_setup_pg(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1545     const char *type, uint32_t flags, uint32_t gen_id, rc_node_t *pp)
1546 {
1547 	rc_node_t *np;
1548 	cache_bucket_t *bp;
1549 
1550 	uint32_t h = rc_node_hash(nip);
1551 	bp = cache_hold(h);
1552 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1553 		cache_release(bp);
1554 
1555 		/*
1556 		 * make sure it matches our expectations (don't check
1557 		 * the generation number or parent, since someone could
1558 		 * have gotten a transaction through while we weren't
1559 		 * looking)
1560 		 */
1561 		(void) pthread_mutex_lock(&np->rn_lock);
1562 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1563 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1564 			assert(strcmp(np->rn_name, name) == 0);
1565 			assert(strcmp(np->rn_type, type) == 0);
1566 			assert(np->rn_pgflags == flags);
1567 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1568 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1569 		}
1570 		(void) pthread_mutex_unlock(&np->rn_lock);
1571 
1572 		rc_node_destroy(cp);
1573 		return (np);
1574 	}
1575 
1576 	np = cp;
1577 	rc_node_hold(np);		/* released in fill_pg_callback() */
1578 	np->rn_id = *nip;
1579 	np->rn_hash = h;
1580 	np->rn_name = strdup(name);
1581 	if (np->rn_name == NULL) {
1582 		rc_node_rele(np);
1583 		return (NULL);
1584 	}
1585 	np->rn_type = strdup(type);
1586 	if (np->rn_type == NULL) {
1587 		free((void *)np->rn_name);
1588 		rc_node_rele(np);
1589 		return (NULL);
1590 	}
1591 	np->rn_pgflags = flags;
1592 	np->rn_gen_id = gen_id;
1593 
1594 	np->rn_flags |= RC_NODE_USING_PARENT;
1595 
1596 	cache_insert_unlocked(bp, np);
1597 	cache_release(bp);		/* we are now visible */
1598 
1599 	rc_node_link_child(pp, np);
1600 
1601 	return (np);
1602 }
1603 
1604 #if COMPOSITION_DEPTH == 2
1605 /*
1606  * Initialize a "composed property group" which represents the composition of
1607  * property groups pg1 & pg2.  It is ephemeral: once created & returned for an
1608  * ITER_READ request, keeping it out of cache_hash and any child lists
1609  * prevents it from being looked up.  Operations besides iteration are passed
1610  * through to pg1.
1611  *
1612  * pg1 & pg2 should be held before entering this function.  They will be
1613  * released in rc_node_destroy().
1614  */
1615 static int
1616 rc_node_setup_cpg(rc_node_t *cpg, rc_node_t *pg1, rc_node_t *pg2)
1617 {
1618 	if (strcmp(pg1->rn_type, pg2->rn_type) != 0)
1619 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
1620 
1621 	cpg->rn_id.rl_type = REP_PROTOCOL_ENTITY_CPROPERTYGRP;
1622 	cpg->rn_name = strdup(pg1->rn_name);
1623 	if (cpg->rn_name == NULL)
1624 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1625 
1626 	cpg->rn_cchain[0] = pg1;
1627 	cpg->rn_cchain[1] = pg2;
1628 
1629 	return (REP_PROTOCOL_SUCCESS);
1630 }
1631 #else
1632 #error This code must be updated.
1633 #endif
1634 
1635 /*
1636  * Fails with _NO_RESOURCES.
1637  */
1638 int
1639 rc_node_create_property(rc_node_t *pp, rc_node_lookup_t *nip,
1640     const char *name, rep_protocol_value_type_t type,
1641     const char *vals, size_t count, size_t size)
1642 {
1643 	rc_node_t *np;
1644 	cache_bucket_t *bp;
1645 
1646 	uint32_t h = rc_node_hash(nip);
1647 	bp = cache_hold(h);
1648 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1649 		cache_release(bp);
1650 		/*
1651 		 * make sure it matches our expectations
1652 		 */
1653 		(void) pthread_mutex_lock(&np->rn_lock);
1654 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1655 			assert(np->rn_parent == pp);
1656 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1657 			assert(strcmp(np->rn_name, name) == 0);
1658 			assert(np->rn_valtype == type);
1659 			assert(np->rn_values_count == count);
1660 			assert(np->rn_values_size == size);
1661 			assert(vals == NULL ||
1662 			    memcmp(np->rn_values, vals, size) == 0);
1663 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1664 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1665 		}
1666 		rc_node_rele_locked(np);
1667 		object_free_values(vals, type, count, size);
1668 		return (REP_PROTOCOL_SUCCESS);
1669 	}
1670 
1671 	/*
1672 	 * No one is there -- create a new node.
1673 	 */
1674 	np = rc_node_alloc();
1675 	if (np == NULL) {
1676 		cache_release(bp);
1677 		object_free_values(vals, type, count, size);
1678 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1679 	}
1680 	np->rn_id = *nip;
1681 	np->rn_hash = h;
1682 	np->rn_name = strdup(name);
1683 	if (np->rn_name == NULL) {
1684 		cache_release(bp);
1685 		object_free_values(vals, type, count, size);
1686 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1687 	}
1688 
1689 	np->rn_valtype = type;
1690 	np->rn_values = vals;
1691 	np->rn_values_count = count;
1692 	np->rn_values_size = size;
1693 
1694 	np->rn_flags |= RC_NODE_USING_PARENT;
1695 
1696 	cache_insert_unlocked(bp, np);
1697 	cache_release(bp);		/* we are now visible */
1698 
1699 	rc_node_link_child(pp, np);
1700 
1701 	return (REP_PROTOCOL_SUCCESS);
1702 }
1703 
1704 int
1705 rc_node_init(void)
1706 {
1707 	rc_node_t *np;
1708 	cache_bucket_t *bp;
1709 
1710 	rc_children_pool = uu_list_pool_create("rc_children_pool",
1711 	    sizeof (rc_node_t), offsetof(rc_node_t, rn_sibling_node),
1712 	    NULL, UU_LIST_POOL_DEBUG);
1713 
1714 	rc_pg_notify_pool = uu_list_pool_create("rc_pg_notify_pool",
1715 	    sizeof (rc_node_pg_notify_t),
1716 	    offsetof(rc_node_pg_notify_t, rnpn_node),
1717 	    NULL, UU_LIST_POOL_DEBUG);
1718 
1719 	rc_notify_pool = uu_list_pool_create("rc_notify_pool",
1720 	    sizeof (rc_notify_t), offsetof(rc_notify_t, rcn_list_node),
1721 	    NULL, UU_LIST_POOL_DEBUG);
1722 
1723 	rc_notify_info_pool = uu_list_pool_create("rc_notify_info_pool",
1724 	    sizeof (rc_notify_info_t),
1725 	    offsetof(rc_notify_info_t, rni_list_node),
1726 	    NULL, UU_LIST_POOL_DEBUG);
1727 
1728 	if (rc_children_pool == NULL || rc_pg_notify_pool == NULL ||
1729 	    rc_notify_pool == NULL || rc_notify_info_pool == NULL)
1730 		uu_die("out of memory");
1731 
1732 	rc_notify_list = uu_list_create(rc_notify_pool,
1733 	    &rc_notify_list, 0);
1734 
1735 	rc_notify_info_list = uu_list_create(rc_notify_info_pool,
1736 	    &rc_notify_info_list, 0);
1737 
1738 	if (rc_notify_list == NULL || rc_notify_info_list == NULL)
1739 		uu_die("out of memory");
1740 
1741 	if ((np = rc_node_alloc()) == NULL)
1742 		uu_die("out of memory");
1743 
1744 	rc_node_hold(np);
1745 	np->rn_id.rl_type = REP_PROTOCOL_ENTITY_SCOPE;
1746 	np->rn_id.rl_backend = BACKEND_TYPE_NORMAL;
1747 	np->rn_hash = rc_node_hash(&np->rn_id);
1748 	np->rn_name = "localhost";
1749 
1750 	bp = cache_hold(np->rn_hash);
1751 	cache_insert_unlocked(bp, np);
1752 	cache_release(bp);
1753 
1754 	rc_scope = np;
1755 	return (1);
1756 }
1757 
1758 /*
1759  * Fails with
1760  *   _INVALID_TYPE - type is invalid
1761  *   _TYPE_MISMATCH - np doesn't carry children of type type
1762  *   _DELETED - np has been deleted
1763  *   _NO_RESOURCES
1764  */
1765 static int
1766 rc_node_fill_children(rc_node_t *np, uint32_t type)
1767 {
1768 	int rc;
1769 
1770 	assert(MUTEX_HELD(&np->rn_lock));
1771 
1772 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
1773 	    REP_PROTOCOL_SUCCESS)
1774 		return (rc);
1775 
1776 	if (!rc_node_hold_flag(np, RC_NODE_CHILDREN_CHANGING))
1777 		return (REP_PROTOCOL_FAIL_DELETED);
1778 
1779 	if (np->rn_flags & RC_NODE_HAS_CHILDREN) {
1780 		rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1781 		return (REP_PROTOCOL_SUCCESS);
1782 	}
1783 
1784 	(void) pthread_mutex_unlock(&np->rn_lock);
1785 	rc = object_fill_children(np);
1786 	(void) pthread_mutex_lock(&np->rn_lock);
1787 
1788 	if (rc == REP_PROTOCOL_SUCCESS) {
1789 		np->rn_flags |= RC_NODE_HAS_CHILDREN;
1790 	}
1791 	rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1792 
1793 	return (rc);
1794 }
1795 
1796 /*
1797  * Returns
1798  *   _INVALID_TYPE - type is invalid
1799  *   _TYPE_MISMATCH - np doesn't carry children of type type
1800  *   _DELETED - np has been deleted
1801  *   _NO_RESOURCES
1802  *   _SUCCESS - if *cpp is not NULL, it is held
1803  */
1804 static int
1805 rc_node_find_named_child(rc_node_t *np, const char *name, uint32_t type,
1806     rc_node_t **cpp)
1807 {
1808 	int ret;
1809 	rc_node_t *cp;
1810 
1811 	assert(MUTEX_HELD(&np->rn_lock));
1812 	assert(np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP);
1813 
1814 	ret = rc_node_fill_children(np, type);
1815 	if (ret != REP_PROTOCOL_SUCCESS)
1816 		return (ret);
1817 
1818 	for (cp = uu_list_first(np->rn_children);
1819 	    cp != NULL;
1820 	    cp = uu_list_next(np->rn_children, cp)) {
1821 		if (cp->rn_id.rl_type == type && strcmp(cp->rn_name, name) == 0)
1822 			break;
1823 	}
1824 
1825 	if (cp != NULL)
1826 		rc_node_hold(cp);
1827 	*cpp = cp;
1828 
1829 	return (REP_PROTOCOL_SUCCESS);
1830 }
1831 
1832 #ifndef NATIVE_BUILD
1833 static int rc_node_parent(rc_node_t *, rc_node_t **);
1834 
1835 /*
1836  * If the propname property exists in pg, and it is of type string, add its
1837  * values as authorizations to pcp.  pg must not be locked on entry, and it is
1838  * returned unlocked.  Returns
1839  *   _DELETED - pg was deleted
1840  *   _NO_RESOURCES
1841  *   _NOT_FOUND - pg has no property named propname
1842  *   _SUCCESS
1843  */
1844 static int
1845 perm_add_pg_prop_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1846 {
1847 	rc_node_t *prop;
1848 	int result;
1849 
1850 	uint_t count;
1851 	const char *cp;
1852 
1853 	assert(!MUTEX_HELD(&pg->rn_lock));
1854 	assert(pg->rn_id.rl_type == REP_PROTOCOL_ENTITY_PROPERTYGRP);
1855 	assert(pg->rn_id.rl_ids[ID_SNAPSHOT] == 0);
1856 
1857 	(void) pthread_mutex_lock(&pg->rn_lock);
1858 	result = rc_node_find_named_child(pg, propname,
1859 	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
1860 	(void) pthread_mutex_unlock(&pg->rn_lock);
1861 	if (result != REP_PROTOCOL_SUCCESS) {
1862 		switch (result) {
1863 		case REP_PROTOCOL_FAIL_DELETED:
1864 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1865 			return (result);
1866 
1867 		case REP_PROTOCOL_FAIL_INVALID_TYPE:
1868 		case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
1869 		default:
1870 			bad_error("rc_node_find_named_child", result);
1871 		}
1872 	}
1873 
1874 	if (prop == NULL)
1875 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1876 
1877 	/* rn_valtype is immutable, so no locking. */
1878 	if (prop->rn_valtype != REP_PROTOCOL_TYPE_STRING) {
1879 		rc_node_rele(prop);
1880 		return (REP_PROTOCOL_SUCCESS);
1881 	}
1882 
1883 	(void) pthread_mutex_lock(&prop->rn_lock);
1884 	for (count = prop->rn_values_count, cp = prop->rn_values;
1885 	    count > 0;
1886 	    --count) {
1887 		result = perm_add_enabling(pcp, cp);
1888 		if (result != REP_PROTOCOL_SUCCESS)
1889 			break;
1890 
1891 		cp = strchr(cp, '\0') + 1;
1892 	}
1893 
1894 	rc_node_rele_locked(prop);
1895 
1896 	return (result);
1897 }
1898 
1899 /*
1900  * Assuming that ent is a service or instance node, if the pgname property
1901  * group has type pgtype, and it has a propname property with string type, add
1902  * its values as authorizations to pcp.  If pgtype is NULL, it is not checked.
1903  * Returns
1904  *   _SUCCESS
1905  *   _DELETED - ent was deleted
1906  *   _NO_RESOURCES - no resources
1907  *   _NOT_FOUND - ent does not have pgname pg or propname property
1908  */
1909 static int
1910 perm_add_ent_prop_values(permcheck_t *pcp, rc_node_t *ent, const char *pgname,
1911     const char *pgtype, const char *propname)
1912 {
1913 	int r;
1914 	rc_node_t *pg;
1915 
1916 	assert(!MUTEX_HELD(&ent->rn_lock));
1917 
1918 	(void) pthread_mutex_lock(&ent->rn_lock);
1919 	r = rc_node_find_named_child(ent, pgname,
1920 	    REP_PROTOCOL_ENTITY_PROPERTYGRP, &pg);
1921 	(void) pthread_mutex_unlock(&ent->rn_lock);
1922 
1923 	switch (r) {
1924 	case REP_PROTOCOL_SUCCESS:
1925 		break;
1926 
1927 	case REP_PROTOCOL_FAIL_DELETED:
1928 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
1929 		return (r);
1930 
1931 	default:
1932 		bad_error("rc_node_find_named_child", r);
1933 	}
1934 
1935 	if (pg == NULL)
1936 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1937 
1938 	if (pgtype == NULL || strcmp(pg->rn_type, pgtype) == 0) {
1939 		r = perm_add_pg_prop_values(pcp, pg, propname);
1940 		switch (r) {
1941 		case REP_PROTOCOL_FAIL_DELETED:
1942 			r = REP_PROTOCOL_FAIL_NOT_FOUND;
1943 			break;
1944 
1945 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1946 		case REP_PROTOCOL_SUCCESS:
1947 		case REP_PROTOCOL_FAIL_NOT_FOUND:
1948 			break;
1949 
1950 		default:
1951 			bad_error("perm_add_pg_prop_values", r);
1952 		}
1953 	}
1954 
1955 	rc_node_rele(pg);
1956 
1957 	return (r);
1958 }
1959 
1960 /*
1961  * If pg has a property named propname, and it string typed, add its values as
1962  * authorizations to pcp.  If pg has no such property, and its parent is an
1963  * instance, walk up to the service and try doing the same with the property
1964  * of the same name from the property group of the same name.  Returns
1965  *   _SUCCESS
1966  *   _NO_RESOURCES
1967  *   _DELETED - pg (or an ancestor) was deleted
1968  */
1969 static int
1970 perm_add_enabling_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1971 {
1972 	int r;
1973 
1974 	r = perm_add_pg_prop_values(pcp, pg, propname);
1975 
1976 	if (r == REP_PROTOCOL_FAIL_NOT_FOUND) {
1977 		char pgname[REP_PROTOCOL_NAME_LEN + 1];
1978 		rc_node_t *inst, *svc;
1979 		size_t sz;
1980 
1981 		assert(!MUTEX_HELD(&pg->rn_lock));
1982 
1983 		if (pg->rn_id.rl_ids[ID_INSTANCE] == 0) {
1984 			/* not an instance pg */
1985 			return (REP_PROTOCOL_SUCCESS);
1986 		}
1987 
1988 		sz = strlcpy(pgname, pg->rn_name, sizeof (pgname));
1989 		assert(sz < sizeof (pgname));
1990 
1991 		/* get pg's parent */
1992 		r = rc_node_parent(pg, &inst);
1993 		if (r != REP_PROTOCOL_SUCCESS) {
1994 			assert(r == REP_PROTOCOL_FAIL_DELETED);
1995 			return (r);
1996 		}
1997 
1998 		assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
1999 
2000 		/* get instance's parent */
2001 		r = rc_node_parent(inst, &svc);
2002 		rc_node_rele(inst);
2003 		if (r != REP_PROTOCOL_SUCCESS) {
2004 			assert(r == REP_PROTOCOL_FAIL_DELETED);
2005 			return (r);
2006 		}
2007 
2008 		assert(svc->rn_id.rl_type == REP_PROTOCOL_ENTITY_SERVICE);
2009 
2010 		r = perm_add_ent_prop_values(pcp, svc, pgname, NULL, propname);
2011 
2012 		rc_node_rele(svc);
2013 
2014 		if (r == REP_PROTOCOL_FAIL_NOT_FOUND)
2015 			r = REP_PROTOCOL_SUCCESS;
2016 	}
2017 
2018 	return (r);
2019 }
2020 
2021 /*
2022  * Call perm_add_enabling_values() for the "action_authorization" property of
2023  * the "general" property group of inst.  Returns
2024  *   _DELETED - inst (or an ancestor) was deleted
2025  *   _NO_RESOURCES
2026  *   _SUCCESS
2027  */
2028 static int
2029 perm_add_inst_action_auth(permcheck_t *pcp, rc_node_t *inst)
2030 {
2031 	int r;
2032 	rc_node_t *svc;
2033 
2034 	assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
2035 
2036 	r = perm_add_ent_prop_values(pcp, inst, AUTH_PG_GENERAL,
2037 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2038 
2039 	if (r != REP_PROTOCOL_FAIL_NOT_FOUND)
2040 		return (r);
2041 
2042 	r = rc_node_parent(inst, &svc);
2043 	if (r != REP_PROTOCOL_SUCCESS) {
2044 		assert(r == REP_PROTOCOL_FAIL_DELETED);
2045 		return (r);
2046 	}
2047 
2048 	r = perm_add_ent_prop_values(pcp, svc, AUTH_PG_GENERAL,
2049 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2050 
2051 	return (r == REP_PROTOCOL_FAIL_NOT_FOUND ? REP_PROTOCOL_SUCCESS : r);
2052 }
2053 #endif /* NATIVE_BUILD */
2054 
2055 void
2056 rc_node_ptr_init(rc_node_ptr_t *out)
2057 {
2058 	out->rnp_node = NULL;
2059 	out->rnp_authorized = 0;
2060 	out->rnp_deleted = 0;
2061 }
2062 
2063 static void
2064 rc_node_assign(rc_node_ptr_t *out, rc_node_t *val)
2065 {
2066 	rc_node_t *cur = out->rnp_node;
2067 	if (val != NULL)
2068 		rc_node_hold(val);
2069 	out->rnp_node = val;
2070 	if (cur != NULL)
2071 		rc_node_rele(cur);
2072 	out->rnp_authorized = 0;
2073 	out->rnp_deleted = 0;
2074 }
2075 
2076 void
2077 rc_node_clear(rc_node_ptr_t *out, int deleted)
2078 {
2079 	rc_node_assign(out, NULL);
2080 	out->rnp_deleted = deleted;
2081 }
2082 
2083 void
2084 rc_node_ptr_assign(rc_node_ptr_t *out, const rc_node_ptr_t *val)
2085 {
2086 	rc_node_assign(out, val->rnp_node);
2087 }
2088 
2089 /*
2090  * rc_node_check()/RC_NODE_CHECK()
2091  *	generic "entry" checks, run before the use of an rc_node pointer.
2092  *
2093  * Fails with
2094  *   _NOT_SET
2095  *   _DELETED
2096  */
2097 static int
2098 rc_node_check_and_lock(rc_node_t *np)
2099 {
2100 	int result = REP_PROTOCOL_SUCCESS;
2101 	if (np == NULL)
2102 		return (REP_PROTOCOL_FAIL_NOT_SET);
2103 
2104 	(void) pthread_mutex_lock(&np->rn_lock);
2105 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2106 		result = REP_PROTOCOL_FAIL_DELETED;
2107 		(void) pthread_mutex_unlock(&np->rn_lock);
2108 	}
2109 
2110 	return (result);
2111 }
2112 
2113 /*
2114  * Fails with
2115  *   _NOT_SET - ptr is reset
2116  *   _DELETED - node has been deleted
2117  */
2118 static rc_node_t *
2119 rc_node_ptr_check_and_lock(rc_node_ptr_t *npp, int *res)
2120 {
2121 	rc_node_t *np = npp->rnp_node;
2122 	if (np == NULL) {
2123 		if (npp->rnp_deleted)
2124 			*res = REP_PROTOCOL_FAIL_DELETED;
2125 		else
2126 			*res = REP_PROTOCOL_FAIL_NOT_SET;
2127 		return (NULL);
2128 	}
2129 
2130 	(void) pthread_mutex_lock(&np->rn_lock);
2131 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2132 		(void) pthread_mutex_unlock(&np->rn_lock);
2133 		rc_node_clear(npp, 1);
2134 		*res = REP_PROTOCOL_FAIL_DELETED;
2135 		return (NULL);
2136 	}
2137 	return (np);
2138 }
2139 
/*
 * Entry-check macros.  Every one of these contains a return statement: when
 * the check fails, the *enclosing function* returns the failure code, so
 * they may only be used in functions returning int, and only at points where
 * an immediate return leaks nothing.  Each expands to a brace-enclosed
 * block, and the arguments are evaluated more than once.
 */

/*
 * Run rc_node_check_and_lock(n).  Falls through with n->rn_lock held, or
 * returns its error from the enclosing function.
 */
#define	RC_NODE_CHECK_AND_LOCK(n) {					\
	int rc__res;							\
	if ((rc__res = rc_node_check_and_lock(n)) != REP_PROTOCOL_SUCCESS) \
		return (rc__res);					\
}

/* As RC_NODE_CHECK_AND_LOCK(), but drops n->rn_lock again on success. */
#define	RC_NODE_CHECK(n) {						\
	RC_NODE_CHECK_AND_LOCK(n);					\
	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
}

/*
 * As RC_NODE_CHECK(), but places a hold on n (rc_node_hold_locked()) before
 * dropping the lock.
 */
#define	RC_NODE_CHECK_AND_HOLD(n) {					\
	RC_NODE_CHECK_AND_LOCK(n);					\
	rc_node_hold_locked(n);						\
	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
}

/*
 * Set np from rc_node_ptr_check_and_lock(npp, ...).  Falls through with
 * np->rn_lock held, or returns the error from the enclosing function.
 */
#define	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp) {			\
	int rc__res;							\
	if (((np) = rc_node_ptr_check_and_lock(npp, &rc__res)) == NULL)	\
		return (rc__res);					\
}

/* As RC_NODE_PTR_GET_CHECK_AND_LOCK(), but drops the lock on success. */
#define	RC_NODE_PTR_GET_CHECK(np, npp) {				\
	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
}

/*
 * As RC_NODE_PTR_GET_CHECK(), but places a hold on np before dropping the
 * lock.
 */
#define	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp) {			\
	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
	rc_node_hold_locked(np);					\
	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
}
2173 
/*
 * Try to take flag on np with rc_node_hold_flag().  np->rn_lock must be
 * held and np must not be RC_NODE_DEAD (both asserted).  On success,
 * execution falls through with the lock still held.  On failure, the lock
 * is dropped and the *enclosing function* returns _DELETED.
 */
#define	HOLD_FLAG_OR_RETURN(np, flag) {					\
	assert(MUTEX_HELD(&(np)->rn_lock));				\
	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
	if (!rc_node_hold_flag((np), flag)) {				\
		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
		return (REP_PROTOCOL_FAIL_DELETED);			\
	}								\
}

/*
 * As HOLD_FLAG_OR_RETURN(), but on failure also clears npp and marks it
 * deleted before returning.  np must be the node npp refers to (asserted on
 * the failure path only).
 */
#define	HOLD_PTR_FLAG_OR_RETURN(np, npp, flag) {			\
	assert(MUTEX_HELD(&(np)->rn_lock));				\
	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
	if (!rc_node_hold_flag((np), flag)) {				\
		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
		assert((np) == (npp)->rnp_node);			\
		rc_node_clear(npp, 1);					\
		return (REP_PROTOCOL_FAIL_DELETED);			\
	}								\
}
2193 
2194 int
2195 rc_local_scope(uint32_t type, rc_node_ptr_t *out)
2196 {
2197 	if (type != REP_PROTOCOL_ENTITY_SCOPE) {
2198 		rc_node_clear(out, 0);
2199 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2200 	}
2201 
2202 	/*
2203 	 * the main scope never gets destroyed
2204 	 */
2205 	rc_node_assign(out, rc_scope);
2206 
2207 	return (REP_PROTOCOL_SUCCESS);
2208 }
2209 
2210 /*
2211  * Fails with
2212  *   _NOT_SET - npp is not set
2213  *   _DELETED - the node npp pointed at has been deleted
2214  *   _TYPE_MISMATCH - type is not _SCOPE
2215  *   _NOT_FOUND - scope has no parent
2216  */
2217 static int
2218 rc_scope_parent_scope(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2219 {
2220 	rc_node_t *np;
2221 
2222 	rc_node_clear(out, 0);
2223 
2224 	RC_NODE_PTR_GET_CHECK(np, npp);
2225 
2226 	if (type != REP_PROTOCOL_ENTITY_SCOPE)
2227 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2228 
2229 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
2230 }
2231 
2232 /*
2233  * Fails with
2234  *   _NOT_SET
2235  *   _DELETED
2236  *   _NOT_APPLICABLE
2237  *   _NOT_FOUND
2238  *   _BAD_REQUEST
2239  *   _TRUNCATED
2240  */
2241 int
2242 rc_node_name(rc_node_ptr_t *npp, char *buf, size_t sz, uint32_t answertype,
2243     size_t *sz_out)
2244 {
2245 	size_t actual;
2246 	rc_node_t *np;
2247 
2248 	assert(sz == *sz_out);
2249 
2250 	RC_NODE_PTR_GET_CHECK(np, npp);
2251 
2252 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2253 		np = np->rn_cchain[0];
2254 		RC_NODE_CHECK(np);
2255 	}
2256 
2257 	switch (answertype) {
2258 	case RP_ENTITY_NAME_NAME:
2259 		if (np->rn_name == NULL)
2260 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2261 		actual = strlcpy(buf, np->rn_name, sz);
2262 		break;
2263 	case RP_ENTITY_NAME_PGTYPE:
2264 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2265 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2266 		actual = strlcpy(buf, np->rn_type, sz);
2267 		break;
2268 	case RP_ENTITY_NAME_PGFLAGS:
2269 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2270 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2271 		actual = snprintf(buf, sz, "%d", np->rn_pgflags);
2272 		break;
2273 	case RP_ENTITY_NAME_SNAPLEVEL_SCOPE:
2274 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2275 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2276 		actual = strlcpy(buf, np->rn_snaplevel->rsl_scope, sz);
2277 		break;
2278 	case RP_ENTITY_NAME_SNAPLEVEL_SERVICE:
2279 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2280 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2281 		actual = strlcpy(buf, np->rn_snaplevel->rsl_service, sz);
2282 		break;
2283 	case RP_ENTITY_NAME_SNAPLEVEL_INSTANCE:
2284 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2285 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2286 		if (np->rn_snaplevel->rsl_instance == NULL)
2287 			return (REP_PROTOCOL_FAIL_NOT_FOUND);
2288 		actual = strlcpy(buf, np->rn_snaplevel->rsl_instance, sz);
2289 		break;
2290 	default:
2291 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2292 	}
2293 	if (actual >= sz)
2294 		return (REP_PROTOCOL_FAIL_TRUNCATED);
2295 
2296 	*sz_out = actual;
2297 	return (REP_PROTOCOL_SUCCESS);
2298 }
2299 
2300 int
2301 rc_node_get_property_type(rc_node_ptr_t *npp, rep_protocol_value_type_t *out)
2302 {
2303 	rc_node_t *np;
2304 
2305 	RC_NODE_PTR_GET_CHECK(np, npp);
2306 
2307 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
2308 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2309 
2310 	*out = np->rn_valtype;
2311 
2312 	return (REP_PROTOCOL_SUCCESS);
2313 }
2314 
2315 /*
2316  * Get np's parent.  If np is deleted, returns _DELETED.  Otherwise puts a hold
2317  * on the parent, returns a pointer to it in *out, and returns _SUCCESS.
2318  */
2319 static int
2320 rc_node_parent(rc_node_t *np, rc_node_t **out)
2321 {
2322 	rc_node_t *pnp;
2323 	rc_node_t *np_orig;
2324 
2325 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2326 		RC_NODE_CHECK_AND_LOCK(np);
2327 	} else {
2328 		np = np->rn_cchain[0];
2329 		RC_NODE_CHECK_AND_LOCK(np);
2330 	}
2331 
2332 	np_orig = np;
2333 	rc_node_hold_locked(np);		/* simplifies the remainder */
2334 
2335 	for (;;) {
2336 		if (!rc_node_wait_flag(np,
2337 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
2338 			rc_node_rele_locked(np);
2339 			return (REP_PROTOCOL_FAIL_DELETED);
2340 		}
2341 
2342 		if (!(np->rn_flags & RC_NODE_OLD))
2343 			break;
2344 
2345 		rc_node_rele_locked(np);
2346 		np = cache_lookup(&np_orig->rn_id);
2347 		assert(np != np_orig);
2348 
2349 		if (np == NULL)
2350 			goto deleted;
2351 		(void) pthread_mutex_lock(&np->rn_lock);
2352 	}
2353 
2354 	/* guaranteed to succeed without dropping the lock */
2355 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2356 		(void) pthread_mutex_unlock(&np->rn_lock);
2357 		*out = NULL;
2358 		rc_node_rele(np);
2359 		return (REP_PROTOCOL_FAIL_DELETED);
2360 	}
2361 
2362 	assert(np->rn_parent != NULL);
2363 	pnp = np->rn_parent;
2364 	(void) pthread_mutex_unlock(&np->rn_lock);
2365 
2366 	(void) pthread_mutex_lock(&pnp->rn_lock);
2367 	(void) pthread_mutex_lock(&np->rn_lock);
2368 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2369 	(void) pthread_mutex_unlock(&np->rn_lock);
2370 
2371 	rc_node_hold_locked(pnp);
2372 
2373 	(void) pthread_mutex_unlock(&pnp->rn_lock);
2374 
2375 	rc_node_rele(np);
2376 	*out = pnp;
2377 	return (REP_PROTOCOL_SUCCESS);
2378 
2379 deleted:
2380 	rc_node_rele(np);
2381 	return (REP_PROTOCOL_FAIL_DELETED);
2382 }
2383 
2384 /*
2385  * Fails with
2386  *   _NOT_SET
2387  *   _DELETED
2388  */
2389 static int
2390 rc_node_ptr_parent(rc_node_ptr_t *npp, rc_node_t **out)
2391 {
2392 	rc_node_t *np;
2393 
2394 	RC_NODE_PTR_GET_CHECK(np, npp);
2395 
2396 	return (rc_node_parent(np, out));
2397 }
2398 
2399 /*
2400  * Fails with
2401  *   _NOT_SET - npp is not set
2402  *   _DELETED - the node npp pointed at has been deleted
2403  *   _TYPE_MISMATCH - npp's node's parent is not of type type
2404  *
2405  * If npp points to a scope, can also fail with
2406  *   _NOT_FOUND - scope has no parent
2407  */
2408 int
2409 rc_node_get_parent(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2410 {
2411 	rc_node_t *pnp;
2412 	int rc;
2413 
2414 	if (npp->rnp_node != NULL &&
2415 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE)
2416 		return (rc_scope_parent_scope(npp, type, out));
2417 
2418 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS) {
2419 		rc_node_clear(out, 0);
2420 		return (rc);
2421 	}
2422 
2423 	if (type != pnp->rn_id.rl_type) {
2424 		rc_node_rele(pnp);
2425 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2426 	}
2427 
2428 	rc_node_assign(out, pnp);
2429 	rc_node_rele(pnp);
2430 
2431 	return (REP_PROTOCOL_SUCCESS);
2432 }
2433 
2434 int
2435 rc_node_parent_type(rc_node_ptr_t *npp, uint32_t *type_out)
2436 {
2437 	rc_node_t *pnp;
2438 	int rc;
2439 
2440 	if (npp->rnp_node != NULL &&
2441 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE) {
2442 		*type_out = REP_PROTOCOL_ENTITY_SCOPE;
2443 		return (REP_PROTOCOL_SUCCESS);
2444 	}
2445 
2446 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS)
2447 		return (rc);
2448 
2449 	*type_out = pnp->rn_id.rl_type;
2450 
2451 	rc_node_rele(pnp);
2452 
2453 	return (REP_PROTOCOL_SUCCESS);
2454 }
2455 
2456 /*
2457  * Fails with
2458  *   _INVALID_TYPE - type is invalid
2459  *   _TYPE_MISMATCH - np doesn't carry children of type type
2460  *   _DELETED - np has been deleted
2461  *   _NOT_FOUND - no child with that name/type combo found
2462  *   _NO_RESOURCES
2463  *   _BACKEND_ACCESS
2464  */
2465 int
2466 rc_node_get_child(rc_node_ptr_t *npp, const char *name, uint32_t type,
2467     rc_node_ptr_t *outp)
2468 {
2469 	rc_node_t *np, *cp;
2470 	rc_node_t *child = NULL;
2471 	int ret, idx;
2472 
2473 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2474 	if ((ret = rc_check_type_name(type, name)) == REP_PROTOCOL_SUCCESS) {
2475 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2476 			ret = rc_node_find_named_child(np, name, type, &child);
2477 		} else {
2478 			(void) pthread_mutex_unlock(&np->rn_lock);
2479 			ret = REP_PROTOCOL_SUCCESS;
2480 			for (idx = 0; idx < COMPOSITION_DEPTH; idx++) {
2481 				cp = np->rn_cchain[idx];
2482 				if (cp == NULL)
2483 					break;
2484 				RC_NODE_CHECK_AND_LOCK(cp);
2485 				ret = rc_node_find_named_child(cp, name, type,
2486 				    &child);
2487 				(void) pthread_mutex_unlock(&cp->rn_lock);
2488 				/*
2489 				 * loop only if we succeeded, but no child of
2490 				 * the correct name was found.
2491 				 */
2492 				if (ret != REP_PROTOCOL_SUCCESS ||
2493 				    child != NULL)
2494 					break;
2495 			}
2496 			(void) pthread_mutex_lock(&np->rn_lock);
2497 		}
2498 	}
2499 	(void) pthread_mutex_unlock(&np->rn_lock);
2500 
2501 	if (ret == REP_PROTOCOL_SUCCESS) {
2502 		rc_node_assign(outp, child);
2503 		if (child != NULL)
2504 			rc_node_rele(child);
2505 		else
2506 			ret = REP_PROTOCOL_FAIL_NOT_FOUND;
2507 	} else {
2508 		rc_node_assign(outp, NULL);
2509 	}
2510 	return (ret);
2511 }
2512 
/*
 * Bring npp up to date with the cache: look up the node currently cached
 * under the same id as the node npp refers to, and make npp refer to it if
 * it is a different node.  Only property groups, snapshots and composed
 * property groups can be updated.
 *
 * Returns
 *   _SUCCESS - the cached node is the one npp already refers to
 *   _DONE - a different node was found (and npp updated, unless npp refers
 *	     to a composed property group)
 *
 * Fails with
 *   _NOT_SET - npp (or the head of the composition chain) is not set
 *   _DELETED - the node is deleted, is no longer in the cache, or its
 *		current version is dead
 *   _BAD_REQUEST - the node is neither a property group nor a snapshot
 */
int
rc_node_update(rc_node_ptr_t *npp)
{
	cache_bucket_t *bp;
	rc_node_t *np = npp->rnp_node;
	rc_node_t *nnp;
	rc_node_t *cpg = NULL;

	if (np != NULL &&
	    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
		/*
		 * If we're updating a composed property group, actually
		 * update the top-level property group & return the
		 * appropriate value.  But leave *npp pointing at us.
		 */
		cpg = np;
		np = np->rn_cchain[0];
	}

	/* returns _NOT_SET here if np is NULL */
	RC_NODE_CHECK(np);

	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP &&
	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT)
		return (REP_PROTOCOL_FAIL_BAD_REQUEST);

	/*
	 * Find the node currently in the cache for np's id.  If it has
	 * RC_NODE_IN_TX set, wait for that to clear and then look again.
	 * The loop is left with nnp locked and referenced.
	 */
	for (;;) {
		bp = cache_hold(np->rn_hash);
		nnp = cache_lookup_unlocked(bp, &np->rn_id);
		if (nnp == NULL) {
			cache_release(bp);
			rc_node_clear(npp, 1);
			return (REP_PROTOCOL_FAIL_DELETED);
		}
		/*
		 * grab the lock before dropping the cache bucket, so
		 * that no one else can sneak in
		 */
		(void) pthread_mutex_lock(&nnp->rn_lock);
		cache_release(bp);

		/*
		 * Done if nnp is not in a transaction, or if the wait for
		 * RC_NODE_IN_TX failed (the dead case is handled below).
		 */
		if (!(nnp->rn_flags & RC_NODE_IN_TX) ||
		    !rc_node_wait_flag(nnp, RC_NODE_IN_TX))
			break;

		rc_node_rele_locked(nnp);
	}

	/*
	 * If it is dead, we want to update it so that it will continue to
	 * report being dead.
	 */
	if (nnp->rn_flags & RC_NODE_DEAD) {
		(void) pthread_mutex_unlock(&nnp->rn_lock);
		if (nnp != np && cpg == NULL)
			rc_node_assign(npp, nnp);	/* updated */
		rc_node_rele(nnp);
		return (REP_PROTOCOL_FAIL_DELETED);
	}

	assert(!(nnp->rn_flags & RC_NODE_OLD));
	(void) pthread_mutex_unlock(&nnp->rn_lock);

	if (nnp != np && cpg == NULL)
		rc_node_assign(npp, nnp);		/* updated */

	/* drop the reference obtained from the cache lookup */
	rc_node_rele(nnp);

	return ((nnp == np)? REP_PROTOCOL_SUCCESS : REP_PROTOCOL_DONE);
}
2582 
2583 /*
2584  * does a generic modification check, for creation, deletion, and snapshot
2585  * management only.  Property group transactions have different checks.
2586  */
2587 int
2588 rc_node_modify_permission_check(void)
2589 {
2590 	int rc = REP_PROTOCOL_SUCCESS;
2591 	permcheck_t *pcp;
2592 	int granted;
2593 
2594 	if (!client_is_privileged()) {
2595 #ifdef NATIVE_BUILD
2596 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2597 #else
2598 		pcp = pc_create();
2599 		if (pcp != NULL) {
2600 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2601 
2602 			if (rc == REP_PROTOCOL_SUCCESS) {
2603 				granted = perm_granted(pcp);
2604 
2605 				if (granted < 0)
2606 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2607 			}
2608 
2609 			pc_free(pcp);
2610 		} else {
2611 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2612 		}
2613 
2614 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2615 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2616 #endif /* NATIVE_BUILD */
2617 	}
2618 	return (rc);
2619 }
2620 
2621 /*
2622  * Fails with
2623  *   _DELETED - node has been deleted
2624  *   _NOT_SET - npp is reset
2625  *   _NOT_APPLICABLE - type is _PROPERTYGRP
2626  *   _INVALID_TYPE - node is corrupt or type is invalid
2627  *   _TYPE_MISMATCH - node cannot have children of type type
2628  *   _BAD_REQUEST - name is invalid
2629  *		    cannot create children for this type of node
2630  *   _NO_RESOURCES - out of memory, or could not allocate new id
2631  *   _PERMISSION_DENIED
2632  *   _BACKEND_ACCESS
2633  *   _BACKEND_READONLY
2634  *   _EXISTS - child already exists
2635  */
2636 int
2637 rc_node_create_child(rc_node_ptr_t *npp, uint32_t type, const char *name,
2638     rc_node_ptr_t *cpp)
2639 {
2640 	rc_node_t *np;
2641 	rc_node_t *cp = NULL;
2642 	int rc;
2643 
2644 	rc_node_clear(cpp, 0);
2645 
2646 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2647 
2648 	/*
2649 	 * there is a separate interface for creating property groups
2650 	 */
2651 	if (type == REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2652 		(void) pthread_mutex_unlock(&np->rn_lock);
2653 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2654 	}
2655 
2656 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2657 		(void) pthread_mutex_unlock(&np->rn_lock);
2658 		np = np->rn_cchain[0];
2659 		RC_NODE_CHECK_AND_LOCK(np);
2660 	}
2661 
2662 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2663 	    REP_PROTOCOL_SUCCESS) {
2664 		(void) pthread_mutex_unlock(&np->rn_lock);
2665 		return (rc);
2666 	}
2667 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS) {
2668 		(void) pthread_mutex_unlock(&np->rn_lock);
2669 		return (rc);
2670 	}
2671 
2672 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
2673 		(void) pthread_mutex_unlock(&np->rn_lock);
2674 		return (rc);
2675 	}
2676 
2677 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2678 	(void) pthread_mutex_unlock(&np->rn_lock);
2679 
2680 	rc = object_create(np, type, name, &cp);
2681 	assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2682 
2683 	if (rc == REP_PROTOCOL_SUCCESS) {
2684 		rc_node_assign(cpp, cp);
2685 		rc_node_rele(cp);
2686 	}
2687 
2688 	(void) pthread_mutex_lock(&np->rn_lock);
2689 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2690 	(void) pthread_mutex_unlock(&np->rn_lock);
2691 
2692 	return (rc);
2693 }
2694 
2695 int
2696 rc_node_create_child_pg(rc_node_ptr_t *npp, uint32_t type, const char *name,
2697     const char *pgtype, uint32_t flags, rc_node_ptr_t *cpp)
2698 {
2699 	rc_node_t *np;
2700 	rc_node_t *cp;
2701 	int rc;
2702 	permcheck_t *pcp;
2703 	int granted;
2704 
2705 	rc_node_clear(cpp, 0);
2706 
2707 	/* verify flags is valid */
2708 	if (flags & ~SCF_PG_FLAG_NONPERSISTENT)
2709 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2710 
2711 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
2712 
2713 	if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2714 		rc_node_rele(np);
2715 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2716 	}
2717 
2718 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2719 	    REP_PROTOCOL_SUCCESS) {
2720 		rc_node_rele(np);
2721 		return (rc);
2722 	}
2723 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS ||
2724 	    (rc = rc_check_pgtype_name(pgtype)) != REP_PROTOCOL_SUCCESS) {
2725 		rc_node_rele(np);
2726 		return (rc);
2727 	}
2728 
2729 	if (!client_is_privileged()) {
2730 #ifdef NATIVE_BUILD
2731 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2732 #else
2733 		/* Must have .smf.modify or smf.modify.<type> authorization */
2734 		pcp = pc_create();
2735 		if (pcp != NULL) {
2736 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2737 
2738 			if (rc == REP_PROTOCOL_SUCCESS) {
2739 				const char * const auth =
2740 				    perm_auth_for_pgtype(pgtype);
2741 
2742 				if (auth != NULL)
2743 					rc = perm_add_enabling(pcp, auth);
2744 			}
2745 
2746 			/*
2747 			 * .manage or $action_authorization can be used to
2748 			 * create the actions pg and the general_ovr pg.
2749 			 */
2750 			if (rc == REP_PROTOCOL_SUCCESS &&
2751 			    (flags & SCF_PG_FLAG_NONPERSISTENT) != 0 &&
2752 			    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE &&
2753 			    ((strcmp(name, AUTH_PG_ACTIONS) == 0 &&
2754 			    strcmp(pgtype, AUTH_PG_ACTIONS_TYPE) == 0) ||
2755 			    (strcmp(name, AUTH_PG_GENERAL_OVR) == 0 &&
2756 			    strcmp(pgtype, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
2757 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
2758 
2759 				if (rc == REP_PROTOCOL_SUCCESS)
2760 					rc = perm_add_inst_action_auth(pcp, np);
2761 			}
2762 
2763 			if (rc == REP_PROTOCOL_SUCCESS) {
2764 				granted = perm_granted(pcp);
2765 
2766 				if (granted < 0)
2767 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2768 			}
2769 
2770 			pc_free(pcp);
2771 		} else {
2772 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2773 		}
2774 
2775 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2776 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2777 #endif /* NATIVE_BUILD */
2778 
2779 		if (rc != REP_PROTOCOL_SUCCESS) {
2780 			rc_node_rele(np);
2781 			return (rc);
2782 		}
2783 	}
2784 
2785 	(void) pthread_mutex_lock(&np->rn_lock);
2786 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2787 	(void) pthread_mutex_unlock(&np->rn_lock);
2788 
2789 	rc = object_create_pg(np, type, name, pgtype, flags, &cp);
2790 
2791 	if (rc == REP_PROTOCOL_SUCCESS) {
2792 		rc_node_assign(cpp, cp);
2793 		rc_node_rele(cp);
2794 	}
2795 
2796 	(void) pthread_mutex_lock(&np->rn_lock);
2797 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2798 	(void) pthread_mutex_unlock(&np->rn_lock);
2799 
2800 	return (rc);
2801 }
2802 
2803 static void
2804 rc_pg_notify_fire(rc_node_pg_notify_t *pnp)
2805 {
2806 	assert(MUTEX_HELD(&rc_pg_notify_lock));
2807 
2808 	if (pnp->rnpn_pg != NULL) {
2809 		uu_list_remove(pnp->rnpn_pg->rn_pg_notify_list, pnp);
2810 		(void) close(pnp->rnpn_fd);
2811 
2812 		pnp->rnpn_pg = NULL;
2813 		pnp->rnpn_fd = -1;
2814 	} else {
2815 		assert(pnp->rnpn_fd == -1);
2816 	}
2817 }
2818 
/*
 * Report the deletion of np_arg through rc_notify_deletion().
 *
 * Walks from np_arg up through rn_parent until a service is reached, noting
 * the property group, instance and service passed on the way.  Each node
 * visited is held and has RC_NODE_USING_PARENT taken on it until the end of
 * the function.  If a service is reached, its name, together with the
 * instance and property group names (NULL where none was passed), is handed
 * to rc_notify_deletion() along with ndp.  If the walk ends any other way
 * (a node's flag cannot be taken, a node of some other type is met, or the
 * parent pointer is NULL), ndp is freed with uu_free() instead.
 */
static void
rc_notify_node_delete(rc_notify_delete_t *ndp, rc_node_t *np_arg)
{
	rc_node_t *svc = NULL;
	rc_node_t *inst = NULL;
	rc_node_t *pg = NULL;
	rc_node_t *np = np_arg;
	rc_node_t *nnp;

	while (svc == NULL) {
		(void) pthread_mutex_lock(&np->rn_lock);
		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
			(void) pthread_mutex_unlock(&np->rn_lock);
			goto cleanup;
		}
		nnp = np->rn_parent;
		rc_node_hold_locked(np);	/* hold it in place */

		/*
		 * Record np by type.  The cleanup loop below uses these
		 * pointers to find the nodes whose flag and hold it must
		 * give back.
		 */
		switch (np->rn_id.rl_type) {
		case REP_PROTOCOL_ENTITY_PROPERTYGRP:
			assert(pg == NULL);
			pg = np;
			break;
		case REP_PROTOCOL_ENTITY_INSTANCE:
			assert(inst == NULL);
			inst = np;
			break;
		case REP_PROTOCOL_ENTITY_SERVICE:
			assert(svc == NULL);
			svc = np;
			break;
		default:
			/* not recorded, so undo the flag and hold right here */
			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
			rc_node_rele_locked(np);
			goto cleanup;
		}

		(void) pthread_mutex_unlock(&np->rn_lock);

		np = nnp;
		if (np == NULL)
			goto cleanup;
	}

	rc_notify_deletion(ndp,
	    svc->rn_name,
	    inst != NULL ? inst->rn_name : NULL,
	    pg != NULL ? pg->rn_name : NULL);

	/* ndp has been handed to rc_notify_deletion(); don't free it below */
	ndp = NULL;

cleanup:
	if (ndp != NULL)
		uu_free(ndp);

	/*
	 * Give back the flag and hold on each recorded node, service first.
	 * No explicit unlock follows rc_node_rele_locked(); it is relied on
	 * to drop the lock.
	 */
	for (;;) {
		if (svc != NULL) {
			np = svc;
			svc = NULL;
		} else if (inst != NULL) {
			np = inst;
			inst = NULL;
		} else if (pg != NULL) {
			np = pg;
			pg = NULL;
		} else
			break;

		(void) pthread_mutex_lock(&np->rn_lock);
		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
		rc_node_rele_locked(np);
	}
}
2892 
/*
 * Take RC_NODE_DYING_FLAGS on every descendant of np and, if andformer is
 * nonzero, on every node down np's rn_former chain and their descendants.
 *
 * The caller must hold np->rn_lock and must already hold
 * RC_NODE_DYING_FLAGS on np itself (both asserted).  Failing to get the
 * flags on any node aborts the process.
 *
 * N.B.:  this function drops np->rn_lock on the way out.
 */
static void
rc_node_delete_hold(rc_node_t *np, int andformer)
{
	rc_node_t *cp;

again:
	assert(MUTEX_HELD(&np->rn_lock));
	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);

	for (cp = uu_list_first(np->rn_children); cp != NULL;
	    cp = uu_list_next(np->rn_children, cp)) {
		/* lock the child before letting go of the parent */
		(void) pthread_mutex_lock(&cp->rn_lock);
		(void) pthread_mutex_unlock(&np->rn_lock);
		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS)) {
			/*
			 * already marked as dead -- can't happen, since that
			 * would require setting RC_NODE_CHILDREN_CHANGING
			 * in np, and we're holding that...
			 */
			abort();
		}
		rc_node_delete_hold(cp, andformer);	/* recurse, drop lock */

		/* retake the parent's lock for uu_list_next() */
		(void) pthread_mutex_lock(&np->rn_lock);
	}
	if (andformer && (cp = np->rn_former) != NULL) {
		(void) pthread_mutex_lock(&cp->rn_lock);
		(void) pthread_mutex_unlock(&np->rn_lock);
		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS))
			abort();		/* can't happen, see above */
		np = cp;
		goto again;		/* tail-recurse down rn_former */
	}
	(void) pthread_mutex_unlock(&np->rn_lock);
}
2931 
/*
 * Undo rc_node_delete_hold():  release RC_NODE_DYING_FLAGS on np, on every
 * descendant of np and, if andformer is nonzero, on every node down np's
 * rn_former chain and their descendants.  Children are released before
 * their parent.
 *
 * The caller must hold np->rn_lock, and np must have RC_NODE_DYING_FLAGS
 * set (both asserted).
 *
 * N.B.:  this function drops np->rn_lock on the way out.
 */
static void
rc_node_delete_rele(rc_node_t *np, int andformer)
{
	rc_node_t *cp;

again:
	assert(MUTEX_HELD(&np->rn_lock));
	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);

	for (cp = uu_list_first(np->rn_children); cp != NULL;
	    cp = uu_list_next(np->rn_children, cp)) {
		/* lock the child before letting go of the parent */
		(void) pthread_mutex_lock(&cp->rn_lock);
		(void) pthread_mutex_unlock(&np->rn_lock);
		rc_node_delete_rele(cp, andformer);	/* recurse, drop lock */
		(void) pthread_mutex_lock(&np->rn_lock);
	}
	if (andformer && (cp = np->rn_former) != NULL) {
		/* lock the former node first, then let go of np */
		(void) pthread_mutex_lock(&cp->rn_lock);
		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
		(void) pthread_mutex_unlock(&np->rn_lock);

		np = cp;
		goto again;		/* tail-recurse down rn_former */
	}
	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
	(void) pthread_mutex_unlock(&np->rn_lock);
}
2962 
/*
 * Mark cp as RC_NODE_DEAD.  If cp is a current node (not RC_NODE_OLD) it is
 * also detached from its parent, its property group notifications are
 * fired, and it is removed from the notify list and from the cache hash
 * table.
 *
 * cp->rn_lock must be held on entry and is held again on return, but for a
 * current node it is dropped and retaken in between.
 */
static void
rc_node_finish_delete(rc_node_t *cp)
{
	cache_bucket_t *bp;
	rc_node_pg_notify_t *pnp;

	assert(MUTEX_HELD(&cp->rn_lock));

	if (!(cp->rn_flags & RC_NODE_OLD)) {
		assert(cp->rn_flags & RC_NODE_IN_PARENT);
		/* wait for anyone using rn_parent to finish with it */
		if (!rc_node_wait_flag(cp, RC_NODE_USING_PARENT)) {
			abort();		/* can't happen, see above */
		}
		cp->rn_flags &= ~RC_NODE_IN_PARENT;
		cp->rn_parent = NULL;
	}

	cp->rn_flags |= RC_NODE_DEAD;

	/*
	 * If this node is not out-dated, we need to remove it from
	 * the notify list and cache hash table.
	 */
	if (!(cp->rn_flags & RC_NODE_OLD)) {
		assert(cp->rn_refs > 0);	/* can't go away yet */
		(void) pthread_mutex_unlock(&cp->rn_lock);

		/* each call takes pnp off the list, so this terminates */
		(void) pthread_mutex_lock(&rc_pg_notify_lock);
		while ((pnp = uu_list_first(cp->rn_pg_notify_list)) != NULL)
			rc_pg_notify_fire(pnp);
		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
		rc_notify_remove_node(cp);

		/* the bucket is taken before cp's lock is retaken */
		bp = cache_hold(cp->rn_hash);
		(void) pthread_mutex_lock(&cp->rn_lock);
		cache_remove_unlocked(bp, cp);
		cache_release(bp);
	}
}
3002 
3003 /*
3004  * N.B.:  this function drops np->rn_lock and a reference on the way out.
3005  */
3006 static void
3007 rc_node_delete_children(rc_node_t *np, int andformer)
3008 {
3009 	rc_node_t *cp;
3010 
3011 again:
3012 	assert(np->rn_refs > 0);
3013 	assert(MUTEX_HELD(&np->rn_lock));
3014 	assert(np->rn_flags & RC_NODE_DEAD);
3015 
3016 	while ((cp = uu_list_first(np->rn_children)) != NULL) {
3017 		uu_list_remove(np->rn_children, cp);
3018 		(void) pthread_mutex_lock(&cp->rn_lock);
3019 		(void) pthread_mutex_unlock(&np->rn_lock);
3020 		rc_node_hold_locked(cp);	/* hold while we recurse */
3021 		rc_node_finish_delete(cp);
3022 		rc_node_delete_children(cp, andformer);	/* drops lock + ref */
3023 		(void) pthread_mutex_lock(&np->rn_lock);
3024 	}
3025 
3026 	/*
3027 	 * when we drop cp's lock, all the children will be gone, so we
3028 	 * can release DYING_FLAGS.
3029 	 */
3030 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3031 	if (andformer && (cp = np->rn_former) != NULL) {
3032 		np->rn_former = NULL;		/* unlink */
3033 		(void) pthread_mutex_lock(&cp->rn_lock);
3034 		(void) pthread_mutex_unlock(&np->rn_lock);
3035 		np->rn_flags &= ~RC_NODE_ON_FORMER;
3036 
3037 		rc_node_hold_locked(cp);	/* hold while we loop */
3038 
3039 		rc_node_finish_delete(cp);
3040 
3041 		rc_node_rele(np);		/* drop the old reference */
3042 
3043 		np = cp;
3044 		goto again;		/* tail-recurse down rn_former */
3045 	}
3046 	rc_node_rele_locked(np);
3047 }
3048 
3049 static void
3050 rc_node_unrefed(rc_node_t *np)
3051 {
3052 	int unrefed;
3053 	rc_node_t *pp, *cur;
3054 
3055 	assert(MUTEX_HELD(&np->rn_lock));
3056 	assert(np->rn_refs == 0);
3057 	assert(np->rn_other_refs == 0);
3058 	assert(np->rn_other_refs_held == 0);
3059 
3060 	if (np->rn_flags & RC_NODE_DEAD) {
3061 		(void) pthread_mutex_unlock(&np->rn_lock);
3062 		rc_node_destroy(np);
3063 		return;
3064 	}
3065 
3066 	assert(np->rn_flags & RC_NODE_OLD);
3067 	if (np->rn_flags & RC_NODE_UNREFED) {
3068 		(void) pthread_mutex_unlock(&np->rn_lock);
3069 		return;
3070 	}
3071 	np->rn_flags |= RC_NODE_UNREFED;
3072 
3073 	(void) pthread_mutex_unlock(&np->rn_lock);
3074 
3075 	/*
3076 	 * find the current in-hash object, and grab it's RC_NODE_IN_TX
3077 	 * flag.  That protects the entire rn_former chain.
3078 	 */
3079 	for (;;) {
3080 		pp = cache_lookup(&np->rn_id);
3081 		if (pp == NULL) {
3082 			(void) pthread_mutex_lock(&np->rn_lock);
3083 			if (np->rn_flags & RC_NODE_DEAD)
3084 				goto died;
3085 			/*
3086 			 * We are trying to unreference this node, but the
3087 			 * owner of the former list does not exist.  It must
3088 			 * be the case that another thread is deleting this
3089 			 * entire sub-branch, but has not yet reached us.
3090 			 * We will in short order be deleted.
3091 			 */
3092 			np->rn_flags &= ~RC_NODE_UNREFED;
3093 			(void) pthread_mutex_unlock(&np->rn_lock);
3094 			return;
3095 		}
3096 		if (pp == np) {
3097 			/*
3098 			 * no longer unreferenced
3099 			 */
3100 			(void) pthread_mutex_lock(&np->rn_lock);
3101 			np->rn_flags &= ~RC_NODE_UNREFED;
3102 			rc_node_rele_locked(np);
3103 			return;
3104 		}
3105 		(void) pthread_mutex_lock(&pp->rn_lock);
3106 		if ((pp->rn_flags & RC_NODE_OLD) ||
3107 		    !rc_node_hold_flag(pp, RC_NODE_IN_TX)) {
3108 			rc_node_rele_locked(pp);
3109 			continue;
3110 		}
3111 		if (!(pp->rn_flags & RC_NODE_OLD)) {
3112 			(void) pthread_mutex_unlock(&pp->rn_lock);
3113 			break;
3114 		}
3115 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3116 		rc_node_rele_locked(pp);
3117 	}
3118 
3119 	(void) pthread_mutex_lock(&np->rn_lock);
3120 	if (!(np->rn_flags & (RC_NODE_OLD | RC_NODE_DEAD)) ||
3121 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3122 	    np->rn_other_refs_held != 0) {
3123 		np->rn_flags &= ~RC_NODE_UNREFED;
3124 		(void) pthread_mutex_lock(&pp->rn_lock);
3125 
3126 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3127 		rc_node_rele_locked(pp);
3128 		return;
3129 	}
3130 
3131 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3132 		(void) pthread_mutex_unlock(&np->rn_lock);
3133 
3134 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3135 		rc_node_rele_locked(pp);
3136 
3137 		(void) pthread_mutex_lock(&np->rn_lock);
3138 		goto died;
3139 	}
3140 
3141 	rc_node_delete_hold(np, 0);
3142 
3143 	(void) pthread_mutex_lock(&np->rn_lock);
3144 	if (!(np->rn_flags & RC_NODE_OLD) ||
3145 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3146 	    np->rn_other_refs_held != 0) {
3147 		np->rn_flags &= ~RC_NODE_UNREFED;
3148 		rc_node_delete_rele(np, 0);
3149 
3150 		(void) pthread_mutex_lock(&pp->rn_lock);
3151 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3152 		rc_node_rele_locked(pp);
3153 		return;
3154 	}
3155 
3156 	np->rn_flags |= RC_NODE_DEAD;
3157 	rc_node_hold_locked(np);
3158 	rc_node_delete_children(np, 0);
3159 
3160 	/*
3161 	 * It's gone -- remove it from the former chain and destroy it.
3162 	 */
3163 	(void) pthread_mutex_lock(&pp->rn_lock);
3164 	for (cur = pp; cur != NULL && cur->rn_former != np;
3165 	    cur = cur->rn_former)
3166 		;
3167 	assert(cur != NULL && cur != np);
3168 
3169 	cur->rn_former = np->rn_former;
3170 	np->rn_former = NULL;
3171 
3172 	rc_node_rele_flag(pp, RC_NODE_IN_TX);
3173 	rc_node_rele_locked(pp);
3174 
3175 	(void) pthread_mutex_lock(&np->rn_lock);
3176 	assert(np->rn_flags & RC_NODE_ON_FORMER);
3177 	np->rn_flags &= ~(RC_NODE_UNREFED | RC_NODE_ON_FORMER);
3178 	(void) pthread_mutex_unlock(&np->rn_lock);
3179 	rc_node_destroy(np);
3180 	return;
3181 
3182 died:
3183 	np->rn_flags &= ~RC_NODE_UNREFED;
3184 	unrefed = (np->rn_refs == 0 && np->rn_other_refs == 0 &&
3185 	    np->rn_other_refs_held == 0);
3186 	(void) pthread_mutex_unlock(&np->rn_lock);
3187 	if (unrefed)
3188 		rc_node_destroy(np);
3189 }
3190 
3191 /*
3192  * Fails with
3193  *   _NOT_SET
3194  *   _DELETED
3195  *   _BAD_REQUEST
3196  *   _PERMISSION_DENIED
3197  *   _NO_RESOURCES
3198  * and whatever object_delete() fails with.
3199  */
3200 int
3201 rc_node_delete(rc_node_ptr_t *npp)
3202 {
3203 	rc_node_t *np, *np_orig;
3204 	rc_node_t *pp = NULL;
3205 	int rc;
3206 	rc_node_pg_notify_t *pnp;
3207 	cache_bucket_t *bp;
3208 	rc_notify_delete_t *ndp;
3209 	permcheck_t *pcp;
3210 	int granted;
3211 
3212 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3213 
3214 	switch (np->rn_id.rl_type) {
3215 	case REP_PROTOCOL_ENTITY_SERVICE:
3216 	case REP_PROTOCOL_ENTITY_INSTANCE:
3217 	case REP_PROTOCOL_ENTITY_SNAPSHOT:
3218 		break;			/* deletable */
3219 
3220 	case REP_PROTOCOL_ENTITY_SCOPE:
3221 	case REP_PROTOCOL_ENTITY_SNAPLEVEL:
3222 		/* Scopes and snaplevels are indelible. */
3223 		(void) pthread_mutex_unlock(&np->rn_lock);
3224 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3225 
3226 	case REP_PROTOCOL_ENTITY_CPROPERTYGRP:
3227 		(void) pthread_mutex_unlock(&np->rn_lock);
3228 		np = np->rn_cchain[0];
3229 		RC_NODE_CHECK_AND_LOCK(np);
3230 		break;
3231 
3232 	case REP_PROTOCOL_ENTITY_PROPERTYGRP:
3233 		if (np->rn_id.rl_ids[ID_SNAPSHOT] == 0)
3234 			break;
3235 
3236 		/* Snapshot property groups are indelible. */
3237 		(void) pthread_mutex_unlock(&np->rn_lock);
3238 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
3239 
3240 	case REP_PROTOCOL_ENTITY_PROPERTY:
3241 		(void) pthread_mutex_unlock(&np->rn_lock);
3242 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3243 
3244 	default:
3245 		assert(0);
3246 		abort();
3247 		break;
3248 	}
3249 
3250 	np_orig = np;
3251 	rc_node_hold_locked(np);	/* simplifies rest of the code */
3252 
3253 again:
3254 	/*
3255 	 * The following loop is to deal with the fact that snapshots and
3256 	 * property groups are moving targets -- changes to them result
3257 	 * in a new "child" node.  Since we can only delete from the top node,
3258 	 * we have to loop until we have a non-RC_NODE_OLD version.
3259 	 */
3260 	for (;;) {
3261 		if (!rc_node_wait_flag(np,
3262 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
3263 			rc_node_rele_locked(np);
3264 			return (REP_PROTOCOL_FAIL_DELETED);
3265 		}
3266 
3267 		if (np->rn_flags & RC_NODE_OLD) {
3268 			rc_node_rele_locked(np);
3269 			np = cache_lookup(&np_orig->rn_id);
3270 			assert(np != np_orig);
3271 
3272 			if (np == NULL) {
3273 				rc = REP_PROTOCOL_FAIL_DELETED;
3274 				goto fail;
3275 			}
3276 			(void) pthread_mutex_lock(&np->rn_lock);
3277 			continue;
3278 		}
3279 
3280 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3281 			rc_node_rele_locked(np);
3282 			rc_node_clear(npp, 1);
3283 			return (REP_PROTOCOL_FAIL_DELETED);
3284 		}
3285 
3286 		/*
3287 		 * Mark our parent as children changing.  this call drops our
3288 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3289 		 * pp's lock held
3290 		 */
3291 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3292 		if (pp == NULL) {
3293 			/* our parent is gone, we're going next... */
3294 			rc_node_rele(np);
3295 
3296 			rc_node_clear(npp, 1);
3297 			return (REP_PROTOCOL_FAIL_DELETED);
3298 		}
3299 
3300 		rc_node_hold_locked(pp);		/* hold for later */
3301 		(void) pthread_mutex_unlock(&pp->rn_lock);
3302 
3303 		(void) pthread_mutex_lock(&np->rn_lock);
3304 		if (!(np->rn_flags & RC_NODE_OLD))
3305 			break;			/* not old -- we're done */
3306 
3307 		(void) pthread_mutex_unlock(&np->rn_lock);
3308 		(void) pthread_mutex_lock(&pp->rn_lock);
3309 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3310 		rc_node_rele_locked(pp);
3311 		(void) pthread_mutex_lock(&np->rn_lock);
3312 		continue;			/* loop around and try again */
3313 	}
3314 	/*
3315 	 * Everyone out of the pool -- we grab everything but
3316 	 * RC_NODE_USING_PARENT (including RC_NODE_DYING) to keep
3317 	 * any changes from occurring while we are attempting to
3318 	 * delete the node.
3319 	 */
3320 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3321 		(void) pthread_mutex_unlock(&np->rn_lock);
3322 		rc = REP_PROTOCOL_FAIL_DELETED;
3323 		goto fail;
3324 	}
3325 
3326 	assert(!(np->rn_flags & RC_NODE_OLD));
3327 
3328 	if (!client_is_privileged()) {
3329 		/* permission check */
3330 		(void) pthread_mutex_unlock(&np->rn_lock);
3331 
3332 #ifdef NATIVE_BUILD
3333 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3334 #else
3335 		pcp = pc_create();
3336 		if (pcp != NULL) {
3337 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
3338 
3339 			/* add .smf.modify.<type> for pgs. */
3340 			if (rc == REP_PROTOCOL_SUCCESS && np->rn_id.rl_type ==
3341 			    REP_PROTOCOL_ENTITY_PROPERTYGRP) {
3342 				const char * const auth =
3343 				    perm_auth_for_pgtype(np->rn_type);
3344 
3345 				if (auth != NULL)
3346 					rc = perm_add_enabling(pcp, auth);
3347 			}
3348 
3349 			if (rc == REP_PROTOCOL_SUCCESS) {
3350 				granted = perm_granted(pcp);
3351 
3352 				if (granted < 0)
3353 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3354 			}
3355 
3356 			pc_free(pcp);
3357 		} else {
3358 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3359 		}
3360 
3361 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
3362 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3363 #endif /* NATIVE_BUILD */
3364 
3365 		if (rc != REP_PROTOCOL_SUCCESS) {
3366 			(void) pthread_mutex_lock(&np->rn_lock);
3367 			rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3368 			(void) pthread_mutex_unlock(&np->rn_lock);
3369 			goto fail;
3370 		}
3371 
3372 		(void) pthread_mutex_lock(&np->rn_lock);
3373 	}
3374 
3375 	ndp = uu_zalloc(sizeof (*ndp));
3376 	if (ndp == NULL) {
3377 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3378 		(void) pthread_mutex_unlock(&np->rn_lock);
3379 		rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3380 		goto fail;
3381 	}
3382 
3383 	rc_node_delete_hold(np, 1);	/* hold entire subgraph, drop lock */
3384 
3385 	rc = object_delete(np);
3386 
3387 	if (rc != REP_PROTOCOL_SUCCESS) {
3388 		(void) pthread_mutex_lock(&np->rn_lock);
3389 		rc_node_delete_rele(np, 1);		/* drops lock */
3390 		uu_free(ndp);
3391 		goto fail;
3392 	}
3393 
3394 	/*
3395 	 * Now, delicately unlink and delete the object.
3396 	 *
3397 	 * Create the delete notification, atomically remove
3398 	 * from the hash table and set the NODE_DEAD flag, and
3399 	 * remove from the parent's children list.
3400 	 */
3401 	rc_notify_node_delete(ndp, np); /* frees or uses ndp */
3402 
3403 	bp = cache_hold(np->rn_hash);
3404 
3405 	(void) pthread_mutex_lock(&np->rn_lock);
3406 	cache_remove_unlocked(bp, np);
3407 	cache_release(bp);
3408 
3409 	np->rn_flags |= RC_NODE_DEAD;
3410 	if (pp != NULL) {
3411 		(void) pthread_mutex_unlock(&np->rn_lock);
3412 
3413 		(void) pthread_mutex_lock(&pp->rn_lock);
3414 		(void) pthread_mutex_lock(&np->rn_lock);
3415 		uu_list_remove(pp->rn_children, np);
3416 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3417 		(void) pthread_mutex_unlock(&pp->rn_lock);
3418 		np->rn_flags &= ~RC_NODE_IN_PARENT;
3419 	}
3420 	/*
3421 	 * finally, propagate death to our children, handle notifications,
3422 	 * and release our hold.
3423 	 */
3424 	rc_node_hold_locked(np);	/* hold for delete */
3425 	rc_node_delete_children(np, 1);	/* drops DYING_FLAGS, lock, ref */
3426 
3427 	rc_node_clear(npp, 1);
3428 
3429 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
3430 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
3431 		rc_pg_notify_fire(pnp);
3432 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
3433 	rc_notify_remove_node(np);
3434 
3435 	rc_node_rele(np);
3436 
3437 	return (rc);
3438 
3439 fail:
3440 	rc_node_rele(np);
3441 	if (rc == REP_PROTOCOL_FAIL_DELETED)
3442 		rc_node_clear(npp, 1);
3443 	if (pp != NULL) {
3444 		(void) pthread_mutex_lock(&pp->rn_lock);
3445 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3446 		rc_node_rele_locked(pp);	/* drop ref and lock */
3447 	}
3448 	return (rc);
3449 }
3450 
3451 int
3452 rc_node_next_snaplevel(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3453 {
3454 	rc_node_t *np;
3455 	rc_node_t *cp, *pp;
3456 	int res;
3457 
3458 	rc_node_clear(cpp, 0);
3459 
3460 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3461 
3462 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT &&
3463 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL) {
3464 		(void) pthread_mutex_unlock(&np->rn_lock);
3465 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
3466 	}
3467 
3468 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3469 		if ((res = rc_node_fill_children(np,
3470 		    REP_PROTOCOL_ENTITY_SNAPLEVEL)) != REP_PROTOCOL_SUCCESS) {
3471 			(void) pthread_mutex_unlock(&np->rn_lock);
3472 			return (res);
3473 		}
3474 
3475 		for (cp = uu_list_first(np->rn_children);
3476 		    cp != NULL;
3477 		    cp = uu_list_next(np->rn_children, cp)) {
3478 			if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3479 				continue;
3480 			rc_node_hold(cp);
3481 			break;
3482 		}
3483 
3484 		(void) pthread_mutex_unlock(&np->rn_lock);
3485 	} else {
3486 		HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_USING_PARENT);
3487 		/*
3488 		 * mark our parent as children changing.  This call drops our
3489 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3490 		 * pp's lock held
3491 		 */
3492 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3493 		if (pp == NULL) {
3494 			/* our parent is gone, we're going next... */
3495 
3496 			rc_node_clear(npp, 1);
3497 			return (REP_PROTOCOL_FAIL_DELETED);
3498 		}
3499 
3500 		/*
3501 		 * find the next snaplevel
3502 		 */
3503 		cp = np;
3504 		while ((cp = uu_list_next(pp->rn_children, cp)) != NULL &&
3505 		    cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3506 			;
3507 
3508 		/* it must match the snaplevel list */
3509 		assert((cp == NULL && np->rn_snaplevel->rsl_next == NULL) ||
3510 		    (cp != NULL && np->rn_snaplevel->rsl_next ==
3511 		    cp->rn_snaplevel));
3512 
3513 		if (cp != NULL)
3514 			rc_node_hold(cp);
3515 
3516 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3517 
3518 		(void) pthread_mutex_unlock(&pp->rn_lock);
3519 	}
3520 
3521 	rc_node_assign(cpp, cp);
3522 	if (cp != NULL) {
3523 		rc_node_rele(cp);
3524 
3525 		return (REP_PROTOCOL_SUCCESS);
3526 	}
3527 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
3528 }
3529 
3530 /*
3531  * This call takes a snapshot (np) and either:
3532  *	an existing snapid (to be associated with np), or
3533  *	a non-NULL parentp (from which a new snapshot is taken, and associated
3534  *	    with np)
3535  *
3536  * To do the association, np is duplicated, the duplicate is made to
3537  * represent the new snapid, and np is replaced with the new rc_node_t on
3538  * np's parent's child list. np is placed on the new node's rn_former list,
3539  * and replaces np in cache_hash (so rc_node_update() will find the new one).
3540  */
3541 static int
3542 rc_attach_snapshot(rc_node_t *np, uint32_t snapid, rc_node_t *parentp)
3543 {
3544 	rc_node_t *np_orig;
3545 	rc_node_t *nnp, *prev;
3546 	rc_node_t *pp;
3547 	int rc;
3548 
3549 	if (parentp != NULL)
3550 		assert(snapid == 0);
3551 
3552 	assert(MUTEX_HELD(&np->rn_lock));
3553 
3554 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3555 		(void) pthread_mutex_unlock(&np->rn_lock);
3556 		return (rc);
3557 	}
3558 
3559 	np_orig = np;
3560 	rc_node_hold_locked(np);		/* simplifies the remainder */
3561 
3562 	/*
3563 	 * get the latest node, holding RC_NODE_IN_TX to keep the rn_former
3564 	 * list from changing.
3565 	 */
3566 	for (;;) {
3567 		if (!(np->rn_flags & RC_NODE_OLD)) {
3568 			if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3569 				goto again;
3570 			}
3571 			pp = rc_node_hold_parent_flag(np,
3572 			    RC_NODE_CHILDREN_CHANGING);
3573 
3574 			(void) pthread_mutex_lock(&np->rn_lock);
3575 			if (pp == NULL) {
3576 				goto again;
3577 			}
3578 			if (np->rn_flags & RC_NODE_OLD) {
3579 				rc_node_rele_flag(pp,
3580 				    RC_NODE_CHILDREN_CHANGING);
3581 				(void) pthread_mutex_unlock(&pp->rn_lock);
3582 				goto again;
3583 			}
3584 			(void) pthread_mutex_unlock(&pp->rn_lock);
3585 
3586 			if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
3587 				/*
3588 				 * Can't happen, since we're holding our
3589 				 * parent's CHILDREN_CHANGING flag...
3590 				 */
3591 				abort();
3592 			}
3593 			break;			/* everything's ready */
3594 		}
3595 again:
3596 		rc_node_rele_locked(np);
3597 		np = cache_lookup(&np_orig->rn_id);
3598 
3599 		if (np == NULL)
3600 			return (REP_PROTOCOL_FAIL_DELETED);
3601 
3602 		(void) pthread_mutex_lock(&np->rn_lock);
3603 	}
3604 
3605 	if (parentp != NULL) {
3606 		if (pp != parentp) {
3607 			rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
3608 			goto fail;
3609 		}
3610 		nnp = NULL;
3611 	} else {
3612 		/*
3613 		 * look for a former node with the snapid we need.
3614 		 */
3615 		if (np->rn_snapshot_id == snapid) {
3616 			rc_node_rele_flag(np, RC_NODE_IN_TX);
3617 			rc_node_rele_locked(np);
3618 
3619 			(void) pthread_mutex_lock(&pp->rn_lock);
3620 			rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3621 			(void) pthread_mutex_unlock(&pp->rn_lock);
3622 			return (REP_PROTOCOL_SUCCESS);	/* nothing to do */
3623 		}
3624 
3625 		prev = np;
3626 		while ((nnp = prev->rn_former) != NULL) {
3627 			if (nnp->rn_snapshot_id == snapid) {
3628 				rc_node_hold(nnp);
3629 				break;		/* existing node with that id */
3630 			}
3631 			prev = nnp;
3632 		}
3633 	}
3634 
3635 	if (nnp == NULL) {
3636 		prev = NULL;
3637 		nnp = rc_node_alloc();
3638 		if (nnp == NULL) {
3639 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3640 			goto fail;
3641 		}
3642 
3643 		nnp->rn_id = np->rn_id;		/* structure assignment */
3644 		nnp->rn_hash = np->rn_hash;
3645 		nnp->rn_name = strdup(np->rn_name);
3646 		nnp->rn_snapshot_id = snapid;
3647 		nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
3648 
3649 		if (nnp->rn_name == NULL) {
3650 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3651 			goto fail;
3652 		}
3653 	}
3654 
3655 	(void) pthread_mutex_unlock(&np->rn_lock);
3656 
3657 	rc = object_snapshot_attach(&np->rn_id, &snapid, (parentp != NULL));
3658 
3659 	if (parentp != NULL)
3660 		nnp->rn_snapshot_id = snapid;	/* fill in new snapid */
3661 	else
3662 		assert(nnp->rn_snapshot_id == snapid);
3663 
3664 	(void) pthread_mutex_lock(&np->rn_lock);
3665 	if (rc != REP_PROTOCOL_SUCCESS)
3666 		goto fail;
3667 
3668 	/*
3669 	 * fix up the former chain
3670 	 */
3671 	if (prev != NULL) {
3672 		prev->rn_former = nnp->rn_former;
3673 		(void) pthread_mutex_lock(&nnp->rn_lock);
3674 		nnp->rn_flags &= ~RC_NODE_ON_FORMER;
3675 		nnp->rn_former = NULL;
3676 		(void) pthread_mutex_unlock(&nnp->rn_lock);
3677 	}
3678 	np->rn_flags |= RC_NODE_OLD;
3679 	(void) pthread_mutex_unlock(&np->rn_lock);
3680 
3681 	/*
3682 	 * replace np with nnp
3683 	 */
3684 	rc_node_relink_child(pp, np, nnp);
3685 
3686 	rc_node_rele(np);
3687 
3688 	return (REP_PROTOCOL_SUCCESS);
3689 
3690 fail:
3691 	rc_node_rele_flag(np, RC_NODE_IN_TX);
3692 	rc_node_rele_locked(np);
3693 	(void) pthread_mutex_lock(&pp->rn_lock);
3694 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3695 	(void) pthread_mutex_unlock(&pp->rn_lock);
3696 
3697 	if (nnp != NULL) {
3698 		if (prev == NULL)
3699 			rc_node_destroy(nnp);
3700 		else
3701 			rc_node_rele(nnp);
3702 	}
3703 
3704 	return (rc);
3705 }
3706 
3707 int
3708 rc_snapshot_take_new(rc_node_ptr_t *npp, const char *svcname,
3709     const char *instname, const char *name, rc_node_ptr_t *outpp)
3710 {
3711 	rc_node_t *np;
3712 	rc_node_t *outp = NULL;
3713 	int rc;
3714 
3715 	rc_node_clear(outpp, 0);
3716 
3717 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3718 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3719 		(void) pthread_mutex_unlock(&np->rn_lock);
3720 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3721 	}
3722 
3723 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_SNAPSHOT, name);
3724 	if (rc != REP_PROTOCOL_SUCCESS) {
3725 		(void) pthread_mutex_unlock(&np->rn_lock);
3726 		return (rc);
3727 	}
3728 
3729 	if (svcname != NULL && (rc =
3730 	    rc_check_type_name(REP_PROTOCOL_ENTITY_SERVICE, svcname)) !=
3731 	    REP_PROTOCOL_SUCCESS) {
3732 		(void) pthread_mutex_unlock(&np->rn_lock);
3733 		return (rc);
3734 	}
3735 
3736 	if (instname != NULL && (rc =
3737 	    rc_check_type_name(REP_PROTOCOL_ENTITY_INSTANCE, instname)) !=
3738 	    REP_PROTOCOL_SUCCESS) {
3739 		(void) pthread_mutex_unlock(&np->rn_lock);
3740 		return (rc);
3741 	}
3742 
3743 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3744 		(void) pthread_mutex_unlock(&np->rn_lock);
3745 		return (rc);
3746 	}
3747 
3748 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
3749 	(void) pthread_mutex_unlock(&np->rn_lock);
3750 
3751 	rc = object_snapshot_take_new(np, svcname, instname, name, &outp);
3752 
3753 	if (rc == REP_PROTOCOL_SUCCESS) {
3754 		rc_node_assign(outpp, outp);
3755 		rc_node_rele(outp);
3756 	}
3757 
3758 	(void) pthread_mutex_lock(&np->rn_lock);
3759 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
3760 	(void) pthread_mutex_unlock(&np->rn_lock);
3761 
3762 	return (rc);
3763 }
3764 
3765 int
3766 rc_snapshot_take_attach(rc_node_ptr_t *npp, rc_node_ptr_t *outpp)
3767 {
3768 	rc_node_t *np, *outp;
3769 
3770 	RC_NODE_PTR_GET_CHECK(np, npp);
3771 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3772 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3773 	}
3774 
3775 	RC_NODE_PTR_GET_CHECK_AND_LOCK(outp, outpp);
3776 	if (outp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3777 		(void) pthread_mutex_unlock(&outp->rn_lock);
3778 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3779 	}
3780 
3781 	return (rc_attach_snapshot(outp, 0, np));	/* drops outp's lock */
3782 }
3783 
3784 int
3785 rc_snapshot_attach(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3786 {
3787 	rc_node_t *np;
3788 	rc_node_t *cp;
3789 	uint32_t snapid;
3790 
3791 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3792 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3793 		(void) pthread_mutex_unlock(&np->rn_lock);
3794 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3795 	}
3796 	snapid = np->rn_snapshot_id;
3797 	(void) pthread_mutex_unlock(&np->rn_lock);
3798 
3799 	RC_NODE_PTR_GET_CHECK_AND_LOCK(cp, cpp);
3800 	if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3801 		(void) pthread_mutex_unlock(&cp->rn_lock);
3802 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3803 	}
3804 
3805 	return (rc_attach_snapshot(cp, snapid, NULL));	/* drops cp's lock */
3806 }
3807 
3808 /*
3809  * Iteration
3810  */
3811 static int
3812 rc_iter_filter_name(rc_node_t *np, void *s)
3813 {
3814 	const char *name = s;
3815 
3816 	return (strcmp(np->rn_name, name) == 0);
3817 }
3818 
3819 static int
3820 rc_iter_filter_type(rc_node_t *np, void *s)
3821 {
3822 	const char *type = s;
3823 
3824 	return (np->rn_type != NULL && strcmp(np->rn_type, type) == 0);
3825 }
3826 
/*
 * Iterator filter which accepts every node; both arguments are ignored.
 * rc_iter_create() installs it when the caller supplies no filter.
 */
/*ARGSUSED*/
static int
rc_iter_null_filter(rc_node_t *np, void *s)
{
	return (1);
}
3833 
/*
 * Allocate & initialize an rc_node_iter_t structure.  Essentially, ensure
 * np->rn_children is populated and call uu_list_walk_start(np->rn_children).
 * If successful, leaves a hold on np & increments np->rn_other_refs
 *
 * If composed is true, then set up for iteration across the top level of np's
 * composition chain.  If successful, leaves a hold on np and increments
 * rn_other_refs for the top level of np's composition chain.
 *
 * Composed property group nodes are always iterated as composed, whatever
 * the caller passed.  A NULL filter is replaced by rc_iter_null_filter().
 * *resp must be NULL on entry and is only written on success.
 *
 * Fails with
 *   _NO_RESOURCES
 *   _INVALID_TYPE
 *   _TYPE_MISMATCH - np cannot carry type children
 *   _DELETED
 */
static int
rc_iter_create(rc_node_iter_t **resp, rc_node_t *np, uint32_t type,
    rc_iter_filter_func *filter, void *arg, boolean_t composed)
{
	rc_node_iter_t *nip;
	int res;

	assert(*resp == NULL);

	nip = uu_zalloc(sizeof (*nip));
	if (nip == NULL)
		return (REP_PROTOCOL_FAIL_NO_RESOURCES);

	/* np is held by the client's rc_node_ptr_t */
	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
		composed = 1;

	if (!composed) {
		/* Plain iteration: walk np's own child list. */
		(void) pthread_mutex_lock(&np->rn_lock);

		if ((res = rc_node_fill_children(np, type)) !=
		    REP_PROTOCOL_SUCCESS) {
			(void) pthread_mutex_unlock(&np->rn_lock);
			uu_free(nip);
			return (res);
		}

		/* A negative level marks the iterator as not composed. */
		nip->rni_clevel = -1;

		nip->rni_iter = uu_list_walk_start(np->rn_children,
		    UU_WALK_ROBUST);
		if (nip->rni_iter != NULL) {
			nip->rni_iter_node = np;
			rc_node_hold_other(np);
		} else {
			/* No walk, so no other-ref was taken; just clean up. */
			(void) pthread_mutex_unlock(&np->rn_lock);
			uu_free(nip);
			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
		}
		(void) pthread_mutex_unlock(&np->rn_lock);
	} else {
		rc_node_t *ent;

		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
			/* rn_cchain isn't valid until children are loaded. */
			(void) pthread_mutex_lock(&np->rn_lock);
			res = rc_node_fill_children(np,
			    REP_PROTOCOL_ENTITY_SNAPLEVEL);
			(void) pthread_mutex_unlock(&np->rn_lock);
			if (res != REP_PROTOCOL_SUCCESS) {
				uu_free(nip);
				return (res);
			}

			/* Check for an empty snapshot. */
			if (np->rn_cchain[0] == NULL)
				goto empty;
		}

		/* Start at the top of the composition chain. */
		for (nip->rni_clevel = 0; ; ++nip->rni_clevel) {
			if (nip->rni_clevel >= COMPOSITION_DEPTH) {
				/* Empty composition chain. */
empty:
				/* Also the target of the empty-snapshot goto. */
				nip->rni_clevel = -1;
				nip->rni_iter = NULL;
				/* It's ok, iter_next() will return _DONE. */
				goto out;
			}

			ent = np->rn_cchain[nip->rni_clevel];
			assert(ent != NULL);

			/* On success we leave the loop with ent's lock held. */
			if (rc_node_check_and_lock(ent) == REP_PROTOCOL_SUCCESS)
				break;

			/* Someone deleted it, so try the next one. */
		}

		res = rc_node_fill_children(ent, type);

		if (res == REP_PROTOCOL_SUCCESS) {
			nip->rni_iter = uu_list_walk_start(ent->rn_children,
			    UU_WALK_ROBUST);

			if (nip->rni_iter == NULL)
				res = REP_PROTOCOL_FAIL_NO_RESOURCES;
			else {
				nip->rni_iter_node = ent;
				rc_node_hold_other(ent);
			}
		}

		if (res != REP_PROTOCOL_SUCCESS) {
			(void) pthread_mutex_unlock(&ent->rn_lock);
			uu_free(nip);
			return (res);
		}

		(void) pthread_mutex_unlock(&ent->rn_lock);
	}

out:
	/* Every successful path, including the empty chain, ends up here. */
	rc_node_hold(np);		/* released by rc_iter_end() */
	nip->rni_parent = np;
	nip->rni_type = type;
	nip->rni_filter = (filter != NULL)? filter : rc_iter_null_filter;
	nip->rni_filter_arg = arg;
	*resp = nip;
	return (REP_PROTOCOL_SUCCESS);
}
3960 
3961 static void
3962 rc_iter_end(rc_node_iter_t *iter)
3963 {
3964 	rc_node_t *np = iter->rni_parent;
3965 
3966 	if (iter->rni_clevel >= 0)
3967 		np = np->rn_cchain[iter->rni_clevel];
3968 
3969 	assert(MUTEX_HELD(&np->rn_lock));
3970 	if (iter->rni_iter != NULL)
3971 		uu_list_walk_end(iter->rni_iter);
3972 	iter->rni_iter = NULL;
3973 
3974 	(void) pthread_mutex_unlock(&np->rn_lock);
3975 	rc_node_rele(iter->rni_parent);
3976 	if (iter->rni_iter_node != NULL)
3977 		rc_node_rele_other(iter->rni_iter_node);
3978 }
3979 
3980 /*
3981  * Fails with
3982  *   _NOT_SET - npp is reset
3983  *   _DELETED - npp's node has been deleted
3984  *   _NOT_APPLICABLE - npp's node is not a property
3985  *   _NO_RESOURCES - out of memory
3986  */
3987 static int
3988 rc_node_setup_value_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp)
3989 {
3990 	rc_node_t *np;
3991 
3992 	rc_node_iter_t *nip;
3993 
3994 	assert(*iterp == NULL);
3995 
3996 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3997 
3998 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
3999 		(void) pthread_mutex_unlock(&np->rn_lock);
4000 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4001 	}
4002 
4003 	nip = uu_zalloc(sizeof (*nip));
4004 	if (nip == NULL) {
4005 		(void) pthread_mutex_unlock(&np->rn_lock);
4006 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4007 	}
4008 
4009 	nip->rni_parent = np;
4010 	nip->rni_iter = NULL;
4011 	nip->rni_clevel = -1;
4012 	nip->rni_type = REP_PROTOCOL_ENTITY_VALUE;
4013 	nip->rni_offset = 0;
4014 	nip->rni_last_offset = 0;
4015 
4016 	rc_node_hold_locked(np);
4017 
4018 	*iterp = nip;
4019 	(void) pthread_mutex_unlock(&np->rn_lock);
4020 
4021 	return (REP_PROTOCOL_SUCCESS);
4022 }
4023 
4024 /*
4025  * Returns:
4026  *   _NOT_SET - npp is reset
4027  *   _DELETED - npp's node has been deleted
4028  *   _TYPE_MISMATCH - npp's node is not a property
4029  *   _NOT_FOUND - property has no values
4030  *   _TRUNCATED - property has >1 values (first is written into out)
4031  *   _SUCCESS - property has 1 value (which is written into out)
4032  *
4033  * We shorten *sz_out to not include anything after the final '\0'.
4034  */
4035 int
4036 rc_node_get_property_value(rc_node_ptr_t *npp,
4037     struct rep_protocol_value_response *out, size_t *sz_out)
4038 {
4039 	rc_node_t *np;
4040 	size_t w;
4041 	int ret;
4042 
4043 	assert(*sz_out == sizeof (*out));
4044 
4045 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4046 
4047 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4048 		(void) pthread_mutex_unlock(&np->rn_lock);
4049 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4050 	}
4051 
4052 	if (np->rn_values_size == 0) {
4053 		(void) pthread_mutex_unlock(&np->rn_lock);
4054 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4055 	}
4056 	out->rpr_type = np->rn_valtype;
4057 	w = strlcpy(out->rpr_value, &np->rn_values[0],
4058 	    sizeof (out->rpr_value));
4059 
4060 	if (w >= sizeof (out->rpr_value))
4061 		backend_panic("value too large");
4062 
4063 	*sz_out = offsetof(struct rep_protocol_value_response,
4064 	    rpr_value[w + 1]);
4065 
4066 	ret = (np->rn_values_count != 1)? REP_PROTOCOL_FAIL_TRUNCATED :
4067 	    REP_PROTOCOL_SUCCESS;
4068 	(void) pthread_mutex_unlock(&np->rn_lock);
4069 	return (ret);
4070 }
4071 
4072 int
4073 rc_iter_next_value(rc_node_iter_t *iter,
4074     struct rep_protocol_value_response *out, size_t *sz_out, int repeat)
4075 {
4076 	rc_node_t *np = iter->rni_parent;
4077 	const char *vals;
4078 	size_t len;
4079 
4080 	size_t start;
4081 	size_t w;
4082 
4083 	rep_protocol_responseid_t result;
4084 
4085 	assert(*sz_out == sizeof (*out));
4086 
4087 	(void) memset(out, '\0', *sz_out);
4088 
4089 	if (iter->rni_type != REP_PROTOCOL_ENTITY_VALUE)
4090 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4091 
4092 	RC_NODE_CHECK_AND_LOCK(np);
4093 
4094 	vals = np->rn_values;
4095 	len = np->rn_values_size;
4096 
4097 	out->rpr_type = np->rn_valtype;
4098 
4099 	start = (repeat)? iter->rni_last_offset : iter->rni_offset;
4100 
4101 	if (len == 0 || start >= len) {
4102 		result = REP_PROTOCOL_DONE;
4103 		*sz_out -= sizeof (out->rpr_value);
4104 	} else {
4105 		w = strlcpy(out->rpr_value, &vals[start],
4106 		    sizeof (out->rpr_value));
4107 
4108 		if (w >= sizeof (out->rpr_value))
4109 			backend_panic("value too large");
4110 
4111 		*sz_out = offsetof(struct rep_protocol_value_response,
4112 		    rpr_value[w + 1]);
4113 
4114 		/*
4115 		 * update the offsets if we're not repeating
4116 		 */
4117 		if (!repeat) {
4118 			iter->rni_last_offset = iter->rni_offset;
4119 			iter->rni_offset += (w + 1);
4120 		}
4121 
4122 		result = REP_PROTOCOL_SUCCESS;
4123 	}
4124 
4125 	(void) pthread_mutex_unlock(&np->rn_lock);
4126 	return (result);
4127 }
4128 
4129 /*
4130  * Entry point for ITER_START from client.c.  Validate the arguments & call
4131  * rc_iter_create().
4132  *
4133  * Fails with
4134  *   _NOT_SET
4135  *   _DELETED
4136  *   _TYPE_MISMATCH - np cannot carry type children
4137  *   _BAD_REQUEST - flags is invalid
4138  *		    pattern is invalid
4139  *   _NO_RESOURCES
4140  *   _INVALID_TYPE
4141  *   _TYPE_MISMATCH - *npp cannot have children of type
4142  *   _BACKEND_ACCESS
4143  */
4144 int
4145 rc_node_setup_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp,
4146     uint32_t type, uint32_t flags, const char *pattern)
4147 {
4148 	rc_node_t *np;
4149 	rc_iter_filter_func *f = NULL;
4150 	int rc;
4151 
4152 	RC_NODE_PTR_GET_CHECK(np, npp);
4153 
4154 	if (pattern != NULL && pattern[0] == '\0')
4155 		pattern = NULL;
4156 
4157 	if (type == REP_PROTOCOL_ENTITY_VALUE) {
4158 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
4159 			return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4160 		if (flags != RP_ITER_START_ALL || pattern != NULL)
4161 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4162 
4163 		rc = rc_node_setup_value_iter(npp, iterp);
4164 		assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4165 		return (rc);
4166 	}
4167 
4168 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
4169 	    REP_PROTOCOL_SUCCESS)
4170 		return (rc);
4171 
4172 	if (((flags & RP_ITER_START_FILT_MASK) == RP_ITER_START_ALL) ^
4173 	    (pattern == NULL))
4174 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4175 
4176 	/* Composition only works for instances & snapshots. */
4177 	if ((flags & RP_ITER_START_COMPOSED) &&
4178 	    (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE &&
4179 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT))
4180 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4181 
4182 	if (pattern != NULL) {
4183 		if ((rc = rc_check_type_name(type, pattern)) !=
4184 		    REP_PROTOCOL_SUCCESS)
4185 			return (rc);
4186 		pattern = strdup(pattern);
4187 		if (pattern == NULL)
4188 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4189 	}
4190 
4191 	switch (flags & RP_ITER_START_FILT_MASK) {
4192 	case RP_ITER_START_ALL:
4193 		f = NULL;
4194 		break;
4195 	case RP_ITER_START_EXACT:
4196 		f = rc_iter_filter_name;
4197 		break;
4198 	case RP_ITER_START_PGTYPE:
4199 		if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4200 			free((void *)pattern);
4201 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4202 		}
4203 		f = rc_iter_filter_type;
4204 		break;
4205 	default:
4206 		free((void *)pattern);
4207 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4208 	}
4209 
4210 	rc = rc_iter_create(iterp, np, type, f, (void *)pattern,
4211 	    flags & RP_ITER_START_COMPOSED);
4212 	if (rc != REP_PROTOCOL_SUCCESS && pattern != NULL)
4213 		free((void *)pattern);
4214 
4215 	return (rc);
4216 }
4217 
4218 /*
4219  * Do uu_list_walk_next(iter->rni_iter) until we find a child which matches
4220  * the filter.
4221  * For composed iterators, then check to see if there's an overlapping entity
4222  * (see embedded comments).  If we reach the end of the list, start over at
4223  * the next level.
4224  *
4225  * Returns
4226  *   _BAD_REQUEST - iter walks values
4227  *   _TYPE_MISMATCH - iter does not walk type entities
4228  *   _DELETED - parent was deleted
4229  *   _NO_RESOURCES
4230  *   _INVALID_TYPE - type is invalid
4231  *   _DONE
4232  *   _SUCCESS
4233  *
4234  * For composed property group iterators, can also return
4235  *   _TYPE_MISMATCH - parent cannot have type children
4236  */
4237 int
4238 rc_iter_next(rc_node_iter_t *iter, rc_node_ptr_t *out, uint32_t type)
4239 {
4240 	rc_node_t *np = iter->rni_parent;
4241 	rc_node_t *res;
4242 	int rc;
4243 
4244 	if (iter->rni_type == REP_PROTOCOL_ENTITY_VALUE)
4245 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4246 
4247 	if (iter->rni_iter == NULL) {
4248 		rc_node_clear(out, 0);
4249 		return (REP_PROTOCOL_DONE);
4250 	}
4251 
4252 	if (iter->rni_type != type) {
4253 		rc_node_clear(out, 0);
4254 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4255 	}
4256 
4257 	(void) pthread_mutex_lock(&np->rn_lock);  /* held by _iter_create() */
4258 
4259 	if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
4260 		(void) pthread_mutex_unlock(&np->rn_lock);
4261 		rc_node_clear(out, 1);
4262 		return (REP_PROTOCOL_FAIL_DELETED);
4263 	}
4264 
4265 	if (iter->rni_clevel >= 0) {
4266 		/* Composed iterator.  Iterate over appropriate level. */
4267 		(void) pthread_mutex_unlock(&np->rn_lock);
4268 		np = np->rn_cchain[iter->rni_clevel];
4269 		/*
4270 		 * If iter->rni_parent is an instance or a snapshot, np must
4271 		 * be valid since iter holds iter->rni_parent & possible
4272 		 * levels (service, instance, snaplevel) cannot be destroyed
4273 		 * while rni_parent is held.  If iter->rni_parent is
4274 		 * a composed property group then rc_node_setup_cpg() put
4275 		 * a hold on np.
4276 		 */
4277 
4278 		(void) pthread_mutex_lock(&np->rn_lock);
4279 
4280 		if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
4281 			(void) pthread_mutex_unlock(&np->rn_lock);
4282 			rc_node_clear(out, 1);
4283 			return (REP_PROTOCOL_FAIL_DELETED);
4284 		}
4285 	}
4286 
4287 	assert(np->rn_flags & RC_NODE_HAS_CHILDREN);
4288 
4289 	for (;;) {
4290 		res = uu_list_walk_next(iter->rni_iter);
4291 		if (res == NULL) {
4292 			rc_node_t *parent = iter->rni_parent;
4293 
4294 #if COMPOSITION_DEPTH == 2
4295 			if (iter->rni_clevel < 0 || iter->rni_clevel == 1) {
4296 				/* release walker and lock */
4297 				rc_iter_end(iter);
4298 				break;
4299 			}
4300 
4301 			/* Stop walking current level. */
4302 			uu_list_walk_end(iter->rni_iter);
4303 			iter->rni_iter = NULL;
4304 			(void) pthread_mutex_unlock(&np->rn_lock);
4305 			rc_node_rele_other(iter->rni_iter_node);
4306 			iter->rni_iter_node = NULL;
4307 
4308 			/* Start walking next level. */
4309 			++iter->rni_clevel;
4310 			np = parent->rn_cchain[iter->rni_clevel];
4311 			assert(np != NULL);
4312 #else
4313 #error This code must be updated.
4314 #endif
4315 
4316 			(void) pthread_mutex_lock(&np->rn_lock);
4317 
4318 			rc = rc_node_fill_children(np, iter->rni_type);
4319 
4320 			if (rc == REP_PROTOCOL_SUCCESS) {
4321 				iter->rni_iter =
4322 				    uu_list_walk_start(np->rn_children,
4323 					UU_WALK_ROBUST);
4324 
4325 				if (iter->rni_iter == NULL)
4326 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
4327 				else {
4328 					iter->rni_iter_node = np;
4329 					rc_node_hold_other(np);
4330 				}
4331 			}
4332 
4333 			if (rc != REP_PROTOCOL_SUCCESS) {
4334 				(void) pthread_mutex_unlock(&np->rn_lock);
4335 				rc_node_clear(out, 0);
4336 				return (rc);
4337 			}
4338 
4339 			continue;
4340 		}
4341 
4342 		if (res->rn_id.rl_type != type ||
4343 		    !iter->rni_filter(res, iter->rni_filter_arg))
4344 			continue;
4345 
4346 		/*
4347 		 * If we're composed and not at the top level, check to see if
4348 		 * there's an entity at a higher level with the same name.  If
4349 		 * so, skip this one.
4350 		 */
4351 		if (iter->rni_clevel > 0) {
4352 			rc_node_t *ent = iter->rni_parent->rn_cchain[0];
4353 			rc_node_t *pg;
4354 
4355 #if COMPOSITION_DEPTH == 2
4356 			assert(iter->rni_clevel == 1);
4357 
4358 			(void) pthread_mutex_unlock(&np->rn_lock);
4359 			(void) pthread_mutex_lock(&ent->rn_lock);
4360 			rc = rc_node_find_named_child(ent, res->rn_name, type,
4361 			    &pg);
4362 			if (rc == REP_PROTOCOL_SUCCESS && pg != NULL)
4363 				rc_node_rele(pg);
4364 			(void) pthread_mutex_unlock(&ent->rn_lock);
4365 			if (rc != REP_PROTOCOL_SUCCESS) {
4366 				rc_node_clear(out, 0);
4367 				return (rc);
4368 			}
4369 			(void) pthread_mutex_lock(&np->rn_lock);
4370 
4371 			/* Make sure np isn't being deleted all of a sudden. */
4372 			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
4373 				(void) pthread_mutex_unlock(&np->rn_lock);
4374 				rc_node_clear(out, 1);
4375 				return (REP_PROTOCOL_FAIL_DELETED);
4376 			}
4377 
4378 			if (pg != NULL)
4379 				/* Keep going. */
4380 				continue;
4381 #else
4382 #error This code must be updated.
4383 #endif
4384 		}
4385 
4386 		/*
4387 		 * If we're composed, iterating over property groups, and not
4388 		 * at the bottom level, check to see if there's a pg at lower
4389 		 * level with the same name.  If so, return a cpg.
4390 		 */
4391 		if (iter->rni_clevel >= 0 &&
4392 		    type == REP_PROTOCOL_ENTITY_PROPERTYGRP &&
4393 		    iter->rni_clevel < COMPOSITION_DEPTH - 1) {
4394 #if COMPOSITION_DEPTH == 2
4395 			rc_node_t *pg;
4396 			rc_node_t *ent = iter->rni_parent->rn_cchain[1];
4397 
4398 			rc_node_hold(res);	/* While we drop np->rn_lock */
4399 
4400 			(void) pthread_mutex_unlock(&np->rn_lock);
4401 			(void) pthread_mutex_lock(&ent->rn_lock);
4402 			rc = rc_node_find_named_child(ent, res->rn_name, type,
4403 			    &pg);
4404 			/* holds pg if not NULL */
4405 			(void) pthread_mutex_unlock(&ent->rn_lock);
4406 			if (rc != REP_PROTOCOL_SUCCESS) {
4407 				rc_node_rele(res);
4408 				rc_node_clear(out, 0);
4409 				return (rc);
4410 			}
4411 
4412 			(void) pthread_mutex_lock(&np->rn_lock);
4413 			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
4414 				(void) pthread_mutex_unlock(&np->rn_lock);
4415 				rc_node_rele(res);
4416 				if (pg != NULL)
4417 					rc_node_rele(pg);
4418 				rc_node_clear(out, 1);
4419 				return (REP_PROTOCOL_FAIL_DELETED);
4420 			}
4421 
4422 			if (pg == NULL) {
4423 				rc_node_rele(res);
4424 			} else {
4425 				rc_node_t *cpg;
4426 
4427 				/* Keep res held for rc_node_setup_cpg(). */
4428 
4429 				cpg = rc_node_alloc();
4430 				if (cpg == NULL) {
4431 					(void) pthread_mutex_unlock(
4432 					    &np->rn_lock);
4433 					rc_node_rele(res);
4434 					rc_node_rele(pg);
4435 					rc_node_clear(out, 0);
4436 					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4437 				}
4438 
4439 				switch (rc_node_setup_cpg(cpg, res, pg)) {
4440 				case REP_PROTOCOL_SUCCESS:
4441 					res = cpg;
4442 					break;
4443 
4444 				case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
4445 					/* Nevermind. */
4446 					rc_node_destroy(cpg);
4447 					rc_node_rele(pg);
4448 					rc_node_rele(res);
4449 					break;
4450 
4451 				case REP_PROTOCOL_FAIL_NO_RESOURCES:
4452 					rc_node_destroy(cpg);
4453 					(void) pthread_mutex_unlock(
4454 					    &np->rn_lock);
4455 					rc_node_rele(res);
4456 					rc_node_rele(pg);
4457 					rc_node_clear(out, 0);
4458 					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4459 
4460 				default:
4461 					assert(0);
4462 					abort();
4463 				}
4464 			}
4465 #else
4466 #error This code must be updated.
4467 #endif
4468 		}
4469 
4470 		rc_node_hold(res);
4471 		(void) pthread_mutex_unlock(&np->rn_lock);
4472 		break;
4473 	}
4474 	rc_node_assign(out, res);
4475 
4476 	if (res == NULL)
4477 		return (REP_PROTOCOL_DONE);
4478 	rc_node_rele(res);
4479 	return (REP_PROTOCOL_SUCCESS);
4480 }
4481 
4482 void
4483 rc_iter_destroy(rc_node_iter_t **nipp)
4484 {
4485 	rc_node_iter_t *nip = *nipp;
4486 	rc_node_t *np;
4487 
4488 	if (nip == NULL)
4489 		return;				/* already freed */
4490 
4491 	np = nip->rni_parent;
4492 
4493 	if (nip->rni_filter_arg != NULL)
4494 		free(nip->rni_filter_arg);
4495 	nip->rni_filter_arg = NULL;
4496 
4497 	if (nip->rni_type == REP_PROTOCOL_ENTITY_VALUE ||
4498 	    nip->rni_iter != NULL) {
4499 		if (nip->rni_clevel < 0)
4500 			(void) pthread_mutex_lock(&np->rn_lock);
4501 		else
4502 			(void) pthread_mutex_lock(
4503 			    &np->rn_cchain[nip->rni_clevel]->rn_lock);
4504 		rc_iter_end(nip);		/* release walker and lock */
4505 	}
4506 	nip->rni_parent = NULL;
4507 
4508 	uu_free(nip);
4509 	*nipp = NULL;
4510 }
4511 
4512 int
4513 rc_node_setup_tx(rc_node_ptr_t *npp, rc_node_ptr_t *txp)
4514 {
4515 	rc_node_t *np;
4516 	permcheck_t *pcp;
4517 	int ret;
4518 	int authorized = 0;
4519 
4520 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
4521 
4522 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
4523 		rc_node_rele(np);
4524 		np = np->rn_cchain[0];
4525 		RC_NODE_CHECK_AND_HOLD(np);
4526 	}
4527 
4528 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4529 		rc_node_rele(np);
4530 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4531 	}
4532 
4533 	if (np->rn_id.rl_ids[ID_SNAPSHOT] != 0) {
4534 		rc_node_rele(np);
4535 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4536 	}
4537 
4538 	if (client_is_privileged())
4539 		goto skip_checks;
4540 
4541 #ifdef NATIVE_BUILD
4542 	rc_node_rele(np);
4543 	return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4544 #else
4545 	/* permission check */
4546 	pcp = pc_create();
4547 	if (pcp == NULL) {
4548 		rc_node_rele(np);
4549 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4550 	}
4551 
4552 	if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&	/* instance pg */
4553 	    ((strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0 &&
4554 	    strcmp(np->rn_type, AUTH_PG_ACTIONS_TYPE) == 0) ||
4555 	    (strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
4556 	    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
4557 		rc_node_t *instn;
4558 
4559 		/* solaris.smf.manage can be used. */
4560 		ret = perm_add_enabling(pcp, AUTH_MANAGE);
4561 
4562 		if (ret != REP_PROTOCOL_SUCCESS) {
4563 			pc_free(pcp);
4564 			rc_node_rele(np);
4565 			return (ret);
4566 		}
4567 
4568 		/* general/action_authorization values can be used. */
4569 		ret = rc_node_parent(np, &instn);
4570 		if (ret != REP_PROTOCOL_SUCCESS) {
4571 			assert(ret == REP_PROTOCOL_FAIL_DELETED);
4572 			rc_node_rele(np);
4573 			pc_free(pcp);
4574 			return (REP_PROTOCOL_FAIL_DELETED);
4575 		}
4576 
4577 		assert(instn->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
4578 
4579 		ret = perm_add_inst_action_auth(pcp, instn);
4580 		rc_node_rele(instn);
4581 		switch (ret) {
4582 		case REP_PROTOCOL_SUCCESS:
4583 			break;
4584 
4585 		case REP_PROTOCOL_FAIL_DELETED:
4586 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
4587 			rc_node_rele(np);
4588 			pc_free(pcp);
4589 			return (ret);
4590 
4591 		default:
4592 			bad_error("perm_add_inst_action_auth", ret);
4593 		}
4594 
4595 		if (strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0)
4596 			authorized = 1;		/* Don't check on commit. */
4597 	} else {
4598 		ret = perm_add_enabling(pcp, AUTH_MODIFY);
4599 
4600 		if (ret == REP_PROTOCOL_SUCCESS) {
4601 			/* propertygroup-type-specific authorization */
4602 			/* no locking because rn_type won't change anyway */
4603 			const char * const auth =
4604 			    perm_auth_for_pgtype(np->rn_type);
4605 
4606 			if (auth != NULL)
4607 				ret = perm_add_enabling(pcp, auth);
4608 		}
4609 
4610 		if (ret == REP_PROTOCOL_SUCCESS)
4611 			/* propertygroup/transaction-type-specific auths */
4612 			ret =
4613 			    perm_add_enabling_values(pcp, np, AUTH_PROP_VALUE);
4614 
4615 		if (ret == REP_PROTOCOL_SUCCESS)
4616 			ret =
4617 			    perm_add_enabling_values(pcp, np, AUTH_PROP_MODIFY);
4618 
4619 		/* AUTH_MANAGE can manipulate general/AUTH_PROP_ACTION */
4620 		if (ret == REP_PROTOCOL_SUCCESS &&
4621 		    strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
4622 		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0)
4623 			ret = perm_add_enabling(pcp, AUTH_MANAGE);
4624 
4625 		if (ret != REP_PROTOCOL_SUCCESS) {
4626 			pc_free(pcp);
4627 			rc_node_rele(np);
4628 			return (ret);
4629 		}
4630 	}
4631 
4632 	ret = perm_granted(pcp);
4633 	if (ret != 1) {
4634 		pc_free(pcp);
4635 		rc_node_rele(np);
4636 		return (ret == 0 ? REP_PROTOCOL_FAIL_PERMISSION_DENIED :
4637 		    REP_PROTOCOL_FAIL_NO_RESOURCES);
4638 	}
4639 
4640 	pc_free(pcp);
4641 #endif /* NATIVE_BUILD */
4642 
4643 skip_checks:
4644 	rc_node_assign(txp, np);
4645 	txp->rnp_authorized = authorized;
4646 
4647 	rc_node_rele(np);
4648 	return (REP_PROTOCOL_SUCCESS);
4649 }
4650 
4651 /*
4652  * Return 1 if the given transaction commands only modify the values of
4653  * properties other than "modify_authorization".  Return -1 if any of the
4654  * commands are invalid, and 0 otherwise.
4655  */
4656 static int
4657 tx_allow_value(const void *cmds_arg, size_t cmds_sz, rc_node_t *pg)
4658 {
4659 	const struct rep_protocol_transaction_cmd *cmds;
4660 	uintptr_t loc;
4661 	uint32_t sz;
4662 	rc_node_t *prop;
4663 	boolean_t ok;
4664 
4665 	assert(!MUTEX_HELD(&pg->rn_lock));
4666 
4667 	loc = (uintptr_t)cmds_arg;
4668 
4669 	while (cmds_sz > 0) {
4670 		cmds = (struct rep_protocol_transaction_cmd *)loc;
4671 
4672 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4673 			return (-1);
4674 
4675 		sz = cmds->rptc_size;
4676 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4677 			return (-1);
4678 
4679 		sz = TX_SIZE(sz);
4680 		if (sz > cmds_sz)
4681 			return (-1);
4682 
4683 		switch (cmds[0].rptc_action) {
4684 		case REP_PROTOCOL_TX_ENTRY_CLEAR:
4685 			break;
4686 
4687 		case REP_PROTOCOL_TX_ENTRY_REPLACE:
4688 			/* Check type */
4689 			(void) pthread_mutex_lock(&pg->rn_lock);
4690 			if (rc_node_find_named_child(pg,
4691 			    (const char *)cmds[0].rptc_data,
4692 			    REP_PROTOCOL_ENTITY_PROPERTY, &prop) ==
4693 			    REP_PROTOCOL_SUCCESS) {
4694 				ok = (prop != NULL &&
4695 				    prop->rn_valtype == cmds[0].rptc_type);
4696 			} else {
4697 				/* Return more particular error? */
4698 				ok = B_FALSE;
4699 			}
4700 			(void) pthread_mutex_unlock(&pg->rn_lock);
4701 			if (ok)
4702 				break;
4703 			return (0);
4704 
4705 		default:
4706 			return (0);
4707 		}
4708 
4709 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_MODIFY)
4710 		    == 0)
4711 			return (0);
4712 
4713 		loc += sz;
4714 		cmds_sz -= sz;
4715 	}
4716 
4717 	return (1);
4718 }
4719 
4720 /*
4721  * Return 1 if any of the given transaction commands affect
4722  * "action_authorization".  Return -1 if any of the commands are invalid and
4723  * 0 in all other cases.
4724  */
4725 static int
4726 tx_modifies_action(const void *cmds_arg, size_t cmds_sz)
4727 {
4728 	const struct rep_protocol_transaction_cmd *cmds;
4729 	uintptr_t loc;
4730 	uint32_t sz;
4731 
4732 	loc = (uintptr_t)cmds_arg;
4733 
4734 	while (cmds_sz > 0) {
4735 		cmds = (struct rep_protocol_transaction_cmd *)loc;
4736 
4737 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4738 			return (-1);
4739 
4740 		sz = cmds->rptc_size;
4741 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4742 			return (-1);
4743 
4744 		sz = TX_SIZE(sz);
4745 		if (sz > cmds_sz)
4746 			return (-1);
4747 
4748 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_ACTION)
4749 		    == 0)
4750 			return (1);
4751 
4752 		loc += sz;
4753 		cmds_sz -= sz;
4754 	}
4755 
4756 	return (0);
4757 }
4758 
4759 /*
4760  * Returns 1 if the transaction commands only modify properties named
4761  * 'enabled'.
4762  */
4763 static int
4764 tx_only_enabled(const void *cmds_arg, size_t cmds_sz)
4765 {
4766 	const struct rep_protocol_transaction_cmd *cmd;
4767 	uintptr_t loc;
4768 	uint32_t sz;
4769 
4770 	loc = (uintptr_t)cmds_arg;
4771 
4772 	while (cmds_sz > 0) {
4773 		cmd = (struct rep_protocol_transaction_cmd *)loc;
4774 
4775 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4776 			return (-1);
4777 
4778 		sz = cmd->rptc_size;
4779 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4780 			return (-1);
4781 
4782 		sz = TX_SIZE(sz);
4783 		if (sz > cmds_sz)
4784 			return (-1);
4785 
4786 		if (strcmp((const char *)cmd->rptc_data, AUTH_PROP_ENABLED)
4787 		    != 0)
4788 			return (0);
4789 
4790 		loc += sz;
4791 		cmds_sz -= sz;
4792 	}
4793 
4794 	return (1);
4795 }
4796 
4797 int
4798 rc_tx_commit(rc_node_ptr_t *txp, const void *cmds, size_t cmds_sz)
4799 {
4800 	rc_node_t *np = txp->rnp_node;
4801 	rc_node_t *pp;
4802 	rc_node_t *nnp;
4803 	rc_node_pg_notify_t *pnp;
4804 	int rc;
4805 	permcheck_t *pcp;
4806 	int granted, normal;
4807 
4808 	RC_NODE_CHECK(np);
4809 
4810 	if (!client_is_privileged() && !txp->rnp_authorized) {
4811 #ifdef NATIVE_BUILD
4812 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4813 #else
4814 		/* permission check: depends on contents of transaction */
4815 		pcp = pc_create();
4816 		if (pcp == NULL)
4817 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4818 
4819 		/* If normal is cleared, we won't do the normal checks. */
4820 		normal = 1;
4821 		rc = REP_PROTOCOL_SUCCESS;
4822 
4823 		if (strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
4824 		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0) {
4825 			/* Touching general[framework]/action_authorization? */
4826 			rc = tx_modifies_action(cmds, cmds_sz);
4827 			if (rc == -1) {
4828 				pc_free(pcp);
4829 				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4830 			}
4831 
4832 			if (rc) {
4833 				/* Yes: only AUTH_MANAGE can be used. */
4834 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
4835 				normal = 0;
4836 			} else {
4837 				rc = REP_PROTOCOL_SUCCESS;
4838 			}
4839 		} else if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&
4840 		    strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
4841 		    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0) {
4842 			rc_node_t *instn;
4843 
4844 			rc = tx_only_enabled(cmds, cmds_sz);
4845 			if (rc == -1) {
4846 				pc_free(pcp);
4847 				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4848 			}
4849 
4850 			if (rc) {
4851 				rc = rc_node_parent(np, &instn);
4852 				if (rc != REP_PROTOCOL_SUCCESS) {
4853 					assert(rc == REP_PROTOCOL_FAIL_DELETED);
4854 					pc_free(pcp);
4855 					return (rc);
4856 				}
4857 
4858 				assert(instn->rn_id.rl_type ==
4859 				    REP_PROTOCOL_ENTITY_INSTANCE);
4860 
4861 				rc = perm_add_inst_action_auth(pcp, instn);
4862 				rc_node_rele(instn);
4863 				switch (rc) {
4864 				case REP_PROTOCOL_SUCCESS:
4865 					break;
4866 
4867 				case REP_PROTOCOL_FAIL_DELETED:
4868 				case REP_PROTOCOL_FAIL_NO_RESOURCES:
4869 					pc_free(pcp);
4870 					return (rc);
4871 
4872 				default:
4873 					bad_error("perm_add_inst_action_auth",
4874 					    rc);
4875 				}
4876 			} else {
4877 				rc = REP_PROTOCOL_SUCCESS;
4878 			}
4879 		}
4880 
4881 		if (rc == REP_PROTOCOL_SUCCESS && normal) {
4882 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
4883 
4884 			if (rc == REP_PROTOCOL_SUCCESS) {
4885 				/* Add pgtype-specific authorization. */
4886 				const char * const auth =
4887 				    perm_auth_for_pgtype(np->rn_type);
4888 
4889 				if (auth != NULL)
4890 					rc = perm_add_enabling(pcp, auth);
4891 			}
4892 
4893 			/* Add pg-specific modify_authorization auths. */
4894 			if (rc == REP_PROTOCOL_SUCCESS)
4895 				rc = perm_add_enabling_values(pcp, np,
4896 				    AUTH_PROP_MODIFY);
4897 
4898 			/* If value_authorization values are ok, add them. */
4899 			if (rc == REP_PROTOCOL_SUCCESS) {
4900 				rc = tx_allow_value(cmds, cmds_sz, np);
4901 				if (rc == -1)
4902 					rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
4903 				else if (rc)
4904 					rc = perm_add_enabling_values(pcp, np,
4905 					    AUTH_PROP_VALUE);
4906 			}
4907 		}
4908 
4909 		if (rc == REP_PROTOCOL_SUCCESS) {
4910 			granted = perm_granted(pcp);
4911 			if (granted < 0)
4912 				rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
4913 		}
4914 
4915 		pc_free(pcp);
4916 
4917 		if (rc != REP_PROTOCOL_SUCCESS)
4918 			return (rc);
4919 
4920 		if (!granted)
4921 			return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4922 #endif /* NATIVE_BUILD */
4923 	}
4924 
4925 	nnp = rc_node_alloc();
4926 	if (nnp == NULL)
4927 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4928 
4929 	nnp->rn_id = np->rn_id;			/* structure assignment */
4930 	nnp->rn_hash = np->rn_hash;
4931 	nnp->rn_name = strdup(np->rn_name);
4932 	nnp->rn_type = strdup(np->rn_type);
4933 	nnp->rn_pgflags = np->rn_pgflags;
4934 
4935 	nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
4936 
4937 	if (nnp->rn_name == NULL || nnp->rn_type == NULL) {
4938 		rc_node_destroy(nnp);
4939 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4940 	}
4941 
4942 	(void) pthread_mutex_lock(&np->rn_lock);
4943 	/*
4944 	 * We must have all of the old properties in the cache, or the
4945 	 * database deletions could cause inconsistencies.
4946 	 */
4947 	if ((rc = rc_node_fill_children(np, REP_PROTOCOL_ENTITY_PROPERTY)) !=
4948 	    REP_PROTOCOL_SUCCESS) {
4949 		(void) pthread_mutex_unlock(&np->rn_lock);
4950 		rc_node_destroy(nnp);
4951 		return (rc);
4952 	}
4953 
4954 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
4955 		(void) pthread_mutex_unlock(&np->rn_lock);
4956 		rc_node_destroy(nnp);
4957 		return (REP_PROTOCOL_FAIL_DELETED);
4958 	}
4959 
4960 	if (np->rn_flags & RC_NODE_OLD) {
4961 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
4962 		(void) pthread_mutex_unlock(&np->rn_lock);
4963 		rc_node_destroy(nnp);
4964 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
4965 	}
4966 
4967 	pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
4968 	if (pp == NULL) {
4969 		/* our parent is gone, we're going next... */
4970 		rc_node_destroy(nnp);
4971 		(void) pthread_mutex_lock(&np->rn_lock);
4972 		if (np->rn_flags & RC_NODE_OLD) {
4973 			(void) pthread_mutex_unlock(&np->rn_lock);
4974 			return (REP_PROTOCOL_FAIL_NOT_LATEST);
4975 		}
4976 		(void) pthread_mutex_unlock(&np->rn_lock);
4977 		return (REP_PROTOCOL_FAIL_DELETED);
4978 	}
4979 	(void) pthread_mutex_unlock(&pp->rn_lock);
4980 
4981 	/*
4982 	 * prepare for the transaction
4983 	 */
4984 	(void) pthread_mutex_lock(&np->rn_lock);
4985 	if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
4986 		(void) pthread_mutex_unlock(&np->rn_lock);
4987 		(void) pthread_mutex_lock(&pp->rn_lock);
4988 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
4989 		(void) pthread_mutex_unlock(&pp->rn_lock);
4990 		rc_node_destroy(nnp);
4991 		return (REP_PROTOCOL_FAIL_DELETED);
4992 	}
4993 	nnp->rn_gen_id = np->rn_gen_id;
4994 	(void) pthread_mutex_unlock(&np->rn_lock);
4995 
4996 	/* Sets nnp->rn_gen_id on success. */
4997 	rc = object_tx_commit(&np->rn_id, cmds, cmds_sz, &nnp->rn_gen_id);
4998 
4999 	(void) pthread_mutex_lock(&np->rn_lock);
5000 	if (rc != REP_PROTOCOL_SUCCESS) {
5001 		rc_node_rele_flag(np, RC_NODE_IN_TX);
5002 		(void) pthread_mutex_unlock(&np->rn_lock);
5003 		(void) pthread_mutex_lock(&pp->rn_lock);
5004 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
5005 		(void) pthread_mutex_unlock(&pp->rn_lock);
5006 		rc_node_destroy(nnp);
5007 		rc_node_clear(txp, 0);
5008 		if (rc == REP_PROTOCOL_DONE)
5009 			rc = REP_PROTOCOL_SUCCESS; /* successful empty tx */
5010 		return (rc);
5011 	}
5012 
5013 	/*
5014 	 * Notify waiters
5015 	 */
5016 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5017 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
5018 		rc_pg_notify_fire(pnp);
5019 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5020 
5021 	np->rn_flags |= RC_NODE_OLD;
5022 	(void) pthread_mutex_unlock(&np->rn_lock);
5023 
5024 	rc_notify_remove_node(np);
5025 
5026 	/*
5027 	 * replace np with nnp
5028 	 */
5029 	rc_node_relink_child(pp, np, nnp);
5030 
5031 	/*
5032 	 * all done -- clear the transaction.
5033 	 */
5034 	rc_node_clear(txp, 0);
5035 
5036 	return (REP_PROTOCOL_SUCCESS);
5037 }
5038 
5039 void
5040 rc_pg_notify_init(rc_node_pg_notify_t *pnp)
5041 {
5042 	uu_list_node_init(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5043 	pnp->rnpn_pg = NULL;
5044 	pnp->rnpn_fd = -1;
5045 }
5046 
5047 int
5048 rc_pg_notify_setup(rc_node_pg_notify_t *pnp, rc_node_ptr_t *npp, int fd)
5049 {
5050 	rc_node_t *np;
5051 
5052 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
5053 
5054 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
5055 		(void) pthread_mutex_unlock(&np->rn_lock);
5056 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5057 	}
5058 
5059 	/*
5060 	 * wait for any transaction in progress to complete
5061 	 */
5062 	if (!rc_node_wait_flag(np, RC_NODE_IN_TX)) {
5063 		(void) pthread_mutex_unlock(&np->rn_lock);
5064 		return (REP_PROTOCOL_FAIL_DELETED);
5065 	}
5066 
5067 	if (np->rn_flags & RC_NODE_OLD) {
5068 		(void) pthread_mutex_unlock(&np->rn_lock);
5069 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
5070 	}
5071 
5072 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5073 	rc_pg_notify_fire(pnp);
5074 	pnp->rnpn_pg = np;
5075 	pnp->rnpn_fd = fd;
5076 	(void) uu_list_insert_after(np->rn_pg_notify_list, NULL, pnp);
5077 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5078 
5079 	(void) pthread_mutex_unlock(&np->rn_lock);
5080 	return (REP_PROTOCOL_SUCCESS);
5081 }
5082 
5083 void
5084 rc_pg_notify_fini(rc_node_pg_notify_t *pnp)
5085 {
5086 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5087 	rc_pg_notify_fire(pnp);
5088 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5089 
5090 	uu_list_node_fini(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5091 }
5092 
5093 void
5094 rc_notify_info_init(rc_notify_info_t *rnip)
5095 {
5096 	int i;
5097 
5098 	uu_list_node_init(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5099 	uu_list_node_init(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5100 	    rc_notify_pool);
5101 
5102 	rnip->rni_notify.rcn_node = NULL;
5103 	rnip->rni_notify.rcn_info = rnip;
5104 
5105 	bzero(rnip->rni_namelist, sizeof (rnip->rni_namelist));
5106 	bzero(rnip->rni_typelist, sizeof (rnip->rni_typelist));
5107 
5108 	(void) pthread_cond_init(&rnip->rni_cv, NULL);
5109 
5110 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5111 		rnip->rni_namelist[i] = NULL;
5112 		rnip->rni_typelist[i] = NULL;
5113 	}
5114 }
5115 
5116 static void
5117 rc_notify_info_insert_locked(rc_notify_info_t *rnip)
5118 {
5119 	assert(MUTEX_HELD(&rc_pg_notify_lock));
5120 
5121 	assert(!(rnip->rni_flags & RC_NOTIFY_ACTIVE));
5122 
5123 	rnip->rni_flags |= RC_NOTIFY_ACTIVE;
5124 	(void) uu_list_insert_after(rc_notify_info_list, NULL, rnip);
5125 	(void) uu_list_insert_before(rc_notify_list, NULL, &rnip->rni_notify);
5126 }
5127 
5128 static void
5129 rc_notify_info_remove_locked(rc_notify_info_t *rnip)
5130 {
5131 	rc_notify_t *me = &rnip->rni_notify;
5132 	rc_notify_t *np;
5133 
5134 	assert(MUTEX_HELD(&rc_pg_notify_lock));
5135 
5136 	assert(rnip->rni_flags & RC_NOTIFY_ACTIVE);
5137 
5138 	assert(!(rnip->rni_flags & RC_NOTIFY_DRAIN));
5139 	rnip->rni_flags |= RC_NOTIFY_DRAIN;
5140 	(void) pthread_cond_broadcast(&rnip->rni_cv);
5141 
5142 	(void) uu_list_remove(rc_notify_info_list, rnip);
5143 
5144 	/*
5145 	 * clean up any notifications at the beginning of the list
5146 	 */
5147 	if (uu_list_first(rc_notify_list) == me) {
5148 		while ((np = uu_list_next(rc_notify_list, me)) != NULL &&
5149 		    np->rcn_info == NULL)
5150 			rc_notify_remove_locked(np);
5151 	}
5152 	(void) uu_list_remove(rc_notify_list, me);
5153 
5154 	while (rnip->rni_waiters) {
5155 		(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5156 		(void) pthread_cond_broadcast(&rnip->rni_cv);
5157 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5158 	}
5159 
5160 	rnip->rni_flags &= ~(RC_NOTIFY_DRAIN | RC_NOTIFY_ACTIVE);
5161 }
5162 
5163 static int
5164 rc_notify_info_add_watch(rc_notify_info_t *rnip, const char **arr,
5165     const char *name)
5166 {
5167 	int i;
5168 	int rc;
5169 	char *f;
5170 
5171 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_PROPERTYGRP, name);
5172 	if (rc != REP_PROTOCOL_SUCCESS)
5173 		return (rc);
5174 
5175 	f = strdup(name);
5176 	if (f == NULL)
5177 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5178 
5179 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5180 
5181 	while (rnip->rni_flags & RC_NOTIFY_EMPTYING)
5182 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5183 
5184 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++)
5185 		if (arr[i] == NULL)
5186 			break;
5187 
5188 	if (i == RC_NOTIFY_MAX_NAMES) {
5189 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5190 		free(f);
5191 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5192 	}
5193 
5194 	arr[i] = f;
5195 	if (!(rnip->rni_flags & RC_NOTIFY_ACTIVE))
5196 		rc_notify_info_insert_locked(rnip);
5197 
5198 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5199 	return (REP_PROTOCOL_SUCCESS);
5200 }
5201 
5202 int
5203 rc_notify_info_add_name(rc_notify_info_t *rnip, const char *name)
5204 {
5205 	return (rc_notify_info_add_watch(rnip, rnip->rni_namelist, name));
5206 }
5207 
5208 int
5209 rc_notify_info_add_type(rc_notify_info_t *rnip, const char *type)
5210 {
5211 	return (rc_notify_info_add_watch(rnip, rnip->rni_typelist, type));
5212 }
5213 
5214 /*
5215  * Wait for and report an event of interest to rnip, a notification client
5216  */
5217 int
5218 rc_notify_info_wait(rc_notify_info_t *rnip, rc_node_ptr_t *out,
5219     char *outp, size_t sz)
5220 {
5221 	rc_notify_t *np;
5222 	rc_notify_t *me = &rnip->rni_notify;
5223 	rc_node_t *nnp;
5224 	rc_notify_delete_t *ndp;
5225 
5226 	int am_first_info;
5227 
5228 	if (sz > 0)
5229 		outp[0] = 0;
5230 
5231 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5232 
5233 	while ((rnip->rni_flags & (RC_NOTIFY_ACTIVE | RC_NOTIFY_DRAIN)) ==
5234 	    RC_NOTIFY_ACTIVE) {
5235 		/*
5236 		 * If I'm first on the notify list, it is my job to
5237 		 * clean up any notifications I pass by.  I can't do that
5238 		 * if someone is blocking the list from removals, so I
5239 		 * have to wait until they have all drained.
5240 		 */
5241 		am_first_info = (uu_list_first(rc_notify_list) == me);
5242 		if (am_first_info && rc_notify_in_use) {
5243 			rnip->rni_waiters++;
5244 			(void) pthread_cond_wait(&rc_pg_notify_cv,
5245 			    &rc_pg_notify_lock);
5246 			rnip->rni_waiters--;
5247 			continue;
5248 		}
5249 
5250 		/*
5251 		 * Search the list for a node of interest.
5252 		 */
5253 		np = uu_list_next(rc_notify_list, me);
5254 		while (np != NULL && !rc_notify_info_interested(rnip, np)) {
5255 			rc_notify_t *next = uu_list_next(rc_notify_list, np);
5256 
5257 			if (am_first_info) {
5258 				if (np->rcn_info) {
5259 					/*
5260 					 * Passing another client -- stop
5261 					 * cleaning up notifications
5262 					 */
5263 					am_first_info = 0;
5264 				} else {
5265 					rc_notify_remove_locked(np);
5266 				}
5267 			}
5268 			np = next;
5269 		}
5270 
5271 		/*
5272 		 * Nothing of interest -- wait for notification
5273 		 */
5274 		if (np == NULL) {
5275 			rnip->rni_waiters++;
5276 			(void) pthread_cond_wait(&rnip->rni_cv,
5277 			    &rc_pg_notify_lock);
5278 			rnip->rni_waiters--;
5279 			continue;
5280 		}
5281 
5282 		/*
5283 		 * found something to report -- move myself after the
5284 		 * notification and process it.
5285 		 */
5286 		(void) uu_list_remove(rc_notify_list, me);
5287 		(void) uu_list_insert_after(rc_notify_list, np, me);
5288 
5289 		if ((ndp = np->rcn_delete) != NULL) {
5290 			(void) strlcpy(outp, ndp->rnd_fmri, sz);
5291 			if (am_first_info)
5292 				rc_notify_remove_locked(np);
5293 			(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5294 			rc_node_clear(out, 0);
5295 			return (REP_PROTOCOL_SUCCESS);
5296 		}
5297 
5298 		nnp = np->rcn_node;
5299 		assert(nnp != NULL);
5300 
5301 		/*
5302 		 * We can't bump nnp's reference count without grabbing its
5303 		 * lock, and rc_pg_notify_lock is a leaf lock.  So we
5304 		 * temporarily block all removals to keep nnp from
5305 		 * disappearing.
5306 		 */
5307 		rc_notify_in_use++;
5308 		assert(rc_notify_in_use > 0);
5309 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5310 
5311 		rc_node_assign(out, nnp);
5312 
5313 		(void) pthread_mutex_lock(&rc_pg_notify_lock);
5314 		assert(rc_notify_in_use > 0);
5315 		rc_notify_in_use--;
5316 		if (am_first_info)
5317 			rc_notify_remove_locked(np);
5318 		if (rc_notify_in_use == 0)
5319 			(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5320 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5321 
5322 		return (REP_PROTOCOL_SUCCESS);
5323 	}
5324 	/*
5325 	 * If we're the last one out, let people know it's clear.
5326 	 */
5327 	if (rnip->rni_waiters == 0)
5328 		(void) pthread_cond_broadcast(&rnip->rni_cv);
5329 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5330 	return (REP_PROTOCOL_DONE);
5331 }
5332 
5333 static void
5334 rc_notify_info_reset(rc_notify_info_t *rnip)
5335 {
5336 	int i;
5337 
5338 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5339 	if (rnip->rni_flags & RC_NOTIFY_ACTIVE)
5340 		rc_notify_info_remove_locked(rnip);
5341 	assert(!(rnip->rni_flags & (RC_NOTIFY_DRAIN | RC_NOTIFY_EMPTYING)));
5342 	rnip->rni_flags |= RC_NOTIFY_EMPTYING;
5343 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5344 
5345 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5346 		if (rnip->rni_namelist[i] != NULL) {
5347 			free((void *)rnip->rni_namelist[i]);
5348 			rnip->rni_namelist[i] = NULL;
5349 		}
5350 		if (rnip->rni_typelist[i] != NULL) {
5351 			free((void *)rnip->rni_typelist[i]);
5352 			rnip->rni_typelist[i] = NULL;
5353 		}
5354 	}
5355 
5356 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5357 	rnip->rni_flags &= ~RC_NOTIFY_EMPTYING;
5358 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5359 }
5360 
5361 void
5362 rc_notify_info_fini(rc_notify_info_t *rnip)
5363 {
5364 	rc_notify_info_reset(rnip);
5365 
5366 	uu_list_node_fini(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5367 	uu_list_node_fini(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5368 	    rc_notify_pool);
5369 }
5370