xref: /illumos-gate/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 179c3dac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/kmem.h>
29 #include <sys/disp.h>
30 #include <sys/stream.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/policy.h>
34 #include <sys/tsol/label_macro.h>
35 #include <sys/tsol/tndb.h>
36 #include <sys/tsol/tnet.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/tcp.h>
40 #include <inet/ipclassifier.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_ftable.h>
43 
44 /*
45  * This routine takes a sensitivity label as input and creates a CIPSO
46  * option in the specified buffer.  It returns the size of the CIPSO option.
47  * If the sensitivity label is too large for the CIPSO option, then 0
48  * is returned.
49  *
50  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
51  * (more accurately, success means a return value between 10 and 40).
52  */
53 
54 static int
55 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
56 {
57 	struct cipso_tag_type_1 *tt1;
58 	const _bslabel_impl_t *bsl;
59 	const uchar_t *ucp;
60 	int i;
61 
62 	if (doi == 0)
63 		return (0);
64 
65 	/* check for Admin High sensitivity label */
66 	if (blequal(sl, label2bslabel(l_admin_high)))
67 		return (0);
68 
69 	/* check whether classification will fit in one octet */
70 	bsl = (const _bslabel_impl_t *)sl;
71 	if (LCLASS(bsl) & 0xFF00)
72 		return (0);
73 
74 	/*
75 	 * Check whether compartments will fit in 30 octets.
76 	 * Compartments 241 - 256 are not allowed.
77 	 */
78 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
79 		return (0);
80 
81 	/*
82 	 * Compute option length and tag length.
83 	 * 'p' points to the last two bytes in the Sensitivity Label's
84 	 * compartments; these cannot be mapped into CIPSO compartments.
85 	 */
86 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
87 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
88 		if (*ucp != 0)
89 			break;
90 
91 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
92 
93 	if (cop == NULL)
94 		return (10 + i);
95 
96 	doi = htonl(doi);
97 	ucp = (const uchar_t *)&doi;
98 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
99 	cop[IPOPT_OLEN] = 10 + i;
100 	cop[IPOPT_OLEN+1] = ucp[0];
101 	cop[IPOPT_OLEN+2] = ucp[1];
102 	cop[IPOPT_OLEN+3] = ucp[2];
103 	cop[IPOPT_OLEN+4] = ucp[3];
104 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
105 	tt1->tag_type = 1;
106 	tt1->tag_align = 0;
107 	tt1->tag_sl = LCLASS(bsl);
108 	tt1->tag_length = 4 + i;
109 
110 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
111 
112 	return (cop[IPOPT_OLEN]);
113 }
114 
115 /*
116  * The following routine copies a datagram's option into the specified buffer
117  * (if buffer pointer is non-null), or returns a pointer to the label within
118  * the streams message (if buffer is null).  In both cases, tsol_get_option
119  * returns the option's type.
120  *
121  * tsol_get_option assumes that the specified buffer is large enough to
122  * hold the largest valid CIPSO option.  Since the total number of
123  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
124  */
125 
126 tsol_ip_label_t
127 tsol_get_option(mblk_t *mp, uchar_t **buffer)
128 {
129 	ipha_t	*ipha;
130 	uchar_t	*opt;
131 	uint32_t	totallen;
132 	uint32_t	optval;
133 	uint32_t	optlen;
134 
135 	ipha = (ipha_t *)mp->b_rptr;
136 
137 	/*
138 	 * Get length (in 4 byte octets) of IP header options.
139 	 * If header doesn't contain options, then return OPT_NONE.
140 	 */
141 	totallen = ipha->ipha_version_and_hdr_length -
142 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
143 
144 	if (totallen == 0)
145 		return (OPT_NONE);
146 
147 	totallen <<= 2;
148 
149 	/*
150 	 * Search for CIPSO option.
151 	 * If no such option is present, then return OPT_NONE.
152 	 */
153 	opt = (uchar_t *)&ipha[1];
154 	while (totallen != 0) {
155 		switch (optval = opt[IPOPT_OPTVAL]) {
156 		case IPOPT_EOL:
157 			return (OPT_NONE);
158 		case IPOPT_NOP:
159 			optlen = 1;
160 			break;
161 		default:
162 			if (totallen <= IPOPT_OLEN)
163 				return (OPT_NONE);
164 			optlen = opt[IPOPT_OLEN];
165 			if (optlen < 2)
166 				return (OPT_NONE);
167 		}
168 		if (optlen > totallen)
169 			return (OPT_NONE);
170 		/*
171 		 * Copy pointer to option into '*buffer' and
172 		 * return the option type.
173 		 */
174 		switch (optval) {
175 		case IPOPT_COMSEC:
176 			*buffer = opt;
177 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
178 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
179 				return (OPT_CIPSO);
180 			return (OPT_NONE);
181 		}
182 		totallen -= optlen;
183 		opt += optlen;
184 	}
185 	return (OPT_NONE);
186 }
187 
188 /*
189  * tsol_compute_label()
190  *
191  * This routine computes the IP label that should be on a packet based on the
192  * connection and destination information.
193  *
194  * Returns:
195  *      0		Fetched label
196  *      EACCES		The packet failed the remote host accreditation
197  *      ENOMEM		Memory allocation failure
198  *	EINVAL		Label cannot be computed
199  */
200 int
201 tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
202     boolean_t isexempt, ip_stack_t *ipst)
203 {
204 	uint_t		sec_opt_len;
205 	ts_label_t	*tsl;
206 	tsol_tpc_t	*dst_rhtp;
207 	ire_t		*ire, *sire = NULL;
208 	boolean_t	compute_label = B_FALSE;
209 	tsol_ire_gw_secattr_t *attrp;
210 	zoneid_t	zoneid, ip_zoneid;
211 
212 	ASSERT(credp != NULL);
213 
214 	if (opt_storage != NULL)
215 		opt_storage[IPOPT_OLEN] = 0;
216 
217 	if ((tsl = crgetlabel(credp)) == NULL)
218 		return (0);
219 
220 	/* always pass multicast */
221 	if (CLASSD(dst))
222 		return (0);
223 
224 	if ((dst_rhtp = find_tpc(&dst, IPV4_VERSION, B_FALSE)) == NULL) {
225 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v4,
226 		    char *, "destination ip(1) not in database (with creds(2))",
227 		    ipaddr_t, dst, cred_t *, credp);
228 		return (EINVAL);
229 	}
230 
231 	zoneid = crgetzoneid(credp);
232 
233 	/*
234 	 * For exclusive stacks we set the zoneid to zero
235 	 * to operate as if in the global zone for IRE and conn_t comparisons.
236 	 */
237 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
238 		ip_zoneid = GLOBAL_ZONEID;
239 	else
240 		ip_zoneid = zoneid;
241 
242 	switch (dst_rhtp->tpc_tp.host_type) {
243 	case UNLABELED:
244 		/*
245 		 * Only add a label if the unlabeled destination is
246 		 * not broadcast/local/loopback address, that it is
247 		 * not on the same subnet, and that the next-hop
248 		 * gateway is labeled.
249 		 */
250 		ire = ire_cache_lookup(dst, ip_zoneid, tsl, ipst);
251 
252 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
253 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
254 			IRE_REFRELE(ire);
255 			TPC_RELE(dst_rhtp);
256 			return (0);
257 		} else if (ire == NULL) {
258 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
259 			    ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
260 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
261 		}
262 
263 		/* no route to destination */
264 		if (ire == NULL) {
265 			DTRACE_PROBE4(
266 			    tx__tnopt__log__info__labeling__routedst__v4,
267 			    char *, "No route to unlabeled dest ip(1)/tpc(2) "
268 			    "with creds(3).", ipaddr_t, dst, tsol_tpc_t *,
269 			    dst_rhtp, cred_t *, credp);
270 			TPC_RELE(dst_rhtp);
271 			return (EINVAL);
272 		}
273 
274 		/*
275 		 * Prefix IRE from f-table lookup means that the destination
276 		 * is not directly connected; check the next-hop attributes.
277 		 */
278 		if (sire != NULL) {
279 			ASSERT(ire != NULL);
280 			IRE_REFRELE(ire);
281 			ire = sire;
282 		}
283 
284 		attrp = ire->ire_gw_secattr;
285 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
286 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
287 			compute_label = B_TRUE;
288 
289 		/*
290 		 * Can talk to unlabeled hosts if
291 		 * (1) zone's label matches the default label, or
292 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
293 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
294 		 */
295 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
296 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
297 		    &tsl->tsl_label) && (!isexempt ||
298 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
299 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
300 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
301 			    char *, "unlabeled dest ip(1)/tpc(2) "
302 			    "non-matching creds(3).", ipaddr_t, dst,
303 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
304 			IRE_REFRELE(ire);
305 			TPC_RELE(dst_rhtp);
306 			return (EACCES);
307 		}
308 
309 		IRE_REFRELE(ire);
310 		break;
311 
312 	case SUN_CIPSO:
313 		/*
314 		 * Can talk to labeled hosts if zone's label is within target's
315 		 * label range or set.
316 		 */
317 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
318 		    (!_blinrange(&tsl->tsl_label,
319 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
320 		    !blinlset(&tsl->tsl_label,
321 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
322 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
323 			    char *, "labeled dest ip(1)/tpc(2) "
324 			    "non-matching creds(3).", ipaddr_t, dst,
325 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
326 			TPC_RELE(dst_rhtp);
327 			return (EACCES);
328 		}
329 		compute_label = B_TRUE;
330 		break;
331 
332 	default:
333 		TPC_RELE(dst_rhtp);
334 		return (EACCES);
335 	}
336 
337 	if (!compute_label) {
338 		TPC_RELE(dst_rhtp);
339 		return (0);
340 	}
341 
342 	/* compute the CIPSO option */
343 	if (dst_rhtp->tpc_tp.host_type != UNLABELED)
344 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
345 		    tsl->tsl_doi);
346 	else
347 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
348 		    opt_storage, tsl->tsl_doi);
349 	TPC_RELE(dst_rhtp);
350 
351 	if (sec_opt_len == 0) {
352 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v4,
353 		    char *,
354 		    "options lack length for dest ip(1)/tpc(2) with creds(3).",
355 		    ipaddr_t, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
356 		return (EINVAL);
357 	}
358 
359 	return (0);
360 }
361 
362 /*
363  * Remove any existing security option (CIPSO) from the given IP
364  * header, move the 'buflen' bytes back to fill the gap, and return the number
365  * of bytes removed (as zero or negative number).  Assumes that the headers are
366  * sane.
367  */
368 int
369 tsol_remove_secopt(ipha_t *ipha, int buflen)
370 {
371 	int remlen, olen, oval, delta;
372 	uchar_t *fptr, *tptr;
373 	boolean_t noop_keep;
374 
375 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
376 	fptr = tptr = (uchar_t *)(ipha + 1);
377 	noop_keep = B_TRUE;
378 	while (remlen > 0) {
379 		oval = fptr[IPOPT_OPTVAL];
380 
381 		/* terminate on end of list */
382 		if (oval == IPOPT_EOL)
383 			break;
384 
385 		/*
386 		 * Delete any no-ops following a deleted option, at least up
387 		 * to a 4 octet alignment; copy others.
388 		 */
389 		if (oval == IPOPT_NOP) {
390 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
391 				noop_keep = B_TRUE;
392 			if (noop_keep)
393 				*tptr++ = oval;
394 			fptr++;
395 			remlen--;
396 			continue;
397 		}
398 
399 		/* stop on corrupted list; just do nothing. */
400 		if (remlen < 2)
401 			return (0);
402 		olen = fptr[IPOPT_OLEN];
403 		if (olen < 2 || olen > remlen)
404 			return (0);
405 
406 		/* skip over security options to delete them */
407 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
408 			noop_keep = B_FALSE;
409 			fptr += olen;
410 			remlen -= olen;
411 			continue;
412 		}
413 
414 		/* copy the rest */
415 		noop_keep = B_TRUE;
416 		if (tptr != fptr)
417 			ovbcopy(fptr, tptr, olen);
418 		fptr += olen;
419 		tptr += olen;
420 		remlen -= olen;
421 	}
422 
423 	fptr += remlen;
424 
425 	/* figure how much padding we'll need for header alignment */
426 	olen = (tptr - (uchar_t *)ipha) & 3;
427 	if (olen > 0) {
428 		olen = 4 - olen;
429 		/* pad with end-of-list */
430 		bzero(tptr, olen);
431 		tptr += olen;
432 	}
433 
434 	/* slide back the headers that follow and update the IP header */
435 	delta = fptr - tptr;
436 	if (delta != 0) {
437 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
438 		ipha->ipha_version_and_hdr_length -= delta / 4;
439 	}
440 	return (-delta);
441 }
442 
443 /*
444  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
445  * move the data following the IP header (up to buflen) to accomodate the new
446  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
447  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
448  * option cannot be inserted.  (Note that negative return values are possible
449  * when noops must be compressed, and that only -1 indicates error.  Successful
450  * return value is always evenly divisible by 4, by definition.)
451  */
452 int
453 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
454 {
455 	int remlen, padding, lastpad, totlen;
456 	int oval, olen;
457 	int delta;
458 	uchar_t *optr;
459 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
460 
461 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
462 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
463 	    optbuf[IPOPT_OLEN] == 0)
464 		return (0);
465 
466 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
467 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
468 
469 	/* first find the real (unpadded) length of the existing options */
470 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
471 	padding = totlen = lastpad = 0;
472 	optr = (uchar_t *)(ipha + 1);
473 	while (remlen > 0) {
474 		oval = optr[IPOPT_OPTVAL];
475 
476 		/* stop at end of list */
477 		if (oval == IPOPT_EOL)
478 			break;
479 
480 		/* skip no-ops, noting that length byte isn't present */
481 		if (oval == IPOPT_NOP) {
482 			optr++;
483 			padding++;
484 			lastpad++;
485 			totlen++;
486 			remlen--;
487 			continue;
488 		}
489 
490 		/* give up on a corrupted list; report failure */
491 		if (remlen < 2)
492 			return (-1);
493 		olen = optr[IPOPT_OLEN];
494 		if (olen < 2 || olen > remlen)
495 			return (-1);
496 
497 		lastpad = 0;
498 		optr += olen;
499 		totlen += olen;
500 		remlen -= olen;
501 	}
502 
503 	/* completely ignore any trailing padding */
504 	totlen -= lastpad;
505 	padding -= lastpad;
506 
507 	/*
508 	 * If some sort of inter-option alignment was present, try to preserve
509 	 * that alignment.  If alignment pushes us out past the maximum, then
510 	 * discard it and try to compress to fit.  (We just "assume" that any
511 	 * padding added was attempting to get 32 bit alignment.  If that's
512 	 * wrong, that's just too bad.)
513 	 */
514 	if (padding > 0) {
515 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
516 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
517 			totlen -= padding;
518 			if (olen + totlen > IP_MAX_OPT_LENGTH)
519 				return (-1);
520 			padding = 0;
521 		}
522 	}
523 
524 	/*
525 	 * Since we may need to compress or expand the option list, we write to
526 	 * a temporary buffer and then copy the results back to the IP header.
527 	 */
528 	toptr = tempopt;
529 
530 	/* compute actual option to insert */
531 	olen = optbuf[IPOPT_OLEN];
532 	bcopy(optbuf, toptr, olen);
533 	toptr += olen;
534 	if (padding > 0) {
535 		while ((olen & 3) != 0) {
536 			*toptr++ = IPOPT_NOP;
537 			olen++;
538 		}
539 	}
540 
541 	/* copy over the existing options */
542 	optr = (uchar_t *)(ipha + 1);
543 	while (totlen > 0) {
544 		oval = optr[IPOPT_OPTVAL];
545 
546 		/* totlen doesn't include end-of-list marker */
547 		ASSERT(oval != IPOPT_EOL);
548 
549 		/* handle no-ops; copy if desired, ignore otherwise */
550 		if (oval == IPOPT_NOP) {
551 			if (padding > 0) {
552 				/* note: cannot overflow due to checks above */
553 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
554 				*toptr++ = oval;
555 			}
556 			optr++;
557 			totlen--;
558 			continue;
559 		}
560 
561 		/* list cannot be corrupt at this point */
562 		ASSERT(totlen >= 2);
563 		olen = optr[IPOPT_OLEN];
564 		ASSERT(olen >= 2 && olen <= totlen);
565 
566 		/* cannot run out of room due to tests above */
567 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
568 
569 		bcopy(optr, toptr, olen);
570 		optr += olen;
571 		toptr += olen;
572 		totlen -= olen;
573 	}
574 
575 	/* figure how much padding we'll need for header alignment */
576 	olen = (toptr - tempopt) & 3;
577 	if (olen > 0) {
578 		olen = 4 - olen;
579 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
580 		/* pad with end-of-list value */
581 		bzero(toptr, olen);
582 		toptr += olen;
583 	}
584 
585 	/* move the headers as needed and update IP header */
586 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
587 	remlen = IPH_HDR_LENGTH(ipha);
588 	delta = olen - remlen;
589 	if (delta != 0) {
590 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
591 		    buflen - remlen);
592 		ipha->ipha_version_and_hdr_length += delta / 4;
593 	}
594 
595 	/* slap in the new options */
596 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
597 
598 	return (delta);
599 }
600 
601 /*
602  * tsol_check_label()
603  *
604  * This routine computes the IP label that should be on the packet based on the
605  * connection and destination information.  If the label is there, it returns
606  * zero, so the caller knows that the label is syncronized, and further calls
607  * are not required.  If the label isn't right, then the right one is inserted.
608  *
609  * The packet's header is clear before entering IPsec's engine.
610  *
611  * Returns:
612  *      0		Label on packet (was|is now) correct
613  *      EACCES		The packet failed the remote host accreditation.
614  *      ENOMEM		Memory allocation failure.
615  *	EINVAL		Label cannot be computed
616  */
617 int
618 tsol_check_label(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
619     ip_stack_t *ipst)
620 {
621 	mblk_t *mp = *mpp;
622 	ipha_t  *ipha;
623 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
624 	uint_t hlen;
625 	uint_t sec_opt_len;
626 	uchar_t *optr;
627 	int delta_remove = 0, delta_add, adjust;
628 	int retv;
629 
630 	opt_storage[IPOPT_OPTVAL] = 0;
631 
632 	ipha = (ipha_t *)mp->b_rptr;
633 
634 	retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt,
635 	    ipst);
636 	if (retv != 0)
637 		return (retv);
638 
639 	optr = (uchar_t *)(ipha + 1);
640 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
641 	sec_opt_len = opt_storage[IPOPT_OLEN];
642 
643 	if (hlen >= sec_opt_len) {
644 		/* If no option is supposed to be there, make sure it's not */
645 		if (sec_opt_len == 0 && hlen > 0 &&
646 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
647 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
648 			return (0);
649 		/* if the option is there, it's always first */
650 		if (sec_opt_len != 0 &&
651 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
652 			return (0);
653 	}
654 
655 	/*
656 	 * If there is an option there, then it must be the wrong one; delete.
657 	 */
658 	if (hlen > 0) {
659 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
660 		mp->b_wptr += delta_remove;
661 	}
662 
663 	/* Make sure we have room for the worst-case addition */
664 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
665 	hlen = (hlen + 3) & ~3;
666 	if (hlen > IP_MAX_HDR_LENGTH)
667 		hlen = IP_MAX_HDR_LENGTH;
668 	hlen -= IPH_HDR_LENGTH(ipha);
669 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
670 		int copylen;
671 		mblk_t *new_mp;
672 
673 		/* allocate enough to be meaningful, but not *too* much */
674 		copylen = MBLKL(mp);
675 		if (copylen > 256)
676 			copylen = 256;
677 		new_mp = allocb_tmpl(hlen + copylen +
678 		    (mp->b_rptr - mp->b_datap->db_base), mp);
679 		if (new_mp == NULL)
680 			return (ENOMEM);
681 
682 		/* keep the bias */
683 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
684 		new_mp->b_wptr = new_mp->b_rptr + copylen;
685 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
686 		new_mp->b_cont = mp;
687 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
688 			new_mp->b_cont = mp->b_cont;
689 			freeb(mp);
690 		}
691 		*mpp = mp = new_mp;
692 		ipha = (ipha_t *)mp->b_rptr;
693 	}
694 
695 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
696 	if (delta_add == -1)
697 		goto param_prob;
698 
699 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
700 	mp->b_wptr += delta_add;
701 
702 	adjust = delta_remove + delta_add;
703 	adjust += ntohs(ipha->ipha_length);
704 	ipha->ipha_length = htons(adjust);
705 
706 	return (0);
707 
708 param_prob:
709 	return (EINVAL);
710 }
711 
712 /*
713  * IPv6 HopOpt extension header for the label option layout:
714  *	- One octet giving the type of the 'next extension header'
715  *	- Header extension length in 8-byte words, not including the
716  *	  1st 8 bytes, but including any pad bytes at the end.
717  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
718  *	- Followed by TLV encoded IPv6 label option. Option layout is
719  *		* One octet, IP6OPT_LS
720  *		* One octet option length in bytes of the option data following
721  *		  the length, but not including any pad bytes at the end.
722  *		* Four-octet DOI (IP6LS_DOI_V4)
723  *		* One octet suboption, IP6LS_TT_V4
724  *		* One octet suboption length in bytes of the suboption
725  *		  following the suboption length, including the suboption
726  *		  header length, but not including any pad bytes at the end.
727  *	- Pad to make the extension header a multiple of 8 bytes.
728  *
729  * This function returns the contents of 'IPv6 option structure' in the above.
730  * i.e starting from the IP6OPT_LS but not including the pad at the end.
731  * The user must prepend two octets (either padding or next header / length)
732  * and append padding out to the next 8 octet boundary.
733  */
734 int
735 tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
736     uchar_t *opt_storage, boolean_t isexempt, ip_stack_t *ipst)
737 {
738 	tsol_tpc_t	*dst_rhtp;
739 	ts_label_t	*tsl;
740 	uint_t		sec_opt_len;
741 	uint32_t	doi;
742 	zoneid_t	zoneid, ip_zoneid;
743 	ire_t		*ire, *sire;
744 	tsol_ire_gw_secattr_t *attrp;
745 	boolean_t	compute_label;
746 
747 	ASSERT(credp != NULL);
748 
749 	if (ip6opt_ls == 0)
750 		return (EINVAL);
751 
752 	if (opt_storage != NULL)
753 		opt_storage[IPOPT_OLEN] = 0;
754 
755 	if ((tsl = crgetlabel(credp)) == NULL)
756 		return (0);
757 
758 	/* Always pass multicast */
759 	if (IN6_IS_ADDR_MULTICAST(dst))
760 		return (0);
761 
762 	if ((dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE)) == NULL) {
763 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v6,
764 		    char *, "destination ip6(1) not in database with creds(2)",
765 		    in6_addr_t *, dst, cred_t *, credp);
766 		return (EINVAL);
767 	}
768 
769 	zoneid = crgetzoneid(credp);
770 
771 	/*
772 	 * For exclusive stacks we set the zoneid to zero
773 	 * to operate as if in the global zone for IRE and conn_t comparisons.
774 	 */
775 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
776 		ip_zoneid = GLOBAL_ZONEID;
777 	else
778 		ip_zoneid = zoneid;
779 
780 	/*
781 	 * Fill in a V6 label.  If a new format is added here, make certain
782 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
783 	 * as TSOL_MAX_IPV6_OPTION.
784 	 */
785 	compute_label = B_FALSE;
786 	switch (dst_rhtp->tpc_tp.host_type) {
787 	case UNLABELED:
788 		/*
789 		 * Only add a label if the unlabeled destination is
790 		 * not local or loopback address, that it is
791 		 * not on the same subnet, and that the next-hop
792 		 * gateway is labeled.
793 		 */
794 		sire = NULL;
795 		ire = ire_cache_lookup_v6(dst, ip_zoneid, tsl, ipst);
796 
797 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
798 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
799 			IRE_REFRELE(ire);
800 			TPC_RELE(dst_rhtp);
801 			return (0);
802 		} else if (ire == NULL) {
803 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
804 			    &sire, ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
805 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
806 		}
807 
808 		/* no route to destination */
809 		if (ire == NULL) {
810 			DTRACE_PROBE4(
811 			    tx__tnopt__log__info__labeling__routedst__v6,
812 			    char *, "No route to unlabeled dest ip6(1)/tpc(2) "
813 			    "with creds(3).", in6_addr_t *, dst, tsol_tpc_t *,
814 			    dst_rhtp, cred_t *, credp);
815 			TPC_RELE(dst_rhtp);
816 			return (EINVAL);
817 		}
818 
819 		/*
820 		 * Prefix IRE from f-table lookup means that the destination
821 		 * is not directly connected; check the next-hop attributes.
822 		 */
823 		if (sire != NULL) {
824 			ASSERT(ire != NULL);
825 			IRE_REFRELE(ire);
826 			ire = sire;
827 		}
828 
829 		attrp = ire->ire_gw_secattr;
830 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
831 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
832 			compute_label = B_TRUE;
833 
834 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
835 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
836 		    &tsl->tsl_label) && (!isexempt ||
837 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
838 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
839 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
840 			    char *, "unlabeled dest ip6(1)/tpc(2) "
841 			    "non-matching creds(3)", in6_addr_t *, dst,
842 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
843 			IRE_REFRELE(ire);
844 			TPC_RELE(dst_rhtp);
845 			return (EACCES);
846 		}
847 
848 		IRE_REFRELE(ire);
849 		break;
850 
851 	case SUN_CIPSO:
852 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
853 		    (!_blinrange(&tsl->tsl_label,
854 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
855 		    !blinlset(&tsl->tsl_label,
856 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
857 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
858 			    char *,
859 			    "labeled dest ip6(1)/tpc(2) non-matching creds(3).",
860 			    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp,
861 			    cred_t *, credp);
862 			TPC_RELE(dst_rhtp);
863 			return (EACCES);
864 		}
865 		compute_label = B_TRUE;
866 		break;
867 
868 	default:
869 		TPC_RELE(dst_rhtp);
870 		return (EACCES);
871 	}
872 
873 	if (!compute_label) {
874 		TPC_RELE(dst_rhtp);
875 		return (0);
876 	}
877 
878 	/* compute the CIPSO option */
879 	if (opt_storage != NULL)
880 		opt_storage += 8;
881 	if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
882 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
883 		    tsl->tsl_doi);
884 	} else {
885 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
886 		    opt_storage, tsl->tsl_doi);
887 	}
888 	TPC_RELE(dst_rhtp);
889 
890 	if (sec_opt_len == 0) {
891 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v6,
892 		    char *,
893 		    "options lack length for dest ip6(1)/tpc(2) with creds(3).",
894 		    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
895 		return (EINVAL);
896 	}
897 
898 	if (opt_storage == NULL)
899 		return (0);
900 
901 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
902 		opt_storage[sec_opt_len] = IPOPT_EOL;
903 
904 	/*
905 	 * Just in case the option length is odd, round it up to the next even
906 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
907 	 * some reason.
908 	 *
909 	 * Length in the overall option header (IP6OPT_LS) does not include the
910 	 * option header itself, but the length in the suboption does include
911 	 * the suboption header.  Thus, when there's just one suboption, the
912 	 * length in the option header is the suboption length plus 4 (for the
913 	 * DOI value).
914 	 */
915 	opt_storage[-2] = IP6LS_TT_V4;
916 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
917 	opt_storage[-8] = ip6opt_ls;
918 	opt_storage[-7] = opt_storage[-1] + 4;
919 	doi = htons(IP6LS_DOI_V4);
920 	bcopy(&doi, opt_storage - 6, 4);
921 
922 	return (0);
923 }
924 
925 /*
926  * Locate the start of the IP6OPT_LS label option and return it.
927  * Also return the start of the next non-pad option in after_secoptp.
928  * Usually the label option is the first option at least when packets
929  * are generated, but for generality we don't assume that on received packets.
930  */
931 uchar_t *
932 tsol_find_secopt_v6(
933     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
934     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
935     uchar_t **after_secoptp,	/* Non-pad option following the label option */
936     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
937 {
938 	uint_t	optlen;
939 	uint_t	optused;
940 	const uchar_t *optptr;
941 	uchar_t	opt_type;
942 	const uchar_t *secopt = NULL;
943 
944 	*hbh_needed = B_FALSE;
945 	*after_secoptp = NULL;
946 	optlen = hbhlen - 2;
947 	optptr = ip6hbh + 2;
948 	while (optlen != 0) {
949 		opt_type = *optptr;
950 		if (opt_type == IP6OPT_PAD1) {
951 			optptr++;
952 			optlen--;
953 			continue;
954 		}
955 		if (optlen == 1)
956 			break;
957 		optused = 2 + optptr[1];
958 		if (optused > optlen)
959 			break;
960 		/*
961 		 * if we get here, ip6opt_ls can
962 		 * not be 0 because it will always
963 		 * match the IP6OPT_PAD1 above.
964 		 * Therefore ip6opt_ls == 0 forces
965 		 * this test to always fail here.
966 		 */
967 		if (opt_type == ip6opt_ls)
968 			secopt = optptr;
969 		else switch (opt_type) {
970 		case IP6OPT_PADN:
971 			break;
972 		default:
973 			/*
974 			 * There is at least 1 option other than
975 			 * the label option. So the hop-by-hop header is needed
976 			 */
977 			*hbh_needed = B_TRUE;
978 			if (secopt != NULL) {
979 				*after_secoptp = (uchar_t *)optptr;
980 				return ((uchar_t *)secopt);
981 			}
982 			break;
983 		}
984 		optlen -= optused;
985 		optptr += optused;
986 	}
987 	return ((uchar_t *)secopt);
988 }
989 
990 /*
991  * Remove the label option from the hop-by-hop options header if it exists.
992  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
993  * Header and data following the label option that is deleted are copied
994  * (i.e. slid backward) to the right position, and returns the number
995  * of bytes removed (as zero or negative number.)
996  */
997 int
998 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
999 {
1000 	uchar_t	*ip6hbh;	/* hop-by-hop header */
1001 	uint_t	hbhlen;		/* hop-by-hop extension header length */
1002 	uchar_t *secopt = NULL;
1003 	uchar_t *after_secopt;
1004 	uint_t	pad;
1005 	uint_t	delta;
1006 	boolean_t hbh_needed;
1007 
1008 	/*
1009 	 * hop-by-hop extension header must appear first, if it does not
1010 	 * exist, there is no label option.
1011 	 */
1012 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
1013 		return (0);
1014 
1015 	ip6hbh = (uchar_t *)&ip6h[1];
1016 	hbhlen = (ip6hbh[1] + 1) << 3;
1017 	/*
1018 	 * Locate the start of the label option if it exists and the end
1019 	 * of the label option including pads if any.
1020 	 */
1021 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1022 	    &hbh_needed);
1023 	if (secopt == NULL)
1024 		return (0);
1025 	if (!hbh_needed) {
1026 		uchar_t	next_hdr;
1027 		/*
1028 		 * The label option was the only option in the hop-by-hop
1029 		 * header. We don't need the hop-by-hop header itself any
1030 		 * longer.
1031 		 */
1032 		next_hdr = ip6hbh[0];
1033 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1034 		    buflen - (IPV6_HDR_LEN + hbhlen));
1035 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
1036 		ip6h->ip6_nxt = next_hdr;
1037 		return (-hbhlen);
1038 	}
1039 
1040 	if (after_secopt == NULL) {
1041 		/* There is no option following the label option */
1042 		after_secopt = ip6hbh + hbhlen;
1043 	}
1044 
1045 	/*
1046 	 * After deleting the label option, we need to slide the headers
1047 	 * and data back, while still maintaining the same alignment (module 8)
1048 	 * for the other options. So we slide the headers and data back only
1049 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1050 	 * with pads.
1051 	 */
1052 	delta = after_secopt - secopt;
1053 	pad = delta % 8;
1054 	if (pad == 1) {
1055 		secopt[0] = IP6OPT_PAD1;
1056 	} else if (pad > 1) {
1057 		secopt[0] = IP6OPT_PADN;
1058 		secopt[1] = pad - 2;
1059 		if (pad > 2)
1060 			bzero(&secopt[2], pad - 2);
1061 	}
1062 	secopt += pad;
1063 	delta -= pad;
1064 	ovbcopy(after_secopt, secopt,
1065 	    (uchar_t *)ip6h + buflen - after_secopt);
1066 	ip6hbh[1] -= delta/8;
1067 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
1068 
1069 	return (-delta);
1070 }
1071 
1072 /*
1073  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1074  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1075  * option is described in the block comment above tsol_compute_label_v6.
1076  * This function prepends this hop-by-hop option before any other hop-by-hop
1077  * options in the hop-by-hop header if one already exists, else a new
1078  * hop-by-hop header is created and stuffed into the packet following
1079  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1080  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1081  * extra option being added. Header and data following the position where
1082  * the label option is inserted are copied (i.e. slid forward) to the right
1083  * position.
1084  */
1085 int
1086 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1087 {
1088 	/*
1089 	 * rawlen is the length of the label option in bytes, not including
1090 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1091 	 */
1092 	uint_t	rawlen;
1093 
1094 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1095 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1096 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1097 	uint_t	pad_len;
1098 	uchar_t	*pad_position;
1099 	int	delta;		/* Actual number of bytes inserted */
1100 
1101 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1102 	ip6hbh = (uchar_t *)&ip6h[1];
1103 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1104 		/*
1105 		 * There is a hop-by-hop header present already. In order to
1106 		 * preserve the alignment of the other options at the existing
1107 		 * value (modulo 8) we need to pad the label option to a
1108 		 * multiple of 8 bytes before prepending it to the other
1109 		 * options. Slide the extension headers and data forward to
1110 		 * accomodate the label option at the start of the hop-by-hop
1111 		 * header
1112 		 */
1113 		delta = optlen = (rawlen + 7) & ~7;
1114 		pad_len = optlen - rawlen;
1115 		pad_position = ip6hbh + 2 + rawlen;
1116 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1117 		    buflen - (IPV6_HDR_LEN + 2));
1118 		/*
1119 		 * Bump up the hop-by-hop extension header length by
1120 		 * the number of 8-byte words added
1121 		 */
1122 		optlen >>= 3;
1123 		if (ip6hbh[1] + optlen > 255)
1124 			return (-1);
1125 		ip6hbh[1] += optlen;
1126 	} else {
1127 		/*
1128 		 * There is no hop-by-hop header in the packet. Construct a
1129 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1130 		 * Slide any other extension headers and data forward to
1131 		 * accomodate this hop-by-hop header
1132 		 */
1133 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1134 		pad_len = hbhlen - (2 + rawlen);
1135 		pad_position = ip6hbh + 2 + rawlen;
1136 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1137 		ip6hbh[0] = ip6h->ip6_nxt;
1138 		/*
1139 		 * hop-by-hop extension header length in 8-byte words, not
1140 		 * including the 1st 8 bytes of the hop-by-hop header.
1141 		 */
1142 		ip6hbh[1] = (hbhlen >> 3) - 1;
1143 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1144 	}
1145 	/*
1146 	 * Copy the label option into the hop-by-hop header and insert any
1147 	 * needed pads
1148 	 */
1149 	bcopy(optbuf, ip6hbh + 2, rawlen);
1150 	if (pad_len == 1) {
1151 		pad_position[0] = IP6OPT_PAD1;
1152 	} else if (pad_len > 1) {
1153 		pad_position[0] = IP6OPT_PADN;
1154 		pad_position[1] = pad_len - 2;
1155 		if (pad_len > 2)
1156 			bzero(pad_position + 2, pad_len - 2);
1157 	}
1158 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
1159 	return (delta);
1160 }
1161 
1162 /*
1163  * tsol_check_label_v6()
1164  *
1165  * This routine computes the IP label that should be on the packet based on the
1166  * connection and destination information.  It's called only by the IP
1167  * forwarding logic, because all internal modules atop IP know how to generate
1168  * their own labels.
1169  *
1170  * Returns:
1171  *      0		Label on packet was already correct
1172  *      EACCESS		The packet failed the remote host accreditation.
1173  *      ENOMEM		Memory allocation failure.
1174  */
1175 int
1176 tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
1177     ip_stack_t *ipst)
1178 {
1179 	mblk_t *mp = *mpp;
1180 	ip6_t  *ip6h;
1181 	/*
1182 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1183 	 * symmetry with IPv4. Can be relaxed if needed
1184 	 */
1185 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1186 	uint_t hlen;
1187 	uint_t sec_opt_len; /* label option length not including type, len */
1188 	int delta_remove = 0, delta_add;
1189 	int retv;
1190 	uchar_t	*after_secopt;
1191 	uchar_t	*secopt = NULL;
1192 	uchar_t	*ip6hbh;
1193 	uint_t	hbhlen;
1194 	boolean_t hbh_needed;
1195 
1196 	ip6h = (ip6_t *)mp->b_rptr;
1197 	retv = tsol_compute_label_v6(credp, &ip6h->ip6_dst, opt_storage,
1198 	    isexempt, ipst);
1199 	if (retv != 0)
1200 		return (retv);
1201 
1202 	sec_opt_len = opt_storage[1];
1203 
1204 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1205 		ip6hbh = (uchar_t *)&ip6h[1];
1206 		hbhlen = (ip6hbh[1] + 1) << 3;
1207 		secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1208 		    &hbh_needed);
1209 	}
1210 
1211 	if (sec_opt_len == 0 && secopt == NULL) {
1212 		/*
1213 		 * The packet is not supposed to have a label, and it
1214 		 * does not have one currently
1215 		 */
1216 		return (0);
1217 	}
1218 	if (secopt != NULL && sec_opt_len != 0 &&
1219 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1220 		/* The packet has the correct label already */
1221 		return (0);
1222 	}
1223 
1224 	/*
1225 	 * If there is an option there, then it must be the wrong one; delete.
1226 	 */
1227 	if (secopt != NULL) {
1228 		delta_remove = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1229 		mp->b_wptr += delta_remove;
1230 	}
1231 
1232 	/*
1233 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1234 	 * the hop-by-hop ext header's next header and length fields. Add
1235 	 * another 2 bytes for the label option type, len and then round
1236 	 * up to the next 8-byte multiple.
1237 	 */
1238 	hlen = (4 + sec_opt_len + 7) & ~7;
1239 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1240 		int copylen;
1241 		mblk_t *new_mp;
1242 		uint16_t hdr_len;
1243 
1244 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1245 		/*
1246 		 * Allocate enough to be meaningful, but not *too* much.
1247 		 * Also all the IPv6 extension headers must be in the same mblk
1248 		 */
1249 		copylen = MBLKL(mp);
1250 		if (copylen > 256)
1251 			copylen = 256;
1252 		if (copylen < hdr_len)
1253 			copylen = hdr_len;
1254 		new_mp = allocb_tmpl(hlen + copylen +
1255 		    (mp->b_rptr - mp->b_datap->db_base), mp);
1256 		if (new_mp == NULL)
1257 			return (ENOMEM);
1258 
1259 		/* keep the bias */
1260 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1261 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1262 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1263 		new_mp->b_cont = mp;
1264 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1265 			new_mp->b_cont = mp->b_cont;
1266 			freeb(mp);
1267 		}
1268 		*mpp = mp = new_mp;
1269 		ip6h = (ip6_t *)mp->b_rptr;
1270 	}
1271 
1272 	delta_add = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1273 	if (delta_add == -1)
1274 		goto param_prob;
1275 
1276 	ASSERT(mp->b_wptr + delta_add <= DB_LIM(mp));
1277 	mp->b_wptr += delta_add;
1278 
1279 	return (0);
1280 
1281 param_prob:
1282 	return (EINVAL);
1283 }
1284 
1285 /*
1286  * Update the given IPv6 "sticky options" structure to contain the provided
1287  * label, which is encoded as an IPv6 option.  Existing label is removed if
1288  * necessary, and storage is allocated/freed/resized.
1289  *
1290  * Returns 0 on success, errno on failure.
1291  */
1292 int
1293 tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
1294 {
1295 	int rawlen, optlen, newlen;
1296 	uchar_t *newopts;
1297 
1298 	/*
1299 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
1300 	 * optlen is the total length of that option, including any necessary
1301 	 * headers and padding.  newlen is the new size of the total hop-by-hop
1302 	 * options buffer, including user options.
1303 	 */
1304 	ASSERT(*labellen <= ipp->ipp_hopoptslen);
1305 	ASSERT((ipp->ipp_hopopts == NULL && ipp->ipp_hopoptslen == 0) ||
1306 	    (ipp->ipp_hopopts != NULL && ipp->ipp_hopoptslen != 0));
1307 
1308 	if ((rawlen = labelopt[1]) != 0) {
1309 		rawlen += 2;	/* add in header size */
1310 		optlen = (2 + rawlen + 7) & ~7;
1311 	} else {
1312 		optlen = 0;
1313 	}
1314 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
1315 	if (newlen == 0 && ipp->ipp_hopopts != NULL) {
1316 		/* Deleting all existing hop-by-hop options */
1317 		kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1318 		ipp->ipp_hopopts = NULL;
1319 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
1320 	} else if (optlen != *labellen) {
1321 		/* If the label not same size as last time, then reallocate */
1322 		if (newlen > IP6_MAX_OPT_LENGTH)
1323 			return (EHOSTUNREACH);
1324 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
1325 		if (newopts == NULL)
1326 			return (ENOMEM);
1327 		/*
1328 		 * If the user has hop-by-hop stickyoptions set, then copy his
1329 		 * options in after the security label.
1330 		 */
1331 		if (ipp->ipp_hopoptslen > *labellen) {
1332 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
1333 			    ipp->ipp_hopoptslen - *labellen);
1334 			/*
1335 			 * Stomp out any header gunk here - this was the
1336 			 * previous next-header and option length field.
1337 			 */
1338 			newopts[optlen] = IP6OPT_PADN;
1339 			newopts[optlen + 1] = 0;
1340 		}
1341 		if (ipp->ipp_hopopts != NULL)
1342 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1343 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
1344 	}
1345 	ipp->ipp_hopoptslen = newlen;
1346 	*labellen = optlen;
1347 
1348 	newopts = (uchar_t *)ipp->ipp_hopopts;
1349 
1350 	/* If there are any options, then fix up reported length */
1351 	if (newlen > 0) {
1352 		newopts[1] = (newlen + 7) / 8 - 1;
1353 		ipp->ipp_fields |= IPPF_HOPOPTS;
1354 	}
1355 
1356 	/* If there's a label, then insert it now */
1357 	if (optlen > 0) {
1358 		/* skip next-header and length fields */
1359 		newopts += 2;
1360 		bcopy(labelopt, newopts, rawlen);
1361 		newopts += rawlen;
1362 		/* make sure padding comes out right */
1363 		optlen -= 2 + rawlen;
1364 		if (optlen == 1) {
1365 			newopts[0] = IP6OPT_PAD1;
1366 		} else if (optlen > 1) {
1367 			newopts[0] = IP6OPT_PADN;
1368 			optlen -=  2;
1369 			newopts[1] = optlen;
1370 			if (optlen > 0)
1371 				bzero(newopts + 2, optlen);
1372 		}
1373 	}
1374 	return (0);
1375 }
1376 
1377 int
1378 tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
1379     const uchar_t *labelopt)
1380 {
1381 	int optlen, newlen;
1382 	uchar_t *newopts;
1383 
1384 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
1385 	newlen = *totlen + optlen - *labellen;
1386 	if (optlen > *labellen) {
1387 		if (newlen > IP_MAX_OPT_LENGTH)
1388 			return (EHOSTUNREACH);
1389 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1390 		if (newopts == NULL)
1391 			return (ENOMEM);
1392 		if (*totlen > *labellen) {
1393 			bcopy(*opts + *labellen, newopts + optlen,
1394 			    *totlen - *labellen);
1395 		}
1396 		if (*opts != NULL)
1397 			mi_free((char *)*opts);
1398 		*opts = newopts;
1399 	} else if (optlen < *labellen) {
1400 		if (newlen == 0 && *opts != NULL) {
1401 			mi_free((char *)*opts);
1402 			*opts = NULL;
1403 		}
1404 		if (*totlen > *labellen) {
1405 			ovbcopy(*opts + *labellen, *opts + optlen,
1406 			    *totlen - *labellen);
1407 		}
1408 	}
1409 	*totlen = newlen;
1410 	*labellen = optlen;
1411 	if (optlen > 0) {
1412 		newopts = *opts;
1413 		bcopy(labelopt, newopts, optlen);
1414 		/* check if there are user-supplied options that follow */
1415 		if (optlen < newlen) {
1416 			/* compute amount of embedded alignment needed */
1417 			optlen -= newopts[IPOPT_OLEN];
1418 			newopts += newopts[IPOPT_OLEN];
1419 			while (--optlen >= 0)
1420 				*newopts++ = IPOPT_NOP;
1421 		} else if (optlen != newopts[IPOPT_OLEN]) {
1422 			/*
1423 			 * The label option is the only option and it is
1424 			 * not a multiple of 4 bytes.
1425 			 */
1426 			optlen -= newopts[IPOPT_OLEN];
1427 			newopts += newopts[IPOPT_OLEN];
1428 			while (--optlen >= 0)
1429 				*newopts++ = IPOPT_EOL;
1430 		}
1431 	}
1432 	return (0);
1433 }
1434 
1435 /*
1436  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
1437  */
1438 boolean_t
1439 tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
1440     const uchar_t *useropts, uint_t userlen)
1441 {
1442 	int newlen;
1443 	uchar_t *newopts;
1444 
1445 	newlen = userlen + labellen;
1446 	if (newlen > *optlen) {
1447 		/* need more room */
1448 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1449 		if (newopts == NULL)
1450 			return (B_FALSE);
1451 		/*
1452 		 * The supplied *opts can't be NULL in this case,
1453 		 * since there's an existing label.
1454 		 */
1455 		if (labellen > 0)
1456 			bcopy(*opts, newopts, labellen);
1457 		if (*opts != NULL)
1458 			mi_free((char *)*opts);
1459 		*opts = newopts;
1460 	}
1461 
1462 	if (newlen == 0) {
1463 		/* special case -- no remaining IP options at all */
1464 		if (*opts != NULL) {
1465 			mi_free((char *)*opts);
1466 			*opts = NULL;
1467 		}
1468 	} else if (userlen > 0) {
1469 		/* merge in the user's options */
1470 		newopts = *opts;
1471 		if (labellen > 0) {
1472 			int extra = labellen - newopts[IPOPT_OLEN];
1473 
1474 			newopts += newopts[IPOPT_OLEN];
1475 			while (--extra >= 0)
1476 				*newopts++ = IPOPT_NOP;
1477 		}
1478 		bcopy(useropts, newopts, userlen);
1479 	}
1480 
1481 	*optlen = newlen;
1482 	return (B_TRUE);
1483 }
1484