xref: /illumos-gate/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 80ab886d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/kmem.h>
31 #include <sys/disp.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/strsun.h>
35 #include <sys/policy.h>
36 #include <sys/tsol/label_macro.h>
37 #include <sys/tsol/tndb.h>
38 #include <sys/tsol/tnet.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/tcp.h>
42 #include <inet/ipclassifier.h>
43 #include <inet/ip_ire.h>
44 
45 /*
46  * This routine takes a sensitivity label as input and creates a CIPSO
47  * option in the specified buffer.  It returns the size of the CIPSO option.
48  * If the sensitivity label is too large for the CIPSO option, then 0
49  * is returned.
50  *
51  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
52  * (more accurately, success means a return value between 10 and 40).
53  */
54 
55 static int
56 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
57 {
58 	struct cipso_tag_type_1 *tt1;
59 	const _bslabel_impl_t *bsl;
60 	const uchar_t *ucp;
61 	int i;
62 
63 	if (doi == 0)
64 		return (0);
65 
66 	/* check for Admin High sensitivity label */
67 	if (blequal(sl, label2bslabel(l_admin_high)))
68 		return (0);
69 
70 	/* check whether classification will fit in one octet */
71 	bsl = (const _bslabel_impl_t *)sl;
72 	if (LCLASS(bsl) & 0xFF00)
73 		return (0);
74 
75 	/*
76 	 * Check whether compartments will fit in 30 octets.
77 	 * Compartments 241 - 256 are not allowed.
78 	 */
79 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
80 		return (0);
81 
82 	/*
83 	 * Compute option length and tag length.
84 	 * 'p' points to the last two bytes in the Sensitivity Label's
85 	 * compartments; these cannot be mapped into CIPSO compartments.
86 	 */
87 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
88 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
89 		if (*ucp != 0)
90 			break;
91 
92 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
93 
94 	if (cop == NULL)
95 		return (10 + i);
96 
97 	doi = htonl(doi);
98 	ucp = (const uchar_t *)&doi;
99 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
100 	cop[IPOPT_OLEN] = 10 + i;
101 	cop[IPOPT_OLEN+1] = ucp[0];
102 	cop[IPOPT_OLEN+2] = ucp[1];
103 	cop[IPOPT_OLEN+3] = ucp[2];
104 	cop[IPOPT_OLEN+4] = ucp[3];
105 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
106 	tt1->tag_type = 1;
107 	tt1->tag_align = 0;
108 	tt1->tag_sl = LCLASS(bsl);
109 	tt1->tag_length = 4 + i;
110 
111 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
112 
113 	return (cop[IPOPT_OLEN]);
114 }
115 
116 /*
117  * The following routine copies a datagram's option into the specified buffer
118  * (if buffer pointer is non-null), or returns a pointer to the label within
119  * the streams message (if buffer is null).  In both cases, tsol_get_option
120  * returns the option's type.
121  *
122  * tsol_get_option assumes that the specified buffer is large enough to
123  * hold the largest valid CIPSO option.  Since the total number of
124  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
125  */
126 
127 tsol_ip_label_t
128 tsol_get_option(mblk_t *mp, uchar_t **buffer)
129 {
130 	ipha_t	*ipha;
131 	uchar_t	*opt;
132 	uint32_t	totallen;
133 	uint32_t	optval;
134 	uint32_t	optlen;
135 
136 	ipha = (ipha_t *)mp->b_rptr;
137 
138 	/*
139 	 * Get length (in 4 byte octets) of IP header options.
140 	 * If header doesn't contain options, then return OPT_NONE.
141 	 */
142 	totallen = ipha->ipha_version_and_hdr_length -
143 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
144 
145 	if (totallen == 0)
146 		return (OPT_NONE);
147 
148 	totallen <<= 2;
149 
150 	/*
151 	 * Search for CIPSO option.
152 	 * If no such option is present, then return OPT_NONE.
153 	 */
154 	opt = (uchar_t *)&ipha[1];
155 	while (totallen != 0) {
156 		switch (optval = opt[IPOPT_OPTVAL]) {
157 		case IPOPT_EOL:
158 			return (OPT_NONE);
159 		case IPOPT_NOP:
160 			optlen = 1;
161 			break;
162 		default:
163 			if (totallen <= IPOPT_OLEN)
164 				return (OPT_NONE);
165 			optlen = opt[IPOPT_OLEN];
166 			if (optlen < 2)
167 				return (OPT_NONE);
168 		}
169 		if (optlen > totallen)
170 			return (OPT_NONE);
171 		/*
172 		 * Copy pointer to option into '*buffer' and
173 		 * return the option type.
174 		 */
175 		switch (optval) {
176 		case IPOPT_COMSEC:
177 			*buffer = opt;
178 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
179 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
180 				return (OPT_CIPSO);
181 			return (OPT_NONE);
182 		}
183 		totallen -= optlen;
184 		opt += optlen;
185 	}
186 	return (OPT_NONE);
187 }
188 
189 /*
190  * tsol_compute_label()
191  *
192  * This routine computes the IP label that should be on a packet based on the
193  * connection and destination information.
194  *
195  * Returns:
196  *      0		Fetched label
197  *      EACCES		The packet failed the remote host accreditation
198  *      ENOMEM		Memory allocation failure
199  *	EINVAL		Label cannot be computed
200  */
201 int
202 tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
203     boolean_t isexempt)
204 {
205 	uint_t		sec_opt_len;
206 	ts_label_t	*tsl;
207 	tsol_tpc_t	*dst_rhtp;
208 	ire_t		*ire, *sire = NULL;
209 	boolean_t	compute_label = B_FALSE;
210 	tsol_ire_gw_secattr_t *attrp;
211 	zoneid_t	zoneid;
212 
213 	if (opt_storage != NULL)
214 		opt_storage[IPOPT_OLEN] = 0;
215 
216 	if ((tsl = crgetlabel(credp)) == NULL)
217 		return (0);
218 
219 	/* always pass multicast */
220 	if (CLASSD(dst))
221 		return (0);
222 
223 	if ((dst_rhtp = find_tpc(&dst, IPV4_VERSION, B_FALSE)) == NULL) {
224 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v4,
225 		    char *, "destination ip(1) not in database (with creds(2))",
226 		    ipaddr_t, dst, cred_t *, credp);
227 		return (EINVAL);
228 	}
229 
230 	zoneid = crgetzoneid(credp);
231 
232 	switch (dst_rhtp->tpc_tp.host_type) {
233 	case UNLABELED:
234 		/*
235 		 * Only add a label if the unlabeled destination is
236 		 * not broadcast/local/loopback address, that it is
237 		 * not on the same subnet, and that the next-hop
238 		 * gateway is labeled.
239 		 */
240 		ire = ire_cache_lookup(dst, zoneid, tsl);
241 
242 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
243 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
244 			IRE_REFRELE(ire);
245 			TPC_RELE(dst_rhtp);
246 			return (0);
247 		} else if (ire == NULL) {
248 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
249 			    zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
250 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR));
251 		}
252 
253 		/* no route to destination */
254 		if (ire == NULL) {
255 			DTRACE_PROBE4(
256 			    tx__tnopt__log__info__labeling__routedst__v4,
257 			    char *, "No route to unlabeled dest ip(1)/tpc(2) "
258 			    "with creds(3).", ipaddr_t, dst, tsol_tpc_t *,
259 			    dst_rhtp, cred_t *, credp);
260 			TPC_RELE(dst_rhtp);
261 			return (EINVAL);
262 		}
263 
264 		/*
265 		 * Prefix IRE from f-table lookup means that the destination
266 		 * is not directly connected; check the next-hop attributes.
267 		 */
268 		if (sire != NULL) {
269 			ASSERT(ire != NULL);
270 			IRE_REFRELE(ire);
271 			ire = sire;
272 		}
273 
274 		attrp = ire->ire_gw_secattr;
275 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
276 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
277 			compute_label = B_TRUE;
278 
279 		/*
280 		 * Can talk to unlabeled hosts if
281 		 * (1) zone's label matches the default label, or
282 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
283 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
284 		 */
285 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
286 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
287 		    &tsl->tsl_label) && (!isexempt ||
288 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
289 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
290 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
291 			    char *, "unlabeled dest ip(1)/tpc(2) "
292 			    "non-matching creds(3).", ipaddr_t, dst,
293 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
294 			IRE_REFRELE(ire);
295 			TPC_RELE(dst_rhtp);
296 			return (EACCES);
297 		}
298 
299 		IRE_REFRELE(ire);
300 		break;
301 
302 	case SUN_CIPSO:
303 		/*
304 		 * Can talk to labeled hosts if zone's label is within target's
305 		 * label range or set.
306 		 */
307 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
308 		    (!_blinrange(&tsl->tsl_label,
309 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
310 		    !blinlset(&tsl->tsl_label,
311 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
312 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
313 			    char *, "labeled dest ip(1)/tpc(2) "
314 			    "non-matching creds(3).", ipaddr_t, dst,
315 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
316 			TPC_RELE(dst_rhtp);
317 			return (EACCES);
318 		}
319 		compute_label = B_TRUE;
320 		break;
321 
322 	default:
323 		TPC_RELE(dst_rhtp);
324 		return (EACCES);
325 	}
326 
327 	if (!compute_label) {
328 		TPC_RELE(dst_rhtp);
329 		return (0);
330 	}
331 
332 	/* compute the CIPSO option */
333 	if (dst_rhtp->tpc_tp.host_type != UNLABELED)
334 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
335 		    tsl->tsl_doi);
336 	else
337 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
338 		    opt_storage, tsl->tsl_doi);
339 	TPC_RELE(dst_rhtp);
340 
341 	if (sec_opt_len == 0) {
342 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v4,
343 		    char *,
344 		    "options lack length for dest ip(1)/tpc(2) with creds(3).",
345 		    ipaddr_t, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
346 		return (EINVAL);
347 	}
348 
349 	return (0);
350 }
351 
352 /*
353  * Remove any existing security option (CIPSO) from the given IP
354  * header, move the 'buflen' bytes back to fill the gap, and return the number
355  * of bytes removed (as zero or negative number).  Assumes that the headers are
356  * sane.
357  */
358 int
359 tsol_remove_secopt(ipha_t *ipha, int buflen)
360 {
361 	int remlen, olen, oval, delta;
362 	uchar_t *fptr, *tptr;
363 	boolean_t noop_keep;
364 
365 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
366 	fptr = tptr = (uchar_t *)(ipha + 1);
367 	noop_keep = B_TRUE;
368 	while (remlen > 0) {
369 		oval = fptr[IPOPT_OPTVAL];
370 
371 		/* terminate on end of list */
372 		if (oval == IPOPT_EOL)
373 			break;
374 
375 		/*
376 		 * Delete any no-ops following a deleted option, at least up
377 		 * to a 4 octet alignment; copy others.
378 		 */
379 		if (oval == IPOPT_NOP) {
380 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
381 				noop_keep = B_TRUE;
382 			if (noop_keep)
383 				*tptr++ = oval;
384 			fptr++;
385 			remlen--;
386 			continue;
387 		}
388 
389 		/* stop on corrupted list; just do nothing. */
390 		if (remlen < 2)
391 			return (0);
392 		olen = fptr[IPOPT_OLEN];
393 		if (olen < 2 || olen > remlen)
394 			return (0);
395 
396 		/* skip over security options to delete them */
397 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
398 			noop_keep = B_FALSE;
399 			fptr += olen;
400 			remlen -= olen;
401 			continue;
402 		}
403 
404 		/* copy the rest */
405 		noop_keep = B_TRUE;
406 		if (tptr != fptr)
407 			ovbcopy(fptr, tptr, olen);
408 		fptr += olen;
409 		tptr += olen;
410 		remlen -= olen;
411 	}
412 
413 	fptr += remlen;
414 
415 	/* figure how much padding we'll need for header alignment */
416 	olen = (tptr - (uchar_t *)ipha) & 3;
417 	if (olen > 0) {
418 		olen = 4 - olen;
419 		/* pad with end-of-list */
420 		bzero(tptr, olen);
421 		tptr += olen;
422 	}
423 
424 	/* slide back the headers that follow and update the IP header */
425 	delta = fptr - tptr;
426 	if (delta != 0) {
427 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
428 		ipha->ipha_version_and_hdr_length -= delta / 4;
429 	}
430 	return (-delta);
431 }
432 
433 /*
434  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
435  * move the data following the IP header (up to buflen) to accomodate the new
436  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
437  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
438  * option cannot be inserted.  (Note that negative return values are possible
439  * when noops must be compressed, and that only -1 indicates error.  Successful
440  * return value is always evenly divisible by 4, by definition.)
441  */
442 int
443 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
444 {
445 	int remlen, padding, lastpad, totlen;
446 	int oval, olen;
447 	int delta;
448 	uchar_t *optr;
449 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
450 
451 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
452 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
453 	    optbuf[IPOPT_OLEN] == 0)
454 		return (0);
455 
456 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
457 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
458 
459 	/* first find the real (unpadded) length of the existing options */
460 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
461 	padding = totlen = lastpad = 0;
462 	optr = (uchar_t *)(ipha + 1);
463 	while (remlen > 0) {
464 		oval = optr[IPOPT_OPTVAL];
465 
466 		/* stop at end of list */
467 		if (oval == IPOPT_EOL)
468 			break;
469 
470 		/* skip no-ops, noting that length byte isn't present */
471 		if (oval == IPOPT_NOP) {
472 			optr++;
473 			padding++;
474 			lastpad++;
475 			totlen++;
476 			remlen--;
477 			continue;
478 		}
479 
480 		/* give up on a corrupted list; report failure */
481 		if (remlen < 2)
482 			return (-1);
483 		olen = optr[IPOPT_OLEN];
484 		if (olen < 2 || olen > remlen)
485 			return (-1);
486 
487 		lastpad = 0;
488 		optr += olen;
489 		totlen += olen;
490 		remlen -= olen;
491 	}
492 
493 	/* completely ignore any trailing padding */
494 	totlen -= lastpad;
495 	padding -= lastpad;
496 
497 	/*
498 	 * If some sort of inter-option alignment was present, try to preserve
499 	 * that alignment.  If alignment pushes us out past the maximum, then
500 	 * discard it and try to compress to fit.  (We just "assume" that any
501 	 * padding added was attempting to get 32 bit alignment.  If that's
502 	 * wrong, that's just too bad.)
503 	 */
504 	if (padding > 0) {
505 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
506 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
507 			totlen -= padding;
508 			if (olen + totlen > IP_MAX_OPT_LENGTH)
509 				return (-1);
510 			padding = 0;
511 		}
512 	}
513 
514 	/*
515 	 * Since we may need to compress or expand the option list, we write to
516 	 * a temporary buffer and then copy the results back to the IP header.
517 	 */
518 	toptr = tempopt;
519 
520 	/* compute actual option to insert */
521 	olen = optbuf[IPOPT_OLEN];
522 	bcopy(optbuf, toptr, olen);
523 	toptr += olen;
524 	if (padding > 0) {
525 		while ((olen & 3) != 0) {
526 			*toptr++ = IPOPT_NOP;
527 			olen++;
528 		}
529 	}
530 
531 	/* copy over the existing options */
532 	optr = (uchar_t *)(ipha + 1);
533 	while (totlen > 0) {
534 		oval = optr[IPOPT_OPTVAL];
535 
536 		/* totlen doesn't include end-of-list marker */
537 		ASSERT(oval != IPOPT_EOL);
538 
539 		/* handle no-ops; copy if desired, ignore otherwise */
540 		if (oval == IPOPT_NOP) {
541 			if (padding > 0) {
542 				/* note: cannot overflow due to checks above */
543 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
544 				*toptr++ = oval;
545 			}
546 			optr++;
547 			totlen--;
548 			continue;
549 		}
550 
551 		/* list cannot be corrupt at this point */
552 		ASSERT(totlen >= 2);
553 		olen = optr[IPOPT_OLEN];
554 		ASSERT(olen >= 2 && olen <= totlen);
555 
556 		/* cannot run out of room due to tests above */
557 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
558 
559 		bcopy(optr, toptr, olen);
560 		optr += olen;
561 		toptr += olen;
562 		totlen -= olen;
563 	}
564 
565 	/* figure how much padding we'll need for header alignment */
566 	olen = (toptr - tempopt) & 3;
567 	if (olen > 0) {
568 		olen = 4 - olen;
569 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
570 		/* pad with end-of-list value */
571 		bzero(toptr, olen);
572 		toptr += olen;
573 	}
574 
575 	/* move the headers as needed and update IP header */
576 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
577 	remlen = IPH_HDR_LENGTH(ipha);
578 	delta = olen - remlen;
579 	if (delta != 0) {
580 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
581 		    buflen - remlen);
582 		ipha->ipha_version_and_hdr_length += delta / 4;
583 	}
584 
585 	/* slap in the new options */
586 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
587 
588 	return (delta);
589 }
590 
591 /*
592  * tsol_check_label()
593  *
594  * This routine computes the IP label that should be on the packet based on the
595  * connection and destination information.  If the label is there, it returns
596  * zero, so the caller knows that the label is syncronized, and further calls
597  * are not required.  If the label isn't right, then the right one is inserted.
598  *
599  * The packet's header is clear, before entering IPSec's engine.
600  *
601  * Returns:
602  *      0		Label on packet (was|is now) correct
603  *      EACCES		The packet failed the remote host accreditation.
604  *      ENOMEM		Memory allocation failure.
605  *	EINVAL		Label cannot be computed
606  */
607 int
608 tsol_check_label(const cred_t *credp, mblk_t **mpp, int *addedp,
609     boolean_t isexempt)
610 {
611 	mblk_t *mp = *mpp;
612 	ipha_t  *ipha;
613 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
614 	uint_t hlen;
615 	uint_t sec_opt_len;
616 	uchar_t *optr;
617 	int added;
618 	int retv;
619 
620 	if (addedp != NULL)
621 		*addedp = 0;
622 
623 	opt_storage[IPOPT_OPTVAL] = 0;
624 
625 	ipha = (ipha_t *)mp->b_rptr;
626 
627 	retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt);
628 	if (retv != 0)
629 		return (retv);
630 
631 	optr = (uchar_t *)(ipha + 1);
632 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
633 	sec_opt_len = opt_storage[IPOPT_OLEN];
634 
635 	if (hlen >= sec_opt_len) {
636 		/* If no option is supposed to be there, make sure it's not */
637 		if (sec_opt_len == 0 && hlen > 0 &&
638 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
639 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
640 			return (0);
641 		/* if the option is there, it's always first */
642 		if (sec_opt_len != 0 &&
643 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
644 			return (0);
645 	}
646 
647 	/*
648 	 * If there is an option there, then it must be the wrong one; delete.
649 	 */
650 	if (hlen > 0)
651 		mp->b_wptr += tsol_remove_secopt(ipha, MBLKL(mp));
652 
653 	/* Make sure we have room for the worst-case addition */
654 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
655 	hlen = (hlen + 3) & ~3;
656 	if (hlen > IP_MAX_HDR_LENGTH)
657 		hlen = IP_MAX_HDR_LENGTH;
658 	hlen -= IPH_HDR_LENGTH(ipha);
659 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
660 		int copylen;
661 		mblk_t *new_mp;
662 
663 		/* allocate enough to be meaningful, but not *too* much */
664 		copylen = MBLKL(mp);
665 		if (copylen > 256)
666 			copylen = 256;
667 		new_mp = allocb(hlen + copylen +
668 		    (mp->b_rptr - mp->b_datap->db_base), BPRI_HI);
669 		if (new_mp == NULL)
670 			return (ENOMEM);
671 		mblk_setcred(new_mp, DB_CRED(mp));
672 
673 		/* keep the bias */
674 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
675 		new_mp->b_wptr = new_mp->b_rptr + copylen;
676 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
677 		new_mp->b_cont = mp;
678 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
679 			new_mp->b_cont = mp->b_cont;
680 			freeb(mp);
681 		}
682 		*mpp = mp = new_mp;
683 		ipha = (ipha_t *)mp->b_rptr;
684 	}
685 
686 	added = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
687 	if (added == -1)
688 		goto param_prob;
689 
690 	if (addedp != NULL)
691 		*addedp = added;
692 
693 	ASSERT((mp->b_wptr + added) <= DB_LIM(mp));
694 	mp->b_wptr += added;
695 
696 	return (0);
697 
698 param_prob:
699 	return (EINVAL);
700 }
701 
702 /*
703  * IPv6 HopOpt extension header for the label option layout:
704  *	- One octet giving the type of the 'next extension header'
705  *	- Header extension length in 8-byte words, not including the
706  *	  1st 8 bytes, but including any pad bytes at the end.
707  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
708  *	- Followed by TLV encoded IPv6 label option. Option layout is
709  *		* One octet, IP6OPT_LS
710  *		* One octet option length in bytes of the option data following
711  *		  the length, but not including any pad bytes at the end.
712  *		* Four-octet DOI (IP6LS_DOI_V4)
713  *		* One octet suboption, IP6LS_TT_V4
714  *		* One octet suboption length in bytes of the suboption
715  *		  following the suboption length, including the suboption
716  *		  header length, but not including any pad bytes at the end.
717  *	- Pad to make the extension header a multiple of 8 bytes.
718  *
719  * This function returns the contents of 'IPv6 option structure' in the above.
720  * i.e starting from the IP6OPT_LS but not including the pad at the end.
721  * The user must prepend two octets (either padding or next header / length)
722  * and append padding out to the next 8 octet boundary.
723  */
724 int
725 tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
726     uchar_t *opt_storage, boolean_t isexempt)
727 {
728 	tsol_tpc_t	*dst_rhtp;
729 	ts_label_t	*tsl;
730 	uint_t		sec_opt_len;
731 	uint32_t	doi;
732 	zoneid_t	zoneid;
733 	ire_t		*ire, *sire;
734 	tsol_ire_gw_secattr_t *attrp;
735 	boolean_t	compute_label;
736 
737 	if (ip6opt_ls == 0)
738 		return (EINVAL);
739 
740 	if (opt_storage != NULL)
741 		opt_storage[IPOPT_OLEN] = 0;
742 
743 	if ((tsl = crgetlabel(credp)) == NULL)
744 		return (0);
745 
746 	/* Always pass multicast */
747 	if (IN6_IS_ADDR_MULTICAST(dst))
748 		return (0);
749 
750 	if ((dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE)) == NULL) {
751 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v6,
752 		    char *, "destination ip6(1) not in database with creds(2)",
753 		    in6_addr_t *, dst, cred_t *, credp);
754 		return (EINVAL);
755 	}
756 
757 	zoneid = crgetzoneid(credp);
758 
759 	/*
760 	 * Fill in a V6 label.  If a new format is added here, make certain
761 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
762 	 * as TSOL_MAX_IPV6_OPTION.
763 	 */
764 	compute_label = B_FALSE;
765 	switch (dst_rhtp->tpc_tp.host_type) {
766 	case UNLABELED:
767 		/*
768 		 * Only add a label if the unlabeled destination is
769 		 * not local or loopback address, that it is
770 		 * not on the same subnet, and that the next-hop
771 		 * gateway is labeled.
772 		 */
773 		sire = NULL;
774 		ire = ire_cache_lookup_v6(dst, zoneid, tsl);
775 
776 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
777 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
778 			IRE_REFRELE(ire);
779 			TPC_RELE(dst_rhtp);
780 			return (0);
781 		} else if (ire == NULL) {
782 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
783 			    &sire, zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
784 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR));
785 		}
786 
787 		/* no route to destination */
788 		if (ire == NULL) {
789 			DTRACE_PROBE4(
790 			    tx__tnopt__log__info__labeling__routedst__v6,
791 			    char *, "No route to unlabeled dest ip6(1)/tpc(2) "
792 			    "with creds(3).", in6_addr_t *, dst, tsol_tpc_t *,
793 			    dst_rhtp, cred_t *, credp);
794 			TPC_RELE(dst_rhtp);
795 			return (EINVAL);
796 		}
797 
798 		/*
799 		 * Prefix IRE from f-table lookup means that the destination
800 		 * is not directly connected; check the next-hop attributes.
801 		 */
802 		if (sire != NULL) {
803 			ASSERT(ire != NULL);
804 			IRE_REFRELE(ire);
805 			ire = sire;
806 		}
807 
808 		attrp = ire->ire_gw_secattr;
809 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
810 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
811 			compute_label = B_TRUE;
812 
813 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
814 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
815 		    &tsl->tsl_label) && (!isexempt ||
816 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
817 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
818 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
819 			    char *, "unlabeled dest ip6(1)/tpc(2) "
820 			    "non-matching creds(3)", in6_addr_t *, dst,
821 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
822 			IRE_REFRELE(ire);
823 			TPC_RELE(dst_rhtp);
824 			return (EACCES);
825 		}
826 
827 		IRE_REFRELE(ire);
828 		break;
829 
830 	case SUN_CIPSO:
831 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
832 		    (!_blinrange(&tsl->tsl_label,
833 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
834 		    !blinlset(&tsl->tsl_label,
835 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
836 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
837 			    char *,
838 			    "labeled dest ip6(1)/tpc(2) non-matching creds(3).",
839 			    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp,
840 			    cred_t *, credp);
841 			TPC_RELE(dst_rhtp);
842 			return (EACCES);
843 		}
844 		compute_label = B_TRUE;
845 		break;
846 
847 	default:
848 		TPC_RELE(dst_rhtp);
849 		return (EACCES);
850 	}
851 
852 	if (!compute_label) {
853 		TPC_RELE(dst_rhtp);
854 		return (0);
855 	}
856 
857 	/* compute the CIPSO option */
858 	if (opt_storage != NULL)
859 		opt_storage += 8;
860 	if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
861 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
862 		    tsl->tsl_doi);
863 	} else {
864 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
865 		    opt_storage, tsl->tsl_doi);
866 	}
867 	TPC_RELE(dst_rhtp);
868 
869 	if (sec_opt_len == 0) {
870 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v6,
871 		    char *,
872 		    "options lack length for dest ip6(1)/tpc(2) with creds(3).",
873 		    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
874 		return (EINVAL);
875 	}
876 
877 	if (opt_storage == NULL)
878 		return (0);
879 
880 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
881 		opt_storage[sec_opt_len] = IPOPT_EOL;
882 
883 	/*
884 	 * Just in case the option length is odd, round it up to the next even
885 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
886 	 * some reason.
887 	 *
888 	 * Length in the overall option header (IP6OPT_LS) does not include the
889 	 * option header itself, but the length in the suboption does include
890 	 * the suboption header.  Thus, when there's just one suboption, the
891 	 * length in the option header is the suboption length plus 4 (for the
892 	 * DOI value).
893 	 */
894 	opt_storage[-2] = IP6LS_TT_V4;
895 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
896 	opt_storage[-8] = ip6opt_ls;
897 	opt_storage[-7] = opt_storage[-1] + 4;
898 	doi = htons(IP6LS_DOI_V4);
899 	bcopy(&doi, opt_storage - 6, 4);
900 
901 	return (0);
902 }
903 
904 /*
905  * Locate the start of the IP6OPT_LS label option and return it.
906  * Also return the start of the next non-pad option in after_secoptp.
907  * Usually the label option is the first option at least when packets
908  * are generated, but for generality we don't assume that on received packets.
909  */
910 uchar_t *
911 tsol_find_secopt_v6(
912     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
913     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
914     uchar_t **after_secoptp,	/* Non-pad option following the label option */
915     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
916 {
917 	uint_t	optlen;
918 	uint_t	optused;
919 	const uchar_t *optptr;
920 	uchar_t	opt_type;
921 	const uchar_t *secopt = NULL;
922 
923 	*hbh_needed = B_FALSE;
924 	*after_secoptp = NULL;
925 	optlen = hbhlen - 2;
926 	optptr = ip6hbh + 2;
927 	while (optlen != 0) {
928 		opt_type = *optptr;
929 		if (opt_type == IP6OPT_PAD1) {
930 			optptr++;
931 			optlen--;
932 			continue;
933 		}
934 		if (optlen == 1)
935 			break;
936 		optused = 2 + optptr[1];
937 		if (optused > optlen)
938 			break;
939 		/*
940 		 * if we get here, ip6opt_ls can
941 		 * not be 0 because it will always
942 		 * match the IP6OPT_PAD1 above.
943 		 * Therefore ip6opt_ls == 0 forces
944 		 * this test to always fail here.
945 		 */
946 		if (opt_type == ip6opt_ls)
947 			secopt = optptr;
948 		else switch (opt_type) {
949 		case IP6OPT_PADN:
950 			break;
951 		default:
952 			/*
953 			 * There is at least 1 option other than
954 			 * the label option. So the hop-by-hop header is needed
955 			 */
956 			*hbh_needed = B_TRUE;
957 			if (secopt != NULL) {
958 				*after_secoptp = (uchar_t *)optptr;
959 				return ((uchar_t *)secopt);
960 			}
961 			break;
962 		}
963 		optlen -= optused;
964 		optptr += optused;
965 	}
966 	return ((uchar_t *)secopt);
967 }
968 
969 /*
970  * Remove the label option from the hop-by-hop options header if it exists.
971  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
972  * Header and data following the label option that is deleted are copied
973  * (i.e. slid backward) to the right position.
974  */
975 int
976 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
977 {
978 	uchar_t	*ip6hbh;	/* hop-by-hop header */
979 	uint_t	hbhlen;		/* hop-by-hop extension header length */
980 	uchar_t *secopt = NULL;
981 	uchar_t *after_secopt;
982 	uint_t	pad;
983 	uint_t	delta;
984 	boolean_t hbh_needed;
985 
986 	/*
987 	 * hop-by-hop extension header must appear first, if it does not
988 	 * exist, there is no label option.
989 	 */
990 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
991 		return (0);
992 
993 	ip6hbh = (uchar_t *)&ip6h[1];
994 	hbhlen = (ip6hbh[1] + 1) << 3;
995 	/*
996 	 * Locate the start of the label option if it exists and the end
997 	 * of the label option including pads if any.
998 	 */
999 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1000 	    &hbh_needed);
1001 	if (secopt == NULL)
1002 		return (0);
1003 	if (!hbh_needed) {
1004 		uchar_t	next_hdr;
1005 		/*
1006 		 * The label option was the only option in the hop-by-hop
1007 		 * header. We don't need the hop-by-hop header itself any
1008 		 * longer.
1009 		 */
1010 		next_hdr = ip6hbh[0];
1011 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1012 		    buflen - (IPV6_HDR_LEN + hbhlen));
1013 		ip6h->ip6_plen -= hbhlen;
1014 		ip6h->ip6_nxt = next_hdr;
1015 		return (hbhlen);
1016 	}
1017 
1018 	if (after_secopt == NULL) {
1019 		/* There is no option following the label option */
1020 		after_secopt = ip6hbh + hbhlen;
1021 	}
1022 
1023 	/*
1024 	 * After deleting the label option, we need to slide the headers
1025 	 * and data back, while still maintaining the same alignment (module 8)
1026 	 * for the other options. So we slide the headers and data back only
1027 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1028 	 * with pads.
1029 	 */
1030 	delta = after_secopt - secopt;
1031 	pad = delta % 8;
1032 	if (pad == 1) {
1033 		secopt[0] = IP6OPT_PAD1;
1034 	} else if (pad > 1) {
1035 		secopt[0] = IP6OPT_PADN;
1036 		secopt[1] = pad - 2;
1037 		if (pad > 2)
1038 			bzero(&secopt[2], pad - 2);
1039 	}
1040 	secopt += pad;
1041 	delta -= pad;
1042 	ovbcopy(after_secopt, secopt,
1043 	    (uchar_t *)ip6h + buflen - after_secopt);
1044 	ip6hbh[1] -= delta/8;
1045 	ip6h->ip6_plen -= delta;
1046 
1047 	return (delta);
1048 }
1049 
1050 /*
1051  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1052  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1053  * option is described in the block comment above tsol_compute_label_v6.
1054  * This function prepends this hop-by-hop option before any other hop-by-hop
1055  * options in the hop-by-hop header if one already exists, else a new
1056  * hop-by-hop header is created and stuffed into the packet following
1057  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1058  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1059  * extra option being added. Header and data following the position where
1060  * the label option is inserted are copied (i.e. slid forward) to the right
1061  * position.
1062  */
1063 int
1064 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1065 {
1066 	/*
1067 	 * rawlen is the length of the label option in bytes, not including
1068 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1069 	 */
1070 	uint_t	rawlen;
1071 
1072 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1073 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1074 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1075 	uint_t	pad_len;
1076 	uchar_t	*pad_position;
1077 	int	delta;		/* Actual number of bytes inserted */
1078 
1079 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1080 	ip6hbh = (uchar_t *)&ip6h[1];
1081 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1082 		/*
1083 		 * There is a hop-by-hop header present already. In order to
1084 		 * preserve the alignment of the other options at the existing
1085 		 * value (modulo 8) we need to pad the label option to a
1086 		 * multiple of 8 bytes before prepending it to the other
1087 		 * options. Slide the extension headers and data forward to
1088 		 * accomodate the label option at the start of the hop-by-hop
1089 		 * header
1090 		 */
1091 		delta = optlen = (rawlen + 7) & ~7;
1092 		pad_len = optlen - rawlen;
1093 		pad_position = ip6hbh + 2 + rawlen;
1094 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1095 		    buflen - (IPV6_HDR_LEN + 2));
1096 		/*
1097 		 * Bump up the hop-by-hop extension header length by
1098 		 * the number of 8-byte words added
1099 		 */
1100 		optlen >>= 3;
1101 		if (ip6hbh[1] + optlen > 255)
1102 			return (-1);
1103 		ip6hbh[1] += optlen;
1104 	} else {
1105 		/*
1106 		 * There is no hop-by-hop header in the packet. Construct a
1107 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1108 		 * Slide any other extension headers and data forward to
1109 		 * accomodate this hop-by-hop header
1110 		 */
1111 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1112 		pad_len = hbhlen - (2 + rawlen);
1113 		pad_position = ip6hbh + 2 + rawlen;
1114 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1115 		ip6hbh[0] = ip6h->ip6_nxt;
1116 		/*
1117 		 * hop-by-hop extension header length in 8-byte words, not
1118 		 * including the 1st 8 bytes of the hop-by-hop header.
1119 		 */
1120 		ip6hbh[1] = (hbhlen >> 3) - 1;
1121 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1122 	}
1123 	/*
1124 	 * Copy the label option into the hop-by-hop header and insert any
1125 	 * needed pads
1126 	 */
1127 	bcopy(optbuf, ip6hbh + 2, rawlen);
1128 	if (pad_len == 1) {
1129 		pad_position[0] = IP6OPT_PAD1;
1130 	} else if (pad_len > 1) {
1131 		pad_position[0] = IP6OPT_PADN;
1132 		pad_position[1] = pad_len - 2;
1133 		if (pad_len > 2)
1134 			bzero(pad_position + 2, pad_len - 2);
1135 	}
1136 	ip6h->ip6_plen += delta;
1137 	return (delta);
1138 }
1139 
1140 /*
1141  * tsol_check_label_v6()
1142  *
1143  * This routine computes the IP label that should be on the packet based on the
1144  * connection and destination information.  It's called only by the IP
1145  * forwarding logic, because all internal modules atop IP know how to generate
1146  * their own labels.
1147  *
1148  * Returns:
1149  *      0		Label on packet was already correct
1150  *      EACCESS		The packet failed the remote host accreditation.
1151  *      ENOMEM		Memory allocation failure.
1152  */
1153 int
1154 tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, int *addedp,
1155     boolean_t isexempt)
1156 {
1157 	mblk_t *mp = *mpp;
1158 	ip6_t  *ip6h;
1159 	/*
1160 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1161 	 * symmetry with IPv4. Can be relaxed if needed
1162 	 */
1163 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1164 	uint_t hlen;
1165 	uint_t sec_opt_len; /* label option length not including type, len */
1166 	int added;
1167 	int retv;
1168 	uchar_t	*after_secopt;
1169 	uchar_t	*secopt = NULL;
1170 	uchar_t	*ip6hbh;
1171 	uint_t	hbhlen;
1172 	boolean_t hbh_needed;
1173 
1174 	if (addedp != NULL)
1175 		*addedp = 0;
1176 
1177 	ip6h = (ip6_t *)mp->b_rptr;
1178 	retv = tsol_compute_label_v6(credp, &ip6h->ip6_dst, opt_storage,
1179 	    isexempt);
1180 	if (retv != 0)
1181 		return (retv);
1182 
1183 	sec_opt_len = opt_storage[1];
1184 
1185 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1186 		ip6hbh = (uchar_t *)&ip6h[1];
1187 		hbhlen = (ip6hbh[1] + 1) << 3;
1188 		secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1189 		    &hbh_needed);
1190 	}
1191 
1192 	if (sec_opt_len == 0 && secopt == NULL) {
1193 		/*
1194 		 * The packet is not supposed to have a label, and it
1195 		 * does not have one currently
1196 		 */
1197 		return (0);
1198 	}
1199 	if (secopt != NULL && sec_opt_len != 0 &&
1200 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1201 		/* The packet has the correct label already */
1202 		return (0);
1203 	}
1204 
1205 	/*
1206 	 * If there is an option there, then it must be the wrong one; delete.
1207 	 */
1208 	if (secopt != NULL)
1209 		mp->b_wptr += tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1210 
1211 	/*
1212 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1213 	 * the hop-by-hop ext header's next header and length fields. Add
1214 	 * another 2 bytes for the label option type, len and then round
1215 	 * up to the next 8-byte multiple.
1216 	 */
1217 	hlen = (4 + sec_opt_len + 7) & ~7;
1218 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1219 		int copylen;
1220 		mblk_t *new_mp;
1221 		uint16_t hdr_len;
1222 
1223 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1224 		/*
1225 		 * Allocate enough to be meaningful, but not *too* much.
1226 		 * Also all the IPv6 extension headers must be in the same mblk
1227 		 */
1228 		copylen = MBLKL(mp);
1229 		if (copylen > 256)
1230 			copylen = 256;
1231 		if (copylen < hdr_len)
1232 			copylen = hdr_len;
1233 		new_mp = allocb(hlen + copylen +
1234 		    (mp->b_rptr - mp->b_datap->db_base), BPRI_HI);
1235 		if (new_mp == NULL)
1236 			return (ENOMEM);
1237 
1238 		/* keep the bias */
1239 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1240 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1241 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1242 		new_mp->b_cont = mp;
1243 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1244 			new_mp->b_cont = mp->b_cont;
1245 			freeb(mp);
1246 		}
1247 		*mpp = mp = new_mp;
1248 		ip6h = (ip6_t *)mp->b_rptr;
1249 	}
1250 
1251 	added = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1252 	if (added == -1)
1253 		goto param_prob;
1254 
1255 	if (addedp != NULL)
1256 		*addedp = added;
1257 
1258 	ASSERT(mp->b_wptr + added <= DB_LIM(mp));
1259 	mp->b_wptr += added;
1260 
1261 	return (0);
1262 
1263 param_prob:
1264 	return (EINVAL);
1265 }
1266 
1267 /*
1268  * Update the given IPv6 "sticky options" structure to contain the provided
1269  * label, which is encoded as an IPv6 option.  Existing label is removed if
1270  * necessary, and storage is allocated/freed/resized.
1271  *
1272  * Returns 0 on success, errno on failure.
1273  */
1274 int
1275 tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
1276 {
1277 	int rawlen, optlen, newlen;
1278 	uchar_t *newopts;
1279 
1280 	/*
1281 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
1282 	 * optlen is the total length of that option, including any necessary
1283 	 * headers and padding.  newlen is the new size of the total hop-by-hop
1284 	 * options buffer, including user options.
1285 	 */
1286 	if ((rawlen = labelopt[1]) != 0) {
1287 		rawlen += 2;	/* add in header size */
1288 		optlen = (2 + rawlen + 7) & ~7;
1289 	} else {
1290 		optlen = 0;
1291 	}
1292 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
1293 	if (optlen > *labellen) {
1294 		if (newlen > IP6_MAX_OPT_LENGTH)
1295 			return (EHOSTUNREACH);
1296 		/* If the label is bigger than last time, then reallocate */
1297 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
1298 		if (newopts == NULL)
1299 			return (ENOMEM);
1300 		/*
1301 		 * If the user has hop-by-hop stickyoptions set, then copy his
1302 		 * options in after the security label.
1303 		 */
1304 		if (ipp->ipp_hopoptslen > *labellen) {
1305 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
1306 			    ipp->ipp_hopoptslen - *labellen);
1307 			/*
1308 			 * Stomp out any header gunk here - this was the
1309 			 * previous next-header and option length field.
1310 			 */
1311 			newopts[optlen] = IP6OPT_PADN;
1312 			newopts[optlen + 1] = 0;
1313 		}
1314 		if (ipp->ipp_hopopts != NULL)
1315 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1316 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
1317 	} else if (optlen < *labellen) {
1318 		/* If the label got smaller, then adjust downward. */
1319 		if (newlen == 0 && ipp->ipp_hopopts != NULL) {
1320 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1321 			ipp->ipp_hopopts = NULL;
1322 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1323 		}
1324 		/* If the user still has options, move those back. */
1325 		if (ipp->ipp_hopoptslen > *labellen) {
1326 			ovbcopy(ipp->ipp_hopopts + *labellen,
1327 			    ipp->ipp_hopopts + optlen,
1328 			    ipp->ipp_hopoptslen - *labellen);
1329 		}
1330 	}
1331 	ipp->ipp_hopoptslen = newlen;
1332 	*labellen = optlen;
1333 
1334 	newopts = (uchar_t *)ipp->ipp_hopopts;
1335 
1336 	/* If there are any options, then fix up reported length */
1337 	if (newlen > 0) {
1338 		newopts[1] = (newlen + 7) / 8 - 1;
1339 		ipp->ipp_fields |= IPPF_HOPOPTS;
1340 	}
1341 
1342 	/* If there's a label, then insert it now */
1343 	if (optlen > 0) {
1344 		/* skip next-header and length fields */
1345 		newopts += 2;
1346 		bcopy(labelopt, newopts, rawlen);
1347 		newopts += rawlen;
1348 		/* make sure padding comes out right */
1349 		optlen -= 2 + rawlen;
1350 		if (optlen == 1) {
1351 			newopts[0] = IP6OPT_PAD1;
1352 		} else if (optlen > 1) {
1353 			newopts[0] = IP6OPT_PADN;
1354 			optlen -=  2;
1355 			newopts[1] = optlen;
1356 			if (optlen > 0)
1357 				bzero(newopts + 2, optlen);
1358 		}
1359 	}
1360 	return (0);
1361 }
1362 
1363 int
1364 tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
1365     const uchar_t *labelopt)
1366 {
1367 	int optlen, newlen;
1368 	uchar_t *newopts;
1369 
1370 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
1371 	newlen = *totlen + optlen - *labellen;
1372 	if (optlen > *labellen) {
1373 		if (newlen > IP_MAX_OPT_LENGTH)
1374 			return (EHOSTUNREACH);
1375 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1376 		if (newopts == NULL)
1377 			return (ENOMEM);
1378 		if (*totlen > *labellen) {
1379 			bcopy(*opts + *labellen, newopts + optlen,
1380 			    *totlen - *labellen);
1381 		}
1382 		if (*opts != NULL)
1383 			mi_free((char *)*opts);
1384 		*opts = newopts;
1385 	} else if (optlen < *labellen) {
1386 		if (newlen == 0 && *opts != NULL) {
1387 			mi_free((char *)*opts);
1388 			*opts = NULL;
1389 		}
1390 		if (*totlen > *labellen) {
1391 			ovbcopy(*opts + *labellen, *opts + optlen,
1392 			    *totlen - *labellen);
1393 		}
1394 	}
1395 	*totlen = newlen;
1396 	*labellen = optlen;
1397 	if (optlen > 0) {
1398 		newopts = *opts;
1399 		bcopy(labelopt, newopts, optlen);
1400 		/* check if there are user-supplied options that follow */
1401 		if (optlen < newlen) {
1402 			/* compute amount of embedded alignment needed */
1403 			optlen -= newopts[IPOPT_OLEN];
1404 			newopts += newopts[IPOPT_OLEN];
1405 			while (--optlen >= 0)
1406 				*newopts++ = IPOPT_NOP;
1407 		} else if (optlen != newopts[IPOPT_OLEN]) {
1408 			/*
1409 			 * The label option is the only option and it is
1410 			 * not a multiple of 4 bytes.
1411 			 */
1412 			optlen -= newopts[IPOPT_OLEN];
1413 			newopts += newopts[IPOPT_OLEN];
1414 			while (--optlen >= 0)
1415 				*newopts++ = IPOPT_EOL;
1416 		}
1417 	}
1418 	return (0);
1419 }
1420 
1421 /*
1422  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
1423  */
1424 boolean_t
1425 tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
1426     const uchar_t *useropts, uint_t userlen)
1427 {
1428 	int newlen;
1429 	uchar_t *newopts;
1430 
1431 	newlen = userlen + labellen;
1432 	if (newlen > *optlen) {
1433 		/* need more room */
1434 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1435 		if (newopts == NULL)
1436 			return (ENOMEM);
1437 		/*
1438 		 * The supplied *opts can't be NULL in this case,
1439 		 * since there's an existing label.
1440 		 */
1441 		if (labellen > 0)
1442 			bcopy(*opts, newopts, labellen);
1443 		if (*opts != NULL)
1444 			mi_free((char *)*opts);
1445 		*opts = newopts;
1446 	}
1447 
1448 	if (newlen == 0) {
1449 		/* special case -- no remaining IP options at all */
1450 		if (*opts != NULL) {
1451 			mi_free((char *)*opts);
1452 			*opts = NULL;
1453 		}
1454 	} else if (userlen > 0) {
1455 		/* merge in the user's options */
1456 		newopts = *opts;
1457 		if (labellen > 0) {
1458 			int extra = labellen - newopts[IPOPT_OLEN];
1459 
1460 			newopts += newopts[IPOPT_OLEN];
1461 			while (--extra >= 0)
1462 				*newopts++ = IPOPT_NOP;
1463 		}
1464 		bcopy(useropts, newopts, userlen);
1465 	}
1466 
1467 	*optlen = newlen;
1468 	return (0);
1469 }
1470