1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
24 */
25
26 /*
27 * iptun - IP Tunneling Driver
28 *
 * This module is a GLDv3 driver that implements virtual datalinks over IP
 * (a.k.a. IP tunneling). The datalinks are managed through a dld ioctl
31 * interface (see iptun_ctl.c), and registered with GLDv3 using
32 * mac_register(). It implements the logic for various forms of IP (IPv4 or
33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
34 * module below it. Each virtual IP tunnel datalink has a conn_t associated
35 * with it representing the "outer" IP connection.
36 *
37 * The module implements the following locking semantics:
38 *
39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
40 * See comments above iptun_hash_lock for details.
41 *
42 * No locks are ever held while calling up to GLDv3. The general architecture
43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
44 * given link will be held while making downcalls (iptun_m_*() callbacks).
45 * Because we need to hold locks while handling downcalls, holding these locks
46 * while issuing upcalls results in deadlock scenarios. See the block comment
47 * above iptun_task_cb() for details on how we safely issue upcalls without
48 * holding any locks.
49 *
 * The contents of each iptun_t are protected by its iptun_lock mutex, which
 * is acquired in iptun_enter() (called by iptun_enter_by_linkid()) and
 * released in iptun_exit().
53 *
54 * See comments in iptun_delete() and iptun_free() for details on how the
55 * iptun_t is deleted safely.
56 */
57
58 #include <sys/types.h>
59 #include <sys/kmem.h>
60 #include <sys/errno.h>
61 #include <sys/modhash.h>
62 #include <sys/list.h>
63 #include <sys/strsun.h>
64 #include <sys/file.h>
65 #include <sys/systm.h>
66 #include <sys/tihdr.h>
67 #include <sys/param.h>
68 #include <sys/mac_provider.h>
69 #include <sys/mac_ipv4.h>
70 #include <sys/mac_ipv6.h>
71 #include <sys/mac_6to4.h>
72 #include <sys/tsol/tnet.h>
73 #include <sys/sunldi.h>
74 #include <netinet/in.h>
75 #include <netinet/ip6.h>
76 #include <inet/ip.h>
77 #include <inet/ip_ire.h>
78 #include <inet/ipsec_impl.h>
79 #include <sys/tsol/label.h>
80 #include <sys/tsol/tnet.h>
81 #include <inet/iptun.h>
82 #include "iptun_impl.h"
83
/*
 * Do the tunnel type and address family match?
 *
 * IPv4 and 6to4 tunnels take AF_INET addresses; IPv6 tunnels take AF_INET6
 * addresses. Both arguments are parenthesized in the expansion so that
 * arbitrary expressions (e.g. a conditional expression) can be passed
 * safely. Each argument is evaluated more than once, so callers must not
 * pass expressions with side effects.
 */
#define	IPTUN_ADDR_MATCH(iptun_type, family)				\
	((((iptun_type) == IPTUN_TYPE_IPV4) && ((family) == AF_INET)) ||	\
	(((iptun_type) == IPTUN_TYPE_IPV6) && ((family) == AF_INET6)) ||	\
	(((iptun_type) == IPTUN_TYPE_6TO4) && ((family) == AF_INET)))
89
/* Convert a datalink ID into the key type used for iptun_hash lookups. */
#define	IPTUN_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))

/*
 * MTU bounds. The maximums leave room in a maximum-sized IP packet for the
 * outer IP header (plus the encapsulation limit option for IPv6).
 */
#define	IPTUN_MIN_IPV4_MTU	576	 /* ip.h still uses 68 (!) */
#define	IPTUN_MIN_IPV6_MTU	IPV6_MIN_MTU
#define	IPTUN_MAX_IPV4_MTU	(IP_MAXPACKET - sizeof (ipha_t))
#define	IPTUN_MAX_IPV6_MTU	(IP_MAXPACKET - sizeof (ip6_t) - \
				    sizeof (iptun_encaplim_t))

/* Range accepted for the hoplimit property (see iptun_m_setprop()). */
#define	IPTUN_MIN_HOPLIMIT	1
#define	IPTUN_MAX_HOPLIMIT	UINT8_MAX

/* Range advertised/accepted for the encapsulation limit property. */
#define	IPTUN_MIN_ENCAPLIMIT	0
#define	IPTUN_MAX_ENCAPLIMIT	UINT8_MAX

/*
 * The AH/ESP request bits examined by iptun_set_sec_simple() when deciding
 * whether a request is a "clear-all" request.
 */
#define	IPTUN_IPSEC_REQ_MASK	(IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
105
/*
 * Template for the encapsulation limit option. iptun_headergen() copies
 * this into the IPv6 header template when an encapsulation limit is
 * configured, then overwrites the limit value with the tunnel's own.
 */
static iptun_encaplim_t iptun_encaplim_init = {
	{ IPPROTO_NONE, 0 },
	IP6OPT_TUNNEL_LIMIT,
	1,
	IPTUN_DEFAULT_ENCAPLIMIT,	/* filled in with actual value later */
	IP6OPT_PADN,
	1,
	0
};
115
/*
 * Table containing per-iptun-type information.
 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU.
 *
 * NOTE(review): the initializers are positional; the columns appear to be
 * tunnel type, mac plugin identifier, outer IP version, minimum MTU,
 * maximum MTU, and whether the type has a remote address -- confirm against
 * the iptun_typeinfo_t definition. The IPTUN_TYPE_UNKNOWN row is last and
 * looks like a terminator.
 */
static iptun_typeinfo_t	iptun_type_table[] = {
	{ IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION,
	    IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE },
	{ IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION,
	    IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE },
	{ IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION,
	    IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE },
	{ IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE }
};
129
/*
 * iptun_hash is an iptun_t lookup table by link ID protected by
 * iptun_hash_lock.  While the hash table's integrity is maintained via
 * internal locking in the mod_hash_*() functions, we need additional locking
 * so that an iptun_t cannot be deleted after a hash lookup has returned an
 * iptun_t and before iptun_lock has been entered.  As such, we use
 * iptun_hash_lock when doing lookups and removals from iptun_hash.
 */
mod_hash_t	*iptun_hash;
static kmutex_t	iptun_hash_lock;

static uint_t	iptun_tunnelcount;	/* total for all stacks */
/* NOTE(review): presumably the kmem cache for iptun_t; not used in view. */
kmem_cache_t	*iptun_cache;
/* Task queue on which iptun_task_dispatch() schedules iptun_task_cb(). */
ddi_taskq_t	*iptun_taskq;
144
/*
 * The kinds of deferred upcall that iptun_task_cb() can make into the mac
 * module on behalf of a tunnel.
 */
typedef enum {
	IPTUN_TASK_MTU_UPDATE,	/* tell mac about new tunnel link MTU */
	IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */
	IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */
	IPTUN_TASK_LINK_UPDATE,	/* tell mac about new link state */
	IPTUN_TASK_PDATA_UPDATE	/* tell mac about updated plugin data */
} iptun_task_t;

/*
 * Argument passed from iptun_task_dispatch() to iptun_task_cb().  The tunnel
 * is identified by link ID rather than by pointer; the callback looks it up
 * again and treats a failed lookup as "tunnel was deleted".
 */
typedef struct iptun_task_data_s {
	iptun_task_t	itd_task;	/* which upcall to make */
	datalink_id_t	itd_linkid;	/* tunnel to make it for */
} iptun_task_data_t;
157
/* Forward declarations of file-local functions used before definition. */
static void	iptun_task_dispatch(iptun_t *, iptun_task_t);
static int	iptun_enter(iptun_t *);
static void	iptun_exit(iptun_t *);
static void	iptun_headergen(iptun_t *, boolean_t);
static void	iptun_drop_pkt(mblk_t *, uint64_t *);
static void	iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void	iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *);
static void	iptun_output(iptun_t *, mblk_t *);
static uint32_t	iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t	iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t	iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static void	iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static int	iptun_setladdr(iptun_t *, const struct sockaddr_storage *);

static void	iptun_output_6to4(iptun_t *, mblk_t *);
static void	iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *);
static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
    ip_recv_attr_t *);

static void	iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

/*
 * Tentative definition of the mac callback vector.
 * NOTE(review): presumably initialized later in the file with the
 * iptun_m_*() functions below -- not visible here.
 */
static mac_callbacks_t	iptun_m_callbacks;
181
182 static int
iptun_m_getstat(void * arg,uint_t stat,uint64_t * val)183 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val)
184 {
185 iptun_t *iptun = arg;
186 int err = 0;
187
188 switch (stat) {
189 case MAC_STAT_IERRORS:
190 *val = iptun->iptun_ierrors;
191 break;
192 case MAC_STAT_OERRORS:
193 *val = iptun->iptun_oerrors;
194 break;
195 case MAC_STAT_RBYTES:
196 *val = iptun->iptun_rbytes;
197 break;
198 case MAC_STAT_IPACKETS:
199 *val = iptun->iptun_ipackets;
200 break;
201 case MAC_STAT_OBYTES:
202 *val = iptun->iptun_obytes;
203 break;
204 case MAC_STAT_OPACKETS:
205 *val = iptun->iptun_opackets;
206 break;
207 case MAC_STAT_NORCVBUF:
208 *val = iptun->iptun_norcvbuf;
209 break;
210 case MAC_STAT_NOXMTBUF:
211 *val = iptun->iptun_noxmtbuf;
212 break;
213 default:
214 err = ENOTSUP;
215 }
216
217 return (err);
218 }
219
220 static int
iptun_m_start(void * arg)221 iptun_m_start(void *arg)
222 {
223 iptun_t *iptun = arg;
224 int err;
225
226 if ((err = iptun_enter(iptun)) == 0) {
227 iptun->iptun_flags |= IPTUN_MAC_STARTED;
228 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
229 iptun_exit(iptun);
230 }
231 return (err);
232 }
233
234 static void
iptun_m_stop(void * arg)235 iptun_m_stop(void *arg)
236 {
237 iptun_t *iptun = arg;
238
239 if (iptun_enter(iptun) == 0) {
240 iptun->iptun_flags &= ~IPTUN_MAC_STARTED;
241 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
242 iptun_exit(iptun);
243 }
244 }
245
246 /*
247 * iptun_m_setpromisc() does nothing and always succeeds. This is because a
248 * tunnel data-link only ever receives packets that are destined exclusively
249 * for the local address of the tunnel.
250 */
251 /* ARGSUSED */
252 static int
iptun_m_setpromisc(void * arg,boolean_t on)253 iptun_m_setpromisc(void *arg, boolean_t on)
254 {
255 return (0);
256 }
257
258 /* ARGSUSED */
259 static int
iptun_m_multicst(void * arg,boolean_t add,const uint8_t * addrp)260 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
261 {
262 return (ENOTSUP);
263 }
264
265 /*
266 * iptun_m_unicst() sets the local address.
267 */
268 /* ARGSUSED */
269 static int
iptun_m_unicst(void * arg,const uint8_t * addrp)270 iptun_m_unicst(void *arg, const uint8_t *addrp)
271 {
272 iptun_t *iptun = arg;
273 int err;
274 struct sockaddr_storage ss;
275 struct sockaddr_in *sin;
276 struct sockaddr_in6 *sin6;
277
278 if ((err = iptun_enter(iptun)) == 0) {
279 switch (iptun->iptun_typeinfo->iti_ipvers) {
280 case IPV4_VERSION:
281 sin = (struct sockaddr_in *)&ss;
282 sin->sin_family = AF_INET;
283 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t));
284 break;
285 case IPV6_VERSION:
286 sin6 = (struct sockaddr_in6 *)&ss;
287 sin6->sin6_family = AF_INET6;
288 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t));
289 break;
290 default:
291 ASSERT(0);
292 }
293 err = iptun_setladdr(iptun, &ss);
294 iptun_exit(iptun);
295 }
296 return (err);
297 }
298
299 static mblk_t *
iptun_m_tx(void * arg,mblk_t * mpchain)300 iptun_m_tx(void *arg, mblk_t *mpchain)
301 {
302 mblk_t *mp, *nmp;
303 iptun_t *iptun = arg;
304
305 if (!IS_IPTUN_RUNNING(iptun)) {
306 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf);
307 return (NULL);
308 }
309
310 for (mp = mpchain; mp != NULL; mp = nmp) {
311 nmp = mp->b_next;
312 mp->b_next = NULL;
313 iptun_output(iptun, mp);
314 }
315
316 return (NULL);
317 }
318
319 /* ARGSUSED */
320 static int
iptun_m_setprop(void * barg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,const void * pr_val)321 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
322 uint_t pr_valsize, const void *pr_val)
323 {
324 iptun_t *iptun = barg;
325 uint32_t value = *(uint32_t *)pr_val;
326 int err;
327
328 /*
329 * We need to enter this iptun_t since we'll be modifying the outer
330 * header.
331 */
332 if ((err = iptun_enter(iptun)) != 0)
333 return (err);
334
335 switch (pr_num) {
336 case MAC_PROP_IPTUN_HOPLIMIT:
337 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) {
338 err = EINVAL;
339 break;
340 }
341 if (value != iptun->iptun_hoplimit) {
342 iptun->iptun_hoplimit = (uint8_t)value;
343 iptun_headergen(iptun, B_TRUE);
344 }
345 break;
346 case MAC_PROP_IPTUN_ENCAPLIMIT:
347 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 ||
348 value > IPTUN_MAX_ENCAPLIMIT) {
349 err = EINVAL;
350 break;
351 }
352 if (value != iptun->iptun_encaplimit) {
353 iptun->iptun_encaplimit = (uint8_t)value;
354 iptun_headergen(iptun, B_TRUE);
355 }
356 break;
357 case MAC_PROP_MTU: {
358 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0);
359
360 if (value < iptun->iptun_typeinfo->iti_minmtu ||
361 value > maxmtu) {
362 err = EINVAL;
363 break;
364 }
365 iptun->iptun_flags |= IPTUN_FIXED_MTU;
366 if (value != iptun->iptun_mtu) {
367 iptun->iptun_mtu = value;
368 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
369 }
370 break;
371 }
372 default:
373 err = EINVAL;
374 }
375 iptun_exit(iptun);
376 return (err);
377 }
378
379 /* ARGSUSED */
380 static int
iptun_m_getprop(void * barg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,void * pr_val)381 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
382 uint_t pr_valsize, void *pr_val)
383 {
384 iptun_t *iptun = barg;
385 int err;
386
387 if ((err = iptun_enter(iptun)) != 0)
388 return (err);
389
390 switch (pr_num) {
391 case MAC_PROP_IPTUN_HOPLIMIT:
392 ASSERT(pr_valsize >= sizeof (uint32_t));
393 *(uint32_t *)pr_val = iptun->iptun_hoplimit;
394 break;
395
396 case MAC_PROP_IPTUN_ENCAPLIMIT:
397 *(uint32_t *)pr_val = iptun->iptun_encaplimit;
398 break;
399 default:
400 err = ENOTSUP;
401 }
402 done:
403 iptun_exit(iptun);
404 return (err);
405 }
406
407 /* ARGSUSED */
408 static void
iptun_m_propinfo(void * barg,const char * pr_name,mac_prop_id_t pr_num,mac_prop_info_handle_t prh)409 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num,
410 mac_prop_info_handle_t prh)
411 {
412 iptun_t *iptun = barg;
413
414 switch (pr_num) {
415 case MAC_PROP_IPTUN_HOPLIMIT:
416 mac_prop_info_set_range_uint32(prh,
417 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT);
418 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT);
419 break;
420
421 case MAC_PROP_IPTUN_ENCAPLIMIT:
422 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6)
423 break;
424 mac_prop_info_set_range_uint32(prh,
425 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT);
426 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT);
427 break;
428 case MAC_PROP_MTU:
429 mac_prop_info_set_range_uint32(prh,
430 iptun->iptun_typeinfo->iti_minmtu,
431 iptun_get_maxmtu(iptun, NULL, 0));
432 break;
433 }
434 }
435
436 uint_t
iptun_count(void)437 iptun_count(void)
438 {
439 return (iptun_tunnelcount);
440 }
441
442 /*
443 * Enter an iptun_t exclusively. This is essentially just a mutex, but we
444 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of
445 * being deleted.
446 */
447 static int
iptun_enter(iptun_t * iptun)448 iptun_enter(iptun_t *iptun)
449 {
450 mutex_enter(&iptun->iptun_lock);
451 while (iptun->iptun_flags & IPTUN_DELETE_PENDING)
452 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock);
453 if (iptun->iptun_flags & IPTUN_CONDEMNED) {
454 mutex_exit(&iptun->iptun_lock);
455 return (ENOENT);
456 }
457 return (0);
458 }
459
460 /*
461 * Exit the tunnel entered in iptun_enter().
462 */
463 static void
iptun_exit(iptun_t * iptun)464 iptun_exit(iptun_t *iptun)
465 {
466 mutex_exit(&iptun->iptun_lock);
467 }
468
469 /*
470 * Enter the IP tunnel instance by datalink ID.
471 */
472 static int
iptun_enter_by_linkid(datalink_id_t linkid,iptun_t ** iptun)473 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun)
474 {
475 int err;
476
477 mutex_enter(&iptun_hash_lock);
478 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid),
479 (mod_hash_val_t *)iptun) == 0)
480 err = iptun_enter(*iptun);
481 else
482 err = ENOENT;
483 if (err != 0)
484 *iptun = NULL;
485 mutex_exit(&iptun_hash_lock);
486 return (err);
487 }
488
/*
 * Handle tasks that were deferred through the iptun_taskq because they require
 * calling up to the mac module, and we can't call up to the mac module while
 * holding locks.
 *
 * This is tricky to get right without introducing race conditions and
 * deadlocks with the mac module, as we cannot issue an upcall while in the
 * iptun_t.  The reason is that upcalls may try and enter the mac perimeter,
 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
 * module will already have the perimeter held, and will then try and enter
 * the iptun_t.  You can see the lock ordering problem with this; this will
 * deadlock.
 *
 * The safe way to do this is to enter the iptun_t in question and copy the
 * information we need out of it so that we can exit it and know that the
 * information being passed up to the upcalls won't be subject to modification
 * by other threads.  The problem now is that we need to exit it prior to
 * issuing the upcall, but once we do this, a thread could come along and
 * delete the iptun_t and thus the mac handle required to issue the upcall.
 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
 * iptun_t.  This flag is the condition associated with iptun_upcall_cv, which
 * iptun_delete() will cv_wait() on.  When the upcall completes, we clear
 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
 * iptun_delete().  We can thus still safely use iptun->iptun_mh after having
 * exited the iptun_t.
 *
 * `arg' is the iptun_task_data_t allocated by iptun_task_dispatch(); it is
 * freed here, before anything else is done.
 */
static void
iptun_task_cb(void *arg)
{
	iptun_task_data_t	*itd = arg;
	iptun_task_t		task = itd->itd_task;
	datalink_id_t		linkid = itd->itd_linkid;
	iptun_t			*iptun;
	uint32_t		mtu;
	iptun_addr_t		addr;
	link_state_t		linkstate;
	size_t			header_size;
	iptun_header_t		header;

	/* Everything we need from itd was copied into locals above. */
	kmem_free(itd, sizeof (*itd));

	/*
	 * Note that if the lookup fails, it's because the tunnel was deleted
	 * between the time the task was dispatched and now.  That isn't an
	 * error.
	 */
	if (iptun_enter_by_linkid(linkid, &iptun) != 0)
		return;

	iptun->iptun_flags |= IPTUN_UPCALL_PENDING;

	/* Phase 1 (tunnel entered): snapshot the data the upcall needs. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		mtu = iptun->iptun_mtu;
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		addr = iptun->iptun_laddr;
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		addr = iptun->iptun_raddr;
		break;
	case IPTUN_TASK_LINK_UPDATE:
		linkstate = IS_IPTUN_RUNNING(iptun) ?
		    LINK_STATE_UP : LINK_STATE_DOWN;
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		header_size = iptun->iptun_header_size;
		header = iptun->iptun_header;
		break;
	default:
		ASSERT(0);
	}

	iptun_exit(iptun);

	/* Phase 2 (no locks held): issue the upcall using the snapshot. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		(void) mac_maxsdu_update(iptun->iptun_mh, mtu);
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_LINK_UPDATE:
		mac_link_update(iptun->iptun_mh, linkstate);
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		/* A zero header size means "no plugin data": pass NULL. */
		if (mac_pdata_update(iptun->iptun_mh,
		    header_size == 0 ? NULL : &header, header_size) != 0)
			atomic_inc_64(&iptun->iptun_taskq_fail);
		break;
	}

	/*
	 * Phase 3: take the lock directly (not via iptun_enter()) to clear
	 * the pending flag and signal iptun_upcall_cv.
	 */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
	cv_signal(&iptun->iptun_upcall_cv);
	mutex_exit(&iptun->iptun_lock);
}
589
590 static void
iptun_task_dispatch(iptun_t * iptun,iptun_task_t iptun_task)591 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task)
592 {
593 iptun_task_data_t *itd;
594
595 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP);
596 if (itd == NULL) {
597 atomic_inc_64(&iptun->iptun_taskq_fail);
598 return;
599 }
600 itd->itd_task = iptun_task;
601 itd->itd_linkid = iptun->iptun_linkid;
602 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) {
603 atomic_inc_64(&iptun->iptun_taskq_fail);
604 kmem_free(itd, sizeof (*itd));
605 }
606 }
607
608 /*
609 * Convert an iptun_addr_t to sockaddr_storage.
610 */
611 static void
iptun_getaddr(iptun_addr_t * iptun_addr,struct sockaddr_storage * ss)612 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss)
613 {
614 struct sockaddr_in *sin;
615 struct sockaddr_in6 *sin6;
616
617 bzero(ss, sizeof (*ss));
618 switch (iptun_addr->ia_family) {
619 case AF_INET:
620 sin = (struct sockaddr_in *)ss;
621 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4;
622 break;
623 case AF_INET6:
624 sin6 = (struct sockaddr_in6 *)ss;
625 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6;
626 break;
627 default:
628 ASSERT(0);
629 }
630 ss->ss_family = iptun_addr->ia_family;
631 }
632
633 /*
634 * General purpose function to set an IP tunnel source or destination address.
635 */
636 static int
iptun_setaddr(iptun_type_t iptun_type,iptun_addr_t * iptun_addr,const struct sockaddr_storage * ss)637 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr,
638 const struct sockaddr_storage *ss)
639 {
640 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family))
641 return (EINVAL);
642
643 switch (ss->ss_family) {
644 case AF_INET: {
645 struct sockaddr_in *sin = (struct sockaddr_in *)ss;
646
647 if ((sin->sin_addr.s_addr == INADDR_ANY) ||
648 (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
649 CLASSD(sin->sin_addr.s_addr)) {
650 return (EADDRNOTAVAIL);
651 }
652 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr;
653 break;
654 }
655 case AF_INET6: {
656 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
657
658 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
659 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
660 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
661 return (EADDRNOTAVAIL);
662 }
663 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr;
664 break;
665 }
666 default:
667 return (EAFNOSUPPORT);
668 }
669 iptun_addr->ia_family = ss->ss_family;
670 return (0);
671 }
672
673 static int
iptun_setladdr(iptun_t * iptun,const struct sockaddr_storage * laddr)674 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr)
675 {
676 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
677 &iptun->iptun_laddr, laddr));
678 }
679
680 static int
iptun_setraddr(iptun_t * iptun,const struct sockaddr_storage * raddr)681 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr)
682 {
683 if (!(iptun->iptun_typeinfo->iti_hasraddr))
684 return (EINVAL);
685 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
686 &iptun->iptun_raddr, raddr));
687 }
688
689 static boolean_t
iptun_canbind(iptun_t * iptun)690 iptun_canbind(iptun_t *iptun)
691 {
692 /*
693 * A tunnel may bind when its source address has been set, and if its
694 * tunnel type requires one, also its destination address.
695 */
696 return ((iptun->iptun_flags & IPTUN_LADDR) &&
697 ((iptun->iptun_flags & IPTUN_RADDR) ||
698 !(iptun->iptun_typeinfo->iti_hasraddr)));
699 }
700
/*
 * Verify that the local address is valid, and insert in the fanout
 *
 * Works on a private ip_xmit_attr_t obtained from conn_get_ixa(), which is
 * swapped into the conn_t only once the bind has succeeded.  On success the
 * tunnel is marked IPTUN_BOUND, its MTU is recalculated and, if it is
 * running, a link state update is scheduled.
 *
 * Returns 0 on success, ENOMEM if no ixa could be obtained, EADDRNOTAVAIL
 * if local address verification fails, or the error returned by
 * ip_attr_connect() or ipcl_conn_insert().  The reference on the private
 * ixa is dropped on every path after it has been obtained.
 */
static int
iptun_bind(iptun_t *iptun)
{
	conn_t			*connp = iptun->iptun_connp;
	int			error = 0;
	ip_xmit_attr_t		*ixa;
	ip_xmit_attr_t		*oldixa;
	iulp_t			uinfo;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;

	/*
	 * Get an exclusive ixa for this thread.
	 * We defer updating conn_ixa until later to handle any concurrent
	 * conn_ixa_cleanup thread.
	 */
	ixa = conn_get_ixa(connp, B_FALSE);
	if (ixa == NULL)
		return (ENOMEM);

	/* We create PMTU state including for 6to4 */
	ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	ASSERT(iptun_canbind(iptun));

	mutex_enter(&connp->conn_lock);
	/*
	 * Note that conn_proto can't be set since the upper protocol
	 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
	 * ipcl_iptun_classify doesn't use conn_proto.
	 */
	connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers;

	/*
	 * Every arm below is entered with conn_lock held.  The IPv4 and IPv6
	 * arms keep holding it when they break out of the switch; their
	 * failure paths and the whole 6to4 arm drop it themselves.
	 */
	switch (iptun->iptun_typeinfo->iti_type) {
	case IPTUN_TYPE_IPV4:
		IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
		    &connp->conn_laddr_v6);
		IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4,
		    &connp->conn_faddr_v6);
		ixa->ixa_flags |= IXAF_IS_IPV4;
		if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp),
		    ipst, B_FALSE) != IPVL_UNICAST_UP) {
			mutex_exit(&connp->conn_lock);
			error = EADDRNOTAVAIL;
			goto done;
		}
		break;
	case IPTUN_TYPE_IPV6:
		connp->conn_laddr_v6 = iptun->iptun_laddr6;
		connp->conn_faddr_v6 = iptun->iptun_raddr6;
		ixa->ixa_flags &= ~IXAF_IS_IPV4;
		/* We use a zero scopeid for now */
		if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp),
		    ipst, B_FALSE, 0) != IPVL_UNICAST_UP) {
			mutex_exit(&connp->conn_lock);
			error = EADDRNOTAVAIL;
			goto done;
		}
		break;
	case IPTUN_TYPE_6TO4:
		IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
		    &connp->conn_laddr_v6);
		IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6);
		ixa->ixa_flags |= IXAF_IS_IPV4;
		mutex_exit(&connp->conn_lock);

		/* Unlike the other types, a down local address is accepted. */
		switch (ip_laddr_verify_v4(iptun->iptun_laddr4,
		    IPCL_ZONEID(connp), ipst, B_FALSE)) {
		case IPVL_UNICAST_UP:
		case IPVL_UNICAST_DOWN:
			break;
		default:
			error = EADDRNOTAVAIL;
			goto done;
		}
		/* 6to4 skips the ip_attr_connect() step below. */
		goto insert;
	}

	/* In case previous destination was multirt */
	ip_attr_newdst(ixa);

	/*
	 * When we set a tunnel's destination address, we do not
	 * care if the destination is reachable.  Transient routing
	 * issues should not inhibit the creation of a tunnel
	 * interface, for example.  Thus we pass B_FALSE here.
	 *
	 * NOTE(review): no B_FALSE argument appears in the ip_attr_connect()
	 * call below; the last sentence above may be stale -- confirm.
	 */
	connp->conn_saddr_v6 = connp->conn_laddr_v6;
	mutex_exit(&connp->conn_lock);

	/* As long as the MTU is large we avoid fragmentation */
	ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF;

	/* We handle IPsec in iptun_output_common */
	error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
	    &connp->conn_saddr_v6, &uinfo, 0);

	if (error != 0)
		goto done;

	/* saddr shouldn't change since it was already set */
	ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
	    &connp->conn_saddr_v6));

	/* We set IXAF_VERIFY_PMTU to catch PMTU increases */
	ixa->ixa_flags |= IXAF_VERIFY_PMTU;
	ASSERT(uinfo.iulp_mtu != 0);

	/*
	 * Allow setting new policies.
	 * The addresses/ports are already set, thus the IPsec policy calls
	 * can handle their passed-in conn's.
	 */
	connp->conn_policy_cached = B_FALSE;

insert:
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto done;

	/* Atomically update v6lastdst and conn_ixa */
	mutex_enter(&connp->conn_lock);
	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;

	iptun->iptun_flags |= IPTUN_BOUND;

	oldixa = conn_replace_ixa(connp, ixa);
	/* Done with conn_t */
	mutex_exit(&connp->conn_lock);
	ixa_refrele(oldixa);

	/*
	 * Now that we're bound with ip below us, this is a good
	 * time to initialize the destination path MTU and to
	 * re-calculate the tunnel's link MTU.
	 */
	(void) iptun_update_mtu(iptun, ixa, 0);

	if (IS_IPTUN_RUNNING(iptun))
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);

done:
	/* Reached on success and on failure: drop our hold on the ixa. */
	ixa_refrele(ixa);
	return (error);
}
850
851 static void
iptun_unbind(iptun_t * iptun)852 iptun_unbind(iptun_t *iptun)
853 {
854 ASSERT(iptun->iptun_flags & IPTUN_BOUND);
855 ASSERT(mutex_owned(&iptun->iptun_lock) ||
856 (iptun->iptun_flags & IPTUN_CONDEMNED));
857 ip_unbind(iptun->iptun_connp);
858 iptun->iptun_flags &= ~IPTUN_BOUND;
859 if (!(iptun->iptun_flags & IPTUN_CONDEMNED))
860 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
861 }
862
863 /*
864 * Re-generate the template data-link header for a given IP tunnel given the
865 * tunnel's current parameters.
866 */
867 static void
iptun_headergen(iptun_t * iptun,boolean_t update_mac)868 iptun_headergen(iptun_t *iptun, boolean_t update_mac)
869 {
870 switch (iptun->iptun_typeinfo->iti_ipvers) {
871 case IPV4_VERSION:
872 /*
873 * We only need to use a custom IP header if the administrator
874 * has supplied a non-default hoplimit.
875 */
876 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) {
877 iptun->iptun_header_size = 0;
878 break;
879 }
880 iptun->iptun_header_size = sizeof (ipha_t);
881 iptun->iptun_header4.ipha_version_and_hdr_length =
882 IP_SIMPLE_HDR_VERSION;
883 iptun->iptun_header4.ipha_fragment_offset_and_flags =
884 htons(IPH_DF);
885 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit;
886 break;
887 case IPV6_VERSION: {
888 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h;
889
890 /*
891 * We only need to use a custom IPv6 header if either the
892 * administrator has supplied a non-default hoplimit, or we
893 * need to include an encapsulation limit option in the outer
894 * header.
895 */
896 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT &&
897 iptun->iptun_encaplimit == 0) {
898 iptun->iptun_header_size = 0;
899 break;
900 }
901
902 (void) memset(ip6hp, 0, sizeof (*ip6hp));
903 if (iptun->iptun_encaplimit == 0) {
904 iptun->iptun_header_size = sizeof (ip6_t);
905 ip6hp->ip6_nxt = IPPROTO_NONE;
906 } else {
907 iptun_encaplim_t *iel;
908
909 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t);
910 /*
911 * The mac_ipv6 plugin requires ip6_plen to be in host
912 * byte order and reflect the extension headers
913 * present in the template. The actual network byte
914 * order ip6_plen will be set on a per-packet basis on
915 * transmit.
916 */
917 ip6hp->ip6_plen = sizeof (*iel);
918 ip6hp->ip6_nxt = IPPROTO_DSTOPTS;
919 iel = &iptun->iptun_header6.it6h_encaplim;
920 *iel = iptun_encaplim_init;
921 iel->iel_telopt.ip6ot_encap_limit =
922 iptun->iptun_encaplimit;
923 }
924
925 ip6hp->ip6_hlim = iptun->iptun_hoplimit;
926 break;
927 }
928 }
929
930 if (update_mac)
931 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE);
932 }
933
934 /*
935 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy
936 * head.
937 */
938 static boolean_t
iptun_insert_simple_policies(ipsec_policy_head_t * ph,ipsec_act_t * actp,uint_t n,netstack_t * ns)939 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp,
940 uint_t n, netstack_t *ns)
941 {
942 int f = IPSEC_AF_V4;
943
944 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) ||
945 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns))
946 return (B_FALSE);
947
948 f = IPSEC_AF_V6;
949 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) &&
950 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns));
951 }
952
/*
 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or
 * IPTUN_MODIFY ioctls.
 *
 * The caller must hold iptun_lock (asserted).  A tunnel policy is created
 * on demand if the tunnel has none.  A request with neither AH nor ESP
 * bits in IPTUN_IPSEC_REQ_MASK set is treated as "clear all".
 *
 * Returns 0 on success; EINVAL if self-encapsulation is requested; EBUSY
 * if ITPF_P_TUNNEL is set on the policy; ENOMEM if the action vector cannot
 * be allocated or the policies cannot be inserted; or an error from
 * dls_mgmt_get_linkinfo(), create_tunnel_policy() or ipsec_copy_polhead().
 */
static int
iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr)
{
	int		rc = 0;
	uint_t		nact;
	ipsec_act_t	*actp = NULL;
	boolean_t	clear_all, old_policy = B_FALSE;
	ipsec_tun_pol_t	*itp;
	char		name[MAXLINKNAMELEN];
	uint64_t	gen;
	netstack_t	*ns = iptun->iptun_ns;

	/* Can't specify self-encap on a tunnel. */
	if (ipsr->ipsr_self_encap_req != 0)
		return (EINVAL);

	/*
	 * If it's a "clear-all" entry, unset the security flags and resume
	 * normal cleartext (or inherit-from-global) policy.
	 */
	clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 &&
	    (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0);

	ASSERT(mutex_owned(&iptun->iptun_lock));
	itp = iptun->iptun_itp;
	if (itp == NULL) {
		/* Nothing to clear if there was never any policy. */
		if (clear_all)
			goto bail;
		if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL,
		    NULL, NULL)) != 0)
			goto bail;
		ASSERT(name[0] != '\0');
		if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL)
			goto bail;
		iptun->iptun_itp = itp;
	}

	/* Allocate the actvec now, before holding itp or polhead locks. */
	ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
	if (actp == NULL) {
		rc = ENOMEM;
		goto bail;
	}

	/*
	 * Just write on the active polhead.  Save the primary/secondary stuff
	 * for spdsock operations.
	 *
	 * Mutex because we need to write to the polhead AND flags atomically.
	 * Other threads will acquire the polhead lock as a reader if the
	 * (unprotected) flag is set.
	 */
	mutex_enter(&itp->itp_lock);
	if (itp->itp_flags & ITPF_P_TUNNEL) {
		/* Oops, we lost a race.  Let's get out of here. */
		rc = EBUSY;
		goto mutex_bail;
	}
	old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);

	if (old_policy) {
		/*
		 * Save the current active policy in the inactive polhead so
		 * that it can be restored if the insertion below fails.
		 */
		ITPF_CLONE(itp->itp_flags);
		rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
		if (rc != 0) {
			/* inactive has already been cleared. */
			itp->itp_flags &= ~ITPF_IFLAGS;
			goto mutex_bail;
		}
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
		ipsec_polhead_flush(itp->itp_policy, ns);
	} else {
		/* Else assume itp->itp_policy is already flushed. */
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
	}

	if (clear_all) {
		ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0);
		itp->itp_flags &= ~ITPF_PFLAGS;
		rw_exit(&itp->itp_policy->iph_lock);
		old_policy = B_FALSE;	/* Clear out the inactive one too. */
		goto recover_bail;
	}

	if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) {
		rw_exit(&itp->itp_policy->iph_lock);
		/*
		 * Adjust MTU and make sure the DL side knows what's up.
		 */
		itp->itp_flags = ITPF_P_ACTIVE;
		(void) iptun_update_mtu(iptun, NULL, 0);
		old_policy = B_FALSE;	/* Blank out inactive - we succeeded */
	} else {
		rw_exit(&itp->itp_policy->iph_lock);
		rc = ENOMEM;
	}

recover_bail:
	if (old_policy) {
		/* Recover the saved policy into the active polhead. */
		ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
		ITPF_SWAP(itp->itp_flags);
	}

	/* Clear policy in inactive polhead. */
	itp->itp_flags &= ~ITPF_IFLAGS;
	rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
	ipsec_polhead_flush(itp->itp_inactive, ns);
	rw_exit(&itp->itp_inactive->iph_lock);

mutex_bail:
	mutex_exit(&itp->itp_lock);

bail:
	/* nact is only meaningful (and only read) when actp is non-NULL. */
	if (actp != NULL)
		ipsec_actvec_free(actp, nact);

	return (rc);
}
1075
1076 static iptun_typeinfo_t *
iptun_gettypeinfo(iptun_type_t type)1077 iptun_gettypeinfo(iptun_type_t type)
1078 {
1079 int i;
1080
1081 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) {
1082 if (iptun_type_table[i].iti_type == type)
1083 break;
1084 }
1085 return (&iptun_type_table[i]);
1086 }
1087
1088 /*
1089 * Set the parameters included in ik on the tunnel iptun. Parameters that can
1090 * only be set at creation time are set in iptun_create().
1091 */
1092 static int
iptun_setparams(iptun_t * iptun,const iptun_kparams_t * ik)1093 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik)
1094 {
1095 int err = 0;
1096 netstack_t *ns = iptun->iptun_ns;
1097 iptun_addr_t orig_laddr, orig_raddr;
1098 uint_t orig_flags = iptun->iptun_flags;
1099
1100 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) {
1101 if (orig_flags & IPTUN_LADDR)
1102 orig_laddr = iptun->iptun_laddr;
1103 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0)
1104 return (err);
1105 iptun->iptun_flags |= IPTUN_LADDR;
1106 }
1107
1108 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) {
1109 if (orig_flags & IPTUN_RADDR)
1110 orig_raddr = iptun->iptun_raddr;
1111 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0)
1112 goto done;
1113 iptun->iptun_flags |= IPTUN_RADDR;
1114 }
1115
1116 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) {
1117 /*
1118 * Set IPsec policy originating from the ifconfig(8) command
1119 * line. This is traditionally called "simple" policy because
1120 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
1121 * simple policy of "do ESP on everything" and/or "do AH on
1122 * everything" (as opposed to the rich policy that can be
1123 * defined with ipsecconf(8)).
1124 */
1125 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
1126 /*
1127 * Can't set security properties for automatic
1128 * tunnels.
1129 */
1130 err = EINVAL;
1131 goto done;
1132 }
1133
1134 if (!ipsec_loaded(ns->netstack_ipsec)) {
1135 /* If IPsec can be loaded, try and load it now. */
1136 if (ipsec_failed(ns->netstack_ipsec)) {
1137 err = EPROTONOSUPPORT;
1138 goto done;
1139 }
1140 ipsec_loader_loadnow(ns->netstack_ipsec);
1141 /*
1142 * ipsec_loader_loadnow() returns while IPsec is
1143 * loaded asynchronously. While a method exists to
1144 * wait for IPsec to load (ipsec_loader_wait()), it
1145 * requires use of a STREAMS queue to do a qwait().
1146 * We're not in STREAMS context here, and so we can't
1147 * use it. This is not a problem in practice because
1148 * in the vast majority of cases, key management and
1149 * global policy will have loaded before any tunnels
1150 * are plumbed, and so IPsec will already have been
1151 * loaded.
1152 */
1153 err = EAGAIN;
1154 goto done;
1155 }
1156
1157 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo);
1158 if (err == 0) {
1159 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY;
1160 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo;
1161 }
1162 }
1163 done:
1164 if (err != 0) {
1165 /* Restore original source and destination. */
1166 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR &&
1167 (orig_flags & IPTUN_LADDR))
1168 iptun->iptun_laddr = orig_laddr;
1169 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) &&
1170 (orig_flags & IPTUN_RADDR))
1171 iptun->iptun_raddr = orig_raddr;
1172 iptun->iptun_flags = orig_flags;
1173 }
1174 return (err);
1175 }
1176
1177 static int
iptun_register(iptun_t * iptun)1178 iptun_register(iptun_t *iptun)
1179 {
1180 mac_register_t *mac;
1181 int err;
1182
1183 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED));
1184
1185 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
1186 return (EINVAL);
1187
1188 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident;
1189 mac->m_driver = iptun;
1190 mac->m_dip = iptun_dip;
1191 mac->m_instance = (uint_t)-1;
1192 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr;
1193 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ?
1194 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL;
1195 mac->m_callbacks = &iptun_m_callbacks;
1196 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu;
1197 mac->m_max_sdu = iptun->iptun_mtu;
1198 if (iptun->iptun_header_size != 0) {
1199 mac->m_pdata = &iptun->iptun_header;
1200 mac->m_pdata_size = iptun->iptun_header_size;
1201 }
1202 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0)
1203 iptun->iptun_flags |= IPTUN_MAC_REGISTERED;
1204 mac_free(mac);
1205 return (err);
1206 }
1207
1208 static int
iptun_unregister(iptun_t * iptun)1209 iptun_unregister(iptun_t *iptun)
1210 {
1211 int err;
1212
1213 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED);
1214 if ((err = mac_unregister(iptun->iptun_mh)) == 0)
1215 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED;
1216 return (err);
1217 }
1218
1219 static conn_t *
iptun_conn_create(iptun_t * iptun,netstack_t * ns,cred_t * credp)1220 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp)
1221 {
1222 conn_t *connp;
1223
1224 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL)
1225 return (NULL);
1226
1227 connp->conn_flags |= IPCL_IPTUN;
1228 connp->conn_iptun = iptun;
1229 connp->conn_recv = iptun_input;
1230 connp->conn_recvicmp = iptun_input_icmp;
1231 connp->conn_verifyicmp = iptun_verifyicmp;
1232
1233 /*
1234 * Register iptun_notify to listen to capability changes detected by IP.
1235 * This upcall is made in the context of the call to conn_ip_output.
1236 */
1237 connp->conn_ixa->ixa_notify = iptun_notify;
1238 connp->conn_ixa->ixa_notify_cookie = iptun;
1239
1240 /*
1241 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
1242 * for all other conn_t's.
1243 *
1244 * Note that there's an important distinction between iptun_zoneid and
1245 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global
1246 * exclusive stack zones to make the ip module believe that the
1247 * non-global zone is actually a global zone. Therefore, when
1248 * interacting with the ip module, we must always use conn_zoneid.
1249 */
1250 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ?
1251 crgetzoneid(credp) : GLOBAL_ZONEID;
1252 connp->conn_cred = credp;
1253 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
1254 crhold(connp->conn_cred);
1255 connp->conn_cpid = NOPID;
1256
1257 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1258 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid;
1259 ASSERT(connp->conn_ref == 1);
1260
1261 /* Cache things in ixa without an extra refhold */
1262 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
1263 connp->conn_ixa->ixa_cred = connp->conn_cred;
1264 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
1265 if (is_system_labeled())
1266 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
1267
1268 /*
1269 * Have conn_ip_output drop packets should our outer source
1270 * go invalid
1271 */
1272 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1273
1274 switch (iptun->iptun_typeinfo->iti_ipvers) {
1275 case IPV4_VERSION:
1276 connp->conn_family = AF_INET6;
1277 break;
1278 case IPV6_VERSION:
1279 connp->conn_family = AF_INET;
1280 break;
1281 }
1282 mutex_enter(&connp->conn_lock);
1283 connp->conn_state_flags &= ~CONN_INCIPIENT;
1284 mutex_exit(&connp->conn_lock);
1285 return (connp);
1286 }
1287
/*
 * Tear down the conn_t created by iptun_conn_create(): quiesce it, detach it
 * from its tunnel, and drop the one remaining reference (asserted to be the
 * last one).
 */
static void
iptun_conn_destroy(conn_t *connp)
{
	ip_quiesce_conn(connp);
	/* The conn_t must not point back at the tunnel from here on. */
	connp->conn_iptun = NULL;
	ASSERT(connp->conn_ref == 1);
	CONN_DEC_REF(connp);
}
1296
1297 static iptun_t *
iptun_alloc(void)1298 iptun_alloc(void)
1299 {
1300 iptun_t *iptun;
1301
1302 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) {
1303 bzero(iptun, sizeof (*iptun));
1304 atomic_inc_32(&iptun_tunnelcount);
1305 }
1306 return (iptun);
1307 }
1308
/*
 * Tear down and free a condemned tunnel.  Each piece of state is undone only
 * if the tunnel actually has it (as recorded in iptun_flags or by a non-NULL
 * pointer), so this is also used to unwind a partially constructed tunnel.
 * The steps, in order: remove from iptun_hash and the per-stack tunnel list,
 * unbind, unregister from mac, drop the tunnel policy, destroy the conn_t,
 * release the netstack hold, and return the iptun_t to iptun_cache.
 */
static void
iptun_free(iptun_t *iptun)
{
	ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED);

	if (iptun->iptun_flags & IPTUN_HASH_INSERTED) {
		iptun_stack_t	*iptuns = iptun->iptun_iptuns;

		mutex_enter(&iptun_hash_lock);
		VERIFY(mod_hash_remove(iptun_hash,
		    IPTUN_HASH_KEY(iptun->iptun_linkid),
		    (mod_hash_val_t *)&iptun) == 0);
		mutex_exit(&iptun_hash_lock);
		iptun->iptun_flags &= ~IPTUN_HASH_INSERTED;
		mutex_enter(&iptuns->iptuns_lock);
		list_remove(&iptuns->iptuns_iptunlist, iptun);
		mutex_exit(&iptuns->iptuns_lock);
	}

	if (iptun->iptun_flags & IPTUN_BOUND)
		iptun_unbind(iptun);

	/*
	 * After iptun_unregister(), there will be no threads executing a
	 * downcall from the mac module, including in the tx datapath.
	 */
	if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
		VERIFY(iptun_unregister(iptun) == 0);

	if (iptun->iptun_itp != NULL) {
		/*
		 * Remove from the AVL tree, AND release the reference iptun_t
		 * itself holds on the ITP.
		 */
		itp_unlink(iptun->iptun_itp, iptun->iptun_ns);
		ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns);
		iptun->iptun_itp = NULL;
		iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY;
	}

	/*
	 * After ipcl_conn_destroy(), there will be no threads executing an
	 * upcall from ip (i.e., iptun_input()), and it is then safe to free
	 * the iptun_t.
	 */
	if (iptun->iptun_connp != NULL) {
		iptun_conn_destroy(iptun->iptun_connp);
		iptun->iptun_connp = NULL;
	}

	/* Drop the netstack hold that iptun_ns has carried. */
	netstack_rele(iptun->iptun_ns);
	kmem_cache_free(iptun_cache, iptun);
	atomic_dec_32(&iptun_tunnelcount);
}
1363
/*
 * Create a new IP tunnel from the parameters in ik on behalf of the caller
 * identified by credp.
 *
 * Returns 0 on success, or:
 *	EINVAL	no tunnel type was given, the type is unknown, the linkid's
 *		zone assignment does not match the caller's zone, or the hash
 *		insertion failed for an unexpected reason
 *	EEXIST	a tunnel with this linkid already exists
 *	ENOMEM	the iptun_t, its conn_t, or the hash entry could not be
 *		allocated
 * as well as any error from iptun_setparams(), dls_mgmt_get_linkinfo(),
 * iptun_bind(), iptun_register() or dls_devnet_create().
 *
 * On failure everything set up so far is undone through iptun_free().
 */
int
iptun_create(iptun_kparams_t *ik, cred_t *credp)
{
	iptun_t		*iptun = NULL;
	int		err = 0, mherr;
	char		linkname[MAXLINKNAMELEN];
	ipsec_tun_pol_t	*itp;
	netstack_t	*ns = NULL;
	iptun_stack_t	*iptuns;
	datalink_id_t	tmpid;
	zoneid_t	zoneid = crgetzoneid(credp);
	boolean_t	link_created = B_FALSE;

	/* The tunnel type is mandatory */
	if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE))
		return (EINVAL);

	/*
	 * Is the linkid that the caller wishes to associate with this new
	 * tunnel assigned to this zone?
	 */
	if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) {
		if (zoneid != GLOBAL_ZONEID)
			return (EINVAL);
	} else if (zoneid == GLOBAL_ZONEID) {
		return (EINVAL);
	}

	/*
	 * Make sure that we're not trying to create a tunnel that has already
	 * been created.
	 */
	if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) {
		iptun_exit(iptun);
		/* Keep the cleanup code below away from the existing tunnel. */
		iptun = NULL;
		err = EEXIST;
		goto done;
	}

	ns = netstack_find_by_cred(credp);
	iptuns = ns->netstack_iptun;

	if ((iptun = iptun_alloc()) == NULL) {
		err = ENOMEM;
		goto done;
	}

	iptun->iptun_linkid = ik->iptun_kparam_linkid;
	iptun->iptun_zoneid = zoneid;
	/* From here on the netstack hold is released by iptun_free(). */
	iptun->iptun_ns = ns;

	iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type);
	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) {
		err = EINVAL;
		goto done;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT)
		iptun->iptun_flags |= IPTUN_IMPLICIT;

	if ((err = iptun_setparams(iptun, ik)) != 0)
		goto done;

	iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT;
	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6)
		iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT;

	iptun_headergen(iptun, B_FALSE);

	iptun->iptun_connp = iptun_conn_create(iptun, ns, credp);
	if (iptun->iptun_connp == NULL) {
		err = ENOMEM;
		goto done;
	}

	iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu;
	iptun->iptun_dpmtu = iptun->iptun_mtu;

	/*
	 * Find an ITP based on linkname.  If we have parms already set via
	 * the iptun_setparams() call above, it may have created an ITP for
	 * us.  We always try get_tunnel_policy() for DEBUG correctness
	 * checks, and we may wish to refactor this to only check when
	 * iptun_itp is NULL.
	 */
	if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL,
	    NULL, NULL)) != 0)
		goto done;
	if ((itp = get_tunnel_policy(linkname, ns)) != NULL)
		iptun->iptun_itp = itp;

	/*
	 * See if we have the necessary IP addresses assigned to this tunnel
	 * to try and bind them with ip underneath us.  If we're not ready to
	 * bind yet, then we'll defer the bind operation until the addresses
	 * are modified.
	 */
	if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0))
		goto done;

	if ((err = iptun_register(iptun)) != 0)
		goto done;

	err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid,
	    iptun->iptun_zoneid);
	if (err != 0)
		goto done;
	/* Remember that the devnet node needs destroying on failure. */
	link_created = B_TRUE;

	/*
	 * We hash by link-id as that is the key used by all other iptun
	 * interfaces (modify, delete, etc.).
	 */
	if ((mherr = mod_hash_insert(iptun_hash,
	    IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) {
		mutex_enter(&iptuns->iptuns_lock);
		list_insert_head(&iptuns->iptuns_iptunlist, iptun);
		mutex_exit(&iptuns->iptuns_lock);
		iptun->iptun_flags |= IPTUN_HASH_INSERTED;
	} else if (mherr == MH_ERR_NOMEM) {
		err = ENOMEM;
	} else if (mherr == MH_ERR_DUPLICATE) {
		err = EEXIST;
	} else {
		err = EINVAL;
	}

done:
	/*
	 * If no iptun_t took over the netstack hold, release it here;
	 * otherwise iptun_free() releases it along with everything else.
	 */
	if (iptun == NULL && ns != NULL)
		netstack_rele(ns);
	if (err != 0 && iptun != NULL) {
		if (link_created) {
			(void) dls_devnet_destroy(iptun->iptun_mh, &tmpid,
			    B_TRUE);
		}
		/* iptun_free() insists on a condemned tunnel. */
		iptun->iptun_flags |= IPTUN_CONDEMNED;
		iptun_free(iptun);
	}
	return (err);
}
1504
/*
 * Delete the tunnel identified by linkid on behalf of the caller identified
 * by credp.
 *
 * Returns 0 if the tunnel was destroyed and freed.  Otherwise returns the
 * error from iptun_enter_by_linkid(), EACCES if the tunnel belongs to a zone
 * other than the caller's, or the error from dls_devnet_destroy() or
 * mac_disable(); in those cases the tunnel is left in place.
 */
int
iptun_delete(datalink_id_t linkid, cred_t *credp)
{
	int	err;
	iptun_t	*iptun = NULL;

	if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0)
		return (err);

	/* One cannot delete a tunnel that belongs to another zone. */
	if (iptun->iptun_zoneid != crgetzoneid(credp)) {
		iptun_exit(iptun);
		return (EACCES);
	}

	/*
	 * We need to exit iptun in order to issue calls up the stack such as
	 * dls_devnet_destroy().  If we call up while still in iptun, deadlock
	 * with calls coming down the stack is possible.  We prevent other
	 * threads from entering this iptun after we've exited it by setting
	 * the IPTUN_DELETE_PENDING flag.  This will cause callers of
	 * iptun_enter() to block waiting on iptun_enter_cv.  The assumption
	 * here is that the functions we're calling while IPTUN_DELETE_PENDING
	 * is set don't result in an iptun_enter() call, as that would result
	 * in deadlock.
	 */
	iptun->iptun_flags |= IPTUN_DELETE_PENDING;

	/* Wait for any pending upcall to the mac module to complete. */
	while (iptun->iptun_flags & IPTUN_UPCALL_PENDING)
		cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock);

	iptun_exit(iptun);

	if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) {
		/*
		 * mac_disable() will fail with EBUSY if there are references
		 * to the iptun MAC.  If there are none, then mac_disable()
		 * will assure that none can be acquired until the MAC is
		 * unregistered.
		 *
		 * XXX CR 6791335 prevents us from calling mac_disable() prior
		 * to dls_devnet_destroy(), so we unfortunately need to
		 * attempt to re-create the devnet node if mac_disable()
		 * fails.
		 */
		if ((err = mac_disable(iptun->iptun_mh)) != 0) {
			(void) dls_devnet_create(iptun->iptun_mh, linkid,
			    iptun->iptun_zoneid);
		}
	}

	/*
	 * Now that we know the fate of this iptun_t, we need to clear
	 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
	 * slated to be freed.  Either way, we need to signal the threads
	 * waiting in iptun_enter() so that they can either fail if
	 * IPTUN_CONDEMNED is set, or continue if it's not.
	 */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_DELETE_PENDING;
	if (err == 0)
		iptun->iptun_flags |= IPTUN_CONDEMNED;
	cv_broadcast(&iptun->iptun_enter_cv);
	mutex_exit(&iptun->iptun_lock);

	/*
	 * Note that there is no danger in calling iptun_free() after having
	 * dropped the iptun_lock since callers of iptun_enter() at this point
	 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
	 * threads entering from mac callbacks which call iptun_enter()
	 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
	 * lock in order to remove the iptun_t from the hash table.
	 */
	if (err == 0)
		iptun_free(iptun);

	return (err);
}
1584
1585 int
iptun_modify(const iptun_kparams_t * ik,cred_t * credp)1586 iptun_modify(const iptun_kparams_t *ik, cred_t *credp)
1587 {
1588 iptun_t *iptun;
1589 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE;
1590 int err;
1591
1592 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
1593 return (err);
1594
1595 /* One cannot modify a tunnel that belongs to another zone. */
1596 if (iptun->iptun_zoneid != crgetzoneid(credp)) {
1597 err = EACCES;
1598 goto done;
1599 }
1600
1601 /* The tunnel type cannot be changed */
1602 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) {
1603 err = EINVAL;
1604 goto done;
1605 }
1606
1607 if ((err = iptun_setparams(iptun, ik)) != 0)
1608 goto done;
1609 iptun_headergen(iptun, B_FALSE);
1610
1611 /*
1612 * If any of the tunnel's addresses has been modified and the tunnel
1613 * has the necessary addresses assigned to it, we need to try to bind
1614 * with ip underneath us. If we're not ready to bind yet, then we'll
1615 * try again when the addresses are modified later.
1616 */
1617 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR);
1618 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR);
1619 if (laddr_change || raddr_change) {
1620 if (iptun->iptun_flags & IPTUN_BOUND)
1621 iptun_unbind(iptun);
1622 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) {
1623 if (laddr_change)
1624 iptun->iptun_flags &= ~IPTUN_LADDR;
1625 if (raddr_change)
1626 iptun->iptun_flags &= ~IPTUN_RADDR;
1627 goto done;
1628 }
1629 }
1630
1631 if (laddr_change)
1632 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE);
1633 if (raddr_change)
1634 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE);
1635
1636 done:
1637 iptun_exit(iptun);
1638 return (err);
1639 }
1640
1641 /* Given an IP tunnel's datalink id, fill in its parameters. */
1642 int
iptun_info(iptun_kparams_t * ik,cred_t * credp)1643 iptun_info(iptun_kparams_t *ik, cred_t *credp)
1644 {
1645 iptun_t *iptun;
1646 int err;
1647
1648 /* Is the tunnel link visible from the caller's zone? */
1649 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid,
1650 crgetzoneid(credp)))
1651 return (ENOENT);
1652
1653 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
1654 return (err);
1655
1656 bzero(ik, sizeof (iptun_kparams_t));
1657
1658 ik->iptun_kparam_linkid = iptun->iptun_linkid;
1659 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type;
1660 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE;
1661
1662 if (iptun->iptun_flags & IPTUN_LADDR) {
1663 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr);
1664 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR;
1665 }
1666 if (iptun->iptun_flags & IPTUN_RADDR) {
1667 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr);
1668 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR;
1669 }
1670
1671 if (iptun->iptun_flags & IPTUN_IMPLICIT)
1672 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT;
1673
1674 if (iptun->iptun_itp != NULL) {
1675 mutex_enter(&iptun->iptun_itp->itp_lock);
1676 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) {
1677 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL;
1678 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) {
1679 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO;
1680 ik->iptun_kparam_secinfo =
1681 iptun->iptun_simple_policy;
1682 }
1683 }
1684 mutex_exit(&iptun->iptun_itp->itp_lock);
1685 }
1686
1687 done:
1688 iptun_exit(iptun);
1689 return (err);
1690 }
1691
1692 int
iptun_set_6to4relay(netstack_t * ns,ipaddr_t relay_addr)1693 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr)
1694 {
1695 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr))
1696 return (EADDRNOTAVAIL);
1697 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr;
1698 return (0);
1699 }
1700
1701 void
iptun_get_6to4relay(netstack_t * ns,ipaddr_t * relay_addr)1702 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr)
1703 {
1704 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr;
1705 }
1706
1707 void
iptun_set_policy(datalink_id_t linkid,ipsec_tun_pol_t * itp)1708 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp)
1709 {
1710 iptun_t *iptun;
1711
1712 if (iptun_enter_by_linkid(linkid, &iptun) != 0)
1713 return;
1714 if (iptun->iptun_itp != itp) {
1715 ASSERT(iptun->iptun_itp == NULL);
1716 ITP_REFHOLD(itp);
1717 iptun->iptun_itp = itp;
1718 }
1719 /*
1720 * IPsec policy means IPsec overhead, which means lower MTU.
1721 * Refresh the MTU for this tunnel.
1722 */
1723 (void) iptun_update_mtu(iptun, NULL, 0);
1724 iptun_exit(iptun);
1725 }
1726
1727 /*
1728 * Obtain the path MTU to the tunnel destination.
1729 * Can return zero in some cases.
1730 */
1731 static uint32_t
iptun_get_dst_pmtu(iptun_t * iptun,ip_xmit_attr_t * ixa)1732 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
1733 {
1734 uint32_t pmtu = 0;
1735 conn_t *connp = iptun->iptun_connp;
1736 boolean_t need_rele = B_FALSE;
1737
1738 /*
1739 * We only obtain the pmtu for tunnels that have a remote tunnel
1740 * address.
1741 */
1742 if (!(iptun->iptun_flags & IPTUN_RADDR))
1743 return (0);
1744
1745 if (ixa == NULL) {
1746 ixa = conn_get_ixa(connp, B_FALSE);
1747 if (ixa == NULL)
1748 return (0);
1749 need_rele = B_TRUE;
1750 }
1751 /*
1752 * Guard against ICMP errors before we have sent, as well as against
1753 * and a thread which held conn_ixa.
1754 */
1755 if (ixa->ixa_ire != NULL) {
1756 pmtu = ip_get_pmtu(ixa);
1757
1758 /*
1759 * For both IPv4 and IPv6 we can have indication that the outer
1760 * header needs fragmentation.
1761 */
1762 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
1763 /* Must allow fragmentation in ip_output */
1764 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1765 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
1766 ixa->ixa_flags |= IXAF_DONTFRAG;
1767 } else {
1768 /* ip_get_pmtu might have set this - we don't want it */
1769 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
1770 }
1771 }
1772
1773 if (need_rele)
1774 ixa_refrele(ixa);
1775 return (pmtu);
1776 }
1777
1778 /*
1779 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
1780 * by ip.
1781 */
1782 static void
iptun_update_dst_pmtu(iptun_t * iptun,ip_xmit_attr_t * ixa)1783 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
1784 {
1785 uint32_t pmtu;
1786 conn_t *connp = iptun->iptun_connp;
1787 boolean_t need_rele = B_FALSE;
1788
1789 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
1790 if (!(iptun->iptun_flags & IPTUN_RADDR))
1791 return;
1792
1793 if (ixa == NULL) {
1794 ixa = conn_get_ixa(connp, B_FALSE);
1795 if (ixa == NULL)
1796 return;
1797 need_rele = B_TRUE;
1798 }
1799 /*
1800 * Guard against ICMP errors before we have sent, as well as against
1801 * and a thread which held conn_ixa.
1802 */
1803 if (ixa->ixa_ire != NULL) {
1804 pmtu = ip_get_pmtu(ixa);
1805 /*
1806 * Update ixa_fragsize and ixa_pmtu.
1807 */
1808 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu;
1809
1810 /*
1811 * For both IPv4 and IPv6 we can have indication that the outer
1812 * header needs fragmentation.
1813 */
1814 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
1815 /* Must allow fragmentation in ip_output */
1816 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1817 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
1818 ixa->ixa_flags |= IXAF_DONTFRAG;
1819 } else {
1820 /* ip_get_pmtu might have set this - we don't want it */
1821 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
1822 }
1823 }
1824
1825 if (need_rele)
1826 ixa_refrele(ixa);
1827 }
1828
/*
 * ICMP verification callback installed as conn_verifyicmp by
 * iptun_conn_create().
 *
 * There is nothing that iptun can verify in addition to IP having
 * verified the IP addresses in the fanout, so every ICMP message is
 * accepted: all arguments are ignored and B_TRUE is always returned.
 */
/* ARGSUSED */
static boolean_t
iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
    ip_recv_attr_t *ira)
{
	return (B_TRUE);
}
1840
1841 /*
1842 * Notify function registered with ip_xmit_attr_t.
1843 */
1844 static void
iptun_notify(void * arg,ip_xmit_attr_t * ixa,ixa_notify_type_t ntype,ixa_notify_arg_t narg)1845 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
1846 ixa_notify_arg_t narg)
1847 {
1848 iptun_t *iptun = (iptun_t *)arg;
1849
1850 switch (ntype) {
1851 case IXAN_PMTU:
1852 (void) iptun_update_mtu(iptun, ixa, narg);
1853 break;
1854 }
1855 }
1856
1857 /*
1858 * Returns the max of old_ovhd and the overhead associated with pol.
1859 */
1860 static uint32_t
iptun_max_policy_overhead(ipsec_policy_t * pol,uint32_t old_ovhd)1861 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd)
1862 {
1863 uint32_t new_ovhd = old_ovhd;
1864
1865 while (pol != NULL) {
1866 new_ovhd = max(new_ovhd,
1867 ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
1868 pol = pol->ipsp_hash.hash_next;
1869 }
1870 return (new_ovhd);
1871 }
1872
1873 static uint32_t
iptun_get_ipsec_overhead(iptun_t * iptun)1874 iptun_get_ipsec_overhead(iptun_t *iptun)
1875 {
1876 ipsec_policy_root_t *ipr;
1877 ipsec_policy_head_t *iph;
1878 ipsec_policy_t *pol;
1879 ipsec_selector_t sel;
1880 int i;
1881 uint32_t ipsec_ovhd = 0;
1882 ipsec_tun_pol_t *itp = iptun->iptun_itp;
1883 netstack_t *ns = iptun->iptun_ns;
1884
1885 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) {
1886 /*
1887 * Consult global policy, just in case. This will only work
1888 * if we have both source and destination addresses to work
1889 * with.
1890 */
1891 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) !=
1892 (IPTUN_LADDR|IPTUN_RADDR))
1893 return (0);
1894
1895 iph = ipsec_system_policy(ns);
1896 bzero(&sel, sizeof (sel));
1897 sel.ips_isv4 =
1898 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION);
1899 switch (iptun->iptun_typeinfo->iti_ipvers) {
1900 case IPV4_VERSION:
1901 sel.ips_local_addr_v4 = iptun->iptun_laddr4;
1902 sel.ips_remote_addr_v4 = iptun->iptun_raddr4;
1903 break;
1904 case IPV6_VERSION:
1905 sel.ips_local_addr_v6 = iptun->iptun_laddr6;
1906 sel.ips_remote_addr_v6 = iptun->iptun_raddr6;
1907 break;
1908 }
1909 /* Check for both IPv4 and IPv6. */
1910 sel.ips_protocol = IPPROTO_ENCAP;
1911 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
1912 &sel);
1913 if (pol != NULL) {
1914 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act);
1915 IPPOL_REFRELE(pol);
1916 }
1917 sel.ips_protocol = IPPROTO_IPV6;
1918 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
1919 &sel);
1920 if (pol != NULL) {
1921 ipsec_ovhd = max(ipsec_ovhd,
1922 ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
1923 IPPOL_REFRELE(pol);
1924 }
1925 IPPH_REFRELE(iph, ns);
1926 } else {
1927 /*
1928 * Look through all of the possible IPsec actions for the
1929 * tunnel, and find the largest potential IPsec overhead.
1930 */
1931 iph = itp->itp_policy;
1932 rw_enter(&iph->iph_lock, RW_READER);
1933 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]);
1934 ipsec_ovhd = iptun_max_policy_overhead(
1935 ipr->ipr_nonhash[IPSEC_AF_V4], 0);
1936 ipsec_ovhd = iptun_max_policy_overhead(
1937 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd);
1938 for (i = 0; i < ipr->ipr_nchains; i++) {
1939 ipsec_ovhd = iptun_max_policy_overhead(
1940 ipr->ipr_hash[i].hash_head, ipsec_ovhd);
1941 }
1942 rw_exit(&iph->iph_lock);
1943 }
1944
1945 return (ipsec_ovhd);
1946 }
1947
1948 /*
1949 * Calculate and return the maximum possible upper MTU for the given tunnel.
1950 *
1951 * If new_pmtu is set then we also need to update the lower path MTU information
1952 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that
1953 * we are notified by conn_ip_output() when the path MTU increases.
1954 */
1955 static uint32_t
iptun_get_maxmtu(iptun_t * iptun,ip_xmit_attr_t * ixa,uint32_t new_pmtu)1956 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
1957 {
1958 size_t header_size, ipsec_overhead;
1959 uint32_t maxmtu, pmtu;
1960
1961 /*
1962 * Start with the path-MTU to the remote address, which is either
1963 * provided as the new_pmtu argument, or obtained using
1964 * iptun_get_dst_pmtu().
1965 */
1966 if (new_pmtu != 0) {
1967 if (iptun->iptun_flags & IPTUN_RADDR)
1968 iptun->iptun_dpmtu = new_pmtu;
1969 pmtu = new_pmtu;
1970 } else if (iptun->iptun_flags & IPTUN_RADDR) {
1971 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) {
1972 /*
1973 * We weren't able to obtain the path-MTU of the
1974 * destination. Use the previous value.
1975 */
1976 pmtu = iptun->iptun_dpmtu;
1977 } else {
1978 iptun->iptun_dpmtu = pmtu;
1979 }
1980 } else {
1981 /*
1982 * We have no path-MTU information to go on, use the maximum
1983 * possible value.
1984 */
1985 pmtu = iptun->iptun_typeinfo->iti_maxmtu;
1986 }
1987
1988 /*
1989 * Now calculate tunneling overhead and subtract that from the
1990 * path-MTU information obtained above.
1991 */
1992 if (iptun->iptun_header_size != 0) {
1993 header_size = iptun->iptun_header_size;
1994 } else {
1995 switch (iptun->iptun_typeinfo->iti_ipvers) {
1996 case IPV4_VERSION:
1997 header_size = sizeof (ipha_t);
1998 if (is_system_labeled())
1999 header_size += IP_MAX_OPT_LENGTH;
2000 break;
2001 case IPV6_VERSION:
2002 header_size = sizeof (iptun_ipv6hdrs_t);
2003 break;
2004 }
2005 }
2006
2007 ipsec_overhead = iptun_get_ipsec_overhead(iptun);
2008
2009 maxmtu = pmtu - (header_size + ipsec_overhead);
2010 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu));
2011 }
2012
2013 /*
2014 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer
2015 * of any change in MTU. The new_pmtu argument is the new lower path MTU to
2016 * the tunnel destination to be used in the tunnel MTU calculation. Passing
2017 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using
2018 * ip_get_pmtu().
2019 *
2020 * If the calculated tunnel MTU is different than its previous value, then we
2021 * notify the MAC layer above us of this change using mac_maxsdu_update().
2022 */
2023 static uint32_t
iptun_update_mtu(iptun_t * iptun,ip_xmit_attr_t * ixa,uint32_t new_pmtu)2024 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
2025 {
2026 uint32_t newmtu;
2027
2028 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */
2029 iptun_update_dst_pmtu(iptun, ixa);
2030
2031 /*
2032 * We return the current MTU without updating it if it was pegged to a
2033 * static value using the MAC_PROP_MTU link property.
2034 */
2035 if (iptun->iptun_flags & IPTUN_FIXED_MTU)
2036 return (iptun->iptun_mtu);
2037
2038 /* If the MTU isn't fixed, then use the maximum possible value. */
2039 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu);
2040 /*
2041 * We only dynamically adjust the tunnel MTU for tunnels with
2042 * destinations because dynamic MTU calculations are based on the
2043 * destination path-MTU.
2044 */
2045 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) {
2046 iptun->iptun_mtu = newmtu;
2047 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
2048 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
2049 }
2050
2051 return (newmtu);
2052 }
2053
2054 /*
2055 * Frees a packet or packet chain and bumps stat for each freed packet.
2056 */
2057 static void
iptun_drop_pkt(mblk_t * mp,uint64_t * stat)2058 iptun_drop_pkt(mblk_t *mp, uint64_t *stat)
2059 {
2060 mblk_t *pktmp;
2061
2062 for (pktmp = mp; pktmp != NULL; pktmp = mp) {
2063 mp = mp->b_next;
2064 pktmp->b_next = NULL;
2065 if (stat != NULL)
2066 atomic_inc_64(stat);
2067 freemsg(pktmp);
2068 }
2069 }
2070
2071 /*
2072 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
2073 * original packet to its b_cont. Returns NULL on failure.
2074 */
2075 static mblk_t *
iptun_build_icmperr(size_t hdrs_size,mblk_t * orig_pkt)2076 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt)
2077 {
2078 mblk_t *icmperr_mp;
2079
2080 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) {
2081 icmperr_mp->b_wptr += hdrs_size;
2082 /* tack on the offending packet */
2083 icmperr_mp->b_cont = orig_pkt;
2084 }
2085 return (icmperr_mp);
2086 }
2087
2088 /*
2089 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in
2090 * the ICMP error.
2091 */
2092 static void
iptun_sendicmp_v4(iptun_t * iptun,icmph_t * icmp,ipha_t * orig_ipha,mblk_t * mp,ts_label_t * tsl)2093 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp,
2094 ts_label_t *tsl)
2095 {
2096 size_t orig_pktsize, hdrs_size;
2097 mblk_t *icmperr_mp;
2098 ipha_t *new_ipha;
2099 icmph_t *new_icmp;
2100 ip_xmit_attr_t ixas;
2101 conn_t *connp = iptun->iptun_connp;
2102
2103 orig_pktsize = msgdsize(mp);
2104 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t);
2105 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
2106 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2107 return;
2108 }
2109
2110 new_ipha = (ipha_t *)icmperr_mp->b_rptr;
2111 new_icmp = (icmph_t *)(new_ipha + 1);
2112
2113 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2114 new_ipha->ipha_type_of_service = 0;
2115 new_ipha->ipha_ident = 0;
2116 new_ipha->ipha_fragment_offset_and_flags = 0;
2117 new_ipha->ipha_ttl = orig_ipha->ipha_ttl;
2118 new_ipha->ipha_protocol = IPPROTO_ICMP;
2119 new_ipha->ipha_src = orig_ipha->ipha_dst;
2120 new_ipha->ipha_dst = orig_ipha->ipha_src;
2121 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */
2122 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize);
2123
2124 *new_icmp = *icmp;
2125 new_icmp->icmph_checksum = 0;
2126 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0);
2127
2128 bzero(&ixas, sizeof (ixas));
2129 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
2130 if (new_ipha->ipha_src == INADDR_ANY) {
2131 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
2132 ixas.ixa_flags |= IXAF_SET_SOURCE;
2133 }
2134
2135 ixas.ixa_zoneid = IPCL_ZONEID(connp);
2136 ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
2137 ixas.ixa_cred = connp->conn_cred;
2138 ixas.ixa_cpid = NOPID;
2139 if (is_system_labeled())
2140 ixas.ixa_tsl = tsl;
2141
2142 ixas.ixa_ifindex = 0;
2143 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2144
2145 (void) ip_output_simple(icmperr_mp, &ixas);
2146 ixa_cleanup(&ixas);
2147 }
2148
2149 static void
iptun_sendicmp_v6(iptun_t * iptun,icmp6_t * icmp6,ip6_t * orig_ip6h,mblk_t * mp,ts_label_t * tsl)2150 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp,
2151 ts_label_t *tsl)
2152 {
2153 size_t orig_pktsize, hdrs_size;
2154 mblk_t *icmp6err_mp;
2155 ip6_t *new_ip6h;
2156 icmp6_t *new_icmp6;
2157 ip_xmit_attr_t ixas;
2158 conn_t *connp = iptun->iptun_connp;
2159
2160 orig_pktsize = msgdsize(mp);
2161 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t);
2162 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
2163 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2164 return;
2165 }
2166
2167 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr;
2168 new_icmp6 = (icmp6_t *)(new_ip6h + 1);
2169
2170 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf;
2171 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize);
2172 new_ip6h->ip6_hops = orig_ip6h->ip6_hops;
2173 new_ip6h->ip6_nxt = IPPROTO_ICMPV6;
2174 new_ip6h->ip6_src = orig_ip6h->ip6_dst;
2175 new_ip6h->ip6_dst = orig_ip6h->ip6_src;
2176
2177 *new_icmp6 = *icmp6;
2178 /* The checksum is calculated in ip_output_simple and friends. */
2179 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen;
2180
2181 bzero(&ixas, sizeof (ixas));
2182 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
2183 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) {
2184 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
2185 ixas.ixa_flags |= IXAF_SET_SOURCE;
2186 }
2187
2188 ixas.ixa_zoneid = IPCL_ZONEID(connp);
2189 ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
2190 ixas.ixa_cred = connp->conn_cred;
2191 ixas.ixa_cpid = NOPID;
2192 if (is_system_labeled())
2193 ixas.ixa_tsl = tsl;
2194
2195 ixas.ixa_ifindex = 0;
2196 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2197
2198 (void) ip_output_simple(icmp6err_mp, &ixas);
2199 ixa_cleanup(&ixas);
2200 }
2201
2202 static void
iptun_icmp_error_v4(iptun_t * iptun,ipha_t * orig_ipha,mblk_t * mp,uint8_t type,uint8_t code,ts_label_t * tsl)2203 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp,
2204 uint8_t type, uint8_t code, ts_label_t *tsl)
2205 {
2206 icmph_t icmp;
2207
2208 bzero(&icmp, sizeof (icmp));
2209 icmp.icmph_type = type;
2210 icmp.icmph_code = code;
2211
2212 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
2213 }
2214
2215 static void
iptun_icmp_fragneeded_v4(iptun_t * iptun,uint32_t newmtu,ipha_t * orig_ipha,mblk_t * mp,ts_label_t * tsl)2216 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha,
2217 mblk_t *mp, ts_label_t *tsl)
2218 {
2219 icmph_t icmp;
2220
2221 icmp.icmph_type = ICMP_DEST_UNREACHABLE;
2222 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
2223 icmp.icmph_du_zero = 0;
2224 icmp.icmph_du_mtu = htons(newmtu);
2225
2226 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
2227 }
2228
2229 static void
iptun_icmp_error_v6(iptun_t * iptun,ip6_t * orig_ip6h,mblk_t * mp,uint8_t type,uint8_t code,uint32_t offset,ts_label_t * tsl)2230 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp,
2231 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl)
2232 {
2233 icmp6_t icmp6;
2234
2235 bzero(&icmp6, sizeof (icmp6));
2236 icmp6.icmp6_type = type;
2237 icmp6.icmp6_code = code;
2238 if (type == ICMP6_PARAM_PROB)
2239 icmp6.icmp6_pptr = htonl(offset);
2240
2241 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
2242 }
2243
2244 static void
iptun_icmp_toobig_v6(iptun_t * iptun,uint32_t newmtu,ip6_t * orig_ip6h,mblk_t * mp,ts_label_t * tsl)2245 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h,
2246 mblk_t *mp, ts_label_t *tsl)
2247 {
2248 icmp6_t icmp6;
2249
2250 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
2251 icmp6.icmp6_code = 0;
2252 icmp6.icmp6_mtu = htonl(newmtu);
2253
2254 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
2255 }
2256
2257 /*
2258 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The
2259 * mp argument is only used to do bounds checking.
2260 */
2261 static boolean_t
is_icmp_error(mblk_t * mp,ipha_t * ipha,ip6_t * ip6h)2262 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2263 {
2264 uint16_t hlen;
2265
2266 if (ipha != NULL) {
2267 icmph_t *icmph;
2268
2269 ASSERT(ip6h == NULL);
2270 if (ipha->ipha_protocol != IPPROTO_ICMP)
2271 return (B_FALSE);
2272
2273 hlen = IPH_HDR_LENGTH(ipha);
2274 icmph = (icmph_t *)((uint8_t *)ipha + hlen);
2275 return (ICMP_IS_ERROR(icmph->icmph_type) ||
2276 icmph->icmph_type == ICMP_REDIRECT);
2277 } else {
2278 icmp6_t *icmp6;
2279 uint8_t *nexthdrp;
2280
2281 ASSERT(ip6h != NULL);
2282 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) ||
2283 *nexthdrp != IPPROTO_ICMPV6) {
2284 return (B_FALSE);
2285 }
2286
2287 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen);
2288 return (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
2289 icmp6->icmp6_type == ND_REDIRECT);
2290 }
2291 }
2292
2293 /*
2294 * Find inner and outer IP headers from a tunneled packet as setup for calls
2295 * into ipsec_tun_{in,out}bound().
2296 * Note that we need to allow the outer header to be in a separate mblk from
2297 * the inner header.
2298 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero.
2299 */
2300 static size_t
iptun_find_headers(mblk_t * mp,size_t outer_hlen,ipha_t ** outer4,ipha_t ** inner4,ip6_t ** outer6,ip6_t ** inner6)2301 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4,
2302 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6)
2303 {
2304 ipha_t *ipha;
2305 size_t first_mblkl = MBLKL(mp);
2306 mblk_t *inner_mp;
2307
2308 /*
2309 * Don't bother handling packets that don't have a full IP header in
2310 * the fist mblk. For the input path, the ip module ensures that this
2311 * won't happen, and on the output path, the IP tunneling MAC-type
2312 * plugins ensure that this also won't happen.
2313 */
2314 if (first_mblkl < sizeof (ipha_t))
2315 return (0);
2316 ipha = (ipha_t *)(mp->b_rptr);
2317 switch (IPH_HDR_VERSION(ipha)) {
2318 case IPV4_VERSION:
2319 *outer4 = ipha;
2320 *outer6 = NULL;
2321 if (outer_hlen == 0)
2322 outer_hlen = IPH_HDR_LENGTH(ipha);
2323 break;
2324 case IPV6_VERSION:
2325 *outer4 = NULL;
2326 *outer6 = (ip6_t *)ipha;
2327 if (outer_hlen == 0)
2328 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha);
2329 break;
2330 default:
2331 return (0);
2332 }
2333
2334 if (first_mblkl < outer_hlen ||
2335 (first_mblkl == outer_hlen && mp->b_cont == NULL))
2336 return (0);
2337
2338 /*
2339 * We don't bother doing a pullup here since the outer header will
2340 * just get stripped off soon on input anyway. We just want to ensure
2341 * that the inner* pointer points to a full header.
2342 */
2343 if (first_mblkl == outer_hlen) {
2344 inner_mp = mp->b_cont;
2345 ipha = (ipha_t *)inner_mp->b_rptr;
2346 } else {
2347 inner_mp = mp;
2348 ipha = (ipha_t *)(mp->b_rptr + outer_hlen);
2349 }
2350 switch (IPH_HDR_VERSION(ipha)) {
2351 case IPV4_VERSION:
2352 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t))
2353 return (0);
2354 *inner4 = ipha;
2355 *inner6 = NULL;
2356 break;
2357 case IPV6_VERSION:
2358 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t))
2359 return (0);
2360 *inner4 = NULL;
2361 *inner6 = (ip6_t *)ipha;
2362 break;
2363 default:
2364 return (0);
2365 }
2366
2367 return (outer_hlen);
2368 }
2369
2370 /*
2371 * Received ICMP error in response to an X over IPv4 packet that we
2372 * transmitted.
2373 *
2374 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2375 * the following:
2376 *
2377 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
2378 *
2379 * or
2380 *
2381 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
2382 *
2383 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
2384 * whatever the very-inner packet is (IPv4(2) or IPv6).
2385 */
2386 static void
iptun_input_icmp_v4(iptun_t * iptun,mblk_t * data_mp,icmph_t * icmph,ip_recv_attr_t * ira)2387 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph,
2388 ip_recv_attr_t *ira)
2389 {
2390 uint8_t *orig;
2391 ipha_t *outer4, *inner4;
2392 ip6_t *outer6, *inner6;
2393 int outer_hlen;
2394 uint8_t type, code;
2395
2396 ASSERT(data_mp->b_cont == NULL);
2397 /*
2398 * Temporarily move b_rptr forward so that iptun_find_headers() can
2399 * find headers in the ICMP packet payload.
2400 */
2401 orig = data_mp->b_rptr;
2402 data_mp->b_rptr = (uint8_t *)(icmph + 1);
2403 /*
2404 * The ip module ensures that ICMP errors contain at least the
2405 * original IP header (otherwise, the error would never have made it
2406 * here).
2407 */
2408 ASSERT(MBLKL(data_mp) >= 0);
2409 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
2410 &inner6);
2411 ASSERT(outer6 == NULL);
2412 data_mp->b_rptr = orig;
2413 if (outer_hlen == 0) {
2414 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2415 return;
2416 }
2417
2418 /* Only ICMP errors due to tunneled packets should reach here. */
2419 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP ||
2420 outer4->ipha_protocol == IPPROTO_IPV6);
2421
2422 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2423 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
2424 if (data_mp == NULL) {
2425 /* Callee did all of the freeing. */
2426 atomic_inc_64(&iptun->iptun_ierrors);
2427 return;
2428 }
2429 /* We should never see reassembled fragment here. */
2430 ASSERT(data_mp->b_next == NULL);
2431
2432 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen;
2433
2434 /*
2435 * If the original packet being transmitted was itself an ICMP error,
2436 * then drop this packet. We don't want to generate an ICMP error in
2437 * response to an ICMP error.
2438 */
2439 if (is_icmp_error(data_mp, inner4, inner6)) {
2440 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2441 return;
2442 }
2443
2444 switch (icmph->icmph_type) {
2445 case ICMP_DEST_UNREACHABLE:
2446 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH);
2447 switch (icmph->icmph_code) {
2448 case ICMP_FRAGMENTATION_NEEDED: {
2449 uint32_t newmtu;
2450
2451 /*
2452 * We reconcile this with the fact that the tunnel may
2453 * also have IPsec policy by letting iptun_update_mtu
2454 * take care of it.
2455 */
2456 newmtu = iptun_update_mtu(iptun, NULL,
2457 ntohs(icmph->icmph_du_mtu));
2458
2459 if (inner4 != NULL) {
2460 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
2461 data_mp, ira->ira_tsl);
2462 } else {
2463 iptun_icmp_toobig_v6(iptun, newmtu, inner6,
2464 data_mp, ira->ira_tsl);
2465 }
2466 return;
2467 }
2468 case ICMP_DEST_NET_UNREACH_ADMIN:
2469 case ICMP_DEST_HOST_UNREACH_ADMIN:
2470 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN :
2471 ICMP6_DST_UNREACH_ADMIN);
2472 break;
2473 default:
2474 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
2475 ICMP6_DST_UNREACH_ADDR);
2476 break;
2477 }
2478 break;
2479 case ICMP_TIME_EXCEEDED:
2480 if (inner6 != NULL) {
2481 type = ICMP6_TIME_EXCEEDED;
2482 code = 0;
2483 } /* else we're already set. */
2484 break;
2485 case ICMP_PARAM_PROBLEM:
2486 /*
2487 * This is a problem with the outer header we transmitted.
2488 * Treat this as an output error.
2489 */
2490 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
2491 return;
2492 default:
2493 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2494 return;
2495 }
2496
2497 if (inner4 != NULL) {
2498 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
2499 ira->ira_tsl);
2500 } else {
2501 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
2502 ira->ira_tsl);
2503 }
2504 }
2505
2506 /*
2507 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
2508 * Encapsulation Limit destination option. If there is one, set encaplim_ptr
2509 * to point to the option value.
2510 */
2511 static boolean_t
iptun_find_encaplimit(mblk_t * mp,ip6_t * ip6h,uint8_t ** encaplim_ptr)2512 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr)
2513 {
2514 ip_pkt_t pkt;
2515 uint8_t *endptr;
2516 ip6_dest_t *destp;
2517 struct ip6_opt *optp;
2518
2519 pkt.ipp_fields = 0; /* must be initialized */
2520 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL);
2521 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
2522 destp = pkt.ipp_dstopts;
2523 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) {
2524 destp = pkt.ipp_rthdrdstopts;
2525 } else {
2526 return (B_FALSE);
2527 }
2528
2529 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1);
2530 optp = (struct ip6_opt *)(destp + 1);
2531 while (endptr - (uint8_t *)optp > sizeof (*optp)) {
2532 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
2533 if ((uint8_t *)(optp + 1) >= endptr)
2534 return (B_FALSE);
2535 *encaplim_ptr = (uint8_t *)&optp[1];
2536 return (B_TRUE);
2537 }
2538 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2);
2539 }
2540 return (B_FALSE);
2541 }
2542
2543 /*
2544 * Received ICMPv6 error in response to an X over IPv6 packet that we
2545 * transmitted.
2546 *
2547 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2548 * the following:
2549 *
2550 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
2551 *
2552 * or
2553 *
2554 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
2555 *
2556 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
2557 * whatever the very-inner packet is (IPv4 or IPv6(2)).
2558 */
2559 static void
iptun_input_icmp_v6(iptun_t * iptun,mblk_t * data_mp,icmp6_t * icmp6h,ip_recv_attr_t * ira)2560 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h,
2561 ip_recv_attr_t *ira)
2562 {
2563 uint8_t *orig;
2564 ipha_t *outer4, *inner4;
2565 ip6_t *outer6, *inner6;
2566 int outer_hlen;
2567 uint8_t type, code;
2568
2569 ASSERT(data_mp->b_cont == NULL);
2570
2571 /*
2572 * Temporarily move b_rptr forward so that iptun_find_headers() can
2573 * find IP headers in the ICMP packet payload.
2574 */
2575 orig = data_mp->b_rptr;
2576 data_mp->b_rptr = (uint8_t *)(icmp6h + 1);
2577 /*
2578 * The ip module ensures that ICMP errors contain at least the
2579 * original IP header (otherwise, the error would never have made it
2580 * here).
2581 */
2582 ASSERT(MBLKL(data_mp) >= 0);
2583 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
2584 &inner6);
2585 ASSERT(outer4 == NULL);
2586 data_mp->b_rptr = orig; /* Restore r_ptr */
2587 if (outer_hlen == 0) {
2588 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2589 return;
2590 }
2591
2592 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2593 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
2594 if (data_mp == NULL) {
2595 /* Callee did all of the freeing. */
2596 atomic_inc_64(&iptun->iptun_ierrors);
2597 return;
2598 }
2599 /* We should never see reassembled fragment here. */
2600 ASSERT(data_mp->b_next == NULL);
2601
2602 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen;
2603
2604 /*
2605 * If the original packet being transmitted was itself an ICMP error,
2606 * then drop this packet. We don't want to generate an ICMP error in
2607 * response to an ICMP error.
2608 */
2609 if (is_icmp_error(data_mp, inner4, inner6)) {
2610 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2611 return;
2612 }
2613
2614 switch (icmp6h->icmp6_type) {
2615 case ICMP6_PARAM_PROB: {
2616 uint8_t *encaplim_ptr;
2617
2618 /*
2619 * If the ICMPv6 error points to a valid Tunnel Encapsulation
2620 * Limit option and the limit value is 0, then fall through
2621 * and send a host unreachable message. Otherwise, treat the
2622 * error as an output error, as there must have been a problem
2623 * with a packet we sent.
2624 */
2625 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) ||
2626 (icmp6h->icmp6_pptr !=
2627 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) ||
2628 *encaplim_ptr != 0) {
2629 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
2630 return;
2631 }
2632 }
2633 /* FALLTHROUGH */
2634 case ICMP6_TIME_EXCEEDED:
2635 case ICMP6_DST_UNREACH:
2636 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE :
2637 ICMP6_DST_UNREACH);
2638 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
2639 ICMP6_DST_UNREACH_ADDR);
2640 break;
2641 case ICMP6_PACKET_TOO_BIG: {
2642 uint32_t newmtu;
2643
2644 /*
2645 * We reconcile this with the fact that the tunnel may also
2646 * have IPsec policy by letting iptun_update_mtu take care of
2647 * it.
2648 */
2649 newmtu = iptun_update_mtu(iptun, NULL,
2650 ntohl(icmp6h->icmp6_mtu));
2651
2652 if (inner4 != NULL) {
2653 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
2654 data_mp, ira->ira_tsl);
2655 } else {
2656 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp,
2657 ira->ira_tsl);
2658 }
2659 return;
2660 }
2661 default:
2662 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2663 return;
2664 }
2665
2666 if (inner4 != NULL) {
2667 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
2668 ira->ira_tsl);
2669 } else {
2670 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
2671 ira->ira_tsl);
2672 }
2673 }
2674
2675 /*
2676 * Called as conn_recvicmp from IP for ICMP errors.
2677 */
2678 /* ARGSUSED2 */
2679 static void
iptun_input_icmp(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * ira)2680 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2681 {
2682 conn_t *connp = arg;
2683 iptun_t *iptun = connp->conn_iptun;
2684 mblk_t *tmpmp;
2685 size_t hlen;
2686
2687 ASSERT(IPCL_IS_IPTUN(connp));
2688
2689 if (mp->b_cont != NULL) {
2690 /*
2691 * Since ICMP error processing necessitates access to bits
2692 * that are within the ICMP error payload (the original packet
2693 * that caused the error), pull everything up into a single
2694 * block for convenience.
2695 */
2696 if ((tmpmp = msgpullup(mp, -1)) == NULL) {
2697 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf);
2698 return;
2699 }
2700 freemsg(mp);
2701 mp = tmpmp;
2702 }
2703
2704 hlen = ira->ira_ip_hdr_length;
2705 switch (iptun->iptun_typeinfo->iti_ipvers) {
2706 case IPV4_VERSION:
2707 /*
2708 * The outer IP header coming up from IP is always ipha_t
2709 * alligned (otherwise, we would have crashed in ip).
2710 */
2711 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen),
2712 ira);
2713 break;
2714 case IPV6_VERSION:
2715 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen),
2716 ira);
2717 break;
2718 }
2719 }
2720
2721 static boolean_t
iptun_in_6to4_ok(iptun_t * iptun,ipha_t * outer4,ip6_t * inner6)2722 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
2723 {
2724 ipaddr_t v4addr;
2725
2726 /*
2727 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
2728 * IPv4 address of a 6to4 tunnel as the destination.
2729 */
2730 if (inner6 == NULL)
2731 return (B_FALSE);
2732
2733 /*
2734 * Make sure that the IPv6 destination is within the site that this
2735 * 6to4 tunnel is routing for. We don't want people bouncing random
2736 * tunneled IPv6 packets through this 6to4 router.
2737 */
2738 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr);
2739 if (outer4->ipha_dst != v4addr)
2740 return (B_FALSE);
2741
2742 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) {
2743 /*
2744 * Section 9 of RFC 3056 (security considerations) suggests
2745 * that when a packet is from a 6to4 site (i.e., it's not a
2746 * global address being forwarded froma relay router), make
2747 * sure that the packet was tunneled by that site's 6to4
2748 * router.
2749 */
2750 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
2751 if (outer4->ipha_src != v4addr)
2752 return (B_FALSE);
2753 } else {
2754 /*
2755 * Only accept packets from a relay router if we've configured
2756 * outbound relay router functionality.
2757 */
2758 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
2759 return (B_FALSE);
2760 }
2761
2762 return (B_TRUE);
2763 }
2764
2765 /*
2766 * Input function for everything that comes up from the ip module below us.
2767 * This is called directly from the ip module via connp->conn_recv().
2768 *
2769 * We receive M_DATA messages with IP-in-IP tunneled packets.
2770 */
2771 /* ARGSUSED2 */
2772 static void
iptun_input(void * arg,mblk_t * data_mp,void * arg2,ip_recv_attr_t * ira)2773 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira)
2774 {
2775 conn_t *connp = arg;
2776 iptun_t *iptun = connp->conn_iptun;
2777 int outer_hlen;
2778 ipha_t *outer4, *inner4;
2779 ip6_t *outer6, *inner6;
2780
2781 ASSERT(IPCL_IS_IPTUN(connp));
2782 ASSERT(DB_TYPE(data_mp) == M_DATA);
2783
2784 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length,
2785 &outer4, &inner4, &outer6, &inner6);
2786 if (outer_hlen == 0)
2787 goto drop;
2788
2789 /*
2790 * If the system is labeled, we call tsol_check_dest() on the packet
2791 * destination (our local tunnel address) to ensure that the packet as
2792 * labeled should be allowed to be sent to us. We don't need to call
2793 * the more involved tsol_receive_local() since the tunnel link itself
2794 * cannot be assigned to shared-stack non-global zones.
2795 */
2796 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2797 if (ira->ira_tsl == NULL)
2798 goto drop;
2799 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ?
2800 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst),
2801 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION),
2802 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0)
2803 goto drop;
2804 }
2805
2806 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2807 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns);
2808 if (data_mp == NULL) {
2809 /* Callee did all of the freeing. */
2810 return;
2811 }
2812
2813 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 &&
2814 !iptun_in_6to4_ok(iptun, outer4, inner6))
2815 goto drop;
2816
2817 /*
2818 * We need to statistically account for each packet individually, so
2819 * we might as well split up any b_next chains here.
2820 */
2821 do {
2822 mblk_t *mp;
2823
2824 mp = data_mp->b_next;
2825 data_mp->b_next = NULL;
2826
2827 atomic_inc_64(&iptun->iptun_ipackets);
2828 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp));
2829 mac_rx(iptun->iptun_mh, NULL, data_mp);
2830
2831 data_mp = mp;
2832 } while (data_mp != NULL);
2833 return;
2834 drop:
2835 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2836 }
2837
2838 /*
2839 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet
2840 * was processed without issue, or B_FALSE if the packet had issues and should
2841 * be dropped.
2842 */
2843 static boolean_t
iptun_out_process_6to4(iptun_t * iptun,ipha_t * outer4,ip6_t * inner6)2844 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
2845 {
2846 ipaddr_t v4addr;
2847
2848 /*
2849 * IPv6 source must be a 6to4 address. This is because a conscious
2850 * decision was made to not allow a Solaris system to be used as a
2851 * relay router (for security reasons) when 6to4 was initially
2852 * integrated. If this decision is ever reversed, the following check
2853 * can be removed.
2854 */
2855 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src))
2856 return (B_FALSE);
2857
2858 /*
2859 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
2860 * portion of the 6to4 IPv6 source address. In other words, make sure
2861 * that we're tunneling packets from our own 6to4 site.
2862 */
2863 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
2864 if (outer4->ipha_src != v4addr)
2865 return (B_FALSE);
2866
2867 /*
2868 * Automatically set the destination of the outer IPv4 header as
2869 * described in RFC3056. There are two possibilities:
2870 *
2871 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address
2872 * to the IPv4 portion of the 6to4 address.
2873 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
2874 * destination to the address of a relay router.
2875 *
2876 * Design Note: b shouldn't be necessary here, and this is a flaw in
2877 * the design of the 6to4relay command. Instead of setting a 6to4
2878 * relay address in this module via an ioctl, the 6to4relay command
2879 * could simply add a IPv6 route for native IPv6 addresses (such as a
2880 * default route) in the forwarding table that uses a 6to4 destination
2881 * as its next hop, and the IPv4 portion of that address could be a
2882 * 6to4 relay address. In order for this to work, IP would have to
2883 * resolve the next hop address, which would necessitate a link-layer
2884 * address resolver for 6to4 links, which doesn't exist today.
2885 *
2886 * In fact, if a resolver existed for 6to4 links, then setting the
2887 * IPv4 destination in the outer header could be done as part of
2888 * link-layer address resolution and fast-path header generation, and
2889 * not here.
2890 */
2891 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) {
2892 /* destination is a 6to4 router */
2893 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst,
2894 (struct in_addr *)&outer4->ipha_dst);
2895
2896 /* Reject attempts to send to INADDR_ANY */
2897 if (outer4->ipha_dst == INADDR_ANY)
2898 return (B_FALSE);
2899 } else {
2900 /*
2901 * The destination is a native IPv6 address. If output to a
2902 * relay-router is enabled, use the relay-router's IPv4
2903 * address as the destination.
2904 */
2905 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
2906 return (B_FALSE);
2907 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr;
2908 }
2909
2910 /*
2911 * If the outer source and destination are equal, this means that the
2912 * 6to4 router somehow forwarded an IPv6 packet destined for its own
2913 * 6to4 site to its 6to4 tunnel interface, which will result in this
2914 * packet infinitely bouncing between ip and iptun.
2915 */
2916 return (outer4->ipha_src != outer4->ipha_dst);
2917 }
2918
2919 /*
2920 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on
2921 * error.
2922 */
2923 static mblk_t *
iptun_out_process_ipv4(iptun_t * iptun,mblk_t * mp,ipha_t * outer4,ipha_t * inner4,ip6_t * inner6,ip_xmit_attr_t * ixa)2924 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4,
2925 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
2926 {
2927 uint8_t *innerptr = (inner4 != NULL ?
2928 (uint8_t *)inner4 : (uint8_t *)inner6);
2929 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
2930
2931 if (inner4 != NULL) {
2932 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP);
2933 /*
2934 * Copy the tos from the inner IPv4 header. We mask off ECN
2935 * bits (bits 6 and 7) because there is currently no
2936 * tunnel-tunnel communication to determine if both sides
2937 * support ECN. We opt for the safe choice: don't copy the
2938 * ECN bits when doing encapsulation.
2939 */
2940 outer4->ipha_type_of_service =
2941 inner4->ipha_type_of_service & ~0x03;
2942 } else {
2943 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 &&
2944 inner6 != NULL);
2945 }
2946 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2947 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2948 else
2949 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2950
2951 /*
2952 * As described in section 3.2.2 of RFC4213, if the packet payload is
2953 * less than or equal to the minimum MTU size, then we need to allow
2954 * IPv4 to fragment the packet. The reason is that even if we end up
2955 * receiving an ICMP frag-needed, the interface above this tunnel
2956 * won't be allowed to drop its MTU as a result, since the packet was
2957 * already smaller than the smallest allowable MTU for that interface.
2958 */
2959 if (mp->b_wptr - innerptr <= minmtu) {
2960 outer4->ipha_fragment_offset_and_flags = 0;
2961 ixa->ixa_flags &= ~IXAF_DONTFRAG;
2962 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) &&
2963 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) {
2964 ixa->ixa_flags |= IXAF_DONTFRAG;
2965 }
2966
2967 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4);
2968 ixa->ixa_pktlen = msgdsize(mp);
2969 ixa->ixa_protocol = outer4->ipha_protocol;
2970
2971 outer4->ipha_length = htons(ixa->ixa_pktlen);
2972 return (mp);
2973 }
2974
2975 /*
2976 * Insert an encapsulation limit destination option in the packet provided.
2977 * Always consumes the mp argument and returns a new mblk pointer.
2978 */
2979 static mblk_t *
iptun_insert_encaplimit(iptun_t * iptun,mblk_t * mp,ip6_t * outer6,uint8_t limit)2980 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
2981 uint8_t limit)
2982 {
2983 mblk_t *newmp;
2984 iptun_ipv6hdrs_t *newouter6;
2985
2986 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6);
2987 ASSERT(mp->b_cont == NULL);
2988
2989 mp->b_rptr += sizeof (ip6_t);
2990 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED);
2991 if (newmp == NULL) {
2992 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2993 return (NULL);
2994 }
2995 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t);
2996 /* Copy the payload (Starting with the inner IPv6 header). */
2997 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp));
2998 newmp->b_wptr += MBLKL(mp);
2999 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr;
3000 /* Now copy the outer IPv6 header. */
3001 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t));
3002 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
3003 newouter6->it6h_encaplim = iptun_encaplim_init;
3004 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt;
3005 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit;
3006
3007 /*
3008 * The payload length will be set at the end of
3009 * iptun_out_process_ipv6().
3010 */
3011
3012 freemsg(mp);
3013 return (newmp);
3014 }
3015
3016 /*
3017 * Process output packets with outer IPv6 headers. Frees mp and bumps stats
3018 * on error.
3019 */
3020 static mblk_t *
iptun_out_process_ipv6(iptun_t * iptun,mblk_t * mp,ip6_t * outer6,ipha_t * inner4,ip6_t * inner6,ip_xmit_attr_t * ixa)3021 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
3022 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
3023 {
3024 uint8_t *innerptr = (inner4 != NULL ?
3025 (uint8_t *)inner4 : (uint8_t *)inner6);
3026 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
3027 uint8_t *limit, *configlimit;
3028 uint32_t offset;
3029 iptun_ipv6hdrs_t *v6hdrs;
3030
3031 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) {
3032 /*
3033 * The inner packet is an IPv6 packet which itself contains an
3034 * encapsulation limit option. The limit variable points to
3035 * the value in the embedded option. Process the
3036 * encapsulation limit option as specified in RFC 2473.
3037 *
3038 * If limit is 0, then we've exceeded the limit and we need to
3039 * send back an ICMPv6 parameter problem message.
3040 *
3041 * If limit is > 0, then we decrement it by 1 and make sure
3042 * that the encapsulation limit option in the outer header
3043 * reflects that (adding an option if one isn't already
3044 * there).
3045 */
3046 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr);
3047 if (*limit == 0) {
3048 mp->b_rptr = (uint8_t *)inner6;
3049 offset = limit - mp->b_rptr;
3050 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB,
3051 0, offset, ixa->ixa_tsl);
3052 atomic_inc_64(&iptun->iptun_noxmtbuf);
3053 return (NULL);
3054 }
3055
3056 /*
3057 * The outer header requires an encapsulation limit option.
3058 * If there isn't one already, add one.
3059 */
3060 if (iptun->iptun_encaplimit == 0) {
3061 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6,
3062 (*limit - 1))) == NULL)
3063 return (NULL);
3064 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
3065 } else {
3066 /*
3067 * There is an existing encapsulation limit option in
3068 * the outer header. If the inner encapsulation limit
3069 * is less than the configured encapsulation limit,
3070 * update the outer encapsulation limit to reflect
3071 * this lesser value.
3072 */
3073 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
3074 configlimit =
3075 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit;
3076 if ((*limit - 1) < *configlimit)
3077 *configlimit = (*limit - 1);
3078 }
3079 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t);
3080 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt;
3081 } else {
3082 ixa->ixa_ip_hdr_length = sizeof (ip6_t);
3083 ixa->ixa_protocol = outer6->ip6_nxt;
3084 }
3085 /*
3086 * See iptun_output_process_ipv4() why we allow fragmentation for
3087 * small packets
3088 */
3089 if (mp->b_wptr - innerptr <= minmtu)
3090 ixa->ixa_flags &= ~IXAF_DONTFRAG;
3091 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL))
3092 ixa->ixa_flags |= IXAF_DONTFRAG;
3093
3094 ixa->ixa_pktlen = msgdsize(mp);
3095 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t));
3096 return (mp);
3097 }
3098
3099 /*
3100 * The IP tunneling MAC-type plugins have already done most of the header
3101 * processing and validity checks. We are simply responsible for multiplexing
3102 * down to the ip module below us.
3103 */
3104 static void
iptun_output(iptun_t * iptun,mblk_t * mp)3105 iptun_output(iptun_t *iptun, mblk_t *mp)
3106 {
3107 conn_t *connp = iptun->iptun_connp;
3108 mblk_t *newmp;
3109 int error;
3110 ip_xmit_attr_t *ixa;
3111
3112 ASSERT(mp->b_datap->db_type == M_DATA);
3113
3114 if (mp->b_cont != NULL) {
3115 if ((newmp = msgpullup(mp, -1)) == NULL) {
3116 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
3117 return;
3118 }
3119 freemsg(mp);
3120 mp = newmp;
3121 }
3122
3123 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
3124 iptun_output_6to4(iptun, mp);
3125 return;
3126 }
3127
3128 if (is_system_labeled()) {
3129 /*
3130 * Since the label can be different meaning a potentially
3131 * different IRE,we always use a unique ip_xmit_attr_t.
3132 */
3133 ixa = conn_get_ixa_exclusive(connp);
3134 } else {
3135 /*
3136 * If no other thread is using conn_ixa this just gets a
3137 * reference to conn_ixa. Otherwise we get a safe copy of
3138 * conn_ixa.
3139 */
3140 ixa = conn_get_ixa(connp, B_FALSE);
3141 }
3142 if (ixa == NULL) {
3143 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3144 return;
3145 }
3146
3147 /*
3148 * In case we got a safe copy of conn_ixa, then we need
3149 * to fill in any pointers in it.
3150 */
3151 if (ixa->ixa_ire == NULL) {
3152 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
3153 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
3154 NULL, NULL, 0);
3155 if (error != 0) {
3156 if (ixa->ixa_ire != NULL &&
3157 (error == EHOSTUNREACH || error == ENETUNREACH)) {
3158 /*
3159 * Let conn_ip_output/ire_send_noroute return
3160 * the error and send any local ICMP error.
3161 */
3162 error = 0;
3163 } else {
3164 ixa_refrele(ixa);
3165 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3166 return;
3167 }
3168 }
3169 }
3170
3171 iptun_output_common(iptun, ixa, mp);
3172 ixa_refrele(ixa);
3173 }
3174
3175 /*
3176 * We use an ixa based on the last destination.
3177 */
3178 static void
iptun_output_6to4(iptun_t * iptun,mblk_t * mp)3179 iptun_output_6to4(iptun_t *iptun, mblk_t *mp)
3180 {
3181 conn_t *connp = iptun->iptun_connp;
3182 ipha_t *outer4, *inner4;
3183 ip6_t *outer6, *inner6;
3184 ip_xmit_attr_t *ixa;
3185 ip_xmit_attr_t *oldixa;
3186 int error;
3187 boolean_t need_connect;
3188 in6_addr_t v6dst;
3189
3190 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */
3191
3192 /* Make sure we set ipha_dst before we look at ipha_dst */
3193
3194 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6);
3195 ASSERT(outer4 != NULL);
3196 if (!iptun_out_process_6to4(iptun, outer4, inner6)) {
3197 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3198 return;
3199 }
3200
3201 if (is_system_labeled()) {
3202 /*
3203 * Since the label can be different meaning a potentially
3204 * different IRE,we always use a unique ip_xmit_attr_t.
3205 */
3206 ixa = conn_get_ixa_exclusive(connp);
3207 } else {
3208 /*
3209 * If no other thread is using conn_ixa this just gets a
3210 * reference to conn_ixa. Otherwise we get a safe copy of
3211 * conn_ixa.
3212 */
3213 ixa = conn_get_ixa(connp, B_FALSE);
3214 }
3215 if (ixa == NULL) {
3216 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3217 return;
3218 }
3219
3220 mutex_enter(&connp->conn_lock);
3221 if (connp->conn_v4lastdst == outer4->ipha_dst) {
3222 need_connect = (ixa->ixa_ire == NULL);
3223 } else {
3224 /* In case previous destination was multirt */
3225 ip_attr_newdst(ixa);
3226
3227 /*
3228 * We later update conn_ixa when we update conn_v4lastdst
3229 * which enables subsequent packets to avoid redoing
3230 * ip_attr_connect
3231 */
3232 need_connect = B_TRUE;
3233 }
3234 mutex_exit(&connp->conn_lock);
3235
3236 /*
3237 * In case we got a safe copy of conn_ixa, or otherwise we don't
3238 * have a current ixa_ire, then we need to fill in any pointers in
3239 * the ixa.
3240 */
3241 if (need_connect) {
3242 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst);
3243
3244 /* We handle IPsec in iptun_output_common */
3245 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
3246 &v6dst, &v6dst, 0, NULL, NULL, 0);
3247 if (error != 0) {
3248 if (ixa->ixa_ire != NULL &&
3249 (error == EHOSTUNREACH || error == ENETUNREACH)) {
3250 /*
3251 * Let conn_ip_output/ire_send_noroute return
3252 * the error and send any local ICMP error.
3253 */
3254 error = 0;
3255 } else {
3256 ixa_refrele(ixa);
3257 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3258 return;
3259 }
3260 }
3261 }
3262
3263 iptun_output_common(iptun, ixa, mp);
3264
3265 /* Atomically replace conn_ixa and conn_v4lastdst */
3266 mutex_enter(&connp->conn_lock);
3267 if (connp->conn_v4lastdst != outer4->ipha_dst) {
3268 /* Remember the dst which corresponds to conn_ixa */
3269 connp->conn_v6lastdst = v6dst;
3270 oldixa = conn_replace_ixa(connp, ixa);
3271 } else {
3272 oldixa = NULL;
3273 }
3274 mutex_exit(&connp->conn_lock);
3275 ixa_refrele(ixa);
3276 if (oldixa != NULL)
3277 ixa_refrele(oldixa);
3278 }
3279
3280 /*
3281 * Check the destination/label. Modifies *mpp by adding/removing CIPSO.
3282 *
3283 * We get the label from the message in order to honor the
3284 * ULPs/IPs choice of label. This will be NULL for forwarded
3285 * packets, neighbor discovery packets and some others.
3286 */
3287 static int
iptun_output_check_label(mblk_t ** mpp,ip_xmit_attr_t * ixa)3288 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa)
3289 {
3290 cred_t *cr;
3291 int adjust;
3292 int iplen;
3293 int err;
3294 ts_label_t *effective_tsl = NULL;
3295
3296
3297 ASSERT(is_system_labeled());
3298
3299 cr = msg_getcred(*mpp, NULL);
3300 if (cr == NULL)
3301 return (0);
3302
3303 /*
3304 * We need to start with a label based on the IP/ULP above us
3305 */
3306 ip_xmit_attr_restore_tsl(ixa, cr);
3307
3308 /*
3309 * Need to update packet with any CIPSO option since
3310 * conn_ip_output doesn't do that.
3311 */
3312 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3313 ipha_t *ipha;
3314
3315 ipha = (ipha_t *)(*mpp)->b_rptr;
3316 iplen = ntohs(ipha->ipha_length);
3317 err = tsol_check_label_v4(ixa->ixa_tsl,
3318 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE,
3319 ixa->ixa_ipst, &effective_tsl);
3320 if (err != 0)
3321 return (err);
3322
3323 ipha = (ipha_t *)(*mpp)->b_rptr;
3324 adjust = (int)ntohs(ipha->ipha_length) - iplen;
3325 } else {
3326 ip6_t *ip6h;
3327
3328 ip6h = (ip6_t *)(*mpp)->b_rptr;
3329 iplen = ntohs(ip6h->ip6_plen);
3330
3331 err = tsol_check_label_v6(ixa->ixa_tsl,
3332 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE,
3333 ixa->ixa_ipst, &effective_tsl);
3334 if (err != 0)
3335 return (err);
3336
3337 ip6h = (ip6_t *)(*mpp)->b_rptr;
3338 adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
3339 }
3340
3341 if (effective_tsl != NULL) {
3342 /* Update the label */
3343 ip_xmit_attr_replace_tsl(ixa, effective_tsl);
3344 }
3345 ixa->ixa_pktlen += adjust;
3346 ixa->ixa_ip_hdr_length += adjust;
3347 return (0);
3348 }
3349
3350
3351 static void
iptun_output_common(iptun_t * iptun,ip_xmit_attr_t * ixa,mblk_t * mp)3352 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp)
3353 {
3354 ipsec_tun_pol_t *itp = iptun->iptun_itp;
3355 int outer_hlen;
3356 mblk_t *newmp;
3357 ipha_t *outer4, *inner4;
3358 ip6_t *outer6, *inner6;
3359 int error;
3360 boolean_t update_pktlen;
3361
3362 ASSERT(ixa->ixa_ire != NULL);
3363
3364 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6,
3365 &inner6);
3366 if (outer_hlen == 0) {
3367 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3368 return;
3369 }
3370
3371 /* Save IXAF_DONTFRAG value */
3372 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG;
3373
3374 /* Perform header processing. */
3375 if (outer4 != NULL) {
3376 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6,
3377 ixa);
3378 } else {
3379 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6,
3380 ixa);
3381 }
3382 if (mp == NULL)
3383 return;
3384
3385 /*
3386 * Let's hope the compiler optimizes this with "branch taken".
3387 */
3388 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
3389 /* This updates the ip_xmit_attr_t */
3390 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4,
3391 outer6, outer_hlen, ixa);
3392 if (mp == NULL) {
3393 atomic_inc_64(&iptun->iptun_oerrors);
3394 return;
3395 }
3396 if (is_system_labeled()) {
3397 /*
3398 * Might change the packet by adding/removing CIPSO.
3399 * After this caller inner* and outer* and outer_hlen
3400 * might be invalid.
3401 */
3402 error = iptun_output_check_label(&mp, ixa);
3403 if (error != 0) {
3404 ip2dbg(("label check failed (%d)\n", error));
3405 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3406 return;
3407 }
3408 }
3409
3410 /*
3411 * ipsec_tun_outbound() returns a chain of tunneled IP
3412 * fragments linked with b_next (or a single message if the
3413 * tunneled packet wasn't a fragment).
3414 * If fragcache returned a list then we need to update
3415 * ixa_pktlen for all packets in the list.
3416 */
3417 update_pktlen = (mp->b_next != NULL);
3418
3419 /*
3420 * Otherwise, we're good to go. The ixa has been updated with
3421 * instructions for outbound IPsec processing.
3422 */
3423 for (newmp = mp; newmp != NULL; newmp = mp) {
3424 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
3425
3426 atomic_inc_64(&iptun->iptun_opackets);
3427 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3428 mp = mp->b_next;
3429 newmp->b_next = NULL;
3430
3431 /*
3432 * The IXAF_DONTFRAG flag is global, but there is
3433 * a chain here. Check if we're really already
3434 * smaller than the minimum allowed MTU and reset here
3435 * appropriately. Otherwise one small packet can kill
3436 * the whole chain's path mtu discovery.
3437 * In addition, update the pktlen to the length of
3438 * the actual packet being processed.
3439 */
3440 if (update_pktlen) {
3441 ixa->ixa_pktlen = msgdsize(newmp);
3442 if (ixa->ixa_pktlen <= minmtu)
3443 ixa->ixa_flags &= ~IXAF_DONTFRAG;
3444 }
3445
3446 atomic_inc_64(&iptun->iptun_opackets);
3447 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3448
3449 error = conn_ip_output(newmp, ixa);
3450
3451 /* Restore IXAF_DONTFRAG value */
3452 ixa->ixa_flags |= dontfrag;
3453
3454 if (error == EMSGSIZE) {
3455 /* IPsec policy might have changed */
3456 (void) iptun_update_mtu(iptun, ixa, 0);
3457 }
3458 }
3459 } else {
3460 /*
3461 * The ip module will potentially apply global policy to the
3462 * packet in its output path if there's no active tunnel
3463 * policy.
3464 */
3465 ASSERT(ixa->ixa_ipsec_policy == NULL);
3466 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa);
3467 if (mp == NULL) {
3468 atomic_inc_64(&iptun->iptun_oerrors);
3469 return;
3470 }
3471 if (is_system_labeled()) {
3472 /*
3473 * Might change the packet by adding/removing CIPSO.
3474 * After this caller inner* and outer* and outer_hlen
3475 * might be invalid.
3476 */
3477 error = iptun_output_check_label(&mp, ixa);
3478 if (error != 0) {
3479 ip2dbg(("label check failed (%d)\n", error));
3480 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3481 return;
3482 }
3483 }
3484
3485 atomic_inc_64(&iptun->iptun_opackets);
3486 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3487
3488 error = conn_ip_output(mp, ixa);
3489 if (error == EMSGSIZE) {
3490 /* IPsec policy might have changed */
3491 (void) iptun_update_mtu(iptun, ixa, 0);
3492 }
3493 }
3494 if (ixa->ixa_flags & IXAF_IPSEC_SECURE)
3495 ipsec_out_release_refs(ixa);
3496 }
3497
3498 static mac_callbacks_t iptun_m_callbacks = {
3499 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO),
3500 .mc_getstat = iptun_m_getstat,
3501 .mc_start = iptun_m_start,
3502 .mc_stop = iptun_m_stop,
3503 .mc_setpromisc = iptun_m_setpromisc,
3504 .mc_multicst = iptun_m_multicst,
3505 .mc_unicst = iptun_m_unicst,
3506 .mc_tx = iptun_m_tx,
3507 .mc_reserved = NULL,
3508 .mc_setprop = iptun_m_setprop,
3509 .mc_getprop = iptun_m_getprop,
3510 .mc_propinfo = iptun_m_propinfo
3511 };
3512