1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/pattr.h>
34 #include <sys/dlpi.h>
35 #include <sys/atomic.h>
36 #include <sys/sunddi.h>
37 #include <sys/socket.h>
38 #include <sys/neti.h>
39 
40 #include <netinet/in.h>
41 #include <inet/common.h>
42 #include <inet/mib2.h>
43 #include <inet/ip.h>
44 #include <inet/ip6.h>
45 #include <inet/ip_if.h>
46 #include <inet/ip_ire.h>
47 #include <inet/ip_impl.h>
48 #include <inet/ip_ndp.h>
49 #include <inet/ipclassifier.h>
50 #include <inet/ipp_common.h>
51 #include <inet/ip_ftable.h>
52 
53 /*
54  * IPv4 netinfo entry point declarations.
55  */
56 static int 		ip_getifname(phy_if_t, char *, const size_t);
57 static int 		ip_getmtu(phy_if_t, lif_if_t);
58 static int 		ip_getpmtuenabled(void);
59 static int 		ip_getlifaddr(phy_if_t, lif_if_t, size_t,
60 			    net_ifaddr_t [], void *);
61 static phy_if_t		ip_phygetnext(phy_if_t);
62 static phy_if_t 	ip_phylookup(const char *);
63 static lif_if_t 	ip_lifgetnext(phy_if_t, lif_if_t);
64 static int 		ip_inject(inject_t, net_inject_t *);
65 static phy_if_t 	ip_routeto(struct sockaddr *);
66 static int 		ip_ispartialchecksum(mblk_t *);
67 static int 		ip_isvalidchecksum(mblk_t *);
68 
69 static int 		ipv6_getifname(phy_if_t, char *, const size_t);
70 static int 		ipv6_getmtu(phy_if_t, lif_if_t);
71 static int 		ipv6_getlifaddr(phy_if_t, lif_if_t, size_t,
72 			    net_ifaddr_t [], void *);
73 static phy_if_t 	ipv6_phygetnext(phy_if_t);
74 static phy_if_t 	ipv6_phylookup(const char *);
75 static lif_if_t 	ipv6_lifgetnext(phy_if_t, lif_if_t);
76 static int 		ipv6_inject(inject_t, net_inject_t *);
77 static phy_if_t 	ipv6_routeto(struct sockaddr *);
78 static int 		ipv6_isvalidchecksum(mblk_t *);
79 
80 /* Netinfo private functions */
81 static	int		ip_getifname_impl(phy_if_t, char *,
82 			    const size_t, boolean_t);
83 static	int		ip_getmtu_impl(phy_if_t, lif_if_t, boolean_t);
84 static	phy_if_t	ip_phylookup_impl(const char *, boolean_t);
85 static	lif_if_t	ip_lifgetnext_impl(phy_if_t, lif_if_t, boolean_t);
86 static	int		ip_inject_impl(inject_t, net_inject_t *, boolean_t);
87 static	int		ip_getifaddr_type(sa_family_t, ipif_t *, lif_if_t,
88 			    void *);
89 static	phy_if_t	ip_routeto_impl(struct sockaddr *);
90 static	int		ip_getlifaddr_impl(sa_family_t, phy_if_t, lif_if_t,
91 			    size_t, net_ifaddr_t [], struct sockaddr *);
92 static	void		ip_ni_queue_in_func(void *);
93 static	void		ip_ni_queue_out_func(void *);
94 static	void		ip_ni_queue_func_impl(injection_t *,  boolean_t);
95 
96 
/*
 * IPv4 netinfo function table; passed to net_register() by ip_net_init().
 * The per-entry notes restate the header comments of the functions named.
 */
static net_info_t ipv4info = {
	NETINFO_VERSION,
	NHF_INET,
	ip_getifname,		/* name of an interface */
	ip_getmtu,		/* MTU of an interface */
	ip_getpmtuenabled,	/* is path MTU discovery enabled */
	ip_getlifaddr,		/* addresses of a logical interface */
	ip_phygetnext,		/* next physical interface */
	ip_phylookup,		/* physical interface by name */
	ip_lifgetnext,		/* next logical interface */
	ip_inject,		/* inject a packet */
	ip_routeto,		/* interface used to reach an address */
	ip_ispartialchecksum,	/* checksum offload state of a packet */
	ip_isvalidchecksum	/* validate the IPv4 header checksum */
};
112 
113 
/*
 * IPv6 netinfo function table; passed to net_register() by ip_net_init().
 * ip_getpmtuenabled and ip_ispartialchecksum are shared with the IPv4 table.
 */
static net_info_t ipv6info = {
	NETINFO_VERSION,
	NHF_INET6,
	ipv6_getifname,		/* name of an interface */
	ipv6_getmtu,		/* MTU of an interface */
	ip_getpmtuenabled,	/* is path MTU discovery enabled */
	ipv6_getlifaddr,	/* addresses of a logical interface */
	ipv6_phygetnext,	/* next physical interface */
	ipv6_phylookup,		/* physical interface by name */
	ipv6_lifgetnext,	/* next logical interface */
	ipv6_inject,		/* inject a packet */
	ipv6_routeto,		/* interface used to reach an address */
	ip_ispartialchecksum,	/* checksum offload state of a packet */
	ipv6_isvalidchecksum	/* always returns -1 (unsupported) */
};
129 
/*
 * The taskq eventq_queue_in is used to process the upside inject messages.
 * The taskq eventq_queue_out is used to process the downside inject messages.
 * The taskq eventq_queue_nic is used to process the nic event messages.
 *
 * All three are created in ip_net_init() and destroyed in ip_net_destroy().
 * A pointer stays NULL if ddi_taskq_create() failed.  eventq_queue_nic is
 * not static, so it is visible outside this file.
 */
static ddi_taskq_t 	*eventq_queue_in = NULL;
static ddi_taskq_t 	*eventq_queue_out = NULL;
ddi_taskq_t 	*eventq_queue_nic = NULL;
138 
/*
 * Hook families, registered with net_register_family() by ipv4_hook_init()
 * and ipv6_hook_init().
 */
static hook_family_t	ipv4root;
static hook_family_t	ipv6root;

/*
 * Hooks for firewalling
 *
 * One hook_event_t per event; each is set up with HOOK_EVENT_INIT() and
 * registered with net_register_event() in the matching *_hook_init().
 */
hook_event_t		ip4_physical_in_event;
hook_event_t		ip4_physical_out_event;
hook_event_t		ip4_forwarding_event;
hook_event_t		ip4_loopback_in_event;
hook_event_t		ip4_loopback_out_event;
hook_event_t		ip4_nic_events;
hook_event_t		ip6_physical_in_event;
hook_event_t		ip6_physical_out_event;
hook_event_t		ip6_forwarding_event;
hook_event_t		ip6_loopback_in_event;
hook_event_t		ip6_loopback_out_event;
hook_event_t		ip6_nic_events;

/*
 * Tokens returned by net_register_event() for the events above.  A token is
 * NULL if registration failed, and is reset to NULL once the event has been
 * unregistered successfully.
 */
hook_event_token_t	ipv4firewall_physical_in;
hook_event_token_t	ipv4firewall_physical_out;
hook_event_token_t	ipv4firewall_forwarding;
hook_event_token_t	ipv4firewall_loopback_in;
hook_event_token_t	ipv4firewall_loopback_out;
hook_event_token_t	ipv4nicevents;
hook_event_token_t	ipv6firewall_physical_in;
hook_event_token_t	ipv6firewall_physical_out;
hook_event_token_t	ipv6firewall_forwarding;
hook_event_token_t	ipv6firewall_loopback_in;
hook_event_token_t	ipv6firewall_loopback_out;
hook_event_token_t	ipv6nicevents;

/* netinfo handles returned by net_register() in ip_net_init() */
net_data_t		ipv4 = NULL;
net_data_t		ipv6 = NULL;
173 
174 
175 /*
176  * Register IPv4 and IPv6 netinfo functions and initialize queues for inject.
177  */
178 void
179 ip_net_init()
180 {
181 	ipv4 = net_register(&ipv4info);
182 	ASSERT(ipv4 != NULL);
183 
184 	ipv6 = net_register(&ipv6info);
185 	ASSERT(ipv6 != NULL);
186 
187 	if (eventq_queue_out == NULL) {
188 		eventq_queue_out = ddi_taskq_create(NULL,
189 		    "IP_INJECT_QUEUE_OUT", 1, TASKQ_DEFAULTPRI, 0);
190 
191 		if (eventq_queue_out == NULL)
192 			cmn_err(CE_NOTE, "ipv4_net_init: "
193 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_OUT");
194 	}
195 
196 	if (eventq_queue_in == NULL) {
197 		eventq_queue_in = ddi_taskq_create(NULL,
198 		    "IP_INJECT_QUEUE_IN", 1, TASKQ_DEFAULTPRI, 0);
199 
200 		if (eventq_queue_in == NULL)
201 			cmn_err(CE_NOTE, "ipv4_net_init: "
202 			    "ddi_taskq_create failed for IP_INJECT_QUEUE_IN");
203 	}
204 
205 	if (eventq_queue_nic == NULL) {
206 		eventq_queue_nic = ddi_taskq_create(NULL,
207 		    "IP_NIC_EVENT_QUEUE", 1, TASKQ_DEFAULTPRI, 0);
208 
209 		if (eventq_queue_nic == NULL)
210 			cmn_err(CE_NOTE, "ipv4_net_init: "
211 			    "ddi_taskq_create failed for IP_NIC_EVENT_QUEUE");
212 	}
213 }
214 
215 /*
216  * Unregister IPv4 and IPv6 functions and inject queues
217  */
218 void
219 ip_net_destroy()
220 {
221 	if (eventq_queue_nic != NULL) {
222 		ddi_taskq_destroy(eventq_queue_nic);
223 		eventq_queue_nic = NULL;
224 	}
225 
226 	if (eventq_queue_in != NULL) {
227 		ddi_taskq_destroy(eventq_queue_in);
228 		eventq_queue_in = NULL;
229 	}
230 
231 	if (eventq_queue_out != NULL) {
232 		ddi_taskq_destroy(eventq_queue_out);
233 		eventq_queue_out = NULL;
234 	}
235 
236 	if (ipv4 != NULL) {
237 		if (net_unregister(ipv4) == 0)
238 			ipv4 = NULL;
239 	}
240 
241 	if (ipv6 != NULL) {
242 		if (net_unregister(ipv6) == 0)
243 			ipv6 = NULL;
244 	}
245 }
246 
247 /*
248  * Initialize IPv4 hooks family the event
249  */
250 void
251 ipv4_hook_init()
252 {
253 	HOOK_FAMILY_INIT(&ipv4root, Hn_IPV4);
254 	if (net_register_family(ipv4, &ipv4root) != 0) {
255 		cmn_err(CE_NOTE, "ipv4_hook_init: "
256 		    "net_register_family failed for ipv4");
257 	}
258 
259 	HOOK_EVENT_INIT(&ip4_physical_in_event, NH_PHYSICAL_IN);
260 	ipv4firewall_physical_in = net_register_event(ipv4,
261 	    &ip4_physical_in_event);
262 	if (ipv4firewall_physical_in == NULL) {
263 		cmn_err(CE_NOTE, "ipv4_hook_init: "
264 		    "net_register_event failed for ipv4/physical_in");
265 	}
266 
267 	HOOK_EVENT_INIT(&ip4_physical_out_event, NH_PHYSICAL_OUT);
268 	ipv4firewall_physical_out = net_register_event(ipv4,
269 	    &ip4_physical_out_event);
270 	if (ipv4firewall_physical_out == NULL) {
271 		cmn_err(CE_NOTE, "ipv4_hook_init: "
272 		    "net_register_event failed for ipv4/physical_out");
273 	}
274 
275 	HOOK_EVENT_INIT(&ip4_forwarding_event, NH_FORWARDING);
276 	ipv4firewall_forwarding = net_register_event(ipv4,
277 	    &ip4_forwarding_event);
278 	if (ipv4firewall_forwarding == NULL) {
279 		cmn_err(CE_NOTE, "ipv4_hook_init: "
280 		    "net_register_event failed for ipv4/forwarding");
281 	}
282 
283 	HOOK_EVENT_INIT(&ip4_loopback_in_event, NH_LOOPBACK_IN);
284 	ipv4firewall_loopback_in = net_register_event(ipv4,
285 	    &ip4_loopback_in_event);
286 	if (ipv4firewall_loopback_in == NULL) {
287 		cmn_err(CE_NOTE, "ipv4_hook_init: "
288 		    "net_register_event failed for ipv4/loopback_in");
289 	}
290 
291 	HOOK_EVENT_INIT(&ip4_loopback_out_event, NH_LOOPBACK_OUT);
292 	ipv4firewall_loopback_out = net_register_event(ipv4,
293 	    &ip4_loopback_out_event);
294 	if (ipv4firewall_loopback_out == NULL) {
295 		cmn_err(CE_NOTE, "ipv4_hook_init: "
296 		    "net_register_event failed for ipv4/loopback_out");
297 	}
298 
299 	HOOK_EVENT_INIT(&ip4_nic_events, NH_NIC_EVENTS);
300 	ip4_nic_events.he_flags = HOOK_RDONLY;
301 	ipv4nicevents = net_register_event(ipv4, &ip4_nic_events);
302 	if (ipv4nicevents == NULL) {
303 		cmn_err(CE_NOTE, "ipv4_hook_init: "
304 		    "net_register_event failed for ipv4/nic_events");
305 	}
306 }
307 
308 void
309 ipv4_hook_destroy()
310 {
311 	if (ipv4firewall_forwarding != NULL) {
312 		if (net_unregister_event(ipv4, &ip4_forwarding_event) == 0)
313 			ipv4firewall_forwarding = NULL;
314 	}
315 
316 	if (ipv4firewall_physical_in != NULL) {
317 		if (net_unregister_event(ipv4, &ip4_physical_in_event) == 0)
318 			ipv4firewall_physical_in = NULL;
319 	}
320 
321 	if (ipv4firewall_physical_out != NULL) {
322 		if (net_unregister_event(ipv4, &ip4_physical_out_event) == 0)
323 			ipv4firewall_physical_out = NULL;
324 	}
325 
326 	if (ipv4firewall_loopback_in != NULL) {
327 		if (net_unregister_event(ipv4, &ip4_loopback_in_event) == 0)
328 			ipv4firewall_loopback_in = NULL;
329 	}
330 
331 	if (ipv4firewall_loopback_out != NULL) {
332 		if (net_unregister_event(ipv4, &ip4_loopback_out_event) == 0)
333 			ipv4firewall_loopback_out = NULL;
334 	}
335 
336 	if (ipv4nicevents != NULL) {
337 		if (net_unregister_event(ipv4, &ip4_nic_events) == 0)
338 			ipv4nicevents = NULL;
339 	}
340 
341 	(void) net_unregister_family(ipv4, &ipv4root);
342 }
343 
344 /*
345  * Initialize IPv6 hooks family and event
346  */
347 void
348 ipv6_hook_init()
349 {
350 
351 	HOOK_FAMILY_INIT(&ipv6root, Hn_IPV6);
352 	if (net_register_family(ipv6, &ipv6root) != 0) {
353 		cmn_err(CE_NOTE, "ipv6_hook_init: "
354 		    "net_register_family failed for ipv6");
355 	}
356 
357 	HOOK_EVENT_INIT(&ip6_physical_in_event, NH_PHYSICAL_IN);
358 	ipv6firewall_physical_in = net_register_event(ipv6,
359 	    &ip6_physical_in_event);
360 	if (ipv6firewall_physical_in == NULL) {
361 		cmn_err(CE_NOTE, "ipv6_hook_init: "
362 		    "net_register_event failed for ipv6/physical_in");
363 	}
364 
365 	HOOK_EVENT_INIT(&ip6_physical_out_event, NH_PHYSICAL_OUT);
366 	ipv6firewall_physical_out = net_register_event(ipv6,
367 	    &ip6_physical_out_event);
368 	if (ipv6firewall_physical_out == NULL) {
369 		cmn_err(CE_NOTE, "ipv6_hook_init: "
370 		    "net_register_event failed for ipv6/physical_out");
371 	}
372 
373 	HOOK_EVENT_INIT(&ip6_forwarding_event, NH_FORWARDING);
374 	ipv6firewall_forwarding = net_register_event(ipv6,
375 	    &ip6_forwarding_event);
376 	if (ipv6firewall_forwarding == NULL) {
377 		cmn_err(CE_NOTE, "ipv6_hook_init: "
378 		    "net_register_event failed for ipv6/forwarding");
379 	}
380 
381 	HOOK_EVENT_INIT(&ip6_loopback_in_event, NH_LOOPBACK_IN);
382 	ipv6firewall_loopback_in = net_register_event(ipv6,
383 	    &ip6_loopback_in_event);
384 	if (ipv6firewall_loopback_in == NULL) {
385 		cmn_err(CE_NOTE, "ipv6_hook_init: "
386 		    "net_register_event failed for ipv6/loopback_in");
387 	}
388 
389 	HOOK_EVENT_INIT(&ip6_loopback_out_event, NH_LOOPBACK_OUT);
390 	ipv6firewall_loopback_out = net_register_event(ipv6,
391 	    &ip6_loopback_out_event);
392 	if (ipv6firewall_loopback_out == NULL) {
393 		cmn_err(CE_NOTE, "ipv6_hook_init: "
394 		    "net_register_event failed for ipv6/loopback_out");
395 	}
396 
397 	HOOK_EVENT_INIT(&ip6_nic_events, NH_NIC_EVENTS);
398 	ip6_nic_events.he_flags = HOOK_RDONLY;
399 	ipv6nicevents = net_register_event(ipv6, &ip6_nic_events);
400 	if (ipv6nicevents == NULL) {
401 		cmn_err(CE_NOTE, "ipv6_hook_init: "
402 		    "net_register_event failed for ipv6/nic_events");
403 	}
404 }
405 
406 void
407 ipv6_hook_destroy()
408 {
409 	if (ipv6firewall_forwarding != NULL) {
410 		if (net_unregister_event(ipv6, &ip6_forwarding_event) == 0)
411 			ipv6firewall_forwarding = NULL;
412 	}
413 
414 	if (ipv6firewall_physical_in != NULL) {
415 		if (net_unregister_event(ipv6, &ip6_physical_in_event) == 0)
416 			ipv6firewall_physical_in = NULL;
417 	}
418 
419 	if (ipv6firewall_physical_out != NULL) {
420 		if (net_unregister_event(ipv6, &ip6_physical_out_event) == 0)
421 			ipv6firewall_physical_out = NULL;
422 	}
423 
424 	if (ipv6firewall_loopback_in != NULL) {
425 		if (net_unregister_event(ipv6, &ip6_loopback_in_event) == 0)
426 			ipv6firewall_loopback_in = NULL;
427 	}
428 
429 	if (ipv6firewall_loopback_out != NULL) {
430 		if (net_unregister_event(ipv6, &ip6_loopback_out_event) == 0)
431 			ipv6firewall_loopback_out = NULL;
432 	}
433 
434 	if (ipv6nicevents != NULL) {
435 		if (net_unregister_event(ipv6, &ip6_nic_events) == 0)
436 			ipv6nicevents = NULL;
437 	}
438 
439 	(void) net_unregister_family(ipv6, &ipv6root);
440 }
441 
442 /*
443  * Determine the name of an IPv4 interface
444  */
445 static int
446 ip_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen)
447 {
448 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_FALSE));
449 }
450 
451 /*
452  * Determine the name of an IPv6 interface
453  */
454 static int
455 ipv6_getifname(phy_if_t phy_ifdata, char *buffer, const size_t buflen)
456 {
457 	return (ip_getifname_impl(phy_ifdata, buffer, buflen, B_TRUE));
458 }
459 
460 /*
461  * Shared implementation to determine the name of a given network interface
462  */
463 /* ARGSUSED */
464 static int
465 ip_getifname_impl(phy_if_t phy_ifdata,
466     char *buffer, const size_t buflen, boolean_t isv6)
467 {
468 	ill_t *ill;
469 
470 	ASSERT(buffer != NULL);
471 
472 	ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL,
473 	    NULL, NULL);
474 	if (ill == NULL)
475 		return (1);
476 
477 	if (ill->ill_name != NULL) {
478 		(void) strlcpy(buffer, ill->ill_name, buflen);
479 		ill_refrele(ill);
480 		return (0);
481 	} else {
482 		ill_refrele(ill);
483 		return (1);
484 	}
485 
486 }
487 
488 /*
489  * Determine the MTU of an IPv4 network interface
490  */
491 static int
492 ip_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata)
493 {
494 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_FALSE));
495 }
496 
497 /*
498  * Determine the MTU of an IPv6 network interface
499  */
500 static int
501 ipv6_getmtu(phy_if_t phy_ifdata, lif_if_t ifdata)
502 {
503 	return (ip_getmtu_impl(phy_ifdata, ifdata, B_TRUE));
504 }
505 
506 /*
507  * Shared implementation to determine the MTU of a network interface
508  */
509 /* ARGSUSED */
510 static int
511 ip_getmtu_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6)
512 {
513 	lif_if_t ipifid;
514 	ipif_t *ipif;
515 	int mtu;
516 
517 	ipifid = UNMAP_IPIF_ID(ifdata);
518 
519 	ipif = ipif_getby_indexes((uint_t)phy_ifdata, (uint_t)ipifid, isv6);
520 	if (ipif == NULL)
521 		return (0);
522 
523 	mtu = ipif->ipif_mtu;
524 	ipif_refrele(ipif);
525 
526 	if (mtu == 0) {
527 		ill_t *ill;
528 
529 		if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6,
530 		    NULL, NULL, NULL, NULL)) == NULL) {
531 			return (0);
532 		}
533 		mtu = ill->ill_max_frag;
534 		ill_refrele(ill);
535 	}
536 
537 	return (mtu);
538 }
539 
540 /*
541  * Determine if path MTU discovery is enabled for IP
542  */
543 static int
544 ip_getpmtuenabled(void)
545 {
546 	return (ip_path_mtu_discovery);
547 }
548 
549 /*
550  * Get next interface from the current list of IPv4 physical network interfaces
551  */
552 static phy_if_t
553 ip_phygetnext(phy_if_t phy_ifdata)
554 {
555 	return (ill_get_next_ifindex(phy_ifdata, B_FALSE));
556 }
557 
558 /*
559  * Get next interface from the current list of IPv6 physical network interfaces
560  */
561 static phy_if_t
562 ipv6_phygetnext(phy_if_t phy_ifdata)
563 {
564 	return (ill_get_next_ifindex(phy_ifdata, B_TRUE));
565 }
566 
567 /*
568  * Determine if a network interface name exists for IPv4
569  */
570 static phy_if_t
571 ip_phylookup(const char *name)
572 {
573 	return (ip_phylookup_impl(name, B_FALSE));
574 
575 }
576 
577 /*
578  * Determine if a network interface name exists for IPv6
579  */
580 static phy_if_t
581 ipv6_phylookup(const char *name)
582 {
583 	return (ip_phylookup_impl(name, B_TRUE));
584 }
585 
586 /*
587  * Implement looking up an ill_t based on the name supplied and matching
588  * it up with either IPv4 or IPv6.  ill_get_ifindex_by_name() is not used
589  * because it does not match on the address family in addition to the name.
590  */
591 static phy_if_t
592 ip_phylookup_impl(const char *name, boolean_t isv6)
593 {
594 	phy_if_t phy;
595 	ill_t *ill;
596 
597 	ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL,
598 	    NULL, NULL, NULL);
599 
600 	if (ill == NULL)
601 		return (0);
602 
603 	phy = ill->ill_phyint->phyint_ifindex;
604 
605 	ill_refrele(ill);
606 
607 	return (phy);
608 }
609 
610 /*
611  * Get next interface from the current list of IPv4 logical network interfaces
612  */
613 static lif_if_t
614 ip_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata)
615 {
616 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_FALSE));
617 }
618 
619 /*
620  * Get next interface from the current list of IPv6 logical network interfaces
621  */
622 static lif_if_t
623 ipv6_lifgetnext(phy_if_t phy_ifdata, lif_if_t ifdata)
624 {
625 	return (ip_lifgetnext_impl(phy_ifdata, ifdata, B_TRUE));
626 }
627 
628 /*
629  * Shared implementation to get next interface from the current list of
630  * logical network interfaces
631  */
632 static lif_if_t
633 ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6)
634 {
635 	lif_if_t newidx, oldidx;
636 	boolean_t nextok;
637 	ipif_t *ipif;
638 	ill_t *ill;
639 
640 	ill = ill_lookup_on_ifindex(phy_ifdata, isv6, NULL, NULL, NULL, NULL);
641 	if (ill == NULL)
642 		return (0);
643 
644 	if (ifdata != 0) {
645 		oldidx = UNMAP_IPIF_ID(ifdata);
646 		nextok = B_FALSE;
647 	} else {
648 		oldidx = 0;
649 		nextok = B_TRUE;
650 	}
651 
652 	mutex_enter(&ill->ill_lock);
653 	if (ill->ill_state_flags & ILL_CONDEMNED) {
654 		mutex_exit(&ill->ill_lock);
655 		ill_refrele(ill);
656 		return (0);
657 	}
658 
659 	/*
660 	 * It's safe to iterate the ill_ipif list when holding an ill_lock.
661 	 * And it's also safe to access ipif_id without ipif refhold.
662 	 * See ipif_get_id().
663 	 */
664 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
665 		if (!IPIF_CAN_LOOKUP(ipif))
666 			continue;
667 		if (nextok) {
668 			ipif_refhold_locked(ipif);
669 			break;
670 		} else if (oldidx == ipif->ipif_id) {
671 			nextok = B_TRUE;
672 		}
673 	}
674 
675 	mutex_exit(&ill->ill_lock);
676 	ill_refrele(ill);
677 
678 	if (ipif == NULL)
679 		return (0);
680 
681 	newidx = ipif->ipif_id;
682 	ipif_refrele(ipif);
683 
684 	return (MAP_IPIF_ID(newidx));
685 }
686 
687 /*
688  * Inject an IPv4 packet to or from an interface
689  */
690 static int
691 ip_inject(inject_t style, net_inject_t *packet)
692 {
693 	return (ip_inject_impl(style, packet, B_FALSE));
694 }
695 
696 
697 /*
698  * Inject an IPv6 packet to or from an interface
699  */
700 static int
701 ipv6_inject(inject_t style, net_inject_t *packet)
702 {
703 	return (ip_inject_impl(style, packet, B_TRUE));
704 }
705 
706 /*
707  * Shared implementation to inject a packet to or from an interface
708  * Return value:
709  *   0: successful
710  *  -1: memory allocation failed
711  *   1: other errors
712  */
713 static int
714 ip_inject_impl(inject_t style, net_inject_t *packet, boolean_t isv6)
715 {
716 	struct sockaddr_in6 *sin6;
717 	ddi_taskq_t *tq = NULL;
718 	void (* func)(void*);
719 	injection_t *inject;
720 	ip6_t *ip6h;
721 	ire_t *ire;
722 	mblk_t *mp;
723 
724 	ASSERT(packet != NULL);
725 	ASSERT(packet->ni_packet != NULL);
726 	ASSERT(packet->ni_packet->b_datap->db_type == M_DATA);
727 
728 	switch (style) {
729 	case NI_QUEUE_IN:
730 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
731 		if (inject == NULL)
732 			return (-1);
733 		inject->inj_data = *packet;
734 		inject->inj_isv6 = isv6;
735 		/*
736 		 * deliver up into the kernel, immitating its reception by a
737 		 * network interface, add to list and schedule timeout
738 		 */
739 		func = ip_ni_queue_in_func;
740 		tq = eventq_queue_in;
741 		break;
742 
743 	case NI_QUEUE_OUT:
744 		inject = kmem_alloc(sizeof (*inject), KM_NOSLEEP);
745 		if (inject == NULL)
746 			return (-1);
747 		inject->inj_data = *packet;
748 		inject->inj_isv6 = isv6;
749 		/*
750 		 * deliver out of the kernel, as if it were being sent via a
751 		 * raw socket so that IPFilter will see it again, add to list
752 		 * and schedule timeout
753 		 */
754 		func = ip_ni_queue_out_func;
755 		tq = eventq_queue_out;
756 		break;
757 
758 	case NI_DIRECT_OUT:
759 		/*
760 		 * Note:
761 		 * For IPv4, the code path below will be greatly simplified
762 		 * with the delivery of surya - it will become a single
763 		 * function call to X.  A follow on project is aimed to
764 		 * provide similar functionality for IPv6.
765 		 */
766 		mp = packet->ni_packet;
767 
768 		if (!isv6) {
769 			struct sockaddr *sock;
770 
771 			sock = (struct sockaddr *)&packet->ni_addr;
772 			/*
773 			 * ipfil_sendpkt was provided by surya to ease the
774 			 * problems associated with sending out a packet.
775 			 * Currently this function only supports IPv4.
776 			 */
777 			switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
778 			    ALL_ZONES)) {
779 			case 0 :
780 			case EINPROGRESS:
781 				return (0);
782 			case ECOMM :
783 			case ENONET :
784 				return (1);
785 			default :
786 				return (1);
787 			}
788 			/* NOTREACHED */
789 
790 		}
791 
792 		ip6h = (ip6_t *)mp->b_rptr;
793 		sin6 = (struct sockaddr_in6 *)&packet->ni_addr;
794 		ASSERT(sin6->sin6_family == AF_INET6);
795 
796 		ire = ire_route_lookup_v6(&sin6->sin6_addr, 0, 0, 0,
797 		    NULL, NULL, ALL_ZONES, NULL,
798 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
799 
800 		if (ire == NULL) {
801 			ip2dbg(("ip_inject: ire_cache_lookup failed\n"));
802 			freemsg(mp);
803 			return (1);
804 		}
805 
806 		if (ire->ire_stq == NULL) {
807 			/* Send to loopback destination. */
808 			if (ire->ire_rfq == NULL) {
809 				ip2dbg(("ip_inject: bad nexthop\n"));
810 				ire_refrele(ire);
811 				freemsg(mp);
812 				return (1);
813 			}
814 			ip_wput_local_v6(ire->ire_rfq,
815 			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0);
816 			ire_refrele(ire);
817 			return (0);
818 		}
819 
820 		mp->b_queue = ire->ire_stq;
821 
822 		if (ire->ire_nce == NULL ||
823 		    ire->ire_nce->nce_fp_mp == NULL &&
824 		    ire->ire_nce->nce_res_mp == NULL) {
825 			ip_newroute_v6(ire->ire_stq, mp,
826 			    &sin6->sin6_addr, NULL, NULL, ALL_ZONES);
827 
828 			ire_refrele(ire);
829 			return (0);
830 		} else {
831 			/* prepend L2 header for IPv6 packets. */
832 			mblk_t *llmp;
833 
834 			/*
835 			 * Lock IREs, see 6420438
836 			 */
837 			mutex_enter(&ire->ire_lock);
838 			llmp = ire->ire_nce->nce_fp_mp ?
839 			    ire->ire_nce->nce_fp_mp :
840 			    ire->ire_nce->nce_res_mp;
841 
842 			if ((mp = dupb(llmp)) == NULL &&
843 			    (mp = copyb(llmp)) == NULL) {
844 				ip2dbg(("ip_inject: llhdr failed\n"));
845 				mutex_exit(&ire->ire_lock);
846 				ire_refrele(ire);
847 				freemsg(mp);
848 				return (1);
849 			}
850 			mutex_exit(&ire->ire_lock);
851 			linkb(mp, packet->ni_packet);
852 		}
853 
854 		mp->b_queue = ire->ire_stq;
855 
856 		break;
857 	default:
858 		freemsg(packet->ni_packet);
859 		return (1);
860 	}
861 
862 	if (tq) {
863 		if (ddi_taskq_dispatch(tq, func, (void *)inject,
864 		    DDI_SLEEP) == DDI_FAILURE) {
865 			ip2dbg(("ip_inject:  ddi_taskq_dispatch failed\n"));
866 			freemsg(packet->ni_packet);
867 			return (1);
868 		}
869 	} else {
870 		putnext(ire->ire_stq, mp);
871 		ire_refrele(ire);
872 	}
873 
874 	return (0);
875 }
876 
877 /*
878  * Find the interface used for traffic to a given IPv4 address
879  */
880 static phy_if_t
881 ip_routeto(struct sockaddr *address)
882 {
883 	ASSERT(address != NULL);
884 
885 	if (address->sa_family != AF_INET)
886 		return (0);
887 	return (ip_routeto_impl(address));
888 }
889 
890 /*
891  * Find the interface used for traffic to a given IPv6 address
892  */
893 static phy_if_t
894 ipv6_routeto(struct sockaddr *address)
895 {
896 	ASSERT(address != NULL);
897 
898 	if (address->sa_family != AF_INET6)
899 		return (0);
900 	return (ip_routeto_impl(address));
901 }
902 
903 
904 /*
905  * Find the interface used for traffic to an address
906  */
907 static phy_if_t
908 ip_routeto_impl(struct sockaddr *address)
909 {
910 	ire_t *ire;
911 	ill_t *ill;
912 	phy_if_t phy_if;
913 
914 	if (address->sa_family == AF_INET6) {
915 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)address;
916 		ire = ire_route_lookup_v6(&sin6->sin6_addr, NULL,
917 		    0, 0, NULL, NULL, ALL_ZONES, NULL,
918 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
919 	} else {
920 		struct sockaddr_in *sin = (struct sockaddr_in *)address;
921 		ire = ire_route_lookup(sin->sin_addr.s_addr, 0,
922 		    0, 0, NULL, NULL, ALL_ZONES, NULL,
923 		    MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE);
924 	}
925 
926 	if (ire == NULL)
927 		return (0);
928 
929 	ill = ire_to_ill(ire);
930 	if (ill == NULL)
931 		return (0);
932 
933 	ASSERT(ill != NULL);
934 	phy_if = (phy_if_t)ill->ill_phyint->phyint_ifindex;
935 	ire_refrele(ire);
936 
937 	return (phy_if);
938 }
939 
940 /*
941  * Determine if checksumming is being used for the given packet.
942  *
943  * Return value:
944  *   NET_HCK_NONE: full checksum recalculation is required
945  *   NET_HCK_L3_FULL: full layer 3 checksum
946  *   NET_HCK_L4_FULL: full layer 4 checksum
947  *   NET_HCK_L4_PART: partial layer 4 checksum
948  */
949 static int
950 ip_ispartialchecksum(mblk_t *mp)
951 {
952 	int ret = 0;
953 
954 	ASSERT(mp != NULL);
955 
956 	if ((DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) != 0) {
957 		ret |= (int)NET_HCK_L4_FULL;
958 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
959 			ret |= (int)NET_HCK_L3_FULL;
960 	}
961 	if ((DB_CKSUMFLAGS(mp) & HCK_PARTIALCKSUM) != 0) {
962 		ret |= (int)NET_HCK_L4_PART;
963 		if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) != 0)
964 			ret |= (int)NET_HCK_L3_FULL;
965 	}
966 
967 	return (ret);
968 }
969 
970 /*
971  * Return true or false, indicating whether the network and transport
972  * headers are correct.  Use the capabilities flags and flags set in the
973  * dblk_t to determine whether or not the checksum is valid.
974  *
975  * Return:
976  *   0: the checksum was incorrect
977  *   1: the original checksum was correct
978  */
979 static int
980 ip_isvalidchecksum(mblk_t *mp)
981 {
982 	unsigned char *wptr;
983 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
984 	int hlen;
985 	int ret;
986 
987 	ASSERT(mp != NULL);
988 
989 	if (dohwcksum &&
990 	    DB_CKSUM16(mp) != 0xFFFF &&
991 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM) &&
992 	    (DB_CKSUMFLAGS(mp) & HCK_FULLCKSUM_OK) &&
993 	    (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM))
994 		return (1);
995 
996 	hlen = (ipha->ipha_version_and_hdr_length & 0x0F) << 2;
997 
998 	/*
999 	 * Check that the mblk being passed in has enough data in it
1000 	 * before blindly checking ip_cksum.
1001 	 */
1002 	if (msgdsize(mp) < hlen)
1003 		return (0);
1004 
1005 	if (mp->b_wptr < mp->b_rptr + hlen) {
1006 		if (pullupmsg(mp, hlen) == 0)
1007 			return (0);
1008 		wptr = mp->b_wptr;
1009 	} else {
1010 		wptr = mp->b_wptr;
1011 		mp->b_wptr = mp->b_rptr + hlen;
1012 	}
1013 
1014 	if (ipha->ipha_hdr_checksum == ip_cksum(mp, 0, ipha->ipha_hdr_checksum))
1015 		ret = 1;
1016 	else
1017 		ret = 0;
1018 	mp->b_wptr = wptr;
1019 
1020 	return (ret);
1021 }
1022 
/*
 * Unsupported with IPv6
 *
 * The packet is not examined; -1 is always returned, which is distinct
 * from the 0/1 results of ip_isvalidchecksum().
 */
/*ARGSUSED*/
static int
ipv6_isvalidchecksum(mblk_t *mp)
{
	return (-1);
}
1032 
1033 /*
1034  * Determine the network addresses for an IPv4 interface
1035  */
1036 static int
1037 ip_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem,
1038 	net_ifaddr_t type[], void *storage)
1039 {
1040 	return (ip_getlifaddr_impl(AF_INET, phy_ifdata, ifdata,
1041 	    nelem, type, storage));
1042 }
1043 
1044 /*
1045  * Determine the network addresses for an IPv6 interface
1046  */
1047 static int
1048 ipv6_getlifaddr(phy_if_t phy_ifdata, lif_if_t ifdata, size_t nelem,
1049 		net_ifaddr_t type[], void *storage)
1050 {
1051 	return (ip_getlifaddr_impl(AF_INET6, phy_ifdata, ifdata,
1052 	    nelem, type, storage));
1053 }
1054 
1055 /*
1056  * Shared implementation to determine the network addresses for an interface
1057  */
1058 /* ARGSUSED */
1059 static int
1060 ip_getlifaddr_impl(sa_family_t family, phy_if_t phy_ifdata,
1061     lif_if_t ifdata, size_t nelem, net_ifaddr_t type[],
1062     struct sockaddr *storage)
1063 {
1064 	struct sockaddr_in6 *sin6;
1065 	struct sockaddr_in *sin;
1066 	lif_if_t ipifid;
1067 	ipif_t *ipif;
1068 	int i;
1069 
1070 	ASSERT(type != NULL);
1071 	ASSERT(storage != NULL);
1072 
1073 	ipifid = UNMAP_IPIF_ID(ifdata);
1074 
1075 	if (family == AF_INET) {
1076 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1077 		    (uint_t)ipifid, B_FALSE)) == NULL)
1078 			return (1);
1079 
1080 		sin = (struct sockaddr_in *)storage;
1081 		for (i = 0; i < nelem; i++, sin++) {
1082 			if (ip_getifaddr_type(AF_INET, ipif, type[i],
1083 			    &sin->sin_addr) < 0) {
1084 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1085 				    type[i]));
1086 				ipif_refrele(ipif);
1087 				return (1);
1088 			}
1089 		}
1090 	} else {
1091 		if ((ipif = ipif_getby_indexes((uint_t)phy_ifdata,
1092 		    (uint_t)ipifid, B_TRUE)) == NULL)
1093 			return (1);
1094 
1095 		sin6 = (struct sockaddr_in6 *)storage;
1096 		for (i = 0; i < nelem; i++, sin6++) {
1097 			if (ip_getifaddr_type(AF_INET6, ipif, type[i],
1098 			    &sin6->sin6_addr) < 0) {
1099 				ip2dbg(("ip_getlifaddr_impl failed type %d\n",
1100 				    type[i]));
1101 				ipif_refrele(ipif);
1102 				return (1);
1103 			}
1104 		}
1105 	}
1106 	ipif_refrele(ipif);
1107 	return (0);
1108 }
1109 
1110 /*
1111  * ip_getlifaddr private function
1112  */
1113 static int
1114 ip_getifaddr_type(sa_family_t family, ipif_t *ill_ipif,
1115     lif_if_t type, void *storage)
1116 {
1117 	void *src_addr;
1118 	int mem_size;
1119 
1120 	ASSERT(ill_ipif != NULL);
1121 	ASSERT(storage != NULL);
1122 
1123 	if (family == AF_INET) {
1124 		mem_size = sizeof (struct in_addr);
1125 
1126 		switch (type) {
1127 		case NA_ADDRESS:
1128 			src_addr = &(ill_ipif->ipif_lcl_addr);
1129 			break;
1130 		case NA_PEER:
1131 			src_addr = &(ill_ipif->ipif_pp_dst_addr);
1132 			break;
1133 		case NA_BROADCAST:
1134 			src_addr = &(ill_ipif->ipif_brd_addr);
1135 			break;
1136 		case NA_NETMASK:
1137 			src_addr = &(ill_ipif->ipif_net_mask);
1138 			break;
1139 		default:
1140 			return (-1);
1141 			/*NOTREACHED*/
1142 		}
1143 	} else {
1144 		mem_size = sizeof (struct in6_addr);
1145 
1146 		switch (type) {
1147 		case NA_ADDRESS:
1148 			src_addr = &(ill_ipif->ipif_v6lcl_addr);
1149 			break;
1150 		case NA_PEER:
1151 			src_addr = &(ill_ipif->ipif_v6pp_dst_addr);
1152 			break;
1153 		case NA_BROADCAST:
1154 			src_addr = &(ill_ipif->ipif_v6brd_addr);
1155 			break;
1156 		case NA_NETMASK:
1157 			src_addr = &(ill_ipif->ipif_v6net_mask);
1158 			break;
1159 		default:
1160 			return (-1);
1161 			/*NOTREACHED*/
1162 		}
1163 	}
1164 
1165 	(void) memcpy(storage, src_addr, mem_size);
1166 	return (1);
1167 }
1168 
1169 /*
1170  * Deliver packet up into the kernel, immitating its reception by a
1171  * network interface.
1172  */
1173 static void
1174 ip_ni_queue_in_func(void *inject)
1175 {
1176 	ip_ni_queue_func_impl(inject, B_FALSE);
1177 }
1178 
1179 /*
1180  * Deliver out of the kernel, as if it were being sent via a
1181  * raw socket so that IPFilter will see it again.
1182  */
1183 static void
1184 ip_ni_queue_out_func(void *inject)
1185 {
1186 	ip_ni_queue_func_impl(inject, B_TRUE);
1187 }
1188 
1189 /*
1190  * Shared implementation for inject via ip_output and ip_input
1191  */
1192 static void
1193 ip_ni_queue_func_impl(injection_t *inject,  boolean_t out)
1194 {
1195 	net_inject_t *packet;
1196 	conn_t *conn;
1197 	ill_t *ill;
1198 
1199 	ASSERT(inject != NULL);
1200 	packet = &inject->inj_data;
1201 	ASSERT(packet->ni_packet != NULL);
1202 
1203 	if ((ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical,
1204 	    B_FALSE, NULL, NULL, NULL, NULL)) == NULL) {
1205 		kmem_free(inject, sizeof (*inject));
1206 		return;
1207 	}
1208 
1209 	if (out == 0) {
1210 		if (inject->inj_isv6) {
1211 			ip_rput_v6(ill->ill_rq, packet->ni_packet);
1212 		} else {
1213 			ip_input(ill, NULL, packet->ni_packet, 0);
1214 		}
1215 		kmem_free(inject, sizeof (*inject));
1216 		ill_refrele(ill);
1217 		return;
1218 	}
1219 
1220 	/*
1221 	 * Even though ipcl_conn_create requests that it be passed
1222 	 * a different value for "TCP", in this case there may not
1223 	 * be a TCP connection backing the packet and more than
1224 	 * likely, non-TCP packets will go here too.
1225 	 */
1226 	conn = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP);
1227 	if (conn != NULL) {
1228 		if (inject->inj_isv6) {
1229 			conn->conn_flags |= IPCL_ISV6;
1230 			conn->conn_af_isv6 = B_TRUE;
1231 			conn->conn_src_preferences = IPV6_PREFER_SRC_DEFAULT;
1232 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1233 			ip_output_v6(conn, packet->ni_packet, ill->ill_wq,
1234 				IP_WPUT);
1235 		} else {
1236 			conn->conn_af_isv6 = B_FALSE;
1237 			conn->conn_pkt_isv6 = B_FALSE;
1238 			conn->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1239 			ip_output(conn, packet->ni_packet, ill->ill_wq,
1240 				IP_WPUT);
1241 		}
1242 
1243 		CONN_DEC_REF(conn);
1244 	}
1245 
1246 	kmem_free(inject, sizeof (*inject));
1247 	ill_refrele(ill);
1248 }
1249 
1250 /*
1251  * taskq function for nic events.
1252  */
1253 void
1254 ip_ne_queue_func(void *arg)
1255 {
1256 	hook_event_int_t *hr;
1257 	hook_nic_event_t *info = (hook_nic_event_t *)arg;
1258 
1259 	hr = (info->hne_family == ipv6) ? ipv6nicevents : ipv4nicevents;
1260 	(void) hook_run(hr, (hook_data_t)info);
1261 
1262 	if (info->hne_data != NULL)
1263 		kmem_free(info->hne_data, info->hne_datalen);
1264 	kmem_free(arg, sizeof (hook_nic_event_t));
1265 }
1266