1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7  */
8 
9 #if !defined(lint)
10 static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
12 #endif
13 
14 #include <sys/types.h>
15 #include <sys/errno.h>
16 #include <sys/param.h>
17 #include <sys/cpuvar.h>
18 #include <sys/open.h>
19 #include <sys/ioctl.h>
20 #include <sys/filio.h>
21 #include <sys/systm.h>
22 #include <sys/strsubr.h>
23 #include <sys/cred.h>
24 #include <sys/ddi.h>
25 #include <sys/sunddi.h>
26 #include <sys/ksynch.h>
27 #include <sys/kmem.h>
28 #include <sys/mkdev.h>
29 #include <sys/protosw.h>
30 #include <sys/socket.h>
31 #include <sys/dditypes.h>
32 #include <sys/cmn_err.h>
33 #include <sys/zone.h>
34 #include <net/if.h>
35 #include <net/af.h>
36 #include <net/route.h>
37 #include <netinet/in.h>
38 #include <netinet/in_systm.h>
39 #include <netinet/ip.h>
40 #include <netinet/ip_var.h>
41 #include <netinet/tcp.h>
42 #include <netinet/udp.h>
43 #include <netinet/tcpip.h>
44 #include <netinet/ip_icmp.h>
45 #include "netinet/ip_compat.h"
46 #ifdef	USE_INET6
47 # include <netinet/icmp6.h>
48 #endif
49 #include "netinet/ip_fil.h"
50 #include "netinet/ip_nat.h"
51 #include "netinet/ip_frag.h"
52 #include "netinet/ip_state.h"
53 #include "netinet/ip_auth.h"
54 #include "netinet/ip_proxy.h"
55 #include "netinet/ipf_stack.h"
56 #ifdef	IPFILTER_LOOKUP
57 # include "netinet/ip_lookup.h"
58 #endif
59 #include <inet/ip_ire.h>
60 
61 #include <sys/md5.h>
62 #include <sys/neti.h>
63 
64 static	int	frzerostats __P((caddr_t, ipf_stack_t *));
65 static	int	fr_setipfloopback __P((int, ipf_stack_t *));
66 static	int	fr_enableipf __P((ipf_stack_t *, int));
67 static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
68 static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
69 static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
70 static	int	ipf_hook __P((hook_data_t, int, int, void *));
71 static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
72 static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
73 static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
74     void *));
75 static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
76 static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
77 static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
78 static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
79 static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
80     void *));
81 static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
82     void *));
83 static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
84 extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
85 extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
86 
87 #if SOLARIS2 < 10
88 #if SOLARIS2 >= 7
89 u_int		*ip_ttl_ptr = NULL;
90 u_int		*ip_mtudisc = NULL;
91 # if SOLARIS2 >= 8
92 int		*ip_forwarding = NULL;
93 u_int		*ip6_forwarding = NULL;
94 # else
95 u_int		*ip_forwarding = NULL;
96 # endif
97 #else
98 u_long		*ip_ttl_ptr = NULL;
99 u_long		*ip_mtudisc = NULL;
100 u_long		*ip_forwarding = NULL;
101 #endif
102 #endif
103 
104 
105 /* ------------------------------------------------------------------------ */
106 /* Function:    ipldetach                                                   */
107 /* Returns:     int - 0 == success, else error.                             */
108 /* Parameters:  Nil                                                         */
109 /*                                                                          */
110 /* This function is responsible for undoing anything that might have been   */
111 /* done in a call to iplattach().  It must be able to clean up from a call  */
112 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
113 /* configures a table to be so large that we cannot allocate enough memory  */
114 /* for it.                                                                  */
115 /* ------------------------------------------------------------------------ */
116 int ipldetach(ifs)
117 ipf_stack_t *ifs;
118 {
119 
120 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
121 
122 #if SOLARIS2 < 10
123 
124 	if (ifs->ifs_fr_control_forwarding & 2) {
125 		if (ip_forwarding != NULL)
126 			*ip_forwarding = 0;
127 #if SOLARIS2 >= 8
128 		if (ip6_forwarding != NULL)
129 			*ip6_forwarding = 0;
130 #endif
131 	}
132 #endif
133 
134 	/*
135 	 * This lock needs to be dropped around the net_hook_unregister calls
136 	 * because we can deadlock here with:
137 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
138 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
139 	 */
140 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
141 
142 #define	UNDO_HOOK(_f, _b, _e, _h)					\
143 	do {								\
144 		if (ifs->_f != NULL) {					\
145 			if (ifs->_b) {					\
146 				int tmp = net_hook_unregister(ifs->_f,	\
147 					   _e, ifs->_h);		\
148 				ifs->_b = (tmp != 0 && tmp != ENXIO);	\
149 				if (!ifs->_b && ifs->_h != NULL) {	\
150 					hook_free(ifs->_h);		\
151 					ifs->_h = NULL;			\
152 				}					\
153 			} else if (ifs->_h != NULL) {			\
154 				hook_free(ifs->_h);			\
155 				ifs->_h = NULL;				\
156 			}						\
157 		}							\
158 		_NOTE(CONSTCOND)					\
159 	} while (0)
160 
161 	/*
162 	 * Remove IPv6 Hooks
163 	 */
164 	if (ifs->ifs_ipf_ipv6 != NULL) {
165 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
166 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
167 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
168 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
169 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
170 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
171 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
172 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
173 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
174 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
175 
176 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
177 			goto detach_failed;
178 		ifs->ifs_ipf_ipv6 = NULL;
179         }
180 
181 	/*
182 	 * Remove IPv4 Hooks
183 	 */
184 	if (ifs->ifs_ipf_ipv4 != NULL) {
185 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
186 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
187 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
188 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
189 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
190 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
191 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
192 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
193 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
194 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
195 
196 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
197 			goto detach_failed;
198 		ifs->ifs_ipf_ipv4 = NULL;
199 	}
200 
201 #undef UNDO_HOOK
202 
203 #ifdef	IPFDEBUG
204 	cmn_err(CE_CONT, "ipldetach()\n");
205 #endif
206 
207 	WRITE_ENTER(&ifs->ifs_ipf_global);
208 	fr_deinitialise(ifs);
209 
210 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
211 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
212 
213 	if (ifs->ifs_ipf_locks_done == 1) {
214 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
215 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
216 		RW_DESTROY(&ifs->ifs_ipf_tokens);
217 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
218 		ifs->ifs_ipf_locks_done = 0;
219 	}
220 
221 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
222 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
223 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
224 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
225 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
226 		return -1;
227 
228 	return 0;
229 
230 detach_failed:
231 	WRITE_ENTER(&ifs->ifs_ipf_global);
232 	return -1;
233 }
234 
235 int iplattach(ifs)
236 ipf_stack_t *ifs;
237 {
238 #if SOLARIS2 < 10
239 	int i;
240 #endif
241 	netid_t id = ifs->ifs_netid;
242 
243 #ifdef	IPFDEBUG
244 	cmn_err(CE_CONT, "iplattach()\n");
245 #endif
246 
247 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
248 	ifs->ifs_fr_flags = IPF_LOGGING;
249 #ifdef _KERNEL
250 	ifs->ifs_fr_update_ipid = 0;
251 #else
252 	ifs->ifs_fr_update_ipid = 1;
253 #endif
254 	ifs->ifs_fr_minttl = 4;
255 	ifs->ifs_fr_icmpminfragmtu = 68;
256 #if defined(IPFILTER_DEFAULT_BLOCK)
257 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
258 #else
259 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
260 #endif
261 
262 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
263 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
264 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
265 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
266 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
267 	ifs->ifs_ipf_locks_done = 1;
268 
269 	if (fr_initialise(ifs) < 0)
270 		return -1;
271 
272 	HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
273 		  "ipfilter_hook4_nicevents", ifs);
274 	HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
275 		  "ipfilter_hook4_in", ifs);
276 	HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
277 		  "ipfilter_hook4_out", ifs);
278 	HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
279 		  "ipfilter_hook4_loop_in", ifs);
280 	HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
281 		  "ipfilter_hook4_loop_out", ifs);
282 
283 	/*
284 	 * If we hold this lock over all of the net_hook_register calls, we
285 	 * can cause a deadlock to occur with the following lock ordering:
286 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
287 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
288 	 */
289 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
290 
291 	/*
292 	 * Add IPv4 hooks
293 	 */
294 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
295 	if (ifs->ifs_ipf_ipv4 == NULL)
296 		goto hookup_failed;
297 
298 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
299 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
300 	if (!ifs->ifs_hook4_nic_events)
301 		goto hookup_failed;
302 
303 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
304 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
305 	if (!ifs->ifs_hook4_physical_in)
306 		goto hookup_failed;
307 
308 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
309 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
310 	if (!ifs->ifs_hook4_physical_out)
311 		goto hookup_failed;
312 
313 	if (ifs->ifs_ipf_loopback) {
314 		ifs->ifs_hook4_loopback_in = (net_hook_register(
315 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
316 		    ifs->ifs_ipfhook4_loop_in) == 0);
317 		if (!ifs->ifs_hook4_loopback_in)
318 			goto hookup_failed;
319 
320 		ifs->ifs_hook4_loopback_out = (net_hook_register(
321 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
322 		    ifs->ifs_ipfhook4_loop_out) == 0);
323 		if (!ifs->ifs_hook4_loopback_out)
324 			goto hookup_failed;
325 	}
326 	/*
327 	 * Add IPv6 hooks
328 	 */
329 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
330 	if (ifs->ifs_ipf_ipv6 == NULL)
331 		goto hookup_failed;
332 
333 	HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
334 		  "ipfilter_hook6_nicevents", ifs);
335 	HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
336 		  "ipfilter_hook6_in", ifs);
337 	HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
338 		  "ipfilter_hook6_out", ifs);
339 	HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
340 		  "ipfilter_hook6_loop_in", ifs);
341 	HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
342 		  "ipfilter_hook6_loop_out", ifs);
343 
344 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
345 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
346 	if (!ifs->ifs_hook6_nic_events)
347 		goto hookup_failed;
348 
349 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
350 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
351 	if (!ifs->ifs_hook6_physical_in)
352 		goto hookup_failed;
353 
354 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
355 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
356 	if (!ifs->ifs_hook6_physical_out)
357 		goto hookup_failed;
358 
359 	if (ifs->ifs_ipf_loopback) {
360 		ifs->ifs_hook6_loopback_in = (net_hook_register(
361 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
362 		    ifs->ifs_ipfhook6_loop_in) == 0);
363 		if (!ifs->ifs_hook6_loopback_in)
364 			goto hookup_failed;
365 
366 		ifs->ifs_hook6_loopback_out = (net_hook_register(
367 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
368 		    ifs->ifs_ipfhook6_loop_out) == 0);
369 		if (!ifs->ifs_hook6_loopback_out)
370 			goto hookup_failed;
371 	}
372 
373 	/*
374 	 * Reacquire ipf_global, now it is safe.
375 	 */
376 	WRITE_ENTER(&ifs->ifs_ipf_global);
377 
378 /* Do not use private interface ip_params_arr[] in Solaris 10 */
379 #if SOLARIS2 < 10
380 
381 #if SOLARIS2 >= 8
382 	ip_forwarding = &ip_g_forward;
383 #endif
384 	/*
385 	 * XXX - There is no terminator for this array, so it is not possible
386 	 * to tell if what we are looking for is missing and go off the end
387 	 * of the array.
388 	 */
389 
390 #if SOLARIS2 <= 8
391 	for (i = 0; ; i++) {
392 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
393 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
394 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
395 			    "ip_path_mtu_discovery")) {
396 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
397 		}
398 #if SOLARIS2 < 8
399 		else if (!strcmp(ip_param_arr[i].ip_param_name,
400 			    "ip_forwarding")) {
401 			ip_forwarding = &ip_param_arr[i].ip_param_value;
402 		}
403 #else
404 		else if (!strcmp(ip_param_arr[i].ip_param_name,
405 			    "ip6_forwarding")) {
406 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
407 		}
408 #endif
409 
410 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
411 #if SOLARIS2 >= 8
412 		    ip6_forwarding != NULL &&
413 #endif
414 		    ip_forwarding != NULL)
415 			break;
416 	}
417 #endif
418 
419 	if (ifs->ifs_fr_control_forwarding & 1) {
420 		if (ip_forwarding != NULL)
421 			*ip_forwarding = 1;
422 #if SOLARIS2 >= 8
423 		if (ip6_forwarding != NULL)
424 			*ip6_forwarding = 1;
425 #endif
426 	}
427 
428 #endif
429 
430 	return 0;
431 hookup_failed:
432 	WRITE_ENTER(&ifs->ifs_ipf_global);
433 	return -1;
434 }
435 
436 static	int	fr_setipfloopback(set, ifs)
437 int set;
438 ipf_stack_t *ifs;
439 {
440 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
441 		return EFAULT;
442 
443 	if (set && !ifs->ifs_ipf_loopback) {
444 		ifs->ifs_ipf_loopback = 1;
445 
446 		ifs->ifs_hook4_loopback_in = (net_hook_register(
447 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
448 		    ifs->ifs_ipfhook4_loop_in) == 0);
449 		if (!ifs->ifs_hook4_loopback_in)
450 			return EINVAL;
451 
452 		ifs->ifs_hook4_loopback_out = (net_hook_register(
453 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
454 		    ifs->ifs_ipfhook4_loop_out) == 0);
455 		if (!ifs->ifs_hook4_loopback_out)
456 			return EINVAL;
457 
458 		ifs->ifs_hook6_loopback_in = (net_hook_register(
459 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
460 		    ifs->ifs_ipfhook6_loop_in) == 0);
461 		if (!ifs->ifs_hook6_loopback_in)
462 			return EINVAL;
463 
464 		ifs->ifs_hook6_loopback_out = (net_hook_register(
465 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
466 		    ifs->ifs_ipfhook6_loop_out) == 0);
467 		if (!ifs->ifs_hook6_loopback_out)
468 			return EINVAL;
469 
470 	} else if (!set && ifs->ifs_ipf_loopback) {
471 		ifs->ifs_ipf_loopback = 0;
472 
473 		ifs->ifs_hook4_loopback_in =
474 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
475 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
476 		if (ifs->ifs_hook4_loopback_in)
477 			return EBUSY;
478 
479 		ifs->ifs_hook4_loopback_out =
480 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
481 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
482 		if (ifs->ifs_hook4_loopback_out)
483 			return EBUSY;
484 
485 		ifs->ifs_hook6_loopback_in =
486 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
487 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
488 		if (ifs->ifs_hook6_loopback_in)
489 			return EBUSY;
490 
491 		ifs->ifs_hook6_loopback_out =
492 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
493 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
494 		if (ifs->ifs_hook6_loopback_out)
495 			return EBUSY;
496 	}
497 	return 0;
498 }
499 
500 
501 /*
502  * Filter ioctl interface.
503  */
504 /*ARGSUSED*/
505 int iplioctl(dev, cmd, data, mode, cp, rp)
506 dev_t dev;
507 int cmd;
508 #if SOLARIS2 >= 7
509 intptr_t data;
510 #else
511 int *data;
512 #endif
513 int mode;
514 cred_t *cp;
515 int *rp;
516 {
517 	int error = 0, tmp;
518 	friostat_t fio;
519 	minor_t unit;
520 	u_int enable;
521 	ipf_stack_t *ifs;
522 
523 #ifdef	IPFDEBUG
524 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
525 		dev, cmd, data, mode, cp, rp);
526 #endif
527 	unit = getminor(dev);
528 	if (IPL_LOGMAX < unit)
529 		return ENXIO;
530 
531         /*
532 	 * As we're calling ipf_find_stack in user space, from a given zone
533 	 * to find the stack pointer for this zone, there is no need to have
534 	 * a hold/refence count here.
535 	 */
536 	ifs = ipf_find_stack(crgetzoneid(cp));
537 	ASSERT(ifs != NULL);
538 
539 	if (ifs->ifs_fr_running <= 0) {
540 		if (unit != IPL_LOGIPF) {
541 			return EIO;
542 		}
543 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
544 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
545 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
546 			return EIO;
547 		}
548 	}
549 
550 	READ_ENTER(&ifs->ifs_ipf_global);
551 	if (ifs->ifs_fr_enable_active != 0) {
552 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
553 		return EBUSY;
554 	}
555 
556 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
557 			       curproc, ifs);
558 	if (error != -1) {
559 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
560 		return error;
561 	}
562 	error = 0;
563 
564 	switch (cmd)
565 	{
566 	case SIOCFRENB :
567 		if (!(mode & FWRITE))
568 			error = EPERM;
569 		else {
570 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
571 				       sizeof(enable));
572 			if (error != 0) {
573 				error = EFAULT;
574 				break;
575 			}
576 
577 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
578 			WRITE_ENTER(&ifs->ifs_ipf_global);
579 
580 			/*
581 			 * We must recheck fr_enable_active here, since we've
582 			 * dropped ifs_ipf_global from R in order to get it
583 			 * exclusively.
584 			 */
585 			if (ifs->ifs_fr_enable_active == 0) {
586 				ifs->ifs_fr_enable_active = 1;
587 				error = fr_enableipf(ifs, enable);
588 				ifs->ifs_fr_enable_active = 0;
589 			}
590 		}
591 		break;
592 	case SIOCIPFSET :
593 		if (!(mode & FWRITE)) {
594 			error = EPERM;
595 			break;
596 		}
597 		/* FALLTHRU */
598 	case SIOCIPFGETNEXT :
599 	case SIOCIPFGET :
600 		error = fr_ipftune(cmd, (void *)data, ifs);
601 		break;
602 	case SIOCSETFF :
603 		if (!(mode & FWRITE))
604 			error = EPERM;
605 		else {
606 			error = COPYIN((caddr_t)data,
607 				       (caddr_t)&ifs->ifs_fr_flags,
608 				       sizeof(ifs->ifs_fr_flags));
609 			if (error != 0)
610 				error = EFAULT;
611 		}
612 		break;
613 	case SIOCIPFLP :
614 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
615 			       sizeof(tmp));
616 		if (error != 0)
617 			error = EFAULT;
618 		else
619 			error = fr_setipfloopback(tmp, ifs);
620 		break;
621 	case SIOCGETFF :
622 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
623 				sizeof(ifs->ifs_fr_flags));
624 		if (error != 0)
625 			error = EFAULT;
626 		break;
627 	case SIOCFUNCL :
628 		error = fr_resolvefunc((void *)data);
629 		break;
630 	case SIOCINAFR :
631 	case SIOCRMAFR :
632 	case SIOCADAFR :
633 	case SIOCZRLST :
634 		if (!(mode & FWRITE))
635 			error = EPERM;
636 		else
637 			error = frrequest(unit, cmd, (caddr_t)data,
638 					  ifs->ifs_fr_active, 1, ifs);
639 		break;
640 	case SIOCINIFR :
641 	case SIOCRMIFR :
642 	case SIOCADIFR :
643 		if (!(mode & FWRITE))
644 			error = EPERM;
645 		else
646 			error = frrequest(unit, cmd, (caddr_t)data,
647 					  1 - ifs->ifs_fr_active, 1, ifs);
648 		break;
649 	case SIOCSWAPA :
650 		if (!(mode & FWRITE))
651 			error = EPERM;
652 		else {
653 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
654 			bzero((char *)ifs->ifs_frcache,
655 			    sizeof (ifs->ifs_frcache));
656 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
657 					(caddr_t)data,
658 					sizeof(ifs->ifs_fr_active));
659 			if (error != 0)
660 				error = EFAULT;
661 			else
662 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
663 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
664 		}
665 		break;
666 	case SIOCGETFS :
667 		fr_getstat(&fio, ifs);
668 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
669 		break;
670 	case SIOCFRZST :
671 		if (!(mode & FWRITE))
672 			error = EPERM;
673 		else
674 			error = fr_zerostats((caddr_t)data, ifs);
675 		break;
676 	case	SIOCIPFFL :
677 		if (!(mode & FWRITE))
678 			error = EPERM;
679 		else {
680 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
681 				       sizeof(tmp));
682 			if (!error) {
683 				tmp = frflush(unit, 4, tmp, ifs);
684 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
685 						sizeof(tmp));
686 				if (error != 0)
687 					error = EFAULT;
688 			} else
689 				error = EFAULT;
690 		}
691 		break;
692 #ifdef USE_INET6
693 	case	SIOCIPFL6 :
694 		if (!(mode & FWRITE))
695 			error = EPERM;
696 		else {
697 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
698 				       sizeof(tmp));
699 			if (!error) {
700 				tmp = frflush(unit, 6, tmp, ifs);
701 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
702 						sizeof(tmp));
703 				if (error != 0)
704 					error = EFAULT;
705 			} else
706 				error = EFAULT;
707 		}
708 		break;
709 #endif
710 	case SIOCSTLCK :
711 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
712 		if (error == 0) {
713 			ifs->ifs_fr_state_lock = tmp;
714 			ifs->ifs_fr_nat_lock = tmp;
715 			ifs->ifs_fr_frag_lock = tmp;
716 			ifs->ifs_fr_auth_lock = tmp;
717 		} else
718 			error = EFAULT;
719 	break;
720 #ifdef	IPFILTER_LOG
721 	case	SIOCIPFFB :
722 		if (!(mode & FWRITE))
723 			error = EPERM;
724 		else {
725 			tmp = ipflog_clear(unit, ifs);
726 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
727 				       sizeof(tmp));
728 			if (error)
729 				error = EFAULT;
730 		}
731 		break;
732 #endif /* IPFILTER_LOG */
733 	case SIOCFRSYN :
734 		if (!(mode & FWRITE))
735 			error = EPERM;
736 		else {
737 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
738 			WRITE_ENTER(&ifs->ifs_ipf_global);
739 
740 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
741 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
742 			fr_nataddrsync(0, NULL, NULL, ifs);
743 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
744 			error = 0;
745 		}
746 		break;
747 	case SIOCGFRST :
748 		error = fr_outobj((void *)data, fr_fragstats(ifs),
749 				  IPFOBJ_FRAGSTAT);
750 		break;
751 	case FIONREAD :
752 #ifdef	IPFILTER_LOG
753 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
754 
755 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
756 		if (error != 0)
757 			error = EFAULT;
758 #endif
759 		break;
760 	case SIOCIPFITER :
761 		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
762 				       curproc, ifs);
763 		break;
764 
765 	case SIOCGENITER :
766 		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
767 					curproc, ifs);
768 		break;
769 
770 	case SIOCIPFDELTOK :
771 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
772 		if (error != 0) {
773 			error = EFAULT;
774 		} else {
775 			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
776 		}
777 		break;
778 
779 	default :
780 #ifdef	IPFDEBUG
781 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
782 			cmd, (void *)data);
783 #endif
784 		error = EINVAL;
785 		break;
786 	}
787 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
788 	return error;
789 }
790 
791 
792 static int fr_enableipf(ifs, enable)
793 ipf_stack_t *ifs;
794 int enable;
795 {
796 	int error;
797 
798 	if (!enable) {
799 		error = ipldetach(ifs);
800 		if (error == 0)
801 			ifs->ifs_fr_running = -1;
802 		return error;
803 	}
804 
805 	if (ifs->ifs_fr_running > 0)
806 		return 0;
807 
808 	error = iplattach(ifs);
809 	if (error == 0) {
810 		if (ifs->ifs_fr_timer_id == NULL) {
811 			int hz = drv_usectohz(500000);
812 
813 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
814 						       (void *)ifs,
815 						       hz);
816 		}
817 		ifs->ifs_fr_running = 1;
818 	} else {
819 		(void) ipldetach(ifs);
820 	}
821 	return error;
822 }
823 
824 
825 phy_if_t get_unit(name, v, ifs)
826 char *name;
827 int v;
828 ipf_stack_t *ifs;
829 {
830 	net_handle_t nif;
831 
832   	if (v == 4)
833  		nif = ifs->ifs_ipf_ipv4;
834   	else if (v == 6)
835  		nif = ifs->ifs_ipf_ipv6;
836   	else
837  		return 0;
838 
839  	return (net_phylookup(nif, name));
840 }
841 
842 /*
843  * routines below for saving IP headers to buffer
844  */
845 /*ARGSUSED*/
846 int iplopen(devp, flags, otype, cred)
847 dev_t *devp;
848 int flags, otype;
849 cred_t *cred;
850 {
851 	minor_t min = getminor(*devp);
852 
853 #ifdef	IPFDEBUG
854 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
855 #endif
856 	if (!(otype & OTYP_CHR))
857 		return ENXIO;
858 
859 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
860 	return min;
861 }
862 
863 
864 /*ARGSUSED*/
865 int iplclose(dev, flags, otype, cred)
866 dev_t dev;
867 int flags, otype;
868 cred_t *cred;
869 {
870 	minor_t	min = getminor(dev);
871 
872 #ifdef	IPFDEBUG
873 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
874 #endif
875 
876 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
877 	return min;
878 }
879 
880 #ifdef	IPFILTER_LOG
881 /*
882  * iplread/ipllog
883  * both of these must operate with at least splnet() lest they be
884  * called during packet processing and cause an inconsistancy to appear in
885  * the filter lists.
886  */
887 /*ARGSUSED*/
888 int iplread(dev, uio, cp)
889 dev_t dev;
890 register struct uio *uio;
891 cred_t *cp;
892 {
893 	ipf_stack_t *ifs;
894 	int ret;
895 
896         /*
897 	 * As we're calling ipf_find_stack in user space, from a given zone
898 	 * to find the stack pointer for this zone, there is no need to have
899 	 * a hold/refence count here.
900 	 */
901 	ifs = ipf_find_stack(crgetzoneid(cp));
902 	ASSERT(ifs != NULL);
903 
904 # ifdef	IPFDEBUG
905 	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
906 # endif
907 
908 	if (ifs->ifs_fr_running < 1) {
909 		return EIO;
910 	}
911 
912 # ifdef	IPFILTER_SYNC
913 	if (getminor(dev) == IPL_LOGSYNC) {
914 		return ipfsync_read(uio);
915 	}
916 # endif
917 
918 	ret = ipflog_read(getminor(dev), uio, ifs);
919 	return ret;
920 }
921 #endif /* IPFILTER_LOG */
922 
923 
924 /*
925  * iplread/ipllog
926  * both of these must operate with at least splnet() lest they be
927  * called during packet processing and cause an inconsistancy to appear in
928  * the filter lists.
929  */
930 int iplwrite(dev, uio, cp)
931 dev_t dev;
932 register struct uio *uio;
933 cred_t *cp;
934 {
935 	ipf_stack_t *ifs;
936 
937         /*
938 	 * As we're calling ipf_find_stack in user space, from a given zone
939 	 * to find the stack pointer for this zone, there is no need to have
940 	 * a hold/refence count here.
941 	 */
942 	ifs = ipf_find_stack(crgetzoneid(cp));
943 	ASSERT(ifs != NULL);
944 
945 #ifdef	IPFDEBUG
946 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
947 #endif
948 
949 	if (ifs->ifs_fr_running < 1) {
950 		return EIO;
951 	}
952 
953 #ifdef	IPFILTER_SYNC
954 	if (getminor(dev) == IPL_LOGSYNC)
955 		return ipfsync_write(uio);
956 #endif /* IPFILTER_SYNC */
957 	dev = dev;	/* LINT */
958 	uio = uio;	/* LINT */
959 	cp = cp;	/* LINT */
960 	return ENXIO;
961 }
962 
963 
964 /*
965  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
966  * requires a large amount of setting up and isn't any more efficient.
967  */
968 int fr_send_reset(fin)
969 fr_info_t *fin;
970 {
971 	tcphdr_t *tcp, *tcp2;
972 	int tlen, hlen;
973 	mblk_t *m;
974 #ifdef	USE_INET6
975 	ip6_t *ip6;
976 #endif
977 	ip_t *ip;
978 
979 	tcp = fin->fin_dp;
980 	if (tcp->th_flags & TH_RST)
981 		return -1;
982 
983 #ifndef	IPFILTER_CKSUM
984 	if (fr_checkl4sum(fin) == -1)
985 		return -1;
986 #endif
987 
988 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
989 #ifdef	USE_INET6
990 	if (fin->fin_v == 6)
991 		hlen = sizeof(ip6_t);
992 	else
993 #endif
994 		hlen = sizeof(ip_t);
995 	hlen += sizeof(*tcp2);
996 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
997 		return -1;
998 
999 	m->b_rptr += 64;
1000 	MTYPE(m) = M_DATA;
1001 	m->b_wptr = m->b_rptr + hlen;
1002 	ip = (ip_t *)m->b_rptr;
1003 	bzero((char *)ip, hlen);
1004 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1005 	tcp2->th_dport = tcp->th_sport;
1006 	tcp2->th_sport = tcp->th_dport;
1007 	if (tcp->th_flags & TH_ACK) {
1008 		tcp2->th_seq = tcp->th_ack;
1009 		tcp2->th_flags = TH_RST;
1010 	} else {
1011 		tcp2->th_ack = ntohl(tcp->th_seq);
1012 		tcp2->th_ack += tlen;
1013 		tcp2->th_ack = htonl(tcp2->th_ack);
1014 		tcp2->th_flags = TH_RST|TH_ACK;
1015 	}
1016 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
1017 
1018 	ip->ip_v = fin->fin_v;
1019 #ifdef	USE_INET6
1020 	if (fin->fin_v == 6) {
1021 		ip6 = (ip6_t *)m->b_rptr;
1022 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1023 		ip6->ip6_src = fin->fin_dst6.in6;
1024 		ip6->ip6_dst = fin->fin_src6.in6;
1025 		ip6->ip6_plen = htons(sizeof(*tcp));
1026 		ip6->ip6_nxt = IPPROTO_TCP;
1027 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1028 	} else
1029 #endif
1030 	{
1031 		ip->ip_src.s_addr = fin->fin_daddr;
1032 		ip->ip_dst.s_addr = fin->fin_saddr;
1033 		ip->ip_id = fr_nextipid(fin);
1034 		ip->ip_hl = sizeof(*ip) >> 2;
1035 		ip->ip_p = IPPROTO_TCP;
1036 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1037 		ip->ip_tos = fin->fin_ip->ip_tos;
1038 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1039 	}
1040 	return fr_send_ip(fin, m, &m);
1041 }
1042 
1043 /*
1044  * Function:	fr_send_ip
1045  * Returns:	 0: success
1046  *		-1: failed
1047  * Parameters:
1048  *	fin: packet information
1049  *	m: the message block where ip head starts
1050  *
1051  * Send a new packet through the IP stack.
1052  *
1053  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1054  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1055  * function).
1056  *
1057  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1058  * in by this function.
1059  *
1060  * All other portions of the packet must be in on-the-wire format.
1061  */
1062 /*ARGSUSED*/
1063 static int fr_send_ip(fin, m, mpp)
1064 fr_info_t *fin;
1065 mblk_t *m, **mpp;
1066 {
1067 	qpktinfo_t qpi, *qpip;
1068 	fr_info_t fnew;
1069 	ip_t *ip;
1070 	int i, hlen;
1071 	ipf_stack_t *ifs = fin->fin_ifs;
1072 
1073 	ip = (ip_t *)m->b_rptr;
1074 	bzero((char *)&fnew, sizeof(fnew));
1075 
1076 #ifdef	USE_INET6
1077 	if (fin->fin_v == 6) {
1078 		ip6_t *ip6;
1079 
1080 		ip6 = (ip6_t *)ip;
1081 		ip6->ip6_vfc = 0x60;
1082 		ip6->ip6_hlim = 127;
1083 		fnew.fin_v = 6;
1084 		hlen = sizeof(*ip6);
1085 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1086 	} else
1087 #endif
1088 	{
1089 		fnew.fin_v = 4;
1090 #if SOLARIS2 >= 10
1091 		ip->ip_ttl = 255;
1092 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1093 			ip->ip_off = htons(IP_DF);
1094 #else
1095 		if (ip_ttl_ptr != NULL)
1096 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1097 		else
1098 			ip->ip_ttl = 63;
1099 		if (ip_mtudisc != NULL)
1100 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1101 		else
1102 			ip->ip_off = htons(IP_DF);
1103 #endif
1104 		/*
1105 		 * The dance with byte order and ip_len/ip_off is because in
1106 		 * fr_fastroute, it expects them to be in host byte order but
1107 		 * ipf_cksum expects them to be in network byte order.
1108 		 */
1109 		ip->ip_len = htons(ip->ip_len);
1110 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1111 		ip->ip_len = ntohs(ip->ip_len);
1112 		ip->ip_off = ntohs(ip->ip_off);
1113 		hlen = sizeof(*ip);
1114 		fnew.fin_plen = ip->ip_len;
1115 	}
1116 
1117 	qpip = fin->fin_qpi;
1118 	qpi.qpi_off = 0;
1119 	qpi.qpi_ill = qpip->qpi_ill;
1120 	qpi.qpi_m = m;
1121 	qpi.qpi_data = ip;
1122 	fnew.fin_qpi = &qpi;
1123 	fnew.fin_ifp = fin->fin_ifp;
1124 	fnew.fin_flx = FI_NOCKSUM;
1125 	fnew.fin_m = m;
1126 	fnew.fin_qfm = m;
1127 	fnew.fin_ip = ip;
1128 	fnew.fin_mp = mpp;
1129 	fnew.fin_hlen = hlen;
1130 	fnew.fin_dp = (char *)ip + hlen;
1131 	fnew.fin_ifs = fin->fin_ifs;
1132 	(void) fr_makefrip(hlen, ip, &fnew);
1133 
1134 	i = fr_fastroute(m, mpp, &fnew, NULL);
1135 	return i;
1136 }
1137 
1138 
1139 int fr_send_icmp_err(type, fin, dst)
1140 int type;
1141 fr_info_t *fin;
1142 int dst;
1143 {
1144 	struct in_addr dst4;
1145 	struct icmp *icmp;
1146 	qpktinfo_t *qpi;
1147 	int hlen, code;
1148 	phy_if_t phy;
1149 	u_short sz;
1150 #ifdef	USE_INET6
1151 	mblk_t *mb;
1152 #endif
1153 	mblk_t *m;
1154 #ifdef	USE_INET6
1155 	ip6_t *ip6;
1156 #endif
1157 	ip_t *ip;
1158 	ipf_stack_t *ifs = fin->fin_ifs;
1159 
1160 	if ((type < 0) || (type > ICMP_MAXTYPE))
1161 		return -1;
1162 
1163 	code = fin->fin_icode;
1164 #ifdef USE_INET6
1165 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1166 		return -1;
1167 #endif
1168 
1169 #ifndef	IPFILTER_CKSUM
1170 	if (fr_checkl4sum(fin) == -1)
1171 		return -1;
1172 #endif
1173 
1174 	qpi = fin->fin_qpi;
1175 
1176 #ifdef	USE_INET6
1177 	mb = fin->fin_qfm;
1178 
1179 	if (fin->fin_v == 6) {
1180 		sz = sizeof(ip6_t);
1181 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1182 		hlen = sizeof(ip6_t);
1183 		type = icmptoicmp6types[type];
1184 		if (type == ICMP6_DST_UNREACH)
1185 			code = icmptoicmp6unreach[code];
1186 	} else
1187 #endif
1188 	{
1189 		if ((fin->fin_p == IPPROTO_ICMP) &&
1190 		    !(fin->fin_flx & FI_SHORT))
1191 			switch (ntohs(fin->fin_data[0]) >> 8)
1192 			{
1193 			case ICMP_ECHO :
1194 			case ICMP_TSTAMP :
1195 			case ICMP_IREQ :
1196 			case ICMP_MASKREQ :
1197 				break;
1198 			default :
1199 				return 0;
1200 			}
1201 
1202 		sz = sizeof(ip_t) * 2;
1203 		sz += 8;		/* 64 bits of data */
1204 		hlen = sizeof(ip_t);
1205 	}
1206 
1207 	sz += offsetof(struct icmp, icmp_ip);
1208 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1209 		return -1;
1210 	MTYPE(m) = M_DATA;
1211 	m->b_rptr += 64;
1212 	m->b_wptr = m->b_rptr + sz;
1213 	bzero((char *)m->b_rptr, (size_t)sz);
1214 	ip = (ip_t *)m->b_rptr;
1215 	ip->ip_v = fin->fin_v;
1216 	icmp = (struct icmp *)(m->b_rptr + hlen);
1217 	icmp->icmp_type = type & 0xff;
1218 	icmp->icmp_code = code & 0xff;
1219 	phy = (phy_if_t)qpi->qpi_ill;
1220 	if (type == ICMP_UNREACH && (phy != 0) &&
1221 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1222 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1223 
1224 #ifdef	USE_INET6
1225 	if (fin->fin_v == 6) {
1226 		struct in6_addr dst6;
1227 		int csz;
1228 
1229 		if (dst == 0) {
1230 			ipf_stack_t *ifs = fin->fin_ifs;
1231 
1232 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1233 				       (void *)&dst6, NULL, ifs) == -1) {
1234 				FREE_MB_T(m);
1235 				return -1;
1236 			}
1237 		} else
1238 			dst6 = fin->fin_dst6.in6;
1239 
1240 		csz = sz;
1241 		sz -= sizeof(ip6_t);
1242 		ip6 = (ip6_t *)m->b_rptr;
1243 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1244 		ip6->ip6_plen = htons((u_short)sz);
1245 		ip6->ip6_nxt = IPPROTO_ICMPV6;
1246 		ip6->ip6_src = dst6;
1247 		ip6->ip6_dst = fin->fin_src6.in6;
1248 		sz -= offsetof(struct icmp, icmp_ip);
1249 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1250 		icmp->icmp_cksum = csz - sizeof(ip6_t);
1251 	} else
1252 #endif
1253 	{
1254 		ip->ip_hl = sizeof(*ip) >> 2;
1255 		ip->ip_p = IPPROTO_ICMP;
1256 		ip->ip_id = fin->fin_ip->ip_id;
1257 		ip->ip_tos = fin->fin_ip->ip_tos;
1258 		ip->ip_len = (u_short)sz;
1259 		if (dst == 0) {
1260 			ipf_stack_t *ifs = fin->fin_ifs;
1261 
1262 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1263 				       (void *)&dst4, NULL, ifs) == -1) {
1264 				FREE_MB_T(m);
1265 				return -1;
1266 			}
1267 		} else {
1268 			dst4 = fin->fin_dst;
1269 		}
1270 		ip->ip_src = dst4;
1271 		ip->ip_dst = fin->fin_src;
1272 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1273 		      sizeof(*fin->fin_ip));
1274 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
1275 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1276 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1277 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1278 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1279 					     sz - sizeof(ip_t));
1280 	}
1281 
1282 	/*
1283 	 * Need to exit out of these so we don't recursively call rw_enter
1284 	 * from fr_qout.
1285 	 */
1286 	return fr_send_ip(fin, m, &m);
1287 }
1288 
1289 #include <sys/time.h>
1290 #include <sys/varargs.h>
1291 
1292 #ifndef _KERNEL
1293 #include <stdio.h>
1294 #endif
1295 
1296 /*
1297  * Return the first IP Address associated with an interface
1298  * For IPv6, we walk through the list of logical interfaces and return
1299  * the address of the first one that isn't a link-local interface.
1300  * We can't assume that it is :1 because another link-local address
1301  * may have been assigned there.
1302  */
1303 /*ARGSUSED*/
1304 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1305 int v, atype;
1306 void *ifptr;
1307 struct in_addr  *inp, *inpmask;
1308 ipf_stack_t *ifs;
1309 {
1310 	struct sockaddr_in6 v6addr[2];
1311 	struct sockaddr_in v4addr[2];
1312 	net_ifaddr_t type[2];
1313 	net_handle_t net_data;
1314 	phy_if_t phyif;
1315 	void *array;
1316 
1317 	switch (v)
1318 	{
1319 	case 4:
1320 		net_data = ifs->ifs_ipf_ipv4;
1321 		array = v4addr;
1322 		break;
1323 	case 6:
1324 		net_data = ifs->ifs_ipf_ipv6;
1325 		array = v6addr;
1326 		break;
1327 	default:
1328 		net_data = NULL;
1329 		break;
1330 	}
1331 
1332 	if (net_data == NULL)
1333 		return -1;
1334 
1335 	phyif = (phy_if_t)ifptr;
1336 
1337 	switch (atype)
1338 	{
1339 	case FRI_PEERADDR :
1340 		type[0] = NA_PEER;
1341 		break;
1342 
1343 	case FRI_BROADCAST :
1344 		type[0] = NA_BROADCAST;
1345 		break;
1346 
1347 	default :
1348 		type[0] = NA_ADDRESS;
1349 		break;
1350 	}
1351 
1352 	type[1] = NA_NETMASK;
1353 
1354 	if (v == 6) {
1355 		lif_if_t idx = 0;
1356 
1357 		do {
1358 			idx = net_lifgetnext(net_data, phyif, idx);
1359 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1360 					   array) < 0)
1361 				return -1;
1362 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1363 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1364 				break;
1365 		} while (idx != 0);
1366 
1367 		if (idx == 0)
1368 			return -1;
1369 
1370 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1371 					inp, inpmask);
1372 	}
1373 
1374 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1375 		return -1;
1376 
1377 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1378 }
1379 
1380 
1381 u_32_t fr_newisn(fin)
1382 fr_info_t *fin;
1383 {
1384 	static int iss_seq_off = 0;
1385 	u_char hash[16];
1386 	u_32_t newiss;
1387 	MD5_CTX ctx;
1388 	ipf_stack_t *ifs = fin->fin_ifs;
1389 
1390 	/*
1391 	 * Compute the base value of the ISS.  It is a hash
1392 	 * of (saddr, sport, daddr, dport, secret).
1393 	 */
1394 	MD5Init(&ctx);
1395 
1396 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1397 		  sizeof(fin->fin_fi.fi_src));
1398 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1399 		  sizeof(fin->fin_fi.fi_dst));
1400 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1401 
1402 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1403 
1404 	MD5Final(hash, &ctx);
1405 
1406 	bcopy(hash, &newiss, sizeof(newiss));
1407 
1408 	/*
1409 	 * Now increment our "timer", and add it in to
1410 	 * the computed value.
1411 	 *
1412 	 * XXX Use `addin'?
1413 	 * XXX TCP_ISSINCR too large to use?
1414 	 */
1415 	iss_seq_off += 0x00010000;
1416 	newiss += iss_seq_off;
1417 	return newiss;
1418 }
1419 
1420 
1421 /* ------------------------------------------------------------------------ */
1422 /* Function:    fr_nextipid                                                 */
1423 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1424 /* Parameters:  fin(I) - pointer to packet information                      */
1425 /*                                                                          */
1426 /* Returns the next IPv4 ID to use for this packet.                         */
1427 /* ------------------------------------------------------------------------ */
1428 u_short fr_nextipid(fin)
1429 fr_info_t *fin;
1430 {
1431 	static u_short ipid = 0;
1432 	u_short id;
1433 	ipf_stack_t *ifs = fin->fin_ifs;
1434 
1435 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1436 	if (fin->fin_pktnum != 0) {
1437 		id = fin->fin_pktnum & 0xffff;
1438 	} else {
1439 		id = ipid++;
1440 	}
1441 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1442 
1443 	return id;
1444 }
1445 
1446 
1447 #ifndef IPFILTER_CKSUM
1448 /* ARGSUSED */
1449 #endif
1450 INLINE void fr_checkv4sum(fin)
1451 fr_info_t *fin;
1452 {
1453 #ifdef IPFILTER_CKSUM
1454 	if (fr_checkl4sum(fin) == -1)
1455 		fin->fin_flx |= FI_BAD;
1456 #endif
1457 }
1458 
1459 
#ifdef USE_INET6
/*
 * IPv6 counterpart of fr_checkv4sum(): flag the packet as bad (FI_BAD)
 * when fr_checkl4sum() rejects it.  Compiles to an empty function unless
 * IPFILTER_CKSUM is defined.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	int sumstatus = fr_checkl4sum(fin);

	if (sumstatus == -1) {
		fin->fin_flx |= FI_BAD;
	}
# endif
}
#endif /* USE_INET6 */
1473 
1474 
1475 #if (SOLARIS2 < 7)
1476 void fr_slowtimer()
1477 #else
1478 /*ARGSUSED*/
1479 void fr_slowtimer __P((void *arg))
1480 #endif
1481 {
1482 	ipf_stack_t *ifs = arg;
1483 
1484 	READ_ENTER(&ifs->ifs_ipf_global);
1485 	if (ifs->ifs_fr_running != 1) {
1486 		ifs->ifs_fr_timer_id = NULL;
1487 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1488 		return;
1489 	}
1490 	ipf_expiretokens(ifs);
1491 	fr_fragexpire(ifs);
1492 	fr_timeoutstate(ifs);
1493 	fr_natexpire(ifs);
1494 	fr_authexpire(ifs);
1495 	ifs->ifs_fr_ticks++;
1496 	if (ifs->ifs_fr_running == 1)
1497 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1498 		    drv_usectohz(500000));
1499 	else
1500 		ifs->ifs_fr_timer_id = NULL;
1501 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1502 }
1503 
1504 
1505 /* ------------------------------------------------------------------------ */
1506 /* Function:    fr_pullup                                                   */
1507 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1508 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1509 /*              fin(I) - pointer to packet information                      */
1510 /*              len(I) - number of bytes to pullup                          */
1511 /*                                                                          */
1512 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1513 /* single buffer for ease of access.  Operating system native functions are */
1514 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1515 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1516 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1517 /* and ONLY if the pullup succeeds.                                         */
1518 /*                                                                          */
1519 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1520 /* of buffers that starts at *fin->fin_mp.                                  */
1521 /* ------------------------------------------------------------------------ */
1522 void *fr_pullup(min, fin, len)
1523 mb_t *min;
1524 fr_info_t *fin;
1525 int len;
1526 {
1527 	qpktinfo_t *qpi = fin->fin_qpi;
1528 	int out = fin->fin_out, dpoff, ipoff;
1529 	mb_t *m = min, *m1, *m2;
1530 	char *ip;
1531 	uint32_t start, stuff, end, value, flags;
1532 	ipf_stack_t *ifs = fin->fin_ifs;
1533 
1534 	if (m == NULL)
1535 		return NULL;
1536 
1537 	ip = (char *)fin->fin_ip;
1538 	if ((fin->fin_flx & FI_COALESCE) != 0)
1539 		return ip;
1540 
1541 	ipoff = fin->fin_ipoff;
1542 	if (fin->fin_dp != NULL)
1543 		dpoff = (char *)fin->fin_dp - (char *)ip;
1544 	else
1545 		dpoff = 0;
1546 
1547 	if (M_LEN(m) < len + ipoff) {
1548 
1549 		/*
1550 		 * pfil_precheck ensures the IP header is on a 32bit
1551 		 * aligned address so simply fail if that isn't currently
1552 		 * the case (should never happen).
1553 		 */
1554 		int inc = 0;
1555 
1556 		if (ipoff > 0) {
1557 			if ((ipoff & 3) != 0) {
1558 				inc = 4 - (ipoff & 3);
1559 				if (m->b_rptr - inc >= m->b_datap->db_base)
1560 					m->b_rptr -= inc;
1561 				else
1562 					inc = 0;
1563 			}
1564 		}
1565 
1566 		/*
1567 		 * XXX This is here as a work around for a bug with DEBUG
1568 		 * XXX Solaris kernels.  The problem is b_prev is used by IP
1569 		 * XXX code as a way to stash the phyint_index for a packet,
1570 		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1571 		 * XXX for both of these to be NULL.  See 6442390.
1572 		 */
1573 		m1 = m;
1574 		m2 = m->b_prev;
1575 
1576 		do {
1577 			m1->b_next = NULL;
1578 			m1->b_prev = NULL;
1579 			m1 = m1->b_cont;
1580 		} while (m1);
1581 
1582 		/*
1583 		 * Need to preserve checksum information by copying them
1584 		 * to newmp which heads the pulluped message.
1585 		 */
1586 		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1587 		    &value, &flags);
1588 
1589 		if (pullupmsg(m, len + ipoff + inc) == 0) {
1590 			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1591 			FREE_MB_T(*fin->fin_mp);
1592 			*fin->fin_mp = NULL;
1593 			fin->fin_m = NULL;
1594 			fin->fin_ip = NULL;
1595 			fin->fin_dp = NULL;
1596 			qpi->qpi_data = NULL;
1597 			return NULL;
1598 		}
1599 
1600 		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1601 		    value, flags, 0);
1602 
1603 		m->b_prev = m2;
1604 		m->b_rptr += inc;
1605 		fin->fin_m = m;
1606 		ip = MTOD(m, char *) + ipoff;
1607 		qpi->qpi_data = ip;
1608 	}
1609 
1610 	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1611 	fin->fin_ip = (ip_t *)ip;
1612 	if (fin->fin_dp != NULL)
1613 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
1614 
1615 	if (len == fin->fin_plen)
1616 		fin->fin_flx |= FI_COALESCE;
1617 	return ip;
1618 }
1619 
1620 
1621 /*
1622  * Function:	fr_verifysrc
1623  * Returns:	int (really boolean)
1624  * Parameters:	fin - packet information
1625  *
1626  * Check whether the packet has a valid source address for the interface on
1627  * which the packet arrived, implementing the "fr_chksrc" feature.
1628  * Returns true iff the packet's source address is valid.
1629  */
1630 int fr_verifysrc(fin)
1631 fr_info_t *fin;
1632 {
1633 	net_handle_t net_data_p;
1634 	phy_if_t phy_ifdata_routeto;
1635 	struct sockaddr	sin;
1636 	ipf_stack_t *ifs = fin->fin_ifs;
1637 
1638 	if (fin->fin_v == 4) {
1639 		net_data_p = ifs->ifs_ipf_ipv4;
1640 	} else if (fin->fin_v == 6) {
1641 		net_data_p = ifs->ifs_ipf_ipv6;
1642 	} else {
1643 		return (0);
1644 	}
1645 
1646 	/* Get the index corresponding to the if name */
1647 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1648 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1649 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1650 
1651 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1652 }
1653 
1654 
1655 /*
1656  * Function:	fr_fastroute
1657  * Returns:	 0: success;
1658  *		-1: failed
1659  * Parameters:
1660  *	mb: the message block where ip head starts
1661  *	mpp: the pointer to the pointer of the orignal
1662  *		packet message
1663  *	fin: packet information
1664  *	fdp: destination interface information
1665  *	if it is NULL, no interface information provided.
1666  *
1667  * This function is for fastroute/to/dup-to rules. It calls
1668  * pfil_make_lay2_packet to search route, make lay-2 header
1669  * ,and identify output queue for the IP packet.
1670  * The destination address depends on the following conditions:
1671  * 1: for fastroute rule, fdp is passed in as NULL, so the
1672  *	destination address is the IP Packet's destination address
1673  * 2: for to/dup-to rule, if an ip address is specified after
1674  *	the interface name, this address is the as destination
1675  *	address. Otherwise IP Packet's destination address is used
1676  */
1677 int fr_fastroute(mb, mpp, fin, fdp)
1678 mblk_t *mb, **mpp;
1679 fr_info_t *fin;
1680 frdest_t *fdp;
1681 {
1682         net_handle_t net_data_p;
1683 	net_inject_t *inj;
1684 	mblk_t *mp = NULL;
1685 	frentry_t *fr = fin->fin_fr;
1686 	qpktinfo_t *qpi;
1687 	ip_t *ip;
1688 
1689 	struct sockaddr_in *sin;
1690 	struct sockaddr_in6 *sin6;
1691 	struct sockaddr *sinp;
1692 	ipf_stack_t *ifs = fin->fin_ifs;
1693 #ifndef	sparc
1694 	u_short __iplen, __ipoff;
1695 #endif
1696 
1697 	if (fin->fin_v == 4) {
1698 		net_data_p = ifs->ifs_ipf_ipv4;
1699 	} else if (fin->fin_v == 6) {
1700 		net_data_p = ifs->ifs_ipf_ipv6;
1701 	} else {
1702 		return (-1);
1703 	}
1704 
1705 	inj = net_inject_alloc(NETINFO_VERSION);
1706 	if (inj == NULL)
1707 		return -1;
1708 
1709 	ip = fin->fin_ip;
1710 	qpi = fin->fin_qpi;
1711 
1712 	/*
1713 	 * If this is a duplicate mblk then we want ip to point at that
1714 	 * data, not the original, if and only if it is already pointing at
1715 	 * the current mblk data.
1716 	 *
1717 	 * Otherwise, if it's not a duplicate, and we're not already pointing
1718 	 * at the current mblk data, then we want to ensure that the data
1719 	 * points at ip.
1720 	 */
1721 
1722 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1723 		ip = (ip_t *)mb->b_rptr;
1724 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1725 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
1726 		qpi->qpi_off = 0;
1727 	}
1728 
1729 	/*
1730 	 * If there is another M_PROTO, we don't want it
1731 	 */
1732 	if (*mpp != mb) {
1733 		mp = unlinkb(*mpp);
1734 		freeb(*mpp);
1735 		*mpp = mp;
1736 	}
1737 
1738 	sinp = (struct sockaddr *)&inj->ni_addr;
1739 	sin = (struct sockaddr_in *)sinp;
1740 	sin6 = (struct sockaddr_in6 *)sinp;
1741 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1742 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1743 	inj->ni_packet = mb;
1744 
1745 	/*
1746 	 * In case we're here due to "to <if>" being used with
1747 	 * "keep state", check that we're going in the correct
1748 	 * direction.
1749 	 */
1750 	if (fdp != NULL) {
1751 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1752 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1753 			goto bad_fastroute;
1754 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1755 		if (fin->fin_v == 4) {
1756 			sin->sin_addr = fdp->fd_ip;
1757 		} else {
1758 			sin6->sin6_addr = fdp->fd_ip6.in6;
1759 		}
1760 	} else {
1761 		if (fin->fin_v == 4) {
1762 			sin->sin_addr = ip->ip_dst;
1763 		} else {
1764 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1765 		}
1766 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1767 	}
1768 
1769 	/*
1770 	 * Clear the hardware checksum flags from packets that we are doing
1771 	 * input processing on as leaving them set will cause the outgoing
1772 	 * NIC (if it supports hardware checksum) to calculate them anew,
1773 	 * using the old (correct) checksums as the pseudo value to start
1774 	 * from.
1775 	 */
1776 	if (fin->fin_out == 0) {
1777 		DB_CKSUMFLAGS(mb) = 0;
1778 	}
1779 
1780 	*mpp = mb;
1781 
1782 	if (fin->fin_out == 0) {
1783 		void *saveifp;
1784 		u_32_t pass;
1785 
1786 		saveifp = fin->fin_ifp;
1787 		fin->fin_ifp = (void *)inj->ni_physical;
1788 		fin->fin_flx &= ~FI_STATE;
1789 		fin->fin_out = 1;
1790 		(void) fr_acctpkt(fin, &pass);
1791 		fin->fin_fr = NULL;
1792 		if (!fr || !(fr->fr_flags & FR_RETMASK))
1793 			(void) fr_checkstate(fin, &pass);
1794 		if (fr_checknatout(fin, NULL) == -1)
1795 			goto bad_fastroute;
1796 		fin->fin_out = 0;
1797 		fin->fin_ifp = saveifp;
1798 	}
1799 #ifndef	sparc
1800 	if (fin->fin_v == 4) {
1801 		__iplen = (u_short)ip->ip_len,
1802 		__ipoff = (u_short)ip->ip_off;
1803 
1804 		ip->ip_len = htons(__iplen);
1805 		ip->ip_off = htons(__ipoff);
1806 	}
1807 #endif
1808 
1809 	if (net_data_p) {
1810 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1811 			net_inject_free(inj);
1812 			return (-1);
1813 		}
1814 	}
1815 
1816 	ifs->ifs_fr_frouteok[0]++;
1817 	net_inject_free(inj);
1818 	return 0;
1819 bad_fastroute:
1820 	net_inject_free(inj);
1821 	freemsg(mb);
1822 	ifs->ifs_fr_frouteok[1]++;
1823 	return -1;
1824 }
1825 
1826 
1827 /* ------------------------------------------------------------------------ */
1828 /* Function:    ipf_hook4_out                                               */
1829 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1830 /* Parameters:  event(I)     - pointer to event                             */
1831 /*              info(I)      - pointer to hook information for firewalling  */
1832 /*                                                                          */
1833 /* Calling ipf_hook.                                                        */
1834 /* ------------------------------------------------------------------------ */
1835 /*ARGSUSED*/
1836 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1837 {
1838 	return ipf_hook(info, 1, 0, arg);
1839 }
1840 /*ARGSUSED*/
1841 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1842 {
1843 	return ipf_hook6(info, 1, 0, arg);
1844 }
1845 
1846 /* ------------------------------------------------------------------------ */
1847 /* Function:    ipf_hook4_in                                                */
1848 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1849 /* Parameters:  event(I)     - pointer to event                             */
1850 /*              info(I)      - pointer to hook information for firewalling  */
1851 /*                                                                          */
1852 /* Calling ipf_hook.                                                        */
1853 /* ------------------------------------------------------------------------ */
1854 /*ARGSUSED*/
1855 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1856 {
1857 	return ipf_hook(info, 0, 0, arg);
1858 }
1859 /*ARGSUSED*/
1860 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1861 {
1862 	return ipf_hook6(info, 0, 0, arg);
1863 }
1864 
1865 
1866 /* ------------------------------------------------------------------------ */
1867 /* Function:    ipf_hook4_loop_out                                          */
1868 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1869 /* Parameters:  event(I)     - pointer to event                             */
1870 /*              info(I)      - pointer to hook information for firewalling  */
1871 /*                                                                          */
1872 /* Calling ipf_hook.                                                        */
1873 /* ------------------------------------------------------------------------ */
1874 /*ARGSUSED*/
1875 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1876 {
1877 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
1878 }
1879 /*ARGSUSED*/
1880 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1881 {
1882 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1883 }
1884 
1885 /* ------------------------------------------------------------------------ */
1886 /* Function:    ipf_hook4_loop_in                                           */
1887 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1888 /* Parameters:  event(I)     - pointer to event                             */
1889 /*              info(I)      - pointer to hook information for firewalling  */
1890 /*                                                                          */
1891 /* Calling ipf_hook.                                                        */
1892 /* ------------------------------------------------------------------------ */
1893 /*ARGSUSED*/
1894 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1895 {
1896 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
1897 }
1898 /*ARGSUSED*/
1899 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1900 {
1901 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1902 }
1903 
1904 /* ------------------------------------------------------------------------ */
1905 /* Function:    ipf_hook                                                    */
1906 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1907 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1908 /*              out(I)       - whether packet is going in or out            */
1909 /*              loopback(I)  - whether packet is a loopback packet or not   */
1910 /*                                                                          */
1911 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1912 /* parameters out of the info structure and forms them up to be useful for  */
1913 /* calling ipfilter.                                                        */
1914 /* ------------------------------------------------------------------------ */
1915 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1916 {
1917 	hook_pkt_event_t *fw;
1918 	ipf_stack_t *ifs;
1919 	qpktinfo_t qpi;
1920 	int rval, hlen;
1921 	u_short swap;
1922 	phy_if_t phy;
1923 	ip_t *ip;
1924 
1925 	ifs = arg;
1926 	fw = (hook_pkt_event_t *)info;
1927 
1928 	ASSERT(fw != NULL);
1929 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1930 
1931 	ip = fw->hpe_hdr;
1932 	swap = ntohs(ip->ip_len);
1933 	ip->ip_len = swap;
1934 	swap = ntohs(ip->ip_off);
1935 	ip->ip_off = swap;
1936 	hlen = IPH_HDR_LENGTH(ip);
1937 
1938 	qpi.qpi_m = fw->hpe_mb;
1939 	qpi.qpi_data = fw->hpe_hdr;
1940 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1941 	qpi.qpi_ill = (void *)phy;
1942 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1943 	if (qpi.qpi_flags)
1944 		qpi.qpi_flags |= FI_MBCAST;
1945 	qpi.qpi_flags |= loopback;
1946 
1947 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1948 	    &qpi, fw->hpe_mp, ifs);
1949 
1950 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
1951 	if (rval == 0 && *(fw->hpe_mp) == NULL)
1952 		rval = 1;
1953 
1954 	/* Notify IP the packet mblk_t and IP header pointers. */
1955 	fw->hpe_mb = qpi.qpi_m;
1956 	fw->hpe_hdr = qpi.qpi_data;
1957 	if (rval == 0) {
1958 		ip = qpi.qpi_data;
1959 		swap = ntohs(ip->ip_len);
1960 		ip->ip_len = swap;
1961 		swap = ntohs(ip->ip_off);
1962 		ip->ip_off = swap;
1963 	}
1964 	return rval;
1965 
1966 }
1967 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
1968 {
1969 	hook_pkt_event_t *fw;
1970 	int rval, hlen;
1971 	qpktinfo_t qpi;
1972 	phy_if_t phy;
1973 
1974 	fw = (hook_pkt_event_t *)info;
1975 
1976 	ASSERT(fw != NULL);
1977 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1978 
1979 	hlen = sizeof (ip6_t);
1980 
1981 	qpi.qpi_m = fw->hpe_mb;
1982 	qpi.qpi_data = fw->hpe_hdr;
1983 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1984 	qpi.qpi_ill = (void *)phy;
1985 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1986 	if (qpi.qpi_flags)
1987 		qpi.qpi_flags |= FI_MBCAST;
1988 	qpi.qpi_flags |= loopback;
1989 
1990 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1991 	    &qpi, fw->hpe_mp, arg);
1992 
1993 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
1994 	if (rval == 0 && *(fw->hpe_mp) == NULL)
1995 		rval = 1;
1996 
1997 	/* Notify IP the packet mblk_t and IP header pointers. */
1998 	fw->hpe_mb = qpi.qpi_m;
1999 	fw->hpe_hdr = qpi.qpi_data;
2000 	return rval;
2001 
2002 }
2003 
2004 
2005 /* ------------------------------------------------------------------------ */
2006 /* Function:    ipf_nic_event_v4                                            */
2007 /* Returns:     int - 0 == no problems encountered                          */
2008 /* Parameters:  event(I)     - pointer to event                             */
2009 /*              info(I)      - pointer to information about a NIC event     */
2010 /*                                                                          */
2011 /* Function to receive asynchronous NIC events from IP                      */
2012 /* ------------------------------------------------------------------------ */
2013 /*ARGSUSED*/
2014 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2015 {
2016 	struct sockaddr_in *sin;
2017 	hook_nic_event_t *hn;
2018 	ipf_stack_t *ifs = arg;
2019 	void *new_ifp = NULL;
2020 
2021 	if (ifs->ifs_fr_running <= 0)
2022 		return (0);
2023 
2024 	hn = (hook_nic_event_t *)info;
2025 
2026 	switch (hn->hne_event)
2027 	{
2028 	case NE_PLUMB :
2029 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2030 		       ifs);
2031 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2032 			      hn->hne_data, ifs);
2033 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2034 			     hn->hne_data, ifs);
2035 		break;
2036 
2037 	case NE_UNPLUMB :
2038 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2039 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2040 			      ifs);
2041 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2042 		break;
2043 
2044 	case NE_ADDRESS_CHANGE :
2045 		/*
2046 		 * We only respond to events for logical interface 0 because
2047 		 * IPFilter only uses the first address given to a network
2048 		 * interface.  We check for hne_lif==1 because the netinfo
2049 		 * code maps adds 1 to the lif number so that it can return
2050 		 * 0 to indicate "no more lifs" when walking them.
2051 		 */
2052 		if (hn->hne_lif == 1) {
2053 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2054 			    ifs);
2055 			sin = hn->hne_data;
2056 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2057 			    ifs);
2058 		}
2059 		break;
2060 
2061 #if SOLARIS2 >= 10
2062 	case NE_IFINDEX_CHANGE :
2063 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2064 
2065 		if (hn->hne_data != NULL) {
2066 			/*
2067 			 * The netinfo passes interface index as int (hne_data should be
2068 			 * handled as a pointer to int), which is always 32bit. We need to
2069 			 * convert it to void pointer here, since interfaces are
2070 			 * represented as pointers to void in IPF. The pointers are 64 bits
2071 			 * long on 64bit platforms. Doing something like
2072 			 *	(void *)((int) x)
2073 			 * will throw warning:
2074 			 *   "cast to pointer from integer of different size"
2075 			 * during 64bit compilation.
2076 			 *
2077 			 * The line below uses (size_t) to typecast int to
2078 			 * size_t, which might be 64bit/32bit (depending
2079 			 * on architecture). Once we have proper 64bit/32bit
2080 			 * type (size_t), we can safely convert it to void pointer.
2081 			 */
2082 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2083 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2084 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2085 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2086 		}
2087 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2088 		break;
2089 #endif
2090 
2091 	default :
2092 		break;
2093 	}
2094 
2095 	return 0;
2096 }
2097 
2098 
2099 /* ------------------------------------------------------------------------ */
2100 /* Function:    ipf_nic_event_v6                                            */
2101 /* Returns:     int - 0 == no problems encountered                          */
2102 /* Parameters:  event(I)     - pointer to event                             */
2103 /*              info(I)      - pointer to information about a NIC event     */
2104 /*                                                                          */
2105 /* Function to receive asynchronous NIC events from IP                      */
2106 /* ------------------------------------------------------------------------ */
2107 /*ARGSUSED*/
2108 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2109 {
2110 	struct sockaddr_in6 *sin6;
2111 	hook_nic_event_t *hn;
2112 	ipf_stack_t *ifs = arg;
2113 	void *new_ifp = NULL;
2114 
2115 	if (ifs->ifs_fr_running <= 0)
2116 		return (0);
2117 
2118 	hn = (hook_nic_event_t *)info;
2119 
2120 	switch (hn->hne_event)
2121 	{
2122 	case NE_PLUMB :
2123 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2124 		       hn->hne_data, ifs);
2125 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2126 			      hn->hne_data, ifs);
2127 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2128 			     hn->hne_data, ifs);
2129 		break;
2130 
2131 	case NE_UNPLUMB :
2132 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2133 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2134 			      ifs);
2135 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2136 		break;
2137 
2138 	case NE_ADDRESS_CHANGE :
2139 		if (hn->hne_lif == 1) {
2140 			sin6 = hn->hne_data;
2141 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2142 				       ifs);
2143 		}
2144 		break;
2145 
2146 #if SOLARIS2 >= 10
2147 	case NE_IFINDEX_CHANGE :
2148 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2149 		if (hn->hne_data != NULL) {
2150 			/*
2151 			 * The netinfo passes interface index as int (hne_data should be
2152 			 * handled as a pointer to int), which is always 32bit. We need to
2153 			 * convert it to void pointer here, since interfaces are
2154 			 * represented as pointers to void in IPF. The pointers are 64 bits
2155 			 * long on 64bit platforms. Doing something like
2156 			 *	(void *)((int) x)
2157 			 * will throw warning:
2158 			 *   "cast to pointer from integer of different size"
2159 			 * during 64bit compilation.
2160 			 *
2161 			 * The line below uses (size_t) to typecast int to
2162 			 * size_t, which might be 64bit/32bit (depending
2163 			 * on architecture). Once we have proper 64bit/32bit
2164 			 * type (size_t), we can safely convert it to void pointer.
2165 			 */
2166 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2167 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2168 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2169 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2170 		}
2171 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2172 		break;
2173 #endif
2174 
2175 	default :
2176 		break;
2177 	}
2178 
2179 	return 0;
2180 }
2181 
2182 /*
2183  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2184  * are needed in Solaris kernel only. We don't need them in
2185  * ipftest to pretend the ICMP/RST packet was sent as a response.
2186  */
2187 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2188 /* ------------------------------------------------------------------------ */
2189 /* Function:    fr_make_rst                                                 */
2190 /* Returns:     int - 0 on success, -1 on failure			    */
2191 /* Parameters:  fin(I) - pointer to packet information                      */
2192 /*                                                                          */
2193 /* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations.   */
/* IPF can basically do only these things with mblk representing a packet:  */
2196 /*	leave it as it is (pass the packet)				    */
2197 /*                                                                          */
2198 /*	discard it (block the packet)					    */
2199 /*                                                                          */
2200 /*	alter it (i.e. NAT)						    */
2201 /*                                                                          */
2202 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2203 /* instead to IP stack via FW_HOOKS.					    */
2204 /*                                                                          */
2205 /* The return-rst action for packets coming via NIC is handled as follows:  */
2206 /*	mblk with packet is discarded					    */
2207 /*                                                                          */
2208 /*	new mblk with RST response is constructed and injected to network   */
2209 /*                                                                          */
2210 /* IPF can't inject packets to loopback interface, this is just another	    */
2211 /* limitation we have to deal with here. The only option to send RST	    */
2212 /* response to offending TCP packet coming via loopback is to alter it.	    */
2213 /*									    */
2214 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2215 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2216 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2217 /* ------------------------------------------------------------------------ */
2218 int fr_make_rst(fin)
2219 fr_info_t *fin;
2220 {
2221 	uint16_t tmp_port;
2222 	int rv = -1;
2223 	uint32_t old_ack;
2224 	tcphdr_t *tcp = NULL;
2225 	struct in_addr tmp_src;
2226 #ifdef USE_INET6
2227 	struct in6_addr	tmp_src6;
2228 #endif
2229 
2230 	ASSERT(fin->fin_p == IPPROTO_TCP);
2231 
2232 	/*
2233 	 * We do not need to adjust chksum, since it is not being checked by
2234 	 * Solaris IP stack for loopback clients.
2235 	 */
2236 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2237 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2238 
2239 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2240 			/* Swap IPv4 addresses. */
2241 			tmp_src = fin->fin_ip->ip_src;
2242 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2243 			fin->fin_ip->ip_dst = tmp_src;
2244 
2245 			rv = 0;
2246 		}
2247 		else
2248 			tcp = NULL;
2249 	}
2250 #ifdef USE_INET6
2251 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2252 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2253 		/*
2254 		 * We are relying on fact the next header is TCP, which is true
2255 		 * for regular TCP packets coming in over loopback.
2256 		 */
2257 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2258 			/* Swap IPv6 addresses. */
2259 			tmp_src6 = fin->fin_ip6->ip6_src;
2260 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2261 			fin->fin_ip6->ip6_dst = tmp_src6;
2262 
2263 			rv = 0;
2264 		}
2265 		else
2266 			tcp = NULL;
2267 	}
2268 #endif
2269 
2270 	if (tcp != NULL) {
2271 		/*
2272 		 * Adjust TCP header:
2273 		 *	swap ports,
2274 		 *	set flags,
2275 		 *	set correct ACK number
2276 		 */
2277 		tmp_port = tcp->th_sport;
2278 		tcp->th_sport = tcp->th_dport;
2279 		tcp->th_dport = tmp_port;
2280 		old_ack = tcp->th_ack;
2281 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2282 		tcp->th_seq = old_ack;
2283 		tcp->th_flags = TH_RST | TH_ACK;
2284 	}
2285 
2286 	return (rv);
2287 }
2288 
2289 /* ------------------------------------------------------------------------ */
2290 /* Function:    fr_make_icmp_v4                                             */
2291 /* Returns:     int - 0 on success, -1 on failure			    */
2292 /* Parameters:  fin(I) - pointer to packet information                      */
2293 /*                                                                          */
2294 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2295 /* what is going to happen here and why. Once you read the comment there,   */
2296 /* continue here with next paragraph.					    */
2297 /*									    */
2298 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2299 /* happen here:								    */
2300 /*	(1) Original mblk is copied (duplicated).			    */
2301 /*                                                                          */
2302 /*	(2) ICMP header is created.					    */
2303 /*                                                                          */
2304 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2305 /*	    data ready then.						    */
2306 /*                                                                          */
2307 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2308 /*                                                                          */
2309 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2310 /*	    header only and ICMP chksum is computed.			    */
2311 /*                                                                          */
2312 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2313 /*	    which now contains new IP header. If original packet was spread */
2314 /*	    over several mblks, only the first mblk is kept.		    */
2315 /* ------------------------------------------------------------------------ */
2316 static int fr_make_icmp_v4(fin)
2317 fr_info_t *fin;
2318 {
2319 	struct in_addr tmp_src;
2320 	tcphdr_t *tcp;
2321 	struct icmp *icmp;
2322 	mblk_t *mblk_icmp;
2323 	mblk_t *mblk_ip;
2324 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2325 	size_t orig_iphdr_len;	/* length of IP header only */
2326 	uint32_t sum;
2327 	uint16_t *buf;
2328 	int len;
2329 
2330 
2331 	if (fin->fin_v != 4)
2332 		return (-1);
2333 
2334 	/*
2335 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2336 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2337 	 */
2338 	tcp = (tcphdr_t *) fin->fin_dp;
2339 
2340 	if ((fin->fin_p == IPPROTO_TCP) &&
2341 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2342 		return (-1);
2343 
2344 	/*
2345 	 * Step (1)
2346 	 *
2347 	 * Make copy of original mblk.
2348 	 *
2349 	 * We want to copy as much data as necessary, not less, not more.  The
2350 	 * ICMPv4 payload length for unreachable messages is:
2351 	 *	original IP header + 8 bytes of L4 (if there are any).
2352 	 *
2353 	 * We determine if there are at least 8 bytes of L4 data following IP
2354 	 * header first.
2355 	 */
2356 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2357 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2358 	/*
2359 	 * Since we don't want to copy more data than necessary, we must trim
2360 	 * the original mblk here.  The right way (STREAMish) would be to use
2361 	 * adjmsg() to trim it.  However we would have to calculate the length
2362 	 * argument for adjmsg() from pointers we already have here.
2363 	 *
2364 	 * Since we have pointers and offsets, it's faster and easier for
2365 	 * us to just adjust pointers by hand instead of using adjmsg().
2366 	 */
2367 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2368 	fin->fin_m->b_wptr += icmp_pld_len;
2369 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2370 
2371 	/*
2372 	 * Also we don't want to copy any L2 stuff, which might precede IP
2373 	 * header, so we have have to set b_rptr to point to the start of IP
2374 	 * header.
2375 	 */
2376 	fin->fin_m->b_rptr += fin->fin_ipoff;
2377 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2378 		return (-1);
2379 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2380 
2381 	/*
2382 	 * Step (2)
2383 	 *
2384 	 * Create an ICMP header, which will be appened to original mblk later.
2385 	 * ICMP header is just another mblk.
2386 	 */
2387 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2388 	if (mblk_icmp == NULL) {
2389 		FREE_MB_T(mblk_ip);
2390 		return (-1);
2391 	}
2392 
2393 	MTYPE(mblk_icmp) = M_DATA;
2394 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2395 	icmp->icmp_type = ICMP_UNREACH;
2396 	icmp->icmp_code = fin->fin_icode & 0xFF;
2397 	icmp->icmp_void = 0;
2398 	icmp->icmp_cksum = 0;
2399 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2400 
2401 	/*
2402 	 * Step (3)
2403 	 *
2404 	 * Complete ICMP packet - link ICMP header with L4 data from original
2405 	 * IP packet.
2406 	 */
2407 	linkb(mblk_icmp, mblk_ip);
2408 
2409 	/*
2410 	 * Step (4)
2411 	 *
2412 	 * Swap IP addresses and change IP header fields accordingly in
2413 	 * original IP packet.
2414 	 *
2415 	 * There is a rule option return-icmp as a dest for physical
2416 	 * interfaces. This option becomes useless for loopback, since IPF box
2417 	 * uses same address as a loopback destination. We ignore the option
2418 	 * here, the ICMP packet will always look like as it would have been
2419 	 * sent from the original destination host.
2420 	 */
2421 	tmp_src = fin->fin_ip->ip_src;
2422 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2423 	fin->fin_ip->ip_dst = tmp_src;
2424 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2425 	fin->fin_ip->ip_sum = 0;
2426 
2427 	/*
2428 	 * Step (5)
2429 	 *
2430 	 * We trim the orignal mblk to hold IP header only.
2431 	 */
2432 	fin->fin_m->b_wptr = fin->fin_dp;
2433 	orig_iphdr_len = fin->fin_m->b_wptr -
2434 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2435 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2436 			    orig_iphdr_len);
2437 
2438 	/*
2439 	 * ICMP chksum calculation. The data we are calculating chksum for are
2440 	 * spread over two mblks, therefore we have to use two for loops.
2441 	 *
2442 	 * First for loop computes chksum part for ICMP header.
2443 	 */
2444 	buf = (uint16_t *) icmp;
2445 	len = ICMPERR_ICMPHLEN;
2446 	for (sum = 0; len > 1; len -= 2)
2447 		sum += *buf++;
2448 
2449 	/*
2450 	 * Here we add chksum part for ICMP payload.
2451 	 */
2452 	len = icmp_pld_len;
2453 	buf = (uint16_t *) mblk_ip->b_rptr;
2454 	for (; len > 1; len -= 2)
2455 		sum += *buf++;
2456 
2457 	/*
2458 	 * Chksum is done.
2459 	 */
2460 	sum = (sum >> 16) + (sum & 0xffff);
2461 	sum += (sum >> 16);
2462 	icmp->icmp_cksum = ~sum;
2463 
2464 	/*
2465 	 * Step (6)
2466 	 *
2467 	 * Release all packet mblks, except the first one.
2468 	 */
2469 	if (fin->fin_m->b_cont != NULL) {
2470 		FREE_MB_T(fin->fin_m->b_cont);
2471 	}
2472 
2473 	/*
2474 	 * Append ICMP payload to first mblk, which already contains new IP
2475 	 * header.
2476 	 */
2477 	linkb(fin->fin_m, mblk_icmp);
2478 
2479 	return (0);
2480 }
2481 
2482 #ifdef USE_INET6
2483 /* ------------------------------------------------------------------------ */
2484 /* Function:    fr_make_icmp_v6                                             */
2485 /* Returns:     int - 0 on success, -1 on failure			    */
2486 /* Parameters:  fin(I) - pointer to packet information                      */
2487 /*									    */
2488 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
/* what is going to happen here and why. Once you read the comment there,   */
2490 /* continue here with next paragraph.					    */
2491 /*									    */
2492 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2493 /* The algorithm is fairly simple:					    */
2494 /*	1) We need to get copy of complete mblk.			    */
2495 /*									    */
2496 /*	2) New ICMPv6 header is created.				    */
2497 /*									    */
2498 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
2499 /*	   header.							    */
2500 /*									    */
2501 /*	4) The checksum must be adjusted.				    */
2502 /*									    */
2503 /*	5) IP addresses in original mblk are swapped and IP header data	    */
2504 /*	   are adjusted (protocol number).				    */
2505 /*									    */
2506 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2507 /*	   linked with the ICMPv6 data we got from (3).			    */
2508 /* ------------------------------------------------------------------------ */
2509 static int fr_make_icmp_v6(fin)
2510 fr_info_t *fin;
2511 {
2512 	struct icmp6_hdr *icmp6;
2513 	tcphdr_t *tcp;
2514 	struct in6_addr	tmp_src6;
2515 	size_t icmp_pld_len;
2516 	mblk_t *mblk_ip, *mblk_icmp;
2517 
2518 	if (fin->fin_v != 6)
2519 		return (-1);
2520 
2521 	/*
2522 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2523 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2524 	 */
2525 	tcp = (tcphdr_t *) fin->fin_dp;
2526 
2527 	if ((fin->fin_p == IPPROTO_TCP) &&
2528 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2529 		return (-1);
2530 
2531 	/*
2532 	 * Step (1)
2533 	 *
2534 	 * We need to copy complete packet in case of IPv6, no trimming is
2535 	 * needed (except the L2 headers).
2536 	 */
2537 	icmp_pld_len = M_LEN(fin->fin_m);
2538 	fin->fin_m->b_rptr += fin->fin_ipoff;
2539 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2540 		return (-1);
2541 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2542 
2543 	/*
2544 	 * Step (2)
2545 	 *
2546 	 * Allocate and create ICMP header.
2547 	 */
2548 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2549 			BPRI_HI);
2550 
2551 	if (mblk_icmp == NULL)
2552 		return (-1);
2553 
2554 	MTYPE(mblk_icmp) = M_DATA;
2555 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2556 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
2557 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
2558 	icmp6->icmp6_data32[0] = 0;
2559 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2560 
2561 	/*
2562 	 * Step (3)
2563 	 *
2564 	 * Link the copy of IP packet to ICMP header.
2565 	 */
2566 	linkb(mblk_icmp, mblk_ip);
2567 
2568 	/*
2569 	 * Step (4)
2570 	 *
2571 	 * Calculate chksum - this is much more easier task than in case of
2572 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2573 	 * We are making compensation just for change of packet length.
2574 	 */
2575 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2576 
2577 	/*
2578 	 * Step (5)
2579 	 *
2580 	 * Swap IP addresses.
2581 	 */
2582 	tmp_src6 = fin->fin_ip6->ip6_src;
2583 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2584 	fin->fin_ip6->ip6_dst = tmp_src6;
2585 
2586 	/*
2587 	 * and adjust IP header data.
2588 	 */
2589 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2590 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2591 
2592 	/*
2593 	 * Step (6)
2594 	 *
2595 	 * We must release all linked mblks from original packet and keep only
2596 	 * the first mblk with IP header to link ICMP data.
2597 	 */
2598 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2599 
2600 	if (fin->fin_m->b_cont != NULL) {
2601 		FREE_MB_T(fin->fin_m->b_cont);
2602 	}
2603 
2604 	/*
2605 	 * Append ICMP payload to IP header.
2606 	 */
2607 	linkb(fin->fin_m, mblk_icmp);
2608 
2609 	return (0);
2610 }
2611 #endif	/* USE_INET6 */
2612 
2613 /* ------------------------------------------------------------------------ */
2614 /* Function:    fr_make_icmp                                                */
2615 /* Returns:     int - 0 on success, -1 on failure			    */
2616 /* Parameters:  fin(I) - pointer to packet information                      */
2617 /*                                                                          */
2618 /* We must alter the original mblks passed to IPF from IP stack via	    */
2619 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
2620 /* comment at fr_make_rst() function.					    */
2621 /*									    */
2622 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
2623 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
2624 /* protocol version. However there are some details, which are common to    */
2625 /* both IP versions. The details are going to be explained here.	    */
2626 /*                                                                          */
2627 /* The packet looks as follows:						    */
2628 /*    xxx | IP hdr | IP payload    ...	| 				    */
2629 /*    ^   ^        ^            	^				    */
2630 /*    |   |        |            	|				    */
2631 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2632 /*    |   |        |							    */
2633 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
2634 /*    |   |								    */
2635 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2636 /*    |      of loopback)						    */
2637 /*    |   								    */
2638 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
2639 /*                                                                          */
2640 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2641 /* well in advance before the matching rule was found (the rule, which took */
2642 /* us here, to fr_make_icmp() function).				    */
2643 /*                                                                          */
2644 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
2645 /* packet. New packet will be represented as chain of mblks.		    */
2646 /* orig mblk |- b_cont ---.						    */
2647 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
2648 /*    |	                          ^	            `-> duped orig mblk	    */
2649 /*    |                           |				^	    */
2650 /*    `- The original mblk        |				|	    */
2651 /*       will be trimmed to       |				|	    */
/*       contain IP header        |				|	    */
2653 /*       only                     |				|	    */
2654 /*                                |				|	    */
2655 /*                                `- This is newly		|           */
2656 /*                                   allocated mblk to		|	    */
2657 /*                                   hold ICMPv6 data.		|	    */
2658 /*								|	    */
2659 /*								|	    */
2660 /*								|	    */
2661 /*	    This is the copy of original mblk, it will contain -'	    */
/*	    original IP packet in case of ICMPv6. In case of		    */
2663 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
2664 /*	    (TCP/UDP/L4) data from original packet.			    */
2665 /* ------------------------------------------------------------------------ */
2666 int fr_make_icmp(fin)
2667 fr_info_t *fin;
2668 {
2669 	int rv;
2670 
2671 	if (fin->fin_v == 4)
2672 		rv = fr_make_icmp_v4(fin);
2673 #ifdef USE_INET6
2674 	else if (fin->fin_v == 6)
2675 		rv = fr_make_icmp_v6(fin);
2676 #endif
2677 	else
2678 		rv = -1;
2679 
2680 	return (rv);
2681 }
2682 
2683 /* ------------------------------------------------------------------------ */
2684 /* Function:    fr_buf_sum						    */
2685 /* Returns:     unsigned int - sum of buffer buf			    */
2686 /* Parameters:  buf - pointer to buf we want to sum up			    */
2687 /*              len - length of buffer buf				    */
2688 /*                                                                          */
2689 /* Sums buffer buf. The result is used for chksum calculation. The buf	    */
2690 /* argument must be aligned.						    */
2691 /* ------------------------------------------------------------------------ */
2692 static uint32_t fr_buf_sum(buf, len)
2693 const void *buf;
2694 unsigned int len;
2695 {
2696 	uint32_t	sum = 0;
2697 	uint16_t	*b = (uint16_t *)buf;
2698 
2699 	while (len > 1) {
2700 		sum += *b++;
2701 		len -= 2;
2702 	}
2703 
2704 	if (len == 1)
2705 		sum += htons((*(unsigned char *)b) << 8);
2706 
2707 	return (sum);
2708 }
2709 
2710 /* ------------------------------------------------------------------------ */
2711 /* Function:    fr_calc_chksum						    */
2712 /* Returns:     void							    */
2713 /* Parameters:  fin - pointer to fr_info_t instance with packet data	    */
2714 /*              pkt - pointer to duplicated packet			    */
2715 /*                                                                          */
2716 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP	    */
2717 /* versions.								    */
2718 /* ------------------------------------------------------------------------ */
2719 void fr_calc_chksum(fin, pkt)
2720 fr_info_t *fin;
2721 mb_t *pkt;
2722 {
2723 	struct pseudo_hdr {
2724 		union {
2725 			struct in_addr	in4;
2726 #ifdef USE_INET6
2727 			struct in6_addr	in6;
2728 #endif
2729 		} src_addr;
2730 		union {
2731 			struct in_addr	in4;
2732 #ifdef USE_INET6
2733 			struct in6_addr	in6;
2734 #endif
2735 		} dst_addr;
2736 		char		zero;
2737 		char		proto;
2738 		uint16_t	len;
2739 	}	phdr;
2740 	uint32_t	sum, ip_sum;
2741 	void	*buf;
2742 	uint16_t	*l4_csum_p;
2743 	tcphdr_t	*tcp;
2744 	udphdr_t	*udp;
2745 	icmphdr_t	*icmp;
2746 #ifdef USE_INET6
2747 	struct icmp6_hdr	*icmp6;
2748 #endif
2749 	ip_t		*ip;
2750 	unsigned int	len;
2751 	int		pld_len;
2752 
2753 	/*
2754 	 * We need to pullup the packet to the single continuous buffer to avoid
2755 	 * potential misaligment of b_rptr member in mblk chain.
2756 	 */
2757 	if (pullupmsg(pkt, -1) == 0) {
2758 		cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2759 		    " will not be computed by IPF");
2760 		return;
2761 	}
2762 
2763 	/*
2764 	 * It is guaranteed IP header starts right at b_rptr, because we are
2765 	 * working with a copy of the original packet.
2766 	 *
2767 	 * Compute pseudo header chksum for TCP and UDP.
2768 	 */
2769 	if ((fin->fin_p == IPPROTO_UDP) ||
2770 	    (fin->fin_p == IPPROTO_TCP)) {
2771 		bzero(&phdr, sizeof (phdr));
2772 #ifdef USE_INET6
2773 		if (fin->fin_v == 6) {
2774 			phdr.src_addr.in6 = fin->fin_srcip6;
2775 			phdr.dst_addr.in6 = fin->fin_dstip6;
2776 		} else {
2777 			phdr.src_addr.in4 = fin->fin_src;
2778 			phdr.dst_addr.in4 = fin->fin_dst;
2779 		}
2780 #else
2781 		phdr.src_addr.in4 = fin->fin_src;
2782 		phdr.dst_addr.in4 = fin->fin_dst;
2783 #endif
2784 		phdr.zero = (char) 0;
2785 		phdr.proto = fin->fin_p;
2786 		phdr.len = htons((uint16_t)fin->fin_dlen);
2787 		sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2788 	} else {
2789 		sum = 0;
2790 	}
2791 
2792 	/*
2793 	 * Set pointer to the L4 chksum field in the packet, set buf pointer to
2794 	 * the L4 header start.
2795 	 */
2796 	switch (fin->fin_p) {
2797 		case IPPROTO_UDP:
2798 			udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2799 			l4_csum_p = &udp->uh_sum;
2800 			buf = udp;
2801 			break;
2802 		case IPPROTO_TCP:
2803 			tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2804 			l4_csum_p = &tcp->th_sum;
2805 			buf = tcp;
2806 			break;
2807 		case IPPROTO_ICMP:
2808 			icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2809 			l4_csum_p = &icmp->icmp_cksum;
2810 			buf = icmp;
2811 			break;
2812 #ifdef USE_INET6
2813 		case IPPROTO_ICMPV6:
2814 			icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2815 			l4_csum_p = &icmp6->icmp6_cksum;
2816 			buf = icmp6;
2817 			break;
2818 #endif
2819 		default:
2820 			l4_csum_p = NULL;
2821 	}
2822 
2823 	/*
2824 	 * Compute L4 chksum if needed.
2825 	 */
2826 	if (l4_csum_p != NULL) {
2827 		*l4_csum_p = (uint16_t)0;
2828 		pld_len = fin->fin_dlen;
2829 		len = pkt->b_wptr - (unsigned char *)buf;
2830 		ASSERT(len == pld_len);
2831 		/*
2832 		 * Add payload sum to pseudoheader sum.
2833 		 */
2834 		sum += fr_buf_sum(buf, len);
2835 		while (sum >> 16)
2836 			sum = (sum & 0xFFFF) + (sum >> 16);
2837 
2838 		*l4_csum_p = ~((uint16_t)sum);
2839 		DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
2840 	}
2841 
2842 	/*
2843 	 * The IP header chksum is needed just for IPv4.
2844 	 */
2845 	if (fin->fin_v == 4) {
2846 		/*
2847 		 * Compute IPv4 header chksum.
2848 		 */
2849 		ip = (ip_t *)pkt->b_rptr;
2850 		ip->ip_sum = (uint16_t)0;
2851 		ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
2852 		while (ip_sum >> 16)
2853 			ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
2854 
2855 		ip->ip_sum = ~((uint16_t)ip_sum);
2856 		DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
2857 	}
2858 
2859 	return;
2860 }
2861 
2862 #endif	/* _KERNEL && SOLARIS2 >= 10 */
2863