1 /*	$FreeBSD$	*/
2 
3 /*
4  * Copyright (C) 2012 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL	1
12 # define        _KERNEL	1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/file.h>
18 #if !defined(_KERNEL) && !defined(__KERNEL__)
19 # include <stdio.h>
20 # include <stdlib.h>
21 # include <string.h>
22 # define _KERNEL
23 # define KERNEL
24 # include <sys/uio.h>
25 # undef _KERNEL
26 # undef KERNEL
27 #else
28 # include <sys/systm.h>
29 # if !defined(__SVR4)
30 #  include <sys/mbuf.h>
31 # endif
32 # include <sys/select.h>
33 # ifdef __FreeBSD__
34 #  include <sys/selinfo.h>
35 # endif
36 #endif
37 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
38 # include <sys/proc.h>
39 #endif
40 #if defined(_KERNEL) && defined(__FreeBSD__)
41 # include <sys/filio.h>
42 # include <sys/fcntl.h>
43 #else
44 # include <sys/ioctl.h>
45 #endif
46 #include <sys/time.h>
47 # include <sys/protosw.h>
48 #include <sys/socket.h>
49 #if defined(__SVR4)
50 # include <sys/filio.h>
51 # include <sys/byteorder.h>
52 # ifdef _KERNEL
53 #  include <sys/dditypes.h>
54 # endif
55 # include <sys/stream.h>
56 # include <sys/kmem.h>
57 #endif
58 
59 #include <net/if.h>
60 #ifdef sun
61 # include <net/af.h>
62 #endif
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/ip.h>
66 #include <netinet/tcp.h>
67 # include <netinet/ip_var.h>
68 # include <netinet/tcp_fsm.h>
69 #include <netinet/udp.h>
70 #include <netinet/ip_icmp.h>
71 #include "netinet/ip_compat.h"
72 #include <netinet/tcpip.h>
73 #include "netinet/ip_fil.h"
74 #include "netinet/ip_nat.h"
75 #include "netinet/ip_frag.h"
76 #include "netinet/ip_state.h"
77 #include "netinet/ip_proxy.h"
78 #include "netinet/ip_sync.h"
79 #ifdef  USE_INET6
80 #include <netinet/icmp6.h>
81 #endif
82 #if defined(__FreeBSD__)
83 # include <sys/malloc.h>
84 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
85 #  include <sys/libkern.h>
86 #  include <sys/systm.h>
87 # endif
88 #endif
89 /* END OF INCLUDES */
90 
91 #if !defined(lint)
92 static const char rcsid[] = "@(#)$Id$";
93 #endif
94 
95 #define	SYNC_STATETABSZ	256
96 #define	SYNC_NATTABSZ	256
97 
98 typedef struct ipf_sync_softc_s {
99 	ipfmutex_t	ipf_syncadd;
100 	ipfmutex_t	ipsl_mutex;
101 	ipfrwlock_t	ipf_syncstate;
102 	ipfrwlock_t	ipf_syncnat;
103 #if SOLARIS && defined(_KERNEL)
104 	kcondvar_t	ipslwait;
105 #endif
106 	synclist_t	**syncstatetab;
107 	synclist_t	**syncnattab;
108 	synclogent_t	*synclog;
109 	syncupdent_t	*syncupd;
110 	u_int		ipf_sync_num;
111 	u_int		ipf_sync_wrap;
112 	u_int		sl_idx;		/* next available sync log entry */
113 	u_int		su_idx;		/* next available sync update entry */
114 	u_int		sl_tail;	/* next sync log entry to read */
115 	u_int		su_tail;	/* next sync update entry to read */
116 	int		ipf_sync_log_sz;
117 	int		ipf_sync_nat_tab_sz;
118 	int		ipf_sync_state_tab_sz;
119 	int		ipf_sync_debug;
120 	int		ipf_sync_events;
121 	u_32_t		ipf_sync_lastwakeup;
122 	int		ipf_sync_wake_interval;
123 	int		ipf_sync_event_high_wm;
124 	int		ipf_sync_queue_high_wm;
125 	int		ipf_sync_inited;
126 } ipf_sync_softc_t;
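
/*
 * synclog[] and syncupd[] are simple fill-then-drain queues rather than
 * circular buffers: sl_idx/su_idx are the producer indices (ipf_sync_new()
 * and ipf_sync_update() write there), sl_tail/su_tail are the consumer
 * indices used by ipf_sync_read(), and each pair is reset to zero once the
 * reader has caught up.  ipf_sync_new() refuses new entries once sl_idx
 * reaches ipf_sync_log_sz.
 */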
127 
128 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
129 static void ipf_sync_wakeup(ipf_main_softc_t *);
130 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
131 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
132 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
133 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
134 
135 # if !defined(sparc) && !defined(__hppa)
136 void ipf_sync_tcporder(int, struct tcpdata *);
137 void ipf_sync_natorder(int, struct nat *);
138 void ipf_sync_storder(int, struct ipstate *);
139 # endif
140 
141 
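/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_soft_create                                        */
/* Returns:     void * - NULL == failure, else pointer to sync context data */
/* Parameters:  softc(I) - pointer to soft context main structure           */
/*                                                                          */
/* Allocate and zero an ipf_sync_softc_t and set the default table and log  */
/* sizes used later by ipf_sync_soft_init().                                */
/* ------------------------------------------------------------------------ */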
142 void *
143 ipf_sync_soft_create(ipf_main_softc_t *softc)
144 {
145 	ipf_sync_softc_t *softs;
146 
147 	KMALLOC(softs, ipf_sync_softc_t *);
148 	if (softs == NULL) {
149 		IPFERROR(110024);
150 		return (NULL);
151 	}
152 
153 	bzero((char *)softs, sizeof(*softs));
154 
155 	softs->ipf_sync_log_sz = SYNCLOG_SZ;
156 	softs->ipf_sync_nat_tab_sz = SYNC_NATTABSZ;
157 	softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
158 	softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 90 / 100;	/* 90% */
159 	softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 90 / 100;	/* 90% */
160 
161 	return (softs);
162 }
163 
164 
165 /* ------------------------------------------------------------------------ */
166 /* Function:    ipf_sync_soft_init                                          */
167 /* Returns:     int - 0 == success, -1 == failure                           */
168 /* Parameters:  Nil                                                         */
169 /*                                                                          */
170 /* Initialise all of the locks required for the sync code and initialise    */
171 /* any data structures, as required.                                        */
172 /* ------------------------------------------------------------------------ */
173 int
174 ipf_sync_soft_init(ipf_main_softc_t *softc, void *arg)
175 {
176 	ipf_sync_softc_t *softs = arg;
177 
178 	KMALLOCS(softs->synclog, synclogent_t *,
179 		 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
180 	if (softs->synclog == NULL)
181 		return (-1);
182 	bzero((char *)softs->synclog,
183 	      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
184 
185 	KMALLOCS(softs->syncupd, syncupdent_t *,
186 		 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
187 	if (softs->syncupd == NULL)
188 		return (-2);
189 	bzero((char *)softs->syncupd,
190 	      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
191 
192 	KMALLOCS(softs->syncstatetab, synclist_t **,
193 		 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
194 	if (softs->syncstatetab == NULL)
195 		return (-3);
196 	bzero((char *)softs->syncstatetab,
197 	      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
198 
199 	KMALLOCS(softs->syncnattab, synclist_t **,
200 		 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
201 	if (softs->syncnattab == NULL)
202 		return (-4);
203 	bzero((char *)softs->syncnattab,
204 	      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
205 
206 	softs->ipf_sync_num = 1;
207 	softs->ipf_sync_wrap = 0;
208 	softs->sl_idx = 0;
209 	softs->su_idx = 0;
210 	softs->sl_tail = 0;
211 	softs->su_tail = 0;
212 	softs->ipf_sync_events = 0;
213 	softs->ipf_sync_lastwakeup = 0;
214 
215 
216 # if SOLARIS && defined(_KERNEL)
217 	cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
218 # endif
219 	RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
220 	RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
221 	MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
222 	MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
223 
224 	softs->ipf_sync_inited = 1;
225 
226 	return (0);
227 }
228 
229 
230 /* ------------------------------------------------------------------------ */
231 /* Function:    ipf_sync_soft_fini                                          */
232 /* Returns:     int - 0 == success, -1 == failure                           */
233 /* Parameters:  Nil                                                         */
234 /*                                                                          */
235 /* Destroy the locks created when initialising and free any memory in use   */
236 /* with the synchronisation tables.                                         */
237 /* ------------------------------------------------------------------------ */
238 int
239 ipf_sync_soft_fini(ipf_main_softc_t *softc, void *arg)
240 {
241 	ipf_sync_softc_t *softs = arg;
242 
243 	if (softs->syncnattab != NULL) {
244 		ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
245 				     softs->syncnattab);
246 		KFREES(softs->syncnattab,
247 		       softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
248 		softs->syncnattab = NULL;
249 	}
250 
251 	if (softs->syncstatetab != NULL) {
252 		ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
253 				     softs->syncstatetab);
254 		KFREES(softs->syncstatetab,
255 		       softs->ipf_sync_state_tab_sz *
256 		       sizeof(*softs->syncstatetab));
257 		softs->syncstatetab = NULL;
258 	}
259 
260 	if (softs->syncupd != NULL) {
261 		KFREES(softs->syncupd,
262 		       softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
263 		softs->syncupd = NULL;
264 	}
265 
266 	if (softs->synclog != NULL) {
267 		KFREES(softs->synclog,
268 		       softs->ipf_sync_log_sz * sizeof(*softs->synclog));
269 		softs->synclog = NULL;
270 	}
271 
272 	if (softs->ipf_sync_inited == 1) {
273 		MUTEX_DESTROY(&softs->ipsl_mutex);
274 		MUTEX_DESTROY(&softs->ipf_syncadd);
275 		RW_DESTROY(&softs->ipf_syncnat);
276 		RW_DESTROY(&softs->ipf_syncstate);
277 		softs->ipf_sync_inited = 0;
278 	}
279 
280 	return (0);
281 }
282 
283 void
284 ipf_sync_soft_destroy(ipf_main_softc_t *softc, void *arg)
285 {
286 	ipf_sync_softc_t *softs = arg;
287 
288 	KFREE(softs);
289 }
290 
291 
292 # if !defined(sparc)
293 /* ------------------------------------------------------------------------ */
294 /* Function:    ipf_sync_tcporder                                           */
295 /* Returns:     Nil                                                         */
296 /* Parameters:  way(I) - direction of byte order conversion.                */
297 /*              td(IO) - pointer to data to be converted.                   */
298 /*                                                                          */
299 /* Do byte swapping on values in the TCP state information structure that   */
300 /* need to be used at both ends by the host in their native byte order.     */
301 /* ------------------------------------------------------------------------ */
302 void
303 ipf_sync_tcporder(int way, tcpdata_t *td)
304 {
305 	if (way) {
306 		td->td_maxwin = htons(td->td_maxwin);
307 		td->td_end = htonl(td->td_end);
308 		td->td_maxend = htonl(td->td_maxend);
309 	} else {
310 		td->td_maxwin = ntohs(td->td_maxwin);
311 		td->td_end = ntohl(td->td_end);
312 		td->td_maxend = ntohl(td->td_maxend);
313 	}
314 }
315 
316 
317 /* ------------------------------------------------------------------------ */
318 /* Function:    ipf_sync_natorder                                           */
319 /* Returns:     Nil                                                         */
320 /* Parameters:  way(I)  - direction of byte order conversion.               */
321 /*              nat(IO) - pointer to data to be converted.                  */
322 /*                                                                          */
323 /* Do byte swapping on values in the NAT data structure that need to be     */
324 /* used at both ends by the host in their native byte order.                */
325 /* ------------------------------------------------------------------------ */
326 void
327 ipf_sync_natorder(int way, nat_t *n)
328 {
329 	if (way) {
330 		n->nat_age = htonl(n->nat_age);
331 		n->nat_flags = htonl(n->nat_flags);
332 		n->nat_ipsumd = htonl(n->nat_ipsumd);
333 		n->nat_use = htonl(n->nat_use);
334 		n->nat_dir = htonl(n->nat_dir);
335 	} else {
336 		n->nat_age = ntohl(n->nat_age);
337 		n->nat_flags = ntohl(n->nat_flags);
338 		n->nat_ipsumd = ntohl(n->nat_ipsumd);
339 		n->nat_use = ntohl(n->nat_use);
340 		n->nat_dir = ntohl(n->nat_dir);
341 	}
342 }
343 
344 
345 /* ------------------------------------------------------------------------ */
346 /* Function:    ipf_sync_storder                                            */
347 /* Returns:     Nil                                                         */
348 /* Parameters:  way(I)  - direction of byte order conversion.               */
349 /*              ips(IO) - pointer to data to be converted.                  */
350 /*                                                                          */
351 /* Do byte swapping on values in the IP state data structure that need to   */
352 /* be used at both ends by the host in their native byte order.             */
353 /* ------------------------------------------------------------------------ */
354 void
355 ipf_sync_storder(int way, ipstate_t *ips)
356 {
357 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
358 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
359 
360 	if (way) {
361 		ips->is_hv = htonl(ips->is_hv);
362 		ips->is_die = htonl(ips->is_die);
363 		ips->is_pass = htonl(ips->is_pass);
364 		ips->is_flags = htonl(ips->is_flags);
365 		ips->is_opt[0] = htonl(ips->is_opt[0]);
366 		ips->is_opt[1] = htonl(ips->is_opt[1]);
367 		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
368 		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
369 		ips->is_sec = htons(ips->is_sec);
370 		ips->is_secmsk = htons(ips->is_secmsk);
371 		ips->is_auth = htons(ips->is_auth);
372 		ips->is_authmsk = htons(ips->is_authmsk);
373 		ips->is_s0[0] = htonl(ips->is_s0[0]);
374 		ips->is_s0[1] = htonl(ips->is_s0[1]);
375 		ips->is_smsk[0] = htons(ips->is_smsk[0]);
376 		ips->is_smsk[1] = htons(ips->is_smsk[1]);
377 	} else {
378 		ips->is_hv = ntohl(ips->is_hv);
379 		ips->is_die = ntohl(ips->is_die);
380 		ips->is_pass = ntohl(ips->is_pass);
381 		ips->is_flags = ntohl(ips->is_flags);
382 		ips->is_opt[0] = ntohl(ips->is_opt[0]);
383 		ips->is_opt[1] = ntohl(ips->is_opt[1]);
384 		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
385 		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
386 		ips->is_sec = ntohs(ips->is_sec);
387 		ips->is_secmsk = ntohs(ips->is_secmsk);
388 		ips->is_auth = ntohs(ips->is_auth);
389 		ips->is_authmsk = ntohs(ips->is_authmsk);
390 		ips->is_s0[0] = ntohl(ips->is_s0[0]);
391 		ips->is_s0[1] = ntohl(ips->is_s0[1]);
392 		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
393 		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
394 	}
395 }
396 # else /* !defined(sparc) */
397 #  define	ipf_sync_tcporder(x,y)
398 #  define	ipf_sync_natorder(x,y)
399 #  define	ipf_sync_storder(x,y)
400 # endif /* !defined(sparc) */
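
/*
 * Byte order convention for the *order() helpers above: a non-zero "way"
 * converts the listed fields from host to network order (ipf_sync_new()
 * uses this when building a log entry), while zero converts from network
 * to host order (ipf_sync_state() and ipf_sync_nat() use this on received
 * records).  On big-endian CPUs such as sparc the conversions would be
 * no-ops, which is presumably why they collapse to empty macros there.
 */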
401 
402 
403 /* ------------------------------------------------------------------------ */
404 /* Function:    ipf_sync_write                                              */
405 /* Returns:     int    - 0 == success, else error value.                    */
406 /* Parameters:  uio(I) - pointer to information about data to write         */
407 /*                                                                          */
408 /* Moves data from user space into the kernel and uses it for updating data */
409 /* structures in the state/NAT tables.                                      */
410 /* ------------------------------------------------------------------------ */
411 int
412 ipf_sync_write(ipf_main_softc_t *softc, struct uio *uio)
413 {
414 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
415 	synchdr_t sh;
416 
417 	/*
418 	 * THIS MUST BE SUFFICIENTLY LARGE TO STORE
419 	 * ANY POSSIBLE DATA TYPE
420 	 */
421 	char data[2048];
422 
423 	int err = 0;
424 
425 #  if defined(__NetBSD__) || defined(__FreeBSD__)
426 	uio->uio_rw = UIO_WRITE;
427 #  endif
428 
429 	/* Try to get bytes */
430 	while (uio->uio_resid > 0) {
431 
432 		if (uio->uio_resid >= sizeof(sh)) {
433 
434 			err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
435 
436 			if (err) {
437 				if (softs->ipf_sync_debug > 2)
438 					printf("uiomove(header) failed: %d\n",
439 						err);
440 				return (err);
441 			}
442 
443 			/* convert to host order */
444 			sh.sm_magic = ntohl(sh.sm_magic);
445 			sh.sm_len = ntohl(sh.sm_len);
446 			sh.sm_num = ntohl(sh.sm_num);
447 
448 			if (softs->ipf_sync_debug > 8)
449 				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
450 					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
451 					sh.sm_table, sh.sm_rev, sh.sm_len,
452 					sh.sm_magic);
453 
454 			if (sh.sm_magic != SYNHDRMAGIC) {
455 				if (softs->ipf_sync_debug > 2)
456 					printf("uiomove(header) invalid %s\n",
457 						"magic");
458 				IPFERROR(110001);
459 				return (EINVAL);
460 			}
461 
462 			if (sh.sm_v != 4 && sh.sm_v != 6) {
463 				if (softs->ipf_sync_debug > 2)
464 					printf("uiomove(header) invalid %s\n",
465 						"protocol");
466 				IPFERROR(110002);
467 				return (EINVAL);
468 			}
469 
470 			if (sh.sm_cmd > SMC_MAXCMD) {
471 				if (softs->ipf_sync_debug > 2)
472 					printf("uiomove(header) invalid %s\n",
473 						"command");
474 				IPFERROR(110003);
475 				return (EINVAL);
476 			}
477 
478 
479 			if (sh.sm_table > SMC_MAXTBL) {
480 				if (softs->ipf_sync_debug > 2)
481 					printf("uiomove(header) invalid %s\n",
482 						"table");
483 				IPFERROR(110004);
484 				return (EINVAL);
485 			}
486 
487 		} else {
488 			/* insufficient data, wait until next call */
489 			if (softs->ipf_sync_debug > 2)
490 				printf("uiomove(header) insufficient data\n");
491 			IPFERROR(110005);
492 			return (EAGAIN);
493 		}
494 
495 
496 		/*
497 		 * We have a header, so try to read the amount of data
498 		 * needed for the request
499 		 */
500 
501 		/* not supported */
502 		if (sh.sm_len == 0) {
503 			if (softs->ipf_sync_debug > 2)
504 				printf("uiomove(data) zero length %s\n",
505 					"not supported");
506 			IPFERROR(110006);
507 			return (EINVAL);
508 		}
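
		/*
		 * sh.sm_len comes straight from userland, so make sure it
		 * cannot exceed the on-stack buffer above before it is used
		 * as a copy length.  The zero-length error number is reused
		 * here rather than allocating a new IPFERROR value.
		 */
		if (sh.sm_len > sizeof(data)) {
			if (softs->ipf_sync_debug > 2)
				printf("uiomove(data) length %d too big\n",
					sh.sm_len);
			IPFERROR(110006);
			return (EINVAL);
		}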
509 
510 		if (uio->uio_resid >= sh.sm_len) {
511 
512 			err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
513 
514 			if (err) {
515 				if (softs->ipf_sync_debug > 2)
516 					printf("uiomove(data) failed: %d\n",
517 						err);
518 				return (err);
519 			}
520 
521 			if (softs->ipf_sync_debug > 7)
522 				printf("uiomove(data) %d bytes read\n",
523 					sh.sm_len);
524 
525 			if (sh.sm_table == SMC_STATE)
526 				err = ipf_sync_state(softc, &sh, data);
527 			else if (sh.sm_table == SMC_NAT)
528 				err = ipf_sync_nat(softc, &sh, data);
529 			if (softs->ipf_sync_debug > 7)
530 				printf("[%d] Finished with error %d\n",
531 					sh.sm_num, err);
532 
533 		} else {
534 			/* insufficient data, wait until next call */
535 			if (softs->ipf_sync_debug > 2)
536 				printf("uiomove(data) %s %d bytes, got %d\n",
537 					"insufficient data, need",
538 					sh.sm_len, (int)uio->uio_resid);
539 			IPFERROR(110007);
540 			return (EAGAIN);
541 		}
542 	}
543 
544 	/* no more data */
545 	return (0);
546 }
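
/*
 * The stream consumed by ipf_sync_write() is a sequence of records, each a
 * synchdr_t (sm_magic, sm_len and sm_num in network byte order) immediately
 * followed by sm_len bytes of payload:
 *
 *	+-----------+------------------+-----------+------------------+
 *	| synchdr_t | payload (sm_len) | synchdr_t | payload (sm_len) | ...
 *	+-----------+------------------+-----------+------------------+
 *
 * For SMC_CREATE the payload is the ipstate_t or nat_t image taken from the
 * peer's log entry; for SMC_UPDATE it is the update block built by
 * ipf_sync_update().  A record that is not completely present in a single
 * write is rejected with EAGAIN, so the user-space sync daemon should only
 * ever write whole records.
 */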
547 
548 
549 /* ------------------------------------------------------------------------ */
550 /* Function:    ipf_sync_read                                               */
551 /* Returns:     int    - 0 == success, else error value.                    */
552 /* Parameters:  uio(O) - pointer to information about where to store data   */
553 /*                                                                          */
554 /* This function is called when a user program wants to read some data      */
555 /* for pending state/NAT updates.  If no data is available, the caller is   */
556 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
557 /* ------------------------------------------------------------------------ */
558 int
559 ipf_sync_read(ipf_main_softc_t *softc, struct uio *uio)
560 {
561 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
562 	syncupdent_t *su;
563 	synclogent_t *sl;
564 	int err = 0;
565 
566 	if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
567 		IPFERROR(110008);
568 		return (EINVAL);
569 	}
570 
571 #  if defined(__NetBSD__) || defined(__FreeBSD__)
572 	uio->uio_rw = UIO_READ;
573 #  endif
574 
575 	MUTEX_ENTER(&softs->ipsl_mutex);
576 	while ((softs->sl_tail == softs->sl_idx) &&
577 	       (softs->su_tail == softs->su_idx)) {
578 #  if defined(_KERNEL)
579 #   if SOLARIS
580 		if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
581 			MUTEX_EXIT(&softs->ipsl_mutex);
582 			IPFERROR(110009);
583 			return (EINTR);
584 		}
585 #   else
586 		MUTEX_EXIT(&softs->ipsl_mutex);
587 		err = SLEEP(&softs->sl_tail, "ipl sleep");
588 		if (err) {
589 			IPFERROR(110012);
590 			return (EINTR);
591 		}
592 		MUTEX_ENTER(&softs->ipsl_mutex);
593 #   endif /* SOLARIS */
594 #  endif /* _KERNEL */
595 	}
596 
597 	while ((softs->sl_tail < softs->sl_idx) &&
598 	       (uio->uio_resid > sizeof(*sl))) {
599 		sl = softs->synclog + softs->sl_tail++;
600 		MUTEX_EXIT(&softs->ipsl_mutex);
601 		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
602 		if (err != 0)
603 			goto goterror;
604 		MUTEX_ENTER(&softs->ipsl_mutex);
605 	}
606 
607 	while ((softs->su_tail < softs->su_idx) &&
608 	       (uio->uio_resid > sizeof(*su))) {
609 		su = softs->syncupd + softs->su_tail;
610 		softs->su_tail++;
611 		MUTEX_EXIT(&softs->ipsl_mutex);
612 		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
613 		if (err != 0)
614 			goto goterror;
615 		MUTEX_ENTER(&softs->ipsl_mutex);
616 		if (su->sup_hdr.sm_sl != NULL)
617 			su->sup_hdr.sm_sl->sl_idx = -1;
618 	}
619 	if (softs->sl_tail == softs->sl_idx)
620 		softs->sl_tail = softs->sl_idx = 0;
621 	if (softs->su_tail == softs->su_idx)
622 		softs->su_tail = softs->su_idx = 0;
623 	MUTEX_EXIT(&softs->ipsl_mutex);
624 goterror:
625 	return (err);
626 }
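
/*
 * A minimal sketch of the reader side, assuming the usual /dev/ipsync node
 * and a buffer comfortably larger than either record type:
 *
 *	char buf[16384];	(length must be a multiple of 4 and >= 8)
 *	int fd = open("/dev/ipsync", O_RDONLY);
 *	for (;;) {
 *		ssize_t n = read(fd, buf, sizeof(buf));
 *		... n bytes of whole synclogent_t/syncupdent_t records ...
 *	}
 *
 * The read blocks until ipf_sync_wakeup() decides enough work has queued up.
 */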
627 
628 
629 /* ------------------------------------------------------------------------ */
630 /* Function:    ipf_sync_state                                              */
631 /* Returns:     int    - 0 == success, else error value.                    */
632 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
633 /*              data(I) - pointer to the record payload that follows sp     */
634 /*                                                                          */
635 /* Updates the state table according to information passed in the sync      */
636 /* header.  As required, more data is fetched from the uio structure but    */
637 /* varies depending on the contents of the sync header.  This function can  */
638 /* create a new state entry or update one.  Deletion is left to the state   */
639 /* structures being timed out correctly.                                    */
640 /* ------------------------------------------------------------------------ */
641 static int
642 ipf_sync_state(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
643 {
644 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
645 	synctcp_update_t su;
646 	ipstate_t *is, sn;
647 	synclist_t *sl;
648 	frentry_t *fr;
649 	u_int hv;
650 	int err = 0;
651 
652 	hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
653 
654 	switch (sp->sm_cmd)
655 	{
656 	case SMC_CREATE :
657 
658 		bcopy(data, &sn, sizeof(sn));
659 		KMALLOC(is, ipstate_t *);
660 		if (is == NULL) {
661 			IPFERROR(110013);
662 			err = ENOMEM;
663 			break;
664 		}
665 
666 		KMALLOC(sl, synclist_t *);
667 		if (sl == NULL) {
668 			IPFERROR(110014);
669 			err = ENOMEM;
670 			KFREE(is);
671 			break;
672 		}
673 
674 		bzero((char *)is, offsetof(ipstate_t, is_die));
675 		bcopy((char *)&sn.is_die, (char *)&is->is_die,
676 		      sizeof(*is) - offsetof(ipstate_t, is_die));
677 		ipf_sync_storder(0, is);
678 
679 		/*
680 		 * We need to find the same rule on the slave as was used on
681 		 * the master to create this state entry.
682 		 */
683 		READ_ENTER(&softc->ipf_mutex);
684 		fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
685 		if (fr != NULL) {
686 			MUTEX_ENTER(&fr->fr_lock);
687 			fr->fr_ref++;
688 			fr->fr_statecnt++;
689 			MUTEX_EXIT(&fr->fr_lock);
690 		}
691 		RWLOCK_EXIT(&softc->ipf_mutex);
692 
693 		if (softs->ipf_sync_debug > 4)
694 			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
695 
696 		is->is_rule = fr;
697 		is->is_sync = sl;
698 
699 		sl->sl_idx = -1;
700 		sl->sl_ips = is;
701 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
702 
703 		WRITE_ENTER(&softs->ipf_syncstate);
704 		WRITE_ENTER(&softc->ipf_state);
705 
706 		sl->sl_pnext = softs->syncstatetab + hv;
707 		sl->sl_next = softs->syncstatetab[hv];
708 		if (softs->syncstatetab[hv] != NULL)
709 			softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
710 		softs->syncstatetab[hv] = sl;
711 		MUTEX_DOWNGRADE(&softs->ipf_syncstate);
712 		ipf_state_insert(softc, is, sp->sm_rev);
713 		/*
714 		 * Do not initialise the interface pointers for the state
715 		 * entry as the full complement of interface names may not
716 		 * be present.
717 		 *
718 		 * Put this state entry on its timeout queue.
719 		 */
720 		/*fr_setstatequeue(is, sp->sm_rev);*/
721 		break;
722 
723 	case SMC_UPDATE :
724 		bcopy(data, &su, sizeof(su));
725 
726 		if (softs->ipf_sync_debug > 4)
727 			printf("[%d] Update age %lu state %d/%d \n",
728 				sp->sm_num, su.stu_age, su.stu_state[0],
729 				su.stu_state[1]);
730 
731 		READ_ENTER(&softs->ipf_syncstate);
732 		for (sl = softs->syncstatetab[hv]; (sl != NULL);
733 		     sl = sl->sl_next)
734 			if (sl->sl_hdr.sm_num == sp->sm_num)
735 				break;
736 		if (sl == NULL) {
737 			if (softs->ipf_sync_debug > 1)
738 				printf("[%d] State not found - can't update\n",
739 					sp->sm_num);
740 			RWLOCK_EXIT(&softs->ipf_syncstate);
741 			IPFERROR(110015);
742 			err = ENOENT;
743 			break;
744 		}
745 
746 		READ_ENTER(&softc->ipf_state);
747 
748 		if (softs->ipf_sync_debug > 6)
749 			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
750 				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
751 				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
752 				sl->sl_hdr.sm_rev);
753 
754 		is = sl->sl_ips;
755 
756 		MUTEX_ENTER(&is->is_lock);
757 		switch (sp->sm_p)
758 		{
759 		case IPPROTO_TCP :
760 			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
761 			is->is_send = su.stu_data[0].td_end;
762 			is->is_maxsend = su.stu_data[0].td_maxend;
763 			is->is_maxswin = su.stu_data[0].td_maxwin;
764 			is->is_state[0] = su.stu_state[0];
765 			is->is_dend = su.stu_data[1].td_end;
766 			is->is_maxdend = su.stu_data[1].td_maxend;
767 			is->is_maxdwin = su.stu_data[1].td_maxwin;
768 			is->is_state[1] = su.stu_state[1];
769 			break;
770 		default :
771 			break;
772 		}
773 
774 		if (softs->ipf_sync_debug > 6)
775 			printf("[%d] Setting timers for state\n", sp->sm_num);
776 
777 		ipf_state_setqueue(softc, is, sp->sm_rev);
778 
779 		MUTEX_EXIT(&is->is_lock);
780 		break;
781 
782 	default :
783 		IPFERROR(110016);
784 		err = EINVAL;
785 		break;
786 	}
787 
788 	if (err == 0) {
789 		RWLOCK_EXIT(&softc->ipf_state);
790 		RWLOCK_EXIT(&softs->ipf_syncstate);
791 	}
792 
793 	if (softs->ipf_sync_debug > 6)
794 		printf("[%d] Update completed with error %d\n",
795 			sp->sm_num, err);
796 
797 	return (err);
798 }
799 
800 
801 /* ------------------------------------------------------------------------ */
802 /* Function:    ipf_sync_del                                                */
803 /* Returns:     Nil                                                         */
804 /* Parameters:  sl(I) - pointer to synclist object to delete                */
805 /*                                                                          */
806 /* Deletes an object from the synclist.                                     */
807 /* ------------------------------------------------------------------------ */
808 static void
809 ipf_sync_del(ipf_sync_softc_t *softs, synclist_t *sl)
810 {
811 	*sl->sl_pnext = sl->sl_next;
812 	if (sl->sl_next != NULL)
813 		sl->sl_next->sl_pnext = sl->sl_pnext;
814 	if (sl->sl_idx != -1)
815 		softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
816 }
817 
818 
819 /* ------------------------------------------------------------------------ */
820 /* Function:    ipf_sync_del_state                                          */
821 /* Returns:     Nil                                                         */
822 /* Parameters:  sl(I) - pointer to synclist object to delete                */
823 /*                                                                          */
824 /* Deletes an object from the synclist state table and frees its memory.    */
825 /* ------------------------------------------------------------------------ */
826 void
827 ipf_sync_del_state(void *arg, synclist_t *sl)
828 {
829 	ipf_sync_softc_t *softs = arg;
830 
831 	WRITE_ENTER(&softs->ipf_syncstate);
832 	ipf_sync_del(softs, sl);
833 	RWLOCK_EXIT(&softs->ipf_syncstate);
834 	KFREE(sl);
835 }
836 
837 
838 /* ------------------------------------------------------------------------ */
839 /* Function:    ipf_sync_del_nat                                            */
840 /* Returns:     Nil                                                         */
841 /* Parameters:  sl(I) - pointer to synclist object to delete                */
842 /*                                                                          */
843 /* Deletes an object from the synclist nat table and frees its memory.      */
844 /* ------------------------------------------------------------------------ */
845 void
846 ipf_sync_del_nat(void *arg, synclist_t *sl)
847 {
848 	ipf_sync_softc_t *softs = arg;
849 
850 	WRITE_ENTER(&softs->ipf_syncnat);
851 	ipf_sync_del(softs, sl);
852 	RWLOCK_EXIT(&softs->ipf_syncnat);
853 	KFREE(sl);
854 }
855 
856 
857 /* ------------------------------------------------------------------------ */
858 /* Function:    ipf_sync_nat                                                */
859 /* Returns:     int    - 0 == success, else error value.                    */
860 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
861 /*              data(I) - pointer to the record payload that follows sp     */
862 /*                                                                          */
863 /* Updates the NAT  table according to information passed in the sync       */
864 /* header.  As required, more data is fetched from the uio structure but    */
865 /* varies depending on the contents of the sync header.  This function can  */
866 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
867 /* structures being timed out correctly.                                    */
868 /* ------------------------------------------------------------------------ */
869 static int
870 ipf_sync_nat(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
871 {
872 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
873 	syncupdent_t su;
874 	nat_t *n, *nat;
875 	synclist_t *sl;
876 	u_int hv = 0;
877 	int err = 0;
878 
879 	READ_ENTER(&softs->ipf_syncnat);
880 
881 	switch (sp->sm_cmd)
882 	{
883 	case SMC_CREATE :
884 		KMALLOC(n, nat_t *);
885 		if (n == NULL) {
886 			IPFERROR(110017);
887 			err = ENOMEM;
888 			break;
889 		}
890 
891 		KMALLOC(sl, synclist_t *);
892 		if (sl == NULL) {
893 			IPFERROR(110018);
894 			err = ENOMEM;
895 			KFREE(n);
896 			break;
897 		}
898 
899 		nat = (nat_t *)data;
900 		bzero((char *)n, offsetof(nat_t, nat_age));
901 		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
902 		      sizeof(*n) - offsetof(nat_t, nat_age));
903 		ipf_sync_natorder(0, n);
904 		n->nat_sync = sl;
905 
906 		sl->sl_idx = -1;
907 		sl->sl_ipn = n;
908 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
909 		n->nat_rev = sl->sl_rev;
910 
911 		WRITE_ENTER(&softc->ipf_nat);
912 		sl->sl_pnext = softs->syncnattab + hv;
913 		sl->sl_next = softs->syncnattab[hv];
914 		if (softs->syncnattab[hv] != NULL)
915 			softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
916 		softs->syncnattab[hv] = sl;
917 		(void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
918 		RWLOCK_EXIT(&softc->ipf_nat);
919 		break;
920 
921 	case SMC_UPDATE :
922 		bcopy(data, &su, sizeof(su));
923 
924 		for (sl = softs->syncnattab[hv]; (sl != NULL);
925 		     sl = sl->sl_next)
926 			if (sl->sl_hdr.sm_num == sp->sm_num)
927 				break;
928 		if (sl == NULL) {
929 			IPFERROR(110019);
930 			err = ENOENT;
931 			break;
932 		}
933 
934 		READ_ENTER(&softc->ipf_nat);
935 
936 		nat = sl->sl_ipn;
937 		nat->nat_rev = sl->sl_rev;
938 
939 		MUTEX_ENTER(&nat->nat_lock);
940 		ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
941 		MUTEX_EXIT(&nat->nat_lock);
942 
943 		RWLOCK_EXIT(&softc->ipf_nat);
944 
945 		break;
946 
947 	default :
948 		IPFERROR(110020);
949 		err = EINVAL;
950 		break;
951 	}
952 
953 	RWLOCK_EXIT(&softs->ipf_syncnat);
954 	return (err);
955 }
956 
957 
958 /* ------------------------------------------------------------------------ */
959 /* Function:    ipf_sync_new                                                */
960 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
961 /*                            data structure.                               */
962 /* Parameters:  tab(I) - type of synclist_t to create                       */
963 /*              fin(I) - pointer to packet information                      */
964 /*              ptr(I) - pointer to owning object                           */
965 /*                                                                          */
966 /* Creates a new sync table entry and notifies any sleepers that it's there */
967 /* waiting to be processed.                                                 */
968 /* ------------------------------------------------------------------------ */
969 synclist_t *
970 ipf_sync_new(ipf_main_softc_t *softc, int tab, fr_info_t *fin, void *ptr)
971 {
972 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
973 	synclist_t *sl, *ss;
974 	synclogent_t *sle;
975 	u_int hv, sz;
976 
977 	if (softs->sl_idx == softs->ipf_sync_log_sz)
978 		return (NULL);
979 	KMALLOC(sl, synclist_t *);
980 	if (sl == NULL)
981 		return (NULL);
982 
983 	MUTEX_ENTER(&softs->ipf_syncadd);
984 	/*
985 	 * Get a unique number for this synclist_t.  The number is only meant
986 	 * to be unique for the lifetime of the structure and may be reused
987 	 * later.
988 	 */
989 	softs->ipf_sync_num++;
990 	if (softs->ipf_sync_num == 0) {
991 		softs->ipf_sync_num = 1;
992 		softs->ipf_sync_wrap++;
993 	}
994 
995 	/*
996 	 * Use the synch number of the object as the hash key.  Should end up
997 	 * with relatively even distribution over time.
998 	 * XXX - an attacker could launch a DoS attack, of sorts, if they are
999 	 * the only one causing new table entries by only keeping open every
1000 	 * nth connection they make, where n is a value in the interval
1001 	 * [0, SYNC_STATETABSZ-1].
1002 	 */
1003 	switch (tab)
1004 	{
1005 	case SMC_STATE :
1006 		hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
1007 		while (softs->ipf_sync_wrap != 0) {
1008 			for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
1009 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1010 					break;
1011 			if (ss == NULL)
1012 				break;
1013 			softs->ipf_sync_num++;
1014 			hv = softs->ipf_sync_num &
1015 			     (softs->ipf_sync_state_tab_sz - 1);
1016 		}
1017 		sl->sl_pnext = softs->syncstatetab + hv;
1018 		sl->sl_next = softs->syncstatetab[hv];
1019 		softs->syncstatetab[hv] = sl;
1020 		break;
1021 
1022 	case SMC_NAT :
1023 		hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
1024 		while (softs->ipf_sync_wrap != 0) {
1025 			for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
1026 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1027 					break;
1028 			if (ss == NULL)
1029 				break;
1030 			softs->ipf_sync_num++;
1031 			hv = softs->ipf_sync_num &
1032 			     (softs->ipf_sync_nat_tab_sz - 1);
1033 		}
1034 		sl->sl_pnext = softs->syncnattab + hv;
1035 		sl->sl_next = softs->syncnattab[hv];
1036 		softs->syncnattab[hv] = sl;
1037 		break;
1038 
1039 	default :
1040 		break;
1041 	}
1042 
1043 	sl->sl_num = softs->ipf_sync_num;
1044 	MUTEX_EXIT(&softs->ipf_syncadd);
1045 
1046 	sl->sl_magic = htonl(SYNHDRMAGIC);
1047 	sl->sl_v = fin->fin_v;
1048 	sl->sl_p = fin->fin_p;
1049 	sl->sl_cmd = SMC_CREATE;
1050 	sl->sl_idx = -1;
1051 	sl->sl_table = tab;
1052 	sl->sl_rev = fin->fin_rev;
1053 	if (tab == SMC_STATE) {
1054 		sl->sl_ips = ptr;
1055 		sz = sizeof(*sl->sl_ips);
1056 	} else if (tab == SMC_NAT) {
1057 		sl->sl_ipn = ptr;
1058 		sz = sizeof(*sl->sl_ipn);
1059 	} else {
1060 		ptr = NULL;
1061 		sz = 0;
1062 	}
1063 	sl->sl_len = sz;
1064 
1065 	/*
1066 	 * Create the log entry to be read by a user daemon.  When it has been
1067 	 * finished and put on the queue, send a signal to wakeup any waiters.
1068 	 */
1069 	MUTEX_ENTER(&softs->ipf_syncadd);
1070 	sle = softs->synclog + softs->sl_idx++;
1071 	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
1072 	      sizeof(sle->sle_hdr));
1073 	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
1074 	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
1075 	if (ptr != NULL) {
1076 		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
1077 		if (tab == SMC_STATE) {
1078 			ipf_sync_storder(1, &sle->sle_un.sleu_ips);
1079 		} else if (tab == SMC_NAT) {
1080 			ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
1081 		}
1082 	}
1083 	MUTEX_EXIT(&softs->ipf_syncadd);
1084 
1085 	ipf_sync_wakeup(softc);
1086 	return (sl);
1087 }
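
/*
 * The expected life cycle on the sending side is roughly (a sketch, not a
 * verbatim copy of the callers in ip_state.c/ip_nat.c):
 *
 *	is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
 *	...
 *	ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);	(per packet)
 *	...
 *	ipf_sync_del_state(softc->ipf_sync_soft, is->is_sync);	(on expiry)
 *
 * with the SMC_NAT calls and nat_sync/ipf_sync_del_nat() used analogously
 * for NAT entries.
 */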
1088 
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* Function:    ipf_sync_update                                             */
1092 /* Returns:     Nil                                                         */
1093 /* Parameters:  tab(I) - type of synclist_t to create                       */
1094 /*              fin(I) - pointer to packet information                      */
1095 /*              sl(I)  - pointer to synchronisation object                  */
1096 /*                                                                          */
1097 /* For outbound packets only, create a sync update record for the user      */
1098 /* process to read.                                                         */
1099 /* ------------------------------------------------------------------------ */
1100 void
1101 ipf_sync_update(ipf_main_softc_t *softc, int tab, fr_info_t *fin,
1102 	synclist_t *sl)
1103 {
1104 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1105 	synctcp_update_t *st;
1106 	syncupdent_t *slu;
1107 	ipstate_t *ips;
1108 	nat_t *nat;
1109 	ipfrwlock_t *lock;
1110 
1111 	if (fin->fin_out == 0 || sl == NULL)
1112 		return;
1113 
1114 	if (tab == SMC_STATE) {
1115 		lock = &softs->ipf_syncstate;
1116 	} else {
1117 		lock = &softs->ipf_syncnat;
1118 	}
1119 
1120 	READ_ENTER(lock);
1121 	if (sl->sl_idx == -1) {
1122 		MUTEX_ENTER(&softs->ipf_syncadd);
1123 		slu = softs->syncupd + softs->su_idx;
1124 		sl->sl_idx = softs->su_idx++;
1125 		MUTEX_EXIT(&softs->ipf_syncadd);
1126 
1127 		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
1128 		      sizeof(slu->sup_hdr));
1129 		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
1130 		slu->sup_hdr.sm_sl = sl;
1131 		slu->sup_hdr.sm_cmd = SMC_UPDATE;
1132 		slu->sup_hdr.sm_table = tab;
1133 		slu->sup_hdr.sm_num = htonl(sl->sl_num);
1134 		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
1135 		slu->sup_hdr.sm_rev = fin->fin_rev;
1136 # if 0
1137 		if (fin->fin_p == IPPROTO_TCP) {
1138 			st->stu_len[0] = 0;
1139 			st->stu_len[1] = 0;
1140 		}
1141 # endif
1142 	} else
1143 		slu = softs->syncupd + sl->sl_idx;
1144 
1145 	/*
1146 	 * Only TCP has complex timeouts, others just use default timeouts.
1147 	 * For TCP, we only need to track the connection state and window.
1148 	 */
1149 	if (fin->fin_p == IPPROTO_TCP) {
1150 		st = &slu->sup_tcp;
1151 		if (tab == SMC_STATE) {
1152 			ips = sl->sl_ips;
1153 			st->stu_age = htonl(ips->is_die);
1154 			st->stu_data[0].td_end = ips->is_send;
1155 			st->stu_data[0].td_maxend = ips->is_maxsend;
1156 			st->stu_data[0].td_maxwin = ips->is_maxswin;
1157 			st->stu_state[0] = ips->is_state[0];
1158 			st->stu_data[1].td_end = ips->is_dend;
1159 			st->stu_data[1].td_maxend = ips->is_maxdend;
1160 			st->stu_data[1].td_maxwin = ips->is_maxdwin;
1161 			st->stu_state[1] = ips->is_state[1];
1162 		} else if (tab == SMC_NAT) {
1163 			nat = sl->sl_ipn;
1164 			st->stu_age = htonl(nat->nat_age);
1165 		}
1166 	}
1167 	RWLOCK_EXIT(lock);
1168 
1169 	ipf_sync_wakeup(softc);
1170 }
1171 
1172 
1173 /* ------------------------------------------------------------------------ */
1174 /* Function:    ipf_sync_flush_table                                        */
1175 /* Returns:     int - number of entries freed by flushing table             */
1176 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
1177 /*              table(I)   - pointer to sync table to empty                 */
1178 /*                                                                          */
1179 /* Walk through a table of sync entries and free each one.  It is assumed   */
1180 /* that some lock is held so that nobody else tries to access the table     */
1181 /* during this cleanup.                                                     */
1182 /* ------------------------------------------------------------------------ */
1183 static int
1184 ipf_sync_flush_table(ipf_sync_softc_t *softs, int tabsize, synclist_t **table)
1185 {
1186 	synclist_t *sl;
1187 	int i, items;
1188 
1189 	items = 0;
1190 
1191 	for (i = 0; i < tabsize; i++) {
1192 		while ((sl = table[i]) != NULL) {
1193 			switch (sl->sl_table) {
1194 			case SMC_STATE :
1195 				if (sl->sl_ips != NULL)
1196 					sl->sl_ips->is_sync = NULL;
1197 				break;
1198 			case SMC_NAT :
1199 				if (sl->sl_ipn != NULL)
1200 					sl->sl_ipn->nat_sync = NULL;
1201 				break;
1202 			}
1203 			if (sl->sl_next != NULL)
1204 				sl->sl_next->sl_pnext = sl->sl_pnext;
1205 			table[i] = sl->sl_next;
1206 			if (sl->sl_idx != -1)
1207 				softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
1208 			KFREE(sl);
1209 			items++;
1210 		}
1211 	}
1212 
1213 	return (items);
1214 }
1215 
1216 
1217 /* ------------------------------------------------------------------------ */
1218 /* Function:    ipf_sync_ioctl                                              */
1219 /* Returns:     int - 0 == success, != 0 == failure                         */
1220 /* Parameters:  data(I) - pointer to ioctl data                             */
1221 /*              cmd(I)  - ioctl command integer                             */
1222 /*              mode(I) - file mode bits used with open                     */
1223 /*                                                                          */
1224 /* Handles the SIOCIPFFL ioctl to flush the sync log or sync tables and     */
1225 /* report how many entries were flushed; all other ioctls get EINVAL.       */
1226 /* ------------------------------------------------------------------------ */
1227 int
1228 ipf_sync_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd,
1229 	int mode, int uid, void *ctx)
1230 {
1231 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1232 	int error, i;
1233 	SPL_INT(s);
1234 
1235 	switch (cmd)
1236 	{
1237 	case SIOCIPFFL:
1238 		error = BCOPYIN(data, &i, sizeof(i));
1239 		if (error != 0) {
1240 			IPFERROR(110023);
1241 			error = EFAULT;
1242 			break;
1243 		}
1244 
1245 		switch (i)
1246 		{
1247 		case SMC_RLOG :
1248 			SPL_NET(s);
1249 			MUTEX_ENTER(&softs->ipsl_mutex);
1250 			i = (softs->sl_idx - softs->sl_tail) +
1251 			    (softs->su_idx - softs->su_tail);
1252 			softs->sl_idx = 0;
1253 			softs->su_idx = 0;
1254 			softs->sl_tail = 0;
1255 			softs->su_tail = 0;
1256 			MUTEX_EXIT(&softs->ipsl_mutex);
1257 			SPL_X(s);
1258 			break;
1259 
1260 		case SMC_NAT :
1261 			SPL_NET(s);
1262 			WRITE_ENTER(&softs->ipf_syncnat);
1263 			i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
1264 						 softs->syncnattab);
1265 			RWLOCK_EXIT(&softs->ipf_syncnat);
1266 			SPL_X(s);
1267 			break;
1268 
1269 		case SMC_STATE :
1270 			SPL_NET(s);
1271 			WRITE_ENTER(&softs->ipf_syncstate);
1272 			i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
1273 						 softs->syncstatetab);
1274 			RWLOCK_EXIT(&softs->ipf_syncstate);
1275 			SPL_X(s);
1276 			break;
1277 		}
1278 
1279 		error = BCOPYOUT(&i, data, sizeof(i));
1280 		if (error != 0) {
1281 			IPFERROR(110022);
1282 			error = EFAULT;
1283 		}
1284 		break;
1285 
1286 	default :
1287 		IPFERROR(110021);
1288 		error = EINVAL;
1289 		break;
1290 	}
1291 
1292 	return (error);
1293 }
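
/*
 * A sketch of the matching user-space call, assuming a descriptor open on
 * the sync device:
 *
 *	int n = SMC_STATE;			(or SMC_NAT, SMC_RLOG)
 *	if (ioctl(fd, SIOCIPFFL, &n) == 0)
 *		printf("%d entries flushed\n", n);
 */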
1294 
1295 
1296 /* ------------------------------------------------------------------------ */
1297 /* Function:    ipf_sync_canread                                            */
1298 /* Returns:     int - 0 == success, != 0 == failure                         */
1299 /* Parameters:  Nil                                                         */
1300 /*                                                                          */
1301 /* This function provides input to the poll handler about whether or not    */
1302 /* there is data waiting to be read from the /dev/ipsync device.            */
1303 /* ------------------------------------------------------------------------ */
1304 int
1305 ipf_sync_canread(void *arg)
1306 {
1307 	ipf_sync_softc_t *softs = arg;
1308 	return (!((softs->sl_tail == softs->sl_idx) &&
1309 		 (softs->su_tail == softs->su_idx)));
1310 }
1311 
1312 
1313 /* ------------------------------------------------------------------------ */
1314 /* Function:    ipf_sync_canwrite                                           */
1315 /* Returns:     int - 1 == can always write                                 */
1316 /* Parameters:  Nil                                                         */
1317 /*                                                                          */
1318 /* This function lets the poll handler know that it is always willing       */
1319 /* to accept write events.                                                  */
1320 /* XXX Maybe this should return false if the sync table is full?            */
1321 /* ------------------------------------------------------------------------ */
1322 int
1323 ipf_sync_canwrite(void *arg)
1324 {
1325 	return (1);
1326 }
1327 
1328 
1329 /* ------------------------------------------------------------------------ */
1330 /* Function:    ipf_sync_wakeup                                             */
1331 /* Parameters:  Nil                                                         */
1332 /* Returns:     Nil                                                         */
1333 /*                                                                          */
1334 /* This function implements the heuristics that decide how often to         */
1335 /* generate a poll wakeup for programs that are waiting for information     */
1336 /* about when they can do a read on /dev/ipsync.                            */
1337 /*                                                                          */
1338 /* There are three different considerations here:                           */
1339 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
1340 /*   maximum number of ipf ticks to let pass by;                            */
1341 /* - do not let the queue of outstanding things to generate notifies for    */
1342 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
1343 /* - do not let too many events get collapsed in before deciding that the   */
1344 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
1345 /*   mark for this counter.)                                                */
1346 /* ------------------------------------------------------------------------ */
1347 static void
1348 ipf_sync_wakeup(ipf_main_softc_t *softc)
1349 {
1350 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1351 
1352 	softs->ipf_sync_events++;
1353 	if ((softc->ipf_ticks >
1354 	    softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
1355 	    (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
1356 	    ((softs->sl_tail - softs->sl_idx) >
1357 	     softs->ipf_sync_queue_high_wm) ||
1358 	    ((softs->su_tail - softs->su_idx) >
1359 	     softs->ipf_sync_queue_high_wm)) {
1360 
1361 		ipf_sync_poll_wakeup(softc);
1362 	}
1363 }
1364 
1365 
1366 /* ------------------------------------------------------------------------ */
1367 /* Function:    ipf_sync_poll_wakeup                                        */
1368 /* Parameters:  Nil                                                         */
1369 /* Returns:     Nil                                                         */
1370 /*                                                                          */
1371 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
1372 /* ------------------------------------------------------------------------ */
1373 static void
1374 ipf_sync_poll_wakeup(ipf_main_softc_t *softc)
1375 {
1376 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1377 
1378 	softs->ipf_sync_events = 0;
1379 	softs->ipf_sync_lastwakeup = softc->ipf_ticks;
1380 
1381 # ifdef _KERNEL
1382 #  if SOLARIS
1383 	MUTEX_ENTER(&softs->ipsl_mutex);
1384 	cv_signal(&softs->ipslwait);
1385 	MUTEX_EXIT(&softs->ipsl_mutex);
1386 	pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
1387 #  else
1388 	WAKEUP(&softs->sl_tail, 0);
1389 	POLLWAKEUP(IPL_LOGSYNC);
1390 #  endif
1391 # endif
1392 }
1393 
1394 
1395 /* ------------------------------------------------------------------------ */
1396 /* Function:    ipf_sync_expire                                             */
1397 /* Parameters:  Nil                                                         */
1398 /* Returns:     Nil                                                         */
1399 /*                                                                          */
1400 /* This is the function called every ipf_tick.  It implements one of the    */
1401 /* three heuristics above *IF* there are events waiting.                    */
1402 /* ------------------------------------------------------------------------ */
1403 void
1404 ipf_sync_expire(ipf_main_softc_t *softc)
1405 {
1406 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1407 
1408 	if ((softs->ipf_sync_events > 0) &&
1409 	    (softc->ipf_ticks >
1410 	     softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
1411 		ipf_sync_poll_wakeup(softc);
1412 	}
1413 }
1414