xref: /freebsd/sys/netpfil/ipfilter/netinet/ip_sync.c (revision 29363fb4)
1 
2 /*
3  * Copyright (C) 2012 by Darren Reed.
4  *
5  * See the IPFILTER.LICENCE file for details on licencing.
6  */
7 #if defined(KERNEL) || defined(_KERNEL)
8 # undef KERNEL
9 # undef _KERNEL
10 # define        KERNEL	1
11 # define        _KERNEL	1
12 #endif
13 #include <sys/errno.h>
14 #include <sys/types.h>
15 #include <sys/param.h>
16 #include <sys/file.h>
17 #if !defined(_KERNEL) && !defined(__KERNEL__)
18 # include <stdio.h>
19 # include <stdlib.h>
20 # include <string.h>
21 # define _KERNEL
22 # define KERNEL
23 # include <sys/uio.h>
24 # undef _KERNEL
25 # undef KERNEL
26 #else
27 # include <sys/systm.h>
28 # if !defined(__SVR4)
29 #  include <sys/mbuf.h>
30 # endif
31 # include <sys/select.h>
32 # ifdef __FreeBSD__
33 #  include <sys/selinfo.h>
34 # endif
35 #endif
36 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
37 # include <sys/proc.h>
38 #endif
39 #if defined(_KERNEL) && defined(__FreeBSD__)
40 # include <sys/filio.h>
41 # include <sys/fcntl.h>
42 #else
43 # include <sys/ioctl.h>
44 #endif
45 #include <sys/time.h>
46 # include <sys/protosw.h>
47 #include <sys/socket.h>
48 #if defined(__SVR4)
49 # include <sys/filio.h>
50 # include <sys/byteorder.h>
51 # ifdef _KERNEL
52 #  include <sys/dditypes.h>
53 # endif
54 # include <sys/stream.h>
55 # include <sys/kmem.h>
56 #endif
57 
58 #include <net/if.h>
59 #ifdef sun
60 # include <net/af.h>
61 #endif
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
66 # include <netinet/ip_var.h>
67 # include <netinet/tcp_fsm.h>
68 #include <netinet/udp.h>
69 #include <netinet/ip_icmp.h>
70 #include "netinet/ip_compat.h"
71 #include <netinet/tcpip.h>
72 #include "netinet/ip_fil.h"
73 #include "netinet/ip_nat.h"
74 #include "netinet/ip_frag.h"
75 #include "netinet/ip_state.h"
76 #include "netinet/ip_proxy.h"
77 #include "netinet/ip_sync.h"
78 #ifdef  USE_INET6
79 #include <netinet/icmp6.h>
80 #endif
81 #if defined(__FreeBSD__)
82 # include <sys/malloc.h>
83 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
84 #  include <sys/libkern.h>
85 #  include <sys/systm.h>
86 # endif
87 #endif
88 /* END OF INCLUDES */
89 
90 
91 #define	SYNC_STATETABSZ	256
92 #define	SYNC_NATTABSZ	256
93 
94 typedef struct ipf_sync_softc_s {
95 	ipfmutex_t	ipf_syncadd;
96 	ipfmutex_t	ipsl_mutex;
97 	ipfrwlock_t	ipf_syncstate;
98 	ipfrwlock_t	ipf_syncnat;
99 #if SOLARIS && defined(_KERNEL)
100 	kcondvar_t	ipslwait;
101 #endif
102 	synclist_t	**syncstatetab;
103 	synclist_t	**syncnattab;
104 	synclogent_t	*synclog;
105 	syncupdent_t	*syncupd;
106 	u_int		ipf_sync_num;
107 	u_int		ipf_sync_wrap;
108 	u_int		sl_idx;		/* next available sync log entry */
109 	u_int		su_idx;		/* next available sync update entry */
110 	u_int		sl_tail;	/* next sync log entry to read */
111 	u_int		su_tail;	/* next sync update entry to read */
112 	int		ipf_sync_log_sz;
113 	int		ipf_sync_nat_tab_sz;
114 	int		ipf_sync_state_tab_sz;
115 	int		ipf_sync_debug;
116 	int		ipf_sync_events;
117 	u_32_t		ipf_sync_lastwakeup;
118 	int		ipf_sync_wake_interval;
119 	int		ipf_sync_event_high_wm;
120 	int		ipf_sync_queue_high_wm;
121 	int		ipf_sync_inited;
122 } ipf_sync_softc_t;
123 
124 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
125 static void ipf_sync_wakeup(ipf_main_softc_t *);
126 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
127 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
128 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
129 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
130 
131 # if !defined(sparc) && !defined(__hppa)
132 void ipf_sync_tcporder(int, struct tcpdata *);
133 void ipf_sync_natorder(int, struct nat *);
134 void ipf_sync_storder(int, struct ipstate *);
135 # endif
136 
137 
138 void *
ipf_sync_soft_create(ipf_main_softc_t * softc)139 ipf_sync_soft_create(ipf_main_softc_t *softc)
140 {
141 	ipf_sync_softc_t *softs;
142 
143 	KMALLOC(softs, ipf_sync_softc_t *);
144 	if (softs == NULL) {
145 		IPFERROR(110024);
146 		return (NULL);
147 	}
148 
149 	bzero((char *)softs, sizeof(*softs));
150 
151 	softs->ipf_sync_log_sz = SYNCLOG_SZ;
152 	softs->ipf_sync_nat_tab_sz = SYNC_STATETABSZ;
153 	softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
154 	softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
155 	softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 100 / 90;	/* 90% */
156 
157 	return (softs);
158 }
159 
160 
161 /* ------------------------------------------------------------------------ */
162 /* Function:    ipf_sync_init                                               */
163 /* Returns:     int - 0 == success, -1 == failure                           */
164 /* Parameters:  Nil                                                         */
165 /*                                                                          */
166 /* Initialise all of the locks required for the sync code and initialise    */
167 /* any data structures, as required.                                        */
168 /* ------------------------------------------------------------------------ */
169 int
ipf_sync_soft_init(ipf_main_softc_t * softc,void * arg)170 ipf_sync_soft_init(ipf_main_softc_t *softc, void *arg)
171 {
172 	ipf_sync_softc_t *softs = arg;
173 
174 	KMALLOCS(softs->synclog, synclogent_t *,
175 		 softs->ipf_sync_log_sz * sizeof(*softs->synclog));
176 	if (softs->synclog == NULL)
177 		return (-1);
178 	bzero((char *)softs->synclog,
179 	      softs->ipf_sync_log_sz * sizeof(*softs->synclog));
180 
181 	KMALLOCS(softs->syncupd, syncupdent_t *,
182 		 softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
183 	if (softs->syncupd == NULL)
184 		return (-2);
185 	bzero((char *)softs->syncupd,
186 	      softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
187 
188 	KMALLOCS(softs->syncstatetab, synclist_t **,
189 		 softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
190 	if (softs->syncstatetab == NULL)
191 		return (-3);
192 	bzero((char *)softs->syncstatetab,
193 	      softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
194 
195 	KMALLOCS(softs->syncnattab, synclist_t **,
196 		 softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
197 	if (softs->syncnattab == NULL)
198 		return (-3);
199 	bzero((char *)softs->syncnattab,
200 	      softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
201 
202 	softs->ipf_sync_num = 1;
203 	softs->ipf_sync_wrap = 0;
204 	softs->sl_idx = 0;
205 	softs->su_idx = 0;
206 	softs->sl_tail = 0;
207 	softs->su_tail = 0;
208 	softs->ipf_sync_events = 0;
209 	softs->ipf_sync_lastwakeup = 0;
210 
211 
212 # if SOLARIS && defined(_KERNEL)
213 	cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
214 # endif
215 	RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
216 	RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
217 	MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
218 	MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
219 
220 	softs->ipf_sync_inited = 1;
221 
222 	return (0);
223 }
224 
225 
226 /* ------------------------------------------------------------------------ */
227 /* Function:    ipf_sync_unload                                             */
228 /* Returns:     int - 0 == success, -1 == failure                           */
229 /* Parameters:  Nil                                                         */
230 /*                                                                          */
231 /* Destroy the locks created when initialising and free any memory in use   */
232 /* with the synchronisation tables.                                         */
233 /* ------------------------------------------------------------------------ */
234 int
ipf_sync_soft_fini(ipf_main_softc_t * softc,void * arg)235 ipf_sync_soft_fini(ipf_main_softc_t *softc, void *arg)
236 {
237 	ipf_sync_softc_t *softs = arg;
238 
239 	if (softs->syncnattab != NULL) {
240 		ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
241 				     softs->syncnattab);
242 		KFREES(softs->syncnattab,
243 		       softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
244 		softs->syncnattab = NULL;
245 	}
246 
247 	if (softs->syncstatetab != NULL) {
248 		ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
249 				     softs->syncstatetab);
250 		KFREES(softs->syncstatetab,
251 		       softs->ipf_sync_state_tab_sz *
252 		       sizeof(*softs->syncstatetab));
253 		softs->syncstatetab = NULL;
254 	}
255 
256 	if (softs->syncupd != NULL) {
257 		KFREES(softs->syncupd,
258 		       softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
259 		softs->syncupd = NULL;
260 	}
261 
262 	if (softs->synclog != NULL) {
263 		KFREES(softs->synclog,
264 		       softs->ipf_sync_log_sz * sizeof(*softs->synclog));
265 		softs->synclog = NULL;
266 	}
267 
268 	if (softs->ipf_sync_inited == 1) {
269 		MUTEX_DESTROY(&softs->ipsl_mutex);
270 		MUTEX_DESTROY(&softs->ipf_syncadd);
271 		RW_DESTROY(&softs->ipf_syncnat);
272 		RW_DESTROY(&softs->ipf_syncstate);
273 		softs->ipf_sync_inited = 0;
274 	}
275 
276 	return (0);
277 }
278 
279 void
ipf_sync_soft_destroy(ipf_main_softc_t * softc,void * arg)280 ipf_sync_soft_destroy(ipf_main_softc_t *softc, void *arg)
281 {
282 	ipf_sync_softc_t *softs = arg;
283 
284 	KFREE(softs);
285 }
286 
287 
288 # if !defined(sparc)
289 /* ------------------------------------------------------------------------ */
290 /* Function:    ipf_sync_tcporder                                           */
291 /* Returns:     Nil                                                         */
292 /* Parameters:  way(I) - direction of byte order conversion.                */
293 /*              td(IO) - pointer to data to be converted.                   */
294 /*                                                                          */
295 /* Do byte swapping on values in the TCP state information structure that   */
296 /* need to be used at both ends by the host in their native byte order.     */
297 /* ------------------------------------------------------------------------ */
298 void
ipf_sync_tcporder(int way,tcpdata_t * td)299 ipf_sync_tcporder(int way, tcpdata_t *td)
300 {
301 	if (way) {
302 		td->td_maxwin = htons(td->td_maxwin);
303 		td->td_end = htonl(td->td_end);
304 		td->td_maxend = htonl(td->td_maxend);
305 	} else {
306 		td->td_maxwin = ntohs(td->td_maxwin);
307 		td->td_end = ntohl(td->td_end);
308 		td->td_maxend = ntohl(td->td_maxend);
309 	}
310 }
311 
312 
313 /* ------------------------------------------------------------------------ */
314 /* Function:    ipf_sync_natorder                                           */
315 /* Returns:     Nil                                                         */
316 /* Parameters:  way(I)  - direction of byte order conversion.               */
317 /*              nat(IO) - pointer to data to be converted.                  */
318 /*                                                                          */
319 /* Do byte swapping on values in the NAT data structure that need to be     */
320 /* used at both ends by the host in their native byte order.                */
321 /* ------------------------------------------------------------------------ */
322 void
ipf_sync_natorder(int way,nat_t * n)323 ipf_sync_natorder(int way, nat_t *n)
324 {
325 	if (way) {
326 		n->nat_age = htonl(n->nat_age);
327 		n->nat_flags = htonl(n->nat_flags);
328 		n->nat_ipsumd = htonl(n->nat_ipsumd);
329 		n->nat_use = htonl(n->nat_use);
330 		n->nat_dir = htonl(n->nat_dir);
331 	} else {
332 		n->nat_age = ntohl(n->nat_age);
333 		n->nat_flags = ntohl(n->nat_flags);
334 		n->nat_ipsumd = ntohl(n->nat_ipsumd);
335 		n->nat_use = ntohl(n->nat_use);
336 		n->nat_dir = ntohl(n->nat_dir);
337 	}
338 }
339 
340 
341 /* ------------------------------------------------------------------------ */
342 /* Function:    ipf_sync_storder                                            */
343 /* Returns:     Nil                                                         */
344 /* Parameters:  way(I)  - direction of byte order conversion.               */
345 /*              ips(IO) - pointer to data to be converted.                  */
346 /*                                                                          */
347 /* Do byte swapping on values in the IP state data structure that need to   */
348 /* be used at both ends by the host in their native byte order.             */
349 /* ------------------------------------------------------------------------ */
350 void
ipf_sync_storder(int way,ipstate_t * ips)351 ipf_sync_storder(int way, ipstate_t *ips)
352 {
353 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
354 	ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
355 
356 	if (way) {
357 		ips->is_hv = htonl(ips->is_hv);
358 		ips->is_die = htonl(ips->is_die);
359 		ips->is_pass = htonl(ips->is_pass);
360 		ips->is_flags = htonl(ips->is_flags);
361 		ips->is_opt[0] = htonl(ips->is_opt[0]);
362 		ips->is_opt[1] = htonl(ips->is_opt[1]);
363 		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
364 		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
365 		ips->is_sec = htons(ips->is_sec);
366 		ips->is_secmsk = htons(ips->is_secmsk);
367 		ips->is_auth = htons(ips->is_auth);
368 		ips->is_authmsk = htons(ips->is_authmsk);
369 		ips->is_s0[0] = htonl(ips->is_s0[0]);
370 		ips->is_s0[1] = htonl(ips->is_s0[1]);
371 		ips->is_smsk[0] = htons(ips->is_smsk[0]);
372 		ips->is_smsk[1] = htons(ips->is_smsk[1]);
373 	} else {
374 		ips->is_hv = ntohl(ips->is_hv);
375 		ips->is_die = ntohl(ips->is_die);
376 		ips->is_pass = ntohl(ips->is_pass);
377 		ips->is_flags = ntohl(ips->is_flags);
378 		ips->is_opt[0] = ntohl(ips->is_opt[0]);
379 		ips->is_opt[1] = ntohl(ips->is_opt[1]);
380 		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
381 		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
382 		ips->is_sec = ntohs(ips->is_sec);
383 		ips->is_secmsk = ntohs(ips->is_secmsk);
384 		ips->is_auth = ntohs(ips->is_auth);
385 		ips->is_authmsk = ntohs(ips->is_authmsk);
386 		ips->is_s0[0] = ntohl(ips->is_s0[0]);
387 		ips->is_s0[1] = ntohl(ips->is_s0[1]);
388 		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
389 		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
390 	}
391 }
392 # else /* !defined(sparc) */
393 #  define	ipf_sync_tcporder(x,y)
394 #  define	ipf_sync_natorder(x,y)
395 #  define	ipf_sync_storder(x,y)
396 # endif /* !defined(sparc) */
397 
398 
399 /* ------------------------------------------------------------------------ */
400 /* Function:    ipf_sync_write                                              */
401 /* Returns:     int    - 0 == success, else error value.                    */
402 /* Parameters:  uio(I) - pointer to information about data to write         */
403 /*                                                                          */
404 /* Moves data from user space into the kernel and uses it for updating data */
405 /* structures in the state/NAT tables.                                      */
406 /* ------------------------------------------------------------------------ */
407 int
ipf_sync_write(ipf_main_softc_t * softc,struct uio * uio)408 ipf_sync_write(ipf_main_softc_t *softc, struct uio *uio)
409 {
410 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
411 	synchdr_t sh;
412 
413 	/*
414 	 * THIS MUST BE SUFFICIENT LARGE TO STORE
415 	 * ANY POSSIBLE DATA TYPE
416 	 */
417 	char data[2048];
418 
419 	int err = 0;
420 
421 #  if defined(__NetBSD__) || defined(__FreeBSD__)
422 	uio->uio_rw = UIO_WRITE;
423 #  endif
424 
425 	/* Try to get bytes */
426 	while (uio->uio_resid > 0) {
427 
428 		if (uio->uio_resid >= sizeof(sh)) {
429 
430 			err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
431 
432 			if (err) {
433 				if (softs->ipf_sync_debug > 2)
434 					printf("uiomove(header) failed: %d\n",
435 						err);
436 				return (err);
437 			}
438 
439 			/* convert to host order */
440 			sh.sm_magic = ntohl(sh.sm_magic);
441 			sh.sm_len = ntohl(sh.sm_len);
442 			sh.sm_num = ntohl(sh.sm_num);
443 
444 			if (softs->ipf_sync_debug > 8)
445 				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
446 					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
447 					sh.sm_table, sh.sm_rev, sh.sm_len,
448 					sh.sm_magic);
449 
450 			if (sh.sm_magic != SYNHDRMAGIC) {
451 				if (softs->ipf_sync_debug > 2)
452 					printf("uiomove(header) invalid %s\n",
453 						"magic");
454 				IPFERROR(110001);
455 				return (EINVAL);
456 			}
457 
458 			if (sh.sm_v != 4 && sh.sm_v != 6) {
459 				if (softs->ipf_sync_debug > 2)
460 					printf("uiomove(header) invalid %s\n",
461 						"protocol");
462 				IPFERROR(110002);
463 				return (EINVAL);
464 			}
465 
466 			if (sh.sm_cmd > SMC_MAXCMD) {
467 				if (softs->ipf_sync_debug > 2)
468 					printf("uiomove(header) invalid %s\n",
469 						"command");
470 				IPFERROR(110003);
471 				return (EINVAL);
472 			}
473 
474 
475 			if (sh.sm_table > SMC_MAXTBL) {
476 				if (softs->ipf_sync_debug > 2)
477 					printf("uiomove(header) invalid %s\n",
478 						"table");
479 				IPFERROR(110004);
480 				return (EINVAL);
481 			}
482 
483 		} else {
484 			/* unsufficient data, wait until next call */
485 			if (softs->ipf_sync_debug > 2)
486 				printf("uiomove(header) insufficient data");
487 			IPFERROR(110005);
488 			return (EAGAIN);
489 	 	}
490 
491 
492 		/*
493 		 * We have a header, so try to read the amount of data
494 		 * needed for the request
495 		 */
496 
497 		/* not supported */
498 		if (sh.sm_len == 0) {
499 			if (softs->ipf_sync_debug > 2)
500 				printf("uiomove(data zero length %s\n",
501 					"not supported");
502 			IPFERROR(110006);
503 			return (EINVAL);
504 		}
505 
506 		if (uio->uio_resid >= sh.sm_len) {
507 
508 			err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
509 
510 			if (err) {
511 				if (softs->ipf_sync_debug > 2)
512 					printf("uiomove(data) failed: %d\n",
513 						err);
514 				return (err);
515 			}
516 
517 			if (softs->ipf_sync_debug > 7)
518 				printf("uiomove(data) %d bytes read\n",
519 					sh.sm_len);
520 
521 			if (sh.sm_table == SMC_STATE)
522 				err = ipf_sync_state(softc, &sh, data);
523 			else if (sh.sm_table == SMC_NAT)
524 				err = ipf_sync_nat(softc, &sh, data);
525 			if (softs->ipf_sync_debug > 7)
526 				printf("[%d] Finished with error %d\n",
527 					sh.sm_num, err);
528 
529 		} else {
530 			/* insufficient data, wait until next call */
531 			if (softs->ipf_sync_debug > 2)
532 				printf("uiomove(data) %s %d bytes, got %d\n",
533 					"insufficient data, need",
534 					sh.sm_len, (int)uio->uio_resid);
535 			IPFERROR(110007);
536 			return (EAGAIN);
537 		}
538 	}
539 
540 	/* no more data */
541 	return (0);
542 }
543 
544 
545 /* ------------------------------------------------------------------------ */
546 /* Function:    ipf_sync_read                                               */
547 /* Returns:     int    - 0 == success, else error value.                    */
548 /* Parameters:  uio(O) - pointer to information about where to store data   */
549 /*                                                                          */
550 /* This function is called when a user program wants to read some data      */
551 /* for pending state/NAT updates.  If no data is available, the caller is   */
552 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
553 /* ------------------------------------------------------------------------ */
554 int
ipf_sync_read(ipf_main_softc_t * softc,struct uio * uio)555 ipf_sync_read(ipf_main_softc_t *softc, struct uio *uio)
556 {
557 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
558 	syncupdent_t *su;
559 	synclogent_t *sl;
560 	int err = 0;
561 
562 	if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
563 		IPFERROR(110008);
564 		return (EINVAL);
565 	}
566 
567 #  if defined(__NetBSD__) || defined(__FreeBSD__)
568 	uio->uio_rw = UIO_READ;
569 #  endif
570 
571 	MUTEX_ENTER(&softs->ipsl_mutex);
572 	while ((softs->sl_tail == softs->sl_idx) &&
573 	       (softs->su_tail == softs->su_idx)) {
574 #  if defined(_KERNEL)
575 #   if SOLARIS
576 		if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
577 			MUTEX_EXIT(&softs->ipsl_mutex);
578 			IPFERROR(110009);
579 			return (EINTR);
580 		}
581 #   else
582 		MUTEX_EXIT(&softs->ipsl_mutex);
583 		err = SLEEP(&softs->sl_tail, "ipl sleep");
584 		if (err) {
585 			IPFERROR(110012);
586 			return (EINTR);
587 		}
588 		MUTEX_ENTER(&softs->ipsl_mutex);
589 #   endif /* SOLARIS */
590 #  endif /* _KERNEL */
591 	}
592 
593 	while ((softs->sl_tail < softs->sl_idx) &&
594 	       (uio->uio_resid > sizeof(*sl))) {
595 		sl = softs->synclog + softs->sl_tail++;
596 		MUTEX_EXIT(&softs->ipsl_mutex);
597 		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
598 		if (err != 0)
599 			goto goterror;
600 		MUTEX_ENTER(&softs->ipsl_mutex);
601 	}
602 
603 	while ((softs->su_tail < softs->su_idx) &&
604 	       (uio->uio_resid > sizeof(*su))) {
605 		su = softs->syncupd + softs->su_tail;
606 		softs->su_tail++;
607 		MUTEX_EXIT(&softs->ipsl_mutex);
608 		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
609 		if (err != 0)
610 			goto goterror;
611 		MUTEX_ENTER(&softs->ipsl_mutex);
612 		if (su->sup_hdr.sm_sl != NULL)
613 			su->sup_hdr.sm_sl->sl_idx = -1;
614 	}
615 	if (softs->sl_tail == softs->sl_idx)
616 		softs->sl_tail = softs->sl_idx = 0;
617 	if (softs->su_tail == softs->su_idx)
618 		softs->su_tail = softs->su_idx = 0;
619 	MUTEX_EXIT(&softs->ipsl_mutex);
620 goterror:
621 	return (err);
622 }
623 
624 
625 /* ------------------------------------------------------------------------ */
626 /* Function:    ipf_sync_state                                              */
627 /* Returns:     int    - 0 == success, else error value.                    */
628 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
629 /*              uio(I) - pointer to user data for further information       */
630 /*                                                                          */
631 /* Updates the state table according to information passed in the sync      */
632 /* header.  As required, more data is fetched from the uio structure but    */
633 /* varies depending on the contents of the sync header.  This function can  */
634 /* create a new state entry or update one.  Deletion is left to the state   */
635 /* structures being timed out correctly.                                    */
636 /* ------------------------------------------------------------------------ */
637 static int
ipf_sync_state(ipf_main_softc_t * softc,synchdr_t * sp,void * data)638 ipf_sync_state(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
639 {
640 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
641 	synctcp_update_t su;
642 	ipstate_t *is, sn;
643 	synclist_t *sl;
644 	frentry_t *fr;
645 	u_int hv;
646 	int err = 0;
647 
648 	hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
649 
650 	switch (sp->sm_cmd)
651 	{
652 	case SMC_CREATE :
653 
654 		bcopy(data, &sn, sizeof(sn));
655 		KMALLOC(is, ipstate_t *);
656 		if (is == NULL) {
657 			IPFERROR(110013);
658 			err = ENOMEM;
659 			break;
660 		}
661 
662 		KMALLOC(sl, synclist_t *);
663 		if (sl == NULL) {
664 			IPFERROR(110014);
665 			err = ENOMEM;
666 			KFREE(is);
667 			break;
668 		}
669 
670 		bzero((char *)is, offsetof(ipstate_t, is_die));
671 		bcopy((char *)&sn.is_die, (char *)&is->is_die,
672 		      sizeof(*is) - offsetof(ipstate_t, is_die));
673 		ipf_sync_storder(0, is);
674 
675 		/*
676 		 * We need to find the same rule on the slave as was used on
677 		 * the master to create this state entry.
678 		 */
679 		READ_ENTER(&softc->ipf_mutex);
680 		fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
681 		if (fr != NULL) {
682 			MUTEX_ENTER(&fr->fr_lock);
683 			fr->fr_ref++;
684 			fr->fr_statecnt++;
685 			MUTEX_EXIT(&fr->fr_lock);
686 		}
687 		RWLOCK_EXIT(&softc->ipf_mutex);
688 
689 		if (softs->ipf_sync_debug > 4)
690 			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
691 
692 		is->is_rule = fr;
693 		is->is_sync = sl;
694 
695 		sl->sl_idx = -1;
696 		sl->sl_ips = is;
697 		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
698 
699 		WRITE_ENTER(&softs->ipf_syncstate);
700 		WRITE_ENTER(&softc->ipf_state);
701 
702 		sl->sl_pnext = softs->syncstatetab + hv;
703 		sl->sl_next = softs->syncstatetab[hv];
704 		if (softs->syncstatetab[hv] != NULL)
705 			softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
706 		softs->syncstatetab[hv] = sl;
707 		MUTEX_DOWNGRADE(&softs->ipf_syncstate);
708 		ipf_state_insert(softc, is, sp->sm_rev);
709 		/*
710 		 * Do not initialise the interface pointers for the state
711 		 * entry as the full complement of interface names may not
712 		 * be present.
713 		 *
714 		 * Put this state entry on its timeout queue.
715 		 */
716 		/*fr_setstatequeue(is, sp->sm_rev);*/
717 		break;
718 
719 	case SMC_UPDATE :
720 		bcopy(data, &su, sizeof(su));
721 
722 		if (softs->ipf_sync_debug > 4)
723 			printf("[%d] Update age %lu state %d/%d \n",
724 				sp->sm_num, su.stu_age, su.stu_state[0],
725 				su.stu_state[1]);
726 
727 		READ_ENTER(&softs->ipf_syncstate);
728 		for (sl = softs->syncstatetab[hv]; (sl != NULL);
729 		     sl = sl->sl_next)
730 			if (sl->sl_hdr.sm_num == sp->sm_num)
731 				break;
732 		if (sl == NULL) {
733 			if (softs->ipf_sync_debug > 1)
734 				printf("[%d] State not found - can't update\n",
735 					sp->sm_num);
736 			RWLOCK_EXIT(&softs->ipf_syncstate);
737 			IPFERROR(110015);
738 			err = ENOENT;
739 			break;
740 		}
741 
742 		READ_ENTER(&softc->ipf_state);
743 
744 		if (softs->ipf_sync_debug > 6)
745 			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
746 				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
747 				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
748 				sl->sl_hdr.sm_rev);
749 
750 		is = sl->sl_ips;
751 
752 		MUTEX_ENTER(&is->is_lock);
753 		switch (sp->sm_p)
754 		{
755 		case IPPROTO_TCP :
756 			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
757 			is->is_send = su.stu_data[0].td_end;
758 			is->is_maxsend = su.stu_data[0].td_maxend;
759 			is->is_maxswin = su.stu_data[0].td_maxwin;
760 			is->is_state[0] = su.stu_state[0];
761 			is->is_dend = su.stu_data[1].td_end;
762 			is->is_maxdend = su.stu_data[1].td_maxend;
763 			is->is_maxdwin = su.stu_data[1].td_maxwin;
764 			is->is_state[1] = su.stu_state[1];
765 			break;
766 		default :
767 			break;
768 		}
769 
770 		if (softs->ipf_sync_debug > 6)
771 			printf("[%d] Setting timers for state\n", sp->sm_num);
772 
773 		ipf_state_setqueue(softc, is, sp->sm_rev);
774 
775 		MUTEX_EXIT(&is->is_lock);
776 		break;
777 
778 	default :
779 		IPFERROR(110016);
780 		err = EINVAL;
781 		break;
782 	}
783 
784 	if (err == 0) {
785 		RWLOCK_EXIT(&softc->ipf_state);
786 		RWLOCK_EXIT(&softs->ipf_syncstate);
787 	}
788 
789 	if (softs->ipf_sync_debug > 6)
790 		printf("[%d] Update completed with error %d\n",
791 			sp->sm_num, err);
792 
793 	return (err);
794 }
795 
796 
797 /* ------------------------------------------------------------------------ */
798 /* Function:    ipf_sync_del                                                */
799 /* Returns:     Nil                                                         */
800 /* Parameters:  sl(I) - pointer to synclist object to delete                */
801 /*                                                                          */
802 /* Deletes an object from the synclist.                                     */
803 /* ------------------------------------------------------------------------ */
804 static void
ipf_sync_del(ipf_sync_softc_t * softs,synclist_t * sl)805 ipf_sync_del(ipf_sync_softc_t *softs, synclist_t *sl)
806 {
807 	*sl->sl_pnext = sl->sl_next;
808 	if (sl->sl_next != NULL)
809 		sl->sl_next->sl_pnext = sl->sl_pnext;
810 	if (sl->sl_idx != -1)
811 		softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
812 }
813 
814 
815 /* ------------------------------------------------------------------------ */
816 /* Function:    ipf_sync_del_state                                          */
817 /* Returns:     Nil                                                         */
818 /* Parameters:  sl(I) - pointer to synclist object to delete                */
819 /*                                                                          */
820 /* Deletes an object from the synclist state table and free's its memory.   */
821 /* ------------------------------------------------------------------------ */
822 void
ipf_sync_del_state(void * arg,synclist_t * sl)823 ipf_sync_del_state(void *arg, synclist_t *sl)
824 {
825 	ipf_sync_softc_t *softs = arg;
826 
827 	WRITE_ENTER(&softs->ipf_syncstate);
828 	ipf_sync_del(softs, sl);
829 	RWLOCK_EXIT(&softs->ipf_syncstate);
830 	KFREE(sl);
831 }
832 
833 
834 /* ------------------------------------------------------------------------ */
835 /* Function:    ipf_sync_del_nat                                            */
836 /* Returns:     Nil                                                         */
837 /* Parameters:  sl(I) - pointer to synclist object to delete                */
838 /*                                                                          */
839 /* Deletes an object from the synclist nat table and free's its memory.     */
840 /* ------------------------------------------------------------------------ */
841 void
ipf_sync_del_nat(void * arg,synclist_t * sl)842 ipf_sync_del_nat(void *arg, synclist_t *sl)
843 {
844 	ipf_sync_softc_t *softs = arg;
845 
846 	WRITE_ENTER(&softs->ipf_syncnat);
847 	ipf_sync_del(softs, sl);
848 	RWLOCK_EXIT(&softs->ipf_syncnat);
849 	KFREE(sl);
850 }
851 
852 
853 /* ------------------------------------------------------------------------ */
854 /* Function:    ipf_sync_nat                                                */
855 /* Returns:     int    - 0 == success, else error value.                    */
856 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
857 /*              uio(I) - pointer to user data for further information       */
858 /*                                                                          */
859 /* Updates the NAT  table according to information passed in the sync       */
860 /* header.  As required, more data is fetched from the uio structure but    */
861 /* varies depending on the contents of the sync header.  This function can  */
862 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
863 /* structures being timed out correctly.                                    */
864 /* ------------------------------------------------------------------------ */
865 static int
ipf_sync_nat(ipf_main_softc_t * softc,synchdr_t * sp,void * data)866 ipf_sync_nat(ipf_main_softc_t *softc, synchdr_t *sp, void *data)
867 {
868 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
869 	syncupdent_t su;
870 	nat_t *n, *nat;
871 	synclist_t *sl;
872 	u_int hv = 0;
873 	int err = 0;
874 
875 	READ_ENTER(&softs->ipf_syncnat);
876 
877 	switch (sp->sm_cmd)
878 	{
879 	case SMC_CREATE :
880 		KMALLOC(n, nat_t *);
881 		if (n == NULL) {
882 			IPFERROR(110017);
883 			err = ENOMEM;
884 			break;
885 		}
886 
887 		KMALLOC(sl, synclist_t *);
888 		if (sl == NULL) {
889 			IPFERROR(110018);
890 			err = ENOMEM;
891 			KFREE(n);
892 			break;
893 		}
894 
895 		nat = (nat_t *)data;
896 		bzero((char *)n, offsetof(nat_t, nat_age));
897 		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
898 		      sizeof(*n) - offsetof(nat_t, nat_age));
899 		ipf_sync_natorder(0, n);
900 		n->nat_sync = sl;
901 		n->nat_rev = sl->sl_rev;
902 
903 		sl->sl_idx = -1;
904 		sl->sl_ipn = n;
905 		sl->sl_num = ntohl(sp->sm_num);
906 
907 		WRITE_ENTER(&softc->ipf_nat);
908 		sl->sl_pnext = softs->syncnattab + hv;
909 		sl->sl_next = softs->syncnattab[hv];
910 		if (softs->syncnattab[hv] != NULL)
911 			softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
912 		softs->syncnattab[hv] = sl;
913 		(void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
914 		RWLOCK_EXIT(&softc->ipf_nat);
915 		break;
916 
917 	case SMC_UPDATE :
918 		bcopy(data, &su, sizeof(su));
919 
920 		for (sl = softs->syncnattab[hv]; (sl != NULL);
921 		     sl = sl->sl_next)
922 			if (sl->sl_hdr.sm_num == sp->sm_num)
923 				break;
924 		if (sl == NULL) {
925 			IPFERROR(110019);
926 			err = ENOENT;
927 			break;
928 		}
929 
930 		READ_ENTER(&softc->ipf_nat);
931 
932 		nat = sl->sl_ipn;
933 		nat->nat_rev = sl->sl_rev;
934 
935 		MUTEX_ENTER(&nat->nat_lock);
936 		ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
937 		MUTEX_EXIT(&nat->nat_lock);
938 
939 		RWLOCK_EXIT(&softc->ipf_nat);
940 
941 		break;
942 
943 	default :
944 		IPFERROR(110020);
945 		err = EINVAL;
946 		break;
947 	}
948 
949 	RWLOCK_EXIT(&softs->ipf_syncnat);
950 	return (err);
951 }
952 
953 
954 /* ------------------------------------------------------------------------ */
955 /* Function:    ipf_sync_new                                                */
956 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
957 /*                            data structure.                               */
958 /* Parameters:  tab(I) - type of synclist_t to create                       */
959 /*              fin(I) - pointer to packet information                      */
960 /*              ptr(I) - pointer to owning object                           */
961 /*                                                                          */
962 /* Creates a new sync table entry and notifies any sleepers that it's there */
963 /* waiting to be processed.                                                 */
964 /* ------------------------------------------------------------------------ */
965 synclist_t *
ipf_sync_new(ipf_main_softc_t * softc,int tab,fr_info_t * fin,void * ptr)966 ipf_sync_new(ipf_main_softc_t *softc, int tab, fr_info_t *fin, void *ptr)
967 {
968 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
969 	synclist_t *sl, *ss;
970 	synclogent_t *sle;
971 	u_int hv, sz;
972 
973 	if (softs->sl_idx == softs->ipf_sync_log_sz)
974 		return (NULL);
975 	KMALLOC(sl, synclist_t *);
976 	if (sl == NULL)
977 		return (NULL);
978 
979 	MUTEX_ENTER(&softs->ipf_syncadd);
980 	/*
981 	 * Get a unique number for this synclist_t.  The number is only meant
982 	 * to be unique for the lifetime of the structure and may be reused
983 	 * later.
984 	 */
985 	softs->ipf_sync_num++;
986 	if (softs->ipf_sync_num == 0) {
987 		softs->ipf_sync_num = 1;
988 		softs->ipf_sync_wrap++;
989 	}
990 
991 	/*
992 	 * Use the synch number of the object as the hash key.  Should end up
993 	 * with relatively even distribution over time.
994 	 * XXX - an attacker could lunch an DoS attack, of sorts, if they are
995 	 * the only one causing new table entries by only keeping open every
996 	 * nth connection they make, where n is a value in the interval
997 	 * [0, SYNC_STATETABSZ-1].
998 	 */
999 	switch (tab)
1000 	{
1001 	case SMC_STATE :
1002 		hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
1003 		while (softs->ipf_sync_wrap != 0) {
1004 			for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
1005 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1006 					break;
1007 			if (ss == NULL)
1008 				break;
1009 			softs->ipf_sync_num++;
1010 			hv = softs->ipf_sync_num &
1011 			     (softs->ipf_sync_state_tab_sz - 1);
1012 		}
1013 		sl->sl_pnext = softs->syncstatetab + hv;
1014 		sl->sl_next = softs->syncstatetab[hv];
1015 		softs->syncstatetab[hv] = sl;
1016 		break;
1017 
1018 	case SMC_NAT :
1019 		hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
1020 		while (softs->ipf_sync_wrap != 0) {
1021 			for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
1022 				if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1023 					break;
1024 			if (ss == NULL)
1025 				break;
1026 			softs->ipf_sync_num++;
1027 			hv = softs->ipf_sync_num &
1028 			     (softs->ipf_sync_nat_tab_sz - 1);
1029 		}
1030 		sl->sl_pnext = softs->syncnattab + hv;
1031 		sl->sl_next = softs->syncnattab[hv];
1032 		softs->syncnattab[hv] = sl;
1033 		break;
1034 
1035 	default :
1036 		break;
1037 	}
1038 
1039 	sl->sl_num = softs->ipf_sync_num;
1040 	MUTEX_EXIT(&softs->ipf_syncadd);
1041 
1042 	sl->sl_magic = htonl(SYNHDRMAGIC);
1043 	sl->sl_v = fin->fin_v;
1044 	sl->sl_p = fin->fin_p;
1045 	sl->sl_cmd = SMC_CREATE;
1046 	sl->sl_idx = -1;
1047 	sl->sl_table = tab;
1048 	sl->sl_rev = fin->fin_rev;
1049 	if (tab == SMC_STATE) {
1050 		sl->sl_ips = ptr;
1051 		sz = sizeof(*sl->sl_ips);
1052 	} else if (tab == SMC_NAT) {
1053 		sl->sl_ipn = ptr;
1054 		sz = sizeof(*sl->sl_ipn);
1055 	} else {
1056 		ptr = NULL;
1057 		sz = 0;
1058 	}
1059 	sl->sl_len = sz;
1060 
1061 	/*
1062 	 * Create the log entry to be read by a user daemon.  When it has been
1063 	 * finished and put on the queue, send a signal to wakeup any waiters.
1064 	 */
1065 	MUTEX_ENTER(&softs->ipf_syncadd);
1066 	sle = softs->synclog + softs->sl_idx++;
1067 	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
1068 	      sizeof(sle->sle_hdr));
1069 	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
1070 	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
1071 	if (ptr != NULL) {
1072 		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
1073 		if (tab == SMC_STATE) {
1074 			ipf_sync_storder(1, &sle->sle_un.sleu_ips);
1075 		} else if (tab == SMC_NAT) {
1076 			ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
1077 		}
1078 	}
1079 	MUTEX_EXIT(&softs->ipf_syncadd);
1080 
1081 	ipf_sync_wakeup(softc);
1082 	return (sl);
1083 }
1084 
1085 
1086 /* ------------------------------------------------------------------------ */
1087 /* Function:    ipf_sync_update                                             */
1088 /* Returns:     Nil                                                         */
1089 /* Parameters:  tab(I) - type of synclist_t to create                       */
1090 /*              fin(I) - pointer to packet information                      */
1091 /*              sl(I)  - pointer to synchronisation object                  */
1092 /*                                                                          */
1093 /* For outbound packets, only, create an sync update record for the user    */
1094 /* process to read.                                                         */
1095 /* ------------------------------------------------------------------------ */
1096 void
ipf_sync_update(ipf_main_softc_t * softc,int tab,fr_info_t * fin,synclist_t * sl)1097 ipf_sync_update(ipf_main_softc_t *softc, int tab, fr_info_t *fin,
1098 	synclist_t *sl)
1099 {
1100 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1101 	synctcp_update_t *st;
1102 	syncupdent_t *slu;
1103 	ipstate_t *ips;
1104 	nat_t *nat;
1105 	ipfrwlock_t *lock;
1106 
1107 	if (fin->fin_out == 0 || sl == NULL)
1108 		return;
1109 
1110 	if (tab == SMC_STATE) {
1111 		lock = &softs->ipf_syncstate;
1112 	} else {
1113 		lock = &softs->ipf_syncnat;
1114 	}
1115 
1116 	READ_ENTER(lock);
1117 	if (sl->sl_idx == -1) {
1118 		MUTEX_ENTER(&softs->ipf_syncadd);
1119 		slu = softs->syncupd + softs->su_idx;
1120 		sl->sl_idx = softs->su_idx++;
1121 		MUTEX_EXIT(&softs->ipf_syncadd);
1122 
1123 		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
1124 		      sizeof(slu->sup_hdr));
1125 		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
1126 		slu->sup_hdr.sm_sl = sl;
1127 		slu->sup_hdr.sm_cmd = SMC_UPDATE;
1128 		slu->sup_hdr.sm_table = tab;
1129 		slu->sup_hdr.sm_num = htonl(sl->sl_num);
1130 		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
1131 		slu->sup_hdr.sm_rev = fin->fin_rev;
1132 # if 0
1133 		if (fin->fin_p == IPPROTO_TCP) {
1134 			st->stu_len[0] = 0;
1135 			st->stu_len[1] = 0;
1136 		}
1137 # endif
1138 	} else
1139 		slu = softs->syncupd + sl->sl_idx;
1140 
1141 	/*
1142 	 * Only TCP has complex timeouts, others just use default timeouts.
1143 	 * For TCP, we only need to track the connection state and window.
1144 	 */
1145 	if (fin->fin_p == IPPROTO_TCP) {
1146 		st = &slu->sup_tcp;
1147 		if (tab == SMC_STATE) {
1148 			ips = sl->sl_ips;
1149 			st->stu_age = htonl(ips->is_die);
1150 			st->stu_data[0].td_end = ips->is_send;
1151 			st->stu_data[0].td_maxend = ips->is_maxsend;
1152 			st->stu_data[0].td_maxwin = ips->is_maxswin;
1153 			st->stu_state[0] = ips->is_state[0];
1154 			st->stu_data[1].td_end = ips->is_dend;
1155 			st->stu_data[1].td_maxend = ips->is_maxdend;
1156 			st->stu_data[1].td_maxwin = ips->is_maxdwin;
1157 			st->stu_state[1] = ips->is_state[1];
1158 		} else if (tab == SMC_NAT) {
1159 			nat = sl->sl_ipn;
1160 			st->stu_age = htonl(nat->nat_age);
1161 		}
1162 	}
1163 	RWLOCK_EXIT(lock);
1164 
1165 	ipf_sync_wakeup(softc);
1166 }
1167 
1168 
1169 /* ------------------------------------------------------------------------ */
1170 /* Function:    ipf_sync_flush_table                                        */
1171 /* Returns:     int - number of entries freed by flushing table             */
1172 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
1173 /*              table(I)   - pointer to sync table to empty                 */
1174 /*                                                                          */
1175 /* Walk through a table of sync entries and free each one.  It is assumed   */
1176 /* that some lock is held so that nobody else tries to access the table     */
1177 /* during this cleanup.                                                     */
1178 /* ------------------------------------------------------------------------ */
1179 static int
ipf_sync_flush_table(ipf_sync_softc_t * softs,int tabsize,synclist_t ** table)1180 ipf_sync_flush_table(ipf_sync_softc_t *softs, int tabsize, synclist_t **table)
1181 {
1182 	synclist_t *sl;
1183 	int i, items;
1184 
1185 	items = 0;
1186 
1187 	for (i = 0; i < tabsize; i++) {
1188 		while ((sl = table[i]) != NULL) {
1189 			switch (sl->sl_table) {
1190 			case SMC_STATE :
1191 				if (sl->sl_ips != NULL)
1192 					sl->sl_ips->is_sync = NULL;
1193 				break;
1194 			case SMC_NAT :
1195 				if (sl->sl_ipn != NULL)
1196 					sl->sl_ipn->nat_sync = NULL;
1197 				break;
1198 			}
1199 			if (sl->sl_next != NULL)
1200 				sl->sl_next->sl_pnext = sl->sl_pnext;
1201 			table[i] = sl->sl_next;
1202 			if (sl->sl_idx != -1)
1203 				softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
1204 			KFREE(sl);
1205 			items++;
1206 		}
1207 	}
1208 
1209 	return (items);
1210 }
1211 
1212 
1213 /* ------------------------------------------------------------------------ */
1214 /* Function:    ipf_sync_ioctl                                              */
1215 /* Returns:     int - 0 == success, != 0 == failure                         */
1216 /* Parameters:  data(I) - pointer to ioctl data                             */
1217 /*              cmd(I)  - ioctl command integer                             */
1218 /*              mode(I) - file mode bits used with open                     */
1219 /*                                                                          */
1220 /* This function currently does not handle any ioctls and so just returns   */
1221 /* EINVAL on all occasions.                                                 */
1222 /* ------------------------------------------------------------------------ */
1223 int
ipf_sync_ioctl(ipf_main_softc_t * softc,caddr_t data,ioctlcmd_t cmd,int mode,int uid,void * ctx)1224 ipf_sync_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd,
1225 	int mode, int uid, void *ctx)
1226 {
1227 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1228 	int error, i;
1229 	SPL_INT(s);
1230 
1231 	switch (cmd)
1232 	{
1233 	case SIOCIPFFL:
1234 		error = BCOPYIN(data, &i, sizeof(i));
1235 		if (error != 0) {
1236 			IPFERROR(110023);
1237 			error = EFAULT;
1238 			break;
1239 		}
1240 
1241 		switch (i)
1242 		{
1243 		case SMC_RLOG :
1244 			SPL_NET(s);
1245 			MUTEX_ENTER(&softs->ipsl_mutex);
1246 			i = (softs->sl_tail - softs->sl_idx) +
1247 			    (softs->su_tail - softs->su_idx);
1248 			softs->sl_idx = 0;
1249 			softs->su_idx = 0;
1250 			softs->sl_tail = 0;
1251 			softs->su_tail = 0;
1252 			MUTEX_EXIT(&softs->ipsl_mutex);
1253 			SPL_X(s);
1254 			break;
1255 
1256 		case SMC_NAT :
1257 			SPL_NET(s);
1258 			WRITE_ENTER(&softs->ipf_syncnat);
1259 			i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
1260 						 softs->syncnattab);
1261 			RWLOCK_EXIT(&softs->ipf_syncnat);
1262 			SPL_X(s);
1263 			break;
1264 
1265 		case SMC_STATE :
1266 			SPL_NET(s);
1267 			WRITE_ENTER(&softs->ipf_syncstate);
1268 			i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
1269 						 softs->syncstatetab);
1270 			RWLOCK_EXIT(&softs->ipf_syncstate);
1271 			SPL_X(s);
1272 			break;
1273 		}
1274 
1275 		error = BCOPYOUT(&i, data, sizeof(i));
1276 		if (error != 0) {
1277 			IPFERROR(110022);
1278 			error = EFAULT;
1279 		}
1280 		break;
1281 
1282 	default :
1283 		IPFERROR(110021);
1284 		error = EINVAL;
1285 		break;
1286 	}
1287 
1288 	return (error);
1289 }
1290 
1291 
1292 /* ------------------------------------------------------------------------ */
1293 /* Function:    ipf_sync_canread                                            */
1294 /* Returns:     int - 0 == success, != 0 == failure                         */
1295 /* Parameters:  Nil                                                         */
1296 /*                                                                          */
1297 /* This function provides input to the poll handler about whether or not    */
1298 /* there is data waiting to be read from the /dev/ipsync device.            */
1299 /* ------------------------------------------------------------------------ */
1300 int
ipf_sync_canread(void * arg)1301 ipf_sync_canread(void *arg)
1302 {
1303 	ipf_sync_softc_t *softs = arg;
1304 	return (!((softs->sl_tail == softs->sl_idx) &&
1305 		 (softs->su_tail == softs->su_idx)));
1306 }
1307 
1308 
/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_canwrite                                           */
/* Returns:     int - 1 == can always write                                 */
/* Parameters:  arg(I) - not used by this function                          */
/*                                                                          */
/* Tells the poll handler that the device is always ready and willing to    */
/* accept write events.                                                     */
/* XXX Maybe this should return false if the sync table is full?            */
/* ------------------------------------------------------------------------ */
int
ipf_sync_canwrite(void *arg)
{
	(void)arg;	/* writability does not depend on any state */

	return (1);
}
1323 
1324 
1325 /* ------------------------------------------------------------------------ */
1326 /* Function:    ipf_sync_wakeup                                             */
1327 /* Parameters:  Nil                                                         */
1328 /* Returns:     Nil                                                         */
1329 /*                                                                          */
1330 /* This function implements the heuristics that decide how often to         */
1331 /* generate a poll wakeup for programs that are waiting for information     */
1332 /* about when they can do a read on /dev/ipsync.                            */
1333 /*                                                                          */
1334 /* There are three different considerations here:                           */
1335 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
1336 /*   maximum number of ipf ticks to let pass by;                            */
1337 /* - do not let the queue of ouststanding things to generate notifies for   */
1338 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
1339 /* - do not let too many events get collapsed in before deciding that the   */
1340 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
1341 /*   mark for this counter.)                                                */
1342 /* ------------------------------------------------------------------------ */
1343 static void
ipf_sync_wakeup(ipf_main_softc_t * softc)1344 ipf_sync_wakeup(ipf_main_softc_t *softc)
1345 {
1346 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1347 
1348 	softs->ipf_sync_events++;
1349 	if ((softc->ipf_ticks >
1350 	    softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
1351 	    (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
1352 	    ((softs->sl_tail - softs->sl_idx) >
1353 	     softs->ipf_sync_queue_high_wm) ||
1354 	    ((softs->su_tail - softs->su_idx) >
1355 	     softs->ipf_sync_queue_high_wm)) {
1356 
1357 		ipf_sync_poll_wakeup(softc);
1358 	}
1359 }
1360 
1361 
1362 /* ------------------------------------------------------------------------ */
1363 /* Function:    ipf_sync_poll_wakeup                                        */
1364 /* Parameters:  Nil                                                         */
1365 /* Returns:     Nil                                                         */
1366 /*                                                                          */
1367 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
1368 /* ------------------------------------------------------------------------ */
1369 static void
ipf_sync_poll_wakeup(ipf_main_softc_t * softc)1370 ipf_sync_poll_wakeup(ipf_main_softc_t *softc)
1371 {
1372 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1373 
1374 	softs->ipf_sync_events = 0;
1375 	softs->ipf_sync_lastwakeup = softc->ipf_ticks;
1376 
1377 # ifdef _KERNEL
1378 #  if SOLARIS
1379 	MUTEX_ENTER(&softs->ipsl_mutex);
1380 	cv_signal(&softs->ipslwait);
1381 	MUTEX_EXIT(&softs->ipsl_mutex);
1382 	pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
1383 #  else
1384 	WAKEUP(&softs->sl_tail, 0);
1385 	POLLWAKEUP(IPL_LOGSYNC);
1386 #  endif
1387 # endif
1388 }
1389 
1390 
1391 /* ------------------------------------------------------------------------ */
1392 /* Function:    ipf_sync_expire                                             */
1393 /* Parameters:  Nil                                                         */
1394 /* Returns:     Nil                                                         */
1395 /*                                                                          */
1396 /* This is the function called even ipf_tick.  It implements one of the     */
1397 /* three heuristics above *IF* there are events waiting.                    */
1398 /* ------------------------------------------------------------------------ */
1399 void
ipf_sync_expire(ipf_main_softc_t * softc)1400 ipf_sync_expire(ipf_main_softc_t *softc)
1401 {
1402 	ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1403 
1404 	if ((softs->ipf_sync_events > 0) &&
1405 	    (softc->ipf_ticks >
1406 	     softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
1407 		ipf_sync_poll_wakeup(softc);
1408 	}
1409 }
1410