xref: /freebsd/sys/netpfil/pf/pflow.c (revision 5f757f3f)
1 /*	$OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $	*/
2 
3 /*
4  * Copyright (c) 2023 Rubicon Communications, LLC (Netgate)
5  * Copyright (c) 2011 Florian Obser <florian@narrans.de>
6  * Copyright (c) 2011 Sebastian Benoit <benoit-lists@fb12.de>
7  * Copyright (c) 2008 Henning Brauer <henning@openbsd.org>
8  * Copyright (c) 2008 Joerg Goltermann <jg@osn.de>
9  *
10  * Permission to use, copy, modify, and distribute this software for any
11  * purpose with or without fee is hereby granted, provided that the above
12  * copyright notice and this permission notice appear in all copies.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
16  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
17  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
20  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21  */
22 
23 #include <sys/cdefs.h>
24 #include <sys/param.h>
25 #include <sys/bus.h>
26 #include <sys/callout.h>
27 #include <sys/endian.h>
28 #include <sys/interrupt.h>
29 #include <sys/kernel.h>
30 #include <sys/malloc.h>
31 #include <sys/module.h>
32 #include <sys/mbuf.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <sys/sockio.h>
36 #include <sys/sysctl.h>
37 #include <sys/systm.h>
38 #include <sys/priv.h>
39 
40 #include <net/if.h>
41 #include <net/if_types.h>
42 #include <net/bpf.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <netinet/if_ether.h>
46 #include <netinet/tcp.h>
47 
48 #include <netinet/ip.h>
49 #include <netinet/ip_icmp.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/udp.h>
52 #include <netinet/udp_var.h>
53 #include <netinet/in_pcb.h>
54 
55 #include <netlink/netlink.h>
56 #include <netlink/netlink_ctl.h>
57 #include <netlink/netlink_generic.h>
58 #include <netlink/netlink_message_writer.h>
59 
60 #include <net/pfvar.h>
61 #include <net/pflow.h>
62 #include "net/if_var.h"
63 
/* Room for one pflow_header followed by a single pflow_flow record. */
#define PFLOW_MINMTU							\
	(sizeof(struct pflow_header) + sizeof(struct pflow_flow))

/*
 * Debug output, compiled in only when PFLOWDEBUG is defined.  The argument
 * is a complete, parenthesised printf() argument list, e.g.
 *	DPRINTF(("%s: something happened\n", __func__));
 */
#ifdef PFLOWDEBUG
#define DPRINTF(x)	do { printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
72 
/* Record family handed to pflow_sendout_ipfix(). */
enum pflow_family_t {
	PFLOW_INET,	/* IPv4 flow records */
	PFLOW_INET6,	/* IPv6 flow records */
	PFLOW_NAT4,	/* NAT44 event records */
};
78 
79 static void	pflow_output_process(void *);
80 static int	pflow_create(int);
81 static int	pflow_destroy(int, bool);
82 static int	pflow_calc_mtu(struct pflow_softc *, int, int);
83 static void	pflow_setmtu(struct pflow_softc *, int);
84 static int	pflowvalidsockaddr(const struct sockaddr *, int);
85 
86 static struct mbuf	*pflow_get_mbuf(struct pflow_softc *, u_int16_t);
87 static void	pflow_flush(struct pflow_softc *);
88 static int	pflow_sendout_v5(struct pflow_softc *);
89 static int	pflow_sendout_ipfix(struct pflow_softc *, enum pflow_family_t);
90 static int	pflow_sendout_ipfix_tmpl(struct pflow_softc *);
91 static int	pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *);
92 static int	sysctl_pflowstats(SYSCTL_HANDLER_ARGS);
93 static void	pflow_timeout(void *);
94 static void	pflow_timeout6(void *);
95 static void	pflow_timeout_tmpl(void *);
96 static void	pflow_timeout_nat4(void *);
97 static void	copy_flow_data(struct pflow_flow *, struct pflow_flow *,
98 	const struct pf_kstate *, struct pf_state_key *, int, int);
99 static void	copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *,
100 	struct pflow_ipfix_flow4 *, const struct pf_kstate *, struct pf_state_key *,
101 	struct pflow_softc *, int, int);
102 static void	copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *,
103 	struct pflow_ipfix_flow6 *, const struct pf_kstate *, struct pf_state_key *,
104 	struct pflow_softc *, int, int);
105 static int	pflow_pack_flow(const struct pf_kstate *, struct pf_state_key *,
106 	struct pflow_softc *);
107 static int	pflow_pack_flow_ipfix(const struct pf_kstate *, struct pf_state_key *,
108 	struct pflow_softc *);
109 static void	export_pflow(const struct pf_kstate *);
110 static int	export_pflow_if(const struct pf_kstate*, struct pf_state_key *,
111 	struct pflow_softc *);
112 static int	copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc);
113 static int	copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow,
114 	struct pflow_softc *sc);
115 static int	copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow,
116 	struct pflow_softc *sc);
117 static int	copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *,
118 	const struct pf_kstate *, struct pflow_softc *,
119 	uint8_t, uint64_t);
120 
/* Name given to swi_add() when pflow_create() registers the output handler. */
static const char pflowname[] = "pflow";
122 
123 enum pflowstat_counters {
124 	pflow_flows,
125 	pflow_packets,
126 	pflow_onomem,
127 	pflow_oerrors,
128 	pflow_ncounters,
129 };
130 struct pflowstats_ctr {
131 	counter_u64_t	c[pflow_ncounters];
132 };
133 
134 /**
135  * Locking concept
136  *
137  * The list of pflow devices (V_pflowif_list) is managed through epoch.
138  * It is safe to read the list without locking (while in NET_EPOCH).
 * There may only be one simultaneous modifier, hence we need
 * V_pflowif_list_mtx on every add/delete.
141  *
142  * Each pflow interface protects its own data with the sc_lock mutex.
143  *
144  * We do not require any pf locks, and in fact expect to be called without
145  * hashrow locks held.
146  **/
147 
148 VNET_DEFINE(struct unrhdr *,	pflow_unr);
149 #define	V_pflow_unr	VNET(pflow_unr)
150 VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list);
151 #define	V_pflowif_list	VNET(pflowif_list)
152 VNET_DEFINE(struct mtx, pflowif_list_mtx);
153 #define	V_pflowif_list_mtx	VNET(pflowif_list_mtx)
154 VNET_DEFINE(struct pflowstats_ctr,	 pflowstat);
155 #define	V_pflowstats	VNET(pflowstat)
156 
157 #define	PFLOW_LOCK(_sc)		mtx_lock(&(_sc)->sc_lock)
158 #define	PFLOW_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_lock)
159 #define	PFLOW_ASSERT(_sc)	mtx_assert(&(_sc)->sc_lock, MA_OWNED)
160 
161 SYSCTL_NODE(_net, OID_AUTO, pflow, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
162     "PFLOW");
163 SYSCTL_PROC(_net_pflow, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
164     0, 0, sysctl_pflowstats, "S,pflowstats",
165     "PFLOW statistics (struct pflowstats, net/if_pflow.h)");
166 
167 static inline void
168 pflowstat_inc(enum pflowstat_counters c)
169 {
170 	counter_u64_add(V_pflowstats.c[c], 1);
171 }
172 
173 static void
174 vnet_pflowattach(void)
175 {
176 	CK_LIST_INIT(&V_pflowif_list);
177 	mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF);
178 
179 	V_pflow_unr = new_unrhdr(0, PFLOW_MAX_ENTRIES - 1, &V_pflowif_list_mtx);
180 
181 	for (int i = 0; i < pflow_ncounters; i++)
182 		V_pflowstats.c[i] = counter_u64_alloc(M_WAITOK);
183 }
184 VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
185     vnet_pflowattach, NULL);
186 
187 static void
188 vnet_pflowdetach(void)
189 {
190 	struct pflow_softc	*sc;
191 
192 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
193 		pflow_destroy(sc->sc_id, false);
194 	}
195 
196 	MPASS(CK_LIST_EMPTY(&V_pflowif_list));
197 	delete_unrhdr(V_pflow_unr);
198 	mtx_destroy(&V_pflowif_list_mtx);
199 
200 	for (int i = 0; i < pflow_ncounters; i++)
201 		counter_u64_free(V_pflowstats.c[i]);
202 }
203 VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
204     vnet_pflowdetach, NULL);
205 
/*
 * Wait for pending epoch callbacks, so that every interface queued for
 * release has been freed and no cleanup calls remain outstanding.
 */
static void
vnet_pflow_finalise(void)
{
	NET_EPOCH_DRAIN_CALLBACKS();
}
VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pflow_finalise, NULL);
217 
218 static void
219 pflow_output_process(void *arg)
220 {
221 	struct mbufq ml;
222 	struct pflow_softc *sc = arg;
223 	struct mbuf *m;
224 
225 	mbufq_init(&ml, 0);
226 
227 	PFLOW_LOCK(sc);
228 	mbufq_concat(&ml, &sc->sc_outputqueue);
229 	PFLOW_UNLOCK(sc);
230 
231 	CURVNET_SET(sc->sc_vnet);
232 	while ((m = mbufq_dequeue(&ml)) != NULL) {
233 		pflow_sendout_mbuf(sc, m);
234 	}
235 	CURVNET_RESTORE();
236 }
237 
238 static int
239 pflow_create(int unit)
240 {
241 	struct pflow_softc	*pflowif;
242 	int			 error;
243 
244 	pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
245 	mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF);
246 	pflowif->sc_version = PFLOW_PROTO_DEFAULT;
247 	pflowif->sc_observation_dom = PFLOW_ENGINE_TYPE;
248 
249 	/* ipfix template init */
250 	bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix));
251 	pflowif->sc_tmpl_ipfix.set_header.set_id =
252 	    htons(PFLOW_IPFIX_TMPL_SET_ID);
253 	pflowif->sc_tmpl_ipfix.set_header.set_length =
254 	    htons(sizeof(struct pflow_ipfix_tmpl));
255 
256 	/* ipfix IPv4 template */
257 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id =
258 	    htons(PFLOW_IPFIX_TMPL_IPV4_ID);
259 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count
260 	    = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT);
261 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id =
262 	    htons(PFIX_IE_sourceIPv4Address);
263 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4);
264 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id =
265 	    htons(PFIX_IE_destinationIPv4Address);
266 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4);
267 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id =
268 	    htons(PFIX_IE_ingressInterface);
269 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4);
270 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id =
271 	    htons(PFIX_IE_egressInterface);
272 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4);
273 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id =
274 	    htons(PFIX_IE_packetDeltaCount);
275 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8);
276 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id =
277 	    htons(PFIX_IE_octetDeltaCount);
278 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8);
279 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id =
280 	    htons(PFIX_IE_flowStartMilliseconds);
281 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8);
282 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id =
283 	    htons(PFIX_IE_flowEndMilliseconds);
284 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8);
285 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id =
286 	    htons(PFIX_IE_sourceTransportPort);
287 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2);
288 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id =
289 	    htons(PFIX_IE_destinationTransportPort);
290 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2);
291 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id =
292 	    htons(PFIX_IE_ipClassOfService);
293 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1);
294 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id =
295 	    htons(PFIX_IE_protocolIdentifier);
296 	pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1);
297 
298 	/* ipfix IPv6 template */
299 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id =
300 	    htons(PFLOW_IPFIX_TMPL_IPV6_ID);
301 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count =
302 	    htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT);
303 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id =
304 	    htons(PFIX_IE_sourceIPv6Address);
305 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16);
306 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id =
307 	    htons(PFIX_IE_destinationIPv6Address);
308 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16);
309 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id =
310 	    htons(PFIX_IE_ingressInterface);
311 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4);
312 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id =
313 	    htons(PFIX_IE_egressInterface);
314 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4);
315 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id =
316 	    htons(PFIX_IE_packetDeltaCount);
317 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8);
318 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id =
319 	    htons(PFIX_IE_octetDeltaCount);
320 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8);
321 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id =
322 	    htons(PFIX_IE_flowStartMilliseconds);
323 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8);
324 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id =
325 	    htons(PFIX_IE_flowEndMilliseconds);
326 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8);
327 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id =
328 	    htons(PFIX_IE_sourceTransportPort);
329 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2);
330 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id =
331 	    htons(PFIX_IE_destinationTransportPort);
332 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2);
333 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id =
334 	    htons(PFIX_IE_ipClassOfService);
335 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1);
336 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id =
337 	    htons(PFIX_IE_protocolIdentifier);
338 	pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1);
339 
340 	/* NAT44 create template */
341 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.tmpl_id =
342 	    htons(PFLOW_IPFIX_TMPL_NAT44_ID);
343 	pflowif->sc_tmpl_ipfix.nat44_tmpl.h.field_count =
344 	    htons(PFLOW_IPFIX_TMPL_NAT44_FIELD_COUNT);
345 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.field_id =
346 	    htons(PFIX_IE_timeStamp);
347 	pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.len =
348 	    htons(8);
349 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.field_id =
350 	    htons(PFIX_IE_natEvent);
351 	pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.len =
352 	    htons(1);
353 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.field_id =
354 	    htons(PFIX_IE_protocolIdentifier);
355 	pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.len = htons(1);
356 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.field_id =
357 	    htons(PFIX_IE_sourceIPv4Address);
358 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.len =
359 	    htons(4);
360 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.field_id =
361 	    htons(PFIX_IE_sourceTransportPort);
362 	pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.len = htons(2);
363 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.field_id =
364 	    htons(PFIX_IE_postNATSourceIPv4Address);
365 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.len =
366 	    htons(4);
367 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.field_id =
368 	    htons(PFIX_IE_postNAPTSourceTransportPort);
369 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.len =
370 	    htons(2);
371 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.field_id =
372 	    htons(PFIX_IE_destinationIPv4Address);
373 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.len =
374 	    htons(4);
375 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.field_id =
376 	    htons(PFIX_IE_destinationTransportPort);
377 	pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.len = htons(2);
378 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.field_id =
379 	    htons(PFIX_IE_postNATDestinationIPv4Address);
380 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.len =
381 	    htons(4);
382 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.field_id =
383 	    htons(PFIX_IE_postNAPTDestinationTransportPort);
384 	pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.len =
385 	    htons(2);
386 
387 	pflowif->sc_id = unit;
388 	pflowif->sc_vnet = curvnet;
389 
390 	mbufq_init(&pflowif->sc_outputqueue, 8192);
391 	pflow_setmtu(pflowif, ETHERMTU);
392 
393 	callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0);
394 	callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0);
395 	callout_init_mtx(&pflowif->sc_tmo_nat4, &pflowif->sc_lock, 0);
396 	callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0);
397 
398 	error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process,
399 	    pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie);
400 	if (error) {
401 		free(pflowif, M_DEVBUF);
402 		return (error);
403 	}
404 
405 	/* Insert into list of pflows */
406 	mtx_lock(&V_pflowif_list_mtx);
407 	CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next);
408 	mtx_unlock(&V_pflowif_list_mtx);
409 
410 	V_pflow_export_state_ptr = export_pflow;
411 
412 	return (0);
413 }
414 
415 static void
416 pflow_free_cb(struct epoch_context *ctx)
417 {
418 	struct pflow_softc *sc;
419 
420 	sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx);
421 
422 	free(sc, M_DEVBUF);
423 }
424 
425 static int
426 pflow_destroy(int unit, bool drain)
427 {
428 	struct pflow_softc	*sc;
429 	int			 error __diagused;
430 
431 	mtx_lock(&V_pflowif_list_mtx);
432 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
433 		if (sc->sc_id == unit)
434 			break;
435 	}
436 	if (sc == NULL) {
437 		mtx_unlock(&V_pflowif_list_mtx);
438 		return (ENOENT);
439 	}
440 	CK_LIST_REMOVE(sc, sc_next);
441 	if (CK_LIST_EMPTY(&V_pflowif_list))
442 		V_pflow_export_state_ptr = NULL;
443 	mtx_unlock(&V_pflowif_list_mtx);
444 
445 	sc->sc_dying = 1;
446 
447 	if (drain) {
448 		/* Let's be sure no one is using this interface any more. */
449 		NET_EPOCH_DRAIN_CALLBACKS();
450 	}
451 
452 	error = swi_remove(sc->sc_swi_cookie);
453 	MPASS(error == 0);
454 	error = intr_event_destroy(sc->sc_swi_ie);
455 	MPASS(error == 0);
456 
457 	callout_drain(&sc->sc_tmo);
458 	callout_drain(&sc->sc_tmo6);
459 	callout_drain(&sc->sc_tmo_nat4);
460 	callout_drain(&sc->sc_tmo_tmpl);
461 
462 	m_freem(sc->sc_mbuf);
463 	m_freem(sc->sc_mbuf6);
464 	m_freem(sc->sc_mbuf_nat4);
465 
466 	PFLOW_LOCK(sc);
467 	mbufq_drain(&sc->sc_outputqueue);
468 	if (sc->so != NULL) {
469 		soclose(sc->so);
470 		sc->so = NULL;
471 	}
472 	if (sc->sc_flowdst != NULL)
473 		free(sc->sc_flowdst, M_DEVBUF);
474 	if (sc->sc_flowsrc != NULL)
475 		free(sc->sc_flowsrc, M_DEVBUF);
476 	PFLOW_UNLOCK(sc);
477 
478 	mtx_destroy(&sc->sc_lock);
479 
480 	free_unr(V_pflow_unr, unit);
481 
482 	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);
483 
484 	return (0);
485 }
486 
/*
 * Check whether 'sa' is a usable IPv4 or IPv6 socket address.
 *
 * The address must not be the unspecified address, and unless
 * 'ignore_port' is non-zero the port must be non-zero as well.
 * Returns 1 if usable; 0 for NULL, other address families or failed checks.
 */
static int
pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
{
	if (sa == NULL)
		return (0);

	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *in4 =
		    (const struct sockaddr_in *)sa;

		if (in4->sin_addr.s_addr == INADDR_ANY)
			return (0);
		return (ignore_port || in4->sin_port != 0);
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr))
			return (0);
		return (ignore_port || in6->sin6_port != 0);
	}

	return (0);
}
508 
509 int
510 pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
511 {
512 	size_t min;
513 
514 	sc->sc_maxcount4 = (mtu - hdrsz -
515 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
516 	sc->sc_maxcount6 = (mtu - hdrsz -
517 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
518 	sc->sc_maxcount_nat4 = (mtu - hdrsz -
519 	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_nat4);
520 	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
521 		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
522 	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
523 		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
524 	if (sc->sc_maxcount_nat4 > PFLOW_MAXFLOWS)
525 		sc->sc_maxcount_nat4 = PFLOW_MAXFLOWS;
526 
527 	min = MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
528 	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6));
529 	min = MIN(min, sc->sc_maxcount_nat4 * sizeof(struct pflow_ipfix_nat4));
530 
531 	return (hdrsz + sizeof(struct udpiphdr) + min);
532 }
533 
534 static void
535 pflow_setmtu(struct pflow_softc *sc, int mtu_req)
536 {
537 	int	mtu;
538 
539 	mtu = mtu_req;
540 
541 	switch (sc->sc_version) {
542 	case PFLOW_PROTO_5:
543 		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
544 		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
545 		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
546 		    sc->sc_maxcount = PFLOW_MAXFLOWS;
547 		break;
548 	case PFLOW_PROTO_10:
549 		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
550 		break;
551 	default: /* NOTREACHED */
552 		break;
553 	}
554 }
555 
556 static struct mbuf *
557 pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
558 {
559 	struct pflow_set_header	 set_hdr;
560 	struct pflow_header	 h;
561 	struct mbuf		*m;
562 
563 	MGETHDR(m, M_NOWAIT, MT_DATA);
564 	if (m == NULL) {
565 		pflowstat_inc(pflow_onomem);
566 		return (NULL);
567 	}
568 
569 	MCLGET(m, M_NOWAIT);
570 	if ((m->m_flags & M_EXT) == 0) {
571 		m_free(m);
572 		pflowstat_inc(pflow_onomem);
573 		return (NULL);
574 	}
575 
576 	m->m_len = m->m_pkthdr.len = 0;
577 
578 	if (sc == NULL)		/* get only a new empty mbuf */
579 		return (m);
580 
581 	switch (sc->sc_version) {
582 	case PFLOW_PROTO_5:
583 		/* populate pflow_header */
584 		h.reserved1 = 0;
585 		h.reserved2 = 0;
586 		h.count = 0;
587 		h.version = htons(PFLOW_PROTO_5);
588 		h.flow_sequence = htonl(sc->sc_gcounter);
589 		h.engine_type = PFLOW_ENGINE_TYPE;
590 		h.engine_id = PFLOW_ENGINE_ID;
591 		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);
592 
593 		sc->sc_count = 0;
594 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
595 		    pflow_timeout, sc);
596 		break;
597 	case PFLOW_PROTO_10:
598 		/* populate pflow_set_header */
599 		set_hdr.set_length = 0;
600 		set_hdr.set_id = htons(set_id);
601 		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
602 		break;
603 	default: /* NOTREACHED */
604 		break;
605 	}
606 
607 	return (m);
608 }
609 
610 static void
611 copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
612     const struct pf_kstate *st, struct pf_state_key *sk, int src, int dst)
613 {
614 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
615 	flow1->src_port = flow2->dest_port = sk->port[src];
616 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
617 	flow1->dest_port = flow2->src_port = sk->port[dst];
618 
619 	flow1->dest_as = flow2->src_as =
620 	    flow1->src_as = flow2->dest_as = 0;
621 	flow1->if_index_in = htons(st->if_index_in);
622 	flow1->if_index_out = htons(st->if_index_out);
623 	flow2->if_index_in = htons(st->if_index_out);
624 	flow2->if_index_out = htons(st->if_index_in);
625 	flow1->dest_mask = flow2->src_mask =
626 	    flow1->src_mask = flow2->dest_mask = 0;
627 
628 	flow1->flow_packets = htonl(st->packets[0]);
629 	flow2->flow_packets = htonl(st->packets[1]);
630 	flow1->flow_octets = htonl(st->bytes[0]);
631 	flow2->flow_octets = htonl(st->bytes[1]);
632 
633 	/*
634 	 * Pretend the flow was created or expired when the machine came up
635 	 * when creation is in the future of the last time a package was seen
636 	 * or was created / expired before this machine came up due to pfsync.
637 	 */
638 	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
639 	    st->creation > st->expire ? htonl(0) : htonl(st->creation);
640 	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
641 	    htonl(st->expire);
642 	flow1->tcp_flags = flow2->tcp_flags = 0;
643 	flow1->protocol = flow2->protocol = sk->proto;
644 	flow1->tos = flow2->tos = st->rule.ptr->tos;
645 }
646 
647 static void
648 copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
649     struct pflow_ipfix_flow4 *flow2, const struct pf_kstate *st,
650     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
651 {
652 	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
653 	flow1->src_port = flow2->dest_port = sk->port[src];
654 	flow1->dest_ip = flow2->src_ip = sk->addr[dst].v4.s_addr;
655 	flow1->dest_port = flow2->src_port = sk->port[dst];
656 
657 	flow1->if_index_in = htonl(st->if_index_in);
658 	flow1->if_index_out = htonl(st->if_index_out);
659 	flow2->if_index_in = htonl(st->if_index_out);
660 	flow2->if_index_out = htonl(st->if_index_in);
661 
662 	flow1->flow_packets = htobe64(st->packets[0]);
663 	flow2->flow_packets = htobe64(st->packets[1]);
664 	flow1->flow_octets = htobe64(st->bytes[0]);
665 	flow2->flow_octets = htobe64(st->bytes[1]);
666 
667 	/*
668 	 * Pretend the flow was created when the machine came up when creation
669 	 * is in the future of the last time a package was seen due to pfsync.
670 	 */
671 	if (st->creation > st->expire)
672 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
673 		    time_uptime)*1000);
674 	else
675 		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
676 		    (pf_get_uptime() - st->creation)));
677 	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
678 	    (pf_get_uptime() - st->expire)));
679 
680 	flow1->protocol = flow2->protocol = sk->proto;
681 	flow1->tos = flow2->tos = st->rule.ptr->tos;
682 }
683 
684 static void
685 copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
686     struct pflow_ipfix_flow6 *flow2, const struct pf_kstate *st,
687     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
688 {
689 	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
690 	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
691 	flow1->src_port = flow2->dest_port = sk->port[src];
692 	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
693 	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
694 	flow1->dest_port = flow2->src_port = sk->port[dst];
695 
696 	flow1->if_index_in = htonl(st->if_index_in);
697 	flow1->if_index_out = htonl(st->if_index_out);
698 	flow2->if_index_in = htonl(st->if_index_out);
699 	flow2->if_index_out = htonl(st->if_index_in);
700 
701 	flow1->flow_packets = htobe64(st->packets[0]);
702 	flow2->flow_packets = htobe64(st->packets[1]);
703 	flow1->flow_octets = htobe64(st->bytes[0]);
704 	flow2->flow_octets = htobe64(st->bytes[1]);
705 
706 	/*
707 	 * Pretend the flow was created when the machine came up when creation
708 	 * is in the future of the last time a package was seen due to pfsync.
709 	 */
710 	if (st->creation > st->expire)
711 		flow1->flow_start = flow2->flow_start = htobe64((time_second -
712 		    time_uptime)*1000);
713 	else
714 		flow1->flow_start = flow2->flow_start = htobe64((pf_get_time() -
715 		    (pf_get_uptime() - st->creation)));
716 	flow1->flow_finish = flow2->flow_finish = htobe64((pf_get_time() -
717 	    (pf_get_uptime() - st->expire)));
718 
719 	flow1->protocol = flow2->protocol = sk->proto;
720 	flow1->tos = flow2->tos = st->rule.ptr->tos;
721 }
722 
723 static void
724 copy_nat_ipfix_4_data(struct pflow_ipfix_nat4 *nat1,
725     struct pflow_ipfix_nat4 *nat2, const struct pf_kstate *st,
726     struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
727 {
728 	nat1->src_ip = nat2->dest_ip = st->key[PF_SK_STACK]->addr[src].v4.s_addr;
729 	nat1->src_port = nat2->dest_port = st->key[PF_SK_STACK]->port[src];
730 	nat1->dest_ip = nat2->src_ip = st->key[PF_SK_STACK]->addr[dst].v4.s_addr;
731 	nat1->dest_port = nat2->src_port = st->key[PF_SK_STACK]->port[dst];
732 	nat1->postnat_src_ip = nat2->postnat_dest_ip = st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
733 	nat1->postnat_src_port = nat2->postnat_dest_port = st->key[PF_SK_WIRE]->port[src];
734 	nat1->postnat_dest_ip = nat2->postnat_src_ip = st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
735 	nat1->postnat_dest_port = nat2->postnat_src_port = st->key[PF_SK_WIRE]->port[dst];
736 	nat1->protocol = nat2->protocol = sk->proto;
737 
738 	/*
739 	 * Because we have to generate a create and delete event we'll fill out the
740 	 * timestamp and nat_event fields when we transmit. As opposed to doing this
741 	 * work a second time.
742 	*/
743 }
744 
745 static void
746 export_pflow(const struct pf_kstate *st)
747 {
748 	struct pflow_softc	*sc = NULL;
749 	struct pf_state_key	*sk;
750 
751 	NET_EPOCH_ASSERT();
752 
753 	sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
754 
755 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
756 		PFLOW_LOCK(sc);
757 		switch (sc->sc_version) {
758 		case PFLOW_PROTO_5:
759 			if (sk->af == AF_INET)
760 				export_pflow_if(st, sk, sc);
761 			break;
762 		case PFLOW_PROTO_10:
763 			if (sk->af == AF_INET || sk->af == AF_INET6)
764 				export_pflow_if(st, sk, sc);
765 			break;
766 		default: /* NOTREACHED */
767 			break;
768 		}
769 		PFLOW_UNLOCK(sc);
770 	}
771 }
772 
773 static int
774 export_pflow_if(const struct pf_kstate *st, struct pf_state_key *sk,
775     struct pflow_softc *sc)
776 {
777 	struct pf_kstate	 pfs_copy;
778 	u_int64_t		 bytes[2];
779 	int			 ret = 0;
780 
781 	if (sc->sc_version == PFLOW_PROTO_10)
782 		return (pflow_pack_flow_ipfix(st, sk, sc));
783 
784 	/* PFLOW_PROTO_5 */
785 	if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES)
786 	    && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES))
787 		return (pflow_pack_flow(st, sk, sc));
788 
789 	/* flow > PFLOW_MAXBYTES need special handling */
790 	bcopy(st, &pfs_copy, sizeof(pfs_copy));
791 	bytes[0] = pfs_copy.bytes[0];
792 	bytes[1] = pfs_copy.bytes[1];
793 
794 	while (bytes[0] > PFLOW_MAXBYTES) {
795 		pfs_copy.bytes[0] = PFLOW_MAXBYTES;
796 		pfs_copy.bytes[1] = 0;
797 
798 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
799 			return (ret);
800 		if ((bytes[0] - PFLOW_MAXBYTES) > 0)
801 			bytes[0] -= PFLOW_MAXBYTES;
802 	}
803 
804 	while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) {
805 		pfs_copy.bytes[1] = PFLOW_MAXBYTES;
806 		pfs_copy.bytes[0] = 0;
807 
808 		if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0)
809 			return (ret);
810 		if ((bytes[1] - PFLOW_MAXBYTES) > 0)
811 			bytes[1] -= PFLOW_MAXBYTES;
812 	}
813 
814 	pfs_copy.bytes[0] = bytes[0];
815 	pfs_copy.bytes[1] = bytes[1];
816 
817 	return (pflow_pack_flow(&pfs_copy, sk, sc));
818 }
819 
820 static int
821 copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc)
822 {
823 	int		ret = 0;
824 
825 	PFLOW_ASSERT(sc);
826 
827 	if (sc->sc_mbuf == NULL) {
828 		if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
829 			return (ENOBUFS);
830 	}
831 	m_copyback(sc->sc_mbuf, PFLOW_HDRLEN +
832 	    (sc->sc_count * sizeof(struct pflow_flow)),
833 	    sizeof(struct pflow_flow), (caddr_t)flow);
834 
835 	pflowstat_inc(pflow_flows);
836 	sc->sc_gcounter++;
837 	sc->sc_count++;
838 
839 	if (sc->sc_count >= sc->sc_maxcount)
840 		ret = pflow_sendout_v5(sc);
841 
842 	return(ret);
843 }
844 
845 static int
846 copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc)
847 {
848 	int		ret = 0;
849 
850 	PFLOW_ASSERT(sc);
851 
852 	if (sc->sc_mbuf == NULL) {
853 		if ((sc->sc_mbuf =
854 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
855 			return (ENOBUFS);
856 		}
857 		sc->sc_count4 = 0;
858 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
859 		    pflow_timeout, sc);
860 	}
861 	m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN +
862 	    (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)),
863 	    sizeof(struct pflow_ipfix_flow4), (caddr_t)flow);
864 
865 	pflowstat_inc(pflow_flows);
866 	sc->sc_gcounter++;
867 	sc->sc_count4++;
868 
869 	if (sc->sc_count4 >= sc->sc_maxcount4)
870 		ret = pflow_sendout_ipfix(sc, PFLOW_INET);
871 	return(ret);
872 }
873 
874 static int
875 copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc)
876 {
877 	int		ret = 0;
878 
879 	PFLOW_ASSERT(sc);
880 
881 	if (sc->sc_mbuf6 == NULL) {
882 		if ((sc->sc_mbuf6 =
883 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
884 			return (ENOBUFS);
885 		}
886 		sc->sc_count6 = 0;
887 		callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz,
888 		    pflow_timeout6, sc);
889 	}
890 	m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN +
891 	    (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)),
892 	    sizeof(struct pflow_ipfix_flow6), (caddr_t)flow);
893 
894 	pflowstat_inc(pflow_flows);
895 	sc->sc_gcounter++;
896 	sc->sc_count6++;
897 
898 	if (sc->sc_count6 >= sc->sc_maxcount6)
899 		ret = pflow_sendout_ipfix(sc, PFLOW_INET6);
900 
901 	return(ret);
902 }
903 
904 int
905 copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *nat, const struct pf_kstate *st,
906     struct pflow_softc *sc, uint8_t event, uint64_t timestamp)
907 {
908 	int		ret = 0;
909 
910 	PFLOW_ASSERT(sc);
911 
912 	if (sc->sc_mbuf_nat4 == NULL) {
913 		if ((sc->sc_mbuf_nat4 =
914 		    pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_NAT44_ID)) == NULL) {
915 			return (ENOBUFS);
916 		}
917 		sc->sc_count_nat4 = 0;
918 		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
919 		    pflow_timeout_nat4, sc);
920 	}
921 
922 	nat->nat_event = event;
923 	nat->timestamp = htobe64(pf_get_time() - (pf_get_uptime() - timestamp));
924 	m_copyback(sc->sc_mbuf_nat4, PFLOW_SET_HDRLEN +
925 	    (sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4)),
926 	    sizeof(struct pflow_ipfix_nat4), (caddr_t)nat);
927 	sc->sc_count_nat4++;
928 
929 	pflowstat_inc(pflow_flows);
930 	sc->sc_gcounter++;
931 
932 	if (sc->sc_count_nat4 >= sc->sc_maxcount_nat4)
933 		ret = pflow_sendout_ipfix(sc, PFLOW_NAT4);
934 
935 	return (ret);
936 }
937 
938 static int
939 pflow_pack_flow(const struct pf_kstate *st, struct pf_state_key *sk,
940     struct pflow_softc *sc)
941 {
942 	struct pflow_flow	 flow1;
943 	struct pflow_flow	 flow2;
944 	int			 ret = 0;
945 
946 	bzero(&flow1, sizeof(flow1));
947 	bzero(&flow2, sizeof(flow2));
948 
949 	if (st->direction == PF_OUT)
950 		copy_flow_data(&flow1, &flow2, st, sk, 1, 0);
951 	else
952 		copy_flow_data(&flow1, &flow2, st, sk, 0, 1);
953 
954 	if (st->bytes[0] != 0) /* first flow from state */
955 		ret = copy_flow_to_m(&flow1, sc);
956 
957 	if (st->bytes[1] != 0) /* second flow from state */
958 		ret = copy_flow_to_m(&flow2, sc);
959 
960 	return (ret);
961 }
962 
963 static bool
964 pflow_is_natd(const struct pf_kstate *st)
965 {
966 	/* If ports or addresses are different we've been NAT-ed. */
967 	return (memcmp(st->key[PF_SK_WIRE], st->key[PF_SK_STACK],
968 	    sizeof(struct pf_addr) * 2 + sizeof(uint16_t) * 2) != 0);
969 }
970 
971 static int
972 pflow_pack_flow_ipfix(const struct pf_kstate *st, struct pf_state_key *sk,
973     struct pflow_softc *sc)
974 {
975 	struct pflow_ipfix_flow4	 flow4_1, flow4_2;
976 	struct pflow_ipfix_nat4		 nat4_1, nat4_2;
977 	struct pflow_ipfix_flow6	 flow6_1, flow6_2;
978 	int				 ret = 0;
979 	bool				 nat = false;
980 
981 	if (sk->af == AF_INET) {
982 		bzero(&flow4_1, sizeof(flow4_1));
983 		bzero(&flow4_2, sizeof(flow4_2));
984 
985 		nat = pflow_is_natd(st);
986 
987 		if (st->direction == PF_OUT)
988 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
989 			    1, 0);
990 		else
991 			copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc,
992 			    0, 1);
993 
994 		if (nat)
995 			copy_nat_ipfix_4_data(&nat4_1, &nat4_2, st, sk, sc, 1, 0);
996 
997 		if (st->bytes[0] != 0) /* first flow from state */ {
998 			ret = copy_flow_ipfix_4_to_m(&flow4_1, sc);
999 
1000 			if (ret == 0 && nat) {
1001 				ret = copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
1002 				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
1003 				ret |= copy_nat_ipfix_4_to_m(&nat4_1, st, sc,
1004 				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
1005 			}
1006 		}
1007 
1008 		if (st->bytes[1] != 0) /* second flow from state */ {
1009 			ret = copy_flow_ipfix_4_to_m(&flow4_2, sc);
1010 
1011 			if (ret == 0 && nat) {
1012 				ret = copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
1013 				    PFIX_NAT_EVENT_SESSION_CREATE, st->creation);
1014 				ret |= copy_nat_ipfix_4_to_m(&nat4_2, st, sc,
1015 				    PFIX_NAT_EVENT_SESSION_DELETE, st->expire);
1016 			}
1017 		}
1018 	} else if (sk->af == AF_INET6) {
1019 		bzero(&flow6_1, sizeof(flow6_1));
1020 		bzero(&flow6_2, sizeof(flow6_2));
1021 
1022 		if (st->direction == PF_OUT)
1023 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
1024 			    1, 0);
1025 		else
1026 			copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc,
1027 			    0, 1);
1028 
1029 		if (st->bytes[0] != 0) /* first flow from state */
1030 			ret = copy_flow_ipfix_6_to_m(&flow6_1, sc);
1031 
1032 		if (st->bytes[1] != 0) /* second flow from state */
1033 			ret = copy_flow_ipfix_6_to_m(&flow6_2, sc);
1034 	}
1035 	return (ret);
1036 }
1037 
1038 static void
1039 pflow_timeout(void *v)
1040 {
1041 	struct pflow_softc	*sc = v;
1042 
1043 	PFLOW_ASSERT(sc);
1044 	CURVNET_SET(sc->sc_vnet);
1045 
1046 	switch (sc->sc_version) {
1047 	case PFLOW_PROTO_5:
1048 		pflow_sendout_v5(sc);
1049 		break;
1050 	case PFLOW_PROTO_10:
1051 		pflow_sendout_ipfix(sc, PFLOW_INET);
1052 		break;
1053 	default: /* NOTREACHED */
1054 		panic("Unsupported version %d", sc->sc_version);
1055 		break;
1056 	}
1057 
1058 	CURVNET_RESTORE();
1059 }
1060 
1061 static void
1062 pflow_timeout6(void *v)
1063 {
1064 	struct pflow_softc	*sc = v;
1065 
1066 	PFLOW_ASSERT(sc);
1067 
1068 	if (sc->sc_version != PFLOW_PROTO_10)
1069 		return;
1070 
1071 	CURVNET_SET(sc->sc_vnet);
1072 	pflow_sendout_ipfix(sc, PFLOW_INET6);
1073 	CURVNET_RESTORE();
1074 }
1075 
1076 static void
1077 pflow_timeout_tmpl(void *v)
1078 {
1079 	struct pflow_softc	*sc = v;
1080 
1081 	PFLOW_ASSERT(sc);
1082 
1083 	if (sc->sc_version != PFLOW_PROTO_10)
1084 		return;
1085 
1086 	CURVNET_SET(sc->sc_vnet);
1087 	pflow_sendout_ipfix_tmpl(sc);
1088 	CURVNET_RESTORE();
1089 }
1090 
1091 static void
1092 pflow_timeout_nat4(void *v)
1093 {
1094 	struct pflow_softc	*sc = v;
1095 
1096 	PFLOW_ASSERT(sc);
1097 
1098 	if (sc->sc_version != PFLOW_PROTO_10)
1099 		return;
1100 
1101 	CURVNET_SET(sc->sc_vnet);
1102 	pflow_sendout_ipfix(sc, PFLOW_NAT4);
1103 	CURVNET_RESTORE();
1104 }
1105 
1106 static void
1107 pflow_flush(struct pflow_softc *sc)
1108 {
1109 	PFLOW_ASSERT(sc);
1110 
1111 	switch (sc->sc_version) {
1112 	case PFLOW_PROTO_5:
1113 		pflow_sendout_v5(sc);
1114 		break;
1115 	case PFLOW_PROTO_10:
1116 		pflow_sendout_ipfix(sc, PFLOW_INET);
1117 		pflow_sendout_ipfix(sc, PFLOW_INET6);
1118 		pflow_sendout_ipfix(sc, PFLOW_NAT4);
1119 		break;
1120 	default: /* NOTREACHED */
1121 		break;
1122 	}
1123 }
1124 
1125 static int
1126 pflow_sendout_v5(struct pflow_softc *sc)
1127 {
1128 	struct mbuf		*m = sc->sc_mbuf;
1129 	struct pflow_header	*h;
1130 	struct timespec		tv;
1131 
1132 	PFLOW_ASSERT(sc);
1133 
1134 	if (m == NULL)
1135 		return (0);
1136 
1137 	sc->sc_mbuf = NULL;
1138 
1139 	pflowstat_inc(pflow_packets);
1140 	h = mtod(m, struct pflow_header *);
1141 	h->count = htons(sc->sc_count);
1142 
1143 	/* populate pflow_header */
1144 	h->uptime_ms = htonl(time_uptime * 1000);
1145 
1146 	getnanotime(&tv);
1147 	h->time_sec = htonl(tv.tv_sec);			/* XXX 2038 */
1148 	h->time_nanosec = htonl(tv.tv_nsec);
1149 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1150 		swi_sched(sc->sc_swi_cookie, 0);
1151 
1152 	return (0);
1153 }
1154 
1155 static int
1156 pflow_sendout_ipfix(struct pflow_softc *sc, enum pflow_family_t af)
1157 {
1158 	struct mbuf			*m;
1159 	struct pflow_v10_header		*h10;
1160 	struct pflow_set_header		*set_hdr;
1161 	u_int32_t			 count;
1162 	int				 set_length;
1163 
1164 	PFLOW_ASSERT(sc);
1165 
1166 	switch (af) {
1167 	case PFLOW_INET:
1168 		m = sc->sc_mbuf;
1169 		callout_stop(&sc->sc_tmo);
1170 		if (m == NULL)
1171 			return (0);
1172 		sc->sc_mbuf = NULL;
1173 		count = sc->sc_count4;
1174 		set_length = sizeof(struct pflow_set_header)
1175 		    + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4);
1176 		break;
1177 	case PFLOW_INET6:
1178 		m = sc->sc_mbuf6;
1179 		callout_stop(&sc->sc_tmo6);
1180 		if (m == NULL)
1181 			return (0);
1182 		sc->sc_mbuf6 = NULL;
1183 		count = sc->sc_count6;
1184 		set_length = sizeof(struct pflow_set_header)
1185 		    + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6);
1186 		break;
1187 	case PFLOW_NAT4:
1188 		m = sc->sc_mbuf_nat4;
1189 		callout_stop(&sc->sc_tmo_nat4);
1190 		if (m == NULL)
1191 			return (0);
1192 		sc->sc_mbuf_nat4 = NULL;
1193 		count = sc->sc_count_nat4;
1194 		set_length = sizeof(struct pflow_set_header)
1195 		    + sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4);
1196 		break;
1197 	default:
1198 		panic("Unsupported AF %d", af);
1199 	}
1200 
1201 	pflowstat_inc(pflow_packets);
1202 
1203 	set_hdr = mtod(m, struct pflow_set_header *);
1204 	set_hdr->set_length = htons(set_length);
1205 
1206 	/* populate pflow_header */
1207 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1208 	if (m == NULL) {
1209 		pflowstat_inc(pflow_onomem);
1210 		return (ENOBUFS);
1211 	}
1212 	h10 = mtod(m, struct pflow_v10_header *);
1213 	h10->version = htons(PFLOW_PROTO_10);
1214 	h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length);
1215 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1216 	h10->flow_sequence = htonl(sc->sc_sequence);
1217 	sc->sc_sequence += count;
1218 	h10->observation_dom = htonl(sc->sc_observation_dom);
1219 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1220 		swi_sched(sc->sc_swi_cookie, 0);
1221 
1222 	return (0);
1223 }
1224 
1225 static int
1226 pflow_sendout_ipfix_tmpl(struct pflow_softc *sc)
1227 {
1228 	struct mbuf			*m;
1229 	struct pflow_v10_header		*h10;
1230 
1231 	PFLOW_ASSERT(sc);
1232 
1233 	m = pflow_get_mbuf(sc, 0);
1234 	if (m == NULL)
1235 		return (0);
1236 	m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
1237 	    (caddr_t)&sc->sc_tmpl_ipfix);
1238 
1239 	pflowstat_inc(pflow_packets);
1240 
1241 	/* populate pflow_header */
1242 	M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT);
1243 	if (m == NULL) {
1244 		pflowstat_inc(pflow_onomem);
1245 		return (ENOBUFS);
1246 	}
1247 	h10 = mtod(m, struct pflow_v10_header *);
1248 	h10->version = htons(PFLOW_PROTO_10);
1249 	h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct
1250 	    pflow_ipfix_tmpl));
1251 	h10->time_sec = htonl(time_second);		/* XXX 2038 */
1252 	h10->flow_sequence = htonl(sc->sc_sequence);
1253 	h10->observation_dom = htonl(sc->sc_observation_dom);
1254 
1255 	callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1256 	    pflow_timeout_tmpl, sc);
1257 	if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0)
1258 		swi_sched(sc->sc_swi_cookie, 0);
1259 
1260 	return (0);
1261 }
1262 
1263 static int
1264 pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
1265 {
1266 	if (sc->so == NULL) {
1267 		m_freem(m);
1268 		return (EINVAL);
1269 	}
1270 	return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread));
1271 }
1272 
1273 static int
1274 sysctl_pflowstats(SYSCTL_HANDLER_ARGS)
1275 {
1276 	struct pflowstats pflowstats;
1277 
1278 	pflowstats.pflow_flows =
1279 	    counter_u64_fetch(V_pflowstats.c[pflow_flows]);
1280 	pflowstats.pflow_packets =
1281 	    counter_u64_fetch(V_pflowstats.c[pflow_packets]);
1282 	pflowstats.pflow_onomem =
1283 	    counter_u64_fetch(V_pflowstats.c[pflow_onomem]);
1284 	pflowstats.pflow_oerrors =
1285 	    counter_u64_fetch(V_pflowstats.c[pflow_oerrors]);
1286 
1287 	return (sysctl_handle_opaque(oidp, &pflowstats, sizeof(pflowstats), req));
1288 }
1289 
1290 static int
1291 pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt)
1292 {
1293 	struct epoch_tracker	 et;
1294 	struct pflow_softc	*sc = NULL;
1295 	struct nl_writer	 *nw = npt->nw;
1296 	int			 error = 0;
1297 
1298 	hdr->nlmsg_flags |= NLM_F_MULTI;
1299 
1300 	NET_EPOCH_ENTER(et);
1301 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1302 		if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1303 			error = ENOMEM;
1304 			goto out;
1305 		}
1306 
1307 		struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1308 		ghdr_new->cmd = PFLOWNL_CMD_LIST;
1309 		ghdr_new->version = 0;
1310 		ghdr_new->reserved = 0;
1311 
1312 		nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id);
1313 
1314 		if (! nlmsg_end(nw)) {
1315 			error = ENOMEM;
1316 			goto out;
1317 		}
1318 	}
1319 
1320 out:
1321 	NET_EPOCH_EXIT(et);
1322 
1323 	if (error != 0)
1324 		nlmsg_abort(nw);
1325 
1326 	return (error);
1327 }
1328 
1329 static int
1330 pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt)
1331 {
1332 	struct nl_writer	 *nw = npt->nw;
1333 	int			 error = 0;
1334 	int			 unit;
1335 
1336 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1337 		return (ENOMEM);
1338 	}
1339 
1340 	struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1341 	ghdr_new->cmd = PFLOWNL_CMD_CREATE;
1342 	ghdr_new->version = 0;
1343 	ghdr_new->reserved = 0;
1344 
1345 	unit = alloc_unr(V_pflow_unr);
1346 	if (unit == -1) {
1347 		nlmsg_abort(nw);
1348 		return (ENOMEM);
1349 	}
1350 
1351 	error = pflow_create(unit);
1352 	if (error != 0) {
1353 		free_unr(V_pflow_unr, unit);
1354 		nlmsg_abort(nw);
1355 		return (error);
1356 	}
1357 
1358 	nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit);
1359 
1360 	if (! nlmsg_end(nw)) {
1361 		pflow_destroy(unit, true);
1362 		return (ENOMEM);
1363 	}
1364 
1365 	return (0);
1366 }
1367 
1368 struct pflow_parsed_del {
1369 	int id;
1370 };
1371 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1372 #define	_OUT(_field)	offsetof(struct pflow_parsed_del, _field)
1373 static const struct nlattr_parser nla_p_del[] = {
1374 	{ .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1375 };
1376 static const struct nlfield_parser nlf_p_del[] = {};
1377 #undef _IN
1378 #undef _OUT
1379 NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del);
1380 
1381 static int
1382 pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt)
1383 {
1384 	struct pflow_parsed_del d = {};
1385 	int error;
1386 
1387 	error = nl_parse_nlmsg(hdr, &del_parser, npt, &d);
1388 	if (error != 0)
1389 		return (error);
1390 
1391 	error = pflow_destroy(d.id, true);
1392 
1393 	return (error);
1394 }
1395 
1396 struct pflow_parsed_get {
1397 	int id;
1398 };
1399 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1400 #define	_OUT(_field)	offsetof(struct pflow_parsed_get, _field)
1401 static const struct nlattr_parser nla_p_get[] = {
1402 	{ .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1403 };
1404 static const struct nlfield_parser nlf_p_get[] = {};
1405 #undef _IN
1406 #undef _OUT
1407 NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get);
1408 
1409 static bool
1410 nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s)
1411 {
1412 	int off = nlattr_add_nested(nw, attr);
1413 	if (off == 0)
1414 		return (false);
1415 
1416 	nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family);
1417 
1418 	switch (s->sa_family) {
1419 	case AF_INET: {
1420 		const struct sockaddr_in *in = (const struct sockaddr_in *)s;
1421 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port);
1422 		nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr);
1423 		break;
1424 	}
1425 	case AF_INET6: {
1426 		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s;
1427 		nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port);
1428 		nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr);
1429 		break;
1430 	}
1431 	default:
1432 		panic("Unknown address family %d", s->sa_family);
1433 	}
1434 
1435 	nlattr_set_len(nw, off);
1436 	return (true);
1437 }
1438 
1439 static int
1440 pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
1441 {
1442 	struct epoch_tracker et;
1443 	struct pflow_parsed_get g = {};
1444 	struct pflow_softc *sc = NULL;
1445 	struct nl_writer *nw = npt->nw;
1446 	struct genlmsghdr *ghdr_new;
1447 	int error;
1448 
1449 	error = nl_parse_nlmsg(hdr, &get_parser, npt, &g);
1450 	if (error != 0)
1451 		return (error);
1452 
1453 	NET_EPOCH_ENTER(et);
1454 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1455 		if (sc->sc_id == g.id)
1456 			break;
1457 	}
1458 	if (sc == NULL) {
1459 		error = ENOENT;
1460 		goto out;
1461 	}
1462 
1463 	if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
1464 		nlmsg_abort(nw);
1465 		error = ENOMEM;
1466 		goto out;
1467 	}
1468 
1469 	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
1470 	if (ghdr_new == NULL) {
1471 		nlmsg_abort(nw);
1472 		error = ENOMEM;
1473 		goto out;
1474 	}
1475 
1476 	ghdr_new->cmd = PFLOWNL_CMD_GET;
1477 	ghdr_new->version = 0;
1478 	ghdr_new->reserved = 0;
1479 
1480 	nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id);
1481 	nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version);
1482 	if (sc->sc_flowsrc)
1483 		nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc);
1484 	if (sc->sc_flowdst)
1485 		nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst);
1486 	nlattr_add_u32(nw, PFLOWNL_GET_OBSERVATION_DOMAIN,
1487 	    sc->sc_observation_dom);
1488 	nlattr_add_u8(nw, PFLOWNL_GET_SOCKET_STATUS, sc->so != NULL);
1489 
1490 	if (! nlmsg_end(nw)) {
1491 		nlmsg_abort(nw);
1492 		error = ENOMEM;
1493 	}
1494 
1495 out:
1496 	NET_EPOCH_EXIT(et);
1497 
1498 	return (error);
1499 }
1500 
1501 struct pflow_sockaddr {
1502 	union {
1503 		struct sockaddr_in in;
1504 		struct sockaddr_in6 in6;
1505 		struct sockaddr_storage storage;
1506 	};
1507 };
1508 static bool
1509 pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused)
1510 {
1511 	struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args;
1512 
1513 	if (s->storage.ss_family == AF_INET)
1514 		s->storage.ss_len = sizeof(struct sockaddr_in);
1515 	else if (s->storage.ss_family == AF_INET6)
1516 		s->storage.ss_len = sizeof(struct sockaddr_in6);
1517 	else
1518 		return (false);
1519 
1520 	return (true);
1521 }
1522 
1523 #define	_OUT(_field)	offsetof(struct pflow_sockaddr, _field)
1524 static struct nlattr_parser nla_p_sockaddr[] = {
1525 	{ .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 },
1526 	{ .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 },
1527 	{ .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr },
1528 	{ .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr },
1529 };
1530 NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr);
1531 #undef _OUT
1532 
1533 struct pflow_parsed_set {
1534 	int id;
1535 	uint16_t version;
1536 	struct sockaddr_storage src;
1537 	struct sockaddr_storage dst;
1538 	uint32_t observation_dom;
1539 };
1540 #define	_IN(_field)	offsetof(struct genlmsghdr, _field)
1541 #define	_OUT(_field)	offsetof(struct pflow_parsed_set, _field)
1542 static const struct nlattr_parser nla_p_set[] = {
1543 	{ .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 },
1544 	{ .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 },
1545 	{ .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested },
1546 	{ .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested },
1547 	{ .type = PFLOWNL_SET_OBSERVATION_DOMAIN, .off = _OUT(observation_dom), .cb = nlattr_get_uint32 },
1548 };
1549 static const struct nlfield_parser nlf_p_set[] = {};
1550 #undef _IN
1551 #undef _OUT
1552 NL_DECLARE_PARSER(set_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
1553 
1554 static int
1555 pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred)
1556 {
1557 	struct thread		*td;
1558 	struct socket		*so;
1559 	int			 error = 0;
1560 
1561 	td = curthread;
1562 
1563 	PFLOW_ASSERT(sc);
1564 
1565 	if (pflowr->version != 0) {
1566 		switch(pflowr->version) {
1567 		case PFLOW_PROTO_5:
1568 		case PFLOW_PROTO_10:
1569 			break;
1570 		default:
1571 			return(EINVAL);
1572 		}
1573 	}
1574 
1575 	pflow_flush(sc);
1576 
1577 	if (pflowr->dst.ss_len != 0) {
1578 		if (sc->sc_flowdst != NULL &&
1579 		    sc->sc_flowdst->sa_family != pflowr->dst.ss_family) {
1580 			free(sc->sc_flowdst, M_DEVBUF);
1581 			sc->sc_flowdst = NULL;
1582 			if (sc->so != NULL) {
1583 				soclose(sc->so);
1584 				sc->so = NULL;
1585 			}
1586 		}
1587 
1588 		switch (pflowr->dst.ss_family) {
1589 		case AF_INET:
1590 			if (sc->sc_flowdst == NULL) {
1591 				if ((sc->sc_flowdst = malloc(
1592 				    sizeof(struct sockaddr_in),
1593 				    M_DEVBUF,  M_NOWAIT)) == NULL)
1594 					return (ENOMEM);
1595 			}
1596 			memcpy(sc->sc_flowdst, &pflowr->dst,
1597 			    sizeof(struct sockaddr_in));
1598 			sc->sc_flowdst->sa_len = sizeof(struct
1599 			    sockaddr_in);
1600 			break;
1601 		case AF_INET6:
1602 			if (sc->sc_flowdst == NULL) {
1603 				if ((sc->sc_flowdst = malloc(
1604 				    sizeof(struct sockaddr_in6),
1605 				    M_DEVBUF, M_NOWAIT)) == NULL)
1606 					return (ENOMEM);
1607 			}
1608 			memcpy(sc->sc_flowdst, &pflowr->dst,
1609 			    sizeof(struct sockaddr_in6));
1610 			sc->sc_flowdst->sa_len = sizeof(struct
1611 			    sockaddr_in6);
1612 			break;
1613 		default:
1614 			break;
1615 		}
1616 	}
1617 
1618 	if (pflowr->src.ss_len != 0) {
1619 		if (sc->sc_flowsrc != NULL)
1620 			free(sc->sc_flowsrc, M_DEVBUF);
1621 		sc->sc_flowsrc = NULL;
1622 		if (sc->so != NULL) {
1623 			soclose(sc->so);
1624 			sc->so = NULL;
1625 		}
1626 		switch(pflowr->src.ss_family) {
1627 		case AF_INET:
1628 			if ((sc->sc_flowsrc = malloc(
1629 			    sizeof(struct sockaddr_in),
1630 			    M_DEVBUF, M_NOWAIT)) == NULL)
1631 				return (ENOMEM);
1632 			memcpy(sc->sc_flowsrc, &pflowr->src,
1633 			    sizeof(struct sockaddr_in));
1634 			sc->sc_flowsrc->sa_len = sizeof(struct
1635 			    sockaddr_in);
1636 			break;
1637 		case AF_INET6:
1638 			if ((sc->sc_flowsrc = malloc(
1639 			    sizeof(struct sockaddr_in6),
1640 			    M_DEVBUF, M_NOWAIT)) == NULL)
1641 				return (ENOMEM);
1642 			memcpy(sc->sc_flowsrc, &pflowr->src,
1643 			    sizeof(struct sockaddr_in6));
1644 			sc->sc_flowsrc->sa_len = sizeof(struct
1645 			    sockaddr_in6);
1646 			break;
1647 		default:
1648 			break;
1649 		}
1650 	}
1651 
1652 	if (sc->so == NULL) {
1653 		if (pflowvalidsockaddr(sc->sc_flowdst, 0)) {
1654 			error = socreate(sc->sc_flowdst->sa_family,
1655 			    &so, SOCK_DGRAM, IPPROTO_UDP, cred, td);
1656 			if (error)
1657 				return (error);
1658 			if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) {
1659 				error = sobind(so, sc->sc_flowsrc, td);
1660 				if (error) {
1661 					soclose(so);
1662 					return (error);
1663 				}
1664 			}
1665 			sc->so = so;
1666 		}
1667 	} else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) {
1668 		soclose(sc->so);
1669 		sc->so = NULL;
1670 	}
1671 
1672 	if (pflowr->observation_dom != 0)
1673 		sc->sc_observation_dom = pflowr->observation_dom;
1674 
1675 	/* error check is above */
1676 	if (pflowr->version != 0)
1677 		sc->sc_version = pflowr->version;
1678 
1679 	pflow_setmtu(sc, ETHERMTU);
1680 
1681 	switch (sc->sc_version) {
1682 	case PFLOW_PROTO_5:
1683 		callout_stop(&sc->sc_tmo6);
1684 		callout_stop(&sc->sc_tmo_tmpl);
1685 		break;
1686 	case PFLOW_PROTO_10:
1687 		callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz,
1688 		    pflow_timeout_tmpl, sc);
1689 		break;
1690 	default: /* NOTREACHED */
1691 		break;
1692 	}
1693 
1694 	return (0);
1695 }
1696 
1697 static int
1698 pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
1699 {
1700 	struct epoch_tracker et;
1701 	struct pflow_parsed_set s = {};
1702 	struct pflow_softc *sc = NULL;
1703 	int error;
1704 
1705 	error = nl_parse_nlmsg(hdr, &set_parser, npt, &s);
1706 	if (error != 0)
1707 		return (error);
1708 
1709 	NET_EPOCH_ENTER(et);
1710 	CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) {
1711 		if (sc->sc_id == s.id)
1712 			break;
1713 	}
1714 	if (sc == NULL) {
1715 		error = ENOENT;
1716 		goto out;
1717 	}
1718 
1719 	PFLOW_LOCK(sc);
1720 	error = pflow_set(sc, &s, nlp_get_cred(npt->nlp));
1721 	PFLOW_UNLOCK(sc);
1722 
1723 out:
1724 	NET_EPOCH_EXIT(et);
1725 	return (error);
1726 }
1727 
1728 static const struct genl_cmd pflow_cmds[] = {
1729 	{
1730 		.cmd_num = PFLOWNL_CMD_LIST,
1731 		.cmd_name = "LIST",
1732 		.cmd_cb = pflow_nl_list,
1733 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1734 		.cmd_priv = PRIV_NETINET_PF,
1735 	},
1736 	{
1737 		.cmd_num = PFLOWNL_CMD_CREATE,
1738 		.cmd_name = "CREATE",
1739 		.cmd_cb = pflow_nl_create,
1740 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1741 		.cmd_priv = PRIV_NETINET_PF,
1742 	},
1743 	{
1744 		.cmd_num = PFLOWNL_CMD_DEL,
1745 		.cmd_name = "DEL",
1746 		.cmd_cb = pflow_nl_del,
1747 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1748 		.cmd_priv = PRIV_NETINET_PF,
1749 	},
1750 	{
1751 		.cmd_num = PFLOWNL_CMD_GET,
1752 		.cmd_name = "GET",
1753 		.cmd_cb = pflow_nl_get,
1754 		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1755 		.cmd_priv = PRIV_NETINET_PF,
1756 	},
1757 	{
1758 		.cmd_num = PFLOWNL_CMD_SET,
1759 		.cmd_name = "SET",
1760 		.cmd_cb = pflow_nl_set,
1761 		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
1762 		.cmd_priv = PRIV_NETINET_PF,
1763 	},
1764 };
1765 
1766 static const struct nlhdr_parser *all_parsers[] = {
1767 	&del_parser,
1768 	&get_parser,
1769 	&set_parser,
1770 };
1771 
1772 static int
1773 pflow_init(void)
1774 {
1775 	bool ret;
1776 	int family_id __diagused;
1777 
1778 	NL_VERIFY_PARSERS(all_parsers);
1779 
1780 	family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX);
1781 	MPASS(family_id != 0);
1782 	ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, NL_ARRAY_LEN(pflow_cmds));
1783 
1784 	return (ret ? 0 : ENODEV);
1785 }
1786 
1787 static void
1788 pflow_uninit(void)
1789 {
1790 	genl_unregister_family(PFLOWNL_FAMILY_NAME);
1791 }
1792 
1793 static int
1794 pflow_modevent(module_t mod, int type, void *data)
1795 {
1796 	int error = 0;
1797 
1798 	switch (type) {
1799 	case MOD_LOAD:
1800 		error = pflow_init();
1801 		break;
1802 	case MOD_UNLOAD:
1803 		pflow_uninit();
1804 		break;
1805 	default:
1806 		error = EINVAL;
1807 		break;
1808 	}
1809 
1810 	return (error);
1811 }
1812 
1813 static moduledata_t pflow_mod = {
1814 	pflowname,
1815 	pflow_modevent,
1816 	0
1817 };
1818 
1819 DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
1820 MODULE_VERSION(pflow, 1);
1821 MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1822