xref: /openbsd/sys/net/pf_ioctl.c (revision 862c3389)
1 /*	$OpenBSD: pf_ioctl.c,v 1.418 2024/07/18 14:46:28 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2018 Henning Brauer <henning@openbsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "pfsync.h"
39 #include "pflog.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/sysctl.h>
44 #include <sys/mbuf.h>
45 #include <sys/filio.h>
46 #include <sys/fcntl.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/timeout.h>
52 #include <sys/pool.h>
53 #include <sys/malloc.h>
54 #include <sys/proc.h>
55 #include <sys/rwlock.h>
56 #include <sys/syslog.h>
57 #include <sys/specdev.h>
58 #include <uvm/uvm_extern.h>
59 
60 #include <crypto/md5.h>
61 
62 #include <net/if.h>
63 #include <net/if_var.h>
64 #include <net/route.h>
65 #include <net/hfsc.h>
66 #include <net/fq_codel.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/in_pcb.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/ip_icmp.h>
73 #include <netinet/tcp.h>
74 #include <netinet/udp.h>
75 
76 #ifdef INET6
77 #include <netinet/ip6.h>
78 #include <netinet/icmp6.h>
79 #endif /* INET6 */
80 
81 #include <net/pfvar.h>
82 #include <net/pfvar_priv.h>
83 
84 #if NPFSYNC > 0
85 #include <netinet/ip_ipsp.h>
86 #include <net/if_pfsync.h>
87 #endif /* NPFSYNC > 0 */
88 
struct pool		 pf_tag_pl;

/* forward declarations for the static machinery defined below */
void			 pfattach(int);
int			 pfopen(dev_t, int, int, struct proc *);
int			 pfclose(dev_t, int, int, struct proc *);
int			 pfioctl(dev_t, u_long, caddr_t, int, struct proc *);
int			 pf_begin_rules(u_int32_t *, const char *);
void			 pf_rollback_rules(u_int32_t, char *);
void			 pf_remove_queues(void);
int			 pf_commit_queues(void);
void			 pf_free_queues(struct pf_queuehead *);
void			 pf_calc_chksum(struct pf_ruleset *);
void			 pf_hash_rule(MD5_CTX *, struct pf_rule *);
void			 pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
int			 pf_commit_rules(u_int32_t, char *);
int			 pf_addr_setup(struct pf_ruleset *,
			    struct pf_addr_wrap *, sa_family_t);
struct pfi_kif		*pf_kif_setup(struct pfi_kif *);
void			 pf_addr_copyout(struct pf_addr_wrap *);
void			 pf_trans_set_commit(void);
void			 pf_pool_copyin(struct pf_pool *, struct pf_pool *);
int			 pf_validate_range(u_int8_t, u_int16_t[2], int);
int			 pf_rule_copyin(struct pf_rule *, struct pf_rule *);
int			 pf_rule_checkaf(struct pf_rule *);
u_int16_t		 pf_qname2qid(char *, int);
void			 pf_qid2qname(u_int16_t, char *);
void			 pf_qid_unref(u_int16_t);
int			 pf_states_clr(struct pfioc_state_kill *);
int			 pf_states_get(struct pfioc_states *);

struct pf_trans		*pf_open_trans(uint32_t);
struct pf_trans		*pf_find_trans(uint32_t, uint64_t);
void			 pf_free_trans(struct pf_trans *);
void			 pf_rollback_trans(struct pf_trans *);

void			 pf_init_tgetrule(struct pf_trans *,
			    struct pf_anchor *, uint32_t, struct pf_rule *);
void			 pf_cleanup_tgetrule(struct pf_trans *t);

struct pf_rule		 pf_default_rule, pf_default_rule_new;

/*
 * Staging area for settings changed via ioctl; pf_trans_set_commit()
 * (defined elsewhere) presumably applies the fields flagged in "mask".
 */
struct {
	char		statusif[IFNAMSIZ];
	u_int32_t	debug;
	u_int32_t	hostid;
	u_int32_t	reass;
	u_int32_t	mask;		/* PF_TSET_* bits of fields to apply */
} pf_trans_set;

#define	PF_ORDER_HOST	0
#define	PF_ORDER_NET	1

/* bits for pf_trans_set.mask */
#define	PF_TSET_STATUSIF	0x01
#define	PF_TSET_DEBUG		0x02
#define	PF_TSET_HOSTID		0x04
#define	PF_TSET_REASS		0x08

/* highest id handed out by tagname2tag(); ids are recycled below this */
#define	TAGID_MAX	 50000
TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
				pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);

/*
 * pf_lock protects consistency of PF data structures, which don't have
 * their dedicated lock yet. The pf_lock currently protects:
 *	- rules,
 *	- radix tables,
 *	- source nodes
 * All callers must grab pf_lock exclusively.
 *
 * pf_state_lock protects consistency of state table. Packets, which do state
 * look up grab the lock as readers. If packet must create state, then it must
 * grab the lock as writer. Whenever packet creates state it grabs pf_lock
 * first then it locks pf_state_lock as the writer.
 */
struct rwlock		 pf_lock = RWLOCK_INITIALIZER("pf_lock");
struct rwlock		 pf_state_lock = RWLOCK_INITIALIZER("pf_state_lock");
struct rwlock		 pfioctl_rw = RWLOCK_INITIALIZER("pfioctl_rw");

/* per-CPU anchor recursion stack; allocated in pfattach() */
struct cpumem *pf_anchor_stack;

#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif
u_int16_t		 tagname2tag(struct pf_tags *, char *, int);
void			 tag2tagname(struct pf_tags *, u_int16_t, char *);
void			 tag_unref(struct pf_tags *, u_int16_t);
int			 pf_rtlabel_add(struct pf_addr_wrap *);
void			 pf_rtlabel_remove(struct pf_addr_wrap *);
void			 pf_rtlabel_copyout(struct pf_addr_wrap *);

/* transactions opened via /dev/pf, protected by pfioctl_rw */
LIST_HEAD(, pf_trans)	pf_ioctl_trans = LIST_HEAD_INITIALIZER(pf_trans);

/* counts transactions opened by a device */
unsigned int pf_tcount[CLONE_MAPSZ * NBBY];
#define pf_unit2idx(_unit_)	((_unit_) >> CLONE_SHIFT)
184 
/*
 * Driver attach: initialize the memory pools, the pf subsystems, the
 * default rule with its timeouts, global status, and the per-CPU
 * anchor stack.  Runs once at boot.
 */
void
pfattach(int num)
{
	u_int32_t *timeout = pf_default_rule.timeout;
	struct pf_anchor_stackframe *sf;
	struct cpumem_iter cmi;

	/* pools backing every dynamically allocated pf object */
	pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0,
	    IPL_SOFTNET, 0, "pfrule", NULL);
	pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0,
	    IPL_SOFTNET, 0, "pfsrctr", NULL);
	pool_init(&pf_sn_item_pl, sizeof(struct pf_sn_item), 0,
	    IPL_SOFTNET, 0, "pfsnitem", NULL);
	pool_init(&pf_state_pl, sizeof(struct pf_state), 0,
	    IPL_SOFTNET, 0, "pfstate", NULL);
	pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0,
	    IPL_SOFTNET, 0, "pfstkey", NULL);
	pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0,
	    IPL_SOFTNET, 0, "pfstitem", NULL);
	pool_init(&pf_rule_item_pl, sizeof(struct pf_rule_item), 0,
	    IPL_SOFTNET, 0, "pfruleitem", NULL);
	pool_init(&pf_queue_pl, sizeof(struct pf_queuespec), 0,
	    IPL_SOFTNET, 0, "pfqueue", NULL);
	pool_init(&pf_tag_pl, sizeof(struct pf_tagname), 0,
	    IPL_SOFTNET, 0, "pftag", NULL);
	pool_init(&pf_pktdelay_pl, sizeof(struct pf_pktdelay), 0,
	    IPL_SOFTNET, 0, "pfpktdelay", NULL);
	pool_init(&pf_anchor_pl, sizeof(struct pf_anchor), 0,
	    IPL_SOFTNET, 0, "pfanchor", NULL);

	/* bring up the subsystems pf depends on */
	hfsc_initialize();
	pfr_initialize();
	pfi_initialize();
	pf_osfp_initialize();
	pf_syncookies_init();

	pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp,
	    pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0);
	pool_sethardlimit(pf_pool_limits[PF_LIMIT_ANCHORS].pp,
	    pf_pool_limits[PF_LIMIT_ANCHORS].limit, NULL, 0);

	/* on machines with at most ~100MB of RAM, use the small table limit */
	if (physmem <= atop(100*1024*1024))
		pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit =
		    PFR_KENTRY_HIWAT_SMALL;

	RB_INIT(&tree_src_tracking);
	RB_INIT(&pf_anchors);
	pf_init_ruleset(&pf_main_ruleset);
	/* two queue lists, flipped between active/inactive on commit */
	TAILQ_INIT(&pf_queues[0]);
	TAILQ_INIT(&pf_queues[1]);
	pf_queues_active = &pf_queues[0];
	pf_queues_inactive = &pf_queues[1];

	/* default rule should never be garbage collected */
	pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
	pf_default_rule.action = PF_PASS;
	pf_default_rule.nr = (u_int32_t)-1;
	pf_default_rule.rtableid = -1;

	/* initialize default timeouts */
	timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
	timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
	timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
	timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
	timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
	timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
	timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
	timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
	timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
	timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
	timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
	timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
	timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
	timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
	timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
	timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
	timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
	timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
	timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
	timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;

	pf_default_rule.src.addr.type =  PF_ADDR_ADDRMASK;
	pf_default_rule.dst.addr.type =  PF_ADDR_ADDRMASK;
	pf_default_rule.rdr.addr.type =  PF_ADDR_NONE;
	pf_default_rule.nat.addr.type =  PF_ADDR_NONE;
	pf_default_rule.route.addr.type =  PF_ADDR_NONE;

	pf_normalize_init();
	memset(&pf_status, 0, sizeof(pf_status));
	pf_status.debug = LOG_ERR;
	pf_status.reass = PF_REASS_ENABLED;

	/* XXX do our best to avoid a conflict */
	pf_status.hostid = arc4random();

	pf_default_rule_new = pf_default_rule;

	/*
	 * we waste two stack frames as meta-data.
	 * frame[0] always presents a top, which can not be used for data
	 * frame[PF_ANCHOR_STACK_MAX] denotes a bottom of the stack and keeps
	 * the pointer to currently used stack frame.
	 */
	pf_anchor_stack = cpumem_malloc(
	    sizeof(struct pf_anchor_stackframe) * (PF_ANCHOR_STACK_MAX + 2),
	    M_PF);
	CPUMEM_FOREACH(sf, &cmi, pf_anchor_stack)
		sf[PF_ANCHOR_STACK_MAX].sf_stack_top = &sf[0];
}
294 
295 int
pfopen(dev_t dev,int flags,int fmt,struct proc * p)296 pfopen(dev_t dev, int flags, int fmt, struct proc *p)
297 {
298 	int unit = minor(dev);
299 
300 	if (unit & ((1 << CLONE_SHIFT) - 1))
301 		return (ENXIO);
302 
303 	return (0);
304 }
305 
306 int
pfclose(dev_t dev,int flags,int fmt,struct proc * p)307 pfclose(dev_t dev, int flags, int fmt, struct proc *p)
308 {
309 	struct pf_trans *w, *s;
310 	LIST_HEAD(, pf_trans)	tmp_list;
311 	uint32_t unit = minor(dev);
312 
313 	LIST_INIT(&tmp_list);
314 	rw_enter_write(&pfioctl_rw);
315 	LIST_FOREACH_SAFE(w, &pf_ioctl_trans, pft_entry, s) {
316 		if (w->pft_unit == unit) {
317 			LIST_REMOVE(w, pft_entry);
318 			LIST_INSERT_HEAD(&tmp_list, w, pft_entry);
319 		}
320 	}
321 	rw_exit_write(&pfioctl_rw);
322 
323 	while ((w = LIST_FIRST(&tmp_list)) != NULL) {
324 		LIST_REMOVE(w, pft_entry);
325 		pf_free_trans(w);
326 	}
327 
328 	return (0);
329 }
330 
331 void
pf_rule_free(struct pf_rule * rule)332 pf_rule_free(struct pf_rule *rule)
333 {
334 	if (rule == NULL)
335 		return;
336 
337 	pfi_kif_free(rule->kif);
338 	pfi_kif_free(rule->rcv_kif);
339 	pfi_kif_free(rule->rdr.kif);
340 	pfi_kif_free(rule->nat.kif);
341 	pfi_kif_free(rule->route.kif);
342 
343 	pool_put(&pf_rule_pl, rule);
344 }
345 
/*
 * Unlink a rule from rulequeue (when given) and release all resources
 * it references.  The rule itself is only freed once no states or
 * source nodes point at it any more; otherwise it lingers unlinked
 * until the last reference drops and this function is called again
 * with rulequeue == NULL.
 */
void
pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
{
	if (rulequeue != NULL) {
		if (rule->states_cur == 0 && rule->src_nodes == 0) {
			/*
			 * XXX - we need to remove the table *before* detaching
			 * the rule to make sure the table code does not delete
			 * the anchor under our feet.
			 */
			pf_tbladdr_remove(&rule->src.addr);
			pf_tbladdr_remove(&rule->dst.addr);
			pf_tbladdr_remove(&rule->rdr.addr);
			pf_tbladdr_remove(&rule->nat.addr);
			pf_tbladdr_remove(&rule->route.addr);
			if (rule->overload_tbl)
				pfr_detach_table(rule->overload_tbl);
		}
		TAILQ_REMOVE(rulequeue, rule, entries);
		/* NULL tqe_prev marks the rule as unlinked for the check below */
		rule->entries.tqe_prev = NULL;
		rule->nr = (u_int32_t)-1;
	}

	/* defer the actual free while states/source nodes still use the rule */
	if (rule->states_cur > 0 || rule->src_nodes > 0 ||
	    rule->entries.tqe_prev != NULL)
		return;
	pf_tag_unref(rule->tag);
	pf_tag_unref(rule->match_tag);
	pf_rtlabel_remove(&rule->src.addr);
	pf_rtlabel_remove(&rule->dst.addr);
	pfi_dynaddr_remove(&rule->src.addr);
	pfi_dynaddr_remove(&rule->dst.addr);
	pfi_dynaddr_remove(&rule->rdr.addr);
	pfi_dynaddr_remove(&rule->nat.addr);
	pfi_dynaddr_remove(&rule->route.addr);
	if (rulequeue == NULL) {
		/* tables were not detached above; do it now */
		pf_tbladdr_remove(&rule->src.addr);
		pf_tbladdr_remove(&rule->dst.addr);
		pf_tbladdr_remove(&rule->rdr.addr);
		pf_tbladdr_remove(&rule->nat.addr);
		pf_tbladdr_remove(&rule->route.addr);
		if (rule->overload_tbl)
			pfr_detach_table(rule->overload_tbl);
	}
	pfi_kif_unref(rule->rcv_kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->rdr.kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->nat.kif, PFI_KIF_REF_RULE);
	pfi_kif_unref(rule->route.kif, PFI_KIF_REF_RULE);
	pf_remove_anchor(rule);
	pool_put(&pf_rule_pl, rule);
}
398 
399 u_int16_t
tagname2tag(struct pf_tags * head,char * tagname,int create)400 tagname2tag(struct pf_tags *head, char *tagname, int create)
401 {
402 	struct pf_tagname	*tag, *p = NULL;
403 	u_int16_t		 new_tagid = 1;
404 
405 	TAILQ_FOREACH(tag, head, entries)
406 		if (strcmp(tagname, tag->name) == 0) {
407 			tag->ref++;
408 			return (tag->tag);
409 		}
410 
411 	if (!create)
412 		return (0);
413 
414 	/*
415 	 * to avoid fragmentation, we do a linear search from the beginning
416 	 * and take the first free slot we find. if there is none or the list
417 	 * is empty, append a new entry at the end.
418 	 */
419 
420 	/* new entry */
421 	TAILQ_FOREACH(p, head, entries) {
422 		if (p->tag != new_tagid)
423 			break;
424 		new_tagid = p->tag + 1;
425 	}
426 
427 	if (new_tagid > TAGID_MAX)
428 		return (0);
429 
430 	/* allocate and fill new struct pf_tagname */
431 	tag = pool_get(&pf_tag_pl, PR_NOWAIT | PR_ZERO);
432 	if (tag == NULL)
433 		return (0);
434 	strlcpy(tag->name, tagname, sizeof(tag->name));
435 	tag->tag = new_tagid;
436 	tag->ref++;
437 
438 	if (p != NULL)	/* insert new entry before p */
439 		TAILQ_INSERT_BEFORE(p, tag, entries);
440 	else	/* either list empty or no free slot in between */
441 		TAILQ_INSERT_TAIL(head, tag, entries);
442 
443 	return (tag->tag);
444 }
445 
446 void
tag2tagname(struct pf_tags * head,u_int16_t tagid,char * p)447 tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p)
448 {
449 	struct pf_tagname	*tag;
450 
451 	TAILQ_FOREACH(tag, head, entries)
452 		if (tag->tag == tagid) {
453 			strlcpy(p, tag->name, PF_TAG_NAME_SIZE);
454 			return;
455 		}
456 }
457 
458 void
tag_unref(struct pf_tags * head,u_int16_t tag)459 tag_unref(struct pf_tags *head, u_int16_t tag)
460 {
461 	struct pf_tagname	*p, *next;
462 
463 	if (tag == 0)
464 		return;
465 
466 	TAILQ_FOREACH_SAFE(p, head, entries, next) {
467 		if (tag == p->tag) {
468 			if (--p->ref == 0) {
469 				TAILQ_REMOVE(head, p, entries);
470 				pool_put(&pf_tag_pl, p);
471 			}
472 			break;
473 		}
474 	}
475 }
476 
477 u_int16_t
pf_tagname2tag(char * tagname,int create)478 pf_tagname2tag(char *tagname, int create)
479 {
480 	return (tagname2tag(&pf_tags, tagname, create));
481 }
482 
483 void
pf_tag2tagname(u_int16_t tagid,char * p)484 pf_tag2tagname(u_int16_t tagid, char *p)
485 {
486 	tag2tagname(&pf_tags, tagid, p);
487 }
488 
489 void
pf_tag_ref(u_int16_t tag)490 pf_tag_ref(u_int16_t tag)
491 {
492 	struct pf_tagname *t;
493 
494 	TAILQ_FOREACH(t, &pf_tags, entries)
495 		if (t->tag == tag)
496 			break;
497 	if (t != NULL)
498 		t->ref++;
499 }
500 
501 void
pf_tag_unref(u_int16_t tag)502 pf_tag_unref(u_int16_t tag)
503 {
504 	tag_unref(&pf_tags, tag);
505 }
506 
507 int
pf_rtlabel_add(struct pf_addr_wrap * a)508 pf_rtlabel_add(struct pf_addr_wrap *a)
509 {
510 	if (a->type == PF_ADDR_RTLABEL &&
511 	    (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0)
512 		return (-1);
513 	return (0);
514 }
515 
516 void
pf_rtlabel_remove(struct pf_addr_wrap * a)517 pf_rtlabel_remove(struct pf_addr_wrap *a)
518 {
519 	if (a->type == PF_ADDR_RTLABEL)
520 		rtlabel_unref(a->v.rtlabel);
521 }
522 
523 void
pf_rtlabel_copyout(struct pf_addr_wrap * a)524 pf_rtlabel_copyout(struct pf_addr_wrap *a)
525 {
526 	if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) {
527 		if (rtlabel_id2name(a->v.rtlabel, a->v.rtlabelname,
528 		    sizeof(a->v.rtlabelname)) == NULL)
529 			strlcpy(a->v.rtlabelname, "?",
530 			    sizeof(a->v.rtlabelname));
531 	}
532 }
533 
534 u_int16_t
pf_qname2qid(char * qname,int create)535 pf_qname2qid(char *qname, int create)
536 {
537 	return (tagname2tag(&pf_qids, qname, create));
538 }
539 
540 void
pf_qid2qname(u_int16_t qid,char * p)541 pf_qid2qname(u_int16_t qid, char *p)
542 {
543 	tag2tagname(&pf_qids, qid, p);
544 }
545 
546 void
pf_qid_unref(u_int16_t qid)547 pf_qid_unref(u_int16_t qid)
548 {
549 	tag_unref(&pf_qids, (u_int16_t)qid);
550 }
551 
552 int
pf_begin_rules(u_int32_t * version,const char * anchor)553 pf_begin_rules(u_int32_t *version, const char *anchor)
554 {
555 	struct pf_ruleset	*rs;
556 	struct pf_rule		*rule;
557 
558 	if ((rs = pf_find_or_create_ruleset(anchor)) == NULL)
559 		return (EINVAL);
560 	while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) {
561 		pf_rm_rule(rs->rules.inactive.ptr, rule);
562 		rs->rules.inactive.rcount--;
563 	}
564 	*version = ++rs->rules.inactive.version;
565 	rs->rules.inactive.open = 1;
566 	return (0);
567 }
568 
569 void
pf_rollback_rules(u_int32_t version,char * anchor)570 pf_rollback_rules(u_int32_t version, char *anchor)
571 {
572 	struct pf_ruleset	*rs;
573 	struct pf_rule		*rule;
574 
575 	rs = pf_find_ruleset(anchor);
576 	if (rs == NULL || !rs->rules.inactive.open ||
577 	    rs->rules.inactive.version != version)
578 		return;
579 	while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) {
580 		pf_rm_rule(rs->rules.inactive.ptr, rule);
581 		rs->rules.inactive.rcount--;
582 	}
583 	rs->rules.inactive.open = 0;
584 
585 	/* queue defs only in the main ruleset */
586 	if (anchor[0])
587 		return;
588 
589 	pf_free_queues(pf_queues_inactive);
590 }
591 
592 void
pf_free_queues(struct pf_queuehead * where)593 pf_free_queues(struct pf_queuehead *where)
594 {
595 	struct pf_queuespec	*q, *qtmp;
596 
597 	TAILQ_FOREACH_SAFE(q, where, entries, qtmp) {
598 		TAILQ_REMOVE(where, q, entries);
599 		pfi_kif_unref(q->kif, PFI_KIF_REF_RULE);
600 		pool_put(&pf_queue_pl, q);
601 	}
602 }
603 
604 void
pf_remove_queues(void)605 pf_remove_queues(void)
606 {
607 	struct pf_queuespec	*q;
608 	struct ifnet		*ifp;
609 
610 	/* put back interfaces in normal queueing mode */
611 	TAILQ_FOREACH(q, pf_queues_active, entries) {
612 		if (q->parent_qid != 0)
613 			continue;
614 
615 		ifp = q->kif->pfik_ifp;
616 		if (ifp == NULL)
617 			continue;
618 
619 		ifq_attach(&ifp->if_snd, ifq_priq_ops, NULL);
620 	}
621 }
622 
/*
 * Per-interface scratch record used while pf_create_queues() builds a
 * new queue configuration; records live on a singly-linked list.
 */
struct pf_queue_if {
	struct ifnet		*ifp;		/* interface with a root queue */
	const struct ifq_ops	*ifqops;	/* send-queue ops to attach */
	const struct pfq_ops	*pfqops;	/* pf queue discipline ops */
	void			*disc;		/* discipline private data */
	struct pf_queue_if	*next;		/* list link */
};
630 
631 static inline struct pf_queue_if *
pf_ifp2q(struct pf_queue_if * list,struct ifnet * ifp)632 pf_ifp2q(struct pf_queue_if *list, struct ifnet *ifp)
633 {
634 	struct pf_queue_if *qif = list;
635 
636 	while (qif != NULL) {
637 		if (qif->ifp == ifp)
638 			return (qif);
639 
640 		qif = qif->next;
641 	}
642 
643 	return (qif);
644 }
645 
/*
 * Build and attach the queue configuration described by the active
 * queue list.  Works in phases: allocate discipline state for every
 * root queue, add all queues to their disciplines, switch interfaces
 * that lost their root queue back to priq, then attach the new
 * disciplines.  On error everything allocated so far is unwound and
 * no interface is touched.
 */
int
pf_create_queues(void)
{
	struct pf_queuespec	*q;
	struct ifnet		*ifp;
	struct pf_queue_if		*list = NULL, *qif;
	int			 error;

	/*
	 * Find root queues and allocate traffic conditioner
	 * private data for these interfaces
	 */
	TAILQ_FOREACH(q, pf_queues_active, entries) {
		if (q->parent_qid != 0)
			continue;

		ifp = q->kif->pfik_ifp;
		if (ifp == NULL)
			continue;

		qif = malloc(sizeof(*qif), M_PF, M_WAITOK);
		qif->ifp = ifp;

		/* root class present: HFSC hierarchy, otherwise fq-codel */
		if (q->flags & PFQS_ROOTCLASS) {
			qif->ifqops = ifq_hfsc_ops;
			qif->pfqops = pfq_hfsc_ops;
		} else {
			qif->ifqops = ifq_fqcodel_ops;
			qif->pfqops = pfq_fqcodel_ops;
		}

		qif->disc = qif->pfqops->pfq_alloc(ifp);

		qif->next = list;
		list = qif;
	}

	/* and now everything */
	TAILQ_FOREACH(q, pf_queues_active, entries) {
		ifp = q->kif->pfik_ifp;
		if (ifp == NULL)
			continue;

		/* every queue's interface got a record in the loop above */
		qif = pf_ifp2q(list, ifp);
		KASSERT(qif != NULL);

		error = qif->pfqops->pfq_addqueue(qif->disc, q);
		if (error != 0)
			goto error;
	}

	/* find root queues in old list to disable them if necessary */
	TAILQ_FOREACH(q, pf_queues_inactive, entries) {
		if (q->parent_qid != 0)
			continue;

		ifp = q->kif->pfik_ifp;
		if (ifp == NULL)
			continue;

		qif = pf_ifp2q(list, ifp);
		if (qif != NULL)
			continue;

		ifq_attach(&ifp->if_snd, ifq_priq_ops, NULL);
	}

	/* commit the new queues */
	while (list != NULL) {
		qif = list;
		list = qif->next;

		ifp = qif->ifp;

		ifq_attach(&ifp->if_snd, qif->ifqops, qif->disc);
		free(qif, M_PF, sizeof(*qif));
	}

	return (0);

error:
	/* unwind: free every discipline allocated in the first phase */
	while (list != NULL) {
		qif = list;
		list = qif->next;

		qif->pfqops->pfq_free(qif->disc);
		free(qif, M_PF, sizeof(*qif));
	}

	return (error);
}
737 
738 int
pf_commit_queues(void)739 pf_commit_queues(void)
740 {
741 	struct pf_queuehead	*qswap;
742 	int error;
743 
744 	/* swap */
745 	qswap = pf_queues_active;
746 	pf_queues_active = pf_queues_inactive;
747 	pf_queues_inactive = qswap;
748 
749 	error = pf_create_queues();
750 	if (error != 0) {
751 		pf_queues_inactive = pf_queues_active;
752 		pf_queues_active = qswap;
753 		return (error);
754 	}
755 
756 	pf_free_queues(pf_queues_inactive);
757 
758 	return (0);
759 }
760 
761 const struct pfq_ops *
pf_queue_manager(struct pf_queuespec * q)762 pf_queue_manager(struct pf_queuespec *q)
763 {
764 	if (q->flags & PFQS_FLOWQUEUE)
765 		return pfq_fqcodel_ops;
766 	return (/* pfq_default_ops */ NULL);
767 }
768 
/*
 * Helpers for folding rule fields into the ruleset MD5 checksum.
 * Multi-byte integers are converted to network byte order first so
 * the digest is comparable between hosts of different endianness.
 */
#define PF_MD5_UPD(st, elm)						\
		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))

#define PF_MD5_UPD_STR(st, elm)						\
		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))

#define PF_MD5_UPD_HTONL(st, elm, stor) do {				\
		(stor) = htonl((st)->elm);				\
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
} while (0)

#define PF_MD5_UPD_HTONS(st, elm, stor) do {				\
		(stor) = htons((st)->elm);				\
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
} while (0)
784 
785 void
pf_hash_rule_addr(MD5_CTX * ctx,struct pf_rule_addr * pfr)786 pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
787 {
788 	PF_MD5_UPD(pfr, addr.type);
789 	switch (pfr->addr.type) {
790 		case PF_ADDR_DYNIFTL:
791 			PF_MD5_UPD(pfr, addr.v.ifname);
792 			PF_MD5_UPD(pfr, addr.iflags);
793 			break;
794 		case PF_ADDR_TABLE:
795 			if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX,
796 			    strlen(PF_OPTIMIZER_TABLE_PFX)))
797 				PF_MD5_UPD(pfr, addr.v.tblname);
798 			break;
799 		case PF_ADDR_ADDRMASK:
800 			/* XXX ignore af? */
801 			PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
802 			PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
803 			break;
804 		case PF_ADDR_RTLABEL:
805 			PF_MD5_UPD(pfr, addr.v.rtlabelname);
806 			break;
807 	}
808 
809 	PF_MD5_UPD(pfr, port[0]);
810 	PF_MD5_UPD(pfr, port[1]);
811 	PF_MD5_UPD(pfr, neg);
812 	PF_MD5_UPD(pfr, port_op);
813 }
814 
/*
 * Fold one rule into the ruleset checksum.  The exact set and order
 * of fields hashed here is the checksum's contract — changing it
 * changes the digest and would make otherwise identical rulesets
 * appear different to peers comparing pf_status.pf_chksum.
 */
void
pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
{
	u_int16_t x;	/* scratch for htons conversions */
	u_int32_t y;	/* scratch for htonl conversions */

	pf_hash_rule_addr(ctx, &rule->src);
	pf_hash_rule_addr(ctx, &rule->dst);
	PF_MD5_UPD_STR(rule, label);
	PF_MD5_UPD_STR(rule, ifname);
	PF_MD5_UPD_STR(rule, rcv_ifname);
	PF_MD5_UPD_STR(rule, match_tagname);
	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
	PF_MD5_UPD_HTONL(rule, prob, y);
	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
	PF_MD5_UPD(rule, uid.op);
	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
	PF_MD5_UPD(rule, gid.op);
	PF_MD5_UPD_HTONL(rule, rule_flag, y);
	PF_MD5_UPD(rule, action);
	PF_MD5_UPD(rule, direction);
	PF_MD5_UPD(rule, af);
	PF_MD5_UPD(rule, quick);
	PF_MD5_UPD(rule, ifnot);
	PF_MD5_UPD(rule, rcvifnot);
	PF_MD5_UPD(rule, match_tag_not);
	PF_MD5_UPD(rule, keep_state);
	PF_MD5_UPD(rule, proto);
	PF_MD5_UPD(rule, type);
	PF_MD5_UPD(rule, code);
	PF_MD5_UPD(rule, flags);
	PF_MD5_UPD(rule, flagset);
	PF_MD5_UPD(rule, allow_opts);
	PF_MD5_UPD(rule, rt);
	PF_MD5_UPD(rule, tos);
}
854 
/*
 * Commit an open rules transaction: swap the inactive rule list into
 * place, recompute skip steps, purge the replaced rules and (for the
 * main ruleset) commit queue definitions.  Returns EBUSY when the
 * ruleset is gone, the transaction is not open, or the caller's
 * version does not match.  Caller must hold the pf lock.
 */
int
pf_commit_rules(u_int32_t version, char *anchor)
{
	struct pf_ruleset	*rs;
	struct pf_rule		*rule;
	struct pf_rulequeue	*old_rules;
	u_int32_t		 old_rcount;

	PF_ASSERT_LOCKED();

	rs = pf_find_ruleset(anchor);
	if (rs == NULL || !rs->rules.inactive.open ||
	    version != rs->rules.inactive.version)
		return (EBUSY);

	/* checksum is only maintained for the main ruleset */
	if (rs == &pf_main_ruleset)
		pf_calc_chksum(rs);

	/* Swap rules, keep the old. */
	old_rules = rs->rules.active.ptr;
	old_rcount = rs->rules.active.rcount;

	rs->rules.active.ptr = rs->rules.inactive.ptr;
	rs->rules.active.rcount = rs->rules.inactive.rcount;
	rs->rules.inactive.ptr = old_rules;
	rs->rules.inactive.rcount = old_rcount;

	rs->rules.active.version = rs->rules.inactive.version;
	pf_calc_skip_steps(rs->rules.active.ptr);


	/* Purge the old rule list. */
	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
		pf_rm_rule(old_rules, rule);
	rs->rules.inactive.rcount = 0;
	rs->rules.inactive.open = 0;
	pf_remove_if_empty_ruleset(rs);

	/* queue defs only in the main ruleset */
	if (anchor[0])
		return (0);
	return (pf_commit_queues());
}
898 
899 void
pf_calc_chksum(struct pf_ruleset * rs)900 pf_calc_chksum(struct pf_ruleset *rs)
901 {
902 	MD5_CTX			 ctx;
903 	struct pf_rule		*rule;
904 	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];
905 
906 	MD5Init(&ctx);
907 
908 	if (rs->rules.inactive.rcount) {
909 		TAILQ_FOREACH(rule, rs->rules.inactive.ptr, entries) {
910 			pf_hash_rule(&ctx, rule);
911 		}
912 	}
913 
914 	MD5Final(digest, &ctx);
915 	memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum));
916 }
917 
918 int
pf_addr_setup(struct pf_ruleset * ruleset,struct pf_addr_wrap * addr,sa_family_t af)919 pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr,
920     sa_family_t af)
921 {
922 	if (pfi_dynaddr_setup(addr, af, PR_WAITOK) ||
923 	    pf_tbladdr_setup(ruleset, addr, PR_WAITOK) ||
924 	    pf_rtlabel_add(addr))
925 		return (EINVAL);
926 
927 	return (0);
928 }
929 
930 struct pfi_kif *
pf_kif_setup(struct pfi_kif * kif_buf)931 pf_kif_setup(struct pfi_kif *kif_buf)
932 {
933 	struct pfi_kif *kif;
934 
935 	if (kif_buf == NULL)
936 		return (NULL);
937 
938 	KASSERT(kif_buf->pfik_name[0] != '\0');
939 
940 	kif = pfi_kif_get(kif_buf->pfik_name, &kif_buf);
941 	if (kif_buf != NULL)
942 		pfi_kif_free(kif_buf);
943 	pfi_kif_ref(kif, PFI_KIF_REF_RULE);
944 
945 	return (kif);
946 }
947 
/* Prepare an address wrapper for copy to userland: resolve ids to names. */
void
pf_addr_copyout(struct pf_addr_wrap *addr)
{
	pfi_dynaddr_copyout(addr);
	pf_tbladdr_copyout(addr);
	pf_rtlabel_copyout(addr);
}
955 
/*
 * Remove all states, or only those on the interface named in
 * psk->psk_ifname (empty name matches everything).  The number of
 * states removed is reported via psk->psk_killed.  Lock order:
 * NET_LOCK, pf_state_list read lock, then PF lock + state write lock.
 */
int
pf_states_clr(struct pfioc_state_kill *psk)
{
	struct pf_state		*st, *nextst;
	struct pf_state		*head, *tail;
	u_int			 killed = 0;
	int			 error;

	NET_LOCK();

	/* lock against the gc removing an item from the list */
	error = rw_enter(&pf_state_list.pfs_rwl, RW_READ|RW_INTR);
	if (error != 0)
		goto unlock;

	/* get a snapshot view of the ends of the list to traverse between */
	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	st = NULL;
	nextst = head;

	PF_LOCK();
	PF_STATE_ENTER_WRITE();

	/* walk only up to the snapshotted tail; entries added later are skipped */
	while (st != tail) {
		st = nextst;
		nextst = TAILQ_NEXT(st, entry_list);

		/* already unlinked but not yet garbage collected */
		if (st->timeout == PFTM_UNLINKED)
			continue;

		if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
		    st->kif->pfik_name)) {
#if NPFSYNC > 0
			/* don't send out individual delete messages */
			SET(st->state_flags, PFSTATE_NOSYNC);
#endif	/* NPFSYNC > 0 */
			pf_remove_state(st);
			killed++;
		}
	}

	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
	rw_exit(&pf_state_list.pfs_rwl);

	psk->psk_killed = killed;

#if NPFSYNC > 0
	/* one bulk clear message instead of per-state deletes */
	pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
#endif	/* NPFSYNC > 0 */
unlock:
	NET_UNLOCK();

	return (error);
}
1015 
/*
 * Export states to userland.  With ps_len == 0 only the required
 * buffer size is reported; otherwise as many states as fit are copied
 * out and ps_len is updated to the amount actually written.
 */
int
pf_states_get(struct pfioc_states *ps)
{
	struct pf_state		*st, *nextst;
	struct pf_state		*head, *tail;
	struct pfsync_state	*p, pstore;
	u_int32_t		 nr = 0;
	int			 error;

	/* size probe: tell the caller how much space all states need */
	if (ps->ps_len == 0) {
		nr = pf_status.states;
		ps->ps_len = sizeof(struct pfsync_state) * nr;
		return (0);
	}

	p = ps->ps_states;

	/* lock against the gc removing an item from the list */
	error = rw_enter(&pf_state_list.pfs_rwl, RW_READ|RW_INTR);
	if (error != 0)
		return (error);

	/* get a snapshot view of the ends of the list to traverse between */
	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	st = NULL;
	nextst = head;

	while (st != tail) {
		st = nextst;
		nextst = TAILQ_NEXT(st, entry_list);

		/* skip states already unlinked but not yet collected */
		if (st->timeout == PFTM_UNLINKED)
			continue;

		/* stop once the userland buffer is full */
		if ((nr+1) * sizeof(*p) > ps->ps_len)
			break;

		/* export to a kernel buffer first, then copy out */
		pf_state_export(&pstore, st);
		error = copyout(&pstore, p, sizeof(*p));
		if (error)
			goto fail;

		p++;
		nr++;
	}
	ps->ps_len = sizeof(struct pfsync_state) * nr;

fail:
	rw_exit(&pf_state_list.pfs_rwl);

	return (error);
}
1072 
1073 int
pfioctl(dev_t dev,u_long cmd,caddr_t addr,int flags,struct proc * p)1074 pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
1075 {
1076 	int			 error = 0;
1077 
1078 	/* XXX keep in sync with switch() below */
1079 	if (securelevel > 1)
1080 		switch (cmd) {
1081 		case DIOCGETRULES:
1082 		case DIOCGETRULE:
1083 		case DIOCGETSTATE:
1084 		case DIOCSETSTATUSIF:
1085 		case DIOCGETSTATUS:
1086 		case DIOCCLRSTATUS:
1087 		case DIOCNATLOOK:
1088 		case DIOCSETDEBUG:
1089 		case DIOCGETSTATES:
1090 		case DIOCGETTIMEOUT:
1091 		case DIOCGETLIMIT:
1092 		case DIOCGETRULESETS:
1093 		case DIOCGETRULESET:
1094 		case DIOCGETQUEUES:
1095 		case DIOCGETQUEUE:
1096 		case DIOCGETQSTATS:
1097 		case DIOCRGETTABLES:
1098 		case DIOCRGETTSTATS:
1099 		case DIOCRCLRTSTATS:
1100 		case DIOCRCLRADDRS:
1101 		case DIOCRADDADDRS:
1102 		case DIOCRDELADDRS:
1103 		case DIOCRSETADDRS:
1104 		case DIOCRGETADDRS:
1105 		case DIOCRGETASTATS:
1106 		case DIOCRCLRASTATS:
1107 		case DIOCRTSTADDRS:
1108 		case DIOCOSFPGET:
1109 		case DIOCGETSRCNODES:
1110 		case DIOCCLRSRCNODES:
1111 		case DIOCIGETIFACES:
1112 		case DIOCSETIFFLAG:
1113 		case DIOCCLRIFFLAG:
1114 		case DIOCGETSYNFLWATS:
1115 			break;
1116 		case DIOCRCLRTABLES:
1117 		case DIOCRADDTABLES:
1118 		case DIOCRDELTABLES:
1119 		case DIOCRSETTFLAGS:
1120 			if (((struct pfioc_table *)addr)->pfrio_flags &
1121 			    PFR_FLAG_DUMMY)
1122 				break; /* dummy operation ok */
1123 			return (EPERM);
1124 		default:
1125 			return (EPERM);
1126 		}
1127 
1128 	if (!(flags & FWRITE))
1129 		switch (cmd) {
1130 		case DIOCGETRULES:
1131 		case DIOCGETSTATE:
1132 		case DIOCGETSTATUS:
1133 		case DIOCGETSTATES:
1134 		case DIOCGETTIMEOUT:
1135 		case DIOCGETLIMIT:
1136 		case DIOCGETRULESETS:
1137 		case DIOCGETRULESET:
1138 		case DIOCGETQUEUES:
1139 		case DIOCGETQUEUE:
1140 		case DIOCGETQSTATS:
1141 		case DIOCNATLOOK:
1142 		case DIOCRGETTABLES:
1143 		case DIOCRGETTSTATS:
1144 		case DIOCRGETADDRS:
1145 		case DIOCRGETASTATS:
1146 		case DIOCRTSTADDRS:
1147 		case DIOCOSFPGET:
1148 		case DIOCGETSRCNODES:
1149 		case DIOCIGETIFACES:
1150 		case DIOCGETSYNFLWATS:
1151 		case DIOCXEND:
1152 			break;
1153 		case DIOCRCLRTABLES:
1154 		case DIOCRADDTABLES:
1155 		case DIOCRDELTABLES:
1156 		case DIOCRCLRTSTATS:
1157 		case DIOCRCLRADDRS:
1158 		case DIOCRADDADDRS:
1159 		case DIOCRDELADDRS:
1160 		case DIOCRSETADDRS:
1161 		case DIOCRSETTFLAGS:
1162 			if (((struct pfioc_table *)addr)->pfrio_flags &
1163 			    PFR_FLAG_DUMMY) {
1164 				flags |= FWRITE; /* need write lock for dummy */
1165 				break; /* dummy operation ok */
1166 			}
1167 			return (EACCES);
1168 		case DIOCGETRULE:
1169 			if (((struct pfioc_rule *)addr)->action ==
1170 			    PF_GET_CLR_CNTR)
1171 				return (EACCES);
1172 			break;
1173 		default:
1174 			return (EACCES);
1175 		}
1176 
1177 	rw_enter_write(&pfioctl_rw);
1178 
1179 	switch (cmd) {
1180 
1181 	case DIOCSTART:
1182 		NET_LOCK();
1183 		PF_LOCK();
1184 		if (pf_status.running)
1185 			error = EEXIST;
1186 		else {
1187 			pf_status.running = 1;
1188 			pf_status.since = getuptime();
1189 			if (pf_status.stateid == 0) {
1190 				pf_status.stateid = gettime();
1191 				pf_status.stateid = pf_status.stateid << 32;
1192 			}
1193 			timeout_add_sec(&pf_purge_states_to, 1);
1194 			timeout_add_sec(&pf_purge_to, 1);
1195 			pf_create_queues();
1196 			DPFPRINTF(LOG_NOTICE, "pf: started");
1197 		}
1198 		PF_UNLOCK();
1199 		NET_UNLOCK();
1200 		break;
1201 
1202 	case DIOCSTOP:
1203 		NET_LOCK();
1204 		PF_LOCK();
1205 		if (!pf_status.running)
1206 			error = ENOENT;
1207 		else {
1208 			pf_status.running = 0;
1209 			pf_status.since = getuptime();
1210 			pf_remove_queues();
1211 			DPFPRINTF(LOG_NOTICE, "pf: stopped");
1212 		}
1213 		PF_UNLOCK();
1214 		NET_UNLOCK();
1215 		break;
1216 
1217 	case DIOCGETQUEUES: {
1218 		struct pfioc_queue	*pq = (struct pfioc_queue *)addr;
1219 		struct pf_queuespec	*qs;
1220 		u_int32_t		 nr = 0;
1221 
1222 		PF_LOCK();
1223 		pq->ticket = pf_main_ruleset.rules.active.version;
1224 
1225 		/* save state to not run over them all each time? */
1226 		qs = TAILQ_FIRST(pf_queues_active);
1227 		while (qs != NULL) {
1228 			qs = TAILQ_NEXT(qs, entries);
1229 			nr++;
1230 		}
1231 		pq->nr = nr;
1232 		PF_UNLOCK();
1233 		break;
1234 	}
1235 
1236 	case DIOCGETQUEUE: {
1237 		struct pfioc_queue	*pq = (struct pfioc_queue *)addr;
1238 		struct pf_queuespec	*qs;
1239 		u_int32_t		 nr = 0;
1240 
1241 		PF_LOCK();
1242 		if (pq->ticket != pf_main_ruleset.rules.active.version) {
1243 			error = EBUSY;
1244 			PF_UNLOCK();
1245 			goto fail;
1246 		}
1247 
1248 		/* save state to not run over them all each time? */
1249 		qs = TAILQ_FIRST(pf_queues_active);
1250 		while ((qs != NULL) && (nr++ < pq->nr))
1251 			qs = TAILQ_NEXT(qs, entries);
1252 		if (qs == NULL) {
1253 			error = EBUSY;
1254 			PF_UNLOCK();
1255 			goto fail;
1256 		}
1257 		memcpy(&pq->queue, qs, sizeof(pq->queue));
1258 		PF_UNLOCK();
1259 		break;
1260 	}
1261 
1262 	case DIOCGETQSTATS: {
1263 		struct pfioc_qstats	*pq = (struct pfioc_qstats *)addr;
1264 		struct pf_queuespec	*qs;
1265 		u_int32_t		 nr;
1266 		int			 nbytes;
1267 
1268 		NET_LOCK();
1269 		PF_LOCK();
1270 		if (pq->ticket != pf_main_ruleset.rules.active.version) {
1271 			error = EBUSY;
1272 			PF_UNLOCK();
1273 			NET_UNLOCK();
1274 			goto fail;
1275 		}
1276 		nbytes = pq->nbytes;
1277 		nr = 0;
1278 
1279 		/* save state to not run over them all each time? */
1280 		qs = TAILQ_FIRST(pf_queues_active);
1281 		while ((qs != NULL) && (nr++ < pq->nr))
1282 			qs = TAILQ_NEXT(qs, entries);
1283 		if (qs == NULL) {
1284 			error = EBUSY;
1285 			PF_UNLOCK();
1286 			NET_UNLOCK();
1287 			goto fail;
1288 		}
1289 		memcpy(&pq->queue, qs, sizeof(pq->queue));
1290 		/* It's a root flow queue but is not an HFSC root class */
1291 		if ((qs->flags & PFQS_FLOWQUEUE) && qs->parent_qid == 0 &&
1292 		    !(qs->flags & PFQS_ROOTCLASS))
1293 			error = pfq_fqcodel_ops->pfq_qstats(qs, pq->buf,
1294 			    &nbytes);
1295 		else
1296 			error = pfq_hfsc_ops->pfq_qstats(qs, pq->buf,
1297 			    &nbytes);
1298 		if (error == 0)
1299 			pq->nbytes = nbytes;
1300 		PF_UNLOCK();
1301 		NET_UNLOCK();
1302 		break;
1303 	}
1304 
1305 	case DIOCADDQUEUE: {
1306 		struct pfioc_queue	*q = (struct pfioc_queue *)addr;
1307 		struct pf_queuespec	*qs;
1308 
1309 		qs = pool_get(&pf_queue_pl, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
1310 		if (qs == NULL) {
1311 			error = ENOMEM;
1312 			goto fail;
1313 		}
1314 
1315 		NET_LOCK();
1316 		PF_LOCK();
1317 		if (q->ticket != pf_main_ruleset.rules.inactive.version) {
1318 			error = EBUSY;
1319 			PF_UNLOCK();
1320 			NET_UNLOCK();
1321 			pool_put(&pf_queue_pl, qs);
1322 			goto fail;
1323 		}
1324 		memcpy(qs, &q->queue, sizeof(*qs));
1325 		qs->qid = pf_qname2qid(qs->qname, 1);
1326 		if (qs->qid == 0) {
1327 			error = EBUSY;
1328 			PF_UNLOCK();
1329 			NET_UNLOCK();
1330 			pool_put(&pf_queue_pl, qs);
1331 			goto fail;
1332 		}
1333 		if (qs->parent[0] && (qs->parent_qid =
1334 		    pf_qname2qid(qs->parent, 0)) == 0) {
1335 			error = ESRCH;
1336 			PF_UNLOCK();
1337 			NET_UNLOCK();
1338 			pool_put(&pf_queue_pl, qs);
1339 			goto fail;
1340 		}
1341 		qs->kif = pfi_kif_get(qs->ifname, NULL);
1342 		if (qs->kif == NULL) {
1343 			error = ESRCH;
1344 			PF_UNLOCK();
1345 			NET_UNLOCK();
1346 			pool_put(&pf_queue_pl, qs);
1347 			goto fail;
1348 		}
1349 		/* XXX resolve bw percentage specs */
1350 		pfi_kif_ref(qs->kif, PFI_KIF_REF_RULE);
1351 
1352 		TAILQ_INSERT_TAIL(pf_queues_inactive, qs, entries);
1353 		PF_UNLOCK();
1354 		NET_UNLOCK();
1355 
1356 		break;
1357 	}
1358 
1359 	case DIOCADDRULE: {
1360 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
1361 		struct pf_ruleset	*ruleset;
1362 		struct pf_rule		*rule, *tail;
1363 
1364 		rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
1365 		if (rule == NULL) {
1366 			error = ENOMEM;
1367 			goto fail;
1368 		}
1369 
1370 		if ((error = pf_rule_copyin(&pr->rule, rule))) {
1371 			pf_rule_free(rule);
1372 			rule = NULL;
1373 			goto fail;
1374 		}
1375 
1376 		if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
1377 			error = EINVAL;
1378 			pf_rule_free(rule);
1379 			rule = NULL;
1380 			goto fail;
1381 		}
1382 		if ((error = pf_rule_checkaf(rule))) {
1383 			pf_rule_free(rule);
1384 			rule = NULL;
1385 			goto fail;
1386 		}
1387 		if (rule->src.addr.type == PF_ADDR_NONE ||
1388 		    rule->dst.addr.type == PF_ADDR_NONE) {
1389 			error = EINVAL;
1390 			pf_rule_free(rule);
1391 			rule = NULL;
1392 			goto fail;
1393 		}
1394 
1395 		if (rule->rt && !rule->direction) {
1396 			error = EINVAL;
1397 			pf_rule_free(rule);
1398 			rule = NULL;
1399 			goto fail;
1400 		}
1401 
1402 		NET_LOCK();
1403 		PF_LOCK();
1404 		pr->anchor[sizeof(pr->anchor) - 1] = '\0';
1405 		ruleset = pf_find_ruleset(pr->anchor);
1406 		if (ruleset == NULL) {
1407 			error = EINVAL;
1408 			PF_UNLOCK();
1409 			NET_UNLOCK();
1410 			pf_rule_free(rule);
1411 			goto fail;
1412 		}
1413 		if (pr->ticket != ruleset->rules.inactive.version) {
1414 			error = EBUSY;
1415 			PF_UNLOCK();
1416 			NET_UNLOCK();
1417 			pf_rule_free(rule);
1418 			goto fail;
1419 		}
1420 		rule->cuid = p->p_ucred->cr_ruid;
1421 		rule->cpid = p->p_p->ps_pid;
1422 
1423 		tail = TAILQ_LAST(ruleset->rules.inactive.ptr,
1424 		    pf_rulequeue);
1425 		if (tail)
1426 			rule->nr = tail->nr + 1;
1427 		else
1428 			rule->nr = 0;
1429 
1430 		rule->kif = pf_kif_setup(rule->kif);
1431 		rule->rcv_kif = pf_kif_setup(rule->rcv_kif);
1432 		rule->rdr.kif = pf_kif_setup(rule->rdr.kif);
1433 		rule->nat.kif = pf_kif_setup(rule->nat.kif);
1434 		rule->route.kif = pf_kif_setup(rule->route.kif);
1435 
1436 		if (rule->overload_tblname[0]) {
1437 			if ((rule->overload_tbl = pfr_attach_table(ruleset,
1438 			    rule->overload_tblname, PR_WAITOK)) == NULL)
1439 				error = EINVAL;
1440 			else
1441 				rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE;
1442 		}
1443 
1444 		if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
1445 			error = EINVAL;
1446 		if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
1447 			error = EINVAL;
1448 		if (pf_addr_setup(ruleset, &rule->rdr.addr, rule->af))
1449 			error = EINVAL;
1450 		if (pf_addr_setup(ruleset, &rule->nat.addr, rule->af))
1451 			error = EINVAL;
1452 		if (pf_addr_setup(ruleset, &rule->route.addr, rule->af))
1453 			error = EINVAL;
1454 		if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
1455 			error = EINVAL;
1456 
1457 		if (error) {
1458 			pf_rm_rule(NULL, rule);
1459 			PF_UNLOCK();
1460 			NET_UNLOCK();
1461 			goto fail;
1462 		}
1463 		TAILQ_INSERT_TAIL(ruleset->rules.inactive.ptr,
1464 		    rule, entries);
1465 		ruleset->rules.inactive.rcount++;
1466 		PF_UNLOCK();
1467 		NET_UNLOCK();
1468 		break;
1469 	}
1470 
1471 	case DIOCGETRULES: {
1472 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
1473 		struct pf_ruleset	*ruleset;
1474 		struct pf_rule		*rule;
1475 		struct pf_trans		*t;
1476 		u_int32_t		 ruleset_version;
1477 
1478 		NET_LOCK();
1479 		PF_LOCK();
1480 		pr->anchor[sizeof(pr->anchor) - 1] = '\0';
1481 		ruleset = pf_find_ruleset(pr->anchor);
1482 		if (ruleset == NULL) {
1483 			error = EINVAL;
1484 			PF_UNLOCK();
1485 			NET_UNLOCK();
1486 			goto fail;
1487 		}
1488 		rule = TAILQ_LAST(ruleset->rules.active.ptr, pf_rulequeue);
1489 		if (rule)
1490 			pr->nr = rule->nr + 1;
1491 		else
1492 			pr->nr = 0;
1493 		ruleset_version = ruleset->rules.active.version;
1494 		pf_anchor_take(ruleset->anchor);
1495 		rule = TAILQ_FIRST(ruleset->rules.active.ptr);
1496 		PF_UNLOCK();
1497 		NET_UNLOCK();
1498 
1499 		t = pf_open_trans(minor(dev));
1500 		if (t == NULL) {
1501 			error = EBUSY;
1502 			goto fail;
1503 		}
1504 		pf_init_tgetrule(t, ruleset->anchor, ruleset_version, rule);
1505 		pr->ticket = t->pft_ticket;
1506 
1507 		break;
1508 	}
1509 
1510 	case DIOCGETRULE: {
1511 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
1512 		struct pf_ruleset	*ruleset;
1513 		struct pf_rule		*rule;
1514 		struct pf_trans		*t;
1515 		int			 i;
1516 
1517 		t = pf_find_trans(minor(dev), pr->ticket);
1518 		if (t == NULL) {
1519 			error = ENXIO;
1520 			goto fail;
1521 		}
1522 		KASSERT(t->pft_unit == minor(dev));
1523 		if (t->pft_type != PF_TRANS_GETRULE) {
1524 			error = EINVAL;
1525 			goto fail;
1526 		}
1527 
1528 		NET_LOCK();
1529 		PF_LOCK();
1530 		KASSERT(t->pftgr_anchor != NULL);
1531 		ruleset = &t->pftgr_anchor->ruleset;
1532 		if (t->pftgr_version != ruleset->rules.active.version) {
1533 			error = EBUSY;
1534 			PF_UNLOCK();
1535 			NET_UNLOCK();
1536 			goto fail;
1537 		}
1538 		rule = t->pftgr_rule;
1539 		if (rule == NULL) {
1540 			error = ENOENT;
1541 			PF_UNLOCK();
1542 			NET_UNLOCK();
1543 			goto fail;
1544 		}
1545 		memcpy(&pr->rule, rule, sizeof(struct pf_rule));
1546 		memset(&pr->rule.entries, 0, sizeof(pr->rule.entries));
1547 		pr->rule.kif = NULL;
1548 		pr->rule.nat.kif = NULL;
1549 		pr->rule.rdr.kif = NULL;
1550 		pr->rule.route.kif = NULL;
1551 		pr->rule.rcv_kif = NULL;
1552 		pr->rule.anchor = NULL;
1553 		pr->rule.overload_tbl = NULL;
1554 		pr->rule.pktrate.limit /= PF_THRESHOLD_MULT;
1555 		if (pf_anchor_copyout(ruleset, rule, pr)) {
1556 			error = EBUSY;
1557 			PF_UNLOCK();
1558 			NET_UNLOCK();
1559 			goto fail;
1560 		}
1561 		pf_addr_copyout(&pr->rule.src.addr);
1562 		pf_addr_copyout(&pr->rule.dst.addr);
1563 		pf_addr_copyout(&pr->rule.rdr.addr);
1564 		pf_addr_copyout(&pr->rule.nat.addr);
1565 		pf_addr_copyout(&pr->rule.route.addr);
1566 		for (i = 0; i < PF_SKIP_COUNT; ++i)
1567 			if (rule->skip[i].ptr == NULL)
1568 				pr->rule.skip[i].nr = (u_int32_t)-1;
1569 			else
1570 				pr->rule.skip[i].nr =
1571 				    rule->skip[i].ptr->nr;
1572 
1573 		if (pr->action == PF_GET_CLR_CNTR) {
1574 			rule->evaluations = 0;
1575 			rule->packets[0] = rule->packets[1] = 0;
1576 			rule->bytes[0] = rule->bytes[1] = 0;
1577 			rule->states_tot = 0;
1578 		}
1579 		pr->nr = rule->nr;
1580 		t->pftgr_rule = TAILQ_NEXT(rule, entries);
1581 		PF_UNLOCK();
1582 		NET_UNLOCK();
1583 		break;
1584 	}
1585 
1586 	case DIOCCHANGERULE: {
1587 		struct pfioc_rule	*pcr = (struct pfioc_rule *)addr;
1588 		struct pf_ruleset	*ruleset;
1589 		struct pf_rule		*oldrule = NULL, *newrule = NULL;
1590 		u_int32_t		 nr = 0;
1591 
1592 		if (pcr->action < PF_CHANGE_ADD_HEAD ||
1593 		    pcr->action > PF_CHANGE_GET_TICKET) {
1594 			error = EINVAL;
1595 			goto fail;
1596 		}
1597 
1598 		if (pcr->action == PF_CHANGE_GET_TICKET) {
1599 			NET_LOCK();
1600 			PF_LOCK();
1601 
1602 			ruleset = pf_find_ruleset(pcr->anchor);
1603 			if (ruleset == NULL)
1604 				error = EINVAL;
1605 			else
1606 				pcr->ticket = ++ruleset->rules.active.version;
1607 
1608 			PF_UNLOCK();
1609 			NET_UNLOCK();
1610 			goto fail;
1611 		}
1612 
1613 		if (pcr->action != PF_CHANGE_REMOVE) {
1614 			newrule = pool_get(&pf_rule_pl,
1615 			    PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
1616 			if (newrule == NULL) {
1617 				error = ENOMEM;
1618 				goto fail;
1619 			}
1620 
1621 			if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
1622 				error = EINVAL;
1623 				pool_put(&pf_rule_pl, newrule);
1624 				goto fail;
1625 			}
1626 			error = pf_rule_copyin(&pcr->rule, newrule);
1627 			if (error != 0) {
1628 				pf_rule_free(newrule);
1629 				newrule = NULL;
1630 				goto fail;
1631 			}
1632 			if ((error = pf_rule_checkaf(newrule))) {
1633 				pf_rule_free(newrule);
1634 				newrule = NULL;
1635 				goto fail;
1636 			}
1637 			if (newrule->rt && !newrule->direction) {
1638 				pf_rule_free(newrule);
1639 				error = EINVAL;
1640 				newrule = NULL;
1641 				goto fail;
1642 			}
1643 		}
1644 
1645 		NET_LOCK();
1646 		PF_LOCK();
1647 		ruleset = pf_find_ruleset(pcr->anchor);
1648 		if (ruleset == NULL) {
1649 			error = EINVAL;
1650 			PF_UNLOCK();
1651 			NET_UNLOCK();
1652 			pf_rule_free(newrule);
1653 			goto fail;
1654 		}
1655 
1656 		if (pcr->ticket != ruleset->rules.active.version) {
1657 			error = EINVAL;
1658 			PF_UNLOCK();
1659 			NET_UNLOCK();
1660 			pf_rule_free(newrule);
1661 			goto fail;
1662 		}
1663 
1664 		if (pcr->action != PF_CHANGE_REMOVE) {
1665 			KASSERT(newrule != NULL);
1666 			newrule->cuid = p->p_ucred->cr_ruid;
1667 			newrule->cpid = p->p_p->ps_pid;
1668 
1669 			newrule->kif = pf_kif_setup(newrule->kif);
1670 			newrule->rcv_kif = pf_kif_setup(newrule->rcv_kif);
1671 			newrule->rdr.kif = pf_kif_setup(newrule->rdr.kif);
1672 			newrule->nat.kif = pf_kif_setup(newrule->nat.kif);
1673 			newrule->route.kif = pf_kif_setup(newrule->route.kif);
1674 
1675 			if (newrule->overload_tblname[0]) {
1676 				newrule->overload_tbl = pfr_attach_table(
1677 				    ruleset, newrule->overload_tblname,
1678 				    PR_WAITOK);
1679 				if (newrule->overload_tbl == NULL)
1680 					error = EINVAL;
1681 				else
1682 					newrule->overload_tbl->pfrkt_flags |=
1683 					    PFR_TFLAG_ACTIVE;
1684 			}
1685 
1686 			if (pf_addr_setup(ruleset, &newrule->src.addr,
1687 			    newrule->af))
1688 				error = EINVAL;
1689 			if (pf_addr_setup(ruleset, &newrule->dst.addr,
1690 			    newrule->af))
1691 				error = EINVAL;
1692 			if (pf_addr_setup(ruleset, &newrule->rdr.addr,
1693 			    newrule->af))
1694 				error = EINVAL;
1695 			if (pf_addr_setup(ruleset, &newrule->nat.addr,
1696 			    newrule->af))
1697 				error = EINVAL;
1698 			if (pf_addr_setup(ruleset, &newrule->route.addr,
1699 			    newrule->af))
1700 				error = EINVAL;
1701 			if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call))
1702 				error = EINVAL;
1703 
1704 			if (error) {
1705 				pf_rm_rule(NULL, newrule);
1706 				PF_UNLOCK();
1707 				NET_UNLOCK();
1708 				goto fail;
1709 			}
1710 		}
1711 
1712 		if (pcr->action == PF_CHANGE_ADD_HEAD)
1713 			oldrule = TAILQ_FIRST(ruleset->rules.active.ptr);
1714 		else if (pcr->action == PF_CHANGE_ADD_TAIL)
1715 			oldrule = TAILQ_LAST(ruleset->rules.active.ptr,
1716 			    pf_rulequeue);
1717 		else {
1718 			oldrule = TAILQ_FIRST(ruleset->rules.active.ptr);
1719 			while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
1720 				oldrule = TAILQ_NEXT(oldrule, entries);
1721 			if (oldrule == NULL) {
1722 				if (newrule != NULL)
1723 					pf_rm_rule(NULL, newrule);
1724 				error = EINVAL;
1725 				PF_UNLOCK();
1726 				NET_UNLOCK();
1727 				goto fail;
1728 			}
1729 		}
1730 
1731 		if (pcr->action == PF_CHANGE_REMOVE) {
1732 			pf_rm_rule(ruleset->rules.active.ptr, oldrule);
1733 			ruleset->rules.active.rcount--;
1734 		} else {
1735 			if (oldrule == NULL)
1736 				TAILQ_INSERT_TAIL(
1737 				    ruleset->rules.active.ptr,
1738 				    newrule, entries);
1739 			else if (pcr->action == PF_CHANGE_ADD_HEAD ||
1740 			    pcr->action == PF_CHANGE_ADD_BEFORE)
1741 				TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
1742 			else
1743 				TAILQ_INSERT_AFTER(
1744 				    ruleset->rules.active.ptr,
1745 				    oldrule, newrule, entries);
1746 			ruleset->rules.active.rcount++;
1747 		}
1748 
1749 		nr = 0;
1750 		TAILQ_FOREACH(oldrule, ruleset->rules.active.ptr, entries)
1751 			oldrule->nr = nr++;
1752 
1753 		ruleset->rules.active.version++;
1754 
1755 		pf_calc_skip_steps(ruleset->rules.active.ptr);
1756 		pf_remove_if_empty_ruleset(ruleset);
1757 
1758 		PF_UNLOCK();
1759 		NET_UNLOCK();
1760 		break;
1761 	}
1762 
1763 	case DIOCCLRSTATES:
1764 		error = pf_states_clr((struct pfioc_state_kill *)addr);
1765 		break;
1766 
1767 	case DIOCKILLSTATES: {
1768 		struct pf_state		*st, *nextst;
1769 		struct pf_state_item	*si, *sit;
1770 		struct pf_state_key	*sk, key;
1771 		struct pf_addr		*srcaddr, *dstaddr;
1772 		u_int16_t		 srcport, dstport;
1773 		struct pfioc_state_kill	*psk = (struct pfioc_state_kill *)addr;
1774 		u_int			 i, killed = 0;
1775 		const int		 dirs[] = { PF_IN, PF_OUT };
1776 		int			 sidx, didx;
1777 
1778 		if (psk->psk_pfcmp.id) {
1779 			if (psk->psk_pfcmp.creatorid == 0)
1780 				psk->psk_pfcmp.creatorid = pf_status.hostid;
1781 			NET_LOCK();
1782 			PF_LOCK();
1783 			PF_STATE_ENTER_WRITE();
1784 			if ((st = pf_find_state_byid(&psk->psk_pfcmp))) {
1785 				pf_remove_state(st);
1786 				psk->psk_killed = 1;
1787 			}
1788 			PF_STATE_EXIT_WRITE();
1789 			PF_UNLOCK();
1790 			NET_UNLOCK();
1791 			goto fail;
1792 		}
1793 
1794 		if (psk->psk_af && psk->psk_proto &&
1795 		    psk->psk_src.port_op == PF_OP_EQ &&
1796 		    psk->psk_dst.port_op == PF_OP_EQ) {
1797 
1798 			key.af = psk->psk_af;
1799 			key.proto = psk->psk_proto;
1800 			key.rdomain = psk->psk_rdomain;
1801 
1802 			NET_LOCK();
1803 			PF_LOCK();
1804 			PF_STATE_ENTER_WRITE();
1805 			for (i = 0; i < nitems(dirs); i++) {
1806 				if (dirs[i] == PF_IN) {
1807 					sidx = 0;
1808 					didx = 1;
1809 				} else {
1810 					sidx = 1;
1811 					didx = 0;
1812 				}
1813 				pf_addrcpy(&key.addr[sidx],
1814 				    &psk->psk_src.addr.v.a.addr, key.af);
1815 				pf_addrcpy(&key.addr[didx],
1816 				    &psk->psk_dst.addr.v.a.addr, key.af);
1817 				key.port[sidx] = psk->psk_src.port[0];
1818 				key.port[didx] = psk->psk_dst.port[0];
1819 
1820 				sk = RBT_FIND(pf_state_tree, &pf_statetbl,
1821 				    &key);
1822 				if (sk == NULL)
1823 					continue;
1824 
1825 				TAILQ_FOREACH_SAFE(si, &sk->sk_states,
1826 				    si_entry, sit) {
1827 					struct pf_state *sist = si->si_st;
1828 					if (((sist->key[PF_SK_WIRE]->af ==
1829 					    sist->key[PF_SK_STACK]->af &&
1830 					    sk == (dirs[i] == PF_IN ?
1831 					    sist->key[PF_SK_WIRE] :
1832 					    sist->key[PF_SK_STACK])) ||
1833 					    (sist->key[PF_SK_WIRE]->af !=
1834 					    sist->key[PF_SK_STACK]->af &&
1835 					    dirs[i] == PF_IN &&
1836 					    (sk == sist->key[PF_SK_STACK] ||
1837 					    sk == sist->key[PF_SK_WIRE]))) &&
1838 					    (!psk->psk_ifname[0] ||
1839 					    (sist->kif != pfi_all &&
1840 					    !strcmp(psk->psk_ifname,
1841 					    sist->kif->pfik_name)))) {
1842 						pf_remove_state(sist);
1843 						killed++;
1844 					}
1845 				}
1846 			}
1847 			if (killed)
1848 				psk->psk_killed = killed;
1849 			PF_STATE_EXIT_WRITE();
1850 			PF_UNLOCK();
1851 			NET_UNLOCK();
1852 			goto fail;
1853 		}
1854 
1855 		NET_LOCK();
1856 		PF_LOCK();
1857 		PF_STATE_ENTER_WRITE();
1858 		RBT_FOREACH_SAFE(st, pf_state_tree_id, &tree_id, nextst) {
1859 			if (st->direction == PF_OUT) {
1860 				sk = st->key[PF_SK_STACK];
1861 				srcaddr = &sk->addr[1];
1862 				dstaddr = &sk->addr[0];
1863 				srcport = sk->port[1];
1864 				dstport = sk->port[0];
1865 			} else {
1866 				sk = st->key[PF_SK_WIRE];
1867 				srcaddr = &sk->addr[0];
1868 				dstaddr = &sk->addr[1];
1869 				srcport = sk->port[0];
1870 				dstport = sk->port[1];
1871 			}
1872 			if ((!psk->psk_af || sk->af == psk->psk_af)
1873 			    && (!psk->psk_proto || psk->psk_proto ==
1874 			    sk->proto) && psk->psk_rdomain == sk->rdomain &&
1875 			    pf_match_addr(psk->psk_src.neg,
1876 			    &psk->psk_src.addr.v.a.addr,
1877 			    &psk->psk_src.addr.v.a.mask,
1878 			    srcaddr, sk->af) &&
1879 			    pf_match_addr(psk->psk_dst.neg,
1880 			    &psk->psk_dst.addr.v.a.addr,
1881 			    &psk->psk_dst.addr.v.a.mask,
1882 			    dstaddr, sk->af) &&
1883 			    (psk->psk_src.port_op == 0 ||
1884 			    pf_match_port(psk->psk_src.port_op,
1885 			    psk->psk_src.port[0], psk->psk_src.port[1],
1886 			    srcport)) &&
1887 			    (psk->psk_dst.port_op == 0 ||
1888 			    pf_match_port(psk->psk_dst.port_op,
1889 			    psk->psk_dst.port[0], psk->psk_dst.port[1],
1890 			    dstport)) &&
1891 			    (!psk->psk_label[0] || (st->rule.ptr->label[0] &&
1892 			    !strcmp(psk->psk_label, st->rule.ptr->label))) &&
1893 			    (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
1894 			    st->kif->pfik_name))) {
1895 				pf_remove_state(st);
1896 				killed++;
1897 			}
1898 		}
1899 		psk->psk_killed = killed;
1900 		PF_STATE_EXIT_WRITE();
1901 		PF_UNLOCK();
1902 		NET_UNLOCK();
1903 		break;
1904 	}
1905 
1906 #if NPFSYNC > 0
1907 	case DIOCADDSTATE: {
1908 		struct pfioc_state	*ps = (struct pfioc_state *)addr;
1909 		struct pfsync_state	*sp = &ps->state;
1910 
1911 		if (sp->timeout >= PFTM_MAX) {
1912 			error = EINVAL;
1913 			goto fail;
1914 		}
1915 		NET_LOCK();
1916 		PF_LOCK();
1917 		error = pf_state_import(sp, PFSYNC_SI_IOCTL);
1918 		PF_UNLOCK();
1919 		NET_UNLOCK();
1920 		break;
1921 	}
1922 #endif	/* NPFSYNC > 0 */
1923 
1924 	case DIOCGETSTATE: {
1925 		struct pfioc_state	*ps = (struct pfioc_state *)addr;
1926 		struct pf_state		*st;
1927 		struct pf_state_cmp	 id_key;
1928 
1929 		memset(&id_key, 0, sizeof(id_key));
1930 		id_key.id = ps->state.id;
1931 		id_key.creatorid = ps->state.creatorid;
1932 
1933 		NET_LOCK();
1934 		PF_STATE_ENTER_READ();
1935 		st = pf_find_state_byid(&id_key);
1936 		st = pf_state_ref(st);
1937 		PF_STATE_EXIT_READ();
1938 		NET_UNLOCK();
1939 		if (st == NULL) {
1940 			error = ENOENT;
1941 			goto fail;
1942 		}
1943 
1944 		pf_state_export(&ps->state, st);
1945 		pf_state_unref(st);
1946 		break;
1947 	}
1948 
1949 	case DIOCGETSTATES:
1950 		error = pf_states_get((struct pfioc_states *)addr);
1951 		break;
1952 
1953 	case DIOCGETSTATUS: {
1954 		struct pf_status *s = (struct pf_status *)addr;
1955 		NET_LOCK();
1956 		PF_LOCK();
1957 		PF_FRAG_LOCK();
1958 		memcpy(s, &pf_status, sizeof(struct pf_status));
1959 		PF_FRAG_UNLOCK();
1960 		pfi_update_status(s->ifname, s);
1961 		PF_UNLOCK();
1962 		NET_UNLOCK();
1963 		break;
1964 	}
1965 
1966 	case DIOCSETSTATUSIF: {
1967 		struct pfioc_iface	*pi = (struct pfioc_iface *)addr;
1968 
1969 		NET_LOCK();
1970 		PF_LOCK();
1971 		if (pi->pfiio_name[0] == 0) {
1972 			memset(pf_status.ifname, 0, IFNAMSIZ);
1973 			PF_UNLOCK();
1974 			NET_UNLOCK();
1975 			goto fail;
1976 		}
1977 		strlcpy(pf_trans_set.statusif, pi->pfiio_name, IFNAMSIZ);
1978 		pf_trans_set.mask |= PF_TSET_STATUSIF;
1979 		PF_UNLOCK();
1980 		NET_UNLOCK();
1981 		break;
1982 	}
1983 
1984 	case DIOCCLRSTATUS: {
1985 		struct pfioc_iface	*pi = (struct pfioc_iface *)addr;
1986 
1987 		NET_LOCK();
1988 		PF_LOCK();
1989 		/* if ifname is specified, clear counters there only */
1990 		if (pi->pfiio_name[0]) {
1991 			pfi_update_status(pi->pfiio_name, NULL);
1992 			PF_UNLOCK();
1993 			NET_UNLOCK();
1994 			goto fail;
1995 		}
1996 
1997 		memset(pf_status.counters, 0, sizeof(pf_status.counters));
1998 		memset(pf_status.fcounters, 0, sizeof(pf_status.fcounters));
1999 		memset(pf_status.scounters, 0, sizeof(pf_status.scounters));
2000 		PF_FRAG_LOCK();
2001 		memset(pf_status.ncounters, 0, sizeof(pf_status.ncounters));
2002 		PF_FRAG_UNLOCK();
2003 		pf_status.since = getuptime();
2004 
2005 		PF_UNLOCK();
2006 		NET_UNLOCK();
2007 		break;
2008 	}
2009 
2010 	case DIOCNATLOOK: {
2011 		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
2012 		struct pf_state_key	*sk;
2013 		struct pf_state		*st;
2014 		struct pf_state_key_cmp	 key;
2015 		int			 m = 0, direction = pnl->direction;
2016 		int			 sidx, didx;
2017 
2018 		switch (pnl->af) {
2019 		case AF_INET:
2020 			break;
2021 #ifdef INET6
2022 		case AF_INET6:
2023 			break;
2024 #endif /* INET6 */
2025 		default:
2026 			error = EAFNOSUPPORT;
2027 			goto fail;
2028 		}
2029 
2030 		/* NATLOOK src and dst are reversed, so reverse sidx/didx */
2031 		sidx = (direction == PF_IN) ? 1 : 0;
2032 		didx = (direction == PF_IN) ? 0 : 1;
2033 
2034 		if (!pnl->proto ||
2035 		    PF_AZERO(&pnl->saddr, pnl->af) ||
2036 		    PF_AZERO(&pnl->daddr, pnl->af) ||
2037 		    ((pnl->proto == IPPROTO_TCP ||
2038 		    pnl->proto == IPPROTO_UDP) &&
2039 		    (!pnl->dport || !pnl->sport)) ||
2040 		    pnl->rdomain > RT_TABLEID_MAX)
2041 			error = EINVAL;
2042 		else {
2043 			key.af = pnl->af;
2044 			key.proto = pnl->proto;
2045 			key.rdomain = pnl->rdomain;
2046 			pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af);
2047 			key.port[sidx] = pnl->sport;
2048 			pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af);
2049 			key.port[didx] = pnl->dport;
2050 
2051 			NET_LOCK();
2052 			PF_STATE_ENTER_READ();
2053 			st = pf_find_state_all(&key, direction, &m);
2054 			st = pf_state_ref(st);
2055 			PF_STATE_EXIT_READ();
2056 			NET_UNLOCK();
2057 
2058 			if (m > 1)
2059 				error = E2BIG;	/* more than one state */
2060 			else if (st != NULL) {
2061 				sk = st->key[sidx];
2062 				pf_addrcpy(&pnl->rsaddr, &sk->addr[sidx],
2063 				    sk->af);
2064 				pnl->rsport = sk->port[sidx];
2065 				pf_addrcpy(&pnl->rdaddr, &sk->addr[didx],
2066 				    sk->af);
2067 				pnl->rdport = sk->port[didx];
2068 				pnl->rrdomain = sk->rdomain;
2069 			} else
2070 				error = ENOENT;
2071 			pf_state_unref(st);
2072 		}
2073 		break;
2074 	}
2075 
2076 	case DIOCSETTIMEOUT: {
2077 		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
2078 
2079 		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
2080 		    pt->seconds < 0) {
2081 			error = EINVAL;
2082 			goto fail;
2083 		}
2084 		NET_LOCK();
2085 		PF_LOCK();
2086 		if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
2087 			pt->seconds = 1;
2088 		pf_default_rule_new.timeout[pt->timeout] = pt->seconds;
2089 		pt->seconds = pf_default_rule.timeout[pt->timeout];
2090 		PF_UNLOCK();
2091 		NET_UNLOCK();
2092 		break;
2093 	}
2094 
2095 	case DIOCGETTIMEOUT: {
2096 		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
2097 
2098 		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
2099 			error = EINVAL;
2100 			goto fail;
2101 		}
2102 		PF_LOCK();
2103 		pt->seconds = pf_default_rule.timeout[pt->timeout];
2104 		PF_UNLOCK();
2105 		break;
2106 	}
2107 
2108 	case DIOCGETLIMIT: {
2109 		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
2110 
2111 		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
2112 			error = EINVAL;
2113 			goto fail;
2114 		}
2115 		PF_LOCK();
2116 		pl->limit = pf_pool_limits[pl->index].limit;
2117 		PF_UNLOCK();
2118 		break;
2119 	}
2120 
2121 	case DIOCSETLIMIT: {
2122 		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
2123 
2124 		PF_LOCK();
2125 		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
2126 			error = EINVAL;
2127 			PF_UNLOCK();
2128 			goto fail;
2129 		}
2130 		if (((struct pool *)pf_pool_limits[pl->index].pp)->pr_nout >
2131 		    pl->limit) {
2132 			error = EBUSY;
2133 			PF_UNLOCK();
2134 			goto fail;
2135 		}
2136 		/* Fragments reference mbuf clusters. */
2137 		if (pl->index == PF_LIMIT_FRAGS && pl->limit > nmbclust) {
2138 			error = EINVAL;
2139 			PF_UNLOCK();
2140 			goto fail;
2141 		}
2142 
2143 		pf_pool_limits[pl->index].limit_new = pl->limit;
2144 		pl->limit = pf_pool_limits[pl->index].limit;
2145 		PF_UNLOCK();
2146 		break;
2147 	}
2148 
2149 	case DIOCSETDEBUG: {
2150 		u_int32_t	*level = (u_int32_t *)addr;
2151 
2152 		NET_LOCK();
2153 		PF_LOCK();
2154 		pf_trans_set.debug = *level;
2155 		pf_trans_set.mask |= PF_TSET_DEBUG;
2156 		PF_UNLOCK();
2157 		NET_UNLOCK();
2158 		break;
2159 	}
2160 
2161 	case DIOCGETRULESETS: {
2162 		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
2163 		struct pf_ruleset	*ruleset;
2164 		struct pf_anchor	*anchor;
2165 
2166 		PF_LOCK();
2167 		pr->path[sizeof(pr->path) - 1] = '\0';
2168 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
2169 			error = EINVAL;
2170 			PF_UNLOCK();
2171 			goto fail;
2172 		}
2173 		pr->nr = 0;
2174 		if (ruleset == &pf_main_ruleset) {
2175 			/* XXX kludge for pf_main_ruleset */
2176 			RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
2177 				if (anchor->parent == NULL)
2178 					pr->nr++;
2179 		} else {
2180 			RB_FOREACH(anchor, pf_anchor_node,
2181 			    &ruleset->anchor->children)
2182 				pr->nr++;
2183 		}
2184 		PF_UNLOCK();
2185 		break;
2186 	}
2187 
2188 	case DIOCGETRULESET: {
2189 		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
2190 		struct pf_ruleset	*ruleset;
2191 		struct pf_anchor	*anchor;
2192 		u_int32_t		 nr = 0;
2193 
2194 		PF_LOCK();
2195 		pr->path[sizeof(pr->path) - 1] = '\0';
2196 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
2197 			error = EINVAL;
2198 			PF_UNLOCK();
2199 			goto fail;
2200 		}
2201 		pr->name[0] = '\0';
2202 		if (ruleset == &pf_main_ruleset) {
2203 			/* XXX kludge for pf_main_ruleset */
2204 			RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
2205 				if (anchor->parent == NULL && nr++ == pr->nr) {
2206 					strlcpy(pr->name, anchor->name,
2207 					    sizeof(pr->name));
2208 					break;
2209 				}
2210 		} else {
2211 			RB_FOREACH(anchor, pf_anchor_node,
2212 			    &ruleset->anchor->children)
2213 				if (nr++ == pr->nr) {
2214 					strlcpy(pr->name, anchor->name,
2215 					    sizeof(pr->name));
2216 					break;
2217 				}
2218 		}
2219 		PF_UNLOCK();
2220 		if (!pr->name[0])
2221 			error = EBUSY;
2222 		break;
2223 	}
2224 
2225 	case DIOCRCLRTABLES: {
2226 		struct pfioc_table *io = (struct pfioc_table *)addr;
2227 
2228 		if (io->pfrio_esize != 0) {
2229 			error = ENODEV;
2230 			goto fail;
2231 		}
2232 		NET_LOCK();
2233 		PF_LOCK();
2234 		error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
2235 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
2236 		PF_UNLOCK();
2237 		NET_UNLOCK();
2238 		break;
2239 	}
2240 
2241 	case DIOCRADDTABLES: {
2242 		struct pfioc_table *io = (struct pfioc_table *)addr;
2243 
2244 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2245 			error = ENODEV;
2246 			goto fail;
2247 		}
2248 		error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
2249 		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2250 		break;
2251 	}
2252 
2253 	case DIOCRDELTABLES: {
2254 		struct pfioc_table *io = (struct pfioc_table *)addr;
2255 
2256 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2257 			error = ENODEV;
2258 			goto fail;
2259 		}
2260 		NET_LOCK();
2261 		PF_LOCK();
2262 		error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size,
2263 		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2264 		PF_UNLOCK();
2265 		NET_UNLOCK();
2266 		break;
2267 	}
2268 
2269 	case DIOCRGETTABLES: {
2270 		struct pfioc_table *io = (struct pfioc_table *)addr;
2271 
2272 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2273 			error = ENODEV;
2274 			goto fail;
2275 		}
2276 		NET_LOCK();
2277 		PF_LOCK();
2278 		error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer,
2279 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2280 		PF_UNLOCK();
2281 		NET_UNLOCK();
2282 		break;
2283 	}
2284 
2285 	case DIOCRGETTSTATS: {
2286 		struct pfioc_table *io = (struct pfioc_table *)addr;
2287 
2288 		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
2289 			error = ENODEV;
2290 			goto fail;
2291 		}
2292 		NET_LOCK();
2293 		PF_LOCK();
2294 		error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer,
2295 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2296 		PF_UNLOCK();
2297 		NET_UNLOCK();
2298 		break;
2299 	}
2300 
2301 	case DIOCRCLRTSTATS: {
2302 		struct pfioc_table *io = (struct pfioc_table *)addr;
2303 
2304 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2305 			error = ENODEV;
2306 			goto fail;
2307 		}
2308 		NET_LOCK();
2309 		PF_LOCK();
2310 		error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size,
2311 		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2312 		PF_UNLOCK();
2313 		NET_UNLOCK();
2314 		break;
2315 	}
2316 
2317 	case DIOCRSETTFLAGS: {
2318 		struct pfioc_table *io = (struct pfioc_table *)addr;
2319 
2320 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
2321 			error = ENODEV;
2322 			goto fail;
2323 		}
2324 		NET_LOCK();
2325 		PF_LOCK();
2326 		error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size,
2327 		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
2328 		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2329 		PF_UNLOCK();
2330 		NET_UNLOCK();
2331 		break;
2332 	}
2333 
2334 	case DIOCRCLRADDRS: {
2335 		struct pfioc_table *io = (struct pfioc_table *)addr;
2336 
2337 		if (io->pfrio_esize != 0) {
2338 			error = ENODEV;
2339 			goto fail;
2340 		}
2341 		NET_LOCK();
2342 		PF_LOCK();
2343 		error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
2344 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
2345 		PF_UNLOCK();
2346 		NET_UNLOCK();
2347 		break;
2348 	}
2349 
2350 	case DIOCRADDADDRS: {
2351 		struct pfioc_table *io = (struct pfioc_table *)addr;
2352 
2353 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2354 			error = ENODEV;
2355 			goto fail;
2356 		}
2357 		error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer,
2358 		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
2359 		    PFR_FLAG_USERIOCTL);
2360 		break;
2361 	}
2362 
2363 	case DIOCRDELADDRS: {
2364 		struct pfioc_table *io = (struct pfioc_table *)addr;
2365 
2366 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2367 			error = ENODEV;
2368 			goto fail;
2369 		}
2370 		NET_LOCK();
2371 		PF_LOCK();
2372 		error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer,
2373 		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
2374 		    PFR_FLAG_USERIOCTL);
2375 		PF_UNLOCK();
2376 		NET_UNLOCK();
2377 		break;
2378 	}
2379 
2380 	case DIOCRSETADDRS: {
2381 		struct pfioc_table *io = (struct pfioc_table *)addr;
2382 
2383 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2384 			error = ENODEV;
2385 			goto fail;
2386 		}
2387 		NET_LOCK();
2388 		PF_LOCK();
2389 		error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer,
2390 		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
2391 		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
2392 		    PFR_FLAG_USERIOCTL, 0);
2393 		PF_UNLOCK();
2394 		NET_UNLOCK();
2395 		break;
2396 	}
2397 
2398 	case DIOCRGETADDRS: {
2399 		struct pfioc_table *io = (struct pfioc_table *)addr;
2400 
2401 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2402 			error = ENODEV;
2403 			goto fail;
2404 		}
2405 		NET_LOCK();
2406 		PF_LOCK();
2407 		error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer,
2408 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2409 		PF_UNLOCK();
2410 		NET_UNLOCK();
2411 		break;
2412 	}
2413 
2414 	case DIOCRGETASTATS: {
2415 		struct pfioc_table *io = (struct pfioc_table *)addr;
2416 
2417 		if (io->pfrio_esize != sizeof(struct pfr_astats)) {
2418 			error = ENODEV;
2419 			goto fail;
2420 		}
2421 		NET_LOCK();
2422 		PF_LOCK();
2423 		error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer,
2424 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2425 		PF_UNLOCK();
2426 		NET_UNLOCK();
2427 		break;
2428 	}
2429 
2430 	case DIOCRCLRASTATS: {
2431 		struct pfioc_table *io = (struct pfioc_table *)addr;
2432 
2433 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2434 			error = ENODEV;
2435 			goto fail;
2436 		}
2437 		NET_LOCK();
2438 		PF_LOCK();
2439 		error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer,
2440 		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
2441 		    PFR_FLAG_USERIOCTL);
2442 		PF_UNLOCK();
2443 		NET_UNLOCK();
2444 		break;
2445 	}
2446 
2447 	case DIOCRTSTADDRS: {
2448 		struct pfioc_table *io = (struct pfioc_table *)addr;
2449 
2450 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2451 			error = ENODEV;
2452 			goto fail;
2453 		}
2454 		NET_LOCK();
2455 		PF_LOCK();
2456 		error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer,
2457 		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
2458 		    PFR_FLAG_USERIOCTL);
2459 		PF_UNLOCK();
2460 		NET_UNLOCK();
2461 		break;
2462 	}
2463 
2464 	case DIOCRINADEFINE: {
2465 		struct pfioc_table *io = (struct pfioc_table *)addr;
2466 
2467 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
2468 			error = ENODEV;
2469 			goto fail;
2470 		}
2471 		NET_LOCK();
2472 		PF_LOCK();
2473 		error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer,
2474 		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
2475 		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
2476 		PF_UNLOCK();
2477 		NET_UNLOCK();
2478 		break;
2479 	}
2480 
2481 	case DIOCOSFPADD: {
2482 		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
2483 		error = pf_osfp_add(io);
2484 		break;
2485 	}
2486 
2487 	case DIOCOSFPGET: {
2488 		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
2489 		error = pf_osfp_get(io);
2490 		break;
2491 	}
2492 
2493 	case DIOCXBEGIN: {
2494 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
2495 		struct pfioc_trans_e	*ioe;
2496 		struct pfr_table	*table;
2497 		int			 i;
2498 
2499 		if (io->esize != sizeof(*ioe)) {
2500 			error = ENODEV;
2501 			goto fail;
2502 		}
2503 		ioe = malloc(sizeof(*ioe), M_PF, M_WAITOK);
2504 		table = malloc(sizeof(*table), M_PF, M_WAITOK);
2505 		NET_LOCK();
2506 		PF_LOCK();
2507 		pf_default_rule_new = pf_default_rule;
2508 		PF_UNLOCK();
2509 		NET_UNLOCK();
2510 		memset(&pf_trans_set, 0, sizeof(pf_trans_set));
2511 		for (i = 0; i < io->size; i++) {
2512 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2513 				free(table, M_PF, sizeof(*table));
2514 				free(ioe, M_PF, sizeof(*ioe));
2515 				error = EFAULT;
2516 				goto fail;
2517 			}
2518 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2519 			    sizeof(ioe->anchor)) {
2520 				free(table, M_PF, sizeof(*table));
2521 				free(ioe, M_PF, sizeof(*ioe));
2522 				error = ENAMETOOLONG;
2523 				goto fail;
2524 			}
2525 			NET_LOCK();
2526 			PF_LOCK();
2527 			switch (ioe->type) {
2528 			case PF_TRANS_TABLE:
2529 				memset(table, 0, sizeof(*table));
2530 				strlcpy(table->pfrt_anchor, ioe->anchor,
2531 				    sizeof(table->pfrt_anchor));
2532 				if ((error = pfr_ina_begin(table,
2533 				    &ioe->ticket, NULL, 0))) {
2534 					PF_UNLOCK();
2535 					NET_UNLOCK();
2536 					free(table, M_PF, sizeof(*table));
2537 					free(ioe, M_PF, sizeof(*ioe));
2538 					goto fail;
2539 				}
2540 				break;
2541 			case PF_TRANS_RULESET:
2542 				if ((error = pf_begin_rules(&ioe->ticket,
2543 				    ioe->anchor))) {
2544 					PF_UNLOCK();
2545 					NET_UNLOCK();
2546 					free(table, M_PF, sizeof(*table));
2547 					free(ioe, M_PF, sizeof(*ioe));
2548 					goto fail;
2549 				}
2550 				break;
2551 			default:
2552 				PF_UNLOCK();
2553 				NET_UNLOCK();
2554 				free(table, M_PF, sizeof(*table));
2555 				free(ioe, M_PF, sizeof(*ioe));
2556 				error = EINVAL;
2557 				goto fail;
2558 			}
2559 			PF_UNLOCK();
2560 			NET_UNLOCK();
2561 			if (copyout(ioe, io->array+i, sizeof(io->array[i]))) {
2562 				free(table, M_PF, sizeof(*table));
2563 				free(ioe, M_PF, sizeof(*ioe));
2564 				error = EFAULT;
2565 				goto fail;
2566 			}
2567 		}
2568 		free(table, M_PF, sizeof(*table));
2569 		free(ioe, M_PF, sizeof(*ioe));
2570 		break;
2571 	}
2572 
2573 	case DIOCXROLLBACK: {
2574 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
2575 		struct pfioc_trans_e	*ioe;
2576 		struct pfr_table	*table;
2577 		int			 i;
2578 
2579 		if (io->esize != sizeof(*ioe)) {
2580 			error = ENODEV;
2581 			goto fail;
2582 		}
2583 		ioe = malloc(sizeof(*ioe), M_PF, M_WAITOK);
2584 		table = malloc(sizeof(*table), M_PF, M_WAITOK);
2585 		for (i = 0; i < io->size; i++) {
2586 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2587 				free(table, M_PF, sizeof(*table));
2588 				free(ioe, M_PF, sizeof(*ioe));
2589 				error = EFAULT;
2590 				goto fail;
2591 			}
2592 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2593 			    sizeof(ioe->anchor)) {
2594 				free(table, M_PF, sizeof(*table));
2595 				free(ioe, M_PF, sizeof(*ioe));
2596 				error = ENAMETOOLONG;
2597 				goto fail;
2598 			}
2599 			NET_LOCK();
2600 			PF_LOCK();
2601 			switch (ioe->type) {
2602 			case PF_TRANS_TABLE:
2603 				memset(table, 0, sizeof(*table));
2604 				strlcpy(table->pfrt_anchor, ioe->anchor,
2605 				    sizeof(table->pfrt_anchor));
2606 				if ((error = pfr_ina_rollback(table,
2607 				    ioe->ticket, NULL, 0))) {
2608 					PF_UNLOCK();
2609 					NET_UNLOCK();
2610 					free(table, M_PF, sizeof(*table));
2611 					free(ioe, M_PF, sizeof(*ioe));
2612 					goto fail; /* really bad */
2613 				}
2614 				break;
2615 			case PF_TRANS_RULESET:
2616 				pf_rollback_rules(ioe->ticket, ioe->anchor);
2617 				break;
2618 			default:
2619 				PF_UNLOCK();
2620 				NET_UNLOCK();
2621 				free(table, M_PF, sizeof(*table));
2622 				free(ioe, M_PF, sizeof(*ioe));
2623 				error = EINVAL;
2624 				goto fail; /* really bad */
2625 			}
2626 			PF_UNLOCK();
2627 			NET_UNLOCK();
2628 		}
2629 		free(table, M_PF, sizeof(*table));
2630 		free(ioe, M_PF, sizeof(*ioe));
2631 		break;
2632 	}
2633 
2634 	case DIOCXCOMMIT: {
2635 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
2636 		struct pfioc_trans_e	*ioe;
2637 		struct pfr_table	*table;
2638 		struct pf_ruleset	*rs;
2639 		int			 i;
2640 
2641 		if (io->esize != sizeof(*ioe)) {
2642 			error = ENODEV;
2643 			goto fail;
2644 		}
2645 		ioe = malloc(sizeof(*ioe), M_PF, M_WAITOK);
2646 		table = malloc(sizeof(*table), M_PF, M_WAITOK);
2647 		/* first makes sure everything will succeed */
2648 		for (i = 0; i < io->size; i++) {
2649 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2650 				free(table, M_PF, sizeof(*table));
2651 				free(ioe, M_PF, sizeof(*ioe));
2652 				error = EFAULT;
2653 				goto fail;
2654 			}
2655 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2656 			    sizeof(ioe->anchor)) {
2657 				free(table, M_PF, sizeof(*table));
2658 				free(ioe, M_PF, sizeof(*ioe));
2659 				error = ENAMETOOLONG;
2660 				goto fail;
2661 			}
2662 			NET_LOCK();
2663 			PF_LOCK();
2664 			switch (ioe->type) {
2665 			case PF_TRANS_TABLE:
2666 				rs = pf_find_ruleset(ioe->anchor);
2667 				if (rs == NULL || !rs->topen || ioe->ticket !=
2668 				     rs->tticket) {
2669 					PF_UNLOCK();
2670 					NET_UNLOCK();
2671 					free(table, M_PF, sizeof(*table));
2672 					free(ioe, M_PF, sizeof(*ioe));
2673 					error = EBUSY;
2674 					goto fail;
2675 				}
2676 				break;
2677 			case PF_TRANS_RULESET:
2678 				rs = pf_find_ruleset(ioe->anchor);
2679 				if (rs == NULL ||
2680 				    !rs->rules.inactive.open ||
2681 				    rs->rules.inactive.version !=
2682 				    ioe->ticket) {
2683 					PF_UNLOCK();
2684 					NET_UNLOCK();
2685 					free(table, M_PF, sizeof(*table));
2686 					free(ioe, M_PF, sizeof(*ioe));
2687 					error = EBUSY;
2688 					goto fail;
2689 				}
2690 				break;
2691 			default:
2692 				PF_UNLOCK();
2693 				NET_UNLOCK();
2694 				free(table, M_PF, sizeof(*table));
2695 				free(ioe, M_PF, sizeof(*ioe));
2696 				error = EINVAL;
2697 				goto fail;
2698 			}
2699 			PF_UNLOCK();
2700 			NET_UNLOCK();
2701 		}
2702 		NET_LOCK();
2703 		PF_LOCK();
2704 
2705 		/*
2706 		 * Checked already in DIOCSETLIMIT, but check again as the
2707 		 * situation might have changed.
2708 		 */
2709 		for (i = 0; i < PF_LIMIT_MAX; i++) {
2710 			if (((struct pool *)pf_pool_limits[i].pp)->pr_nout >
2711 			    pf_pool_limits[i].limit_new) {
2712 				PF_UNLOCK();
2713 				NET_UNLOCK();
2714 				free(table, M_PF, sizeof(*table));
2715 				free(ioe, M_PF, sizeof(*ioe));
2716 				error = EBUSY;
2717 				goto fail;
2718 			}
2719 		}
2720 		/* now do the commit - no errors should happen here */
2721 		for (i = 0; i < io->size; i++) {
2722 			PF_UNLOCK();
2723 			NET_UNLOCK();
2724 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
2725 				free(table, M_PF, sizeof(*table));
2726 				free(ioe, M_PF, sizeof(*ioe));
2727 				error = EFAULT;
2728 				goto fail;
2729 			}
2730 			if (strnlen(ioe->anchor, sizeof(ioe->anchor)) ==
2731 			    sizeof(ioe->anchor)) {
2732 				free(table, M_PF, sizeof(*table));
2733 				free(ioe, M_PF, sizeof(*ioe));
2734 				error = ENAMETOOLONG;
2735 				goto fail;
2736 			}
2737 			NET_LOCK();
2738 			PF_LOCK();
2739 			switch (ioe->type) {
2740 			case PF_TRANS_TABLE:
2741 				memset(table, 0, sizeof(*table));
2742 				strlcpy(table->pfrt_anchor, ioe->anchor,
2743 				    sizeof(table->pfrt_anchor));
2744 				if ((error = pfr_ina_commit(table, ioe->ticket,
2745 				    NULL, NULL, 0))) {
2746 					PF_UNLOCK();
2747 					NET_UNLOCK();
2748 					free(table, M_PF, sizeof(*table));
2749 					free(ioe, M_PF, sizeof(*ioe));
2750 					goto fail; /* really bad */
2751 				}
2752 				break;
2753 			case PF_TRANS_RULESET:
2754 				if ((error = pf_commit_rules(ioe->ticket,
2755 				    ioe->anchor))) {
2756 					PF_UNLOCK();
2757 					NET_UNLOCK();
2758 					free(table, M_PF, sizeof(*table));
2759 					free(ioe, M_PF, sizeof(*ioe));
2760 					goto fail; /* really bad */
2761 				}
2762 				break;
2763 			default:
2764 				PF_UNLOCK();
2765 				NET_UNLOCK();
2766 				free(table, M_PF, sizeof(*table));
2767 				free(ioe, M_PF, sizeof(*ioe));
2768 				error = EINVAL;
2769 				goto fail; /* really bad */
2770 			}
2771 		}
2772 		for (i = 0; i < PF_LIMIT_MAX; i++) {
2773 			if (pf_pool_limits[i].limit_new !=
2774 			    pf_pool_limits[i].limit &&
2775 			    pool_sethardlimit(pf_pool_limits[i].pp,
2776 			    pf_pool_limits[i].limit_new, NULL, 0) != 0) {
2777 				PF_UNLOCK();
2778 				NET_UNLOCK();
2779 				free(table, M_PF, sizeof(*table));
2780 				free(ioe, M_PF, sizeof(*ioe));
2781 				error = EBUSY;
2782 				goto fail; /* really bad */
2783 			}
2784 			pf_pool_limits[i].limit = pf_pool_limits[i].limit_new;
2785 		}
2786 		for (i = 0; i < PFTM_MAX; i++) {
2787 			int old = pf_default_rule.timeout[i];
2788 
2789 			pf_default_rule.timeout[i] =
2790 			    pf_default_rule_new.timeout[i];
2791 			if (pf_default_rule.timeout[i] == PFTM_INTERVAL &&
2792 			    pf_default_rule.timeout[i] < old &&
2793 			    timeout_del(&pf_purge_to))
2794 				task_add(systqmp, &pf_purge_task);
2795 		}
2796 		pfi_xcommit();
2797 		pf_trans_set_commit();
2798 		PF_UNLOCK();
2799 		NET_UNLOCK();
2800 		free(table, M_PF, sizeof(*table));
2801 		free(ioe, M_PF, sizeof(*ioe));
2802 		break;
2803 	}
2804 
2805 	case DIOCXEND: {
2806 		u_int32_t	*ticket = (u_int32_t *)addr;
2807 		struct pf_trans	*t;
2808 
2809 		t = pf_find_trans(minor(dev), *ticket);
2810 		if (t != NULL)
2811 			pf_rollback_trans(t);
2812 		else
2813 			error = ENXIO;
2814 		break;
2815 	}
2816 
2817 	case DIOCGETSRCNODES: {
2818 		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
2819 		struct pf_src_node	*n, *p, *pstore;
2820 		u_int32_t		 nr = 0;
2821 		size_t			 space = psn->psn_len;
2822 
2823 		pstore = malloc(sizeof(*pstore), M_PF, M_WAITOK);
2824 
2825 		NET_LOCK();
2826 		PF_LOCK();
2827 		if (space == 0) {
2828 			RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
2829 				nr++;
2830 			psn->psn_len = sizeof(struct pf_src_node) * nr;
2831 			PF_UNLOCK();
2832 			NET_UNLOCK();
2833 			free(pstore, M_PF, sizeof(*pstore));
2834 			goto fail;
2835 		}
2836 
2837 		p = psn->psn_src_nodes;
2838 		RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
2839 			int	secs = getuptime(), diff;
2840 
2841 			if ((nr + 1) * sizeof(*p) > psn->psn_len)
2842 				break;
2843 
2844 			memcpy(pstore, n, sizeof(*pstore));
2845 			memset(&pstore->entry, 0, sizeof(pstore->entry));
2846 			pstore->rule.ptr = NULL;
2847 			pstore->kif = NULL;
2848 			pstore->rule.nr = n->rule.ptr->nr;
2849 			pstore->creation = secs - pstore->creation;
2850 			if (pstore->expire > secs)
2851 				pstore->expire -= secs;
2852 			else
2853 				pstore->expire = 0;
2854 
2855 			/* adjust the connection rate estimate */
2856 			diff = secs - n->conn_rate.last;
2857 			if (diff >= n->conn_rate.seconds)
2858 				pstore->conn_rate.count = 0;
2859 			else
2860 				pstore->conn_rate.count -=
2861 				    n->conn_rate.count * diff /
2862 				    n->conn_rate.seconds;
2863 
2864 			error = copyout(pstore, p, sizeof(*p));
2865 			if (error) {
2866 				PF_UNLOCK();
2867 				NET_UNLOCK();
2868 				free(pstore, M_PF, sizeof(*pstore));
2869 				goto fail;
2870 			}
2871 			p++;
2872 			nr++;
2873 		}
2874 		psn->psn_len = sizeof(struct pf_src_node) * nr;
2875 
2876 		PF_UNLOCK();
2877 		NET_UNLOCK();
2878 		free(pstore, M_PF, sizeof(*pstore));
2879 		break;
2880 	}
2881 
2882 	case DIOCCLRSRCNODES: {
2883 		struct pf_src_node	*n;
2884 		struct pf_state		*st;
2885 
2886 		NET_LOCK();
2887 		PF_LOCK();
2888 		PF_STATE_ENTER_WRITE();
2889 		RBT_FOREACH(st, pf_state_tree_id, &tree_id)
2890 			pf_src_tree_remove_state(st);
2891 		PF_STATE_EXIT_WRITE();
2892 		RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
2893 			n->expire = 1;
2894 		pf_purge_expired_src_nodes();
2895 		PF_UNLOCK();
2896 		NET_UNLOCK();
2897 		break;
2898 	}
2899 
2900 	case DIOCKILLSRCNODES: {
2901 		struct pf_src_node	*sn;
2902 		struct pf_state		*st;
2903 		struct pfioc_src_node_kill *psnk =
2904 		    (struct pfioc_src_node_kill *)addr;
2905 		u_int			killed = 0;
2906 
2907 		NET_LOCK();
2908 		PF_LOCK();
2909 		RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
2910 			if (pf_match_addr(psnk->psnk_src.neg,
2911 				&psnk->psnk_src.addr.v.a.addr,
2912 				&psnk->psnk_src.addr.v.a.mask,
2913 				&sn->addr, sn->af) &&
2914 			    pf_match_addr(psnk->psnk_dst.neg,
2915 				&psnk->psnk_dst.addr.v.a.addr,
2916 				&psnk->psnk_dst.addr.v.a.mask,
2917 				&sn->raddr, sn->af)) {
2918 				/* Handle state to src_node linkage */
2919 				if (sn->states != 0) {
2920 					PF_ASSERT_LOCKED();
2921 					PF_STATE_ENTER_WRITE();
2922 					RBT_FOREACH(st, pf_state_tree_id,
2923 					   &tree_id)
2924 						pf_state_rm_src_node(st, sn);
2925 					PF_STATE_EXIT_WRITE();
2926 				}
2927 				sn->expire = 1;
2928 				killed++;
2929 			}
2930 		}
2931 
2932 		if (killed > 0)
2933 			pf_purge_expired_src_nodes();
2934 
2935 		psnk->psnk_killed = killed;
2936 		PF_UNLOCK();
2937 		NET_UNLOCK();
2938 		break;
2939 	}
2940 
2941 	case DIOCSETHOSTID: {
2942 		u_int32_t	*hostid = (u_int32_t *)addr;
2943 
2944 		NET_LOCK();
2945 		PF_LOCK();
2946 		if (*hostid == 0)
2947 			pf_trans_set.hostid = arc4random();
2948 		else
2949 			pf_trans_set.hostid = *hostid;
2950 		pf_trans_set.mask |= PF_TSET_HOSTID;
2951 		PF_UNLOCK();
2952 		NET_UNLOCK();
2953 		break;
2954 	}
2955 
2956 	case DIOCOSFPFLUSH:
2957 		pf_osfp_flush();
2958 		break;
2959 
2960 	case DIOCIGETIFACES: {
2961 		struct pfioc_iface	*io = (struct pfioc_iface *)addr;
2962 		struct pfi_kif		*kif_buf;
2963 		int			 apfiio_size = io->pfiio_size;
2964 
2965 		if (io->pfiio_esize != sizeof(struct pfi_kif)) {
2966 			error = ENODEV;
2967 			goto fail;
2968 		}
2969 
2970 		if ((kif_buf = mallocarray(sizeof(*kif_buf), apfiio_size,
2971 		    M_PF, M_WAITOK|M_CANFAIL)) == NULL) {
2972 			error = EINVAL;
2973 			goto fail;
2974 		}
2975 
2976 		NET_LOCK_SHARED();
2977 		PF_LOCK();
2978 		pfi_get_ifaces(io->pfiio_name, kif_buf, &io->pfiio_size);
2979 		PF_UNLOCK();
2980 		NET_UNLOCK_SHARED();
2981 		if (copyout(kif_buf, io->pfiio_buffer, sizeof(*kif_buf) *
2982 		    io->pfiio_size))
2983 			error = EFAULT;
2984 		free(kif_buf, M_PF, sizeof(*kif_buf) * apfiio_size);
2985 		break;
2986 	}
2987 
2988 	case DIOCSETIFFLAG: {
2989 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
2990 
2991 		if (io == NULL) {
2992 			error = EINVAL;
2993 			goto fail;
2994 		}
2995 
2996 		PF_LOCK();
2997 		error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
2998 		PF_UNLOCK();
2999 		break;
3000 	}
3001 
3002 	case DIOCCLRIFFLAG: {
3003 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
3004 
3005 		if (io == NULL) {
3006 			error = EINVAL;
3007 			goto fail;
3008 		}
3009 
3010 		PF_LOCK();
3011 		error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
3012 		PF_UNLOCK();
3013 		break;
3014 	}
3015 
3016 	case DIOCSETREASS: {
3017 		u_int32_t	*reass = (u_int32_t *)addr;
3018 
3019 		NET_LOCK();
3020 		PF_LOCK();
3021 		pf_trans_set.reass = *reass;
3022 		pf_trans_set.mask |= PF_TSET_REASS;
3023 		PF_UNLOCK();
3024 		NET_UNLOCK();
3025 		break;
3026 	}
3027 
3028 	case DIOCSETSYNFLWATS: {
3029 		struct pfioc_synflwats *io = (struct pfioc_synflwats *)addr;
3030 
3031 		NET_LOCK();
3032 		PF_LOCK();
3033 		error = pf_syncookies_setwats(io->hiwat, io->lowat);
3034 		PF_UNLOCK();
3035 		NET_UNLOCK();
3036 		break;
3037 	}
3038 
3039 	case DIOCGETSYNFLWATS: {
3040 		struct pfioc_synflwats *io = (struct pfioc_synflwats *)addr;
3041 
3042 		NET_LOCK();
3043 		PF_LOCK();
3044 		error = pf_syncookies_getwats(io);
3045 		PF_UNLOCK();
3046 		NET_UNLOCK();
3047 		break;
3048 	}
3049 
3050 	case DIOCSETSYNCOOKIES: {
3051 		u_int8_t	*mode = (u_int8_t *)addr;
3052 
3053 		NET_LOCK();
3054 		PF_LOCK();
3055 		error = pf_syncookies_setmode(*mode);
3056 		PF_UNLOCK();
3057 		NET_UNLOCK();
3058 		break;
3059 	}
3060 
3061 	default:
3062 		error = ENODEV;
3063 		break;
3064 	}
3065 fail:
3066 	rw_exit_write(&pfioctl_rw);
3067 
3068 	return (error);
3069 }
3070 
void
pf_trans_set_commit(void)
{
	/*
	 * Apply the staged global option changes collected in pf_trans_set
	 * to the live pf_status.  Each option is only copied over if the
	 * corresponding PF_TSET_* bit was set by its DIOCSET* ioctl, so
	 * untouched options keep their current values.
	 */
	if (pf_trans_set.mask & PF_TSET_STATUSIF)
		strlcpy(pf_status.ifname, pf_trans_set.statusif, IFNAMSIZ);
	if (pf_trans_set.mask & PF_TSET_DEBUG)
		pf_status.debug = pf_trans_set.debug;
	if (pf_trans_set.mask & PF_TSET_HOSTID)
		pf_status.hostid = pf_trans_set.hostid;
	if (pf_trans_set.mask & PF_TSET_REASS)
		pf_status.reass = pf_trans_set.reass;
}
3083 
void
pf_pool_copyin(struct pf_pool *from, struct pf_pool *to)
{
	/*
	 * Copy a pool description that came in from userland.  The
	 * embedded kernel pointers are cleared: userland addresses are
	 * meaningless here, and the caller (see pf_rule_copyin()) fills
	 * in kif/table references itself afterwards.
	 */
	memmove(to, from, sizeof(*to));
	to->kif = NULL;
	to->addr.p.tbl = NULL;
}
3091 
3092 int
pf_validate_range(u_int8_t op,u_int16_t port[2],int order)3093 pf_validate_range(u_int8_t op, u_int16_t port[2], int order)
3094 {
3095 	u_int16_t a = (order == PF_ORDER_NET) ? ntohs(port[0]) : port[0];
3096 	u_int16_t b = (order == PF_ORDER_NET) ? ntohs(port[1]) : port[1];
3097 
3098 	if ((op == PF_OP_RRG && a > b) ||  /* 34:12,  i.e. none */
3099 	    (op == PF_OP_IRG && a >= b) || /* 34><12, i.e. none */
3100 	    (op == PF_OP_XRG && a > b))    /* 34<>22, i.e. all */
3101 		return 1;
3102 	return 0;
3103 }
3104 
/*
 * Copy a rule handed in from userland (*from) into its kernel
 * representation (*to), validating it along the way: priority, port
 * range, routing table and routing domain checks, and resolution of
 * interface, queue and tag names into kernel objects.  Returns 0 on
 * success or an errno (EINVAL/EBUSY) on a validation failure.
 *
 * NOTE(review): on the error paths after the pfi_kif_alloc() calls,
 * the allocated kifs stay attached to *to; presumably the caller
 * releases the rule (and with it the kifs) on error — confirm against
 * the DIOCADDRULE/DIOCCHANGERULE paths.
 */
int
pf_rule_copyin(struct pf_rule *from, struct pf_rule *to)
{
	int i;

	/* set-prio values must fit the interface queue priority range */
	if (from->scrub_flags & PFSTATE_SETPRIO &&
	    (from->set_prio[0] > IFQ_MAXPRIO ||
	    from->set_prio[1] > IFQ_MAXPRIO))
		return (EINVAL);

	/* address/port specs; table pointers are userland junk, clear them */
	to->src = from->src;
	to->src.addr.p.tbl = NULL;
	to->dst = from->dst;
	to->dst.addr.p.tbl = NULL;

	if (pf_validate_range(to->src.port_op, to->src.port, PF_ORDER_NET))
		return (EINVAL);
	if (pf_validate_range(to->dst.port_op, to->dst.port, PF_ORDER_NET))
		return (EINVAL);

	/* XXX union skip[] */

	/* names are copied bounded; overlong input is silently truncated */
	strlcpy(to->label, from->label, sizeof(to->label));
	strlcpy(to->ifname, from->ifname, sizeof(to->ifname));
	strlcpy(to->rcv_ifname, from->rcv_ifname, sizeof(to->rcv_ifname));
	strlcpy(to->qname, from->qname, sizeof(to->qname));
	strlcpy(to->pqname, from->pqname, sizeof(to->pqname));
	strlcpy(to->tagname, from->tagname, sizeof(to->tagname));
	strlcpy(to->match_tagname, from->match_tagname,
	    sizeof(to->match_tagname));
	strlcpy(to->overload_tblname, from->overload_tblname,
	    sizeof(to->overload_tblname));

	pf_pool_copyin(&from->nat, &to->nat);
	pf_pool_copyin(&from->rdr, &to->rdr);
	pf_pool_copyin(&from->route, &to->route);

	/* rdr proxy ports are kept in host byte order */
	if (pf_validate_range(to->rdr.port_op, to->rdr.proxy_port,
	    PF_ORDER_HOST))
		return (EINVAL);

	/* resolve interface names; M_WAITOK, so these cannot fail */
	to->kif = (to->ifname[0]) ?
	    pfi_kif_alloc(to->ifname, M_WAITOK) : NULL;
	to->rcv_kif = (to->rcv_ifname[0]) ?
	    pfi_kif_alloc(to->rcv_ifname, M_WAITOK) : NULL;
	to->rdr.kif = (to->rdr.ifname[0]) ?
	    pfi_kif_alloc(to->rdr.ifname, M_WAITOK) : NULL;
	to->nat.kif = (to->nat.ifname[0]) ?
	    pfi_kif_alloc(to->nat.ifname, M_WAITOK) : NULL;
	to->route.kif = (to->route.ifname[0]) ?
	    pfi_kif_alloc(to->route.ifname, M_WAITOK) : NULL;

	to->os_fingerprint = from->os_fingerprint;

	/* the referenced routing table must already exist */
	to->rtableid = from->rtableid;
	if (to->rtableid >= 0 && !rtable_exists(to->rtableid))
		return (EBUSY);
	to->onrdomain = from->onrdomain;
	if (to->onrdomain != -1 && (to->onrdomain < 0 ||
	    to->onrdomain > RT_TABLEID_MAX))
		return (EINVAL);

	for (i = 0; i < PFTM_MAX; i++)
		to->timeout[i] = from->timeout[i];
	to->states_tot = from->states_tot;
	to->max_states = from->max_states;
	to->max_src_nodes = from->max_src_nodes;
	to->max_src_states = from->max_src_states;
	to->max_src_conn = from->max_src_conn;
	to->max_src_conn_rate.limit = from->max_src_conn_rate.limit;
	to->max_src_conn_rate.seconds = from->max_src_conn_rate.seconds;
	pf_init_threshold(&to->pktrate, from->pktrate.limit,
	    from->pktrate.seconds);

	/* queue names must resolve; a missing pqname defaults to qid */
	if (to->qname[0] != 0) {
		if ((to->qid = pf_qname2qid(to->qname, 0)) == 0)
			return (EBUSY);
		if (to->pqname[0] != 0) {
			if ((to->pqid = pf_qname2qid(to->pqname, 0)) == 0)
				return (EBUSY);
		} else
			to->pqid = to->qid;
	}
	to->rt_listid = from->rt_listid;
	to->prob = from->prob;
	to->return_icmp = from->return_icmp;
	to->return_icmp6 = from->return_icmp6;
	to->max_mss = from->max_mss;
	/* tag names are created on demand (second argument 1) */
	if (to->tagname[0])
		if ((to->tag = pf_tagname2tag(to->tagname, 1)) == 0)
			return (EBUSY);
	if (to->match_tagname[0])
		if ((to->match_tag = pf_tagname2tag(to->match_tagname, 1)) == 0)
			return (EBUSY);
	to->scrub_flags = from->scrub_flags;
	to->delay = from->delay;
	to->uid = from->uid;
	to->gid = from->gid;
	to->rule_flag = from->rule_flag;
	to->action = from->action;
	to->direction = from->direction;
	to->log = from->log;
	to->logif = from->logif;
#if NPFLOG > 0
	/* a log interface without logging enabled makes no sense */
	if (!to->log)
		to->logif = 0;
#endif	/* NPFLOG > 0 */
	to->quick = from->quick;
	to->ifnot = from->ifnot;
	to->rcvifnot = from->rcvifnot;
	to->match_tag_not = from->match_tag_not;
	to->keep_state = from->keep_state;
	to->af = from->af;
	to->naf = from->naf;
	to->proto = from->proto;
	to->type = from->type;
	to->code = from->code;
	to->flags = from->flags;
	to->flagset = from->flagset;
	to->min_ttl = from->min_ttl;
	to->allow_opts = from->allow_opts;
	to->rt = from->rt;
	to->return_ttl = from->return_ttl;
	to->tos = from->tos;
	to->set_tos = from->set_tos;
	to->anchor_relative = from->anchor_relative; /* XXX */
	to->anchor_wildcard = from->anchor_wildcard; /* XXX */
	to->flush = from->flush;
	to->divert.addr = from->divert.addr;
	to->divert.port = from->divert.port;
	to->divert.type = from->divert.type;
	to->prio = from->prio;
	to->set_prio[0] = from->set_prio[0];
	to->set_prio[1] = from->set_prio[1];

	return (0);
}
3242 
3243 int
pf_rule_checkaf(struct pf_rule * r)3244 pf_rule_checkaf(struct pf_rule *r)
3245 {
3246 	switch (r->af) {
3247 	case 0:
3248 		if (r->rule_flag & PFRULE_AFTO)
3249 			return (EPFNOSUPPORT);
3250 		break;
3251 	case AF_INET:
3252 		if ((r->rule_flag & PFRULE_AFTO) && r->naf != AF_INET6)
3253 			return (EPFNOSUPPORT);
3254 		break;
3255 #ifdef INET6
3256 	case AF_INET6:
3257 		if ((r->rule_flag & PFRULE_AFTO) && r->naf != AF_INET)
3258 			return (EPFNOSUPPORT);
3259 		break;
3260 #endif /* INET6 */
3261 	default:
3262 		return (EPFNOSUPPORT);
3263 	}
3264 
3265 	if ((r->rule_flag & PFRULE_AFTO) == 0 && r->naf != 0)
3266 		return (EPFNOSUPPORT);
3267 
3268 	return (0);
3269 }
3270 
/*
 * sysctl handler exporting the pf status.  A consistent snapshot of
 * pf_status is taken under the net/pf/fragment locks into a stack
 * copy, the interface counters are folded in, and the copy is handed
 * to sysctl_rdstruct() (read-only: newp must be NULL or EPERM results).
 */
int
pf_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct pf_status	pfs;

	NET_LOCK_SHARED();
	PF_LOCK();
	PF_FRAG_LOCK();
	/* frag lock covers the fragment counters inside pf_status */
	memcpy(&pfs, &pf_status, sizeof(struct pf_status));
	PF_FRAG_UNLOCK();
	/* merge per-interface stats for the status interface into the copy */
	pfi_update_status(pfs.ifname, &pfs);
	PF_UNLOCK();
	NET_UNLOCK_SHARED();

	return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs));
}
3287 
3288 struct pf_trans *
pf_open_trans(uint32_t unit)3289 pf_open_trans(uint32_t unit)
3290 {
3291 	static uint64_t ticket = 1;
3292 	struct pf_trans *t;
3293 
3294 	rw_assert_wrlock(&pfioctl_rw);
3295 
3296 	KASSERT(pf_unit2idx(unit) < nitems(pf_tcount));
3297 	if (pf_tcount[pf_unit2idx(unit)] >= (PF_ANCHOR_STACK_MAX * 8))
3298 		return (NULL);
3299 
3300 	t = malloc(sizeof(*t), M_PF, M_WAITOK|M_ZERO);
3301 	t->pft_unit = unit;
3302 	t->pft_ticket = ticket++;
3303 	pf_tcount[pf_unit2idx(unit)]++;
3304 
3305 	LIST_INSERT_HEAD(&pf_ioctl_trans, t, pft_entry);
3306 
3307 	return (t);
3308 }
3309 
3310 struct pf_trans *
pf_find_trans(uint32_t unit,uint64_t ticket)3311 pf_find_trans(uint32_t unit, uint64_t ticket)
3312 {
3313 	struct pf_trans	*t;
3314 
3315 	rw_assert_anylock(&pfioctl_rw);
3316 
3317 	LIST_FOREACH(t, &pf_ioctl_trans, pft_entry) {
3318 		if (t->pft_ticket == ticket && t->pft_unit == unit)
3319 			break;
3320 	}
3321 
3322 	return (t);
3323 }
3324 
3325 void
pf_init_tgetrule(struct pf_trans * t,struct pf_anchor * a,uint32_t rs_version,struct pf_rule * r)3326 pf_init_tgetrule(struct pf_trans *t, struct pf_anchor *a,
3327     uint32_t rs_version, struct pf_rule *r)
3328 {
3329 	t->pft_type = PF_TRANS_GETRULE;
3330 	if (a == NULL)
3331 		t->pftgr_anchor = &pf_main_anchor;
3332 	else
3333 		t->pftgr_anchor = a;
3334 
3335 	t->pftgr_version = rs_version;
3336 	t->pftgr_rule = r;
3337 }
3338 
void
pf_cleanup_tgetrule(struct pf_trans *t)
{
	/*
	 * Drop the anchor reference a DIOCGETRULE transaction holds;
	 * counterpart of pf_init_tgetrule().
	 */
	KASSERT(t->pft_type == PF_TRANS_GETRULE);
	pf_anchor_rele(t->pftgr_anchor);
}
3345 
3346 void
pf_free_trans(struct pf_trans * t)3347 pf_free_trans(struct pf_trans *t)
3348 {
3349 	switch (t->pft_type) {
3350 	case PF_TRANS_GETRULE:
3351 		pf_cleanup_tgetrule(t);
3352 		break;
3353 	default:
3354 		log(LOG_ERR, "%s unknown transaction type: %d\n",
3355 		    __func__, t->pft_type);
3356 	}
3357 
3358 	KASSERT(pf_unit2idx(t->pft_unit) < nitems(pf_tcount));
3359 	KASSERT(pf_tcount[pf_unit2idx(t->pft_unit)] >= 1);
3360 	pf_tcount[pf_unit2idx(t->pft_unit)]--;
3361 
3362 	free(t, M_PF, sizeof(*t));
3363 }
3364 
3365 void
pf_rollback_trans(struct pf_trans * t)3366 pf_rollback_trans(struct pf_trans *t)
3367 {
3368 	if (t != NULL) {
3369 		rw_assert_wrlock(&pfioctl_rw);
3370 		LIST_REMOVE(t, pft_entry);
3371 		pf_free_trans(t);
3372 	}
3373 }
3374