1 /*
2 ** Copyright (C) 2018-2021 Cisco and/or its affiliates. All rights reserved.
3 ** Author: Michael R. Altizer <mialtize@cisco.com>
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License Version 2 as
7 ** published by the Free Software Foundation. You may not use, modify or
8 ** distribute this program under any other version of the GNU General
9 ** Public License.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24
25 #include <arpa/inet.h>
26
27 #include <errno.h>
28 #include <linux/netfilter.h>
29 #include <linux/netfilter/nfnetlink_queue.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/time.h>
33
34 #include <libmnl/libmnl.h>
35
36 #include "daq_dlt.h"
37 #include "daq_module_api.h"
38
39 /* FIXIT-M Need to figure out how to reimplement inject for NFQ */
40
/* Version number reported in the module_version field of this module's API table. */
#define DAQ_NFQ_VERSION 8

/* Number of packet descriptors allocated when the configuration does not request a pool size. */
#define NFQ_DEFAULT_POOL_SIZE 16
/* Initial value for the requested kernel queue length; the "queue_maxlen" variable overrides it. */
#define DEFAULT_QUEUE_MAXLEN 1024 // Based on NFQNL_QMAX_DEFAULT from nfnetlink_queue_core.c

/* Record a printf-style error message against a module instance through the base API. */
#define SET_ERROR(modinst, ...) daq_base_api.set_errbuf(modinst, __VA_ARGS__)
47
/*
 * Packet descriptor: one received netlink message plus the DAQ message/packet headers that
 * describe it to the caller.  Descriptors live in a fixed pool and are chained on a free list.
 */
typedef struct _nfq_pkt_desc
{
    DAQ_Msg_t msg;                      /* DAQ message handed to the caller by msg_receive */
    DAQ_PktHdr_t pkthdr;                /* Packet header that msg.hdr points at */
    uint8_t *nlmsg_buf;                 /* Per-descriptor buffer the netlink message is received into */
    const struct nlmsghdr *nlmh;        /* Netlink header of the parsed message; NULL while the descriptor is unused */
    struct nfqnl_msg_packet_hdr *nlph;  /* Payload of the NFQA_PACKET_HDR attribute (source of the packet ID for verdicts) */
    struct _nfq_pkt_desc *next;         /* Free list linkage */
} NfqPktDesc;
57
/* Fixed-size pool of packet descriptors together with its bookkeeping. */
typedef struct _nfq_msg_pool
{
    NfqPktDesc *pool;           /* Array holding every descriptor in the pool */
    NfqPktDesc *freelist;       /* Singly-linked list of descriptors currently available for receive */
    DAQ_MsgPoolInfo_t info;     /* size / available / mem_size accounting reported by get_msg_pool_info */
} NfqMsgPool;
64
/* Per-instance state for the NFQ module; allocated in instantiate() and released in destroy(). */
typedef struct _nfq_context
{
    /* Configuration */
    unsigned queue_num;         /* Netfilter queue number parsed from the configured input string */
    int snaplen;                /* Snap length from the configuration; used as the kernel copy range */
    int timeout;                /* Receive timeout in milliseconds (0 leaves the socket without a timeout) */
    unsigned queue_maxlen;      /* Maximum queue length requested from the kernel */
    bool fail_open;             /* Set by the "fail_open" variable */
    bool debug;                 /* Set by the "debug" variable; enables printf diagnostics */
    /* State */
    DAQ_ModuleInstance_h modinst;   /* Module instance handle used for error reporting and as message owner */
    DAQ_Stats_t stats;              /* Counters returned by get_stats */
    NfqMsgPool pool;                /* Packet descriptor pool */
    char *nlmsg_buf;                /* Scratch buffer for building outgoing netlink messages */
    size_t nlmsg_bufsize;           /* Size of the scratch buffer and of each descriptor's receive buffer */
    struct mnl_socket *nlsock;      /* Netfilter netlink socket; NULL once closed by stop() */
    int nlsock_fd;                  /* Cached file descriptor of nlsock for the receive path */
    unsigned portid;                /* Netlink port ID of nlsock, passed to mnl_cb_run */
    volatile bool interrupted;      /* Set by interrupt(); checked and cleared by msg_receive() */
} Nfq_Context_t;
85
/* Module variables understood by instantiate(); exposed to the framework via get_variable_descs(). */
static DAQ_VariableDesc_t nfq_variable_descriptions[] = {
    { "debug", "Enable debugging output to stdout", DAQ_VAR_DESC_FORBIDS_ARGUMENT },
    { "fail_open", "Allow the kernel to bypass the netfilter queue when it is full", DAQ_VAR_DESC_FORBIDS_ARGUMENT },
    { "queue_maxlen", "Maximum queue length (default: 1024)", DAQ_VAR_DESC_REQUIRES_ARGUMENT },
};
91
/*
 * Maps each DAQ verdict (used as the index) onto one of the three verdicts this module acts on:
 * PASS, BLOCK or REPLACE.  The entry order must follow the verdict enumeration named in the
 * trailing comments.
 */
static const DAQ_Verdict verdict_translation_table[MAX_DAQ_VERDICT] = {
    DAQ_VERDICT_PASS,       /* DAQ_VERDICT_PASS */
    DAQ_VERDICT_BLOCK,      /* DAQ_VERDICT_BLOCK */
    DAQ_VERDICT_REPLACE,    /* DAQ_VERDICT_REPLACE */
    DAQ_VERDICT_PASS,       /* DAQ_VERDICT_WHITELIST */
    DAQ_VERDICT_BLOCK,      /* DAQ_VERDICT_BLACKLIST */
    DAQ_VERDICT_PASS        /* DAQ_VERDICT_IGNORE */
};
100
/* Private copy of the framework's base API; filled in by module load and zeroed by module unload. */
static DAQ_BaseAPI_t daq_base_api;
102
103
104 /*
105 * Private Functions
106 */
107
/*
 * Release every receive buffer owned by the context's descriptor pool, then the descriptor
 * array itself, and reset the pool bookkeeping.  Safe to call on a pool that was never
 * (or only partially) created: only the first info.size descriptors are visited.
 */
static void destroy_packet_pool(Nfq_Context_t *nfqc)
{
    NfqMsgPool *mp = &nfqc->pool;

    if (mp->pool != NULL)
    {
        /* Walk backwards over the descriptors that were populated. */
        while (mp->info.size > 0)
        {
            mp->info.size--;
            free(mp->pool[mp->info.size].nlmsg_buf);
        }
        free(mp->pool);
        mp->pool = NULL;
    }

    mp->info.mem_size = 0;
    mp->info.available = 0;
    mp->freelist = NULL;
}
122
/*
 * Allocate a pool of 'size' packet descriptors, give each one a netlink receive buffer of
 * nfqc->nlmsg_bufsize bytes, and push them all onto the pool's free list.
 *
 * Returns DAQ_SUCCESS, or DAQ_ERROR_NOMEM (with the error buffer set) if any allocation fails.
 * On failure the partially built pool is left in place; info.size counts the descriptors that
 * were fully set up so that destroy_packet_pool() can release them.
 */
static int create_packet_pool(Nfq_Context_t *nfqc, unsigned size)
{
    NfqMsgPool *mp = &nfqc->pool;
    const size_t desc_bytes = sizeof(NfqPktDesc) * size;

    mp->pool = calloc(size, sizeof(NfqPktDesc));
    if (mp->pool == NULL)
    {
        SET_ERROR(nfqc->modinst, "%s: Could not allocate %zu bytes for a packet descriptor pool!",
                __func__, desc_bytes);
        return DAQ_ERROR_NOMEM;
    }
    mp->info.mem_size = desc_bytes;

    for (; mp->info.size < size; mp->info.size++)
    {
        NfqPktDesc *d = &mp->pool[mp->info.size];

        /* Every descriptor owns its own netlink message receive buffer. */
        d->nlmsg_buf = malloc(nfqc->nlmsg_bufsize);
        if (d->nlmsg_buf == NULL)
        {
            SET_ERROR(nfqc->modinst, "%s: Could not allocate %zu bytes for a packet descriptor message buffer!",
                    __func__, nfqc->nlmsg_bufsize);
            return DAQ_ERROR_NOMEM;
        }
        mp->info.mem_size += nfqc->nlmsg_bufsize;

        /* Packet header fields that never change between uses (the rest start zeroed). */
        d->pkthdr.ingress_group = DAQ_PKTHDR_UNKNOWN;
        d->pkthdr.egress_group = DAQ_PKTHDR_UNKNOWN;

        /* Message header fields that never change between uses. */
        d->msg.type = DAQ_MSG_TYPE_PACKET;
        d->msg.hdr_len = sizeof(d->pkthdr);
        d->msg.hdr = &d->pkthdr;
        d->msg.owner = nfqc->modinst;
        d->msg.priv = d;

        /* Push onto the head of the free list. */
        d->next = mp->freelist;
        mp->freelist = d;
    }

    mp->info.available = mp->info.size;
    return DAQ_SUCCESS;
}
169
170 /* Netlink message building routines vaguely lifted from libmnl's netfilter queue example
171 (nf-queue.c) to avoid having to link the seemingly deprecated libnetfilter_queue (which uses
172 libmnl anyway). */
nfq_hdr_put(char * buf,int type,uint32_t queue_num)173 static inline struct nlmsghdr *nfq_hdr_put(char *buf, int type, uint32_t queue_num)
174 {
175 struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
176 nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | type;
177 nlh->nlmsg_flags = NLM_F_REQUEST;
178
179 struct nfgenmsg *nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
180 nfg->nfgen_family = AF_UNSPEC;
181 nfg->version = NFNETLINK_V0;
182 nfg->res_id = htons(queue_num);
183
184 return nlh;
185 }
186
nfq_build_cfg_command(char * buf,uint16_t pf,uint8_t command,int queue_num)187 static struct nlmsghdr *nfq_build_cfg_command(char *buf, uint16_t pf, uint8_t command, int queue_num)
188 {
189 struct nlmsghdr *nlh = nfq_hdr_put(buf, NFQNL_MSG_CONFIG, queue_num);
190 struct nfqnl_msg_config_cmd cmd = {
191 .command = command,
192 .pf = htons(pf),
193 };
194 mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
195
196 return nlh;
197 }
198
nfq_build_cfg_params(char * buf,uint8_t mode,int range,int queue_num)199 static struct nlmsghdr *nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
200 {
201 struct nlmsghdr *nlh = nfq_hdr_put(buf, NFQNL_MSG_CONFIG, queue_num);
202 struct nfqnl_msg_config_params params = {
203 .copy_range = htonl(range),
204 .copy_mode = mode,
205 };
206 mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), ¶ms);
207
208 return nlh;
209 }
210
nfq_build_verdict(char * buf,int id,int queue_num,int verd,uint32_t plen,uint8_t * pkt)211 static struct nlmsghdr *nfq_build_verdict(char *buf, int id, int queue_num, int verd, uint32_t plen, uint8_t *pkt)
212 {
213 struct nlmsghdr *nlh = nfq_hdr_put(buf, NFQNL_MSG_VERDICT, queue_num);
214 struct nfqnl_msg_verdict_hdr vh = {
215 .verdict = htonl(verd),
216 .id = htonl(id),
217 };
218 mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
219 if (plen)
220 mnl_attr_put(nlh, NFQA_PAYLOAD, plen, pkt);
221
222 return nlh;
223 }
224
225 /* Oh, don't mind me; I'm just reimplementing all of mnl_socket_recvfrom so that I can pass in
226 a single flag to recvmsg (MSG_DONTWAIT). */
nl_socket_recv(const Nfq_Context_t * nfqc,void * buf,size_t bufsiz,bool blocking)227 static ssize_t nl_socket_recv(const Nfq_Context_t *nfqc, void *buf, size_t bufsiz, bool blocking)
228 {
229 ssize_t ret;
230 struct sockaddr_nl addr;
231 struct iovec iov = {
232 .iov_base = buf,
233 .iov_len = bufsiz,
234 };
235 struct msghdr msg = {
236 .msg_name = &addr,
237 .msg_namelen = sizeof(struct sockaddr_nl),
238 .msg_iov = &iov,
239 .msg_iovlen = 1,
240 .msg_control = NULL,
241 .msg_controllen = 0,
242 .msg_flags = 0,
243 };
244 ret = recvmsg(nfqc->nlsock_fd, &msg, blocking ? 0 : MSG_DONTWAIT);
245 if (ret == -1)
246 return ret;
247
248 if (msg.msg_flags & MSG_TRUNC) {
249 errno = ENOSPC;
250 return -1;
251 }
252 if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
253 errno = EINVAL;
254 return -1;
255 }
256 return ret;
257 }
258
parse_attr_cb(const struct nlattr * attr,void * data)259 static int parse_attr_cb(const struct nlattr *attr, void *data)
260 {
261 const struct nlattr **tb = data;
262 int type = mnl_attr_get_type(attr);
263
264 /* skip unsupported attribute in user-space */
265 if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
266 return MNL_CB_OK;
267
268 switch(type) {
269 case NFQA_MARK:
270 case NFQA_IFINDEX_INDEV:
271 case NFQA_IFINDEX_OUTDEV:
272 case NFQA_IFINDEX_PHYSINDEV:
273 case NFQA_IFINDEX_PHYSOUTDEV:
274 case NFQA_CAP_LEN:
275 case NFQA_SKB_INFO:
276 case NFQA_SECCTX:
277 case NFQA_UID:
278 case NFQA_GID:
279 case NFQA_CT_INFO:
280 if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
281 return MNL_CB_ERROR;
282 break;
283 case NFQA_TIMESTAMP:
284 if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
285 sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
286 return MNL_CB_ERROR;
287 }
288 break;
289 case NFQA_HWADDR:
290 if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
291 sizeof(struct nfqnl_msg_packet_hw)) < 0) {
292 return MNL_CB_ERROR;
293 }
294 break;
295 case NFQA_PACKET_HDR:
296 if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
297 sizeof(struct nfqnl_msg_packet_hdr)) < 0) {
298 return MNL_CB_ERROR;
299 }
300 break;
301 case NFQA_PAYLOAD:
302 case NFQA_CT:
303 case NFQA_EXP:
304 break;
305 }
306 tb[type] = attr;
307 return MNL_CB_OK;
308 }
309
process_message_cb(const struct nlmsghdr * nlh,void * data)310 static int process_message_cb(const struct nlmsghdr *nlh, void *data)
311 {
312 NfqPktDesc *desc = (NfqPktDesc *) data;
313 struct nlattr *attr[NFQA_MAX+1] = { };
314 int ret;
315
316 /* FIXIT-L In the event that there is actually more than one packet per message, handle it gracefully.
317 I haven't actually seen this happen yet. */
318 if (desc->nlmh)
319 return MNL_CB_ERROR;
320
321 /* Parse the message attributes */
322 if ((ret = mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, attr)) != MNL_CB_OK)
323 return ret;
324
325 /* Populate the packet descriptor */
326 desc->nlmh = nlh;
327 desc->nlph = mnl_attr_get_payload(attr[NFQA_PACKET_HDR]);
328
329 /* Set up the DAQ message and packet headers. Most fields are prepopulated and unchanging. */
330 DAQ_Msg_t *msg = &desc->msg;
331 msg->data = mnl_attr_get_payload(attr[NFQA_PAYLOAD]);
332
333 DAQ_PktHdr_t *pkthdr = &desc->pkthdr;
334 pkthdr->pktlen = mnl_attr_get_payload_len(attr[NFQA_PAYLOAD]);
335 if (attr[NFQA_CAP_LEN])
336 msg->data_len = ntohl(mnl_attr_get_u32(attr[NFQA_CAP_LEN]));
337 else
338 msg->data_len = pkthdr->pktlen;
339 /*
340 * FIXIT-M Implement getting timestamps from the message if it happens to have that attribute
341 if (attr[NFQA_TIMESTAMP])
342 {
343 struct nfqnl_msg_packet_timestamp *qpt = (struct nfqnl_msg_packet_timestamp *) mnl_attr_get_payload(attr[NFQA_TIMESTAMP]);
344 ...
345 }
346 else
347 */
348 gettimeofday(&pkthdr->ts, NULL);
349 if (attr[NFQA_IFINDEX_INDEV])
350 pkthdr->ingress_index = ntohl(mnl_attr_get_u32(attr[NFQA_IFINDEX_INDEV]));
351 else
352 pkthdr->ingress_index = DAQ_PKTHDR_UNKNOWN;
353 if (attr[NFQA_IFINDEX_OUTDEV])
354 pkthdr->egress_index = ntohl(mnl_attr_get_u32(attr[NFQA_IFINDEX_OUTDEV]));
355 else
356 pkthdr->egress_index = DAQ_PKTHDR_UNKNOWN;
357
358 return MNL_CB_OK;
359 }
360
361
362 /*
363 * DAQ Module API Implementation
364 */
365
366 /* Module->load() */
nfq_daq_module_load(const DAQ_BaseAPI_t * base_api)367 static int nfq_daq_module_load(const DAQ_BaseAPI_t *base_api)
368 {
369 if (base_api->api_version != DAQ_BASE_API_VERSION || base_api->api_size != sizeof(DAQ_BaseAPI_t))
370 return DAQ_ERROR;
371
372 daq_base_api = *base_api;
373
374 return DAQ_SUCCESS;
375 }
376
377 /* Module->unload() */
nfq_daq_module_unload(void)378 static int nfq_daq_module_unload(void)
379 {
380 memset(&daq_base_api, 0, sizeof(daq_base_api));
381 return DAQ_SUCCESS;
382 }
383
384 /* Module->get_variable_descs() */
nfq_daq_get_variable_descs(const DAQ_VariableDesc_t ** var_desc_table)385 static int nfq_daq_get_variable_descs(const DAQ_VariableDesc_t **var_desc_table)
386 {
387 *var_desc_table = nfq_variable_descriptions;
388
389 return sizeof(nfq_variable_descriptions) / sizeof(DAQ_VariableDesc_t);
390 }
391
392 /* Module->instantiate() */
nfq_daq_instantiate(const DAQ_ModuleConfig_h modcfg,DAQ_ModuleInstance_h modinst,void ** ctxt_ptr)393 static int nfq_daq_instantiate(const DAQ_ModuleConfig_h modcfg, DAQ_ModuleInstance_h modinst, void **ctxt_ptr)
394 {
395 Nfq_Context_t *nfqc;
396 int rval = DAQ_ERROR;
397
398 nfqc = calloc(1, sizeof(Nfq_Context_t));
399 if (!nfqc)
400 {
401 SET_ERROR(modinst, "%s: Couldn't allocate memory for the new NFQ context", __func__);
402 return DAQ_ERROR_NOMEM;
403 }
404 nfqc->modinst = modinst;
405
406 nfqc->queue_maxlen = DEFAULT_QUEUE_MAXLEN;
407
408 char *endptr;
409 errno = 0;
410 nfqc->queue_num = strtoul(daq_base_api.config_get_input(modcfg), &endptr, 10);
411 if (*endptr != '\0' || errno != 0)
412 {
413 SET_ERROR(modinst, "%s: Invalid queue number specified: '%s'",
414 __func__, daq_base_api.config_get_input(modcfg));
415 rval = DAQ_ERROR_INVAL;
416 goto fail;
417 }
418
419 const char *varKey, *varValue;
420 daq_base_api.config_first_variable(modcfg, &varKey, &varValue);
421 while (varKey)
422 {
423 if (!strcmp(varKey, "debug"))
424 nfqc->debug = true;
425 else if (!strcmp(varKey, "fail_open"))
426 nfqc->fail_open = true;
427 else if (!strcmp(varKey, "queue_maxlen"))
428 {
429 errno = 0;
430 nfqc->queue_maxlen = strtol(varValue, NULL, 10);
431 if (*endptr != '\0' || errno != 0)
432 {
433 SET_ERROR(modinst, "%s: Invalid value for key '%s': '%s'",
434 __func__, varKey, varValue);
435 rval = DAQ_ERROR_INVAL;
436 goto fail;
437 }
438 }
439
440 daq_base_api.config_next_variable(modcfg, &varKey, &varValue);
441 }
442
443 nfqc->snaplen = daq_base_api.config_get_snaplen(modcfg);
444
445 /* Largest desired packet payload plus netlink data overhead - this is probably overkill
446 (the libnetfilter_queue example inexplicably halves MNL_SOCKET_BUFFER_SIZE), but it
447 should be safe from truncation. */
448 nfqc->nlmsg_bufsize = nfqc->snaplen + MNL_SOCKET_BUFFER_SIZE;
449 if (nfqc->debug)
450 printf("Netlink message buffer size is %zu\n", nfqc->nlmsg_bufsize);
451
452 /* Allocate a scratch buffer for general usage by the context (basically for anything that's not
453 receiving a packet) */
454 nfqc->nlmsg_buf = malloc(nfqc->nlmsg_bufsize);
455 if (!nfqc->nlmsg_buf)
456 {
457 SET_ERROR(modinst, "%s: Couldn't allocate %zu bytes for a general use buffer",
458 __func__, nfqc->nlmsg_bufsize);
459 rval = DAQ_ERROR_NOMEM;
460 goto fail;
461 }
462
463 /* Netlink message buffer length must be determined prior to creating packet pool */
464 uint32_t pool_size = daq_base_api.config_get_msg_pool_size(modcfg);
465 if ((rval = create_packet_pool(nfqc, pool_size ? pool_size : NFQ_DEFAULT_POOL_SIZE)) != DAQ_SUCCESS)
466 goto fail;
467
468 /* Open the netfilter netlink socket */
469 nfqc->nlsock = mnl_socket_open(NETLINK_NETFILTER);
470 if (!nfqc->nlsock)
471 {
472 SET_ERROR(modinst, "%s: Couldn't open netfilter netlink socket: %s (%d)",
473 __func__, strerror(errno), errno);
474 goto fail;
475 }
476 /* Cache the socket file descriptor for later use in the critical path for receive */
477 nfqc->nlsock_fd = mnl_socket_get_fd(nfqc->nlsock);
478
479 /* Implement the requested timeout by way of the receive timeout on the netlink socket */
480 nfqc->timeout = daq_base_api.config_get_timeout(modcfg);
481 if (nfqc->timeout)
482 {
483 struct timeval tv;
484 tv.tv_sec = nfqc->timeout / 1000;
485 tv.tv_usec = (nfqc->timeout % 1000) * 1000;
486 if (setsockopt(nfqc->nlsock_fd, SOL_SOCKET, SO_RCVTIMEO, (const void*)&tv, sizeof(tv)) == -1)
487 {
488 SET_ERROR(modinst, "%s: Couldn't set receive timeout on netlink socket: %s (%d)",
489 __func__, strerror(errno), errno);
490 goto fail;
491 }
492 }
493
494 /* Set the socket receive buffer to something reasonable based on the desired queue and capture lengths.
495 Try with FORCE first to allow overriding the system's global rmem_max, then fall back on being limited
496 by it if that doesn't work.
497 The value will be doubled to allow room for bookkeeping overhead, so the default of 1024 * 1500 will
498 end up allocating about 3MB of receive buffer space. The unmodified default tends to be around 208KB. */
499 unsigned int socket_rcvbuf_size = nfqc->queue_maxlen * nfqc->snaplen;
500 if (setsockopt(nfqc->nlsock_fd, SOL_SOCKET, SO_RCVBUFFORCE, &socket_rcvbuf_size, sizeof(socket_rcvbuf_size)) == -1)
501 {
502 if (setsockopt(nfqc->nlsock_fd, SOL_SOCKET, SO_RCVBUF, &socket_rcvbuf_size, sizeof(socket_rcvbuf_size)) == -1)
503 {
504 SET_ERROR(modinst, "%s: Couldn't set receive buffer size on netlink socket to %u: %s (%d)",
505 __func__, socket_rcvbuf_size, strerror(errno), errno);
506 goto fail;
507 }
508 }
509 if (nfqc->debug)
510 printf("Set socket receive buffer size to %u\n", socket_rcvbuf_size);
511
512 if (mnl_socket_bind(nfqc->nlsock, 0, MNL_SOCKET_AUTOPID) == -1)
513 {
514 SET_ERROR(modinst, "%s: Couldn't bind the netlink socket: %s (%d)",
515 __func__, strerror(errno), errno);
516 goto fail;
517 }
518 nfqc->portid = mnl_socket_get_portid(nfqc->nlsock);
519
520 struct nlmsghdr *nlh;
521
522 /* The following four packet family unbind/bind commands do nothing on modern (3.8+) kernels.
523 They used to handle binding the netfilter socket to a particular address family. */
524 nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET, NFQNL_CFG_CMD_PF_UNBIND, 0);
525 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
526 {
527 SET_ERROR(modinst, "%s: Couldn't unbind from NFQ for AF_INET: %s (%d)",
528 __func__, strerror(errno), errno);
529 goto fail;
530 }
531 nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET6, NFQNL_CFG_CMD_PF_UNBIND, 0);
532 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
533 {
534 SET_ERROR(modinst, "%s: Couldn't unbind from NFQ for AF_INET6: %s (%d)",
535 __func__, strerror(errno), errno);
536 goto fail;
537 }
538 nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET, NFQNL_CFG_CMD_PF_BIND, 0);
539 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
540 {
541 SET_ERROR(modinst, "%s: Couldn't bind to NFQ for AF_INET: %s (%d)",
542 __func__, strerror(errno), errno);
543 goto fail;
544 }
545 nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET6, NFQNL_CFG_CMD_PF_BIND, 0);
546 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
547 {
548 SET_ERROR(modinst, "%s: Couldn't bind to NFQ for AF_INET6: %s (%d)",
549 __func__, strerror(errno), errno);
550 goto fail;
551 }
552
553 /* Now, actually bind to the netfilter queue. The address family specified is irrelevant. */
554 nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_UNSPEC, NFQNL_CFG_CMD_BIND, nfqc->queue_num);
555 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
556 {
557 SET_ERROR(modinst, "%s: Couldn't bind to NFQ queue %u: %s (%d)",
558 __func__, nfqc->queue_num, strerror(errno), errno);
559 goto fail;
560 }
561
562 /*
563 * Set the queue into packet copying mode with a max copying length of our snaplen.
564 * While we're building a configuration message, we might as well tack on our requested
565 * maximum queue length and enable delivery of packets that will be subject to GSO. That
566 * last bit means we'll potentially see packets larger than the device MTU prior to their
567 * trip through the segmentation offload path. They'll probably show up as truncated.
568 */
569 nlh = nfq_build_cfg_params(nfqc->nlmsg_buf, NFQNL_COPY_PACKET, nfqc->snaplen, nfqc->queue_num);
570 mnl_attr_put_u32(nlh, NFQA_CFG_QUEUE_MAXLEN, htonl(nfqc->queue_maxlen));
571 mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(NFQA_CFG_F_GSO));
572 mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(NFQA_CFG_F_GSO));
573 if (nfqc->fail_open)
574 {
575 mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(NFQA_CFG_F_FAIL_OPEN));
576 mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(NFQA_CFG_F_FAIL_OPEN));
577 }
578 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
579 {
580 SET_ERROR(modinst, "%s: Couldn't configure NFQ parameters: %s (%d)",
581 __func__, strerror(errno), errno);
582 goto fail;
583 }
584
585 *ctxt_ptr = nfqc;
586
587 return DAQ_SUCCESS;
588
589 fail:
590 if (nfqc)
591 {
592 if (nfqc->nlsock)
593 mnl_socket_close(nfqc->nlsock);
594 if (nfqc->nlmsg_buf)
595 free(nfqc->nlmsg_buf);
596 destroy_packet_pool(nfqc);
597 free(nfqc);
598 }
599
600 return rval;
601 }
602
603 /* Module->destroy() */
nfq_daq_destroy(void * handle)604 static void nfq_daq_destroy(void *handle)
605 {
606 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
607
608 if (nfqc->nlsock)
609 mnl_socket_close(nfqc->nlsock);
610 if (nfqc->nlmsg_buf)
611 free(nfqc->nlmsg_buf);
612 destroy_packet_pool(nfqc);
613 free(nfqc);
614 }
615
616 /* Module->start() */
nfq_daq_start(void * handle)617 static int nfq_daq_start(void *handle)
618 {
619 return DAQ_SUCCESS;
620 }
621
622 /* Module->interrupt() */
nfq_daq_interrupt(void * handle)623 static int nfq_daq_interrupt(void *handle)
624 {
625 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
626
627 nfqc->interrupted = true;
628
629 return DAQ_SUCCESS;
630 }
631
632 /* Module->stop() */
nfq_daq_stop(void * handle)633 static int nfq_daq_stop(void *handle)
634 {
635 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
636
637 struct nlmsghdr *nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET, NFQNL_CFG_CMD_UNBIND, nfqc->queue_num);
638 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
639 {
640 SET_ERROR(nfqc->modinst, "%s: Couldn't bind to NFQ queue %u: %s (%d)",
641 __func__, nfqc->queue_num, strerror(errno), errno);
642 return DAQ_ERROR;
643 }
644 mnl_socket_close(nfqc->nlsock);
645 nfqc->nlsock = NULL;
646
647 return DAQ_SUCCESS;
648 }
649
650 /* Module->get_stats() */
nfq_daq_get_stats(void * handle,DAQ_Stats_t * stats)651 static int nfq_daq_get_stats(void *handle, DAQ_Stats_t *stats)
652 {
653 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
654
655 /* There is no distinction between packets received by the hardware and those we saw. */
656 nfqc->stats.hw_packets_received = nfqc->stats.packets_received;
657
658 memcpy(stats, &nfqc->stats, sizeof(DAQ_Stats_t));
659
660 return DAQ_SUCCESS;
661 }
662
663 /* Module->reset_stats() */
nfq_daq_reset_stats(void * handle)664 static void nfq_daq_reset_stats(void *handle)
665 {
666 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
667
668 memset(&nfqc->stats, 0, sizeof(DAQ_Stats_t));
669 }
670
671 /* Module->get_snaplen() */
nfq_daq_get_snaplen(void * handle)672 static int nfq_daq_get_snaplen(void *handle)
673 {
674 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
675
676 return nfqc->snaplen;
677 }
678
679 /* Module->get_capabilities() */
nfq_daq_get_capabilities(void * handle)680 static uint32_t nfq_daq_get_capabilities(void *handle)
681 {
682 return DAQ_CAPA_BLOCK | DAQ_CAPA_REPLACE | DAQ_CAPA_INTERRUPT;
683 }
684
685 /* Module->get_datalink_type() */
nfq_daq_get_datalink_type(void * handle)686 static int nfq_daq_get_datalink_type(void *handle)
687 {
688 return DLT_RAW;
689 }
690
691 /* Module->msg_receive() */
nfq_daq_msg_receive(void * handle,const unsigned max_recv,const DAQ_Msg_t * msgs[],DAQ_RecvStatus * rstat)692 static unsigned nfq_daq_msg_receive(void *handle, const unsigned max_recv, const DAQ_Msg_t *msgs[], DAQ_RecvStatus *rstat)
693 {
694 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
695 unsigned idx = 0;
696
697 *rstat = DAQ_RSTAT_OK;
698 while (idx < max_recv)
699 {
700 /* If the receive has been canceled, break out of the loop and return. */
701 if (nfqc->interrupted)
702 {
703 nfqc->interrupted = false;
704 *rstat = DAQ_RSTAT_INTERRUPTED;
705 break;
706 }
707
708 /* Make sure that we have a packet descriptor available to populate. */
709 NfqPktDesc *desc = nfqc->pool.freelist;
710 if (!desc)
711 {
712 *rstat = DAQ_RSTAT_NOBUF;
713 break;
714 }
715
716 ssize_t ret = nl_socket_recv(nfqc, desc->nlmsg_buf, nfqc->nlmsg_bufsize, idx == 0);
717 if (ret < 0)
718 {
719 if (errno == ENOBUFS)
720 {
721 nfqc->stats.hw_packets_dropped++;
722 continue;
723 }
724 else if (errno == EAGAIN || errno == EWOULDBLOCK)
725 *rstat = (idx == 0) ? DAQ_RSTAT_TIMEOUT : DAQ_RSTAT_WOULD_BLOCK;
726 else if (errno == EINTR)
727 {
728 if (!nfqc->interrupted)
729 continue;
730 nfqc->interrupted = false;
731 *rstat = DAQ_RSTAT_INTERRUPTED;
732 }
733 else
734 {
735 SET_ERROR(nfqc->modinst, "%s: Socket receive failed: %zd - %s (%d)",
736 __func__, ret, strerror(errno), errno);
737 *rstat = DAQ_RSTAT_ERROR;
738 }
739 break;
740 }
741 errno = 0;
742 ret = mnl_cb_run(desc->nlmsg_buf, ret, 0, nfqc->portid, process_message_cb, desc);
743 if (ret < 0)
744 {
745 SET_ERROR(nfqc->modinst, "%s: Netlink message processing failed: %zd - %s (%d)",
746 __func__, ret, strerror(errno), errno);
747 *rstat = DAQ_RSTAT_ERROR;
748 break;
749 }
750
751 /* Increment the module instance's packet counter. */
752 nfqc->stats.packets_received++;
753
754 /* Last, but not least, extract this descriptor from the free list and
755 place the message in the return vector. */
756 nfqc->pool.freelist = desc->next;
757 desc->next = NULL;
758 nfqc->pool.info.available--;
759 msgs[idx] = &desc->msg;
760
761 idx++;
762 }
763
764 return idx;
765 }
766
767 /* Module->msg_finalize() */
nfq_daq_msg_finalize(void * handle,const DAQ_Msg_t * msg,DAQ_Verdict verdict)768 static int nfq_daq_msg_finalize(void *handle, const DAQ_Msg_t *msg, DAQ_Verdict verdict)
769 {
770 Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
771 NfqPktDesc *desc = (NfqPktDesc *) msg->priv;
772
773 /* Sanitize the verdict. */
774 if (verdict >= MAX_DAQ_VERDICT)
775 verdict = DAQ_VERDICT_PASS;
776 nfqc->stats.verdicts[verdict]++;
777 verdict = verdict_translation_table[verdict];
778
779 /* Send the verdict back to the kernel through netlink */
780 /* FIXIT-L Consider using an iovec for scatter/gather transmission with the new payload as a
781 separate entry. This would avoid a copy and potentially avoid buffer size restrictions.
782 Only as relevant as REPLACE is common. */
783 uint32_t plen = (verdict == DAQ_VERDICT_REPLACE) ? msg->data_len : 0;
784 int nfq_verdict = (verdict == DAQ_VERDICT_PASS || verdict == DAQ_VERDICT_REPLACE) ? NF_ACCEPT : NF_DROP;;
785 struct nlmsghdr *nlh = nfq_build_verdict(nfqc->nlmsg_buf, ntohl(desc->nlph->packet_id), nfqc->queue_num,
786 nfq_verdict, plen, msg->data);
787 if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
788 {
789 SET_ERROR(nfqc->modinst, "%s: Couldn't send NFQ verdict: %s (%d)",
790 __func__, strerror(errno), errno);
791 return DAQ_ERROR;
792 }
793
794 /* Toss the descriptor back on the free list for reuse.
795 Make sure to clear out the netlink message header to show that it is unused. */
796 desc->nlmh = NULL;
797 desc->next = nfqc->pool.freelist;
798 nfqc->pool.freelist = desc;
799 nfqc->pool.info.available++;
800
801 return DAQ_SUCCESS;
802 }
803
/* Module->get_msg_pool_info() */
/* Copy the packet pool's current accounting into *info.  Always returns DAQ_SUCCESS. */
static int nfq_daq_get_msg_pool_info(void *handle, DAQ_MsgPoolInfo_t *info)
{
    const Nfq_Context_t *ctx = handle;

    memcpy(info, &ctx->pool.info, sizeof(*info));

    return DAQ_SUCCESS;
}
812
/*
 * Module API table describing this module to the DAQ framework.  It is exported under the
 * name DAQ_MODULE_DATA when BUILDING_SO is defined and as nfq_daq_module_data otherwise.
 * The initializer is positional, so entries must stay in the order of the fields named in
 * the comments; NULL marks an operation this module does not implement.
 */
#ifdef BUILDING_SO
DAQ_SO_PUBLIC const DAQ_ModuleAPI_t DAQ_MODULE_DATA =
#else
const DAQ_ModuleAPI_t nfq_daq_module_data =
#endif
{
    /* .api_version = */ DAQ_MODULE_API_VERSION,
    /* .api_size = */ sizeof(DAQ_ModuleAPI_t),
    /* .module_version = */ DAQ_NFQ_VERSION,
    /* .name = */ "nfq",
    /* .type = */ DAQ_TYPE_INTF_CAPABLE | DAQ_TYPE_INLINE_CAPABLE | DAQ_TYPE_MULTI_INSTANCE | DAQ_TYPE_NO_UNPRIV,
    /* .load = */ nfq_daq_module_load,
    /* .unload = */ nfq_daq_module_unload,
    /* .get_variable_descs = */ nfq_daq_get_variable_descs,
    /* .instantiate = */ nfq_daq_instantiate,
    /* .destroy = */ nfq_daq_destroy,
    /* .set_filter = */ NULL,
    /* .start = */ nfq_daq_start,
    /* .inject = */ NULL,
    /* .inject_relative = */ NULL,
    /* .interrupt = */ nfq_daq_interrupt,
    /* .stop = */ nfq_daq_stop,
    /* .ioctl = */ NULL,
    /* .get_stats = */ nfq_daq_get_stats,
    /* .reset_stats = */ nfq_daq_reset_stats,
    /* .get_snaplen = */ nfq_daq_get_snaplen,
    /* .get_capabilities = */ nfq_daq_get_capabilities,
    /* .get_datalink_type = */ nfq_daq_get_datalink_type,
    /* .config_load = */ NULL,
    /* .config_swap = */ NULL,
    /* .config_free = */ NULL,
    /* .msg_receive = */ nfq_daq_msg_receive,
    /* .msg_finalize = */ nfq_daq_msg_finalize,
    /* .get_msg_pool_info = */ nfq_daq_get_msg_pool_info,
};
848