1 /*
2 ** Copyright (C) 2018-2021 Cisco and/or its affiliates. All rights reserved.
3 ** Author: Michael R. Altizer <mialtize@cisco.com>
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License Version 2 as
7 ** published by the Free Software Foundation.  You may not use, modify or
8 ** distribute this program under any other version of the GNU General
9 ** Public License.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19 */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include <arpa/inet.h>
26 
27 #include <errno.h>
28 #include <linux/netfilter.h>
29 #include <linux/netfilter/nfnetlink_queue.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/time.h>
33 
34 #include <libmnl/libmnl.h>
35 
36 #include "daq_dlt.h"
37 #include "daq_module_api.h"
38 
39 /* FIXIT-M Need to figure out how to reimplement inject for NFQ */
40 
/* Version number reported for this module (the module_version entry of the module API table). */
#define DAQ_NFQ_VERSION 8

/* Number of packet descriptors allocated when the configuration does not give a message pool size. */
#define NFQ_DEFAULT_POOL_SIZE   16
#define DEFAULT_QUEUE_MAXLEN    1024   // Based on NFQNL_QMAX_DEFAULT from nfnetlink_queue_core.c

/* Record a printf-style formatted error message against the given module instance. */
#define SET_ERROR(modinst, ...)    daq_base_api.set_errbuf(modinst, __VA_ARGS__)
47 
/* Packet descriptor: one pooled DAQ message together with the netlink receive buffer backing it. */
typedef struct _nfq_pkt_desc
{
    /* DAQ message handed to the caller; its priv member points back at this descriptor. */
    DAQ_Msg_t msg;
    /* Packet header that msg.hdr points at. */
    DAQ_PktHdr_t pkthdr;
    /* Heap buffer (nlmsg_bufsize bytes) that one netlink message is received into. */
    uint8_t *nlmsg_buf;
    /* Netlink message parsed into this descriptor; NULL while the descriptor is unused. */
    const struct nlmsghdr *nlmh;
    /* Payload of the NFQA_PACKET_HDR attribute; supplies the packet ID used when sending a verdict. */
    struct nfqnl_msg_packet_hdr *nlph;
    /* Next descriptor on the pool's free list. */
    struct _nfq_pkt_desc *next;
} NfqPktDesc;
57 
/* Pool of packet descriptors plus a singly linked list of the ones that are currently free. */
typedef struct _nfq_msg_pool
{
    /* Array holding every descriptor in the pool. */
    NfqPktDesc *pool;
    /* Head of the list of descriptors available for receiving. */
    NfqPktDesc *freelist;
    /* Accounting: number of descriptors (size), number free (available), and bytes allocated (mem_size). */
    DAQ_MsgPoolInfo_t info;
} NfqMsgPool;
64 
/* Per-instance state of the NFQ module. */
typedef struct _nfq_context
{
    /* Configuration */
    unsigned queue_num;         /* Netfilter queue number, parsed from the configured input string */
    int snaplen;                /* Configured snaplen; used as the kernel's packet copy range */
    int timeout;                /* Receive timeout in milliseconds (0 leaves the socket without a timeout) */
    unsigned queue_maxlen;      /* Maximum queue length requested from the kernel */
    bool fail_open;             /* Set by the "fail_open" variable */
    bool debug;                 /* Set by the "debug" variable; enables informational printf output */
    /* State */
    DAQ_ModuleInstance_h modinst;   /* Owning module instance (target for error messages) */
    DAQ_Stats_t stats;              /* Running statistics */
    NfqMsgPool pool;                /* Packet descriptor pool */
    char *nlmsg_buf;                /* Scratch buffer for building outgoing netlink messages */
    size_t nlmsg_bufsize;           /* Size of the scratch buffer and of each descriptor's receive buffer */
    struct mnl_socket *nlsock;      /* Netfilter netlink socket; NULL once stopped */
    int nlsock_fd;                  /* Cached file descriptor of nlsock for the receive path */
    unsigned portid;                /* Netlink port ID of nlsock */
    volatile bool interrupted;      /* Set by interrupt(); checked and cleared by msg_receive() */
} Nfq_Context_t;
85 
/* Module variables understood by instantiate(); returned to the framework by get_variable_descs(). */
static DAQ_VariableDesc_t nfq_variable_descriptions[] = {
    { "debug", "Enable debugging output to stdout", DAQ_VAR_DESC_FORBIDS_ARGUMENT },
    { "fail_open", "Allow the kernel to bypass the netfilter queue when it is full", DAQ_VAR_DESC_FORBIDS_ARGUMENT },
    { "queue_maxlen", "Maximum queue length (default: 1024)", DAQ_VAR_DESC_REQUIRES_ARGUMENT },
};
91 
/* Collapses every DAQ verdict (the index, named in the trailing comments) onto one of the three
    outcomes msg_finalize() acts on: PASS, BLOCK, or REPLACE. */
static const DAQ_Verdict verdict_translation_table[MAX_DAQ_VERDICT] = {
    DAQ_VERDICT_PASS,       /* DAQ_VERDICT_PASS */
    DAQ_VERDICT_BLOCK,      /* DAQ_VERDICT_BLOCK */
    DAQ_VERDICT_REPLACE,    /* DAQ_VERDICT_REPLACE */
    DAQ_VERDICT_PASS,       /* DAQ_VERDICT_WHITELIST */
    DAQ_VERDICT_BLOCK,      /* DAQ_VERDICT_BLACKLIST */
    DAQ_VERDICT_PASS        /* DAQ_VERDICT_IGNORE */
};
100 
/* Module-local copy of the base API table; filled in by module load and zeroed by module unload. */
static DAQ_BaseAPI_t daq_base_api;
102 
103 
104 /*
105  * Private Functions
106  */
107 
/*
 * Free each descriptor's netlink receive buffer and the descriptor array itself, then reset the
 * free list and the availability/memory accounting of the context's packet pool.
 * Only the first info.size descriptors are assumed to own a buffer.
 */
static void destroy_packet_pool(Nfq_Context_t *nfqc)
{
    NfqMsgPool *mp = &nfqc->pool;

    if (mp->pool != NULL)
    {
        /* Walk backwards over the populated descriptors, shrinking the recorded size as we go. */
        for (; mp->info.size > 0; mp->info.size--)
            free(mp->pool[mp->info.size - 1].nlmsg_buf);

        free(mp->pool);
        mp->pool = NULL;
    }

    mp->info.mem_size = 0;
    mp->info.available = 0;
    mp->freelist = NULL;
}
122 
/*
 * Allocate the packet descriptor pool for a context.
 *
 * Allocates 'size' zeroed descriptors, gives each one a netlink receive buffer of
 * nfqc->nlmsg_bufsize bytes, pre-populates the DAQ message/packet header fields that never
 * change, and pushes every descriptor onto the free list.
 *
 * Returns DAQ_SUCCESS, or DAQ_ERROR_NOMEM (with the error buffer set) if an allocation fails.
 * On failure the partially built pool is left in place; info.size counts only the descriptors
 * whose buffer was allocated, so destroy_packet_pool() can release it.
 */
static int create_packet_pool(Nfq_Context_t *nfqc, unsigned size)
{
    NfqMsgPool *pool = &nfqc->pool;

    /* calloc() takes the element count first and the element size second. */
    pool->pool = calloc(size, sizeof(*pool->pool));
    if (!pool->pool)
    {
        SET_ERROR(nfqc->modinst, "%s: Could not allocate %zu bytes for a packet descriptor pool!",
                __func__, sizeof(NfqPktDesc) * size);
        return DAQ_ERROR_NOMEM;
    }
    pool->info.mem_size = sizeof(NfqPktDesc) * size;
    while (pool->info.size < size)
    {
        /* Allocate netlink message receive buffer and set up descriptor */
        NfqPktDesc *desc = &pool->pool[pool->info.size];
        desc->nlmsg_buf = malloc(nfqc->nlmsg_bufsize);
        if (!desc->nlmsg_buf)
        {
            SET_ERROR(nfqc->modinst, "%s: Could not allocate %zu bytes for a packet descriptor message buffer!",
                    __func__, nfqc->nlmsg_bufsize);
            return DAQ_ERROR_NOMEM;
        }
        pool->info.mem_size += nfqc->nlmsg_bufsize;

        /* Initialize non-zero invariant packet header fields. */
        DAQ_PktHdr_t *pkthdr = &desc->pkthdr;
        pkthdr->ingress_group = DAQ_PKTHDR_UNKNOWN;
        pkthdr->egress_group = DAQ_PKTHDR_UNKNOWN;

        /* Initialize non-zero invariant message header fields. */
        DAQ_Msg_t *msg = &desc->msg;
        msg->type = DAQ_MSG_TYPE_PACKET;
        msg->hdr_len = sizeof(desc->pkthdr);
        msg->hdr = &desc->pkthdr;
        msg->owner = nfqc->modinst;
        msg->priv = desc;

        /* Place it on the free list */
        desc->next = nfqc->pool.freelist;
        nfqc->pool.freelist = desc;

        /* Only count the descriptor once it is fully set up. */
        pool->info.size++;
    }
    pool->info.available = pool->info.size;
    return DAQ_SUCCESS;
}
169 
170 /* Netlink message building routines vaguely lifted from libmnl's netfilter queue example
171     (nf-queue.c) to avoid having to link the seemingly deprecated libnetfilter_queue (which uses
172     libmnl anyway). */
nfq_hdr_put(char * buf,int type,uint32_t queue_num)173 static inline struct nlmsghdr *nfq_hdr_put(char *buf, int type, uint32_t queue_num)
174 {
175     struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
176     nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | type;
177     nlh->nlmsg_flags = NLM_F_REQUEST;
178 
179     struct nfgenmsg *nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
180     nfg->nfgen_family = AF_UNSPEC;
181     nfg->version = NFNETLINK_V0;
182     nfg->res_id = htons(queue_num);
183 
184     return nlh;
185 }
186 
nfq_build_cfg_command(char * buf,uint16_t pf,uint8_t command,int queue_num)187 static struct nlmsghdr *nfq_build_cfg_command(char *buf, uint16_t pf, uint8_t command, int queue_num)
188 {
189     struct nlmsghdr *nlh = nfq_hdr_put(buf, NFQNL_MSG_CONFIG, queue_num);
190     struct nfqnl_msg_config_cmd cmd = {
191         .command = command,
192         .pf = htons(pf),
193     };
194     mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
195 
196     return nlh;
197 }
198 
nfq_build_cfg_params(char * buf,uint8_t mode,int range,int queue_num)199 static struct nlmsghdr *nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
200 {
201     struct nlmsghdr *nlh = nfq_hdr_put(buf, NFQNL_MSG_CONFIG, queue_num);
202     struct nfqnl_msg_config_params params = {
203         .copy_range = htonl(range),
204         .copy_mode = mode,
205     };
206     mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), &params);
207 
208     return nlh;
209 }
210 
nfq_build_verdict(char * buf,int id,int queue_num,int verd,uint32_t plen,uint8_t * pkt)211 static struct nlmsghdr *nfq_build_verdict(char *buf, int id, int queue_num, int verd, uint32_t plen, uint8_t *pkt)
212 {
213     struct nlmsghdr *nlh = nfq_hdr_put(buf, NFQNL_MSG_VERDICT, queue_num);
214     struct nfqnl_msg_verdict_hdr vh = {
215         .verdict = htonl(verd),
216         .id = htonl(id),
217     };
218     mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
219     if (plen)
220         mnl_attr_put(nlh, NFQA_PAYLOAD, plen, pkt);
221 
222     return nlh;
223 }
224 
225 /* Oh, don't mind me; I'm just reimplementing all of mnl_socket_recvfrom so that I can pass in
226     a single flag to recvmsg (MSG_DONTWAIT). */
nl_socket_recv(const Nfq_Context_t * nfqc,void * buf,size_t bufsiz,bool blocking)227 static ssize_t nl_socket_recv(const Nfq_Context_t *nfqc, void *buf, size_t bufsiz, bool blocking)
228 {
229     ssize_t ret;
230     struct sockaddr_nl addr;
231     struct iovec iov = {
232         .iov_base   = buf,
233         .iov_len    = bufsiz,
234     };
235     struct msghdr msg = {
236         .msg_name   = &addr,
237         .msg_namelen    = sizeof(struct sockaddr_nl),
238         .msg_iov    = &iov,
239         .msg_iovlen = 1,
240         .msg_control    = NULL,
241         .msg_controllen = 0,
242         .msg_flags  = 0,
243     };
244     ret = recvmsg(nfqc->nlsock_fd, &msg, blocking ? 0 : MSG_DONTWAIT);
245     if (ret == -1)
246         return ret;
247 
248     if (msg.msg_flags & MSG_TRUNC) {
249         errno = ENOSPC;
250         return -1;
251     }
252     if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
253         errno = EINVAL;
254         return -1;
255     }
256     return ret;
257 }
258 
parse_attr_cb(const struct nlattr * attr,void * data)259 static int parse_attr_cb(const struct nlattr *attr, void *data)
260 {
261     const struct nlattr **tb = data;
262     int type = mnl_attr_get_type(attr);
263 
264     /* skip unsupported attribute in user-space */
265     if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
266         return MNL_CB_OK;
267 
268     switch(type) {
269         case NFQA_MARK:
270         case NFQA_IFINDEX_INDEV:
271         case NFQA_IFINDEX_OUTDEV:
272         case NFQA_IFINDEX_PHYSINDEV:
273         case NFQA_IFINDEX_PHYSOUTDEV:
274         case NFQA_CAP_LEN:
275         case NFQA_SKB_INFO:
276         case NFQA_SECCTX:
277         case NFQA_UID:
278         case NFQA_GID:
279         case NFQA_CT_INFO:
280             if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
281                 return MNL_CB_ERROR;
282             break;
283         case NFQA_TIMESTAMP:
284             if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
285                         sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
286                 return MNL_CB_ERROR;
287             }
288             break;
289         case NFQA_HWADDR:
290             if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
291                         sizeof(struct nfqnl_msg_packet_hw)) < 0) {
292                 return MNL_CB_ERROR;
293             }
294             break;
295         case NFQA_PACKET_HDR:
296             if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
297                         sizeof(struct nfqnl_msg_packet_hdr)) < 0) {
298                 return MNL_CB_ERROR;
299             }
300             break;
301         case NFQA_PAYLOAD:
302         case NFQA_CT:
303         case NFQA_EXP:
304             break;
305     }
306     tb[type] = attr;
307     return MNL_CB_OK;
308 }
309 
process_message_cb(const struct nlmsghdr * nlh,void * data)310 static int process_message_cb(const struct nlmsghdr *nlh, void *data)
311 {
312     NfqPktDesc *desc = (NfqPktDesc *) data;
313     struct nlattr *attr[NFQA_MAX+1] = { };
314     int ret;
315 
316     /* FIXIT-L In the event that there is actually more than one packet per message, handle it gracefully.
317         I haven't actually seen this happen yet. */
318     if (desc->nlmh)
319         return MNL_CB_ERROR;
320 
321     /* Parse the message attributes */
322     if ((ret = mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, attr)) != MNL_CB_OK)
323         return ret;
324 
325     /* Populate the packet descriptor */
326     desc->nlmh = nlh;
327     desc->nlph = mnl_attr_get_payload(attr[NFQA_PACKET_HDR]);
328 
329     /* Set up the DAQ message and packet headers.  Most fields are prepopulated and unchanging. */
330     DAQ_Msg_t *msg = &desc->msg;
331     msg->data = mnl_attr_get_payload(attr[NFQA_PAYLOAD]);
332 
333     DAQ_PktHdr_t *pkthdr = &desc->pkthdr;
334     pkthdr->pktlen = mnl_attr_get_payload_len(attr[NFQA_PAYLOAD]);
335     if (attr[NFQA_CAP_LEN])
336         msg->data_len = ntohl(mnl_attr_get_u32(attr[NFQA_CAP_LEN]));
337     else
338         msg->data_len = pkthdr->pktlen;
339     /*
340      * FIXIT-M Implement getting timestamps from the message if it happens to have that attribute
341     if (attr[NFQA_TIMESTAMP])
342     {
343         struct nfqnl_msg_packet_timestamp *qpt = (struct nfqnl_msg_packet_timestamp *) mnl_attr_get_payload(attr[NFQA_TIMESTAMP]);
344         ...
345     }
346     else
347     */
348         gettimeofday(&pkthdr->ts, NULL);
349     if (attr[NFQA_IFINDEX_INDEV])
350         pkthdr->ingress_index = ntohl(mnl_attr_get_u32(attr[NFQA_IFINDEX_INDEV]));
351     else
352         pkthdr->ingress_index = DAQ_PKTHDR_UNKNOWN;
353     if (attr[NFQA_IFINDEX_OUTDEV])
354         pkthdr->egress_index = ntohl(mnl_attr_get_u32(attr[NFQA_IFINDEX_OUTDEV]));
355     else
356         pkthdr->egress_index = DAQ_PKTHDR_UNKNOWN;
357 
358     return MNL_CB_OK;
359 }
360 
361 
362 /*
363  * DAQ Module API Implementation
364  */
365 
366 /* Module->load() */
nfq_daq_module_load(const DAQ_BaseAPI_t * base_api)367 static int nfq_daq_module_load(const DAQ_BaseAPI_t *base_api)
368 {
369     if (base_api->api_version != DAQ_BASE_API_VERSION || base_api->api_size != sizeof(DAQ_BaseAPI_t))
370         return DAQ_ERROR;
371 
372     daq_base_api = *base_api;
373 
374     return DAQ_SUCCESS;
375 }
376 
377 /* Module->unload() */
nfq_daq_module_unload(void)378 static int nfq_daq_module_unload(void)
379 {
380     memset(&daq_base_api, 0, sizeof(daq_base_api));
381     return DAQ_SUCCESS;
382 }
383 
384 /* Module->get_variable_descs() */
nfq_daq_get_variable_descs(const DAQ_VariableDesc_t ** var_desc_table)385 static int nfq_daq_get_variable_descs(const DAQ_VariableDesc_t **var_desc_table)
386 {
387     *var_desc_table = nfq_variable_descriptions;
388 
389     return sizeof(nfq_variable_descriptions) / sizeof(DAQ_VariableDesc_t);
390 }
391 
392 /* Module->instantiate() */
/*
 * Create and configure an NFQ context for a module instance.
 *
 * Parses the queue number (the configured input string) and the module variables, allocates the
 * scratch buffer and packet descriptor pool, opens and tunes the netfilter netlink socket
 * (receive timeout, receive buffer size), binds to the requested queue, and configures the
 * queue's copy mode, maximum length, and flags.
 *
 * On success the new context is stored through 'ctxt_ptr' and DAQ_SUCCESS is returned.  On
 * failure everything acquired so far is released, the error buffer is set, and a DAQ error code
 * is returned (DAQ_ERROR_NOMEM, DAQ_ERROR_INVAL, or DAQ_ERROR).
 */
static int nfq_daq_instantiate(const DAQ_ModuleConfig_h modcfg, DAQ_ModuleInstance_h modinst, void **ctxt_ptr)
{
    Nfq_Context_t *nfqc;
    int rval = DAQ_ERROR;

    nfqc = calloc(1, sizeof(Nfq_Context_t));
    if (!nfqc)
    {
        SET_ERROR(modinst, "%s: Couldn't allocate memory for the new NFQ context", __func__);
        return DAQ_ERROR_NOMEM;
    }
    nfqc->modinst = modinst;

    nfqc->queue_maxlen = DEFAULT_QUEUE_MAXLEN;

    char *endptr;
    errno = 0;
    nfqc->queue_num = strtoul(daq_base_api.config_get_input(modcfg), &endptr, 10);
    if (*endptr != '\0' || errno != 0)
    {
        SET_ERROR(modinst, "%s: Invalid queue number specified: '%s'",
                __func__, daq_base_api.config_get_input(modcfg));
        rval = DAQ_ERROR_INVAL;
        goto fail;
    }

    const char *varKey, *varValue;
    daq_base_api.config_first_variable(modcfg, &varKey, &varValue);
    while (varKey)
    {
        if (!strcmp(varKey, "debug"))
            nfqc->debug = true;
        else if (!strcmp(varKey, "fail_open"))
            nfqc->fail_open = true;
        else if (!strcmp(varKey, "queue_maxlen"))
        {
            /* Parse with an end pointer that belongs to *this* conversion; checking the one left
                over from the queue number would accept any garbage.  The value is sent to the
                kernel as an unsigned 32-bit attribute, so reject anything outside that range. */
            errno = 0;
            long maxlen = strtol(varValue, &endptr, 10);
            if (endptr == varValue || *endptr != '\0' || errno != 0 ||
                maxlen < 0 || (unsigned long) maxlen > UINT32_MAX)
            {
                SET_ERROR(modinst, "%s: Invalid value for key '%s': '%s'",
                        __func__, varKey, varValue);
                rval = DAQ_ERROR_INVAL;
                goto fail;
            }
            nfqc->queue_maxlen = (unsigned) maxlen;
        }

        daq_base_api.config_next_variable(modcfg, &varKey, &varValue);
    }

    nfqc->snaplen = daq_base_api.config_get_snaplen(modcfg);

    /* Largest desired packet payload plus netlink data overhead - this is probably overkill
        (the libnetfilter_queue example inexplicably halves MNL_SOCKET_BUFFER_SIZE), but it
        should be safe from truncation.  */
    nfqc->nlmsg_bufsize = nfqc->snaplen + MNL_SOCKET_BUFFER_SIZE;
    if (nfqc->debug)
        printf("Netlink message buffer size is %zu\n", nfqc->nlmsg_bufsize);

    /* Allocate a scratch buffer for general usage by the context (basically for anything that's not
        receiving a packet) */
    nfqc->nlmsg_buf = malloc(nfqc->nlmsg_bufsize);
    if (!nfqc->nlmsg_buf)
    {
        SET_ERROR(modinst, "%s: Couldn't allocate %zu bytes for a general use buffer",
                __func__, nfqc->nlmsg_bufsize);
        rval = DAQ_ERROR_NOMEM;
        goto fail;
    }

    /* Netlink message buffer length must be determined prior to creating packet pool */
    uint32_t pool_size = daq_base_api.config_get_msg_pool_size(modcfg);
    if ((rval = create_packet_pool(nfqc, pool_size ? pool_size : NFQ_DEFAULT_POOL_SIZE)) != DAQ_SUCCESS)
        goto fail;

    /* The assignment above left rval at DAQ_SUCCESS; restore the generic error code so that the
        failure paths below don't report success after tearing the context down. */
    rval = DAQ_ERROR;

    /* Open the netfilter netlink socket */
    nfqc->nlsock = mnl_socket_open(NETLINK_NETFILTER);
    if (!nfqc->nlsock)
    {
        SET_ERROR(modinst, "%s: Couldn't open netfilter netlink socket: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }
    /* Cache the socket file descriptor for later use in the critical path for receive */
    nfqc->nlsock_fd = mnl_socket_get_fd(nfqc->nlsock);

    /* Implement the requested timeout by way of the receive timeout on the netlink socket */
    nfqc->timeout = daq_base_api.config_get_timeout(modcfg);
    if (nfqc->timeout)
    {
        /* The timeout is in milliseconds; split it into seconds and microseconds. */
        struct timeval tv;
        tv.tv_sec = nfqc->timeout / 1000;
        tv.tv_usec = (nfqc->timeout % 1000) * 1000;
        if (setsockopt(nfqc->nlsock_fd, SOL_SOCKET, SO_RCVTIMEO, (const void*)&tv, sizeof(tv)) == -1)
        {
            SET_ERROR(modinst, "%s: Couldn't set receive timeout on netlink socket: %s (%d)",
                    __func__, strerror(errno), errno);
            goto fail;
        }
    }

    /* Set the socket receive buffer to something reasonable based on the desired queue and capture lengths.
        Try with FORCE first to allow overriding the system's global rmem_max, then fall back on being limited
        by it if that doesn't work.
        The value will be doubled to allow room for bookkeeping overhead, so the default of 1024 * 1500 will
        end up allocating about 3MB of receive buffer space.  The unmodified default tends to be around 208KB. */
    unsigned int socket_rcvbuf_size = nfqc->queue_maxlen * nfqc->snaplen;
    if (setsockopt(nfqc->nlsock_fd, SOL_SOCKET, SO_RCVBUFFORCE, &socket_rcvbuf_size, sizeof(socket_rcvbuf_size)) == -1)
    {
        if (setsockopt(nfqc->nlsock_fd, SOL_SOCKET, SO_RCVBUF, &socket_rcvbuf_size, sizeof(socket_rcvbuf_size)) == -1)
        {
            SET_ERROR(modinst, "%s: Couldn't set receive buffer size on netlink socket to %u: %s (%d)",
                    __func__, socket_rcvbuf_size, strerror(errno), errno);
            goto fail;
        }
    }
    if (nfqc->debug)
        printf("Set socket receive buffer size to %u\n", socket_rcvbuf_size);

    if (mnl_socket_bind(nfqc->nlsock, 0, MNL_SOCKET_AUTOPID) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't bind the netlink socket: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }
    nfqc->portid = mnl_socket_get_portid(nfqc->nlsock);

    struct nlmsghdr *nlh;

    /* The following four packet family unbind/bind commands do nothing on modern (3.8+) kernels.
        They used to handle binding the netfilter socket to a particular address family. */
    nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET, NFQNL_CFG_CMD_PF_UNBIND, 0);
    if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't unbind from NFQ for AF_INET: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }
    nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET6, NFQNL_CFG_CMD_PF_UNBIND, 0);
    if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't unbind from NFQ for AF_INET6: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }
    nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET, NFQNL_CFG_CMD_PF_BIND, 0);
    if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't bind to NFQ for AF_INET: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }
    nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET6, NFQNL_CFG_CMD_PF_BIND, 0);
    if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't bind to NFQ for AF_INET6: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }

    /* Now, actually bind to the netfilter queue.  The address family specified is irrelevant. */
    nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_UNSPEC, NFQNL_CFG_CMD_BIND, nfqc->queue_num);
    if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't bind to NFQ queue %u: %s (%d)",
                __func__, nfqc->queue_num, strerror(errno), errno);
        goto fail;
    }

    /*
     * Set the queue into packet copying mode with a max copying length of our snaplen.
     * While we're building a configuration message, we might as well tack on our requested
     * maximum queue length and enable delivery of packets that will be subject to GSO. That
     * last bit means we'll potentially see packets larger than the device MTU prior to their
     * trip through the segmentation offload path.  They'll probably show up as truncated.
     *
     * All requested flags go into a single FLAGS/MASK attribute pair: when an attribute type
     * appears more than once in a message, the kernel's attribute parsing keeps only the last
     * occurrence, so a second pair for fail-open would silently drop the GSO flag.
     */
    uint32_t cfg_flags = NFQA_CFG_F_GSO;
    if (nfqc->fail_open)
        cfg_flags |= NFQA_CFG_F_FAIL_OPEN;

    nlh = nfq_build_cfg_params(nfqc->nlmsg_buf, NFQNL_COPY_PACKET, nfqc->snaplen, nfqc->queue_num);
    mnl_attr_put_u32(nlh, NFQA_CFG_QUEUE_MAXLEN, htonl(nfqc->queue_maxlen));
    mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(cfg_flags));
    mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(cfg_flags));
    if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
    {
        SET_ERROR(modinst, "%s: Couldn't configure NFQ parameters: %s (%d)",
                __func__, strerror(errno), errno);
        goto fail;
    }

    *ctxt_ptr = nfqc;

    return DAQ_SUCCESS;

fail:
    /* Release whatever was acquired before the failure; the pool teardown copes with a pool
        that was never or only partially created. */
    if (nfqc)
    {
        if (nfqc->nlsock)
            mnl_socket_close(nfqc->nlsock);
        if (nfqc->nlmsg_buf)
            free(nfqc->nlmsg_buf);
        destroy_packet_pool(nfqc);
        free(nfqc);
    }

    return rval;
}
602 
603 /* Module->destroy() */
nfq_daq_destroy(void * handle)604 static void nfq_daq_destroy(void *handle)
605 {
606     Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
607 
608     if (nfqc->nlsock)
609         mnl_socket_close(nfqc->nlsock);
610     if (nfqc->nlmsg_buf)
611         free(nfqc->nlmsg_buf);
612     destroy_packet_pool(nfqc);
613     free(nfqc);
614 }
615 
616 /* Module->start() */
nfq_daq_start(void * handle)617 static int nfq_daq_start(void *handle)
618 {
619     return DAQ_SUCCESS;
620 }
621 
622 /* Module->interrupt() */
nfq_daq_interrupt(void * handle)623 static int nfq_daq_interrupt(void *handle)
624 {
625     Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
626 
627     nfqc->interrupted = true;
628 
629     return DAQ_SUCCESS;
630 }
631 
632 /* Module->stop() */
nfq_daq_stop(void * handle)633 static int nfq_daq_stop(void *handle)
634 {
635     Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
636 
637     struct nlmsghdr *nlh = nfq_build_cfg_command(nfqc->nlmsg_buf, AF_INET, NFQNL_CFG_CMD_UNBIND, nfqc->queue_num);
638     if (mnl_socket_sendto(nfqc->nlsock, nlh, nlh->nlmsg_len) == -1)
639     {
640         SET_ERROR(nfqc->modinst, "%s: Couldn't bind to NFQ queue %u: %s (%d)",
641                 __func__, nfqc->queue_num, strerror(errno), errno);
642         return DAQ_ERROR;
643     }
644     mnl_socket_close(nfqc->nlsock);
645     nfqc->nlsock = NULL;
646 
647     return DAQ_SUCCESS;
648 }
649 
650 /* Module->get_stats() */
/* Copy the context's statistics into 'stats'.  The hardware receive counter is first synced to
    the packets-received counter, as this module does not distinguish between the two.
    Always returns DAQ_SUCCESS. */
static int nfq_daq_get_stats(void *handle, DAQ_Stats_t *stats)
{
    Nfq_Context_t *ctx = handle;
    DAQ_Stats_t *current = &ctx->stats;

    current->hw_packets_received = current->packets_received;
    memcpy(stats, current, sizeof(DAQ_Stats_t));

    return DAQ_SUCCESS;
}
662 
663 /* Module->reset_stats() */
nfq_daq_reset_stats(void * handle)664 static void nfq_daq_reset_stats(void *handle)
665 {
666     Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
667 
668     memset(&nfqc->stats, 0, sizeof(DAQ_Stats_t));
669 }
670 
671 /* Module->get_snaplen() */
nfq_daq_get_snaplen(void * handle)672 static int nfq_daq_get_snaplen(void *handle)
673 {
674     Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
675 
676     return nfqc->snaplen;
677 }
678 
679 /* Module->get_capabilities() */
nfq_daq_get_capabilities(void * handle)680 static uint32_t nfq_daq_get_capabilities(void *handle)
681 {
682     return DAQ_CAPA_BLOCK | DAQ_CAPA_REPLACE | DAQ_CAPA_INTERRUPT;
683 }
684 
685 /* Module->get_datalink_type() */
nfq_daq_get_datalink_type(void * handle)686 static int nfq_daq_get_datalink_type(void *handle)
687 {
688     return DLT_RAW;
689 }
690 
691 /* Module->msg_receive() */
/*
 * Receive up to 'max_recv' packets into 'msgs'.
 *
 * Each iteration takes the descriptor at the head of the free list, receives one netlink
 * message into its buffer (blocking only for the first packet of the batch), and parses it.
 * Successfully parsed packets are removed from the free list and placed in 'msgs'.
 *
 * Returns the number of messages stored in 'msgs'.  '*rstat' reports why the loop ended:
 * DAQ_RSTAT_OK (batch filled), _INTERRUPTED, _NOBUF (no free descriptor), _TIMEOUT,
 * _WOULD_BLOCK, or _ERROR (error buffer set).
 */
static unsigned nfq_daq_msg_receive(void *handle, const unsigned max_recv, const DAQ_Msg_t *msgs[], DAQ_RecvStatus *rstat)
{
    Nfq_Context_t *nfqc = (Nfq_Context_t *) handle;
    unsigned idx = 0;

    *rstat = DAQ_RSTAT_OK;
    while (idx < max_recv)
    {
        /* If the receive has been canceled, break out of the loop and return. */
        if (nfqc->interrupted)
        {
            nfqc->interrupted = false;
            *rstat = DAQ_RSTAT_INTERRUPTED;
            break;
        }

        /* Make sure that we have a packet descriptor available to populate. */
        NfqPktDesc *desc = nfqc->pool.freelist;
        if (!desc)
        {
            *rstat = DAQ_RSTAT_NOBUF;
            break;
        }

        /* Only block while waiting for the first packet of the batch. */
        ssize_t ret = nl_socket_recv(nfqc, desc->nlmsg_buf, nfqc->nlmsg_bufsize, idx == 0);
        if (ret < 0)
        {
            if (errno == ENOBUFS)
            {
                /* The socket receive buffer overflowed; count it as a drop and keep going. */
                nfqc->stats.hw_packets_dropped++;
                continue;
            }
            else if (errno == EAGAIN || errno == EWOULDBLOCK)
                *rstat = (idx == 0) ? DAQ_RSTAT_TIMEOUT : DAQ_RSTAT_WOULD_BLOCK;
            else if (errno == EINTR)
            {
                /* A signal that wasn't accompanied by an interrupt request is just retried. */
                if (!nfqc->interrupted)
                    continue;
                nfqc->interrupted = false;
                *rstat = DAQ_RSTAT_INTERRUPTED;
            }
            else
            {
                SET_ERROR(nfqc->modinst, "%s: Socket receive failed: %zd - %s (%d)",
                        __func__, ret, strerror(errno), errno);
                *rstat = DAQ_RSTAT_ERROR;
            }
            break;
        }
        errno = 0;
        ret = mnl_cb_run(desc->nlmsg_buf, ret, 0, nfqc->portid, process_message_cb, desc);
        if (ret < 0)
        {
            SET_ERROR(nfqc->modinst, "%s: Netlink message processing failed: %zd - %s (%d)",
                    __func__, ret, strerror(errno), errno);
            /* The descriptor stays at the head of the free list.  If the callback had already
                marked it as holding a message, clear that so the next receive can reuse it
                instead of failing on it forever. */
            desc->nlmh = NULL;
            *rstat = DAQ_RSTAT_ERROR;
            break;
        }

        /* The buffer held no packet message (the callback never populated the descriptor), so
            there is nothing to hand back; leave the descriptor on the free list and try again. */
        if (!desc->nlmh)
            continue;

        /* Increment the module instance's packet counter. */
        nfqc->stats.packets_received++;

        /* Last, but not least, extract this descriptor from the free list and
            place the message in the return vector. */
        nfqc->pool.freelist = desc->next;
        desc->next = NULL;
        nfqc->pool.info.available--;
        msgs[idx] = &desc->msg;

        idx++;
    }

    return idx;
}
766 
767 /* Module->msg_finalize() */
/*
 * Finalize a received message: count the verdict, send the corresponding netfilter verdict to
 * the kernel (attaching the message data as the new payload for REPLACE), and return the
 * packet descriptor to the free list.
 *
 * Returns DAQ_SUCCESS, or DAQ_ERROR with the error buffer set if the verdict could not be
 * sent; in that case the descriptor is not returned to the free list.
 */
static int nfq_daq_msg_finalize(void *handle, const DAQ_Msg_t *msg, DAQ_Verdict verdict)
{
    Nfq_Context_t *ctx = (Nfq_Context_t *) handle;
    NfqPktDesc *pd = (NfqPktDesc *) msg->priv;

    /* Out-of-range verdicts are treated as PASS; statistics are kept per original verdict. */
    if (verdict >= MAX_DAQ_VERDICT)
        verdict = DAQ_VERDICT_PASS;
    ctx->stats.verdicts[verdict]++;
    verdict = verdict_translation_table[verdict];

    /* Work out the netfilter verdict and whether a replacement payload accompanies it. */
    /* FIXIT-L Consider using an iovec for scatter/gather transmission with the new payload as a
        separate entry. This would avoid a copy and potentially avoid buffer size restrictions.
        Only as relevant as REPLACE is common. */
    uint32_t payload_len = 0;
    int nf_verdict = NF_DROP;
    if (verdict == DAQ_VERDICT_REPLACE)
    {
        payload_len = msg->data_len;
        nf_verdict = NF_ACCEPT;
    }
    else if (verdict == DAQ_VERDICT_PASS)
        nf_verdict = NF_ACCEPT;

    /* Send the verdict back to the kernel through netlink */
    struct nlmsghdr *reply = nfq_build_verdict(ctx->nlmsg_buf, ntohl(pd->nlph->packet_id), ctx->queue_num,
            nf_verdict, payload_len, msg->data);
    if (mnl_socket_sendto(ctx->nlsock, reply, reply->nlmsg_len) == -1)
    {
        SET_ERROR(ctx->modinst, "%s: Couldn't send NFQ verdict: %s (%d)",
                        __func__, strerror(errno), errno);
        return DAQ_ERROR;
    }

    /* Mark the descriptor as unused (no netlink message) and push it back on the free list. */
    pd->nlmh = NULL;
    pd->next = ctx->pool.freelist;
    ctx->pool.freelist = pd;
    ctx->pool.info.available++;

    return DAQ_SUCCESS;
}
803 
/* Module->get_msg_pool_info(): copy the packet pool's accounting information into 'info'.
    Always returns DAQ_SUCCESS. */
static int nfq_daq_get_msg_pool_info(void *handle, DAQ_MsgPoolInfo_t *info)
{
    const Nfq_Context_t *ctx = handle;
    const NfqMsgPool *mp = &ctx->pool;

    *info = mp->info;
    return DAQ_SUCCESS;
}
812 
/* Module API table for the NFQ module.  It is exported as DAQ_MODULE_DATA when BUILDING_SO is
    defined and as nfq_daq_module_data otherwise.  Entries are positional; the comment in front
    of each one names the field it initializes.  Operations this module does not implement are
    left NULL. */
#ifdef BUILDING_SO
DAQ_SO_PUBLIC const DAQ_ModuleAPI_t DAQ_MODULE_DATA =
#else
const DAQ_ModuleAPI_t nfq_daq_module_data =
#endif
{
    /* .api_version = */ DAQ_MODULE_API_VERSION,
    /* .api_size = */ sizeof(DAQ_ModuleAPI_t),
    /* .module_version = */ DAQ_NFQ_VERSION,
    /* .name = */ "nfq",
    /* .type = */ DAQ_TYPE_INTF_CAPABLE | DAQ_TYPE_INLINE_CAPABLE | DAQ_TYPE_MULTI_INSTANCE | DAQ_TYPE_NO_UNPRIV,
    /* .load = */ nfq_daq_module_load,
    /* .unload = */ nfq_daq_module_unload,
    /* .get_variable_descs = */ nfq_daq_get_variable_descs,
    /* .instantiate = */ nfq_daq_instantiate,
    /* .destroy = */ nfq_daq_destroy,
    /* .set_filter = */ NULL,
    /* .start = */ nfq_daq_start,
    /* .inject = */ NULL,
    /* .inject_relative = */ NULL,
    /* .interrupt = */ nfq_daq_interrupt,
    /* .stop = */ nfq_daq_stop,
    /* .ioctl = */ NULL,
    /* .get_stats = */ nfq_daq_get_stats,
    /* .reset_stats = */ nfq_daq_reset_stats,
    /* .get_snaplen = */ nfq_daq_get_snaplen,
    /* .get_capabilities = */ nfq_daq_get_capabilities,
    /* .get_datalink_type = */ nfq_daq_get_datalink_type,
    /* .config_load = */ NULL,
    /* .config_swap = */ NULL,
    /* .config_free = */ NULL,
    /* .msg_receive = */ nfq_daq_msg_receive,
    /* .msg_finalize = */ nfq_daq_msg_finalize,
    /* .get_msg_pool_info = */ nfq_daq_get_msg_pool_info,
};
848