xref: /dragonfly/sys/netgraph7/tcpmss/ng_tcpmss.c (revision 37de577a)
1 /*-
2  * ng_tcpmss.c
3  *
4  * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * This software includes fragments of the following programs:
30  *	tcpmssd		Ruslan Ermilov <ru@FreeBSD.org>
31  *
32  * $FreeBSD: src/sys/netgraph/ng_tcpmss.c,v 1.4 2007/01/15 05:01:31 glebius Exp $
33  * $DragonFly: src/sys/netgraph7/ng_tcpmss.c,v 1.2 2008/06/26 23:05:35 dillon Exp $
34  */
35 
/*
 * This node is a netgraph tool for working around PMTUD problems. It acts
 * as a filter for IP packets. If configured, it reduces the MSS of TCP SYN
 * packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets up a filter for packets arriving on hook 'inHook'. The
 * packet's TCP MSS field is lowered to the 'maxMSS' parameter and the
 * resulting packet is sent to 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may break on SMP.
 */
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/errno.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <netgraph7/ng_message.h>
62 #include <netgraph7/netgraph.h>
63 #include <netgraph7/ng_parse.h>
64 #include "ng_tcpmss.h"
65 
66 /* Per hook info. */
67 typedef struct {
68 	hook_p				outHook;
69 	struct ng_tcpmss_hookstat	stats;
70 } *hpriv_p;
71 
72 /* Netgraph methods. */
73 static ng_constructor_t	ng_tcpmss_constructor;
74 static ng_rcvmsg_t	ng_tcpmss_rcvmsg;
75 static ng_newhook_t	ng_tcpmss_newhook;
76 static ng_rcvdata_t	ng_tcpmss_rcvdata;
77 static ng_disconnect_t	ng_tcpmss_disconnect;
78 
79 static int correct_mss(struct tcphdr *, int, uint16_t, int);
80 
81 /* Parse type for struct ng_tcpmss_hookstat. */
82 static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
83 	= NG_TCPMSS_HOOKSTAT_INFO;
84 static const struct ng_parse_type ng_tcpmss_hookstat_type = {
85 	&ng_parse_struct_type,
86 	&ng_tcpmss_hookstat_type_fields
87 };
88 
89 /* Parse type for struct ng_tcpmss_config. */
90 static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
91 	= NG_TCPMSS_CONFIG_INFO;
92 static const struct ng_parse_type ng_tcpmss_config_type = {
93 	&ng_parse_struct_type,
94 	ng_tcpmss_config_type_fields
95 };
96 
97 /* List of commands and how to convert arguments to/from ASCII. */
98 static const struct ng_cmdlist ng_tcpmss_cmds[] = {
99 	{
100 	  NGM_TCPMSS_COOKIE,
101 	  NGM_TCPMSS_GET_STATS,
102 	  "getstats",
103 	  &ng_parse_hookbuf_type,
104 	  &ng_tcpmss_hookstat_type
105 	},
106 	{
107 	  NGM_TCPMSS_COOKIE,
108 	  NGM_TCPMSS_CLR_STATS,
109 	  "clrstats",
110 	  &ng_parse_hookbuf_type,
111 	  NULL
112 	},
113 	{
114 	  NGM_TCPMSS_COOKIE,
115 	  NGM_TCPMSS_GETCLR_STATS,
116 	  "getclrstats",
117 	  &ng_parse_hookbuf_type,
118 	  &ng_tcpmss_hookstat_type
119 	},
120 	{
121 	  NGM_TCPMSS_COOKIE,
122 	  NGM_TCPMSS_CONFIG,
123 	  "config",
124 	  &ng_tcpmss_config_type,
125 	  NULL
126 	},
127 	{ 0 }
128 };
129 
130 /* Netgraph type descriptor. */
131 static struct ng_type ng_tcpmss_typestruct = {
132 	.version =	NG_ABI_VERSION,
133 	.name =		NG_TCPMSS_NODE_TYPE,
134 	.constructor =	ng_tcpmss_constructor,
135 	.rcvmsg =	ng_tcpmss_rcvmsg,
136 	.newhook =	ng_tcpmss_newhook,
137 	.rcvdata =	ng_tcpmss_rcvdata,
138 	.disconnect =	ng_tcpmss_disconnect,
139 	.cmdlist =	ng_tcpmss_cmds,
140 };
141 
142 NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
143 
/*
 * Record error code 'x' in the local variable 'error' and jump to the
 * local 'done' label.  Wrapped in do { } while (0) so that the macro acts
 * as a single statement and "if (c) ERROUT(x); else ..." parses correctly.
 */
#define	ERROUT(x)	do { error = (x); goto done; } while (0)
145 
146 /*
147  * Node constructor. No special actions required.
148  */
149 static int
150 ng_tcpmss_constructor(node_p node)
151 {
152 	return (0);
153 }
154 
155 /*
156  * Add a hook. Any unique name is OK.
157  */
158 static int
159 ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
160 {
161 	hpriv_p priv;
162 
163 	priv = kmalloc(sizeof(*priv), M_NETGRAPH,
164 		       M_WAITOK | M_NULLOK | M_ZERO);
165 	if (priv == NULL)
166 		return (ENOMEM);
167 
168 	NG_HOOK_SET_PRIVATE(hook, priv);
169 
170 	return (0);
171 }
172 
173 /*
174  * Receive a control message.
175  */
176 static int
177 ng_tcpmss_rcvmsg
178 (node_p node, item_p item, hook_p lasthook)
179 {
180 	struct ng_mesg *msg, *resp = NULL;
181 	int error = 0;
182 
183 	NGI_GET_MSG(item, msg);
184 
185 	switch (msg->header.typecookie) {
186 	case NGM_TCPMSS_COOKIE:
187 		switch (msg->header.cmd) {
188 		case NGM_TCPMSS_GET_STATS:
189 		case NGM_TCPMSS_CLR_STATS:
190 		case NGM_TCPMSS_GETCLR_STATS:
191 		    {
192 			hook_p hook;
193 			hpriv_p priv;
194 
195 			/* Check that message is long enough. */
196 			if (msg->header.arglen != NG_HOOKSIZ)
197 				ERROUT(EINVAL);
198 
199 			/* Find this hook. */
200 			hook = ng_findhook(node, (char *)msg->data);
201 			if (hook == NULL)
202 				ERROUT(ENOENT);
203 
204 			priv = NG_HOOK_PRIVATE(hook);
205 
206 			/* Create response. */
207 			if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
208 				NG_MKRESPONSE(resp, msg,
209 				    sizeof(struct ng_tcpmss_hookstat), M_WAITOK | M_NULLOK);
210 				if (resp == NULL)
211 					ERROUT(ENOMEM);
212 				bcopy(&priv->stats, resp->data,
213 				    sizeof(struct ng_tcpmss_hookstat));
214 			}
215 
216 			if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
217 				bzero(&priv->stats,
218 				    sizeof(struct ng_tcpmss_hookstat));
219 			break;
220 		    }
221 		case NGM_TCPMSS_CONFIG:
222 		    {
223 			struct ng_tcpmss_config *set;
224 			hook_p in, out;
225 			hpriv_p priv;
226 
227 			/* Check that message is long enough. */
228 			if (msg->header.arglen !=
229 			    sizeof(struct ng_tcpmss_config))
230 				ERROUT(EINVAL);
231 
232 			set = (struct ng_tcpmss_config *)msg->data;
233 			in = ng_findhook(node, set->inHook);
234 			out = ng_findhook(node, set->outHook);
235 			if (in == NULL || out == NULL)
236 				ERROUT(ENOENT);
237 
238 			/* Configure MSS hack. */
239 			priv = NG_HOOK_PRIVATE(in);
240 			priv->outHook = out;
241 			priv->stats.maxMSS = set->maxMSS;
242 
243 			break;
244  		    }
245 		default:
246 			error = EINVAL;
247 			break;
248 		}
249 		break;
250 	default:
251 		error = EINVAL;
252 		break;
253 	}
254 
255 done:
256 	NG_RESPOND_MSG(error, node, item, resp);
257 	NG_FREE_MSG(msg);
258 
259 	return (error);
260 }
261 
262 /*
263  * Receive data on a hook, and hack MSS.
264  *
265  */
266 static int
267 ng_tcpmss_rcvdata(hook_p hook, item_p item)
268 {
269 	hpriv_p priv = NG_HOOK_PRIVATE(hook);
270 	struct mbuf *m = NULL;
271 	struct ip *ip;
272 	struct tcphdr *tcp;
273 	int iphlen, tcphlen, pktlen;
274 	int pullup_len = 0;
275 	int error = 0;
276 
277 	/* Drop packets if filter is not configured on this hook. */
278 	if (priv->outHook == NULL)
279 		goto done;
280 
281 	NGI_GET_M(item, m);
282 
283 	/* Update stats on incoming hook. */
284 	pktlen = m->m_pkthdr.len;
285 	priv->stats.Octets += pktlen;
286 	priv->stats.Packets++;
287 
288 	/* Check whether we configured to fix MSS. */
289 	if (priv->stats.maxMSS == 0)
290 		goto send;
291 
292 #define	M_CHECK(length) do {					\
293 	pullup_len += length;					\
294 	if ((m)->m_pkthdr.len < pullup_len)			\
295 		goto send;					\
296 	if ((m)->m_len < pullup_len &&				\
297 	   (((m) = m_pullup((m), pullup_len)) == NULL))		\
298 		ERROUT(ENOBUFS);				\
299 	} while (0)
300 
301 	/* Check mbuf packet size and arrange for IP header. */
302 	M_CHECK(sizeof(struct ip));
303 	ip = mtod(m, struct ip *);
304 
305 	/* Check IP version. */
306 	if (ip->ip_v != IPVERSION)
307 		ERROUT(EINVAL);
308 
309 	/* Check IP header length. */
310 	iphlen = ip->ip_hl << 2;
311 	if (iphlen < sizeof(struct ip) || iphlen > pktlen )
312 		ERROUT(EINVAL);
313 
314         /* Check if it is TCP. */
315 	if (!(ip->ip_p == IPPROTO_TCP))
316 		goto send;
317 
318 	/* Check mbuf packet size and arrange for IP+TCP header */
319 	M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
320 	ip = mtod(m, struct ip *);
321 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
322 
323 	/* Check TCP header length. */
324 	tcphlen = tcp->th_off << 2;
325 	if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
326 		ERROUT(EINVAL);
327 
328 	/* Check SYN packet and has options. */
329 	if (!(tcp->th_flags & TH_SYN) || tcphlen == sizeof(struct tcphdr))
330 		goto send;
331 
332 	/* Update SYN stats. */
333 	priv->stats.SYNPkts++;
334 
335 	M_CHECK(tcphlen - sizeof(struct tcphdr));
336 	ip = mtod(m, struct ip *);
337 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
338 
339 #undef	M_CHECK
340 
341 	/* Fix MSS and update stats. */
342 	if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
343 	    m->m_pkthdr.csum_flags))
344 		priv->stats.FixedPkts++;
345 
346 send:
347 	/* Deliver frame out destination hook. */
348 	NG_FWD_NEW_DATA(error, item, priv->outHook, m);
349 
350 	return (error);
351 
352 done:
353 	NG_FREE_ITEM(item);
354 	NG_FREE_M(m);
355 
356 	return (error);
357 }
358 
359 /*
360  * Hook disconnection.
361  * We must check all hooks, since they may reference this one.
362  */
363 static int
364 ng_tcpmss_disconnect(hook_p hook)
365 {
366 	node_p node = NG_HOOK_NODE(hook);
367 	hook_p hook2;
368 
369 	LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
370 		hpriv_p priv = NG_HOOK_PRIVATE(hook2);
371 
372 		if (priv->outHook == hook)
373 			priv->outHook = NULL;
374 	}
375 
376 	kfree(NG_HOOK_PRIVATE(hook), M_NETGRAPH);
377 
378 	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
379 		ng_rmnode_self(NG_HOOK_NODE(hook));
380 
381 	return (0);
382 }
383 
384 /*
385  * Code from tcpmssd.
386  */
387 
/*-
 * The following macro is used to update an internet checksum in place.
 *
 * "acc" is a signed 32-bit accumulation of all the changes to the
 * checksummed data (adding in old 16-bit words and subtracting out new
 * words); it is modified by the macro.  "cksum" is the 16-bit checksum
 * value to be updated.
 *
 * The macro expands to a single statement without a trailing semicolon,
 * so it can be used safely as the body of an if/else.
 */
#define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {			\
	(acc) += (cksum);					\
	if ((acc) < 0) {					\
		(acc) = -(acc);					\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (u_short) ~(acc);			\
	} else {						\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (u_short) (acc);			\
	}							\
} while (0)
409 
410 static int
411 correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
412 {
413 	int olen, optlen;
414 	u_char *opt;
415 	uint16_t *mss;
416 	int accumulate;
417 	int res = 0;
418 
419 	for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
420 	     olen > 0; olen -= optlen, opt += optlen) {
421 		if (*opt == TCPOPT_EOL)
422 			break;
423 		else if (*opt == TCPOPT_NOP)
424 			optlen = 1;
425 		else {
426 			optlen = *(opt + 1);
427 			if (optlen <= 0 || optlen > olen)
428 				break;
429 			if (*opt == TCPOPT_MAXSEG) {
430 				if (optlen != TCPOLEN_MAXSEG)
431 					continue;
432 				mss = (uint16_t *)(opt + 2);
433 				if (ntohs(*mss) > maxmss) {
434 					accumulate = *mss;
435 					*mss = htons(maxmss);
436 					accumulate -= *mss;
437 					if ((flags & CSUM_TCP) == 0)
438 						TCPMSS_ADJUST_CHECKSUM(accumulate, tc->th_sum);
439 					res = 1;
440 				}
441 			}
442 		}
443 	}
444 	return (res);
445 }
446