xref: /linux/net/netfilter/ipvs/ip_vs_app.c (revision 44f57d78)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * ip_vs_app.c: Application module support for IPVS
4  *
5  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
6  *
7  * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
8  * is that ip_vs_app module handles the reverse direction (incoming requests
9  * and outgoing responses).
10  *
11  *		IP_MASQ_APP application masquerading module
12  *
13  * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
14  */
15 
16 #define KMSG_COMPONENT "IPVS"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18 
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/skbuff.h>
22 #include <linux/in.h>
23 #include <linux/ip.h>
24 #include <linux/netfilter.h>
25 #include <linux/slab.h>
26 #include <net/net_namespace.h>
27 #include <net/protocol.h>
28 #include <net/tcp.h>
29 #include <linux/stat.h>
30 #include <linux/proc_fs.h>
31 #include <linux/seq_file.h>
32 #include <linux/mutex.h>
33 
34 #include <net/ip_vs.h>
35 
EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/* Serializes app (un)registration and the /proc/net/ip_vs_app walk. */
static DEFINE_MUTEX(__ip_vs_app_mutex);
41 
/*
 *	Get an ip_vs_app object
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
	/* Pin the module that owns the app; returns 0 if it is going away. */
	return try_module_get(app->module);
}
49 
50 
/* Release the module reference taken by ip_vs_app_get(). */
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
	module_put(app->module);
}
55 
/* Free an app incarnation together with its private timeout table. */
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
{
	kfree(inc->timeout_table);
	kfree(inc);
}
61 
/* RCU callback: free an incarnation once the grace period has elapsed. */
static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
{
	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);

	ip_vs_app_inc_destroy(inc);
}
68 
69 /*
70  *	Allocate/initialize app incarnation and register it in proto apps.
71  */
72 static int
73 ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
74 		  __u16 port)
75 {
76 	struct ip_vs_protocol *pp;
77 	struct ip_vs_app *inc;
78 	int ret;
79 
80 	if (!(pp = ip_vs_proto_get(proto)))
81 		return -EPROTONOSUPPORT;
82 
83 	if (!pp->unregister_app)
84 		return -EOPNOTSUPP;
85 
86 	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
87 	if (!inc)
88 		return -ENOMEM;
89 	INIT_LIST_HEAD(&inc->p_list);
90 	INIT_LIST_HEAD(&inc->incs_list);
91 	inc->app = app;
92 	inc->port = htons(port);
93 	atomic_set(&inc->usecnt, 0);
94 
95 	if (app->timeouts) {
96 		inc->timeout_table =
97 			ip_vs_create_timeout_table(app->timeouts,
98 						   app->timeouts_size);
99 		if (!inc->timeout_table) {
100 			ret = -ENOMEM;
101 			goto out;
102 		}
103 	}
104 
105 	ret = pp->register_app(ipvs, inc);
106 	if (ret)
107 		goto out;
108 
109 	list_add(&inc->a_list, &app->incs_list);
110 	IP_VS_DBG(9, "%s App %s:%u registered\n",
111 		  pp->name, inc->name, ntohs(inc->port));
112 
113 	return 0;
114 
115   out:
116 	ip_vs_app_inc_destroy(inc);
117 	return ret;
118 }
119 
120 
121 /*
122  *	Release app incarnation
123  */
124 static void
125 ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
126 {
127 	struct ip_vs_protocol *pp;
128 
129 	if (!(pp = ip_vs_proto_get(inc->protocol)))
130 		return;
131 
132 	if (pp->unregister_app)
133 		pp->unregister_app(ipvs, inc);
134 
135 	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
136 		  pp->name, inc->name, ntohs(inc->port));
137 
138 	list_del(&inc->a_list);
139 
140 	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
141 }
142 
143 
144 /*
145  *	Get reference to app inc (only called from softirq)
146  *
147  */
148 int ip_vs_app_inc_get(struct ip_vs_app *inc)
149 {
150 	int result;
151 
152 	result = ip_vs_app_get(inc->app);
153 	if (result)
154 		atomic_inc(&inc->usecnt);
155 	return result;
156 }
157 
158 
/*
 *	Put the app inc (only called from timer or net softirq)
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
	/* Reverse of ip_vs_app_inc_get(): drop usecnt, then unpin module. */
	atomic_dec(&inc->usecnt);
	ip_vs_app_put(inc->app);
}
167 
168 
169 /*
170  *	Register an application incarnation in protocol applications
171  */
172 int
173 register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
174 		       __u16 port)
175 {
176 	int result;
177 
178 	mutex_lock(&__ip_vs_app_mutex);
179 
180 	result = ip_vs_app_inc_new(ipvs, app, proto, port);
181 
182 	mutex_unlock(&__ip_vs_app_mutex);
183 
184 	return result;
185 }
186 
187 
188 /* Register application for netns */
189 struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
190 {
191 	struct ip_vs_app *a;
192 	int err = 0;
193 
194 	mutex_lock(&__ip_vs_app_mutex);
195 
196 	list_for_each_entry(a, &ipvs->app_list, a_list) {
197 		if (!strcmp(app->name, a->name)) {
198 			err = -EEXIST;
199 			goto out_unlock;
200 		}
201 	}
202 	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
203 	if (!a) {
204 		err = -ENOMEM;
205 		goto out_unlock;
206 	}
207 	INIT_LIST_HEAD(&a->incs_list);
208 	list_add(&a->a_list, &ipvs->app_list);
209 	/* increase the module use count */
210 	ip_vs_use_count_inc();
211 
212 out_unlock:
213 	mutex_unlock(&__ip_vs_app_mutex);
214 
215 	return err ? ERR_PTR(err) : a;
216 }
217 
218 
219 /*
220  *	ip_vs_app unregistration routine
221  *	We are sure there are no app incarnations attached to services
222  *	Caller should use synchronize_rcu() or rcu_barrier()
223  */
224 void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
225 {
226 	struct ip_vs_app *a, *anxt, *inc, *nxt;
227 
228 	mutex_lock(&__ip_vs_app_mutex);
229 
230 	list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
231 		if (app && strcmp(app->name, a->name))
232 			continue;
233 		list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
234 			ip_vs_app_inc_release(ipvs, inc);
235 		}
236 
237 		list_del(&a->a_list);
238 		kfree(a);
239 
240 		/* decrease the module use count */
241 		ip_vs_use_count_dec();
242 	}
243 
244 	mutex_unlock(&__ip_vs_app_mutex);
245 }
246 
247 
/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp)
{
	/* Delegate entirely to the protocol-specific app_conn_bind hook. */
	return pp->app_conn_bind(cp);
}
256 
257 
258 /*
259  *	Unbind cp from application incarnation (called by cp destructor)
260  */
261 void ip_vs_unbind_app(struct ip_vs_conn *cp)
262 {
263 	struct ip_vs_app *inc = cp->app;
264 
265 	if (!inc)
266 		return;
267 
268 	if (inc->unbind_conn)
269 		inc->unbind_conn(inc, cp);
270 	if (inc->done_conn)
271 		inc->done_conn(inc, cp);
272 	ip_vs_app_inc_put(inc);
273 	cp->app = NULL;
274 }
275 
276 
277 /*
278  *	Fixes th->seq based on ip_vs_seq info.
279  */
280 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
281 {
282 	__u32 seq = ntohl(th->seq);
283 
284 	/*
285 	 *	Adjust seq with delta-offset for all packets after
286 	 *	the most recent resized pkt seq and with previous_delta offset
287 	 *	for all packets	before most recent resized pkt seq.
288 	 */
289 	if (vseq->delta || vseq->previous_delta) {
290 		if(after(seq, vseq->init_seq)) {
291 			th->seq = htonl(seq + vseq->delta);
292 			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
293 				  __func__, vseq->delta);
294 		} else {
295 			th->seq = htonl(seq + vseq->previous_delta);
296 			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
297 				  __func__, vseq->previous_delta);
298 		}
299 	}
300 }
301 
302 
303 /*
304  *	Fixes th->ack_seq based on ip_vs_seq info.
305  */
306 static inline void
307 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
308 {
309 	__u32 ack_seq = ntohl(th->ack_seq);
310 
311 	/*
312 	 * Adjust ack_seq with delta-offset for
313 	 * the packets AFTER most recent resized pkt has caused a shift
314 	 * for packets before most recent resized pkt, use previous_delta
315 	 */
316 	if (vseq->delta || vseq->previous_delta) {
317 		/* since ack_seq is the number of octet that is expected
318 		   to receive next, so compare it with init_seq+delta */
319 		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
320 			th->ack_seq = htonl(ack_seq - vseq->delta);
321 			IP_VS_DBG(9, "%s(): subtracted delta "
322 				  "(%d) from ack_seq\n", __func__, vseq->delta);
323 
324 		} else {
325 			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
326 			IP_VS_DBG(9, "%s(): subtracted "
327 				  "previous_delta (%d) from ack_seq\n",
328 				  __func__, vseq->previous_delta);
329 		}
330 	}
331 }
332 
333 
/*
 *	Updates ip_vs_seq if pkt has been resized
 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned int flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock_bh(&cp->lock);
	/* Record the delta on the first resize, or when this resize is at a
	 * later sequence than the last recorded one; the old delta is kept
	 * in previous_delta for packets preceding init_seq. */
	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock_bh(&cp->lock);
}
351 
352 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
353 				  struct ip_vs_app *app,
354 				  struct ip_vs_iphdr *ipvsh)
355 {
356 	int diff;
357 	const unsigned int tcp_offset = ip_hdrlen(skb);
358 	struct tcphdr *th;
359 	__u32 seq;
360 
361 	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
362 		return 0;
363 
364 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
365 
366 	/*
367 	 *	Remember seq number in case this pkt gets resized
368 	 */
369 	seq = ntohl(th->seq);
370 
371 	/*
372 	 *	Fix seq stuff if flagged as so.
373 	 */
374 	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
375 		vs_fix_seq(&cp->out_seq, th);
376 	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
377 		vs_fix_ack_seq(&cp->in_seq, th);
378 
379 	/*
380 	 *	Call private output hook function
381 	 */
382 	if (app->pkt_out == NULL)
383 		return 1;
384 
385 	if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
386 		return 0;
387 
388 	/*
389 	 *	Update ip_vs seq stuff if len has changed.
390 	 */
391 	if (diff != 0)
392 		vs_seq_update(cp, &cp->out_seq,
393 			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
394 
395 	return 1;
396 }
397 
398 /*
399  *	Output pkt hook. Will call bound ip_vs_app specific function
400  *	called by ipvs packet handler, assumes previously checked cp!=NULL
401  *	returns false if it can't handle packet (oom)
402  */
403 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
404 		      struct ip_vs_iphdr *ipvsh)
405 {
406 	struct ip_vs_app *app;
407 
408 	/*
409 	 *	check if application module is bound to
410 	 *	this ip_vs_conn.
411 	 */
412 	if ((app = cp->app) == NULL)
413 		return 1;
414 
415 	/* TCP is complicated */
416 	if (cp->protocol == IPPROTO_TCP)
417 		return app_tcp_pkt_out(cp, skb, app, ipvsh);
418 
419 	/*
420 	 *	Call private output hook function
421 	 */
422 	if (app->pkt_out == NULL)
423 		return 1;
424 
425 	return app->pkt_out(app, cp, skb, NULL, ipvsh);
426 }
427 
428 
429 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
430 				 struct ip_vs_app *app,
431 				 struct ip_vs_iphdr *ipvsh)
432 {
433 	int diff;
434 	const unsigned int tcp_offset = ip_hdrlen(skb);
435 	struct tcphdr *th;
436 	__u32 seq;
437 
438 	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
439 		return 0;
440 
441 	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
442 
443 	/*
444 	 *	Remember seq number in case this pkt gets resized
445 	 */
446 	seq = ntohl(th->seq);
447 
448 	/*
449 	 *	Fix seq stuff if flagged as so.
450 	 */
451 	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
452 		vs_fix_seq(&cp->in_seq, th);
453 	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
454 		vs_fix_ack_seq(&cp->out_seq, th);
455 
456 	/*
457 	 *	Call private input hook function
458 	 */
459 	if (app->pkt_in == NULL)
460 		return 1;
461 
462 	if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
463 		return 0;
464 
465 	/*
466 	 *	Update ip_vs seq stuff if len has changed.
467 	 */
468 	if (diff != 0)
469 		vs_seq_update(cp, &cp->in_seq,
470 			      IP_VS_CONN_F_IN_SEQ, seq, diff);
471 
472 	return 1;
473 }
474 
475 /*
476  *	Input pkt hook. Will call bound ip_vs_app specific function
477  *	called by ipvs packet handler, assumes previously checked cp!=NULL.
478  *	returns false if can't handle packet (oom).
479  */
480 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
481 		     struct ip_vs_iphdr *ipvsh)
482 {
483 	struct ip_vs_app *app;
484 
485 	/*
486 	 *	check if application module is bound to
487 	 *	this ip_vs_conn.
488 	 */
489 	if ((app = cp->app) == NULL)
490 		return 1;
491 
492 	/* TCP is complicated */
493 	if (cp->protocol == IPPROTO_TCP)
494 		return app_tcp_pkt_in(cp, skb, app, ipvsh);
495 
496 	/*
497 	 *	Call private input hook function
498 	 */
499 	if (app->pkt_in == NULL)
500 		return 1;
501 
502 	return app->pkt_in(app, cp, skb, NULL, ipvsh);
503 }
504 
505 
506 #ifdef CONFIG_PROC_FS
507 /*
508  *	/proc/net/ip_vs_app entry function
509  */
510 
511 static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
512 {
513 	struct ip_vs_app *app, *inc;
514 
515 	list_for_each_entry(app, &ipvs->app_list, a_list) {
516 		list_for_each_entry(inc, &app->incs_list, a_list) {
517 			if (pos-- == 0)
518 				return inc;
519 		}
520 	}
521 	return NULL;
522 
523 }
524 
525 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
526 {
527 	struct net *net = seq_file_net(seq);
528 	struct netns_ipvs *ipvs = net_ipvs(net);
529 
530 	mutex_lock(&__ip_vs_app_mutex);
531 
532 	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
533 }
534 
/* seq_file next: advance from incarnation v to the following one,
 * crossing into the next application when the current one is exhausted.
 * Called with __ip_vs_app_mutex held. */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *inc, *app;
	struct list_head *e;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	/* Next incarnation of the same application, if any. */
	if ((e = inc->a_list.next) != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		/* Inner "loop" returns the first incarnation immediately;
		 * apps with an empty incs_list are simply skipped. */
		list_for_each_entry(inc, &app->incs_list, a_list) {
			return inc;
		}
	}
	return NULL;
}
561 
/* seq_file stop: drop the mutex taken in ip_vs_app_seq_start(). */
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
	mutex_unlock(&__ip_vs_app_mutex);
}
566 
567 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
568 {
569 	if (v == SEQ_START_TOKEN)
570 		seq_puts(seq, "prot port    usecnt name\n");
571 	else {
572 		const struct ip_vs_app *inc = v;
573 
574 		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
575 			   ip_vs_proto_name(inc->protocol),
576 			   ntohs(inc->port),
577 			   atomic_read(&inc->usecnt),
578 			   inc->name);
579 	}
580 	return 0;
581 }
582 
/* seq_file operations for /proc/net/ip_vs_app; the whole iteration runs
 * with __ip_vs_app_mutex held (taken in ->start, dropped in ->stop). */
static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};
589 #endif
590 
591 int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
592 {
593 	INIT_LIST_HEAD(&ipvs->app_list);
594 	proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
595 			sizeof(struct seq_net_private));
596 	return 0;
597 }
598 
/* Per-netns teardown: drop every registered app (and its incarnations)
 * for this netns, then remove the proc entry. */
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
	unregister_ip_vs_app(ipvs, NULL /* all */);
	remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
}
604