1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * ip_vs_app.c: Application module support for IPVS 4 * 5 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 6 * 7 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference 8 * is that ip_vs_app module handles the reverse direction (incoming requests 9 * and outgoing responses). 10 * 11 * IP_MASQ_APP application masquerading module 12 * 13 * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/kernel.h> 21 #include <linux/skbuff.h> 22 #include <linux/in.h> 23 #include <linux/ip.h> 24 #include <linux/netfilter.h> 25 #include <linux/slab.h> 26 #include <net/net_namespace.h> 27 #include <net/protocol.h> 28 #include <net/tcp.h> 29 #include <linux/stat.h> 30 #include <linux/proc_fs.h> 31 #include <linux/seq_file.h> 32 #include <linux/mutex.h> 33 34 #include <net/ip_vs.h> 35 36 EXPORT_SYMBOL(register_ip_vs_app); 37 EXPORT_SYMBOL(unregister_ip_vs_app); 38 EXPORT_SYMBOL(register_ip_vs_app_inc); 39 40 static DEFINE_MUTEX(__ip_vs_app_mutex); 41 42 /* 43 * Get an ip_vs_app object 44 */ 45 static inline int ip_vs_app_get(struct ip_vs_app *app) 46 { 47 return try_module_get(app->module); 48 } 49 50 51 static inline void ip_vs_app_put(struct ip_vs_app *app) 52 { 53 module_put(app->module); 54 } 55 56 static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) 57 { 58 kfree(inc->timeout_table); 59 kfree(inc); 60 } 61 62 static void ip_vs_app_inc_rcu_free(struct rcu_head *head) 63 { 64 struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); 65 66 ip_vs_app_inc_destroy(inc); 67 } 68 69 /* 70 * Allocate/initialize app incarnation and register it in proto apps. 71 */ 72 static int 73 ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, 74 __u16 port) 75 { 76 struct ip_vs_protocol *pp; 77 struct ip_vs_app *inc; 78 int ret; 79 80 if (!(pp = ip_vs_proto_get(proto))) 81 return -EPROTONOSUPPORT; 82 83 if (!pp->unregister_app) 84 return -EOPNOTSUPP; 85 86 inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); 87 if (!inc) 88 return -ENOMEM; 89 INIT_LIST_HEAD(&inc->p_list); 90 INIT_LIST_HEAD(&inc->incs_list); 91 inc->app = app; 92 inc->port = htons(port); 93 atomic_set(&inc->usecnt, 0); 94 95 if (app->timeouts) { 96 inc->timeout_table = 97 ip_vs_create_timeout_table(app->timeouts, 98 app->timeouts_size); 99 if (!inc->timeout_table) { 100 ret = -ENOMEM; 101 goto out; 102 } 103 } 104 105 ret = pp->register_app(ipvs, inc); 106 if (ret) 107 goto out; 108 109 list_add(&inc->a_list, &app->incs_list); 110 IP_VS_DBG(9, "%s App %s:%u registered\n", 111 pp->name, inc->name, ntohs(inc->port)); 112 113 return 0; 114 115 out: 116 ip_vs_app_inc_destroy(inc); 117 return ret; 118 } 119 120 121 /* 122 * Release app incarnation 123 */ 124 static void 125 ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) 126 { 127 struct ip_vs_protocol *pp; 128 129 if (!(pp = ip_vs_proto_get(inc->protocol))) 130 return; 131 132 if (pp->unregister_app) 133 pp->unregister_app(ipvs, inc); 134 135 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 136 pp->name, inc->name, ntohs(inc->port)); 137 138 list_del(&inc->a_list); 139 140 call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); 141 } 142 143 144 /* 145 * Get reference to app inc (only called from softirq) 146 * 147 */ 148 int ip_vs_app_inc_get(struct ip_vs_app *inc) 149 { 150 int result; 151 152 result = ip_vs_app_get(inc->app); 153 if (result) 154 atomic_inc(&inc->usecnt); 155 return result; 156 } 157 158 159 /* 160 * Put the app inc (only called from timer or net softirq) 161 */ 162 void ip_vs_app_inc_put(struct ip_vs_app *inc) 163 { 164 atomic_dec(&inc->usecnt); 165 ip_vs_app_put(inc->app); 166 } 167 168 169 /* 170 * Register an application incarnation in protocol applications 171 */ 172 int 173 register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, 174 __u16 port) 175 { 176 int result; 177 178 mutex_lock(&__ip_vs_app_mutex); 179 180 result = ip_vs_app_inc_new(ipvs, app, proto, port); 181 182 mutex_unlock(&__ip_vs_app_mutex); 183 184 return result; 185 } 186 187 188 /* Register application for netns */ 189 struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) 190 { 191 struct ip_vs_app *a; 192 int err = 0; 193 194 mutex_lock(&__ip_vs_app_mutex); 195 196 list_for_each_entry(a, &ipvs->app_list, a_list) { 197 if (!strcmp(app->name, a->name)) { 198 err = -EEXIST; 199 goto out_unlock; 200 } 201 } 202 a = kmemdup(app, sizeof(*app), GFP_KERNEL); 203 if (!a) { 204 err = -ENOMEM; 205 goto out_unlock; 206 } 207 INIT_LIST_HEAD(&a->incs_list); 208 list_add(&a->a_list, &ipvs->app_list); 209 /* increase the module use count */ 210 ip_vs_use_count_inc(); 211 212 out_unlock: 213 mutex_unlock(&__ip_vs_app_mutex); 214 215 return err ? ERR_PTR(err) : a; 216 } 217 218 219 /* 220 * ip_vs_app unregistration routine 221 * We are sure there are no app incarnations attached to services 222 * Caller should use synchronize_rcu() or rcu_barrier() 223 */ 224 void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) 225 { 226 struct ip_vs_app *a, *anxt, *inc, *nxt; 227 228 mutex_lock(&__ip_vs_app_mutex); 229 230 list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { 231 if (app && strcmp(app->name, a->name)) 232 continue; 233 list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { 234 ip_vs_app_inc_release(ipvs, inc); 235 } 236 237 list_del(&a->a_list); 238 kfree(a); 239 240 /* decrease the module use count */ 241 ip_vs_use_count_dec(); 242 } 243 244 mutex_unlock(&__ip_vs_app_mutex); 245 } 246 247 248 /* 249 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 250 */ 251 int ip_vs_bind_app(struct ip_vs_conn *cp, 252 struct ip_vs_protocol *pp) 253 { 254 return pp->app_conn_bind(cp); 255 } 256 257 258 /* 259 * Unbind cp from application incarnation (called by cp destructor) 260 */ 261 void ip_vs_unbind_app(struct ip_vs_conn *cp) 262 { 263 struct ip_vs_app *inc = cp->app; 264 265 if (!inc) 266 return; 267 268 if (inc->unbind_conn) 269 inc->unbind_conn(inc, cp); 270 if (inc->done_conn) 271 inc->done_conn(inc, cp); 272 ip_vs_app_inc_put(inc); 273 cp->app = NULL; 274 } 275 276 277 /* 278 * Fixes th->seq based on ip_vs_seq info. 279 */ 280 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) 281 { 282 __u32 seq = ntohl(th->seq); 283 284 /* 285 * Adjust seq with delta-offset for all packets after 286 * the most recent resized pkt seq and with previous_delta offset 287 * for all packets before most recent resized pkt seq. 288 */ 289 if (vseq->delta || vseq->previous_delta) { 290 if(after(seq, vseq->init_seq)) { 291 th->seq = htonl(seq + vseq->delta); 292 IP_VS_DBG(9, "%s(): added delta (%d) to seq\n", 293 __func__, vseq->delta); 294 } else { 295 th->seq = htonl(seq + vseq->previous_delta); 296 IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n", 297 __func__, vseq->previous_delta); 298 } 299 } 300 } 301 302 303 /* 304 * Fixes th->ack_seq based on ip_vs_seq info. 305 */ 306 static inline void 307 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) 308 { 309 __u32 ack_seq = ntohl(th->ack_seq); 310 311 /* 312 * Adjust ack_seq with delta-offset for 313 * the packets AFTER most recent resized pkt has caused a shift 314 * for packets before most recent resized pkt, use previous_delta 315 */ 316 if (vseq->delta || vseq->previous_delta) { 317 /* since ack_seq is the number of octet that is expected 318 to receive next, so compare it with init_seq+delta */ 319 if(after(ack_seq, vseq->init_seq+vseq->delta)) { 320 th->ack_seq = htonl(ack_seq - vseq->delta); 321 IP_VS_DBG(9, "%s(): subtracted delta " 322 "(%d) from ack_seq\n", __func__, vseq->delta); 323 324 } else { 325 th->ack_seq = htonl(ack_seq - vseq->previous_delta); 326 IP_VS_DBG(9, "%s(): subtracted " 327 "previous_delta (%d) from ack_seq\n", 328 __func__, vseq->previous_delta); 329 } 330 } 331 } 332 333 334 /* 335 * Updates ip_vs_seq if pkt has been resized 336 * Assumes already checked proto==IPPROTO_TCP and diff!=0. 337 */ 338 static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, 339 unsigned int flag, __u32 seq, int diff) 340 { 341 /* spinlock is to keep updating cp->flags atomic */ 342 spin_lock_bh(&cp->lock); 343 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { 344 vseq->previous_delta = vseq->delta; 345 vseq->delta += diff; 346 vseq->init_seq = seq; 347 cp->flags |= flag; 348 } 349 spin_unlock_bh(&cp->lock); 350 } 351 352 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 353 struct ip_vs_app *app, 354 struct ip_vs_iphdr *ipvsh) 355 { 356 int diff; 357 const unsigned int tcp_offset = ip_hdrlen(skb); 358 struct tcphdr *th; 359 __u32 seq; 360 361 if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) 362 return 0; 363 364 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); 365 366 /* 367 * Remember seq number in case this pkt gets resized 368 */ 369 seq = ntohl(th->seq); 370 371 /* 372 * Fix seq stuff if flagged as so. 373 */ 374 if (cp->flags & IP_VS_CONN_F_OUT_SEQ) 375 vs_fix_seq(&cp->out_seq, th); 376 if (cp->flags & IP_VS_CONN_F_IN_SEQ) 377 vs_fix_ack_seq(&cp->in_seq, th); 378 379 /* 380 * Call private output hook function 381 */ 382 if (app->pkt_out == NULL) 383 return 1; 384 385 if (!app->pkt_out(app, cp, skb, &diff, ipvsh)) 386 return 0; 387 388 /* 389 * Update ip_vs seq stuff if len has changed. 390 */ 391 if (diff != 0) 392 vs_seq_update(cp, &cp->out_seq, 393 IP_VS_CONN_F_OUT_SEQ, seq, diff); 394 395 return 1; 396 } 397 398 /* 399 * Output pkt hook. Will call bound ip_vs_app specific function 400 * called by ipvs packet handler, assumes previously checked cp!=NULL 401 * returns false if it can't handle packet (oom) 402 */ 403 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 404 struct ip_vs_iphdr *ipvsh) 405 { 406 struct ip_vs_app *app; 407 408 /* 409 * check if application module is bound to 410 * this ip_vs_conn. 411 */ 412 if ((app = cp->app) == NULL) 413 return 1; 414 415 /* TCP is complicated */ 416 if (cp->protocol == IPPROTO_TCP) 417 return app_tcp_pkt_out(cp, skb, app, ipvsh); 418 419 /* 420 * Call private output hook function 421 */ 422 if (app->pkt_out == NULL) 423 return 1; 424 425 return app->pkt_out(app, cp, skb, NULL, ipvsh); 426 } 427 428 429 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 430 struct ip_vs_app *app, 431 struct ip_vs_iphdr *ipvsh) 432 { 433 int diff; 434 const unsigned int tcp_offset = ip_hdrlen(skb); 435 struct tcphdr *th; 436 __u32 seq; 437 438 if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) 439 return 0; 440 441 th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); 442 443 /* 444 * Remember seq number in case this pkt gets resized 445 */ 446 seq = ntohl(th->seq); 447 448 /* 449 * Fix seq stuff if flagged as so. 450 */ 451 if (cp->flags & IP_VS_CONN_F_IN_SEQ) 452 vs_fix_seq(&cp->in_seq, th); 453 if (cp->flags & IP_VS_CONN_F_OUT_SEQ) 454 vs_fix_ack_seq(&cp->out_seq, th); 455 456 /* 457 * Call private input hook function 458 */ 459 if (app->pkt_in == NULL) 460 return 1; 461 462 if (!app->pkt_in(app, cp, skb, &diff, ipvsh)) 463 return 0; 464 465 /* 466 * Update ip_vs seq stuff if len has changed. 467 */ 468 if (diff != 0) 469 vs_seq_update(cp, &cp->in_seq, 470 IP_VS_CONN_F_IN_SEQ, seq, diff); 471 472 return 1; 473 } 474 475 /* 476 * Input pkt hook. Will call bound ip_vs_app specific function 477 * called by ipvs packet handler, assumes previously checked cp!=NULL. 478 * returns false if can't handle packet (oom). 479 */ 480 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 481 struct ip_vs_iphdr *ipvsh) 482 { 483 struct ip_vs_app *app; 484 485 /* 486 * check if application module is bound to 487 * this ip_vs_conn. 488 */ 489 if ((app = cp->app) == NULL) 490 return 1; 491 492 /* TCP is complicated */ 493 if (cp->protocol == IPPROTO_TCP) 494 return app_tcp_pkt_in(cp, skb, app, ipvsh); 495 496 /* 497 * Call private input hook function 498 */ 499 if (app->pkt_in == NULL) 500 return 1; 501 502 return app->pkt_in(app, cp, skb, NULL, ipvsh); 503 } 504 505 506 #ifdef CONFIG_PROC_FS 507 /* 508 * /proc/net/ip_vs_app entry function 509 */ 510 511 static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) 512 { 513 struct ip_vs_app *app, *inc; 514 515 list_for_each_entry(app, &ipvs->app_list, a_list) { 516 list_for_each_entry(inc, &app->incs_list, a_list) { 517 if (pos-- == 0) 518 return inc; 519 } 520 } 521 return NULL; 522 523 } 524 525 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) 526 { 527 struct net *net = seq_file_net(seq); 528 struct netns_ipvs *ipvs = net_ipvs(net); 529 530 mutex_lock(&__ip_vs_app_mutex); 531 532 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; 533 } 534 535 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) 536 { 537 struct ip_vs_app *inc, *app; 538 struct list_head *e; 539 struct net *net = seq_file_net(seq); 540 struct netns_ipvs *ipvs = net_ipvs(net); 541 542 ++*pos; 543 if (v == SEQ_START_TOKEN) 544 return ip_vs_app_idx(ipvs, 0); 545 546 inc = v; 547 app = inc->app; 548 549 if ((e = inc->a_list.next) != &app->incs_list) 550 return list_entry(e, struct ip_vs_app, a_list); 551 552 /* go on to next application */ 553 for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { 554 app = list_entry(e, struct ip_vs_app, a_list); 555 list_for_each_entry(inc, &app->incs_list, a_list) { 556 return inc; 557 } 558 } 559 return NULL; 560 } 561 562 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) 563 { 564 mutex_unlock(&__ip_vs_app_mutex); 565 } 566 567 static int ip_vs_app_seq_show(struct seq_file *seq, void *v) 568 { 569 if (v == SEQ_START_TOKEN) 570 seq_puts(seq, "prot port usecnt name\n"); 571 else { 572 const struct ip_vs_app *inc = v; 573 574 seq_printf(seq, "%-3s %-7u %-6d %-17s\n", 575 ip_vs_proto_name(inc->protocol), 576 ntohs(inc->port), 577 atomic_read(&inc->usecnt), 578 inc->name); 579 } 580 return 0; 581 } 582 583 static const struct seq_operations ip_vs_app_seq_ops = { 584 .start = ip_vs_app_seq_start, 585 .next = ip_vs_app_seq_next, 586 .stop = ip_vs_app_seq_stop, 587 .show = ip_vs_app_seq_show, 588 }; 589 #endif 590 591 int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) 592 { 593 INIT_LIST_HEAD(&ipvs->app_list); 594 proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, 595 sizeof(struct seq_net_private)); 596 return 0; 597 } 598 599 void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) 600 { 601 unregister_ip_vs_app(ipvs, NULL /* all */); 602 remove_proc_entry("ip_vs_app", ipvs->net->proc_net); 603 } 604