1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Handle firewalling 4 * Linux ethernet bridge 5 * 6 * Authors: 7 * Lennert Buytenhek <buytenh@gnu.org> 8 * Bart De Schuymer <bdschuym@pandora.be> 9 * 10 * Lennert dedicates this file to Kerstin Wurdinger. 11 */ 12 13 #include <linux/module.h> 14 #include <linux/kernel.h> 15 #include <linux/slab.h> 16 #include <linux/ip.h> 17 #include <linux/netdevice.h> 18 #include <linux/skbuff.h> 19 #include <linux/if_arp.h> 20 #include <linux/if_ether.h> 21 #include <linux/if_vlan.h> 22 #include <linux/if_pppox.h> 23 #include <linux/ppp_defs.h> 24 #include <linux/netfilter_bridge.h> 25 #include <uapi/linux/netfilter_bridge.h> 26 #include <linux/netfilter_ipv4.h> 27 #include <linux/netfilter_ipv6.h> 28 #include <linux/netfilter_arp.h> 29 #include <linux/in_route.h> 30 #include <linux/rculist.h> 31 #include <linux/inetdevice.h> 32 33 #include <net/ip.h> 34 #include <net/ipv6.h> 35 #include <net/addrconf.h> 36 #include <net/route.h> 37 #include <net/netfilter/br_netfilter.h> 38 #include <net/netns/generic.h> 39 40 #include <linux/uaccess.h> 41 #include "br_private.h" 42 #ifdef CONFIG_SYSCTL 43 #include <linux/sysctl.h> 44 #endif 45 46 static unsigned int brnf_net_id __read_mostly; 47 48 struct brnf_net { 49 bool enabled; 50 51 #ifdef CONFIG_SYSCTL 52 struct ctl_table_header *ctl_hdr; 53 #endif 54 55 /* default value is 1 */ 56 int call_iptables; 57 int call_ip6tables; 58 int call_arptables; 59 60 /* default value is 0 */ 61 int filter_vlan_tagged; 62 int filter_pppoe_tagged; 63 int pass_vlan_indev; 64 }; 65 66 #define IS_IP(skb) \ 67 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) 68 69 #define IS_IPV6(skb) \ 70 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) 71 72 #define IS_ARP(skb) \ 73 (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) 74 75 static inline __be16 vlan_proto(const struct sk_buff *skb) 76 { 77 if (skb_vlan_tag_present(skb)) 78 return skb->protocol; 79 else if (skb->protocol == htons(ETH_P_8021Q)) 80 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 81 else 82 return 0; 83 } 84 85 static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) 86 { 87 struct brnf_net *brnet = net_generic(net, brnf_net_id); 88 89 return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; 90 } 91 92 static inline bool is_vlan_ipv6(const struct sk_buff *skb, 93 const struct net *net) 94 { 95 struct brnf_net *brnet = net_generic(net, brnf_net_id); 96 97 return vlan_proto(skb) == htons(ETH_P_IPV6) && 98 brnet->filter_vlan_tagged; 99 } 100 101 static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) 102 { 103 struct brnf_net *brnet = net_generic(net, brnf_net_id); 104 105 return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; 106 } 107 108 static inline __be16 pppoe_proto(const struct sk_buff *skb) 109 { 110 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 111 sizeof(struct pppoe_hdr))); 112 } 113 114 static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) 115 { 116 struct brnf_net *brnet = net_generic(net, brnf_net_id); 117 118 return skb->protocol == htons(ETH_P_PPP_SES) && 119 pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; 120 } 121 122 static inline bool is_pppoe_ipv6(const struct sk_buff *skb, 123 const struct net *net) 124 { 125 struct brnf_net *brnet = net_generic(net, brnf_net_id); 126 127 return skb->protocol == htons(ETH_P_PPP_SES) && 128 pppoe_proto(skb) == htons(PPP_IPV6) && 129 brnet->filter_pppoe_tagged; 130 } 131 132 /* largest possible L2 header, see br_nf_dev_queue_xmit() */ 133 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 134 135 struct brnf_frag_data { 136 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 137 u8 encap_size; 138 u8 size; 139 u16 vlan_tci; 140 __be16 vlan_proto; 141 }; 142 143 static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 144 145 static void nf_bridge_info_free(struct sk_buff *skb) 146 { 147 skb_ext_del(skb, SKB_EXT_BRIDGE_NF); 148 } 149 150 static inline struct net_device *bridge_parent(const struct net_device *dev) 151 { 152 struct net_bridge_port *port; 153 154 port = br_port_get_rcu(dev); 155 return port ? port->br->dev : NULL; 156 } 157 158 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 159 { 160 return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); 161 } 162 163 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 164 { 165 switch (skb->protocol) { 166 case __cpu_to_be16(ETH_P_8021Q): 167 return VLAN_HLEN; 168 case __cpu_to_be16(ETH_P_PPP_SES): 169 return PPPOE_SES_HLEN; 170 default: 171 return 0; 172 } 173 } 174 175 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 176 { 177 unsigned int len = nf_bridge_encap_header_len(skb); 178 179 skb_pull(skb, len); 180 skb->network_header += len; 181 } 182 183 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) 184 { 185 unsigned int len = nf_bridge_encap_header_len(skb); 186 187 skb_pull_rcsum(skb, len); 188 skb->network_header += len; 189 } 190 191 /* When handing a packet over to the IP layer 192 * check whether we have a skb that is in the 193 * expected format 194 */ 195 196 static int br_validate_ipv4(struct net *net, struct sk_buff *skb) 197 { 198 const struct iphdr *iph; 199 u32 len; 200 201 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 202 goto inhdr_error; 203 204 iph = ip_hdr(skb); 205 206 /* Basic sanity checks */ 207 if (iph->ihl < 5 || iph->version != 4) 208 goto inhdr_error; 209 210 if (!pskb_may_pull(skb, iph->ihl*4)) 211 goto inhdr_error; 212 213 iph = ip_hdr(skb); 214 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 215 goto csum_error; 216 217 len = skb_ip_totlen(skb); 218 if (skb->len < len) { 219 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 220 goto drop; 221 } else if (len < (iph->ihl*4)) 222 goto inhdr_error; 223 224 if (pskb_trim_rcsum(skb, len)) { 225 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 226 goto drop; 227 } 228 229 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 230 /* We should really parse IP options here but until 231 * somebody who actually uses IP options complains to 232 * us we'll just silently ignore the options because 233 * we're lazy! 234 */ 235 return 0; 236 237 csum_error: 238 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 239 inhdr_error: 240 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 241 drop: 242 return -1; 243 } 244 245 void nf_bridge_update_protocol(struct sk_buff *skb) 246 { 247 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 248 249 switch (nf_bridge->orig_proto) { 250 case BRNF_PROTO_8021Q: 251 skb->protocol = htons(ETH_P_8021Q); 252 break; 253 case BRNF_PROTO_PPPOE: 254 skb->protocol = htons(ETH_P_PPP_SES); 255 break; 256 case BRNF_PROTO_UNCHANGED: 257 break; 258 } 259 } 260 261 /* Obtain the correct destination MAC address, while preserving the original 262 * source MAC address. If we already know this address, we just copy it. If we 263 * don't, we use the neighbour framework to find out. In both cases, we make 264 * sure that br_handle_frame_finish() is called afterwards. 265 */ 266 int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) 267 { 268 struct neighbour *neigh; 269 struct dst_entry *dst; 270 271 skb->dev = bridge_parent(skb->dev); 272 if (!skb->dev) 273 goto free_skb; 274 dst = skb_dst(skb); 275 neigh = dst_neigh_lookup_skb(dst, skb); 276 if (neigh) { 277 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 278 int ret; 279 280 if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) { 281 neigh_hh_bridge(&neigh->hh, skb); 282 skb->dev = nf_bridge->physindev; 283 ret = br_handle_frame_finish(net, sk, skb); 284 } else { 285 /* the neighbour function below overwrites the complete 286 * MAC header, so we save the Ethernet source address and 287 * protocol number. 288 */ 289 skb_copy_from_linear_data_offset(skb, 290 -(ETH_HLEN-ETH_ALEN), 291 nf_bridge->neigh_header, 292 ETH_HLEN-ETH_ALEN); 293 /* tell br_dev_xmit to continue with forwarding */ 294 nf_bridge->bridged_dnat = 1; 295 /* FIXME Need to refragment */ 296 ret = neigh->output(neigh, skb); 297 } 298 neigh_release(neigh); 299 return ret; 300 } 301 free_skb: 302 kfree_skb(skb); 303 return 0; 304 } 305 306 static inline bool 307 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, 308 const struct nf_bridge_info *nf_bridge) 309 { 310 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; 311 } 312 313 /* This requires some explaining. If DNAT has taken place, 314 * we will need to fix up the destination Ethernet address. 315 * This is also true when SNAT takes place (for the reply direction). 316 * 317 * There are two cases to consider: 318 * 1. The packet was DNAT'ed to a device in the same bridge 319 * port group as it was received on. We can still bridge 320 * the packet. 321 * 2. The packet was DNAT'ed to a different device, either 322 * a non-bridged device or another bridge port group. 323 * The packet will need to be routed. 324 * 325 * The correct way of distinguishing between these two cases is to 326 * call ip_route_input() and to look at skb->dst->dev, which is 327 * changed to the destination device if ip_route_input() succeeds. 328 * 329 * Let's first consider the case that ip_route_input() succeeds: 330 * 331 * If the output device equals the logical bridge device the packet 332 * came in on, we can consider this bridging. The corresponding MAC 333 * address will be obtained in br_nf_pre_routing_finish_bridge. 334 * Otherwise, the packet is considered to be routed and we just 335 * change the destination MAC address so that the packet will 336 * later be passed up to the IP stack to be routed. For a redirected 337 * packet, ip_route_input() will give back the localhost as output device, 338 * which differs from the bridge device. 339 * 340 * Let's now consider the case that ip_route_input() fails: 341 * 342 * This can be because the destination address is martian, in which case 343 * the packet will be dropped. 344 * If IP forwarding is disabled, ip_route_input() will fail, while 345 * ip_route_output_key() can return success. The source 346 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 347 * thinks we're handling a locally generated packet and won't care 348 * if IP forwarding is enabled. If the output device equals the logical bridge 349 * device, we proceed as if ip_route_input() succeeded. If it differs from the 350 * logical bridge port or if ip_route_output_key() fails we drop the packet. 351 */ 352 static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 353 { 354 struct net_device *dev = skb->dev; 355 struct iphdr *iph = ip_hdr(skb); 356 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 357 struct rtable *rt; 358 int err; 359 360 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 361 362 if (nf_bridge->pkt_otherhost) { 363 skb->pkt_type = PACKET_OTHERHOST; 364 nf_bridge->pkt_otherhost = false; 365 } 366 nf_bridge->in_prerouting = 0; 367 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { 368 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 369 struct in_device *in_dev = __in_dev_get_rcu(dev); 370 371 /* If err equals -EHOSTUNREACH the error is due to a 372 * martian destination or due to the fact that 373 * forwarding is disabled. For most martian packets, 374 * ip_route_output_key() will fail. It won't fail for 2 types of 375 * martian destinations: loopback destinations and destination 376 * 0.0.0.0. In both cases the packet will be dropped because the 377 * destination is the loopback device and not the bridge. */ 378 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 379 goto free_skb; 380 381 rt = ip_route_output(net, iph->daddr, 0, 382 RT_TOS(iph->tos), 0); 383 if (!IS_ERR(rt)) { 384 /* - Bridged-and-DNAT'ed traffic doesn't 385 * require ip_forwarding. */ 386 if (rt->dst.dev == dev) { 387 skb_dst_drop(skb); 388 skb_dst_set(skb, &rt->dst); 389 goto bridged_dnat; 390 } 391 ip_rt_put(rt); 392 } 393 free_skb: 394 kfree_skb(skb); 395 return 0; 396 } else { 397 if (skb_dst(skb)->dev == dev) { 398 bridged_dnat: 399 skb->dev = nf_bridge->physindev; 400 nf_bridge_update_protocol(skb); 401 nf_bridge_push_encap_header(skb); 402 br_nf_hook_thresh(NF_BR_PRE_ROUTING, 403 net, sk, skb, skb->dev, 404 NULL, 405 br_nf_pre_routing_finish_bridge); 406 return 0; 407 } 408 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); 409 skb->pkt_type = PACKET_HOST; 410 } 411 } else { 412 rt = bridge_parent_rtable(nf_bridge->physindev); 413 if (!rt) { 414 kfree_skb(skb); 415 return 0; 416 } 417 skb_dst_drop(skb); 418 skb_dst_set_noref(skb, &rt->dst); 419 } 420 421 skb->dev = nf_bridge->physindev; 422 nf_bridge_update_protocol(skb); 423 nf_bridge_push_encap_header(skb); 424 br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, 425 br_handle_frame_finish); 426 return 0; 427 } 428 429 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, 430 const struct net_device *dev, 431 const struct net *net) 432 { 433 struct net_device *vlan, *br; 434 struct brnf_net *brnet = net_generic(net, brnf_net_id); 435 436 br = bridge_parent(dev); 437 438 if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) 439 return br; 440 441 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, 442 skb_vlan_tag_get(skb) & VLAN_VID_MASK); 443 444 return vlan ? vlan : br; 445 } 446 447 /* Some common code for IPv4/IPv6 */ 448 struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) 449 { 450 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 451 452 if (skb->pkt_type == PACKET_OTHERHOST) { 453 skb->pkt_type = PACKET_HOST; 454 nf_bridge->pkt_otherhost = true; 455 } 456 457 nf_bridge->in_prerouting = 1; 458 nf_bridge->physindev = skb->dev; 459 skb->dev = brnf_get_logical_dev(skb, skb->dev, net); 460 461 if (skb->protocol == htons(ETH_P_8021Q)) 462 nf_bridge->orig_proto = BRNF_PROTO_8021Q; 463 else if (skb->protocol == htons(ETH_P_PPP_SES)) 464 nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 465 466 /* Must drop socket now because of tproxy. */ 467 skb_orphan(skb); 468 return skb->dev; 469 } 470 471 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 472 * Replicate the checks that IPv4 does on packet reception. 473 * Set skb->dev to the bridge device (i.e. parent of the 474 * receiving device) to make netfilter happy, the REDIRECT 475 * target in particular. Save the original destination IP 476 * address to be able to detect DNAT afterwards. */ 477 static unsigned int br_nf_pre_routing(void *priv, 478 struct sk_buff *skb, 479 const struct nf_hook_state *state) 480 { 481 struct nf_bridge_info *nf_bridge; 482 struct net_bridge_port *p; 483 struct net_bridge *br; 484 __u32 len = nf_bridge_encap_header_len(skb); 485 struct brnf_net *brnet; 486 487 if (unlikely(!pskb_may_pull(skb, len))) 488 return NF_DROP; 489 490 p = br_port_get_rcu(state->in); 491 if (p == NULL) 492 return NF_DROP; 493 br = p->br; 494 495 brnet = net_generic(state->net, brnf_net_id); 496 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 497 is_pppoe_ipv6(skb, state->net)) { 498 if (!brnet->call_ip6tables && 499 !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) 500 return NF_ACCEPT; 501 if (!ipv6_mod_enabled()) { 502 pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); 503 return NF_DROP; 504 } 505 506 nf_bridge_pull_encap_header_rcsum(skb); 507 return br_nf_pre_routing_ipv6(priv, skb, state); 508 } 509 510 if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) 511 return NF_ACCEPT; 512 513 if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && 514 !is_pppoe_ip(skb, state->net)) 515 return NF_ACCEPT; 516 517 nf_bridge_pull_encap_header_rcsum(skb); 518 519 if (br_validate_ipv4(state->net, skb)) 520 return NF_DROP; 521 522 if (!nf_bridge_alloc(skb)) 523 return NF_DROP; 524 if (!setup_pre_routing(skb, state->net)) 525 return NF_DROP; 526 527 nf_bridge = nf_bridge_info_get(skb); 528 nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; 529 530 skb->protocol = htons(ETH_P_IP); 531 skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; 532 533 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, 534 skb->dev, NULL, 535 br_nf_pre_routing_finish); 536 537 return NF_STOLEN; 538 } 539 540 541 /* PF_BRIDGE/FORWARD *************************************************/ 542 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 543 { 544 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 545 struct net_device *in; 546 547 if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { 548 549 if (skb->protocol == htons(ETH_P_IP)) 550 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; 551 552 if (skb->protocol == htons(ETH_P_IPV6)) 553 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; 554 555 in = nf_bridge->physindev; 556 if (nf_bridge->pkt_otherhost) { 557 skb->pkt_type = PACKET_OTHERHOST; 558 nf_bridge->pkt_otherhost = false; 559 } 560 nf_bridge_update_protocol(skb); 561 } else { 562 in = *((struct net_device **)(skb->cb)); 563 } 564 nf_bridge_push_encap_header(skb); 565 566 br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, 567 br_forward_finish); 568 return 0; 569 } 570 571 572 /* This is the 'purely bridged' case. For IP, we pass the packet to 573 * netfilter with indev and outdev set to the bridge device, 574 * but we are still able to filter on the 'real' indev/outdev 575 * because of the physdev module. For ARP, indev and outdev are the 576 * bridge ports. */ 577 static unsigned int br_nf_forward_ip(void *priv, 578 struct sk_buff *skb, 579 const struct nf_hook_state *state) 580 { 581 struct nf_bridge_info *nf_bridge; 582 struct net_device *parent; 583 u_int8_t pf; 584 585 nf_bridge = nf_bridge_info_get(skb); 586 if (!nf_bridge) 587 return NF_ACCEPT; 588 589 /* Need exclusive nf_bridge_info since we might have multiple 590 * different physoutdevs. */ 591 if (!nf_bridge_unshare(skb)) 592 return NF_DROP; 593 594 nf_bridge = nf_bridge_info_get(skb); 595 if (!nf_bridge) 596 return NF_DROP; 597 598 parent = bridge_parent(state->out); 599 if (!parent) 600 return NF_DROP; 601 602 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 603 is_pppoe_ip(skb, state->net)) 604 pf = NFPROTO_IPV4; 605 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 606 is_pppoe_ipv6(skb, state->net)) 607 pf = NFPROTO_IPV6; 608 else 609 return NF_ACCEPT; 610 611 nf_bridge_pull_encap_header(skb); 612 613 if (skb->pkt_type == PACKET_OTHERHOST) { 614 skb->pkt_type = PACKET_HOST; 615 nf_bridge->pkt_otherhost = true; 616 } 617 618 if (pf == NFPROTO_IPV4) { 619 if (br_validate_ipv4(state->net, skb)) 620 return NF_DROP; 621 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 622 } 623 624 if (pf == NFPROTO_IPV6) { 625 if (br_validate_ipv6(state->net, skb)) 626 return NF_DROP; 627 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 628 } 629 630 nf_bridge->physoutdev = skb->dev; 631 if (pf == NFPROTO_IPV4) 632 skb->protocol = htons(ETH_P_IP); 633 else 634 skb->protocol = htons(ETH_P_IPV6); 635 636 NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, 637 brnf_get_logical_dev(skb, state->in, state->net), 638 parent, br_nf_forward_finish); 639 640 return NF_STOLEN; 641 } 642 643 static unsigned int br_nf_forward_arp(void *priv, 644 struct sk_buff *skb, 645 const struct nf_hook_state *state) 646 { 647 struct net_bridge_port *p; 648 struct net_bridge *br; 649 struct net_device **d = (struct net_device **)(skb->cb); 650 struct brnf_net *brnet; 651 652 p = br_port_get_rcu(state->out); 653 if (p == NULL) 654 return NF_ACCEPT; 655 br = p->br; 656 657 brnet = net_generic(state->net, brnf_net_id); 658 if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) 659 return NF_ACCEPT; 660 661 if (!IS_ARP(skb)) { 662 if (!is_vlan_arp(skb, state->net)) 663 return NF_ACCEPT; 664 nf_bridge_pull_encap_header(skb); 665 } 666 667 if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) 668 return NF_DROP; 669 670 if (arp_hdr(skb)->ar_pln != 4) { 671 if (is_vlan_arp(skb, state->net)) 672 nf_bridge_push_encap_header(skb); 673 return NF_ACCEPT; 674 } 675 *d = state->in; 676 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, 677 state->in, state->out, br_nf_forward_finish); 678 679 return NF_STOLEN; 680 } 681 682 static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 683 { 684 struct brnf_frag_data *data; 685 int err; 686 687 data = this_cpu_ptr(&brnf_frag_data_storage); 688 err = skb_cow_head(skb, data->size); 689 690 if (err) { 691 kfree_skb(skb); 692 return 0; 693 } 694 695 if (data->vlan_proto) 696 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 697 698 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 699 __skb_push(skb, data->encap_size); 700 701 nf_bridge_info_free(skb); 702 return br_dev_queue_push_xmit(net, sk, skb); 703 } 704 705 static int 706 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 707 int (*output)(struct net *, struct sock *, struct sk_buff *)) 708 { 709 unsigned int mtu = ip_skb_dst_mtu(sk, skb); 710 struct iphdr *iph = ip_hdr(skb); 711 712 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || 713 (IPCB(skb)->frag_max_size && 714 IPCB(skb)->frag_max_size > mtu))) { 715 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 716 kfree_skb(skb); 717 return -EMSGSIZE; 718 } 719 720 return ip_do_fragment(net, sk, skb, output); 721 } 722 723 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 724 { 725 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 726 727 if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) 728 return PPPOE_SES_HLEN; 729 return 0; 730 } 731 732 static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 733 { 734 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 735 unsigned int mtu, mtu_reserved; 736 737 mtu_reserved = nf_bridge_mtu_reduction(skb); 738 mtu = skb->dev->mtu; 739 740 if (nf_bridge->pkt_otherhost) { 741 skb->pkt_type = PACKET_OTHERHOST; 742 nf_bridge->pkt_otherhost = false; 743 } 744 745 if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) 746 mtu = nf_bridge->frag_max_size; 747 748 nf_bridge_update_protocol(skb); 749 nf_bridge_push_encap_header(skb); 750 751 if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { 752 nf_bridge_info_free(skb); 753 return br_dev_queue_push_xmit(net, sk, skb); 754 } 755 756 /* This is wrong! We should preserve the original fragment 757 * boundaries by preserving frag_list rather than refragmenting. 758 */ 759 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && 760 skb->protocol == htons(ETH_P_IP)) { 761 struct brnf_frag_data *data; 762 763 if (br_validate_ipv4(net, skb)) 764 goto drop; 765 766 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; 767 768 data = this_cpu_ptr(&brnf_frag_data_storage); 769 770 if (skb_vlan_tag_present(skb)) { 771 data->vlan_tci = skb->vlan_tci; 772 data->vlan_proto = skb->vlan_proto; 773 } else { 774 data->vlan_proto = 0; 775 } 776 777 data->encap_size = nf_bridge_encap_header_len(skb); 778 data->size = ETH_HLEN + data->encap_size; 779 780 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 781 data->size); 782 783 return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); 784 } 785 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && 786 skb->protocol == htons(ETH_P_IPV6)) { 787 const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); 788 struct brnf_frag_data *data; 789 790 if (br_validate_ipv6(net, skb)) 791 goto drop; 792 793 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; 794 795 data = this_cpu_ptr(&brnf_frag_data_storage); 796 data->encap_size = nf_bridge_encap_header_len(skb); 797 data->size = ETH_HLEN + data->encap_size; 798 799 skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 800 data->size); 801 802 if (v6ops) 803 return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); 804 805 kfree_skb(skb); 806 return -EMSGSIZE; 807 } 808 nf_bridge_info_free(skb); 809 return br_dev_queue_push_xmit(net, sk, skb); 810 drop: 811 kfree_skb(skb); 812 return 0; 813 } 814 815 /* PF_BRIDGE/POST_ROUTING ********************************************/ 816 static unsigned int br_nf_post_routing(void *priv, 817 struct sk_buff *skb, 818 const struct nf_hook_state *state) 819 { 820 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 821 struct net_device *realoutdev = bridge_parent(skb->dev); 822 u_int8_t pf; 823 824 /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in 825 * on a bridge, but was delivered locally and is now being routed: 826 * 827 * POST_ROUTING was already invoked from the ip stack. 828 */ 829 if (!nf_bridge || !nf_bridge->physoutdev) 830 return NF_ACCEPT; 831 832 if (!realoutdev) 833 return NF_DROP; 834 835 if (IS_IP(skb) || is_vlan_ip(skb, state->net) || 836 is_pppoe_ip(skb, state->net)) 837 pf = NFPROTO_IPV4; 838 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || 839 is_pppoe_ipv6(skb, state->net)) 840 pf = NFPROTO_IPV6; 841 else 842 return NF_ACCEPT; 843 844 if (skb->pkt_type == PACKET_OTHERHOST) { 845 skb->pkt_type = PACKET_HOST; 846 nf_bridge->pkt_otherhost = true; 847 } 848 849 nf_bridge_pull_encap_header(skb); 850 if (pf == NFPROTO_IPV4) 851 skb->protocol = htons(ETH_P_IP); 852 else 853 skb->protocol = htons(ETH_P_IPV6); 854 855 NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, 856 NULL, realoutdev, 857 br_nf_dev_queue_xmit); 858 859 return NF_STOLEN; 860 } 861 862 /* IP/SABOTAGE *****************************************************/ 863 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 864 * for the second time. */ 865 static unsigned int ip_sabotage_in(void *priv, 866 struct sk_buff *skb, 867 const struct nf_hook_state *state) 868 { 869 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 870 871 if (nf_bridge && !nf_bridge->in_prerouting && 872 !netif_is_l3_master(skb->dev) && 873 !netif_is_l3_slave(skb->dev)) { 874 nf_bridge_info_free(skb); 875 state->okfn(state->net, state->sk, skb); 876 return NF_STOLEN; 877 } 878 879 return NF_ACCEPT; 880 } 881 882 /* This is called when br_netfilter has called into iptables/netfilter, 883 * and DNAT has taken place on a bridge-forwarded packet. 884 * 885 * neigh->output has created a new MAC header, with local br0 MAC 886 * as saddr. 887 * 888 * This restores the original MAC saddr of the bridged packet 889 * before invoking bridge forward logic to transmit the packet. 890 */ 891 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) 892 { 893 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 894 895 skb_pull(skb, ETH_HLEN); 896 nf_bridge->bridged_dnat = 0; 897 898 BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); 899 900 skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), 901 nf_bridge->neigh_header, 902 ETH_HLEN - ETH_ALEN); 903 skb->dev = nf_bridge->physindev; 904 905 nf_bridge->physoutdev = NULL; 906 br_handle_frame_finish(dev_net(skb->dev), NULL, skb); 907 } 908 909 static int br_nf_dev_xmit(struct sk_buff *skb) 910 { 911 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 912 913 if (nf_bridge && nf_bridge->bridged_dnat) { 914 br_nf_pre_routing_finish_bridge_slow(skb); 915 return 1; 916 } 917 return 0; 918 } 919 920 static const struct nf_br_ops br_ops = { 921 .br_dev_xmit_hook = br_nf_dev_xmit, 922 }; 923 924 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 925 * br_dev_queue_push_xmit is called afterwards */ 926 static const struct nf_hook_ops br_nf_ops[] = { 927 { 928 .hook = br_nf_pre_routing, 929 .pf = NFPROTO_BRIDGE, 930 .hooknum = NF_BR_PRE_ROUTING, 931 .priority = NF_BR_PRI_BRNF, 932 }, 933 { 934 .hook = br_nf_forward_ip, 935 .pf = NFPROTO_BRIDGE, 936 .hooknum = NF_BR_FORWARD, 937 .priority = NF_BR_PRI_BRNF - 1, 938 }, 939 { 940 .hook = br_nf_forward_arp, 941 .pf = NFPROTO_BRIDGE, 942 .hooknum = NF_BR_FORWARD, 943 .priority = NF_BR_PRI_BRNF, 944 }, 945 { 946 .hook = br_nf_post_routing, 947 .pf = NFPROTO_BRIDGE, 948 .hooknum = NF_BR_POST_ROUTING, 949 .priority = NF_BR_PRI_LAST, 950 }, 951 { 952 .hook = ip_sabotage_in, 953 .pf = NFPROTO_IPV4, 954 .hooknum = NF_INET_PRE_ROUTING, 955 .priority = NF_IP_PRI_FIRST, 956 }, 957 { 958 .hook = ip_sabotage_in, 959 .pf = NFPROTO_IPV6, 960 .hooknum = NF_INET_PRE_ROUTING, 961 .priority = NF_IP6_PRI_FIRST, 962 }, 963 }; 964 965 static int brnf_device_event(struct notifier_block *unused, unsigned long event, 966 void *ptr) 967 { 968 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 969 struct brnf_net *brnet; 970 struct net *net; 971 int ret; 972 973 if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) 974 return NOTIFY_DONE; 975 976 ASSERT_RTNL(); 977 978 net = dev_net(dev); 979 brnet = net_generic(net, brnf_net_id); 980 if (brnet->enabled) 981 return NOTIFY_OK; 982 983 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 984 if (ret) 985 return NOTIFY_BAD; 986 987 brnet->enabled = true; 988 return NOTIFY_OK; 989 } 990 991 static struct notifier_block brnf_notifier __read_mostly = { 992 .notifier_call = brnf_device_event, 993 }; 994 995 /* recursively invokes nf_hook_slow (again), skipping already-called 996 * hooks (< NF_BR_PRI_BRNF). 997 * 998 * Called with rcu read lock held. 999 */ 1000 int br_nf_hook_thresh(unsigned int hook, struct net *net, 1001 struct sock *sk, struct sk_buff *skb, 1002 struct net_device *indev, 1003 struct net_device *outdev, 1004 int (*okfn)(struct net *, struct sock *, 1005 struct sk_buff *)) 1006 { 1007 const struct nf_hook_entries *e; 1008 struct nf_hook_state state; 1009 struct nf_hook_ops **ops; 1010 unsigned int i; 1011 int ret; 1012 1013 e = rcu_dereference(net->nf.hooks_bridge[hook]); 1014 if (!e) 1015 return okfn(net, sk, skb); 1016 1017 ops = nf_hook_entries_get_hook_ops(e); 1018 for (i = 0; i < e->num_hook_entries; i++) { 1019 /* These hooks have already been called */ 1020 if (ops[i]->priority < NF_BR_PRI_BRNF) 1021 continue; 1022 1023 /* These hooks have not been called yet, run them. */ 1024 if (ops[i]->priority > NF_BR_PRI_BRNF) 1025 break; 1026 1027 /* take a closer look at NF_BR_PRI_BRNF. */ 1028 if (ops[i]->hook == br_nf_pre_routing) { 1029 /* This hook diverted the skb to this function, 1030 * hooks after this have not been run yet. 1031 */ 1032 i++; 1033 break; 1034 } 1035 } 1036 1037 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, 1038 sk, net, okfn); 1039 1040 ret = nf_hook_slow(skb, &state, e, i); 1041 if (ret == 1) 1042 ret = okfn(net, sk, skb); 1043 1044 return ret; 1045 } 1046 1047 #ifdef CONFIG_SYSCTL 1048 static 1049 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 1050 void *buffer, size_t *lenp, loff_t *ppos) 1051 { 1052 int ret; 1053 1054 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1055 1056 if (write && *(int *)(ctl->data)) 1057 *(int *)(ctl->data) = 1; 1058 return ret; 1059 } 1060 1061 static struct ctl_table brnf_table[] = { 1062 { 1063 .procname = "bridge-nf-call-arptables", 1064 .maxlen = sizeof(int), 1065 .mode = 0644, 1066 .proc_handler = brnf_sysctl_call_tables, 1067 }, 1068 { 1069 .procname = "bridge-nf-call-iptables", 1070 .maxlen = sizeof(int), 1071 .mode = 0644, 1072 .proc_handler = brnf_sysctl_call_tables, 1073 }, 1074 { 1075 .procname = "bridge-nf-call-ip6tables", 1076 .maxlen = sizeof(int), 1077 .mode = 0644, 1078 .proc_handler = brnf_sysctl_call_tables, 1079 }, 1080 { 1081 .procname = "bridge-nf-filter-vlan-tagged", 1082 .maxlen = sizeof(int), 1083 .mode = 0644, 1084 .proc_handler = brnf_sysctl_call_tables, 1085 }, 1086 { 1087 .procname = "bridge-nf-filter-pppoe-tagged", 1088 .maxlen = sizeof(int), 1089 .mode = 0644, 1090 .proc_handler = brnf_sysctl_call_tables, 1091 }, 1092 { 1093 .procname = "bridge-nf-pass-vlan-input-dev", 1094 .maxlen = sizeof(int), 1095 .mode = 0644, 1096 .proc_handler = brnf_sysctl_call_tables, 1097 }, 1098 { } 1099 }; 1100 1101 static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) 1102 { 1103 brnf->call_iptables = 1; 1104 brnf->call_ip6tables = 1; 1105 brnf->call_arptables = 1; 1106 brnf->filter_vlan_tagged = 0; 1107 brnf->filter_pppoe_tagged = 0; 1108 brnf->pass_vlan_indev = 0; 1109 } 1110 1111 static int br_netfilter_sysctl_init_net(struct net *net) 1112 { 1113 struct ctl_table *table = brnf_table; 1114 struct brnf_net *brnet; 1115 1116 if (!net_eq(net, &init_net)) { 1117 table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); 1118 if (!table) 1119 return -ENOMEM; 1120 } 1121 1122 brnet = net_generic(net, brnf_net_id); 1123 table[0].data = &brnet->call_arptables; 1124 table[1].data = &brnet->call_iptables; 1125 table[2].data = &brnet->call_ip6tables; 1126 table[3].data = &brnet->filter_vlan_tagged; 1127 table[4].data = &brnet->filter_pppoe_tagged; 1128 table[5].data = &brnet->pass_vlan_indev; 1129 1130 br_netfilter_sysctl_default(brnet); 1131 1132 brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table); 1133 if (!brnet->ctl_hdr) { 1134 if (!net_eq(net, &init_net)) 1135 kfree(table); 1136 1137 return -ENOMEM; 1138 } 1139 1140 return 0; 1141 } 1142 1143 static void br_netfilter_sysctl_exit_net(struct net *net, 1144 struct brnf_net *brnet) 1145 { 1146 struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; 1147 1148 unregister_net_sysctl_table(brnet->ctl_hdr); 1149 if (!net_eq(net, &init_net)) 1150 kfree(table); 1151 } 1152 1153 static int __net_init brnf_init_net(struct net *net) 1154 { 1155 return br_netfilter_sysctl_init_net(net); 1156 } 1157 #endif 1158 1159 static void __net_exit brnf_exit_net(struct net *net) 1160 { 1161 struct brnf_net *brnet; 1162 1163 brnet = net_generic(net, brnf_net_id); 1164 if (brnet->enabled) { 1165 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1166 brnet->enabled = false; 1167 } 1168 1169 #ifdef CONFIG_SYSCTL 1170 br_netfilter_sysctl_exit_net(net, brnet); 1171 #endif 1172 } 1173 1174 static struct pernet_operations brnf_net_ops __read_mostly = { 1175 #ifdef CONFIG_SYSCTL 1176 .init = brnf_init_net, 1177 #endif 1178 .exit = brnf_exit_net, 1179 .id = &brnf_net_id, 1180 .size = sizeof(struct brnf_net), 1181 }; 1182 1183 static int __init br_netfilter_init(void) 1184 { 1185 int ret; 1186 1187 ret = register_pernet_subsys(&brnf_net_ops); 1188 if (ret < 0) 1189 return ret; 1190 1191 ret = register_netdevice_notifier(&brnf_notifier); 1192 if (ret < 0) { 1193 unregister_pernet_subsys(&brnf_net_ops); 1194 return ret; 1195 } 1196 1197 RCU_INIT_POINTER(nf_br_ops, &br_ops); 1198 printk(KERN_NOTICE "Bridge firewalling registered\n"); 1199 return 0; 1200 } 1201 1202 static void __exit br_netfilter_fini(void) 1203 { 1204 RCU_INIT_POINTER(nf_br_ops, NULL); 1205 unregister_netdevice_notifier(&brnf_notifier); 1206 unregister_pernet_subsys(&brnf_net_ops); 1207 } 1208 1209 module_init(br_netfilter_init); 1210 module_exit(br_netfilter_fini); 1211 1212 MODULE_LICENSE("GPL"); 1213 MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); 1214 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); 1215 MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); 1216