1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #include <linux/types.h> 3 #include <linux/ip.h> 4 #include <linux/netfilter.h> 5 #include <linux/netfilter_ipv6.h> 6 #include <linux/netfilter_bridge.h> 7 #include <linux/module.h> 8 #include <linux/skbuff.h> 9 #include <linux/icmp.h> 10 #include <linux/sysctl.h> 11 #include <net/route.h> 12 #include <net/ip.h> 13 14 #include <net/netfilter/nf_conntrack.h> 15 #include <net/netfilter/nf_conntrack_core.h> 16 #include <net/netfilter/nf_conntrack_helper.h> 17 #include <net/netfilter/nf_conntrack_bridge.h> 18 19 #include <linux/netfilter/nf_tables.h> 20 #include <net/netfilter/nf_tables.h> 21 22 #include "../br_private.h" 23 24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff 25 * has been linearized or cloned. 26 */ 27 static int nf_br_ip_fragment(struct net *net, struct sock *sk, 28 struct sk_buff *skb, 29 struct nf_bridge_frag_data *data, 30 int (*output)(struct net *, struct sock *sk, 31 const struct nf_bridge_frag_data *data, 32 struct sk_buff *)) 33 { 34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 35 bool mono_delivery_time = skb->mono_delivery_time; 36 unsigned int hlen, ll_rs, mtu; 37 ktime_t tstamp = skb->tstamp; 38 struct ip_frag_state state; 39 struct iphdr *iph; 40 int err; 41 42 /* for offloaded checksums cleanup checksum before fragmentation */ 43 if (skb->ip_summed == CHECKSUM_PARTIAL && 44 (err = skb_checksum_help(skb))) 45 goto blackhole; 46 47 iph = ip_hdr(skb); 48 49 /* 50 * Setup starting values 51 */ 52 53 hlen = iph->ihl * 4; 54 frag_max_size -= hlen; 55 ll_rs = LL_RESERVED_SPACE(skb->dev); 56 mtu = skb->dev->mtu; 57 58 if (skb_has_frag_list(skb)) { 59 unsigned int first_len = skb_pagelen(skb); 60 struct ip_fraglist_iter iter; 61 struct sk_buff *frag; 62 63 if (first_len - hlen > mtu || 64 skb_headroom(skb) < ll_rs) 65 goto blackhole; 66 67 if (skb_cloned(skb)) 68 goto slow_path; 69 70 skb_walk_frags(skb, frag) { 71 if (frag->len > mtu || 72 skb_headroom(frag) < hlen + ll_rs) 73 goto blackhole; 74 75 if (skb_shared(frag)) 76 goto slow_path; 77 } 78 79 ip_fraglist_init(skb, iph, hlen, &iter); 80 81 for (;;) { 82 if (iter.frag) 83 ip_fraglist_prepare(skb, &iter); 84 85 skb_set_delivery_time(skb, tstamp, mono_delivery_time); 86 err = output(net, sk, data, skb); 87 if (err || !iter.frag) 88 break; 89 90 skb = ip_fraglist_next(&iter); 91 } 92 93 if (!err) 94 return 0; 95 96 kfree_skb_list(iter.frag); 97 98 return err; 99 } 100 slow_path: 101 /* This is a linearized skbuff, the original geometry is lost for us. 102 * This may also be a clone skbuff, we could preserve the geometry for 103 * the copies but probably not worth the effort. 104 */ 105 ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); 106 107 while (state.left > 0) { 108 struct sk_buff *skb2; 109 110 skb2 = ip_frag_next(skb, &state); 111 if (IS_ERR(skb2)) { 112 err = PTR_ERR(skb2); 113 goto blackhole; 114 } 115 116 skb_set_delivery_time(skb2, tstamp, mono_delivery_time); 117 err = output(net, sk, data, skb2); 118 if (err) 119 goto blackhole; 120 } 121 consume_skb(skb); 122 return err; 123 124 blackhole: 125 kfree_skb(skb); 126 return 0; 127 } 128 129 /* ip_defrag() expects IPCB() in place. */ 130 static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, 131 size_t inet_skb_parm_size) 132 { 133 memcpy(cb, skb->cb, sizeof(*cb)); 134 memset(skb->cb, 0, inet_skb_parm_size); 135 } 136 137 static void br_skb_cb_restore(struct sk_buff *skb, 138 const struct br_input_skb_cb *cb, 139 u16 fragsz) 140 { 141 memcpy(skb->cb, cb, sizeof(*cb)); 142 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; 143 } 144 145 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, 146 const struct nf_hook_state *state) 147 { 148 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 149 enum ip_conntrack_info ctinfo; 150 struct br_input_skb_cb cb; 151 const struct nf_conn *ct; 152 int err; 153 154 if (!ip_is_fragment(ip_hdr(skb))) 155 return NF_ACCEPT; 156 157 ct = nf_ct_get(skb, &ctinfo); 158 if (ct) 159 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 160 161 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); 162 local_bh_disable(); 163 err = ip_defrag(state->net, skb, 164 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 165 local_bh_enable(); 166 if (!err) { 167 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); 168 skb->ignore_df = 1; 169 return NF_ACCEPT; 170 } 171 172 return NF_STOLEN; 173 } 174 175 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, 176 const struct nf_hook_state *state) 177 { 178 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 179 u16 zone_id = NF_CT_DEFAULT_ZONE_ID; 180 enum ip_conntrack_info ctinfo; 181 struct br_input_skb_cb cb; 182 const struct nf_conn *ct; 183 int err; 184 185 ct = nf_ct_get(skb, &ctinfo); 186 if (ct) 187 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); 188 189 br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); 190 191 err = nf_ct_frag6_gather(state->net, skb, 192 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); 193 /* queued */ 194 if (err == -EINPROGRESS) 195 return NF_STOLEN; 196 197 br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); 198 return err == 0 ? NF_ACCEPT : NF_DROP; 199 #else 200 return NF_ACCEPT; 201 #endif 202 } 203 204 static int nf_ct_br_ip_check(const struct sk_buff *skb) 205 { 206 const struct iphdr *iph; 207 int nhoff, len; 208 209 nhoff = skb_network_offset(skb); 210 iph = ip_hdr(skb); 211 if (iph->ihl < 5 || 212 iph->version != 4) 213 return -1; 214 215 len = skb_ip_totlen(skb); 216 if (skb->len < nhoff + len || 217 len < (iph->ihl * 4)) 218 return -1; 219 220 return 0; 221 } 222 223 static int nf_ct_br_ipv6_check(const struct sk_buff *skb) 224 { 225 const struct ipv6hdr *hdr; 226 int nhoff, len; 227 228 nhoff = skb_network_offset(skb); 229 hdr = ipv6_hdr(skb); 230 if (hdr->version != 6) 231 return -1; 232 233 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; 234 if (skb->len < len) 235 return -1; 236 237 return 0; 238 } 239 240 static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, 241 const struct nf_hook_state *state) 242 { 243 struct nf_hook_state bridge_state = *state; 244 enum ip_conntrack_info ctinfo; 245 struct nf_conn *ct; 246 u32 len; 247 int ret; 248 249 ct = nf_ct_get(skb, &ctinfo); 250 if ((ct && !nf_ct_is_template(ct)) || 251 ctinfo == IP_CT_UNTRACKED) 252 return NF_ACCEPT; 253 254 switch (skb->protocol) { 255 case htons(ETH_P_IP): 256 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 257 return NF_ACCEPT; 258 259 len = skb_ip_totlen(skb); 260 if (pskb_trim_rcsum(skb, len)) 261 return NF_ACCEPT; 262 263 if (nf_ct_br_ip_check(skb)) 264 return NF_ACCEPT; 265 266 bridge_state.pf = NFPROTO_IPV4; 267 ret = nf_ct_br_defrag4(skb, &bridge_state); 268 break; 269 case htons(ETH_P_IPV6): 270 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 271 return NF_ACCEPT; 272 273 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); 274 if (pskb_trim_rcsum(skb, len)) 275 return NF_ACCEPT; 276 277 if (nf_ct_br_ipv6_check(skb)) 278 return NF_ACCEPT; 279 280 bridge_state.pf = NFPROTO_IPV6; 281 ret = nf_ct_br_defrag6(skb, &bridge_state); 282 break; 283 default: 284 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 285 return NF_ACCEPT; 286 } 287 288 if (ret != NF_ACCEPT) 289 return ret; 290 291 return nf_conntrack_in(skb, &bridge_state); 292 } 293 294 static void nf_ct_bridge_frag_save(struct sk_buff *skb, 295 struct nf_bridge_frag_data *data) 296 { 297 if (skb_vlan_tag_present(skb)) { 298 data->vlan_present = true; 299 data->vlan_tci = skb->vlan_tci; 300 data->vlan_proto = skb->vlan_proto; 301 } else { 302 data->vlan_present = false; 303 } 304 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 305 } 306 307 static unsigned int 308 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, 309 int (*output)(struct net *, struct sock *sk, 310 const struct nf_bridge_frag_data *data, 311 struct sk_buff *)) 312 { 313 struct nf_bridge_frag_data data; 314 315 if (!BR_INPUT_SKB_CB(skb)->frag_max_size) 316 return NF_ACCEPT; 317 318 nf_ct_bridge_frag_save(skb, &data); 319 switch (skb->protocol) { 320 case htons(ETH_P_IP): 321 nf_br_ip_fragment(state->net, state->sk, skb, &data, output); 322 break; 323 case htons(ETH_P_IPV6): 324 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); 325 break; 326 default: 327 WARN_ON_ONCE(1); 328 return NF_DROP; 329 } 330 331 return NF_STOLEN; 332 } 333 334 /* Actually only slow path refragmentation needs this. */ 335 static int nf_ct_bridge_frag_restore(struct sk_buff *skb, 336 const struct nf_bridge_frag_data *data) 337 { 338 int err; 339 340 err = skb_cow_head(skb, ETH_HLEN); 341 if (err) { 342 kfree_skb(skb); 343 return -ENOMEM; 344 } 345 if (data->vlan_present) 346 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); 347 else if (skb_vlan_tag_present(skb)) 348 __vlan_hwaccel_clear_tag(skb); 349 350 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); 351 skb_reset_mac_header(skb); 352 353 return 0; 354 } 355 356 static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, 357 const struct nf_bridge_frag_data *data, 358 struct sk_buff *skb) 359 { 360 int err; 361 362 err = nf_ct_bridge_frag_restore(skb, data); 363 if (err < 0) 364 return err; 365 366 return br_dev_queue_push_xmit(net, sk, skb); 367 } 368 369 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, 370 const struct nf_hook_state *state) 371 { 372 int ret; 373 374 ret = nf_confirm(priv, skb, state); 375 if (ret != NF_ACCEPT) 376 return ret; 377 378 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); 379 } 380 381 static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { 382 { 383 .hook = nf_ct_bridge_pre, 384 .pf = NFPROTO_BRIDGE, 385 .hooknum = NF_BR_PRE_ROUTING, 386 .priority = NF_IP_PRI_CONNTRACK, 387 }, 388 { 389 .hook = nf_ct_bridge_post, 390 .pf = NFPROTO_BRIDGE, 391 .hooknum = NF_BR_POST_ROUTING, 392 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 393 }, 394 }; 395 396 static struct nf_ct_bridge_info bridge_info = { 397 .ops = nf_ct_bridge_hook_ops, 398 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), 399 .me = THIS_MODULE, 400 }; 401 402 static int __init nf_conntrack_l3proto_bridge_init(void) 403 { 404 nf_ct_bridge_register(&bridge_info); 405 406 return 0; 407 } 408 409 static void __exit nf_conntrack_l3proto_bridge_fini(void) 410 { 411 nf_ct_bridge_unregister(&bridge_info); 412 } 413 414 module_init(nf_conntrack_l3proto_bridge_init); 415 module_exit(nf_conntrack_l3proto_bridge_fini); 416 417 MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); 418 MODULE_LICENSE("GPL"); 419