1 /* $NetBSD: npf_inet.c,v 1.6 2011/01/18 20:33:45 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Various procotol related helper routines. 
34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.6 2011/01/18 20:33:45 rmind Exp $"); 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 42 #include <net/pfil.h> 43 #include <net/if.h> 44 #include <net/ethertypes.h> 45 #include <net/if_ether.h> 46 47 #include <netinet/in_systm.h> 48 #include <netinet/in.h> 49 #include <netinet/in_var.h> 50 #include <netinet/ip.h> 51 #include <netinet/ip6.h> 52 #include <netinet/tcp.h> 53 #include <netinet/udp.h> 54 #include <netinet/ip_icmp.h> 55 56 #include "npf_impl.h" 57 58 /* 59 * npf_fixup{16,32}_cksum: update IPv4 checksum. 60 */ 61 62 uint16_t 63 npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 64 { 65 uint32_t sum; 66 67 /* 68 * RFC 1624: 69 * HC' = ~(~HC + ~m + m') 70 */ 71 sum = ~ntohs(cksum) & 0xffff; 72 sum += (~ntohs(odatum) & 0xffff) + ntohs(ndatum); 73 sum = (sum >> 16) + (sum & 0xffff); 74 sum += (sum >> 16); 75 76 return htons(~sum & 0xffff); 77 } 78 79 uint16_t 80 npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 81 { 82 83 cksum = npf_fixup16_cksum(cksum, odatum & 0xffff, ndatum & 0xffff); 84 cksum = npf_fixup16_cksum(cksum, odatum >> 16, ndatum >> 16); 85 return cksum; 86 } 87 88 /* 89 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 90 */ 91 uint16_t 92 npf_addr_cksum(uint16_t cksum, int sz, npf_addr_t *oaddr, npf_addr_t *naddr) 93 { 94 uint32_t *oip32 = (uint32_t *)oaddr, *nip32 = (uint32_t *)naddr; 95 96 KASSERT(sz % sizeof(uint32_t) == 0); 97 do { 98 cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++); 99 sz -= sizeof(uint32_t); 100 } while (sz); 101 102 return cksum; 103 } 104 105 /* 106 * npf_addr_sum: provide IP address as a summed (if needed) 32-bit integer. 107 * Note: used for hash function. 
 */
uint32_t
npf_addr_sum(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
{
	uint32_t mix = 0;
	int i;

	KASSERT(sz > 0 && a1 != NULL && a2 != NULL);

	/* Sum both addresses 32-bit word by word; sz is in bytes. */
	for (i = 0; i < (sz >> 2); i++) {
		mix += a1->s6_addr32[i];
		mix += a2->s6_addr32[i];
	}
	return mix;
}

/*
 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
 * Returns all values in host byte-order.
 *
 * => IPv4 and TCP headers must already be cached (NPC_IP46 | NPC_TCP).
 */
int
npf_tcpsaw(npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
{
	struct ip *ip = &npc->npc_ip.v4;
	struct tcphdr *th = &npc->npc_l4.tcp;

	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_TCP));

	*seq = ntohl(th->th_seq);
	*ack = ntohl(th->th_ack);
	*win = (uint32_t)ntohs(th->th_win);

	/* Payload length = total length - IP header - TCP header. */
	return ntohs(ip->ip_len) - (ip->ip_hl << 2) - (th->th_off << 2);
}

/*
 * npf_fetch_tcpopts: parse and return TCP options.
 *
 * => If *mss is non-zero on entry, the MSS option (if present) is
 *    rewritten in the packet to that value; otherwise the found MSS
 *    is returned via *mss.  Window scale is returned via *wscale.
 * => Returns false if there are no options or the parse fails.
 */
bool
npf_fetch_tcpopts(const npf_cache_t *npc, nbuf_t *nbuf,
    uint16_t *mss, int *wscale)
{
	void *n_ptr = nbuf_dataptr(nbuf);
	const struct ip *ip = &npc->npc_ip.v4;
	const struct tcphdr *th = &npc->npc_l4.tcp;
	int topts_len, step;
	uint16_t val16;
	uint8_t val;

	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_TCP));

	/* Determine if there are any TCP options, get their length. */
	topts_len = (th->th_off << 2) - sizeof(struct tcphdr);
	if (topts_len <= 0) {
		/* No options. */
		return false;
	}
	KASSERT(topts_len <= MAX_TCPOPTLEN);

	/* First step: IP and TCP header up to options. */
	step = (ip->ip_hl << 2) + sizeof(struct tcphdr);
next:
	/* Advance by 'step' and fetch the option kind byte. */
	if (nbuf_advfetch(&nbuf, &n_ptr, step, sizeof(val), &val)) {
		return false;
	}
	switch (val) {
	case TCPOPT_EOL:
		/* Done. */
		return true;
	case TCPOPT_NOP:
		/* Single-byte padding. */
		topts_len--;
		step = 1;
		break;
	case TCPOPT_MAXSEG:
		/*
		 * XXX: clean this mess.
		 */
		if (mss && *mss) {
			/* Caller supplied an MSS: rewrite it in place. */
			val16 = *mss;
			if (nbuf_advstore(&nbuf, &n_ptr, 2,
			    sizeof(val16), &val16))
				return false;
		} else if (nbuf_advfetch(&nbuf, &n_ptr, 2,
		    sizeof(val16), &val16)) {
			return false;
		}
		if (mss) {
			*mss = val16;
		}
		topts_len -= TCPOLEN_MAXSEG;
		step = sizeof(val16);
		break;
	case TCPOPT_WINDOW:
		/* Fetch the shift count; clamp to TCP_MAX_WINSHIFT. */
		if (nbuf_advfetch(&nbuf, &n_ptr, 2, sizeof(val), &val)) {
			return false;
		}
		*wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
		topts_len -= TCPOLEN_WINDOW;
		step = sizeof(val);
		break;
	default:
		/* Unknown option: fetch its length byte and skip over it. */
		if (nbuf_advfetch(&nbuf, &n_ptr, 1, sizeof(val), &val)) {
			return false;
		}
		/* Malformed length: shorter than kind+len, or overruns. */
		if (val < 2 || val >= topts_len) {
			return false;
		}
		topts_len -= val;
		step = val - 1;
	}
	/* Any options left? */
	if (__predict_true(topts_len > 0)) {
		goto next;
	}
	return true;
}

/*
 * npf_fetch_ip: fetch, check and cache IP header.
 */
bool
npf_fetch_ip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
	struct ip *ip;
	uint8_t ver;

	/* Peek at the version nibble to dispatch on IP version. */
	if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &ver)) {
		return false;
	}
	switch (ver >> 4) {
	case IPVERSION:
		/* IPv4 */
		ip = &npc->npc_ip.v4;
		/* Fetch the header. */
		if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(struct ip), ip)) {
			return false;
		}
		/* Check header length and fragment offset. */
		if ((ip->ip_hl << 2) < sizeof(struct ip)) {
			return false;
		}
		if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
			/* Note fragmentation. */
			npc->npc_info |= NPC_IPFRAG;
		}
		/* Cache: layer 3 - IPv4. */
		npc->npc_ipsz = sizeof(struct in_addr);
		npc->npc_srcip = (npf_addr_t *)&ip->ip_src;
		npc->npc_dstip = (npf_addr_t *)&ip->ip_dst;
		npc->npc_info |= NPC_IP4;
		break;

	case (IPV6_VERSION >> 4):
		/* TODO */
	default:
		return false;
	}
	return true;
}

/*
 * npf_fetch_tcp: fetch, check and cache TCP header.
 */
bool
npf_fetch_tcp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
	struct ip *ip = &npc->npc_ip.v4;
	struct tcphdr *th;
	u_int hlen;

	/* Must have IP header processed for its length and protocol. */
	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
		return false;
	}
	if (ip->ip_p != IPPROTO_TCP) {
		return false;
	}
	hlen = ip->ip_hl << 2;
	th = &npc->npc_l4.tcp;

	/* Fetch TCP header. */
	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct tcphdr), th)) {
		return false;
	}

	/* Cache: layer 4 - TCP. */
	npc->npc_info |= (NPC_LAYER4 | NPC_TCP);
	return true;
}

/*
 * npf_fetch_udp: fetch, check and cache UDP header.
 */
bool
npf_fetch_udp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
	struct ip *ip = &npc->npc_ip.v4;
	struct udphdr *uh;
	u_int hlen;

	/* Must have IP header processed for its length and protocol. */
	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
		return false;
	}
	if (ip->ip_p != IPPROTO_UDP) {
		return false;
	}
	hlen = ip->ip_hl << 2;
	uh = &npc->npc_l4.udp;

	/* Fetch UDP header. */
	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct udphdr), uh)) {
		return false;
	}

	/* Cache: layer 4 - UDP. */
	npc->npc_info |= (NPC_LAYER4 | NPC_UDP);
	return true;
}

/*
 * npf_fetch_icmp: fetch ICMP code, type and possible query ID.
 *
 * => Stores all fetched items into the cache.
 */
bool
npf_fetch_icmp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
	struct ip *ip = &npc->npc_ip.v4;
	struct icmp *ic;
	u_int hlen, iclen;

	/* Must have IP header processed for its length and protocol. */
	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
		return false;
	}
	if (ip->ip_p != IPPROTO_ICMP) {
		return false;
	}
	hlen = ip->ip_hl << 2;
	ic = &npc->npc_l4.icmp;

	/* Fetch basic ICMP header, up to the "data" point. */
	iclen = offsetof(struct icmp, icmp_data);
	if (nbuf_advfetch(&nbuf, &n_ptr, hlen, iclen, ic)) {
		return false;
	}

	/* Cache: layer 4 - ICMP. */
	npc->npc_info |= (NPC_LAYER4 | NPC_ICMP);
	return true;
}

/*
 * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
 * and TCP, UDP or ICMP data.
 */
bool
npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf)
{
	void *n_ptr = nbuf_dataptr(nbuf);

	if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
		return false;
	}
	/* Fragment: layer 3 cache is all we can get. */
	if (npf_iscached(npc, NPC_IPFRAG)) {
		return true;
	}
	switch (npf_cache_ipproto(npc)) {
	case IPPROTO_TCP:
		return npf_fetch_tcp(npc, nbuf, n_ptr);
	case IPPROTO_UDP:
		return npf_fetch_udp(npc, nbuf, n_ptr);
	case IPPROTO_ICMP:
		return npf_fetch_icmp(npc, nbuf, n_ptr);
	}
	return false;
}

/*
 * npf_rwrip: rewrite required IP address, update the cache.
 *
 * => Source address is rewritten for PFIL_OUT, destination otherwise.
 */
bool
npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
    npf_addr_t *addr)
{
	npf_addr_t *oaddr;
	u_int offby;

	KASSERT(npf_iscached(npc, NPC_IP46));

	if (di == PFIL_OUT) {
		/* Rewrite source address, if outgoing. */
		offby = offsetof(struct ip, ip_src);
		oaddr = npc->npc_srcip;
	} else {
		/* Rewrite destination, if incoming. */
		offby = offsetof(struct ip, ip_dst);
		oaddr = npc->npc_dstip;
	}

	/* Advance to the address and rewrite it. */
	if (nbuf_advstore(&nbuf, &n_ptr, offby, npc->npc_ipsz, addr))
		return false;

	/* Cache: IP address. */
	memcpy(oaddr, addr, npc->npc_ipsz);
	return true;
}

/*
 * npf_rwrport: rewrite required TCP/UDP port, update the cache.
 *
 * => Source port is rewritten for PFIL_OUT, destination otherwise.
 */
bool
npf_rwrport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
    in_port_t port)
{
	const int proto = npf_cache_ipproto(npc);
	struct ip *ip = &npc->npc_ip.v4;
	u_int offby = ip->ip_hl << 2;
	in_port_t *oport;

	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
	KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);

	/* Offset to the port and pointer in the cache. */
	if (proto == IPPROTO_TCP) {
		struct tcphdr *th = &npc->npc_l4.tcp;
		if (di == PFIL_OUT) {
			/* Source port sits at offset 0 of the TCP header. */
			CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
			oport = &th->th_sport;
		} else {
			offby += offsetof(struct tcphdr, th_dport);
			oport = &th->th_dport;
		}
	} else {
		struct udphdr *uh = &npc->npc_l4.udp;
		if (di == PFIL_OUT) {
			/* Source port sits at offset 0 of the UDP header. */
			CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
			oport = &uh->uh_sport;
		} else {
			offby += offsetof(struct udphdr, uh_dport);
			oport = &uh->uh_dport;
		}
	}

	/* Advance and rewrite the port. */
	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(in_port_t), &port))
		return false;

	/* Cache: TCP/UDP port. */
	*oport = port;
	return true;
}

/*
 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum, update the cache.
 *
 * => 'addr' is the new address and 'port' the new port already applied
 *    (or about to be applied) by npf_rwrip()/npf_rwrport().
 * => 'port' of zero skips the layer 4 checksum update.
 */
bool
npf_rwrcksum(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
    npf_addr_t *addr, in_port_t port)
{
	const int proto = npf_cache_ipproto(npc);
	npf_addr_t *oaddr;
	in_port_t *oport;
	uint16_t *cksum;
	u_int offby;

	/* Checksum update for IPv4 header. */
	if (npf_iscached(npc, NPC_IP4)) {
		struct ip *ip = &npc->npc_ip.v4;
		uint16_t ipsum;

		oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip;
		ipsum = npf_addr_cksum(ip->ip_sum, npc->npc_ipsz, oaddr, addr);

		/* Advance to the IPv4 checksum and rewrite it. */
		offby = offsetof(struct ip, ip_sum);
		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(ipsum), &ipsum))
			return false;

		ip->ip_sum = ipsum;
		/* Remaining distance from ip_sum to the end of IP header. */
		offby = (ip->ip_hl << 2) - offby;
	} else {
		/* No checksum for IPv6. */
		KASSERT(npf_iscached(npc, NPC_IP6));
		oaddr = NULL;
		offby = 0;
		return false;	/* XXX: Not yet supported. */
	}

	/* Determine whether TCP/UDP checksum update is needed. */
	if (proto == IPPROTO_ICMP || port == 0) {
		return true;
	}
	/*
	 * NOTE(review): this asserts with (NPC_TCP | NPC_UDP) combined --
	 * verify npf_iscached() treats the flags as "any of", since a
	 * packet is never both TCP and UDP.
	 */
	KASSERT(npf_iscached(npc, NPC_TCP | NPC_UDP));

	/* Calculate TCP/UDP checksum. */
	if (proto == IPPROTO_TCP) {
		struct tcphdr *th = &npc->npc_l4.tcp;

		cksum = &th->th_sum;
		offby += offsetof(struct tcphdr, th_sum);
		oport = (di == PFIL_OUT) ? &th->th_sport : &th->th_dport;
	} else {
		struct udphdr *uh = &npc->npc_l4.udp;

		KASSERT(proto == IPPROTO_UDP);
		cksum = &uh->uh_sum;
		if (*cksum == 0) {
			/* No need to update. */
			return true;
		}
		offby += offsetof(struct udphdr, uh_sum);
		oport = (di == PFIL_OUT) ? &uh->uh_sport : &uh->uh_dport;
	}
	/* Address change affects the pseudo-header; then the port change. */
	*cksum = npf_addr_cksum(*cksum, npc->npc_ipsz, oaddr, addr);
	*cksum = npf_fixup16_cksum(*cksum, *oport, port);

	/* Advance to TCP/UDP checksum and rewrite it. */
	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(uint16_t), cksum)) {
		return false;
	}
	return true;
}

/*
 * npf_normalize_ip4: routine to normalize IPv4 header: randomize the ID,
 * clear the DF flag and/or enforce a minimum TTL, with an incremental
 * checksum update for each change.
 */
static inline bool
npf_normalize_ip4(npf_cache_t *npc, nbuf_t *nbuf,
    bool rnd, bool no_df, int minttl)
{
	void *n_ptr = nbuf_dataptr(nbuf);
	struct ip *ip = &npc->npc_ip.v4;
	uint16_t cksum = ip->ip_sum;
	uint16_t ip_off = ip->ip_off;
	uint8_t ttl = ip->ip_ttl;
	/* Offset already consumed by the previous store in this function. */
	u_int offby = 0;

	KASSERT(rnd || minttl || no_df);

	/* Randomize IPv4 ID. */
	if (rnd) {
		uint16_t oid = ip->ip_id, nid;

		nid = htons(ip_randomid(ip_ids, 0));
		offby = offsetof(struct ip, ip_id);
		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(nid), &nid)) {
			return false;
		}
		cksum = npf_fixup16_cksum(cksum, oid, nid);
		ip->ip_id = nid;
	}

	/* IP_DF flag cleansing. */
	if (no_df && (ip_off & htons(IP_DF)) != 0) {
		uint16_t nip_off = ip_off & ~htons(IP_DF);

		if (nbuf_advstore(&nbuf, &n_ptr,
		    offsetof(struct ip, ip_off) - offby,
		    sizeof(uint16_t), &nip_off)) {
			return false;
		}
		cksum = npf_fixup16_cksum(cksum, ip_off, nip_off);
		ip->ip_off = nip_off;
		offby = offsetof(struct ip, ip_off);
	}

	/* Enforce minimum TTL. */
	if (minttl && ttl < minttl) {
		/*
		 * NOTE(review): minttl is an int, but only sizeof(uint8_t)
		 * bytes are stored from &minttl -- this picks the low byte
		 * only on little-endian hosts; verify on big-endian.
		 */
		if (nbuf_advstore(&nbuf, &n_ptr,
		    offsetof(struct ip, ip_ttl) - offby,
		    sizeof(uint8_t), &minttl)) {
			return false;
		}
		cksum = npf_fixup16_cksum(cksum, ttl, minttl);
		ip->ip_ttl = minttl;
		offby = offsetof(struct ip, ip_ttl);
	}

	/* Update IP checksum. */
	offby = offsetof(struct ip, ip_sum) - offby;
	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
		return false;
	}
	ip->ip_sum = cksum;
	return true;
}

/*
 * npf_normalize: normalize the packet: IPv4 header cleansing and,
 * for TCP SYN packets, Maximum Segment Size (MSS) "clamping".
 */
bool
npf_normalize(npf_cache_t *npc, nbuf_t *nbuf,
    bool no_df, bool rnd, u_int minttl, u_int maxmss)
{
	void *n_ptr = nbuf_dataptr(nbuf);
	struct ip *ip = &npc->npc_ip.v4;
	struct tcphdr *th = &npc->npc_l4.tcp;
	uint16_t cksum, mss;
	int offby, wscale;

	/* Normalize IPv4. */
	/*
	 * NOTE(review): npf_normalize_ip4() is not invoked when only
	 * no_df is requested (condition checks rnd/minttl only), yet it
	 * asserts (rnd || minttl || no_df) -- confirm this is intended.
	 */
	if (npf_iscached(npc, NPC_IP4) && (rnd || minttl)) {
		if (!npf_normalize_ip4(npc, nbuf, rnd, no_df, minttl)) {
			return false;
		}
	} else if (!npf_iscached(npc, NPC_IP4)) {
		/* XXX: no IPv6 */
		return false;
	}

	/*
	 * TCP Maximum Segment Size (MSS) "clamping".  Only if SYN packet.
	 * Fetch MSS and check whether rewrite to lower is needed.
	 */
	if (maxmss == 0 || !npf_iscached(npc, NPC_TCP) ||
	    (th->th_flags & TH_SYN) == 0) {
		/* Not required; done. */
		return true;
	}
	mss = 0;
	/* First pass (mss == 0): read the current MSS option. */
	if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
		return false;
	}
	if (ntohs(mss) <= maxmss) {
		return true;
	}

	/* Calculate TCP checksum, then rewrite MSS and the checksum. */
	maxmss = htons(maxmss);
	cksum = npf_fixup16_cksum(th->th_sum, mss, maxmss);
	th->th_sum = cksum;
	mss = maxmss;
	/* Second pass (mss != 0): store the clamped MSS into the packet. */
	if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
		return false;
	}
	offby = (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
	if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
		return false;
	}
	return true;
}