1 /* 2 ctdb system specific code to manage raw sockets on linux 3 4 Copyright (C) Ronnie Sahlberg 2007 5 Copyright (C) Andrew Tridgell 2007 6 Copyright (C) Marc Dequènes (Duck) 2009 7 Copyright (C) Volker Lendecke 2012 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or 12 (at your option) any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, see <http://www.gnu.org/licenses/>. 21 */ 22 23 #include "replace.h" 24 25 /* 26 * Use BSD struct tcphdr field names for portability. Modern glibc 27 * makes them available by default via <netinet/tcp.h> but older glibc 28 * requires __FAVOR_BSD to be defined. 29 * 30 * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE 31 * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not 32 * set. Including "replace.h" above causes <features.h> to be 33 * indirectly included and this will not set __FAVOR_BSD because 34 * _GNU_SOURCE is set in Samba's "config.h" (which is included by 35 * "replace.h"). 36 * 37 * Therefore, set __FAVOR_BSD by hand below. 38 */ 39 #define __FAVOR_BSD 1 40 #include "system/network.h" 41 42 #ifdef HAVE_NETINET_IF_ETHER_H 43 #include <netinet/if_ether.h> 44 #endif 45 #ifdef HAVE_NETINET_IP6_H 46 #include <netinet/ip6.h> 47 #endif 48 #ifdef HAVE_NETINET_ICMP6_H 49 #include <netinet/icmp6.h> 50 #endif 51 #ifdef HAVE_LINUX_IF_PACKET_H 52 #include <linux/if_packet.h> 53 #endif 54 55 #ifndef ETHERTYPE_IP6 56 #define ETHERTYPE_IP6 0x86dd 57 #endif 58 59 #include "lib/util/debug.h" 60 #include "lib/util/blocking.h" 61 62 #include "protocol/protocol.h" 63 64 #include "common/logging.h" 65 #include "common/system_socket.h" 66 67 /* 68 uint16 checksum for n bytes 69 */ 70 static uint32_t uint16_checksum(uint8_t *data, size_t n) 71 { 72 uint32_t sum=0; 73 uint16_t value; 74 75 while (n>=2) { 76 memcpy(&value, data, 2); 77 sum += (uint32_t)ntohs(value); 78 data += 2; 79 n -= 2; 80 } 81 if (n == 1) { 82 sum += (uint32_t)ntohs(*data); 83 } 84 return sum; 85 } 86 87 /* 88 * See if the given IP is currently on an interface 89 */ 90 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr) 91 { 92 int s; 93 int ret; 94 ctdb_sock_addr __addr = *_addr; 95 ctdb_sock_addr *addr = &__addr; 96 socklen_t addrlen = 0; 97 98 switch (addr->sa.sa_family) { 99 case AF_INET: 100 addr->ip.sin_port = 0; 101 addrlen = sizeof(struct sockaddr_in); 102 break; 103 case AF_INET6: 104 addr->ip6.sin6_port = 0; 105 addrlen = sizeof(struct sockaddr_in6); 106 break; 107 } 108 109 s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP); 110 if (s == -1) { 111 return false; 112 } 113 114 ret = bind(s, (struct sockaddr *)addr, addrlen); 115 116 close(s); 117 return ret == 0; 118 } 119 120 /* 121 * simple TCP checksum - assumes data is multiple of 2 bytes long 122 */ 123 static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip) 124 { 125 uint32_t sum = uint16_checksum(data, n); 126 uint16_t sum2; 127 128 sum += uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src)); 129 sum += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst)); 130 sum += ip->ip_p + n; 131 sum = (sum & 0xFFFF) + (sum >> 16); 132 sum = (sum & 0xFFFF) + (sum >> 16); 133 sum2 = htons(sum); 134 sum2 = ~sum2; 135 if (sum2 == 0) { 136 return 0xFFFF; 137 } 138 return sum2; 139 } 140 141 static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6) 142 { 143 uint16_t phdr[3]; 144 uint32_t sum = 0; 145 uint16_t sum2; 146 uint32_t len; 147 148 sum += uint16_checksum((uint8_t *)&ip6->ip6_src, 16); 149 sum += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16); 150 151 len = htonl(n); 152 phdr[0] = len & UINT16_MAX; 153 phdr[1] = (len >> 16) & UINT16_MAX; 154 /* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */ 155 phdr[2] = htons(ip6->ip6_nxt); 156 sum += uint16_checksum((uint8_t *)phdr, sizeof(phdr)); 157 158 sum += uint16_checksum(data, n); 159 160 sum = (sum & 0xFFFF) + (sum >> 16); 161 sum = (sum & 0xFFFF) + (sum >> 16); 162 sum2 = htons(sum); 163 sum2 = ~sum2; 164 if (sum2 == 0) { 165 return 0xFFFF; 166 } 167 return sum2; 168 } 169 170 /* 171 * Send gratuitous ARP request/reply or IPv6 neighbor advertisement 172 */ 173 174 #ifdef HAVE_PACKETSOCKET 175 176 /* 177 * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement 178 * packets 179 */ 180 181 #define ARP_STRUCT_SIZE sizeof(struct ether_header) + \ 182 sizeof(struct ether_arp) 183 184 #define IP6_NA_STRUCT_SIZE sizeof(struct ether_header) + \ 185 sizeof(struct ip6_hdr) + \ 186 sizeof(struct nd_neighbor_advert) + \ 187 sizeof(struct nd_opt_hdr) + \ 188 sizeof(struct ether_addr) 189 190 #define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64) 191 192 #define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64) 193 194 static int arp_build(uint8_t *buffer, 195 size_t buflen, 196 const struct sockaddr_in *addr, 197 const struct ether_addr *hwaddr, 198 bool reply, 199 struct ether_addr **ether_dhost, 200 size_t *len) 201 { 202 size_t l = ARP_BUFFER_SIZE; 203 struct ether_header *eh; 204 struct ether_arp *ea; 205 struct arphdr *ah; 206 207 if (addr->sin_family != AF_INET) { 208 return EINVAL; 209 } 210 211 if (buflen < l) { 212 return EMSGSIZE; 213 } 214 215 memset(buffer, 0 , l); 216 217 eh = (struct ether_header *)buffer; 218 memset(eh->ether_dhost, 0xff, ETH_ALEN); 219 memcpy(eh->ether_shost, hwaddr, ETH_ALEN); 220 eh->ether_type = htons(ETHERTYPE_ARP); 221 222 ea = (struct ether_arp *)(buffer + sizeof(struct ether_header)); 223 ah = &ea->ea_hdr; 224 ah->ar_hrd = htons(ARPHRD_ETHER); 225 ah->ar_pro = htons(ETH_P_IP); 226 ah->ar_hln = ETH_ALEN; 227 ah->ar_pln = sizeof(ea->arp_spa); 228 229 if (! reply) { 230 ah->ar_op = htons(ARPOP_REQUEST); 231 memcpy(ea->arp_sha, hwaddr, ETH_ALEN); 232 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa)); 233 memset(ea->arp_tha, 0, ETH_ALEN); 234 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa)); 235 } else { 236 ah->ar_op = htons(ARPOP_REPLY); 237 memcpy(ea->arp_sha, hwaddr, ETH_ALEN); 238 memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa)); 239 memcpy(ea->arp_tha, hwaddr, ETH_ALEN); 240 memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa)); 241 } 242 243 *ether_dhost = (struct ether_addr *)eh->ether_dhost; 244 *len = l; 245 return 0; 246 } 247 248 static int ip6_na_build(uint8_t *buffer, 249 size_t buflen, 250 const struct sockaddr_in6 *addr, 251 const struct ether_addr *hwaddr, 252 struct ether_addr **ether_dhost, 253 size_t *len) 254 { 255 size_t l = IP6_NA_BUFFER_SIZE; 256 struct ether_header *eh; 257 struct ip6_hdr *ip6; 258 struct nd_neighbor_advert *nd_na; 259 struct nd_opt_hdr *nd_oh; 260 struct ether_addr *ea; 261 int ret; 262 263 if (addr->sin6_family != AF_INET6) { 264 return EINVAL; 265 } 266 267 if (buflen < l) { 268 return EMSGSIZE; 269 } 270 271 memset(buffer, 0 , l); 272 273 eh = (struct ether_header *)buffer; 274 /* 275 * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464, 276 * section 7) - note memset 0 above! 277 */ 278 eh->ether_dhost[0] = 0x33; 279 eh->ether_dhost[1] = 0x33; 280 eh->ether_dhost[5] = 0x01; 281 memcpy(eh->ether_shost, hwaddr, ETH_ALEN); 282 eh->ether_type = htons(ETHERTYPE_IP6); 283 284 ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header)); 285 ip6->ip6_vfc = 6 << 4; 286 ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) + 287 sizeof(struct nd_opt_hdr) + 288 ETH_ALEN); 289 ip6->ip6_nxt = IPPROTO_ICMPV6; 290 ip6->ip6_hlim = 255; 291 ip6->ip6_src = addr->sin6_addr; 292 /* all-nodes multicast */ 293 294 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst); 295 if (ret != 1) { 296 return EIO; 297 } 298 299 nd_na = (struct nd_neighbor_advert *)(buffer + 300 sizeof(struct ether_header) + 301 sizeof(struct ip6_hdr)); 302 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; 303 nd_na->nd_na_code = 0; 304 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE; 305 nd_na->nd_na_target = addr->sin6_addr; 306 307 /* Option: Target link-layer address */ 308 nd_oh = (struct nd_opt_hdr *)(buffer + 309 sizeof(struct ether_header) + 310 sizeof(struct ip6_hdr) + 311 sizeof(struct nd_neighbor_advert)); 312 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR; 313 nd_oh->nd_opt_len = 1; /* multiple of 8 octets */ 314 315 ea = (struct ether_addr *)(buffer + 316 sizeof(struct ether_header) + 317 sizeof(struct ip6_hdr) + 318 sizeof(struct nd_neighbor_advert) + 319 sizeof(struct nd_opt_hdr)); 320 memcpy(ea, hwaddr, ETH_ALEN); 321 322 nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na, 323 ntohs(ip6->ip6_plen), 324 ip6); 325 326 *ether_dhost = (struct ether_addr *)eh->ether_dhost; 327 *len = l; 328 return 0; 329 } 330 331 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) 332 { 333 int s; 334 struct sockaddr_ll sall = {0}; 335 struct ifreq if_hwaddr = { 336 .ifr_ifru = { 337 .ifru_flags = 0 338 }, 339 }; 340 uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)]; 341 struct ifreq ifr = { 342 .ifr_ifru = { 343 .ifru_flags = 0 344 }, 345 }; 346 struct ether_addr *hwaddr = NULL; 347 struct ether_addr *ether_dhost = NULL; 348 size_t len = 0; 349 int ret = 0; 350 351 s = socket(AF_PACKET, SOCK_RAW, 0); 352 if (s == -1) { 353 ret = errno; 354 DBG_ERR("Failed to open raw socket\n"); 355 return ret; 356 } 357 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s); 358 359 /* Find interface */ 360 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); 361 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) { 362 ret = errno; 363 DBG_ERR("Interface '%s' not found\n", iface); 364 goto fail; 365 } 366 367 /* Get MAC address */ 368 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)); 369 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr); 370 if ( ret < 0 ) { 371 ret = errno; 372 DBG_ERR("ioctl failed\n"); 373 goto fail; 374 } 375 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) { 376 ret = 0; 377 D_DEBUG("Ignoring loopback arp request\n"); 378 goto fail; 379 } 380 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) { 381 ret = EINVAL; 382 DBG_ERR("Not an ethernet address family (0x%x)\n", 383 if_hwaddr.ifr_hwaddr.sa_family); 384 goto fail;; 385 } 386 387 /* Set up most of destination address structure */ 388 sall.sll_family = AF_PACKET; 389 sall.sll_halen = sizeof(struct ether_addr); 390 sall.sll_protocol = htons(ETH_P_ALL); 391 sall.sll_ifindex = ifr.ifr_ifindex; 392 393 /* For clarity */ 394 hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data; 395 396 switch (addr->ip.sin_family) { 397 case AF_INET: 398 /* Send gratuitous ARP */ 399 ret = arp_build(buffer, 400 sizeof(buffer), 401 &addr->ip, 402 hwaddr, 403 false, 404 ðer_dhost, 405 &len); 406 if (ret != 0) { 407 DBG_ERR("Failed to build ARP request\n"); 408 goto fail; 409 } 410 411 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen); 412 413 ret = sendto(s, 414 buffer, 415 len, 416 0, 417 (struct sockaddr *)&sall, 418 sizeof(sall)); 419 if (ret < 0 ) { 420 ret = errno; 421 DBG_ERR("Failed sendto\n"); 422 goto fail; 423 } 424 425 /* Send unsolicited ARP reply */ 426 ret = arp_build(buffer, 427 sizeof(buffer), 428 &addr->ip, 429 hwaddr, 430 true, 431 ðer_dhost, 432 &len); 433 if (ret != 0) { 434 DBG_ERR("Failed to build ARP reply\n"); 435 goto fail; 436 } 437 438 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen); 439 440 ret = sendto(s, 441 buffer, 442 len, 443 0, 444 (struct sockaddr *)&sall, 445 sizeof(sall)); 446 if (ret < 0 ) { 447 ret = errno; 448 DBG_ERR("Failed sendto\n"); 449 goto fail; 450 } 451 452 close(s); 453 break; 454 455 case AF_INET6: 456 ret = ip6_na_build(buffer, 457 sizeof(buffer), 458 &addr->ip6, 459 hwaddr, 460 ðer_dhost, 461 &len); 462 if (ret != 0) { 463 DBG_ERR("Failed to build IPv6 neighbor advertisement\n"); 464 goto fail; 465 } 466 467 memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen); 468 469 ret = sendto(s, 470 buffer, 471 len, 472 0, 473 (struct sockaddr *)&sall, 474 sizeof(sall)); 475 if (ret < 0 ) { 476 ret = errno; 477 DBG_ERR("Failed sendto\n"); 478 goto fail; 479 } 480 481 close(s); 482 break; 483 484 default: 485 ret = EINVAL; 486 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n", 487 addr->ip.sin_family); 488 goto fail; 489 } 490 491 return 0; 492 493 fail: 494 close(s); 495 return ret; 496 } 497 498 #else /* HAVE_PACKETSOCKET */ 499 500 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) 501 { 502 /* Not implemented */ 503 return ENOSYS; 504 } 505 506 #endif /* HAVE_PACKETSOCKET */ 507 508 509 #define IP4_TCP_BUFFER_SIZE sizeof(struct ip) + \ 510 sizeof(struct tcphdr) 511 512 #define IP6_TCP_BUFFER_SIZE sizeof(struct ip6_hdr) + \ 513 sizeof(struct tcphdr) 514 515 static int tcp4_build(uint8_t *buf, 516 size_t buflen, 517 const struct sockaddr_in *src, 518 const struct sockaddr_in *dst, 519 uint32_t seq, 520 uint32_t ack, 521 int rst, 522 size_t *len) 523 { 524 size_t l = IP4_TCP_BUFFER_SIZE; 525 struct { 526 struct ip ip; 527 struct tcphdr tcp; 528 } *ip4pkt; 529 530 if (l != sizeof(*ip4pkt)) { 531 return EMSGSIZE; 532 } 533 534 if (buflen < l) { 535 return EMSGSIZE; 536 } 537 538 ip4pkt = (void *)buf; 539 memset(ip4pkt, 0, l); 540 541 ip4pkt->ip.ip_v = 4; 542 ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t); 543 ip4pkt->ip.ip_len = htons(sizeof(ip4pkt)); 544 ip4pkt->ip.ip_ttl = 255; 545 ip4pkt->ip.ip_p = IPPROTO_TCP; 546 ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr; 547 ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr; 548 ip4pkt->ip.ip_sum = 0; 549 550 ip4pkt->tcp.th_sport = src->sin_port; 551 ip4pkt->tcp.th_dport = dst->sin_port; 552 ip4pkt->tcp.th_seq = seq; 553 ip4pkt->tcp.th_ack = ack; 554 ip4pkt->tcp.th_flags = 0; 555 ip4pkt->tcp.th_flags |= TH_ACK; 556 if (rst) { 557 ip4pkt->tcp.th_flags |= TH_RST; 558 } 559 ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t); 560 /* this makes it easier to spot in a sniffer */ 561 ip4pkt->tcp.th_win = htons(1234); 562 ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp, 563 sizeof(ip4pkt->tcp), 564 &ip4pkt->ip); 565 566 *len = l; 567 return 0; 568 } 569 570 static int tcp6_build(uint8_t *buf, 571 size_t buflen, 572 const struct sockaddr_in6 *src, 573 const struct sockaddr_in6 *dst, 574 uint32_t seq, 575 uint32_t ack, 576 int rst, 577 size_t *len) 578 { 579 size_t l = IP6_TCP_BUFFER_SIZE; 580 struct { 581 struct ip6_hdr ip6; 582 struct tcphdr tcp; 583 } *ip6pkt; 584 585 if (l != sizeof(*ip6pkt)) { 586 return EMSGSIZE; 587 } 588 589 if (buflen < l) { 590 return EMSGSIZE; 591 } 592 593 ip6pkt = (void *)buf; 594 memset(ip6pkt, 0, l); 595 596 ip6pkt->ip6.ip6_vfc = 6 << 4; 597 ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr)); 598 ip6pkt->ip6.ip6_nxt = IPPROTO_TCP; 599 ip6pkt->ip6.ip6_hlim = 64; 600 ip6pkt->ip6.ip6_src = src->sin6_addr; 601 ip6pkt->ip6.ip6_dst = dst->sin6_addr; 602 603 ip6pkt->tcp.th_sport = src->sin6_port; 604 ip6pkt->tcp.th_dport = dst->sin6_port; 605 ip6pkt->tcp.th_seq = seq; 606 ip6pkt->tcp.th_ack = ack; 607 ip6pkt->tcp.th_flags = 0; 608 ip6pkt->tcp.th_flags |= TH_ACK; 609 if (rst) { 610 ip6pkt->tcp.th_flags |= TH_RST; 611 } 612 ip6pkt->tcp.th_off = sizeof(ip6pkt->tcp)/sizeof(uint32_t); 613 /* this makes it easier to spot in a sniffer */ 614 ip6pkt->tcp.th_win = htons(1234); 615 ip6pkt->tcp.th_sum = ip6_checksum((uint8_t *)&ip6pkt->tcp, 616 sizeof(ip6pkt->tcp), 617 &ip6pkt->ip6); 618 619 *len = l; 620 return 0; 621 } 622 623 /* 624 * Send tcp segment from the specified IP/port to the specified 625 * destination IP/port. 626 * 627 * This is used to trigger the receiving host into sending its own ACK, 628 * which should trigger early detection of TCP reset by the client 629 * after IP takeover 630 * 631 * This can also be used to send RST segments (if rst is true) and also 632 * if correct seq and ack numbers are provided. 633 */ 634 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, 635 const ctdb_sock_addr *src, 636 uint32_t seq, 637 uint32_t ack, 638 int rst) 639 { 640 uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)]; 641 size_t len = 0; 642 int ret; 643 int s; 644 uint32_t one = 1; 645 struct sockaddr_in6 tmpdest = { 0 }; 646 int saved_errno; 647 648 switch (src->ip.sin_family) { 649 case AF_INET: 650 ret = tcp4_build(buf, 651 sizeof(buf), 652 &src->ip, 653 &dest->ip, 654 seq, 655 ack, 656 rst, 657 &len); 658 if (ret != 0) { 659 DBG_ERR("Failed to build TCP packet (%d)\n", ret); 660 return ret; 661 } 662 663 /* open a raw socket to send this segment from */ 664 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); 665 if (s == -1) { 666 DBG_ERR("Failed to open raw socket (%s)\n", 667 strerror(errno)); 668 return -1; 669 } 670 671 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one)); 672 if (ret != 0) { 673 DBG_ERR("Failed to setup IP headers (%s)\n", 674 strerror(errno)); 675 close(s); 676 return -1; 677 } 678 679 ret = sendto(s, 680 buf, 681 len, 682 0, 683 (const struct sockaddr *)&dest->ip, 684 sizeof(dest->ip)); 685 saved_errno = errno; 686 close(s); 687 if (ret == -1) { 688 D_ERR("Failed sendto (%s)\n", strerror(saved_errno)); 689 return -1; 690 } 691 if ((size_t)ret != len) { 692 DBG_ERR("Failed sendto - didn't send full packet\n"); 693 return -1; 694 } 695 break; 696 697 case AF_INET6: 698 ret = tcp6_build(buf, 699 sizeof(buf), 700 &src->ip6, 701 &dest->ip6, 702 seq, 703 ack, 704 rst, 705 &len); 706 if (ret != 0) { 707 DBG_ERR("Failed to build TCP packet (%d)\n", ret); 708 return ret; 709 } 710 711 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); 712 if (s == -1) { 713 DBG_ERR("Failed to open sending socket\n"); 714 return -1; 715 716 } 717 /* 718 * sendto() on an IPv6 raw socket requires the port to 719 * be either 0 or a protocol value 720 */ 721 tmpdest = dest->ip6; 722 tmpdest.sin6_port = 0; 723 724 ret = sendto(s, 725 buf, 726 len, 727 0, 728 (const struct sockaddr *)&tmpdest, 729 sizeof(tmpdest)); 730 saved_errno = errno; 731 close(s); 732 if (ret == -1) { 733 D_ERR("Failed sendto (%s)\n", strerror(saved_errno)); 734 return -1; 735 } 736 if ((size_t)ret != len) { 737 DBG_ERR("Failed sendto - didn't send full packet\n"); 738 return -1; 739 } 740 break; 741 742 default: 743 DBG_ERR("Not an ipv4/v6 address\n"); 744 return -1; 745 } 746 747 return 0; 748 } 749 750 /* 751 * Packet capture 752 * 753 * If AF_PACKET is available then use a raw socket otherwise use pcap. 754 * wscript has checked to make sure that pcap is available if needed. 755 */ 756 757 static int tcp4_extract(const uint8_t *ip_pkt, 758 size_t pktlen, 759 struct sockaddr_in *src, 760 struct sockaddr_in *dst, 761 uint32_t *ack_seq, 762 uint32_t *seq, 763 int *rst, 764 uint16_t *window) 765 { 766 const struct ip *ip; 767 const struct tcphdr *tcp; 768 769 if (pktlen < sizeof(struct ip)) { 770 return EMSGSIZE; 771 } 772 773 ip = (const struct ip *)ip_pkt; 774 775 /* IPv4 only */ 776 if (ip->ip_v != 4) { 777 return ENOMSG; 778 } 779 /* Don't look at fragments */ 780 if ((ntohs(ip->ip_off)&0x1fff) != 0) { 781 return ENOMSG; 782 } 783 /* TCP only */ 784 if (ip->ip_p != IPPROTO_TCP) { 785 return ENOMSG; 786 } 787 788 /* Ensure there is enough of the packet to gather required fields */ 789 if (pktlen < 790 (ip->ip_hl * sizeof(uint32_t)) + offsetof(struct tcphdr, th_sum)) { 791 return EMSGSIZE; 792 } 793 794 tcp = (const struct tcphdr *)(ip_pkt + (ip->ip_hl * sizeof(uint32_t))); 795 796 src->sin_family = AF_INET; 797 src->sin_addr.s_addr = ip->ip_src.s_addr; 798 src->sin_port = tcp->th_sport; 799 800 dst->sin_family = AF_INET; 801 dst->sin_addr.s_addr = ip->ip_dst.s_addr; 802 dst->sin_port = tcp->th_dport; 803 804 *ack_seq = tcp->th_ack; 805 *seq = tcp->th_seq; 806 if (window != NULL) { 807 *window = tcp->th_win; 808 } 809 if (rst != NULL) { 810 *rst = tcp->th_flags & TH_RST; 811 } 812 813 return 0; 814 } 815 816 static int tcp6_extract(const uint8_t *ip_pkt, 817 size_t pktlen, 818 struct sockaddr_in6 *src, 819 struct sockaddr_in6 *dst, 820 uint32_t *ack_seq, 821 uint32_t *seq, 822 int *rst, 823 uint16_t *window) 824 { 825 const struct ip6_hdr *ip6; 826 const struct tcphdr *tcp; 827 828 /* Ensure there is enough of the packet to gather required fields */ 829 if (pktlen < sizeof(struct ip6_hdr) + offsetof(struct tcphdr, th_sum)) { 830 return EMSGSIZE; 831 } 832 833 ip6 = (const struct ip6_hdr *)ip_pkt; 834 835 /* IPv6 only */ 836 if ((ip6->ip6_vfc >> 4) != 6){ 837 return ENOMSG; 838 } 839 840 /* TCP only */ 841 if (ip6->ip6_nxt != IPPROTO_TCP) { 842 return ENOMSG; 843 } 844 845 tcp = (const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr)); 846 847 src->sin6_family = AF_INET6; 848 src->sin6_port = tcp->th_sport; 849 src->sin6_addr = ip6->ip6_src; 850 851 dst->sin6_family = AF_INET6; 852 dst->sin6_port = tcp->th_dport; 853 dst->sin6_addr = ip6->ip6_dst; 854 855 *ack_seq = tcp->th_ack; 856 *seq = tcp->th_seq; 857 if (window != NULL) { 858 *window = tcp->th_win; 859 } 860 if (rst != NULL) { 861 *rst = tcp->th_flags & TH_RST; 862 } 863 864 return 0; 865 } 866 867 868 #ifdef HAVE_AF_PACKET 869 870 /* 871 * This function is used to open a raw socket to capture from 872 */ 873 int ctdb_sys_open_capture_socket(const char *iface, void **private_data) 874 { 875 int s, ret; 876 877 /* Open a socket to capture all traffic */ 878 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 879 if (s == -1) { 880 DBG_ERR("Failed to open raw socket\n"); 881 return -1; 882 } 883 884 DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", s); 885 886 ret = set_blocking(s, false); 887 if (ret != 0) { 888 DBG_ERR("Failed to set socket non-blocking (%s)\n", 889 strerror(errno)); 890 close(s); 891 return -1; 892 } 893 894 set_close_on_exec(s); 895 896 return s; 897 } 898 899 /* 900 * This function is used to do any additional cleanup required when closing 901 * a capture socket. 902 * Note that the socket itself is closed automatically in the caller. 903 */ 904 int ctdb_sys_close_capture_socket(void *private_data) 905 { 906 return 0; 907 } 908 909 910 /* 911 * called when the raw socket becomes readable 912 */ 913 int ctdb_sys_read_tcp_packet(int s, void *private_data, 914 ctdb_sock_addr *src, 915 ctdb_sock_addr *dst, 916 uint32_t *ack_seq, 917 uint32_t *seq, 918 int *rst, 919 uint16_t *window) 920 { 921 ssize_t nread; 922 uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */ 923 struct ether_header *eth; 924 int ret; 925 926 nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC); 927 if (nread == -1) { 928 return errno; 929 } 930 if ((size_t)nread < sizeof(*eth)) { 931 return EMSGSIZE; 932 } 933 934 ZERO_STRUCTP(src); 935 ZERO_STRUCTP(dst); 936 937 /* Ethernet */ 938 eth = (struct ether_header *)pkt; 939 940 /* we want either IPv4 or IPv6 */ 941 if (ntohs(eth->ether_type) == ETHERTYPE_IP) { 942 ret = tcp4_extract(pkt + sizeof(struct ether_header), 943 (size_t)nread - sizeof(struct ether_header), 944 &src->ip, 945 &dst->ip, 946 ack_seq, 947 seq, 948 rst, 949 window); 950 return ret; 951 952 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) { 953 ret = tcp6_extract(pkt + sizeof(struct ether_header), 954 (size_t)nread - sizeof(struct ether_header), 955 &src->ip6, 956 &dst->ip6, 957 ack_seq, 958 seq, 959 rst, 960 window); 961 return ret; 962 } 963 964 return ENOMSG; 965 } 966 967 #else /* HAVE_AF_PACKET */ 968 969 #include <pcap.h> 970 971 int ctdb_sys_open_capture_socket(const char *iface, void **private_data) 972 { 973 pcap_t *pt; 974 975 pt=pcap_open_live(iface, 100, 0, 0, NULL); 976 if (pt == NULL) { 977 DBG_ERR("Failed to open capture device %s\n", iface); 978 return -1; 979 } 980 *((pcap_t **)private_data) = pt; 981 982 return pcap_fileno(pt); 983 } 984 985 int ctdb_sys_close_capture_socket(void *private_data) 986 { 987 pcap_t *pt = (pcap_t *)private_data; 988 pcap_close(pt); 989 return 0; 990 } 991 992 int ctdb_sys_read_tcp_packet(int s, 993 void *private_data, 994 ctdb_sock_addr *src, 995 ctdb_sock_addr *dst, 996 uint32_t *ack_seq, 997 uint32_t *seq, 998 int *rst, 999 uint16_t *window) 1000 { 1001 int ret; 1002 struct ether_header *eth; 1003 struct pcap_pkthdr pkthdr; 1004 const u_char *buffer; 1005 pcap_t *pt = (pcap_t *)private_data; 1006 1007 buffer=pcap_next(pt, &pkthdr); 1008 if (buffer==NULL) { 1009 return ENOMSG; 1010 } 1011 1012 ZERO_STRUCTP(src); 1013 ZERO_STRUCTP(dst); 1014 1015 /* Ethernet */ 1016 eth = (struct ether_header *)buffer; 1017 1018 /* we want either IPv4 or IPv6 */ 1019 if (eth->ether_type == htons(ETHERTYPE_IP)) { 1020 ret = tcp4_extract(buffer + sizeof(struct ether_header), 1021 (size_t)(pkthdr.caplen - 1022 sizeof(struct ether_header)), 1023 &src->ip, 1024 &dst->ip, 1025 ack_seq, 1026 seq, 1027 rst, 1028 window); 1029 return ret; 1030 1031 } else if (eth->ether_type == htons(ETHERTYPE_IP6)) { 1032 ret = tcp6_extract(buffer + sizeof(struct ether_header), 1033 (size_t)(pkthdr.caplen - 1034 sizeof(struct ether_header)), 1035 &src->ip6, 1036 &dst->ip6, 1037 ack_seq, 1038 seq, 1039 rst, 1040 window); 1041 return ret; 1042 } 1043 1044 return ENOMSG; 1045 } 1046 1047 #endif /* HAVE_AF_PACKET */ 1048