1 /* $NetBSD: tcp-comms.c,v 1.1.1.2 2009/12/02 00:27:06 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of LVM2. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 /* 19 * This provides the inter-clvmd communications for a system without CMAN. 20 * There is a listening TCP socket which accepts new connections in the 21 * normal way. 22 * It can also make outgoing connnections to the other clvmd nodes. 23 */ 24 25 #define _GNU_SOURCE 26 #define _FILE_OFFSET_BITS 64 27 28 #include <configure.h> 29 #include <pthread.h> 30 #include <sys/types.h> 31 #include <sys/utsname.h> 32 #include <sys/ioctl.h> 33 #include <sys/socket.h> 34 #include <sys/stat.h> 35 #include <sys/socket.h> 36 #include <netinet/in.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <stdint.h> 40 #include <fcntl.h> 41 #include <string.h> 42 #include <stddef.h> 43 #include <stdint.h> 44 #include <unistd.h> 45 #include <errno.h> 46 #include <syslog.h> 47 #include <netdb.h> 48 #include <assert.h> 49 #include <libdevmapper.h> 50 51 #include "clvm.h" 52 #include "clvmd-comms.h" 53 #include "clvmd.h" 54 #include "clvmd-gulm.h" 55 56 #define DEFAULT_TCP_PORT 21064 57 58 static int listen_fd = -1; 59 static int tcp_port; 60 struct dm_hash_table *sock_hash; 61 62 static int get_our_ip_address(char *addr, int *family); 63 static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid, 64 struct local_client **new_client); 65 66 /* Called by init_cluster() to open up the listening socket */ 67 int init_comms(unsigned short port) 68 { 69 struct sockaddr_in6 addr; 70 71 sock_hash = dm_hash_create(100); 72 tcp_port = port ? : DEFAULT_TCP_PORT; 73 74 listen_fd = socket(AF_INET6, SOCK_STREAM, 0); 75 76 if (listen_fd < 0) 77 { 78 return -1; 79 } 80 else 81 { 82 int one = 1; 83 setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int)); 84 setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int)); 85 } 86 87 memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY 88 addr.sin6_family = AF_INET6; 89 addr.sin6_port = htons(tcp_port); 90 91 if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) 92 { 93 DEBUGLOG("Can't bind to port: %s\n", strerror(errno)); 94 syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port); 95 close(listen_fd); 96 return -1; 97 } 98 99 listen(listen_fd, 5); 100 101 /* Set Close-on-exec */ 102 fcntl(listen_fd, F_SETFD, 1); 103 104 return 0; 105 } 106 107 void tcp_remove_client(const char *c_csid) 108 { 109 struct local_client *client; 110 char csid[GULM_MAX_CSID_LEN]; 111 unsigned int i; 112 memcpy(csid, c_csid, sizeof csid); 113 DEBUGLOG("tcp_remove_client\n"); 114 115 /* Don't actually close the socket here - that's the 116 job of clvmd.c whch will do the job when it notices the 117 other end has gone. We just need to remove the client(s) from 118 the hash table so we don't try to use it for sending any more */ 119 for (i = 0; i < 2; i++) 120 { 121 client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN); 122 if (client) 123 { 124 dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN); 125 client->removeme = 1; 126 close(client->fd); 127 } 128 /* Look for a mangled one too, on the 2nd iteration. */ 129 csid[0] ^= 0x80; 130 } 131 } 132 133 int alloc_client(int fd, const char *c_csid, struct local_client **new_client) 134 { 135 struct local_client *client; 136 char csid[GULM_MAX_CSID_LEN]; 137 memcpy(csid, c_csid, sizeof csid); 138 139 DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid)); 140 141 /* Create a local_client and return it */ 142 client = malloc(sizeof(struct local_client)); 143 if (!client) 144 { 145 DEBUGLOG("malloc failed\n"); 146 return -1; 147 } 148 149 memset(client, 0, sizeof(struct local_client)); 150 client->fd = fd; 151 client->type = CLUSTER_DATA_SOCK; 152 client->callback = read_from_tcpsock; 153 if (new_client) 154 *new_client = client; 155 156 /* Add to our list of node sockets */ 157 if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN)) 158 { 159 DEBUGLOG("alloc_client mangling CSID for second connection\n"); 160 /* This is a duplicate connection but we can't close it because 161 the other end may already have started sending. 162 So, we mangle the IP address and keep it, all sending will 163 go out of the main FD 164 */ 165 csid[0] ^= 0x80; 166 client->bits.net.flags = 1; /* indicate mangled CSID */ 167 168 /* If it still exists then kill the connection as we should only 169 ever have one incoming connection from each node */ 170 if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN)) 171 { 172 DEBUGLOG("Multiple incoming connections from node\n"); 173 syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]); 174 175 free(client); 176 errno = ECONNREFUSED; 177 return -1; 178 } 179 } 180 dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client); 181 182 return 0; 183 } 184 185 int get_main_gulm_cluster_fd() 186 { 187 return listen_fd; 188 } 189 190 191 /* Read on main comms (listen) socket, accept it */ 192 int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid, 193 struct local_client **new_client) 194 { 195 int newfd; 196 struct sockaddr_in6 addr; 197 socklen_t addrlen = sizeof(addr); 198 int status; 199 char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN]; 200 201 DEBUGLOG("cluster_fd_callback\n"); 202 *new_client = NULL; 203 newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); 204 205 DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno); 206 if (!newfd) 207 { 208 syslog(LOG_ERR, "error in accept: %m"); 209 errno = EAGAIN; 210 return -1; /* Don't return an error or clvmd will close the listening FD */ 211 } 212 213 /* Check that the client is a member of the cluster 214 and reject if not. 215 */ 216 if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0) 217 { 218 syslog(LOG_ERR, "Got connect from non-cluster node %s\n", 219 print_csid((char *)&addr.sin6_addr)); 220 DEBUGLOG("Got connect from non-cluster node %s\n", 221 print_csid((char *)&addr.sin6_addr)); 222 close(newfd); 223 224 errno = EAGAIN; 225 return -1; 226 } 227 228 status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client); 229 if (status) 230 { 231 DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status); 232 close(newfd); 233 /* See above... */ 234 errno = EAGAIN; 235 return -1; 236 } 237 DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client); 238 return newfd; 239 } 240 241 /* Try to get at least 'len' bytes from the socket */ 242 static int really_read(int fd, char *buf, int len) 243 { 244 int got, offset; 245 246 got = offset = 0; 247 248 do { 249 got = read(fd, buf+offset, len-offset); 250 DEBUGLOG("really_read. got %d bytes\n", got); 251 offset += got; 252 } while (got > 0 && offset < len); 253 254 if (got < 0) 255 return got; 256 else 257 return offset; 258 } 259 260 261 static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid, 262 struct local_client **new_client) 263 { 264 struct sockaddr_in6 addr; 265 socklen_t slen = sizeof(addr); 266 struct clvm_header *header = (struct clvm_header *)buf; 267 int status; 268 uint32_t arglen; 269 270 DEBUGLOG("read_from_tcpsock fd %d\n", client->fd); 271 *new_client = NULL; 272 273 /* Get "csid" */ 274 getpeername(client->fd, (struct sockaddr *)&addr, &slen); 275 memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN); 276 277 /* Read just the header first, then get the rest if there is any. 278 * Stream sockets, sigh. 279 */ 280 status = really_read(client->fd, buf, sizeof(struct clvm_header)); 281 if (status > 0) 282 { 283 int status2; 284 285 arglen = ntohl(header->arglen); 286 287 /* Get the rest */ 288 if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE) 289 { 290 status2 = really_read(client->fd, buf+status, arglen); 291 if (status2 > 0) 292 status += status2; 293 else 294 status = status2; 295 } 296 } 297 298 DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno); 299 300 /* Remove it from the hash table if there's an error, clvmd will 301 remove the socket from its lists and free the client struct */ 302 if (status == 0 || 303 (status < 0 && errno != EAGAIN && errno != EINTR)) 304 { 305 char remcsid[GULM_MAX_CSID_LEN]; 306 307 memcpy(remcsid, csid, GULM_MAX_CSID_LEN); 308 close(client->fd); 309 310 /* If the csid was mangled, then make sure we remove the right entry */ 311 if (client->bits.net.flags) 312 remcsid[0] ^= 0x80; 313 dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN); 314 315 /* Tell cluster manager layer */ 316 add_down_node(remcsid); 317 } 318 else { 319 gulm_add_up_node(csid); 320 /* Send it back to clvmd */ 321 process_message(client, buf, status, csid); 322 } 323 return status; 324 } 325 326 int gulm_connect_csid(const char *csid, struct local_client **newclient) 327 { 328 int fd; 329 struct sockaddr_in6 addr; 330 int status; 331 int one = 1; 332 333 DEBUGLOG("Connecting socket\n"); 334 fd = socket(PF_INET6, SOCK_STREAM, 0); 335 336 if (fd < 0) 337 { 338 syslog(LOG_ERR, "Unable to create new socket: %m"); 339 return -1; 340 } 341 342 addr.sin6_family = AF_INET6; 343 memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN); 344 addr.sin6_port = htons(tcp_port); 345 346 DEBUGLOG("Connecting socket %d\n", fd); 347 if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0) 348 { 349 /* "Connection refused" is "normal" because clvmd may not yet be running 350 * on that node. 351 */ 352 if (errno != ECONNREFUSED) 353 { 354 syslog(LOG_ERR, "Unable to connect to remote node: %m"); 355 } 356 DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno)); 357 close(fd); 358 return -1; 359 } 360 361 /* Set Close-on-exec */ 362 fcntl(fd, F_SETFD, 1); 363 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int)); 364 365 status = alloc_client(fd, csid, newclient); 366 if (status) 367 close(fd); 368 else 369 add_client(*newclient); 370 371 /* If we can connect to it, it must be running a clvmd */ 372 gulm_add_up_node(csid); 373 return status; 374 } 375 376 /* Send a message to a known CSID */ 377 static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext) 378 { 379 int status; 380 struct local_client *client; 381 char ourcsid[GULM_MAX_CSID_LEN]; 382 383 assert(csid); 384 385 DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen); 386 387 /* Don't connect to ourself */ 388 get_our_gulm_csid(ourcsid); 389 if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0) 390 return msglen; 391 392 client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN); 393 if (!client) 394 { 395 status = gulm_connect_csid(csid, &client); 396 if (status) 397 return -1; 398 } 399 DEBUGLOG("tcp_send_message, fd = %d\n", client->fd); 400 401 return write(client->fd, buf, msglen); 402 } 403 404 405 int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext) 406 { 407 int status=0; 408 409 DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen); 410 411 /* If csid is NULL then send to all known (not just connected) nodes */ 412 if (!csid) 413 { 414 void *context = NULL; 415 char loop_csid[GULM_MAX_CSID_LEN]; 416 417 /* Loop round all gulm-known nodes */ 418 while (get_next_node_csid(&context, loop_csid)) 419 { 420 status = tcp_send_message(buf, msglen, loop_csid, errtext); 421 if (status == 0 || 422 (status < 0 && (errno == EAGAIN || errno == EINTR))) 423 break; 424 } 425 } 426 else 427 { 428 429 status = tcp_send_message(buf, msglen, csid, errtext); 430 } 431 return status; 432 } 433 434 /* To get our own IP address we get the locally bound address of the 435 socket that's talking to GULM in the assumption(eek) that it will 436 be on the "right" network in a multi-homed system */ 437 static int get_our_ip_address(char *addr, int *family) 438 { 439 struct utsname info; 440 441 uname(&info); 442 get_ip_address(info.nodename, addr); 443 444 return 0; 445 } 446 447 /* Public version of above for those that don't care what protocol 448 we're using */ 449 void get_our_gulm_csid(char *csid) 450 { 451 static char our_csid[GULM_MAX_CSID_LEN]; 452 static int got_csid = 0; 453 454 if (!got_csid) 455 { 456 int family; 457 458 memset(our_csid, 0, sizeof(our_csid)); 459 if (get_our_ip_address(our_csid, &family)) 460 { 461 got_csid = 1; 462 } 463 } 464 memcpy(csid, our_csid, GULM_MAX_CSID_LEN); 465 } 466 467 static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6) 468 { 469 ip6->s6_addr32[0] = 0; 470 ip6->s6_addr32[1] = 0; 471 ip6->s6_addr32[2] = htonl(0xffff); 472 ip6->s6_addr32[3] = ip4->s_addr; 473 } 474 475 /* Get someone else's IP address from DNS */ 476 int get_ip_address(const char *node, char *addr) 477 { 478 struct hostent *he; 479 480 memset(addr, 0, GULM_MAX_CSID_LEN); 481 482 // TODO: what do we do about multi-homed hosts ??? 483 // CCSs ip_interfaces solved this but some bugger removed it. 484 485 /* Try IPv6 first. The man page for gethostbyname implies that 486 it will lookup ip6 & ip4 names, but it seems not to */ 487 he = gethostbyname2(node, AF_INET6); 488 if (he) 489 { 490 memcpy(addr, he->h_addr_list[0], 491 he->h_length); 492 } 493 else 494 { 495 he = gethostbyname2(node, AF_INET); 496 if (!he) 497 return -1; 498 map_v4_to_v6((struct in_addr *)he->h_addr_list[0], (struct in6_addr *)addr); 499 } 500 501 return 0; 502 } 503 504 char *print_csid(const char *csid) 505 { 506 static char buf[128]; 507 int *icsid = (int *)csid; 508 509 sprintf(buf, "[%x.%x.%x.%x]", 510 icsid[0],icsid[1],icsid[2],icsid[3]); 511 512 return buf; 513 } 514