1 /* $NetBSD: tcp-comms.c,v 1.1.1.2 2009/12/02 00:27:06 haad Exp $ */
2
3 /*
4 * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 /*
19 * This provides the inter-clvmd communications for a system without CMAN.
20 * There is a listening TCP socket which accepts new connections in the
21 * normal way.
22 * It can also make outgoing connections to the other clvmd nodes.
23 */
24
25 #define _GNU_SOURCE
26 #define _FILE_OFFSET_BITS 64
27
28 #include <configure.h>
29 #include <pthread.h>
30 #include <sys/types.h>
31 #include <sys/utsname.h>
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/socket.h>
36 #include <netinet/in.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stdint.h>
40 #include <fcntl.h>
41 #include <string.h>
42 #include <stddef.h>
43 #include <stdint.h>
44 #include <unistd.h>
45 #include <errno.h>
46 #include <syslog.h>
47 #include <netdb.h>
48 #include <assert.h>
49 #include <libdevmapper.h>
50
51 #include "clvm.h"
52 #include "clvmd-comms.h"
53 #include "clvmd.h"
54 #include "clvmd-gulm.h"
55
56 #define DEFAULT_TCP_PORT 21064
57
58 static int listen_fd = -1; /* listening TCP socket; -1 until init_comms() opens it */
59 static int tcp_port; /* port used for both listening and outgoing connects; set by init_comms() */
60 struct dm_hash_table *sock_hash; /* struct local_client entries keyed by binary csid; created by init_comms() */
61
62 static int get_our_ip_address(char *addr, int *family); /* forward declaration, defined further down */
63 static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
64 struct local_client **new_client); /* forward declaration: installed as the client callback by alloc_client() */
65
66 /* Called by init_cluster() to open up the listening socket */
init_comms(unsigned short port)67 int init_comms(unsigned short port)
68 {
69 struct sockaddr_in6 addr;
70
71 sock_hash = dm_hash_create(100);
72 tcp_port = port ? : DEFAULT_TCP_PORT;
73
74 listen_fd = socket(AF_INET6, SOCK_STREAM, 0);
75
76 if (listen_fd < 0)
77 {
78 return -1;
79 }
80 else
81 {
82 int one = 1;
83 setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
84 setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
85 }
86
87 memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY
88 addr.sin6_family = AF_INET6;
89 addr.sin6_port = htons(tcp_port);
90
91 if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
92 {
93 DEBUGLOG("Can't bind to port: %s\n", strerror(errno));
94 syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
95 close(listen_fd);
96 return -1;
97 }
98
99 listen(listen_fd, 5);
100
101 /* Set Close-on-exec */
102 fcntl(listen_fd, F_SETFD, 1);
103
104 return 0;
105 }
106
tcp_remove_client(const char * c_csid)107 void tcp_remove_client(const char *c_csid)
108 {
109 struct local_client *client;
110 char csid[GULM_MAX_CSID_LEN];
111 unsigned int i;
112 memcpy(csid, c_csid, sizeof csid);
113 DEBUGLOG("tcp_remove_client\n");
114
115 /* Don't actually close the socket here - that's the
116 job of clvmd.c whch will do the job when it notices the
117 other end has gone. We just need to remove the client(s) from
118 the hash table so we don't try to use it for sending any more */
119 for (i = 0; i < 2; i++)
120 {
121 client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
122 if (client)
123 {
124 dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
125 client->removeme = 1;
126 close(client->fd);
127 }
128 /* Look for a mangled one too, on the 2nd iteration. */
129 csid[0] ^= 0x80;
130 }
131 }
132
alloc_client(int fd,const char * c_csid,struct local_client ** new_client)133 int alloc_client(int fd, const char *c_csid, struct local_client **new_client)
134 {
135 struct local_client *client;
136 char csid[GULM_MAX_CSID_LEN];
137 memcpy(csid, c_csid, sizeof csid);
138
139 DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid));
140
141 /* Create a local_client and return it */
142 client = malloc(sizeof(struct local_client));
143 if (!client)
144 {
145 DEBUGLOG("malloc failed\n");
146 return -1;
147 }
148
149 memset(client, 0, sizeof(struct local_client));
150 client->fd = fd;
151 client->type = CLUSTER_DATA_SOCK;
152 client->callback = read_from_tcpsock;
153 if (new_client)
154 *new_client = client;
155
156 /* Add to our list of node sockets */
157 if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
158 {
159 DEBUGLOG("alloc_client mangling CSID for second connection\n");
160 /* This is a duplicate connection but we can't close it because
161 the other end may already have started sending.
162 So, we mangle the IP address and keep it, all sending will
163 go out of the main FD
164 */
165 csid[0] ^= 0x80;
166 client->bits.net.flags = 1; /* indicate mangled CSID */
167
168 /* If it still exists then kill the connection as we should only
169 ever have one incoming connection from each node */
170 if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
171 {
172 DEBUGLOG("Multiple incoming connections from node\n");
173 syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
174
175 free(client);
176 errno = ECONNREFUSED;
177 return -1;
178 }
179 }
180 dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client);
181
182 return 0;
183 }
184
get_main_gulm_cluster_fd()185 int get_main_gulm_cluster_fd()
186 {
187 return listen_fd;
188 }
189
190
191 /* Read on main comms (listen) socket, accept it */
cluster_fd_gulm_callback(struct local_client * fd,char * buf,int len,const char * csid,struct local_client ** new_client)192 int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid,
193 struct local_client **new_client)
194 {
195 int newfd;
196 struct sockaddr_in6 addr;
197 socklen_t addrlen = sizeof(addr);
198 int status;
199 char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
200
201 DEBUGLOG("cluster_fd_callback\n");
202 *new_client = NULL;
203 newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
204
205 DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
206 if (!newfd)
207 {
208 syslog(LOG_ERR, "error in accept: %m");
209 errno = EAGAIN;
210 return -1; /* Don't return an error or clvmd will close the listening FD */
211 }
212
213 /* Check that the client is a member of the cluster
214 and reject if not.
215 */
216 if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0)
217 {
218 syslog(LOG_ERR, "Got connect from non-cluster node %s\n",
219 print_csid((char *)&addr.sin6_addr));
220 DEBUGLOG("Got connect from non-cluster node %s\n",
221 print_csid((char *)&addr.sin6_addr));
222 close(newfd);
223
224 errno = EAGAIN;
225 return -1;
226 }
227
228 status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client);
229 if (status)
230 {
231 DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
232 close(newfd);
233 /* See above... */
234 errno = EAGAIN;
235 return -1;
236 }
237 DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
238 return newfd;
239 }
240
/*
 * Try to get exactly 'len' bytes from the socket.
 *
 * Keeps calling read() until 'len' bytes have arrived, the peer closes
 * the connection, or a real error occurs.  A read interrupted by a
 * signal (EINTR) is retried so that bytes already received are not
 * thrown away.
 *
 * Returns the number of bytes read (less than 'len' only if end-of-file
 * was reached first), or -1 with errno set on error.  Bytes received
 * before an error are not reported to the caller.
 */
static int really_read(int fd, char *buf, int len)
{
	int offset = 0;

	for (;;) {
		int got = (int)read(fd, buf + offset, len - offset);

		if (got < 0) {
			int saved_errno = errno;

			DEBUGLOG("really_read. got %d bytes\n", got);
			errno = saved_errno; /* logging must not hide the cause */
			if (saved_errno == EINTR)
				continue;
			return got;
		}

		DEBUGLOG("really_read. got %d bytes\n", got);
		offset += got;

		/* Stop on end-of-file or once the request is satisfied */
		if (got == 0 || offset >= len)
			return offset;
	}
}
259
260
read_from_tcpsock(struct local_client * client,char * buf,int len,char * csid,struct local_client ** new_client)261 static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
262 struct local_client **new_client)
263 {
264 struct sockaddr_in6 addr;
265 socklen_t slen = sizeof(addr);
266 struct clvm_header *header = (struct clvm_header *)buf;
267 int status;
268 uint32_t arglen;
269
270 DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
271 *new_client = NULL;
272
273 /* Get "csid" */
274 getpeername(client->fd, (struct sockaddr *)&addr, &slen);
275 memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN);
276
277 /* Read just the header first, then get the rest if there is any.
278 * Stream sockets, sigh.
279 */
280 status = really_read(client->fd, buf, sizeof(struct clvm_header));
281 if (status > 0)
282 {
283 int status2;
284
285 arglen = ntohl(header->arglen);
286
287 /* Get the rest */
288 if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE)
289 {
290 status2 = really_read(client->fd, buf+status, arglen);
291 if (status2 > 0)
292 status += status2;
293 else
294 status = status2;
295 }
296 }
297
298 DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
299
300 /* Remove it from the hash table if there's an error, clvmd will
301 remove the socket from its lists and free the client struct */
302 if (status == 0 ||
303 (status < 0 && errno != EAGAIN && errno != EINTR))
304 {
305 char remcsid[GULM_MAX_CSID_LEN];
306
307 memcpy(remcsid, csid, GULM_MAX_CSID_LEN);
308 close(client->fd);
309
310 /* If the csid was mangled, then make sure we remove the right entry */
311 if (client->bits.net.flags)
312 remcsid[0] ^= 0x80;
313 dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN);
314
315 /* Tell cluster manager layer */
316 add_down_node(remcsid);
317 }
318 else {
319 gulm_add_up_node(csid);
320 /* Send it back to clvmd */
321 process_message(client, buf, status, csid);
322 }
323 return status;
324 }
325
gulm_connect_csid(const char * csid,struct local_client ** newclient)326 int gulm_connect_csid(const char *csid, struct local_client **newclient)
327 {
328 int fd;
329 struct sockaddr_in6 addr;
330 int status;
331 int one = 1;
332
333 DEBUGLOG("Connecting socket\n");
334 fd = socket(PF_INET6, SOCK_STREAM, 0);
335
336 if (fd < 0)
337 {
338 syslog(LOG_ERR, "Unable to create new socket: %m");
339 return -1;
340 }
341
342 addr.sin6_family = AF_INET6;
343 memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN);
344 addr.sin6_port = htons(tcp_port);
345
346 DEBUGLOG("Connecting socket %d\n", fd);
347 if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0)
348 {
349 /* "Connection refused" is "normal" because clvmd may not yet be running
350 * on that node.
351 */
352 if (errno != ECONNREFUSED)
353 {
354 syslog(LOG_ERR, "Unable to connect to remote node: %m");
355 }
356 DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
357 close(fd);
358 return -1;
359 }
360
361 /* Set Close-on-exec */
362 fcntl(fd, F_SETFD, 1);
363 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
364
365 status = alloc_client(fd, csid, newclient);
366 if (status)
367 close(fd);
368 else
369 add_client(*newclient);
370
371 /* If we can connect to it, it must be running a clvmd */
372 gulm_add_up_node(csid);
373 return status;
374 }
375
376 /* Send a message to a known CSID */
tcp_send_message(void * buf,int msglen,const char * csid,const char * errtext)377 static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext)
378 {
379 int status;
380 struct local_client *client;
381 char ourcsid[GULM_MAX_CSID_LEN];
382
383 assert(csid);
384
385 DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen);
386
387 /* Don't connect to ourself */
388 get_our_gulm_csid(ourcsid);
389 if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0)
390 return msglen;
391
392 client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
393 if (!client)
394 {
395 status = gulm_connect_csid(csid, &client);
396 if (status)
397 return -1;
398 }
399 DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
400
401 return write(client->fd, buf, msglen);
402 }
403
404
gulm_cluster_send_message(void * buf,int msglen,const char * csid,const char * errtext)405 int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext)
406 {
407 int status=0;
408
409 DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
410
411 /* If csid is NULL then send to all known (not just connected) nodes */
412 if (!csid)
413 {
414 void *context = NULL;
415 char loop_csid[GULM_MAX_CSID_LEN];
416
417 /* Loop round all gulm-known nodes */
418 while (get_next_node_csid(&context, loop_csid))
419 {
420 status = tcp_send_message(buf, msglen, loop_csid, errtext);
421 if (status == 0 ||
422 (status < 0 && (errno == EAGAIN || errno == EINTR)))
423 break;
424 }
425 }
426 else
427 {
428
429 status = tcp_send_message(buf, msglen, csid, errtext);
430 }
431 return status;
432 }
433
434 /* To get our own IP address we get the locally bound address of the
435 socket that's talking to GULM in the assumption(eek) that it will
436 be on the "right" network in a multi-homed system */
get_our_ip_address(char * addr,int * family)437 static int get_our_ip_address(char *addr, int *family)
438 {
439 struct utsname info;
440
441 uname(&info);
442 get_ip_address(info.nodename, addr);
443
444 return 0;
445 }
446
447 /* Public version of above for those that don't care what protocol
448 we're using */
get_our_gulm_csid(char * csid)449 void get_our_gulm_csid(char *csid)
450 {
451 static char our_csid[GULM_MAX_CSID_LEN];
452 static int got_csid = 0;
453
454 if (!got_csid)
455 {
456 int family;
457
458 memset(our_csid, 0, sizeof(our_csid));
459 if (get_our_ip_address(our_csid, &family))
460 {
461 got_csid = 1;
462 }
463 }
464 memcpy(csid, our_csid, GULM_MAX_CSID_LEN);
465 }
466
/*
 * Build the IPv4-mapped IPv6 address (::ffff:a.b.c.d) for *ip4 in *ip6.
 *
 * The address is assembled byte by byte through s6_addr, the only
 * in6_addr member POSIX guarantees, so this neither depends on the
 * non-standard s6_addr32 member nor needs ip6 to be 32-bit aligned.
 */
static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6)
{
	/* Bytes 0-9 are zero, bytes 10-11 are 0xff */
	memset(ip6->s6_addr, 0, 10);
	ip6->s6_addr[10] = 0xff;
	ip6->s6_addr[11] = 0xff;

	/* s_addr is already in network byte order: copy it verbatim */
	memcpy(&ip6->s6_addr[12], &ip4->s_addr, sizeof(ip4->s_addr));
}
474
475 /* Get someone else's IP address from DNS */
get_ip_address(const char * node,char * addr)476 int get_ip_address(const char *node, char *addr)
477 {
478 struct hostent *he;
479
480 memset(addr, 0, GULM_MAX_CSID_LEN);
481
482 // TODO: what do we do about multi-homed hosts ???
483 // CCSs ip_interfaces solved this but some bugger removed it.
484
485 /* Try IPv6 first. The man page for gethostbyname implies that
486 it will lookup ip6 & ip4 names, but it seems not to */
487 he = gethostbyname2(node, AF_INET6);
488 if (he)
489 {
490 memcpy(addr, he->h_addr_list[0],
491 he->h_length);
492 }
493 else
494 {
495 he = gethostbyname2(node, AF_INET);
496 if (!he)
497 return -1;
498 map_v4_to_v6((struct in_addr *)he->h_addr_list[0], (struct in6_addr *)addr);
499 }
500
501 return 0;
502 }
503
/*
 * Format a csid for log messages.
 *
 * Reads 4 * sizeof(unsigned int) bytes from 'csid' and prints them as
 * four native-endian words in hex: "[w0.w1.w2.w3]".  The bytes are
 * copied out first, so 'csid' does not need any particular alignment.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call; the result must be used before print_csid() is called again.
 */
char *print_csid(const char *csid)
{
	static char buf[128];
	unsigned int words[4];

	memcpy(words, csid, sizeof words);

	snprintf(buf, sizeof buf, "[%x.%x.%x.%x]",
		 words[0], words[1], words[2], words[3]);

	return buf;
}
514