1 /*	$NetBSD: tcp-comms.c,v 1.1.1.2 2009/12/02 00:27:06 haad Exp $	*/
2 
3 /*
4  *  Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
5  *  Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 /*
19  * This provides the inter-clvmd communications for a system without CMAN.
20  * There is a listening TCP socket which accepts new connections in the
21  * normal way.
22  * It can also make outgoing connections to the other clvmd nodes.
23  */
24 
25 #define _GNU_SOURCE
26 #define _FILE_OFFSET_BITS 64
27 
28 #include <configure.h>
29 #include <pthread.h>
30 #include <sys/types.h>
31 #include <sys/utsname.h>
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/socket.h>
36 #include <netinet/in.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stdint.h>
40 #include <fcntl.h>
41 #include <string.h>
42 #include <stddef.h>
43 #include <stdint.h>
44 #include <unistd.h>
45 #include <errno.h>
46 #include <syslog.h>
47 #include <netdb.h>
48 #include <assert.h>
49 #include <libdevmapper.h>
50 
51 #include "clvm.h"
52 #include "clvmd-comms.h"
53 #include "clvmd.h"
54 #include "clvmd-gulm.h"
55 
/* Port used for inter-clvmd traffic when init_comms() is passed port 0 */
#define DEFAULT_TCP_PORT 21064

/* Node sockets, keyed by the binary CSID of the node they belong to */
struct dm_hash_table *sock_hash;

/* Listening socket; stays at -1 until init_comms() has created it */
static int listen_fd = -1;

/* TCP port in use, chosen by init_comms() */
static int tcp_port;

/* Forward declarations for helpers that are used before they are defined */
static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
			     struct local_client **new_client);
static int get_our_ip_address(char *addr, int *family);
65 
66 /* Called by init_cluster() to open up the listening socket */
67 int init_comms(unsigned short port)
68 {
69     struct sockaddr_in6 addr;
70 
71     sock_hash = dm_hash_create(100);
72     tcp_port = port ? : DEFAULT_TCP_PORT;
73 
74     listen_fd = socket(AF_INET6, SOCK_STREAM, 0);
75 
76     if (listen_fd < 0)
77     {
78 	return -1;
79     }
80     else
81     {
82 	int one = 1;
83 	setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
84 	setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
85     }
86 
87     memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY
88     addr.sin6_family = AF_INET6;
89     addr.sin6_port = htons(tcp_port);
90 
91     if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
92     {
93 	DEBUGLOG("Can't bind to port: %s\n", strerror(errno));
94 	syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
95 	close(listen_fd);
96 	return -1;
97     }
98 
99     listen(listen_fd, 5);
100 
101     /* Set Close-on-exec */
102     fcntl(listen_fd, F_SETFD, 1);
103 
104     return 0;
105 }
106 
107 void tcp_remove_client(const char *c_csid)
108 {
109     struct local_client *client;
110     char csid[GULM_MAX_CSID_LEN];
111     unsigned int i;
112     memcpy(csid, c_csid, sizeof csid);
113     DEBUGLOG("tcp_remove_client\n");
114 
115     /* Don't actually close the socket here - that's the
116        job of clvmd.c whch will do the job when it notices the
117        other end has gone. We just need to remove the client(s) from
118        the hash table so we don't try to use it for sending any more */
119     for (i = 0; i < 2; i++)
120     {
121 	client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
122 	if (client)
123 	{
124 	    dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
125 	    client->removeme = 1;
126 	    close(client->fd);
127 	}
128 	/* Look for a mangled one too, on the 2nd iteration. */
129 	csid[0] ^= 0x80;
130     }
131 }
132 
133 int alloc_client(int fd, const char *c_csid, struct local_client **new_client)
134 {
135     struct local_client *client;
136     char csid[GULM_MAX_CSID_LEN];
137     memcpy(csid, c_csid, sizeof csid);
138 
139     DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid));
140 
141     /* Create a local_client and return it */
142     client = malloc(sizeof(struct local_client));
143     if (!client)
144     {
145 	DEBUGLOG("malloc failed\n");
146 	return -1;
147     }
148 
149     memset(client, 0, sizeof(struct local_client));
150     client->fd = fd;
151     client->type = CLUSTER_DATA_SOCK;
152     client->callback = read_from_tcpsock;
153     if (new_client)
154 	*new_client = client;
155 
156     /* Add to our list of node sockets */
157     if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
158     {
159 	DEBUGLOG("alloc_client mangling CSID for second connection\n");
160 	/* This is a duplicate connection but we can't close it because
161 	   the other end may already have started sending.
162 	   So, we mangle the IP address and keep it, all sending will
163 	   go out of the main FD
164 	*/
165 	csid[0] ^= 0x80;
166 	client->bits.net.flags = 1; /* indicate mangled CSID */
167 
168         /* If it still exists then kill the connection as we should only
169            ever have one incoming connection from each node */
170         if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
171         {
172 	    DEBUGLOG("Multiple incoming connections from node\n");
173             syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
174 
175 	    free(client);
176             errno = ECONNREFUSED;
177             return -1;
178         }
179     }
180     dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client);
181 
182     return 0;
183 }
184 
185 int get_main_gulm_cluster_fd()
186 {
187     return listen_fd;
188 }
189 
190 
191 /* Read on main comms (listen) socket, accept it */
192 int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid,
193 			struct local_client **new_client)
194 {
195     int newfd;
196     struct sockaddr_in6 addr;
197     socklen_t addrlen = sizeof(addr);
198     int status;
199     char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
200 
201     DEBUGLOG("cluster_fd_callback\n");
202     *new_client = NULL;
203     newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
204 
205     DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
206     if (!newfd)
207     {
208 	syslog(LOG_ERR, "error in accept: %m");
209 	errno = EAGAIN;
210 	return -1; /* Don't return an error or clvmd will close the listening FD */
211     }
212 
213     /* Check that the client is a member of the cluster
214        and reject if not.
215     */
216     if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0)
217     {
218 	syslog(LOG_ERR, "Got connect from non-cluster node %s\n",
219 	       print_csid((char *)&addr.sin6_addr));
220 	DEBUGLOG("Got connect from non-cluster node %s\n",
221 		 print_csid((char *)&addr.sin6_addr));
222 	close(newfd);
223 
224 	errno = EAGAIN;
225 	return -1;
226     }
227 
228     status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client);
229     if (status)
230     {
231 	DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
232 	close(newfd);
233 	/* See above... */
234 	errno = EAGAIN;
235 	return -1;
236     }
237     DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
238     return newfd;
239 }
240 
/*
 * Try to get 'len' bytes from the socket.
 *
 * Keeps reading until 'len' bytes have arrived, the peer closes the
 * connection, or an error occurs.  A read interrupted by a signal (EINTR)
 * is retried so that a message is not abandoned half way through.
 *
 * Returns the number of bytes read (less than 'len' only at end of
 * stream), or a negative value with errno set on error.
 */
static int really_read(int fd, char *buf, int len)
{
	int got;
	int offset = 0;
	int read_errno;

	for (;;) {
		got = read(fd, buf + offset, len - offset);
		read_errno = errno;	/* the debug log may disturb errno */
		DEBUGLOG("really_read. got %d bytes\n", got);

		if (got < 0) {
			if (read_errno == EINTR)
				continue;
			errno = read_errno;
			return got;
		}

		offset += got;
		if (got == 0 || offset >= len)
			return offset;
	}
}
259 
260 
261 static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
262 			     struct local_client **new_client)
263 {
264     struct sockaddr_in6 addr;
265     socklen_t slen = sizeof(addr);
266     struct clvm_header *header = (struct clvm_header *)buf;
267     int status;
268     uint32_t arglen;
269 
270     DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
271     *new_client = NULL;
272 
273     /* Get "csid" */
274     getpeername(client->fd, (struct sockaddr *)&addr, &slen);
275     memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN);
276 
277     /* Read just the header first, then get the rest if there is any.
278      * Stream sockets, sigh.
279      */
280     status = really_read(client->fd, buf, sizeof(struct clvm_header));
281     if (status > 0)
282     {
283 	    int status2;
284 
285 	    arglen = ntohl(header->arglen);
286 
287 	    /* Get the rest */
288 	    if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE)
289 	    {
290 		    status2 = really_read(client->fd, buf+status, arglen);
291 		    if (status2 > 0)
292 			    status += status2;
293 		    else
294 			    status = status2;
295 	    }
296     }
297 
298     DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
299 
300     /* Remove it from the hash table if there's an error, clvmd will
301        remove the socket from its lists and free the client struct */
302     if (status == 0 ||
303 	(status < 0 && errno != EAGAIN && errno != EINTR))
304     {
305 	char remcsid[GULM_MAX_CSID_LEN];
306 
307 	memcpy(remcsid, csid, GULM_MAX_CSID_LEN);
308 	close(client->fd);
309 
310 	/* If the csid was mangled, then make sure we remove the right entry */
311 	if (client->bits.net.flags)
312 	    remcsid[0] ^= 0x80;
313 	dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN);
314 
315 	/* Tell cluster manager layer */
316 	add_down_node(remcsid);
317     }
318     else {
319 	    gulm_add_up_node(csid);
320 	    /* Send it back to clvmd */
321 	    process_message(client, buf, status, csid);
322     }
323     return status;
324 }
325 
326 int gulm_connect_csid(const char *csid, struct local_client **newclient)
327 {
328     int fd;
329     struct sockaddr_in6 addr;
330     int status;
331     int one = 1;
332 
333     DEBUGLOG("Connecting socket\n");
334     fd = socket(PF_INET6, SOCK_STREAM, 0);
335 
336     if (fd < 0)
337     {
338 	syslog(LOG_ERR, "Unable to create new socket: %m");
339 	return -1;
340     }
341 
342     addr.sin6_family = AF_INET6;
343     memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN);
344     addr.sin6_port = htons(tcp_port);
345 
346     DEBUGLOG("Connecting socket %d\n", fd);
347     if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0)
348     {
349 	/* "Connection refused" is "normal" because clvmd may not yet be running
350 	 * on that node.
351 	 */
352 	if (errno != ECONNREFUSED)
353 	{
354 	    syslog(LOG_ERR, "Unable to connect to remote node: %m");
355 	}
356 	DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
357 	close(fd);
358 	return -1;
359     }
360 
361     /* Set Close-on-exec */
362     fcntl(fd, F_SETFD, 1);
363     setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
364 
365     status = alloc_client(fd, csid, newclient);
366     if (status)
367 	close(fd);
368     else
369 	add_client(*newclient);
370 
371     /* If we can connect to it, it must be running a clvmd */
372     gulm_add_up_node(csid);
373     return status;
374 }
375 
376 /* Send a message to a known CSID */
377 static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext)
378 {
379     int status;
380     struct local_client *client;
381     char ourcsid[GULM_MAX_CSID_LEN];
382 
383     assert(csid);
384 
385     DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen);
386 
387     /* Don't connect to ourself */
388     get_our_gulm_csid(ourcsid);
389     if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0)
390 	return msglen;
391 
392     client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
393     if (!client)
394     {
395 	status = gulm_connect_csid(csid, &client);
396 	if (status)
397 	    return -1;
398     }
399     DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
400 
401     return write(client->fd, buf, msglen);
402 }
403 
404 
405 int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext)
406 {
407     int status=0;
408 
409     DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
410 
411     /* If csid is NULL then send to all known (not just connected) nodes */
412     if (!csid)
413     {
414 	void *context = NULL;
415 	char loop_csid[GULM_MAX_CSID_LEN];
416 
417 	/* Loop round all gulm-known nodes */
418 	while (get_next_node_csid(&context, loop_csid))
419 	{
420 	    status = tcp_send_message(buf, msglen, loop_csid, errtext);
421 	    if (status == 0 ||
422 		(status < 0 && (errno == EAGAIN || errno == EINTR)))
423 		break;
424 	}
425     }
426     else
427     {
428 
429 	status = tcp_send_message(buf, msglen, csid, errtext);
430     }
431     return status;
432 }
433 
434 /* To get our own IP address we get the locally bound address of the
435    socket that's talking to GULM in the assumption(eek) that it will
436    be on the "right" network in a multi-homed system */
437 static int get_our_ip_address(char *addr, int *family)
438 {
439 	struct utsname info;
440 
441 	uname(&info);
442 	get_ip_address(info.nodename, addr);
443 
444 	return 0;
445 }
446 
447 /* Public version of above for those that don't care what protocol
448    we're using */
449 void get_our_gulm_csid(char *csid)
450 {
451     static char our_csid[GULM_MAX_CSID_LEN];
452     static int got_csid = 0;
453 
454     if (!got_csid)
455     {
456 	int family;
457 
458 	memset(our_csid, 0, sizeof(our_csid));
459 	if (get_our_ip_address(our_csid, &family))
460 	{
461 	    got_csid = 1;
462 	}
463     }
464     memcpy(csid, our_csid, GULM_MAX_CSID_LEN);
465 }
466 
/*
 * Build the IPv4-mapped IPv6 address ::ffff:a.b.c.d for an IPv4 address.
 *
 * ip4 - source address; s_addr is copied as it is (network byte order).
 * ip6 - destination; all 16 bytes are written.
 *
 * Only byte copies into the standard s6_addr array are used, so this does
 * not depend on the non-standard s6_addr32 member and performs no 32-bit
 * stores through the destination pointer.
 */
static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6)
{
   static const unsigned char v4mapped_prefix[12] =
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };

   memcpy(&ip6->s6_addr[0], v4mapped_prefix, sizeof(v4mapped_prefix));
   memcpy(&ip6->s6_addr[12], &ip4->s_addr, sizeof(ip4->s_addr));
}
474 
/*
 * Get someone else's IP address from the resolver.
 *
 * node - host name to look up.
 * addr - receives the address in IPv6 form (GULM_MAX_CSID_LEN bytes);
 *        it is zeroed first and stays zeroed on failure.
 *
 * An IPv6 address is preferred.  If the name has none, its IPv4 address
 * is looked up and stored as an IPv4-mapped IPv6 address.  getaddrinfo()
 * is used because, unlike gethostbyname2(), it does not return its result
 * in static storage shared between threads.
 *
 * Returns 0 on success, -1 if the name cannot be resolved.
 */
int get_ip_address(const char *node, char *addr)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;
    struct in6_addr ip6;
    size_t copylen;

    memset(addr, 0, GULM_MAX_CSID_LEN);

    /* TODO: what do we do about multi-homed hosts ??? */

    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;

    /* Try IPv6 first */
    hints.ai_family = AF_INET6;
    if (getaddrinfo(node, NULL, &hints, &res) == 0 && res &&
	res->ai_addrlen >= sizeof(struct sockaddr_in6))
    {
	struct sockaddr_in6 sa6;

	memcpy(&sa6, res->ai_addr, sizeof(sa6));
	ip6 = sa6.sin6_addr;
    }
    else
    {
	struct sockaddr_in sa4;

	if (res)
	{
	    freeaddrinfo(res);
	    res = NULL;
	}

	hints.ai_family = AF_INET;
	if (getaddrinfo(node, NULL, &hints, &res) != 0 || !res)
	    return -1;
	if (res->ai_addrlen < sizeof(sa4))
	{
	    freeaddrinfo(res);
	    return -1;
	}

	memcpy(&sa4, res->ai_addr, sizeof(sa4));
	map_v4_to_v6(&sa4.sin_addr, &ip6);
    }
    freeaddrinfo(res);

    /* Never write more than the caller's CSID buffer can hold */
    copylen = sizeof(ip6);
    if (copylen > (size_t)GULM_MAX_CSID_LEN)
	copylen = (size_t)GULM_MAX_CSID_LEN;
    memcpy(addr, &ip6, copylen);

    return 0;
}
503 
/*
 * Format a CSID for log messages.
 *
 * csid - at least 16 bytes; they are shown as four 32-bit words in host
 *        byte order, in hex, as "[w0.w1.w2.w3]".
 *
 * The bytes are copied out instead of being read through an int pointer,
 * so 'csid' needs no particular alignment.
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 */
char *print_csid(const char *csid)
{
    static char buf[128];
    uint32_t words[4];

    memcpy(words, csid, sizeof(words));

    snprintf(buf, sizeof(buf), "[%x.%x.%x.%x]",
	     (unsigned int)words[0], (unsigned int)words[1],
	     (unsigned int)words[2], (unsigned int)words[3]);

    return buf;
}
514