1 /*	$NetBSD: clvmd-cman.c,v 1.1.1.2 2009/12/02 00:27:01 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU General Public License v.2.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 /*
19  * CMAN communication layer for clvmd.
20  */
21 
22 #define _GNU_SOURCE
23 #define _FILE_OFFSET_BITS 64
24 
25 #include <configure.h>
26 #include <pthread.h>
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/socket.h>
30 #include <sys/uio.h>
31 #include <sys/un.h>
32 #include <sys/time.h>
33 #include <sys/ioctl.h>
34 #include <sys/utsname.h>
35 #include <syslog.h>
36 #include <netinet/in.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <signal.h>
41 #include <unistd.h>
42 #include <fcntl.h>
43 #include <getopt.h>
44 #include <errno.h>
45 #include <libdevmapper.h>
46 #include <libdlm.h>
47 
48 #include "clvmd-comms.h"
49 #include "clvm.h"
50 #include "lvm-logging.h"
51 #include "clvmd.h"
52 #include "lvm-functions.h"
53 
/* Name of the DLM lockspace that holds the LV & VG locks. */
#define LOCKSPACE_NAME "clvmd"

/*
 * Pairs a CMAN node record with a flag saying whether clvmd is up on it.
 * NOTE(review): no user of this struct is visible in this file - confirm
 * whether it is still needed.
 */
struct clvmd_node {
	struct cman_node *node;	/* the CMAN node record */
	int clvmd_up;		/* non-zero when clvmd is up on that node */
};
61 
62 static int num_nodes;
63 static struct cman_node *nodes = NULL;
64 static struct cman_node this_node;
65 static int count_nodes; /* size of allocated nodes array */
66 static struct dm_hash_table *node_updown_hash;
67 static dlm_lshandle_t *lockspace;
68 static cman_handle_t c_handle;
69 
70 static void count_clvmds_running(void);
71 static void get_members(void);
72 static int nodeid_from_csid(const char *csid);
73 static int name_from_nodeid(int nodeid, char *name);
74 static void event_callback(cman_handle_t handle, void *private, int reason, int arg);
75 static void data_callback(cman_handle_t handle, void *private,
76 			  char *buf, int len, uint8_t port, int nodeid);
77 
78 struct lock_wait {
79 	pthread_cond_t cond;
80 	pthread_mutex_t mutex;
81 	struct dlm_lksb lksb;
82 };
83 
84 static int _init_cluster(void)
85 {
86 	node_updown_hash = dm_hash_create(100);
87 
88 	/* Open the cluster communication socket */
89 	c_handle = cman_init(NULL);
90 	if (!c_handle) {
91 		syslog(LOG_ERR, "Can't open cluster manager socket: %m");
92 		return -1;
93 	}
94 	DEBUGLOG("Connected to CMAN\n");
95 
96 	if (cman_start_recv_data(c_handle, data_callback, CLUSTER_PORT_CLVMD)) {
97 		syslog(LOG_ERR, "Can't bind cluster socket: %m");
98 		return -1;
99 	}
100 
101 	if (cman_start_notification(c_handle, event_callback)) {
102 		syslog(LOG_ERR, "Can't start cluster event listening");
103 		return -1;
104 	}
105 
106 	/* Get the cluster members list */
107 	get_members();
108 	count_clvmds_running();
109 
110 	DEBUGLOG("CMAN initialisation complete\n");
111 
112 	/* Create a lockspace for LV & VG locks to live in */
113 	lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
114 	if (!lockspace) {
115 		if (errno == EEXIST) {
116 			lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
117 		}
118 		if (!lockspace) {
119 			syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
120 			return -1;
121 		}
122 	}
123 	dlm_ls_pthread_init(lockspace);
124 	DEBUGLOG("DLM initialisation complete\n");
125 	return 0;
126 }
127 
/* cluster_ops hook: simply forwards to clvmd_cluster_init_completed(). */
static void _cluster_init_completed(void)
{
	clvmd_cluster_init_completed();
}
132 
133 static int _get_main_cluster_fd()
134 {
135 	return cman_get_fd(c_handle);
136 }
137 
138 static int _get_num_nodes()
139 {
140 	int i;
141 	int nnodes = 0;
142 
143 	/* return number of ACTIVE nodes */
144 	for (i=0; i<num_nodes; i++) {
145 		if (nodes[i].cn_member && nodes[i].cn_nodeid)
146 			nnodes++;
147 	}
148 	return nnodes;
149 }
150 
151 /* send_message with the fd check removed */
152 static int _cluster_send_message(const void *buf, int msglen, const char *csid,
153 				 const char *errtext)
154 {
155 	int nodeid = 0;
156 
157 	if (csid)
158 		memcpy(&nodeid, csid, CMAN_MAX_CSID_LEN);
159 
160 	if (cman_send_data(c_handle, buf, msglen, 0, CLUSTER_PORT_CLVMD, nodeid) <= 0)
161 	{
162 		log_error("%s", errtext);
163 	}
164 	return msglen;
165 }
166 
167 static void _get_our_csid(char *csid)
168 {
169 	if (this_node.cn_nodeid == 0) {
170 		cman_get_node(c_handle, 0, &this_node);
171 	}
172 	memcpy(csid, &this_node.cn_nodeid, CMAN_MAX_CSID_LEN);
173 }
174 
175 /* Call a callback routine for each node is that known (down means not running a clvmd) */
176 static int _cluster_do_node_callback(struct local_client *client,
177 				     void (*callback) (struct local_client *,
178 						       const char *,
179 						       int))
180 {
181 	int i;
182 	int somedown = 0;
183 
184 	for (i = 0; i < _get_num_nodes(); i++) {
185 		if (nodes[i].cn_member && nodes[i].cn_nodeid) {
186 			int up = (int)(long)dm_hash_lookup_binary(node_updown_hash, (char *)&nodes[i].cn_nodeid, sizeof(int));
187 
188 			callback(client, (char *)&nodes[i].cn_nodeid, up);
189 			if (!up)
190 				somedown = -1;
191 		}
192 	}
193 	return somedown;
194 }
195 
196 /* Process OOB messages from the cluster socket */
197 static void event_callback(cman_handle_t handle, void *private, int reason, int arg)
198 {
199 	char namebuf[MAX_CLUSTER_MEMBER_NAME_LEN];
200 
201 	switch (reason) {
202         case CMAN_REASON_PORTCLOSED:
203 		name_from_nodeid(arg, namebuf);
204 		log_notice("clvmd on node %s has died\n", namebuf);
205 		DEBUGLOG("Got port closed message, removing node %s\n", namebuf);
206 
207 		dm_hash_insert_binary(node_updown_hash, (char *)&arg, sizeof(int), (void *)0);
208 		break;
209 
210 	case CMAN_REASON_STATECHANGE:
211 		DEBUGLOG("Got state change message, re-reading members list\n");
212 		get_members();
213 		break;
214 
215 #if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2
216 	case CMAN_REASON_PORTOPENED:
217 		/* Ignore this, wait for startup message from clvmd itself */
218 		break;
219 
220 	case CMAN_REASON_TRY_SHUTDOWN:
221 		DEBUGLOG("Got try shutdown, sending OK\n");
222 		cman_replyto_shutdown(c_handle, 1);
223 		break;
224 #endif
225 	default:
226 		/* ERROR */
227 		DEBUGLOG("Got unknown event callback message: %d\n", reason);
228 		break;
229 	}
230 }
231 
232 static struct local_client *cman_client;
233 static int _cluster_fd_callback(struct local_client *fd, char *buf, int len,
234 				const char *csid,
235 				struct local_client **new_client)
236 {
237 
238 	/* Save this for data_callback */
239 	cman_client = fd;
240 
241 	/* We never return a new client */
242 	*new_client = NULL;
243 
244 	return cman_dispatch(c_handle, 0);
245 }
246 
247 
248 static void data_callback(cman_handle_t handle, void *private,
249 			  char *buf, int len, uint8_t port, int nodeid)
250 {
251 	/* Ignore looped back messages */
252 	if (nodeid == this_node.cn_nodeid)
253 		return;
254 	process_message(cman_client, buf, len, (char *)&nodeid);
255 }
256 
257 static void _add_up_node(const char *csid)
258 {
259 	/* It's up ! */
260 	int nodeid = nodeid_from_csid(csid);
261 
262 	dm_hash_insert_binary(node_updown_hash, (char *)&nodeid, sizeof(int), (void *)1);
263 	DEBUGLOG("Added new node %d to updown list\n", nodeid);
264 }
265 
266 static void _cluster_closedown()
267 {
268 	destroy_lvhash();
269 	dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
270 	cman_finish(c_handle);
271 }
272 
273 static int is_listening(int nodeid)
274 {
275 	int status;
276 
277 	do {
278 		status = cman_is_listening(c_handle, nodeid, CLUSTER_PORT_CLVMD);
279 		if (status < 0 && errno == EBUSY) {	/* Don't busywait */
280 			sleep(1);
281 			errno = EBUSY;	/* In case sleep trashes it */
282 		}
283 	}
284 	while (status < 0 && errno == EBUSY);
285 
286 	return status;
287 }
288 
289 /* Populate the list of CLVMDs running.
290    called only at startup time */
291 static void count_clvmds_running(void)
292 {
293 	int i;
294 
295 	for (i = 0; i < num_nodes; i++) {
296 		int nodeid = nodes[i].cn_nodeid;
297 
298 		if (is_listening(nodeid) == 1)
299 			dm_hash_insert_binary(node_updown_hash, (void *)&nodeid, sizeof(int), (void*)1);
300 		else
301 			dm_hash_insert_binary(node_updown_hash, (void *)&nodeid, sizeof(int), (void*)0);
302 	}
303 }
304 
305 /* Get a list of active cluster members */
306 static void get_members()
307 {
308 	int retnodes;
309 	int status;
310 	int i;
311 	int high_nodeid = 0;
312 
313 	num_nodes = cman_get_node_count(c_handle);
314 	if (num_nodes == -1) {
315 		log_error("Unable to get node count");
316 		return;
317 	}
318 
319 	/* Not enough room for new nodes list ? */
320 	if (num_nodes > count_nodes && nodes) {
321 		free(nodes);
322 		nodes = NULL;
323 	}
324 
325 	if (nodes == NULL) {
326 		count_nodes = num_nodes + 10; /* Overallocate a little */
327 		nodes = malloc(count_nodes * sizeof(struct cman_node));
328 		if (!nodes) {
329 			log_error("Unable to allocate nodes array\n");
330 			exit(5);
331 		}
332 	}
333 
334 	status = cman_get_nodes(c_handle, count_nodes, &retnodes, nodes);
335 	if (status < 0) {
336 		log_error("Unable to get node details");
337 		exit(6);
338 	}
339 
340 	/* Get the highest nodeid */
341 	for (i=0; i<retnodes; i++) {
342 		if (nodes[i].cn_nodeid > high_nodeid)
343 			high_nodeid = nodes[i].cn_nodeid;
344 	}
345 }
346 
347 
348 /* Convert a node name to a CSID */
349 static int _csid_from_name(char *csid, const char *name)
350 {
351 	int i;
352 
353 	for (i = 0; i < num_nodes; i++) {
354 		if (strcmp(name, nodes[i].cn_name) == 0) {
355 			memcpy(csid, &nodes[i].cn_nodeid, CMAN_MAX_CSID_LEN);
356 			return 0;
357 		}
358 	}
359 	return -1;
360 }
361 
362 /* Convert a CSID to a node name */
363 static int _name_from_csid(const char *csid, char *name)
364 {
365 	int i;
366 
367 	for (i = 0; i < num_nodes; i++) {
368 		if (memcmp(csid, &nodes[i].cn_nodeid, CMAN_MAX_CSID_LEN) == 0) {
369 			strcpy(name, nodes[i].cn_name);
370 			return 0;
371 		}
372 	}
373 	/* Who?? */
374 	strcpy(name, "Unknown");
375 	return -1;
376 }
377 
378 /* Convert a node ID to a node name */
379 static int name_from_nodeid(int nodeid, char *name)
380 {
381 	int i;
382 
383 	for (i = 0; i < num_nodes; i++) {
384 		if (nodeid == nodes[i].cn_nodeid) {
385 			strcpy(name, nodes[i].cn_name);
386 			return 0;
387 		}
388 	}
389 	/* Who?? */
390 	strcpy(name, "Unknown");
391 	return -1;
392 }
393 
394 /* Convert a CSID to a node ID */
395 static int nodeid_from_csid(const char *csid)
396 {
397         int nodeid;
398 
399 	memcpy(&nodeid, csid, CMAN_MAX_CSID_LEN);
400 
401 	return nodeid;
402 }
403 
404 static int _is_quorate()
405 {
406 	return cman_is_quorate(c_handle);
407 }
408 
409 static void sync_ast_routine(void *arg)
410 {
411 	struct lock_wait *lwait = arg;
412 
413 	pthread_mutex_lock(&lwait->mutex);
414 	pthread_cond_signal(&lwait->cond);
415 	pthread_mutex_unlock(&lwait->mutex);
416 }
417 
418 static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
419 {
420 	int status;
421 	struct lock_wait lwait;
422 
423 	if (!lockid) {
424 		errno = EINVAL;
425 		return -1;
426 	}
427 
428 	DEBUGLOG("sync_lock: '%s' mode:%d flags=%d\n", resource,mode,flags);
429 	/* Conversions need the lockid in the LKSB */
430 	if (flags & LKF_CONVERT)
431 		lwait.lksb.sb_lkid = *lockid;
432 
433 	pthread_cond_init(&lwait.cond, NULL);
434 	pthread_mutex_init(&lwait.mutex, NULL);
435 	pthread_mutex_lock(&lwait.mutex);
436 
437 	status = dlm_ls_lock(lockspace,
438 			     mode,
439 			     &lwait.lksb,
440 			     flags,
441 			     resource,
442 			     strlen(resource),
443 			     0, sync_ast_routine, &lwait, NULL, NULL);
444 	if (status)
445 		return status;
446 
447 	/* Wait for it to complete */
448 	pthread_cond_wait(&lwait.cond, &lwait.mutex);
449 	pthread_mutex_unlock(&lwait.mutex);
450 
451 	*lockid = lwait.lksb.sb_lkid;
452 
453 	errno = lwait.lksb.sb_status;
454 	DEBUGLOG("sync_lock: returning lkid %x\n", *lockid);
455 	if (lwait.lksb.sb_status)
456 		return -1;
457 	else
458 		return 0;
459 }
460 
461 static int _sync_unlock(const char *resource /* UNUSED */, int lockid)
462 {
463 	int status;
464 	struct lock_wait lwait;
465 
466 	DEBUGLOG("sync_unlock: '%s' lkid:%x\n", resource, lockid);
467 
468 	pthread_cond_init(&lwait.cond, NULL);
469 	pthread_mutex_init(&lwait.mutex, NULL);
470 	pthread_mutex_lock(&lwait.mutex);
471 
472 	status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait);
473 
474 	if (status)
475 		return status;
476 
477 	/* Wait for it to complete */
478 	pthread_cond_wait(&lwait.cond, &lwait.mutex);
479 	pthread_mutex_unlock(&lwait.mutex);
480 
481 	errno = lwait.lksb.sb_status;
482 	if (lwait.lksb.sb_status != EUNLOCK)
483 		return -1;
484 	else
485 		return 0;
486 
487 }
488 
489 static int _get_cluster_name(char *buf, int buflen)
490 {
491 	cman_cluster_t cluster_info;
492 	int status;
493 
494 	status = cman_get_cluster(c_handle, &cluster_info);
495 	if (!status) {
496 		strncpy(buf, cluster_info.ci_name, buflen);
497 	}
498 	return status;
499 }
500 
501 static struct cluster_ops _cluster_cman_ops = {
502 	.cluster_init_completed   = _cluster_init_completed,
503 	.cluster_send_message     = _cluster_send_message,
504 	.name_from_csid           = _name_from_csid,
505 	.csid_from_name           = _csid_from_name,
506 	.get_num_nodes            = _get_num_nodes,
507 	.cluster_fd_callback      = _cluster_fd_callback,
508 	.get_main_cluster_fd      = _get_main_cluster_fd,
509 	.cluster_do_node_callback = _cluster_do_node_callback,
510 	.is_quorate               = _is_quorate,
511 	.get_our_csid             = _get_our_csid,
512 	.add_up_node              = _add_up_node,
513 	.cluster_closedown        = _cluster_closedown,
514 	.get_cluster_name         = _get_cluster_name,
515 	.sync_lock                = _sync_lock,
516 	.sync_unlock              = _sync_unlock,
517 };
518 
519 struct cluster_ops *init_cman_cluster(void)
520 {
521 	if (!_init_cluster())
522 		return &_cluster_cman_ops;
523 	else
524 		return NULL;
525 }
526