1 /* $NetBSD: clvmd-cman.c,v 1.1.1.2 2009/12/02 00:27:01 haad Exp $ */
2
3 /*
4 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU General Public License v.2.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 /*
19 * CMAN communication layer for clvmd.
20 */
21
22 #define _GNU_SOURCE
23 #define _FILE_OFFSET_BITS 64
24
25 #include <configure.h>
26 #include <pthread.h>
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/socket.h>
30 #include <sys/uio.h>
31 #include <sys/un.h>
32 #include <sys/time.h>
33 #include <sys/ioctl.h>
34 #include <sys/utsname.h>
35 #include <syslog.h>
36 #include <netinet/in.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <signal.h>
41 #include <unistd.h>
42 #include <fcntl.h>
43 #include <getopt.h>
44 #include <errno.h>
45 #include <libdevmapper.h>
46 #include <libdlm.h>
47
48 #include "clvmd-comms.h"
49 #include "clvm.h"
50 #include "lvm-logging.h"
51 #include "clvmd.h"
52 #include "lvm-functions.h"
53
/* Name of the DLM lockspace that _init_cluster() creates or opens for LV & VG locks. */
#define LOCKSPACE_NAME "clvmd"
55
/*
 * Associates a CMAN node entry with a clvmd_up flag.
 * NOTE(review): nothing in this file references this type - confirm it is
 * still needed.
 */
struct clvmd_node {
	struct cman_node *node;
	int clvmd_up;
};
61
/* Node count reported by CMAN; set by get_members() and used as the bound when scanning nodes[]. */
static int num_nodes;
/* Heap-allocated array of node descriptions, filled in by cman_get_nodes() in get_members(). */
static struct cman_node *nodes = NULL;
/* Description of the local node; fetched lazily by _get_our_csid(). */
static struct cman_node this_node;
static int count_nodes; /* size of allocated nodes array */
/* Maps a node ID (binary int key) to (void *)1 when clvmd is up there, (void *)0 when it is not. */
static struct dm_hash_table *node_updown_hash;
/* DLM lockspace used by _sync_lock() and _sync_unlock(). */
static dlm_lshandle_t *lockspace;
/* Handle for the CMAN connection opened by _init_cluster(). */
static cman_handle_t c_handle;

/* Forward declarations for helpers and CMAN callbacks defined later in this file. */
static void count_clvmds_running(void);
static void get_members(void);
static int nodeid_from_csid(const char *csid);
static int name_from_nodeid(int nodeid, char *name);
static void event_callback(cman_handle_t handle, void *private, int reason, int arg);
static void data_callback(cman_handle_t handle, void *private,
			  char *buf, int len, uint8_t port, int nodeid);
77
/*
 * Wait state for one synchronous DLM request: the requester blocks on
 * cond (holding mutex) until sync_ast_routine() signals it, then reads the
 * outcome from lksb.
 */
struct lock_wait {
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	struct dlm_lksb lksb;
};
83
_init_cluster(void)84 static int _init_cluster(void)
85 {
86 node_updown_hash = dm_hash_create(100);
87
88 /* Open the cluster communication socket */
89 c_handle = cman_init(NULL);
90 if (!c_handle) {
91 syslog(LOG_ERR, "Can't open cluster manager socket: %m");
92 return -1;
93 }
94 DEBUGLOG("Connected to CMAN\n");
95
96 if (cman_start_recv_data(c_handle, data_callback, CLUSTER_PORT_CLVMD)) {
97 syslog(LOG_ERR, "Can't bind cluster socket: %m");
98 return -1;
99 }
100
101 if (cman_start_notification(c_handle, event_callback)) {
102 syslog(LOG_ERR, "Can't start cluster event listening");
103 return -1;
104 }
105
106 /* Get the cluster members list */
107 get_members();
108 count_clvmds_running();
109
110 DEBUGLOG("CMAN initialisation complete\n");
111
112 /* Create a lockspace for LV & VG locks to live in */
113 lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
114 if (!lockspace) {
115 if (errno == EEXIST) {
116 lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
117 }
118 if (!lockspace) {
119 syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
120 return -1;
121 }
122 }
123 dlm_ls_pthread_init(lockspace);
124 DEBUGLOG("DLM initialisation complete\n");
125 return 0;
126 }
127
/* cluster_ops hook: simply forwards to clvmd_cluster_init_completed(). */
static void _cluster_init_completed(void)
{
	clvmd_cluster_init_completed();
}
132
_get_main_cluster_fd()133 static int _get_main_cluster_fd()
134 {
135 return cman_get_fd(c_handle);
136 }
137
_get_num_nodes()138 static int _get_num_nodes()
139 {
140 int i;
141 int nnodes = 0;
142
143 /* return number of ACTIVE nodes */
144 for (i=0; i<num_nodes; i++) {
145 if (nodes[i].cn_member && nodes[i].cn_nodeid)
146 nnodes++;
147 }
148 return nnodes;
149 }
150
151 /* send_message with the fd check removed */
_cluster_send_message(const void * buf,int msglen,const char * csid,const char * errtext)152 static int _cluster_send_message(const void *buf, int msglen, const char *csid,
153 const char *errtext)
154 {
155 int nodeid = 0;
156
157 if (csid)
158 memcpy(&nodeid, csid, CMAN_MAX_CSID_LEN);
159
160 if (cman_send_data(c_handle, buf, msglen, 0, CLUSTER_PORT_CLVMD, nodeid) <= 0)
161 {
162 log_error("%s", errtext);
163 }
164 return msglen;
165 }
166
_get_our_csid(char * csid)167 static void _get_our_csid(char *csid)
168 {
169 if (this_node.cn_nodeid == 0) {
170 cman_get_node(c_handle, 0, &this_node);
171 }
172 memcpy(csid, &this_node.cn_nodeid, CMAN_MAX_CSID_LEN);
173 }
174
175 /* Call a callback routine for each node is that known (down means not running a clvmd) */
_cluster_do_node_callback(struct local_client * client,void (* callback)(struct local_client *,const char *,int))176 static int _cluster_do_node_callback(struct local_client *client,
177 void (*callback) (struct local_client *,
178 const char *,
179 int))
180 {
181 int i;
182 int somedown = 0;
183
184 for (i = 0; i < _get_num_nodes(); i++) {
185 if (nodes[i].cn_member && nodes[i].cn_nodeid) {
186 int up = (int)(long)dm_hash_lookup_binary(node_updown_hash, (char *)&nodes[i].cn_nodeid, sizeof(int));
187
188 callback(client, (char *)&nodes[i].cn_nodeid, up);
189 if (!up)
190 somedown = -1;
191 }
192 }
193 return somedown;
194 }
195
196 /* Process OOB messages from the cluster socket */
event_callback(cman_handle_t handle,void * private,int reason,int arg)197 static void event_callback(cman_handle_t handle, void *private, int reason, int arg)
198 {
199 char namebuf[MAX_CLUSTER_MEMBER_NAME_LEN];
200
201 switch (reason) {
202 case CMAN_REASON_PORTCLOSED:
203 name_from_nodeid(arg, namebuf);
204 log_notice("clvmd on node %s has died\n", namebuf);
205 DEBUGLOG("Got port closed message, removing node %s\n", namebuf);
206
207 dm_hash_insert_binary(node_updown_hash, (char *)&arg, sizeof(int), (void *)0);
208 break;
209
210 case CMAN_REASON_STATECHANGE:
211 DEBUGLOG("Got state change message, re-reading members list\n");
212 get_members();
213 break;
214
215 #if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2
216 case CMAN_REASON_PORTOPENED:
217 /* Ignore this, wait for startup message from clvmd itself */
218 break;
219
220 case CMAN_REASON_TRY_SHUTDOWN:
221 DEBUGLOG("Got try shutdown, sending OK\n");
222 cman_replyto_shutdown(c_handle, 1);
223 break;
224 #endif
225 default:
226 /* ERROR */
227 DEBUGLOG("Got unknown event callback message: %d\n", reason);
228 break;
229 }
230 }
231
232 static struct local_client *cman_client;
_cluster_fd_callback(struct local_client * fd,char * buf,int len,const char * csid,struct local_client ** new_client)233 static int _cluster_fd_callback(struct local_client *fd, char *buf, int len,
234 const char *csid,
235 struct local_client **new_client)
236 {
237
238 /* Save this for data_callback */
239 cman_client = fd;
240
241 /* We never return a new client */
242 *new_client = NULL;
243
244 return cman_dispatch(c_handle, 0);
245 }
246
247
/*
 * libcman data callback: hands a received message to process_message(),
 * passing the sender's node ID bytes as its CSID.  Messages that originate
 * from this node (looped back) are dropped.
 */
static void data_callback(cman_handle_t handle, void *private,
			  char *buf, int len, uint8_t port, int nodeid)
{
	if (nodeid != this_node.cn_nodeid)
		process_message(cman_client, buf, len, (char *)&nodeid);
}
256
_add_up_node(const char * csid)257 static void _add_up_node(const char *csid)
258 {
259 /* It's up ! */
260 int nodeid = nodeid_from_csid(csid);
261
262 dm_hash_insert_binary(node_updown_hash, (char *)&nodeid, sizeof(int), (void *)1);
263 DEBUGLOG("Added new node %d to updown list\n", nodeid);
264 }
265
_cluster_closedown()266 static void _cluster_closedown()
267 {
268 destroy_lvhash();
269 dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
270 cman_finish(c_handle);
271 }
272
is_listening(int nodeid)273 static int is_listening(int nodeid)
274 {
275 int status;
276
277 do {
278 status = cman_is_listening(c_handle, nodeid, CLUSTER_PORT_CLVMD);
279 if (status < 0 && errno == EBUSY) { /* Don't busywait */
280 sleep(1);
281 errno = EBUSY; /* In case sleep trashes it */
282 }
283 }
284 while (status < 0 && errno == EBUSY);
285
286 return status;
287 }
288
289 /* Populate the list of CLVMDs running.
290 called only at startup time */
count_clvmds_running(void)291 static void count_clvmds_running(void)
292 {
293 int i;
294
295 for (i = 0; i < num_nodes; i++) {
296 int nodeid = nodes[i].cn_nodeid;
297
298 if (is_listening(nodeid) == 1)
299 dm_hash_insert_binary(node_updown_hash, (void *)&nodeid, sizeof(int), (void*)1);
300 else
301 dm_hash_insert_binary(node_updown_hash, (void *)&nodeid, sizeof(int), (void*)0);
302 }
303 }
304
305 /* Get a list of active cluster members */
get_members()306 static void get_members()
307 {
308 int retnodes;
309 int status;
310 int i;
311 int high_nodeid = 0;
312
313 num_nodes = cman_get_node_count(c_handle);
314 if (num_nodes == -1) {
315 log_error("Unable to get node count");
316 return;
317 }
318
319 /* Not enough room for new nodes list ? */
320 if (num_nodes > count_nodes && nodes) {
321 free(nodes);
322 nodes = NULL;
323 }
324
325 if (nodes == NULL) {
326 count_nodes = num_nodes + 10; /* Overallocate a little */
327 nodes = malloc(count_nodes * sizeof(struct cman_node));
328 if (!nodes) {
329 log_error("Unable to allocate nodes array\n");
330 exit(5);
331 }
332 }
333
334 status = cman_get_nodes(c_handle, count_nodes, &retnodes, nodes);
335 if (status < 0) {
336 log_error("Unable to get node details");
337 exit(6);
338 }
339
340 /* Get the highest nodeid */
341 for (i=0; i<retnodes; i++) {
342 if (nodes[i].cn_nodeid > high_nodeid)
343 high_nodeid = nodes[i].cn_nodeid;
344 }
345 }
346
347
348 /* Convert a node name to a CSID */
_csid_from_name(char * csid,const char * name)349 static int _csid_from_name(char *csid, const char *name)
350 {
351 int i;
352
353 for (i = 0; i < num_nodes; i++) {
354 if (strcmp(name, nodes[i].cn_name) == 0) {
355 memcpy(csid, &nodes[i].cn_nodeid, CMAN_MAX_CSID_LEN);
356 return 0;
357 }
358 }
359 return -1;
360 }
361
362 /* Convert a CSID to a node name */
_name_from_csid(const char * csid,char * name)363 static int _name_from_csid(const char *csid, char *name)
364 {
365 int i;
366
367 for (i = 0; i < num_nodes; i++) {
368 if (memcmp(csid, &nodes[i].cn_nodeid, CMAN_MAX_CSID_LEN) == 0) {
369 strcpy(name, nodes[i].cn_name);
370 return 0;
371 }
372 }
373 /* Who?? */
374 strcpy(name, "Unknown");
375 return -1;
376 }
377
378 /* Convert a node ID to a node name */
name_from_nodeid(int nodeid,char * name)379 static int name_from_nodeid(int nodeid, char *name)
380 {
381 int i;
382
383 for (i = 0; i < num_nodes; i++) {
384 if (nodeid == nodes[i].cn_nodeid) {
385 strcpy(name, nodes[i].cn_name);
386 return 0;
387 }
388 }
389 /* Who?? */
390 strcpy(name, "Unknown");
391 return -1;
392 }
393
394 /* Convert a CSID to a node ID */
nodeid_from_csid(const char * csid)395 static int nodeid_from_csid(const char *csid)
396 {
397 int nodeid;
398
399 memcpy(&nodeid, csid, CMAN_MAX_CSID_LEN);
400
401 return nodeid;
402 }
403
_is_quorate()404 static int _is_quorate()
405 {
406 return cman_is_quorate(c_handle);
407 }
408
sync_ast_routine(void * arg)409 static void sync_ast_routine(void *arg)
410 {
411 struct lock_wait *lwait = arg;
412
413 pthread_mutex_lock(&lwait->mutex);
414 pthread_cond_signal(&lwait->cond);
415 pthread_mutex_unlock(&lwait->mutex);
416 }
417
_sync_lock(const char * resource,int mode,int flags,int * lockid)418 static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
419 {
420 int status;
421 struct lock_wait lwait;
422
423 if (!lockid) {
424 errno = EINVAL;
425 return -1;
426 }
427
428 DEBUGLOG("sync_lock: '%s' mode:%d flags=%d\n", resource,mode,flags);
429 /* Conversions need the lockid in the LKSB */
430 if (flags & LKF_CONVERT)
431 lwait.lksb.sb_lkid = *lockid;
432
433 pthread_cond_init(&lwait.cond, NULL);
434 pthread_mutex_init(&lwait.mutex, NULL);
435 pthread_mutex_lock(&lwait.mutex);
436
437 status = dlm_ls_lock(lockspace,
438 mode,
439 &lwait.lksb,
440 flags,
441 resource,
442 strlen(resource),
443 0, sync_ast_routine, &lwait, NULL, NULL);
444 if (status)
445 return status;
446
447 /* Wait for it to complete */
448 pthread_cond_wait(&lwait.cond, &lwait.mutex);
449 pthread_mutex_unlock(&lwait.mutex);
450
451 *lockid = lwait.lksb.sb_lkid;
452
453 errno = lwait.lksb.sb_status;
454 DEBUGLOG("sync_lock: returning lkid %x\n", *lockid);
455 if (lwait.lksb.sb_status)
456 return -1;
457 else
458 return 0;
459 }
460
_sync_unlock(const char * resource,int lockid)461 static int _sync_unlock(const char *resource /* UNUSED */, int lockid)
462 {
463 int status;
464 struct lock_wait lwait;
465
466 DEBUGLOG("sync_unlock: '%s' lkid:%x\n", resource, lockid);
467
468 pthread_cond_init(&lwait.cond, NULL);
469 pthread_mutex_init(&lwait.mutex, NULL);
470 pthread_mutex_lock(&lwait.mutex);
471
472 status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait);
473
474 if (status)
475 return status;
476
477 /* Wait for it to complete */
478 pthread_cond_wait(&lwait.cond, &lwait.mutex);
479 pthread_mutex_unlock(&lwait.mutex);
480
481 errno = lwait.lksb.sb_status;
482 if (lwait.lksb.sb_status != EUNLOCK)
483 return -1;
484 else
485 return 0;
486
487 }
488
_get_cluster_name(char * buf,int buflen)489 static int _get_cluster_name(char *buf, int buflen)
490 {
491 cman_cluster_t cluster_info;
492 int status;
493
494 status = cman_get_cluster(c_handle, &cluster_info);
495 if (!status) {
496 strncpy(buf, cluster_info.ci_name, buflen);
497 }
498 return status;
499 }
500
501 static struct cluster_ops _cluster_cman_ops = {
502 .cluster_init_completed = _cluster_init_completed,
503 .cluster_send_message = _cluster_send_message,
504 .name_from_csid = _name_from_csid,
505 .csid_from_name = _csid_from_name,
506 .get_num_nodes = _get_num_nodes,
507 .cluster_fd_callback = _cluster_fd_callback,
508 .get_main_cluster_fd = _get_main_cluster_fd,
509 .cluster_do_node_callback = _cluster_do_node_callback,
510 .is_quorate = _is_quorate,
511 .get_our_csid = _get_our_csid,
512 .add_up_node = _add_up_node,
513 .cluster_closedown = _cluster_closedown,
514 .get_cluster_name = _get_cluster_name,
515 .sync_lock = _sync_lock,
516 .sync_unlock = _sync_unlock,
517 };
518
init_cman_cluster(void)519 struct cluster_ops *init_cman_cluster(void)
520 {
521 if (!_init_cluster())
522 return &_cluster_cman_ops;
523 else
524 return NULL;
525 }
526