/* $NetBSD: clvmd-gulm.c,v 1.1.1.2 2009/12/02 00:27:02 haad Exp $ */

/*
 * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * This provides the interface between clvmd and gulm as the cluster
 * and lock manager.
 *
 * It also provides the "liblm" functions, as it is hard (and pointless)
 * to separate them out when using gulm.
 *
 * What it does /not/ provide is the communications between clvmd daemons
 * on the cluster nodes. That is done in tcp-comms.c.
 */
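
/*
 * Rough picture of where this file sits (informal summary of the above):
 *
 *   clvmd main loop
 *    |- this file ..... gulm "core" socket (membership/quorum) and
 *    |                  gulm lockspace socket (lock traffic)
 *    |- tcp-comms.c ... clvmd <-> clvmd messages between cluster nodes
 */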

#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include <libdevmapper.h>
#include <ccs.h>
#include <libgulm.h>

#include "locking.h"
#include "lvm-logging.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
#include "clvmd.h"
#include "clvmd-gulm.h"

/* Hash table of nodes in the cluster */
static struct dm_hash_table *node_hash;

/* Hash table of outstanding lock requests */
static struct dm_hash_table *lock_hash;

/* Copy of the current quorate state */
static uint8_t gulm_quorate = 0;
static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;

/* Number of active nodes */
static int num_nodes;

static char *cluster_name;
static int in_shutdown = 0;

static pthread_mutex_t lock_start_mutex;
static volatile int lock_start_flag;

struct node_info
{
	enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
	char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
};

struct lock_wait
{
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	int status;
};
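
/*
 * How lock_wait is used (summary of the code below): the thread issuing a
 * lock request puts a lock_wait on its stack, registers it in lock_hash
 * keyed by the resource name, sends the request and blocks on 'cond'.
 * The lock-socket callback (lock_lock_state) looks the entry up by key,
 * stores the completion status and signals 'cond' to wake the requester.
 */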

/* Forward */
static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client);
static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client);
static int get_all_cluster_nodes(void);
static int _csid_from_name(char *csid, const char *name);
static void _cluster_closedown(void);

/* In tcp-comms.c */
extern struct dm_hash_table *sock_hash;

static int add_internal_client(int fd, fd_callback_t callback)
{
	struct local_client *client;

	DEBUGLOG("Add_internal_client, fd = %d\n", fd);

	/* Add a GULM file descriptor to the main loop */
	client = malloc(sizeof(struct local_client));
	if (!client)
	{
		DEBUGLOG("malloc failed\n");
		return -1;
	}

	memset(client, 0, sizeof(struct local_client));
	client->fd = fd;
	client->type = CLUSTER_INTERNAL;
	client->callback = callback;
	add_client(client);

	/* Set close-on-exec */
	fcntl(fd, F_SETFD, 1);

	return 0;
}

/* Gulm library handle */
static gulm_interface_p gulm_if;
static lg_core_callbacks_t core_callbacks;
static lg_lockspace_callbacks_t lock_callbacks;

static void badsig_handler(int sig)
{
	DEBUGLOG("got sig %d\n", sig);
	_cluster_closedown();
	exit(0);
}

static void _reread_config(void)
{
	/* Re-read CCS node list */
	DEBUGLOG("Re-reading CCS config\n");
	get_all_cluster_nodes();
}

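/*
 * _init_cluster(): start-up sequence as implemented below.
 *
 *   1. Read the cluster name and optional clvmd port from CCS.
 *   2. Create the node and lock hash tables and read the node list from CCS.
 *   3. lg_initialize() + lg_core_login()  - join the gulm "core" service.
 *   4. init_comms(port)                   - start inter-clvmd TCP comms.
 *   5. lg_lock_login()                    - join the "CLVM" lockspace.
 *   6. lg_core_nodelist()                 - request the current node list;
 *      clvmd_cluster_init_completed() runs once the reply has arrived and
 *      we are quorate (see core_nodelist/core_statechange).
 */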
static int _init_cluster(void)
{
	int status;
	int ccs_h;
	int port = 0;
	char *portstr;

	/* Get cluster name from CCS */
	ccs_h = ccs_force_connect(NULL, 0);
	if (ccs_h < 0)
	{
		syslog(LOG_ERR, "Cannot log in to CCSD server\n");
		return -1;
	}

	ccs_get(ccs_h, "//cluster/@name", &cluster_name);
	DEBUGLOG("got cluster name %s\n", cluster_name);

	if (!ccs_get(ccs_h, "//cluster/clvm/@port", &portstr))
	{
		port = atoi(portstr);
		free(portstr);
		DEBUGLOG("got port number %d\n", port);

		if (port <= 0 || port >= 65536)
			port = 0;
	}

	ccs_disconnect(ccs_h);

	/* Block locking until we are logged in */
	pthread_mutex_init(&lock_start_mutex, NULL);
	pthread_mutex_lock(&lock_start_mutex);
	lock_start_flag = 1;

	node_hash = dm_hash_create(100);
	lock_hash = dm_hash_create(10);

	/* Get all nodes from CCS */
	if (get_all_cluster_nodes())
		return -1;

	/* Initialise GULM library */
	status = lg_initialize(&gulm_if, cluster_name, "clvmd");
	if (status)
	{
		DEBUGLOG("lg_initialize failed: %d\n", status);
		return status;
	}

	/* Connect to core - we are not "important" :-) */
	status = lg_core_login(gulm_if, 0);
	if (status)
	{
		DEBUGLOG("lg_core_login failed: %d\n", status);
		return status;
	}

	/* Initialise the inter-node comms */
	status = init_comms(port);
	if (status)
		return status;

	/* Add core FD to the list */
	status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
	if (status)
	{
		DEBUGLOG("can't allocate client space\n");
		return status;
	}

	/* Connect to the lock server */
	if (lg_lock_login(gulm_if, "CLVM"))
	{
		syslog(LOG_ERR, "Cannot log in to LOCK server\n");
		DEBUGLOG("Cannot log in to LOCK server\n");
		exit(88);
	}

	/* Add lockspace FD to the list */
	status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
	if (status)
	{
		DEBUGLOG("can't allocate client space\n");
		exit(status);
	}

	/* Request a list of nodes; we can't really do anything until
	   this comes back */
	status = lg_core_nodelist(gulm_if);
	if (status)
	{
		DEBUGLOG("lg_core_nodelist failed: %d\n", status);
		return status;
	}

	/* So I can kill it without taking GULM down too */
	signal(SIGINT, badsig_handler);
	signal(SIGTERM, badsig_handler);

	return 0;
}

static void _cluster_closedown(void)
{
	DEBUGLOG("cluster_closedown\n");
	in_shutdown = 1;
	destroy_lvhash();
	lg_lock_logout(gulm_if);
	lg_core_logout(gulm_if);
	lg_release(gulm_if);
}

/* Expire locks for a named node, or us */
#define GIO_KEY_SIZE 46
static void drop_expired_locks(char *nodename)
{
	struct utsname nodeinfo;
	uint8_t mask[GIO_KEY_SIZE];

	DEBUGLOG("Dropping expired locks for %s\n", nodename ? nodename : "(null)");
	memset(mask, 0xff, GIO_KEY_SIZE);

	if (!nodename)
	{
		uname(&nodeinfo);
		nodename = nodeinfo.nodename;
	}

	if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
	{
		DEBUGLOG("Error calling lg_lock_drop_exp()\n");
	}
}

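/*
 * The two functions below are the main-loop callbacks registered for the
 * gulm core and lockspace selector fds by _init_cluster().  They simply
 * hand the readable socket back to libgulm, which decodes the message and
 * invokes the matching entry in core_callbacks/lock_callbacks (defined
 * further down in this file).
 */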
static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client)
{
	int status;

	*new_client = NULL;
	status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
	return status < 0 ? status : 1;
}

static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client)
{
	int status;

	*new_client = NULL;
	status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
	return status < 0 ? status : 1;
}


/* CORE callback routines */
static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
{
	DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n",
		 gen, error, rank, corestate);

	if (error)
		exit(error);

	/* Get the current core state (for quorum) */
	lg_core_corestate(gulm_if);

	return 0;
}

static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
	if (nodestate == lg_core_Logged_in)
	{
		/* Don't clobber NODE_CLVMD state */
		if (ninfo->state != NODE_CLVMD)
		{
			if (ninfo->state == NODE_UNKNOWN ||
			    ninfo->state == NODE_DOWN)
				num_nodes++;

			ninfo->state = NODE_UP;
		}
	}
	else
	{
		if (nodestate == lg_core_Expired ||
		    nodestate == lg_core_Fenced ||
		    nodestate == lg_core_Logged_out)
		{
			if (ninfo->state != NODE_DOWN)
				num_nodes--;
			ninfo->state = NODE_DOWN;
		}
	}
	/* Gulm doesn't always send node DOWN events, so even if this is a node UP we must
	 * assume (ahem) that it previously went down at some time. So we close
	 * the sockets here to make sure that we don't have any dead connections
	 * to that node.
	 */
	tcp_remove_client(csid);

	DEBUGLOG("set_node_state, '%s' state = %d num_nodes=%d\n",
		 ninfo->name, ninfo->state, num_nodes);
}

static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
	if (!ninfo)
	{
		/* If we can't find that node then re-read the config file in case it
		   was added after we were started */
		DEBUGLOG("Node %s not found, re-reading config file\n", name);
		get_all_cluster_nodes();

		/* Now try again */
		ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
		if (!ninfo)
		{
			DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
			return NULL;
		}
	}

	set_node_state(ninfo, (char *)ip, state);

	return ninfo;
}

static void _get_our_csid(char *csid)
{
	get_our_gulm_csid(csid);
}

static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *ip, uint8_t state)
{
	DEBUGLOG("CORE nodelist\n");

	if (type == lglcb_start)
	{
		DEBUGLOG("Got Nodelist, start\n");
	}
	else if (type == lglcb_item)
	{
		DEBUGLOG("Got nodelist, item: %s, %#x\n", name, state);

		add_or_set_node(name, ip, state);
	}
	else if (type == lglcb_stop)
	{
		char ourcsid[GULM_MAX_CSID_LEN];

		DEBUGLOG("Got Nodelist, stop\n");
		if (gulm_quorate)
		{
			clvmd_cluster_init_completed();
			init_state = INIT_DONE;
		}
		else if (init_state == INIT_NOTDONE)
		{
			init_state = INIT_WAITQUORATE;
		}

		/* Mark ourself as up */
		_get_our_csid(ourcsid);
		gulm_add_up_node(ourcsid);
	}
	else
	{
		DEBUGLOG("Unknown lglcb_t %#x\n", type);
	}

	return 0;
}


static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
{
	DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
		 quorate, corestate, mastername);

	gulm_quorate = quorate;
	if (quorate && init_state == INIT_WAITQUORATE)
	{
		clvmd_cluster_init_completed();
		init_state = INIT_DONE;
	}
	return 0;
}

static int core_nodechange(void *misc, char *nodename, struct in6_addr *nodeip, uint8_t nodestate)
{
	struct node_info *ninfo;
	struct in6_addr lookup_ip;

	DEBUGLOG("CORE node change, name=%s, state = %d\n", nodename, nodestate);

	/* If we don't get nodeip here, try a lookup by name.
	   A csid is the node's binary address (GULM_MAX_CSID_LEN bytes),
	   so an in6_addr on the stack is big enough to hold the result. */
	if (!nodeip)
	{
		if (_csid_from_name((char *)&lookup_ip, nodename))
			return 0;
		nodeip = &lookup_ip;
	}

	ninfo = add_or_set_node(nodename, nodeip, nodestate);
	if (!ninfo)
		return 0;

	/* Check if we need to drop any expired locks */
	if (ninfo->state == NODE_DOWN)
	{
		drop_expired_locks(nodename);
	}

	return 0;
}

static int core_error(void *misc, uint32_t err)
{
	DEBUGLOG("CORE error: %d\n", err);
	/* Not sure what happens here */
	return 0;
}

/* LOCK callback routines */
static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
{
	DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n",
		 error, which);

	if (error)
		exit(error);

	/* Drop any expired locks for us that might be hanging around */
	drop_expired_locks(NULL);

	/* Enable locking operations in other threads */
	if (lock_start_flag)
	{
		lock_start_flag = 0;
		pthread_mutex_unlock(&lock_start_mutex);
	}

	return 0;
}

static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen,
			   uint64_t subid, uint64_t start, uint64_t stop,
			   uint8_t state, uint32_t flags, uint32_t error,
			   uint8_t *LVB, uint16_t LVBlen)
{
	struct lock_wait *lwait;

	DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);

	/* No waiting process to wake up when we are shutting down */
	if (in_shutdown)
		return 0;

	lwait = dm_hash_lookup(lock_hash, (char *)key);
	if (!lwait)
	{
		DEBUGLOG("Can't find hash entry for resource %s\n", key);
		return 0;
	}
	lwait->status = error;
	pthread_mutex_lock(&lwait->mutex);
	pthread_cond_signal(&lwait->cond);
	pthread_mutex_unlock(&lwait->mutex);

	return 0;
}

static int lock_error(void *misc, uint32_t err)
{
	DEBUGLOG("LOCK error: %d\n", err);
	/* Not sure what happens here */
	return 0;
}


/* CORE callbacks */
static lg_core_callbacks_t core_callbacks = {
	.login_reply = core_login_reply,
	.nodelist = core_nodelist,
	.statechange = core_statechange,
	.nodechange = core_nodechange,
	.error = core_error,
};

/* LOCK callbacks */
static lg_lockspace_callbacks_t lock_callbacks = {
	.login_reply = lock_login_reply,
	.lock_state = lock_lock_state,
	.error = lock_error,
};


/* Allow tcp-comms to loop round the list of active nodes */
int get_next_node_csid(void **context, char *csid)
{
	struct node_info *ninfo = NULL;

	/* First node */
	if (!*context)
	{
		*context = dm_hash_get_first(node_hash);
	}
	else
	{
		*context = dm_hash_get_next(node_hash, *context);
	}
	if (*context)
		ninfo = dm_hash_get_data(node_hash, *context);

	/* Find a node that is UP */
	while (*context && ninfo->state == NODE_DOWN)
	{
		*context = dm_hash_get_next(node_hash, *context);
		if (*context)
		{
			ninfo = dm_hash_get_data(node_hash, *context);
		}
	}

	if (!*context || ninfo->state == NODE_DOWN)
	{
		return 0;
	}

	memcpy(csid, dm_hash_get_key(node_hash, *context), GULM_MAX_CSID_LEN);
	return 1;
}
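
/*
 * Typical iteration over the known (non-DOWN) nodes, as used by the
 * tcp-comms code (illustrative sketch only; send_something_to() is a
 * placeholder):
 *
 *     void *context = NULL;
 *     char csid[GULM_MAX_CSID_LEN];
 *
 *     while (get_next_node_csid(&context, csid))
 *             send_something_to(csid);
 */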

int gulm_name_from_csid(const char *csid, char *name)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
	if (!ninfo)
	{
		sprintf(name, "UNKNOWN %s", print_csid(csid));
		return -1;
	}

	strcpy(name, ninfo->name);
	return 0;
}


static int _csid_from_name(char *csid, const char *name)
{
	struct dm_hash_node *hn;
	struct node_info *ninfo;

	dm_hash_iterate(hn, node_hash)
	{
		ninfo = dm_hash_get_data(node_hash, hn);
		if (strcmp(ninfo->name, name) == 0)
		{
			memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);
			return 0;
		}
	}
	return -1;
}

static int _get_num_nodes(void)
{
	DEBUGLOG("num_nodes = %d\n", num_nodes);
	return num_nodes;
}

/* Node is now known to be running a clvmd */
void gulm_add_up_node(const char *csid)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
	if (!ninfo) {
		DEBUGLOG("gulm_add_up_node no node_hash entry for csid %s\n", print_csid(csid));
		return;
	}

	DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);

	if (ninfo->state == NODE_DOWN)
		num_nodes++;
	ninfo->state = NODE_CLVMD;
}

/* Node is now known to be NOT running a clvmd */
void add_down_node(char *csid)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
	if (!ninfo)
		return;

	/* Only set it to UP if it was previously known to be
	   running clvmd - gulm may set it DOWN quite soon */
	if (ninfo->state == NODE_CLVMD)
		ninfo->state = NODE_UP;
	drop_expired_locks(ninfo->name);
}

/* Call a callback for each node, so the caller knows whether it's up or down */
static int _cluster_do_node_callback(struct local_client *master_client,
				     void (*callback)(struct local_client *, const char *csid, int node_up))
{
	struct dm_hash_node *hn;
	struct node_info *ninfo;
	int somedown = 0;

	dm_hash_iterate(hn, node_hash)
	{
		char csid[GULM_MAX_CSID_LEN];
		struct local_client *client;

		ninfo = dm_hash_get_data(node_hash, hn);
		memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);

		DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state);

		client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
		if (!client)
		{
			/* If it's up but not connected, try to make contact */
			if (ninfo->state == NODE_UP)
				gulm_connect_csid(csid, &client);

			client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
		}
		DEBUGLOG("down_callback2. node %s, state = %d\n", ninfo->name, ninfo->state);
		if (ninfo->state != NODE_DOWN)
			callback(master_client, csid, ninfo->state == NODE_CLVMD);

		if (ninfo->state != NODE_CLVMD)
			somedown = -1;
	}
	return somedown;
}

/* Convert gulm error codes to unix errno numbers */
static int gulm_to_errno(int gulm_ret)
{
	switch (gulm_ret)
	{
	case lg_err_TryFailed:
	case lg_err_AlreadyPend:
		errno = EAGAIN;
		break;

		/* More?? */
	default:
		errno = EINVAL;
	}

	return gulm_ret ? -1 : 0;
}


/* Real locking */
static int _lock_resource(char *resource, int mode, int flags, int *lockid)
{
	int status;
	struct lock_wait lwait;

	/* Wait until the lock module is ready */
	if (lock_start_flag)
	{
		pthread_mutex_lock(&lock_start_mutex);
		pthread_mutex_unlock(&lock_start_mutex);
	}

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);
	pthread_mutex_lock(&lwait.mutex);

	/* This needs to be converted from the DLM/LVM2 value to the GULM flag */
	if (flags & LKF_NOQUEUE)
		flags = lg_lock_flag_Try;

	dm_hash_insert(lock_hash, resource, &lwait);
	DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);

	status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
				   0, 0, 0,
				   mode, flags, NULL, 0);
	if (status)
	{
		DEBUGLOG("lg_lock_state returned %d\n", status);
		return status;
	}

	/* Wait for it to complete */
	pthread_cond_wait(&lwait.cond, &lwait.mutex);
	pthread_mutex_unlock(&lwait.mutex);

	dm_hash_remove(lock_hash, resource);
	DEBUGLOG("lock-resource returning %d\n", lwait.status);

	return gulm_to_errno(lwait.status);
}


static int _unlock_resource(char *resource, int lockid)
{
	int status;
	struct lock_wait lwait;

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);
	pthread_mutex_lock(&lwait.mutex);

	dm_hash_insert(lock_hash, resource, &lwait);

	DEBUGLOG("unlock_resource %s\n", resource);
	status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
				   0, 0, 0,
				   lg_lock_state_Unlock, 0, NULL, 0);

	if (status)
	{
		DEBUGLOG("lg_lock_state(unlock) returned %d\n", status);
		return status;
	}

	/* When we are shutting down, don't wait for unlocks
	   to be acknowledged, just do it. */
	if (in_shutdown)
		return status;

	/* Wait for it to complete */
	pthread_cond_wait(&lwait.cond, &lwait.mutex);
	pthread_mutex_unlock(&lwait.mutex);

	dm_hash_remove(lock_hash, resource);

	return gulm_to_errno(lwait.status);
}


/* These two locking functions MUST be called in a separate thread from
   the clvmd main loop because they expect to be woken up by it.

   These are abstractions around the real locking functions (above)
   as we need to emulate the DLM's EX/PW/CW interaction with GULM using
   two locks.
   To aid unlocking, we store the lock mode in the lockid (as GULM
   doesn't use this).
*/
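
/*
 * Summary of the two-lock emulation implemented below (derived from the
 * switch statements in _sync_lock/_sync_unlock):
 *
 *   LVM mode             lock "<resource>-1"      lock "<resource>-2"
 *   LCK_EXCL             Exclusive                Exclusive (try-lock)
 *   LCK_READ/LCK_PREAD   Shared                   unlocked
 *   LCK_WRITE            unlocked                 Exclusive
 *
 * *lockid is set to the LVM mode so that _sync_unlock() knows what is
 * being released.
 */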
static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
{
	int status;
	char lock1[strlen(resource)+3];
	char lock2[strlen(resource)+3];

	snprintf(lock1, sizeof(lock1), "%s-1", resource);
	snprintf(lock2, sizeof(lock2), "%s-2", resource);

	switch (mode)
	{
	case LCK_EXCL:
		status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
		if (status)
			goto out;

		/* If we can't get this lock too then bail out */
		status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
		if (status == lg_err_TryFailed)
		{
			_unlock_resource(lock1, *lockid);
			status = -1;
			errno = EAGAIN;
		}
		break;

	case LCK_PREAD:
	case LCK_READ:
		status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
		if (status)
			goto out;
		status = _unlock_resource(lock2, *lockid);
		break;

	case LCK_WRITE:
		status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
		if (status)
			goto out;
		status = _unlock_resource(lock1, *lockid);
		break;

	default:
		status = -1;
		errno = EINVAL;
		break;
	}
out:
	*lockid = mode;
	return status;
}

static int _sync_unlock(const char *resource, int lockid)
{
	int status = 0;
	char lock1[strlen(resource)+3];
	char lock2[strlen(resource)+3];

	snprintf(lock1, sizeof(lock1), "%s-1", resource);
	snprintf(lock2, sizeof(lock2), "%s-2", resource);

	/* The held lock mode is in the lock id */
	assert(lockid == LCK_EXCL ||
	       lockid == LCK_READ ||
	       lockid == LCK_PREAD ||
	       lockid == LCK_WRITE);

	status = _unlock_resource(lock1, lockid);
	if (!status)
		status = _unlock_resource(lock2, lockid);

	return status;
}
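
/*
 * Hypothetical caller, for illustration only (clvmd actually reaches these
 * through the sync_lock/sync_unlock members of cluster_ops):
 *
 *     int lockid;
 *
 *     if (!_sync_lock("V_vg00", LCK_EXCL, 0, &lockid)) {
 *             ... do the protected work ...
 *             _sync_unlock("V_vg00", lockid);  // lockid carries the mode
 *     }
 */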

static int _is_quorate(void)
{
	return gulm_quorate;
}

/* Get all the cluster node names & IPs from CCS and
   add them to our node list so we know who to talk to.
   Called when we start up and if we get sent SIGHUP.
*/
static int get_all_cluster_nodes(void)
{
	int ctree;
	char *nodename;
	int error;
	int i;

	/* Open the config file */
	ctree = ccs_force_connect(NULL, 1);
	if (ctree < 0)
	{
		log_error("Error connecting to CCS");
		return -1;
	}

	for (i = 1;; i++)
	{
		char nodekey[256];
		char nodeip[GULM_MAX_CSID_LEN];
		int clvmflag = 1;
		char *clvmflagstr;
		char key[256];

		sprintf(nodekey, "//cluster/clusternodes/clusternode[%d]/@name", i);
		error = ccs_get(ctree, nodekey, &nodename);
		if (error)
			break;

		sprintf(key, "//cluster/clusternodes/clusternode[@name=\"%s\"]/clvm", nodename);
		if (!ccs_get(ctree, key, &clvmflagstr))
		{
			clvmflag = atoi(clvmflagstr);
			free(clvmflagstr);
		}

		DEBUGLOG("Got node %s from ccs (clvmflag = %d)\n", nodename, clvmflag);
		if ((get_ip_address(nodename, nodeip) == 0) && clvmflag)
		{
			struct node_info *ninfo;

			/* If it's not in the list, then add it */
			ninfo = dm_hash_lookup_binary(node_hash, nodeip, GULM_MAX_CSID_LEN);
			if (!ninfo)
			{
				ninfo = malloc(sizeof(struct node_info));
				if (!ninfo)
				{
					syslog(LOG_ERR, "Cannot alloc memory for node info\n");
					ccs_disconnect(ctree);
					return -1;
				}
				strcpy(ninfo->name, nodename);

				ninfo->state = NODE_DOWN;
				dm_hash_insert_binary(node_hash, nodeip, GULM_MAX_CSID_LEN, ninfo);
			}
		}
		else if (!clvmflag)
		{
			DEBUGLOG("node %s has clvm disabled\n", nodename);
		}
		else
		{
			DEBUGLOG("Cannot resolve host name %s\n", nodename);
			log_error("Cannot resolve host name %s\n", nodename);
		}
		free(nodename);
	}

	/* Finished with config file */
	ccs_disconnect(ctree);

	return 0;
}
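
/*
 * The CCS queries used in this file expect cluster.conf content of roughly
 * this shape (a sketch only; element and attribute names are taken from the
 * XPath strings above, values are arbitrary examples):
 *
 *   <cluster name="mycluster">
 *     <clvm port="21064"/>
 *     <clusternodes>
 *       <clusternode name="node1"> <clvm>1</clvm> </clusternode>
 *       <clusternode name="node2"> <clvm>0</clvm> </clusternode>
 *     </clusternodes>
 *   </cluster>
 */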

static int _get_main_cluster_fd(void)
{
	return get_main_gulm_cluster_fd();
}

static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client)
{
	return cluster_fd_gulm_callback(fd, buf, len, csid, new_client);
}

static int _cluster_send_message(const void *buf, int msglen, const char *csid, const char *errtext)
{
	return gulm_cluster_send_message((char *)buf, msglen, csid, errtext);
}

static int _get_cluster_name(char *buf, int buflen)
{
	strncpy(buf, cluster_name, buflen);
	return 0;
}

static struct cluster_ops _cluster_gulm_ops = {
	.cluster_init_completed = NULL,
	.cluster_send_message = _cluster_send_message,
	.name_from_csid = gulm_name_from_csid,
	.csid_from_name = _csid_from_name,
	.get_num_nodes = _get_num_nodes,
	.cluster_fd_callback = _cluster_fd_callback,
	.get_main_cluster_fd = _get_main_cluster_fd,
	.cluster_do_node_callback = _cluster_do_node_callback,
	.is_quorate = _is_quorate,
	.get_our_csid = _get_our_csid,
	.add_up_node = gulm_add_up_node,
	.reread_config = _reread_config,
	.cluster_closedown = _cluster_closedown,
	.get_cluster_name = _get_cluster_name,
	.sync_lock = _sync_lock,
	.sync_unlock = _sync_unlock,
};

struct cluster_ops *init_gulm_cluster(void)
{
	if (!_init_cluster())
		return &_cluster_gulm_ops;
	else
		return NULL;
}