/*	$NetBSD: clvmd-gulm.c,v 1.1.1.2 2009/12/02 00:27:02 haad Exp $	*/

/*
 * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * This provides the interface between clvmd and gulm as the cluster
 * and lock manager.
 *
 * It also provides the "liblm" functions, as it is hard (and pointless)
 * to separate them out when using gulm.
 *
 * What it does /not/ provide is the communications between clvmd daemons
 * on the cluster nodes. That is done in tcp-comms.c.
 */

#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include <libdevmapper.h>
#include <ccs.h>
#include <libgulm.h>

#include "locking.h"
#include "lvm-logging.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
#include "clvmd.h"
#include "clvmd-gulm.h"

/* Hash table of nodes in the cluster */
static struct dm_hash_table *node_hash;

/* Hash table of outstanding lock requests */
static struct dm_hash_table *lock_hash;

/* Copy of the current quorate state */
static uint8_t gulm_quorate = 0;
static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;

/* Number of active nodes */
static int num_nodes;

static char *cluster_name;
static int in_shutdown = 0;

static pthread_mutex_t lock_start_mutex;
static volatile int lock_start_flag;

struct node_info
{
    enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
    char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
};

struct lock_wait
{
    pthread_cond_t cond;
    pthread_mutex_t mutex;
    int status;
};

/* Forward declarations */
static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client);
static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client);
static int get_all_cluster_nodes(void);
static int _csid_from_name(char *csid, const char *name);
static void _cluster_closedown(void);

/* In tcp-comms.c */
extern struct dm_hash_table *sock_hash;

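/*
 * Register a gulm-owned file descriptor with clvmd's main loop so that
 * traffic on it is dispatched to the given callback.
 */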
static int add_internal_client(int fd, fd_callback_t callback)
{
    struct local_client *client;

    DEBUGLOG("Add_internal_client, fd = %d\n", fd);

    /* Add a GULM file descriptor to the main loop */
    client = malloc(sizeof(struct local_client));
    if (!client)
    {
	DEBUGLOG("malloc failed\n");
	return -1;
    }

    memset(client, 0, sizeof(struct local_client));
    client->fd = fd;
    client->type = CLUSTER_INTERNAL;
    client->callback = callback;
    add_client(client);

    /* Set close-on-exec */
    fcntl(fd, F_SETFD, FD_CLOEXEC);

    return 0;
}

/* Gulm library handle */
static gulm_interface_p gulm_if;
static lg_core_callbacks_t core_callbacks;
static lg_lockspace_callbacks_t lock_callbacks;

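/*
 * SIGINT/SIGTERM handler: shut the cluster interface down cleanly so that
 * clvmd can be killed without taking gulm down with it (installed in
 * _init_cluster() below).
 */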
static void badsig_handler(int sig)
{
    DEBUGLOG("got sig %d\n", sig);
    _cluster_closedown();
    exit(0);
}

static void _reread_config(void)
{
	/* Re-read CCS node list */
	DEBUGLOG("Re-reading CCS config\n");
	get_all_cluster_nodes();
}

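/*
 * Cluster start-up: read the cluster name and clvmd port from CCS, build the
 * initial node list, log in to the gulm core and lock servers, and hook their
 * file descriptors into the main loop.
 */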
static int _init_cluster(void)
{
    int status;
    int ccs_h;
    int port = 0;
    char *portstr;

    /* Get cluster name from CCS */
    ccs_h = ccs_force_connect(NULL, 0);
    if (ccs_h < 0)
    {
	syslog(LOG_ERR, "Cannot log in to CCSD server\n");
	return -1;
    }

    ccs_get(ccs_h, "//cluster/@name", &cluster_name);
    DEBUGLOG("got cluster name %s\n", cluster_name);

    if (!ccs_get(ccs_h, "//cluster/clvm/@port", &portstr))
    {
	port = atoi(portstr);
	free(portstr);
	DEBUGLOG("got port number %d\n", port);

	if (port <= 0 || port >= 65536)
	    port = 0;
    }

    ccs_disconnect(ccs_h);

    /* Block locking until we are logged in */
    pthread_mutex_init(&lock_start_mutex, NULL);
    pthread_mutex_lock(&lock_start_mutex);
    lock_start_flag = 1;

    node_hash = dm_hash_create(100);
    lock_hash = dm_hash_create(10);

    /* Get all nodes from CCS */
    if (get_all_cluster_nodes())
	return -1;

    /* Initialise GULM library */
    status = lg_initialize(&gulm_if, cluster_name, "clvmd");
    if (status)
    {
	DEBUGLOG("lg_initialize failed: %d\n", status);
	return status;
    }

    /* Connect to core - we are not "important" :-) */
    status = lg_core_login(gulm_if, 0);
    if (status)
    {
	DEBUGLOG("lg_core_login failed: %d\n", status);
	return status;
    }

    /* Initialise the inter-node comms */
    status = init_comms(port);
    if (status)
	return status;

    /* Add core FD to the list */
    status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
    if (status)
    {
	DEBUGLOG("can't allocate client space\n");
	return status;
    }

    /* Connect to the lock server */
    if (lg_lock_login(gulm_if, "CLVM"))
    {
	syslog(LOG_ERR, "Cannot log in to LOCK server\n");
	DEBUGLOG("Cannot log in to LOCK server\n");
	exit(88);
    }

    /* Add lockspace FD to the list */
    status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
    if (status)
    {
	DEBUGLOG("can't allocate client space\n");
	exit(status);
    }

    /* Request a list of nodes; we can't really do anything until
       this comes back */
    status = lg_core_nodelist(gulm_if);
    if (status)
    {
	DEBUGLOG("lg_core_nodelist failed: %d\n", status);
	return status;
    }

    /* So I can kill it without taking GULM down too */
    signal(SIGINT, badsig_handler);
    signal(SIGTERM, badsig_handler);

    return 0;
}

static void _cluster_closedown(void)
{
    DEBUGLOG("cluster_closedown\n");
    in_shutdown = 1;
    destroy_lvhash();
    lg_lock_logout(gulm_if);
    lg_core_logout(gulm_if);
    lg_release(gulm_if);
}

/* Expire locks for a named node, or us */
#define GIO_KEY_SIZE 46
static void drop_expired_locks(char *nodename)
{
    struct utsname nodeinfo;
    uint8_t mask[GIO_KEY_SIZE];

    DEBUGLOG("Dropping expired locks for %s\n", nodename?nodename:"(null)");
    memset(mask, 0xff, GIO_KEY_SIZE);

    if (!nodename)
    {
	uname(&nodeinfo);
	nodename = nodeinfo.nodename;
    }

    if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
    {
	DEBUGLOG("Error calling lg_lock_drop_exp()\n");
    }
}


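/*
 * Main-loop callbacks for the core and lockspace sockets: drain any pending
 * gulm messages into the callback tables defined further down.
 */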
static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client)
{
    int status;

    *new_client = NULL;
    status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
    return status<0 ? status : 1;
}

static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client)
{
    int status;

    *new_client = NULL;
    status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
    return status<0 ? status : 1;
}


/* CORE callback routines */
static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
{
   DEBUGLOG("CORE Got a Login reply.  gen:%lld err:%d rank:%d corestate:%d\n",
         gen, error, rank, corestate);

   if (error)
       exit(error);

   /* Get the current core state (for quorum) */
   lg_core_corestate(gulm_if);

   return 0;
}

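/*
 * Update a node's recorded state from a gulm core state code, keeping
 * num_nodes in step with the number of nodes that are up.
 */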
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
    if (nodestate == lg_core_Logged_in)
    {
	/* Don't clobber NODE_CLVMD state */
	if (ninfo->state != NODE_CLVMD)
	{
	    if (ninfo->state == NODE_UNKNOWN ||
		ninfo->state == NODE_DOWN)
		num_nodes++;

	    ninfo->state = NODE_UP;
	}
    }
    else
    {
	if (nodestate == lg_core_Expired ||
	    nodestate == lg_core_Fenced ||
	    nodestate == lg_core_Logged_out)
	{
	    if (ninfo->state != NODE_DOWN)
		num_nodes--;
	    ninfo->state = NODE_DOWN;
	}
    }
    /* Gulm doesn't always send node DOWN events, so even if this is a node UP we must
     * assume (ahem) that it previously went down at some time. So we close
     * the sockets here to make sure that we don't have any dead connections
     * to that node.
     */
    tcp_remove_client(csid);

    DEBUGLOG("set_node_state, '%s' state = %d num_nodes=%d\n",
	     ninfo->name, ninfo->state, num_nodes);
}

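/*
 * Look a node up by its CSID (IP address), re-reading the CCS configuration
 * once if it is unknown, then record its new state.
 */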
static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
    if (!ninfo)
    {
	/* If we can't find that node then re-read the config file in case it
	   was added after we were started */
	DEBUGLOG("Node %s not found, re-reading config file\n", name);
	get_all_cluster_nodes();

	/* Now try again */
	ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
	if (!ninfo)
	{
	    DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
	    return NULL;
	}
    }

    set_node_state(ninfo, (char *)ip, state);

    return ninfo;
}

static void _get_our_csid(char *csid)
{
	get_our_gulm_csid(csid);
}

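/*
 * Callback for lg_core_nodelist(): called once per known node. When the full
 * list has arrived, finish clvmd initialisation if the cluster is quorate,
 * otherwise wait for quorum (see core_statechange()).
 */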
static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *ip, uint8_t state)
{
    DEBUGLOG("CORE nodelist\n");

    if (type == lglcb_start)
    {
	DEBUGLOG("Got Nodelist, start\n");
    }
    else
    {
	if (type == lglcb_item)
	{
	    DEBUGLOG("Got nodelist, item: %s, %#x\n", name, state);

	    add_or_set_node(name, ip, state);
	}
	else
	{
	    if (type == lglcb_stop)
	    {
		char ourcsid[GULM_MAX_CSID_LEN];

		DEBUGLOG("Got Nodelist, stop\n");
		if (gulm_quorate)
		{
			clvmd_cluster_init_completed();
			init_state = INIT_DONE;
		}
		else
		{
			if (init_state == INIT_NOTDONE)
				init_state = INIT_WAITQUORATE;
		}

		/* Mark ourself as up */
		_get_our_csid(ourcsid);
		gulm_add_up_node(ourcsid);
	    }
	    else
	    {
		DEBUGLOG("Unknown lglcb_t %#x\n", type);
	    }
	}
    }

    return 0;
}

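/*
 * Track quorum changes from the core server; if initialisation was held back
 * waiting for quorum, complete it now.
 */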
static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
{
    DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
	     quorate, corestate, mastername);

    gulm_quorate = quorate;
    if (quorate && init_state == INIT_WAITQUORATE)
    {
	    clvmd_cluster_init_completed();
	    init_state = INIT_DONE;
    }
    return 0;
}

static int core_nodechange(void *misc, char *nodename, struct in6_addr *nodeip, uint8_t nodestate)
{
    struct node_info *ninfo;
    char csid[GULM_MAX_CSID_LEN];

    DEBUGLOG("CORE node change, name=%s, state = %d\n", nodename, nodestate);

    /* If we don't get nodeip here, try a lookup by name */
    if (!nodeip)
    {
	if (_csid_from_name(csid, nodename))
	    return 0;
	nodeip = (struct in6_addr *)csid;
    }

    ninfo = add_or_set_node(nodename, nodeip, nodestate);
    if (!ninfo)
	return 0;

    /* Check if we need to drop any expired locks */
    if (ninfo->state == NODE_DOWN)
    {
	drop_expired_locks(nodename);
    }

    return 0;
}

static int core_error(void *misc, uint32_t err)
{
    DEBUGLOG("CORE error: %d\n", err);
    // Not sure what happens here
    return 0;
}

/* LOCK callback routines */
static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
{
    DEBUGLOG("LOCK Got a Login reply.  err:%d which:%d\n",
	     error, which);

    if (error)
	exit(error);

    /* Drop any expired locks for us that might be hanging around */
    drop_expired_locks(NULL);

    /* Enable locking operations in other threads */
    if (lock_start_flag)
    {
	lock_start_flag = 0;
	pthread_mutex_unlock(&lock_start_mutex);
    }

    return 0;
}

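/*
 * Completion callback for lock requests: look up the waiting thread in
 * lock_hash and wake it with the request's status.
 */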
static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen,
			   uint64_t subid, uint64_t start, uint64_t stop,
			   uint8_t state, uint32_t flags, uint32_t error,
			   uint8_t *LVB, uint16_t LVBlen)
{
    struct lock_wait *lwait;

    DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);

    /* No waiting process to wake up when we are shutting down */
    if (in_shutdown)
	    return 0;

    lwait = dm_hash_lookup(lock_hash, key);
    if (!lwait)
    {
	DEBUGLOG("Can't find hash entry for resource %s\n", key);
	return 0;
    }
    lwait->status = error;
    pthread_mutex_lock(&lwait->mutex);
    pthread_cond_signal(&lwait->cond);
    pthread_mutex_unlock(&lwait->mutex);

    return 0;
}

static int lock_error(void *misc, uint32_t err)
{
    DEBUGLOG("LOCK error: %d\n", err);
    // Not sure what happens here
    return 0;
}


/* CORE callbacks */
static lg_core_callbacks_t core_callbacks = {
    .login_reply  = core_login_reply,
    .nodelist     = core_nodelist,
    .statechange  = core_statechange,
    .nodechange   = core_nodechange,
    .error        = core_error,
};

/* LOCK callbacks */
static lg_lockspace_callbacks_t lock_callbacks = {
    .login_reply   = lock_login_reply,
    .lock_state    = lock_lock_state,
    .error         = lock_error,
};

/* Allow tcp-comms to loop round the list of active nodes */
int get_next_node_csid(void **context, char *csid)
{
    struct node_info *ninfo = NULL;

    /* First node */
    if (!*context)
    {
	*context = dm_hash_get_first(node_hash);
    }
    else
    {
	*context = dm_hash_get_next(node_hash, *context);
    }
    if (*context)
	ninfo = dm_hash_get_data(node_hash, *context);

    /* Find a node that is UP */
    while (*context && ninfo->state == NODE_DOWN)
    {
	*context = dm_hash_get_next(node_hash, *context);
	if (*context)
	{
	    ninfo = dm_hash_get_data(node_hash, *context);
	}
    }

    if (!*context || ninfo->state == NODE_DOWN)
    {
	return 0;
    }

    memcpy(csid, dm_hash_get_key(node_hash, *context), GULM_MAX_CSID_LEN);
    return 1;
}

int gulm_name_from_csid(const char *csid, char *name)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
    if (!ninfo)
    {
        sprintf(name, "UNKNOWN %s", print_csid(csid));
	return -1;
    }

    strcpy(name, ninfo->name);
    return 0;
}


static int _csid_from_name(char *csid, const char *name)
{
    struct dm_hash_node *hn;
    struct node_info *ninfo;

    dm_hash_iterate(hn, node_hash)
    {
	ninfo = dm_hash_get_data(node_hash, hn);
	if (strcmp(ninfo->name, name) == 0)
	{
	    memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);
	    return 0;
	}
    }
    return -1;
}

static int _get_num_nodes(void)
{
    DEBUGLOG("num_nodes = %d\n", num_nodes);
    return num_nodes;
}

/* Node is now known to be running a clvmd */
void gulm_add_up_node(const char *csid)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
    if (!ninfo) {
	    DEBUGLOG("gulm_add_up_node no node_hash entry for csid %s\n", print_csid(csid));
	    return;
    }

    DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);

    if (ninfo->state == NODE_DOWN)
	    num_nodes++;
    ninfo->state = NODE_CLVMD;
}

/* Node is now known to be NOT running a clvmd */
void add_down_node(char *csid)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
    if (!ninfo)
	return;

    /* Only set it to UP if it was previously known to be
       running clvmd - gulm may set it DOWN quite soon */
    if (ninfo->state == NODE_CLVMD)
	ninfo->state = NODE_UP;
    drop_expired_locks(ninfo->name);
}

/* Call a callback for each node, so the caller knows whether it's up or down */
static int _cluster_do_node_callback(struct local_client *master_client,
				     void (*callback)(struct local_client *, const char *csid, int node_up))
{
    struct dm_hash_node *hn;
    struct node_info *ninfo;
    int somedown = 0;

    dm_hash_iterate(hn, node_hash)
    {
	char csid[GULM_MAX_CSID_LEN];
	struct local_client *client;

	ninfo = dm_hash_get_data(node_hash, hn);
	memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);

	DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state);

	client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
	if (!client)
	{
	    /* If it's up but not connected, try to make contact */
	    if (ninfo->state == NODE_UP)
		    gulm_connect_csid(csid, &client);

	    client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
	}
	DEBUGLOG("down_callback2. node %s, state = %d\n", ninfo->name, ninfo->state);
	if (ninfo->state != NODE_DOWN)
		callback(master_client, csid, ninfo->state == NODE_CLVMD);

	if (ninfo->state != NODE_CLVMD)
		somedown = -1;
    }
    return somedown;
}

/* Convert gulm error codes to unix errno numbers */
static int gulm_to_errno(int gulm_ret)
{
    switch (gulm_ret)
    {
    case lg_err_TryFailed:
    case lg_err_AlreadyPend:
	    errno = EAGAIN;
	    break;

	/* More?? */
    default:
	    errno = EINVAL;
    }

    return gulm_ret ? -1 : 0;
}

/* Real locking */
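/*
 * The GULM request is asynchronous: the calling thread registers a lock_wait
 * in lock_hash and sleeps on its condition variable until lock_lock_state()
 * signals completion with the result.
 */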
static int _lock_resource(char *resource, int mode, int flags, int *lockid)
{
    int status;
    struct lock_wait lwait;

    /* Wait until the lock module is ready */
    if (lock_start_flag)
    {
	pthread_mutex_lock(&lock_start_mutex);
	pthread_mutex_unlock(&lock_start_mutex);
    }

    pthread_cond_init(&lwait.cond, NULL);
    pthread_mutex_init(&lwait.mutex, NULL);
    pthread_mutex_lock(&lwait.mutex);

    /* This needs to be converted from DLM/LVM2 value for GULM */
    if (flags & LKF_NOQUEUE) flags = lg_lock_flag_Try;

    dm_hash_insert(lock_hash, resource, &lwait);
    DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);

    status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
			       0, 0, 0,
			       mode, flags, NULL, 0);
    if (status)
    {
	DEBUGLOG("lg_lock_state returned %d\n", status);
	/* Clean up so the callback can never see a stale entry */
	dm_hash_remove(lock_hash, resource);
	pthread_mutex_unlock(&lwait.mutex);
	return status;
    }

    /* Wait for it to complete */
    pthread_cond_wait(&lwait.cond, &lwait.mutex);
    pthread_mutex_unlock(&lwait.mutex);

    dm_hash_remove(lock_hash, resource);
    DEBUGLOG("lock-resource returning %d\n", lwait.status);

    return gulm_to_errno(lwait.status);
}


static int _unlock_resource(char *resource, int lockid)
{
    int status;
    struct lock_wait lwait;

    pthread_cond_init(&lwait.cond, NULL);
    pthread_mutex_init(&lwait.mutex, NULL);
    pthread_mutex_lock(&lwait.mutex);

    dm_hash_insert(lock_hash, resource, &lwait);

    DEBUGLOG("unlock_resource %s\n", resource);
    status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
			       0, 0, 0,
			       lg_lock_state_Unlock, 0, NULL, 0);

    if (status)
    {
	DEBUGLOG("lg_lock_state(unlock) returned %d\n", status);
	/* Clean up so the callback can never see a stale entry */
	dm_hash_remove(lock_hash, resource);
	pthread_mutex_unlock(&lwait.mutex);
	return status;
    }

    /* When we are shutting down, don't wait for unlocks
       to be acknowledged, just do it. */
    if (in_shutdown)
	    return status;

    /* Wait for it to complete */

    pthread_cond_wait(&lwait.cond, &lwait.mutex);
    pthread_mutex_unlock(&lwait.mutex);

    dm_hash_remove(lock_hash, resource);

    return gulm_to_errno(lwait.status);
}


/* These two locking functions MUST be called in a separate thread from
   the clvmd main loop because they expect to be woken up by it.

   These are abstractions around the real locking functions (above)
   as we need to emulate the DLM's EX/PW/CW interaction with GULM using
   two locks.
   To aid unlocking, we store the lock mode in the lockid (as GULM
   doesn't use this).
*/
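/*
 * Summary of how the DLM modes map onto the two GULM sub-locks used below
 * ("<resource>-1" and "<resource>-2"):
 *
 *   LCK_EXCL            EX on lock1, then try-EX on lock2 (failure -> EAGAIN)
 *   LCK_READ/LCK_PREAD  SH on lock1, lock2 released
 *   LCK_WRITE           EX on lock2, lock1 released
 */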
static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
{
    int status;
    char lock1[strlen(resource)+3];
    char lock2[strlen(resource)+3];

    snprintf(lock1, sizeof(lock1), "%s-1", resource);
    snprintf(lock2, sizeof(lock2), "%s-2", resource);

    switch (mode)
    {
    case LCK_EXCL:
	status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
	if (status)
	    goto out;

	/* If we can't get this lock too then bail out */
	status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
	if (status == lg_err_TryFailed)
	{
	    _unlock_resource(lock1, *lockid);
	    status = -1;
	    errno = EAGAIN;
	}
	break;

    case LCK_PREAD:
    case LCK_READ:
	status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
	if (status)
		goto out;
	status = _unlock_resource(lock2, *lockid);
	break;

    case LCK_WRITE:
	status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
	if (status)
		goto out;
	status = _unlock_resource(lock1, *lockid);
	break;

    default:
	status = -1;
	errno = EINVAL;
	break;
    }
 out:
    *lockid = mode;
    return status;
}


static int _sync_unlock(const char *resource, int lockid)
{
    int status = 0;
    char lock1[strlen(resource)+3];
    char lock2[strlen(resource)+3];

    snprintf(lock1, sizeof(lock1), "%s-1", resource);
    snprintf(lock2, sizeof(lock2), "%s-2", resource);

    /* The held lock mode is in the lock id */
    assert(lockid == LCK_EXCL ||
	   lockid == LCK_READ ||
	   lockid == LCK_PREAD ||
	   lockid == LCK_WRITE);

    status = _unlock_resource(lock1, lockid);
    if (!status)
	    status = _unlock_resource(lock2, lockid);

    return status;
}

static int _is_quorate(void)
{
	return gulm_quorate;
}

/* Get all the cluster node names & IPs from CCS and
   add them to our node list so we know who to talk to.
   Called when we start up and if we get sent SIGHUP.
*/
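/*
 * An illustrative cluster.conf fragment matching the CCS paths queried here
 * and in _init_cluster(); this is only a sketch of the layout those queries
 * appear to assume, and the node names and port number are examples:
 *
 *   <cluster name="mycluster">
 *     <clvm port="21064"/>
 *     <clusternodes>
 *       <clusternode name="nodea"/>
 *       <clusternode name="nodeb"><clvm>0</clvm></clusternode>
 *     </clusternodes>
 *   </cluster>
 */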
static int get_all_cluster_nodes(void)
{
    int ctree;
    char *nodename;
    int error;
    int i;

    /* Open the config file */
    ctree = ccs_force_connect(NULL, 1);
    if (ctree < 0)
    {
	log_error("Error connecting to CCS");
	return -1;
    }

    for (i=1;;i++)
    {
	char nodekey[256];
	char nodeip[GULM_MAX_CSID_LEN];
	int  clvmflag = 1;
	char *clvmflagstr;
	char key[256];

	sprintf(nodekey, "//cluster/clusternodes/clusternode[%d]/@name", i);
	error = ccs_get(ctree, nodekey, &nodename);
	if (error)
	    break;

	sprintf(key, "//cluster/clusternodes/clusternode[@name=\"%s\"]/clvm", nodename);
	if (!ccs_get(ctree, key, &clvmflagstr))
	{
	    clvmflag = atoi(clvmflagstr);
	    free(clvmflagstr);
	}

	DEBUGLOG("Got node %s from ccs (clvmflag = %d)\n", nodename, clvmflag);
	if ((get_ip_address(nodename, nodeip) == 0) && clvmflag)
	{
	    struct node_info *ninfo;

	    /* If it's not in the list, then add it */
	    ninfo = dm_hash_lookup_binary(node_hash, nodeip, GULM_MAX_CSID_LEN);
	    if (!ninfo)
	    {
		ninfo = malloc(sizeof(struct node_info));
		if (!ninfo)
		{
		    syslog(LOG_ERR, "Cannot alloc memory for node info\n");
		    ccs_disconnect(ctree);
		    return -1;
		}
		strcpy(ninfo->name, nodename);

		ninfo->state = NODE_DOWN;
		dm_hash_insert_binary(node_hash, nodeip, GULM_MAX_CSID_LEN, ninfo);
	    }
	}
	else
	{
		if (!clvmflag) {
			DEBUGLOG("node %s has clvm disabled\n", nodename);
		}
		else {
			DEBUGLOG("Cannot resolve host name %s\n", nodename);
			log_error("Cannot resolve host name %s\n", nodename);
		}
	}
	free(nodename);
    }

    /* Finished with config file */
    ccs_disconnect(ctree);

    return 0;
}

static int _get_main_cluster_fd(void)
{
	return get_main_gulm_cluster_fd();
}

static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client)
{
	return cluster_fd_gulm_callback(fd, buf, len, csid, new_client);
}

static int _cluster_send_message(const void *buf, int msglen, const char *csid, const char *errtext)
{
	return gulm_cluster_send_message((char *)buf, msglen, csid, errtext);
}

static int _get_cluster_name(char *buf, int buflen)
{
	/* Ensure the copy is always NUL-terminated */
	strncpy(buf, cluster_name, buflen - 1);
	buf[buflen - 1] = '\0';
	return 0;
}

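/*
 * Operations table handed back to the clvmd core. cluster_init_completed is
 * NULL because the gulm interface signals completion itself from
 * core_nodelist()/core_statechange() via clvmd_cluster_init_completed().
 */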
static struct cluster_ops _cluster_gulm_ops = {
	.cluster_init_completed   = NULL,
	.cluster_send_message     = _cluster_send_message,
	.name_from_csid           = gulm_name_from_csid,
	.csid_from_name           = _csid_from_name,
	.get_num_nodes            = _get_num_nodes,
	.cluster_fd_callback      = _cluster_fd_callback,
	.get_main_cluster_fd      = _get_main_cluster_fd,
	.cluster_do_node_callback = _cluster_do_node_callback,
	.is_quorate               = _is_quorate,
	.get_our_csid             = _get_our_csid,
	.add_up_node              = gulm_add_up_node,
	.reread_config            = _reread_config,
	.cluster_closedown        = _cluster_closedown,
	.get_cluster_name         = _get_cluster_name,
	.sync_lock                = _sync_lock,
	.sync_unlock              = _sync_unlock,
};

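/*
 * Entry point used by clvmd to select gulm as its cluster interface: returns
 * the operations table on success, or NULL if cluster initialisation failed.
 */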
struct cluster_ops *init_gulm_cluster(void)
{
	if (!_init_cluster())
		return &_cluster_gulm_ops;
	else
		return NULL;
}