/* $NetBSD: clvmd-gulm.c,v 1.1.1.2 2009/12/02 00:27:02 haad Exp $ */

/*
 * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * This provides the interface between clvmd and gulm as the cluster
 * and lock manager.
 *
 * It also provides the "liblm" functions too as it's hard (and pointless)
 * to separate them out when using gulm.
 *
 * What it does /not/ provide is the communications between clvmd daemons
 * on the cluster nodes. That is done in tcp-comms.c
 */

#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>
#include <libdevmapper.h>
#include <ccs.h>
#include <libgulm.h>

#include "locking.h"
#include "lvm-logging.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
#include "clvmd.h"
#include "clvmd-gulm.h"

/* Hash list of nodes in the cluster */
static struct dm_hash_table *node_hash;

/* Hash list of outstanding lock requests */
static struct dm_hash_table *lock_hash;

/* Copy of the current quorate state */
static uint8_t gulm_quorate = 0;
static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;

/* Number of active nodes */
static int num_nodes;

static char *cluster_name;
static int in_shutdown = 0;

static pthread_mutex_t lock_start_mutex;
static volatile int lock_start_flag;

struct node_info
{
    enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
    char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
};

struct lock_wait
{
    pthread_cond_t cond;
    pthread_mutex_t mutex;
    int status;
};
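/*
 * How an outstanding lock request completes: the thread issuing the
 * request puts a lock_wait on its own stack, inserts it into lock_hash
 * keyed by the resource name and sleeps on 'cond'.  When GULM replies,
 * the lock_lock_state() callback (driven from the main loop's lock-socket
 * handler) looks the entry up by key, records the error code in 'status'
 * and signals 'cond' to wake the requester.
 */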
/* Forward */
static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
                               struct local_client **new_client);
static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
                               struct local_client **new_client);
static int get_all_cluster_nodes(void);
static int _csid_from_name(char *csid, const char *name);
static void _cluster_closedown(void);

/* In tcp-comms.c */
extern struct dm_hash_table *sock_hash;

static int add_internal_client(int fd, fd_callback_t callback)
{
    struct local_client *client;

    DEBUGLOG("Add_internal_client, fd = %d\n", fd);

    /* Add a GULM file descriptor to the main loop */
    client = malloc(sizeof(struct local_client));
    if (!client)
    {
        DEBUGLOG("malloc failed\n");
        return -1;
    }

    memset(client, 0, sizeof(struct local_client));
    client->fd = fd;
    client->type = CLUSTER_INTERNAL;
    client->callback = callback;
    add_client(client);

    /* Set Close-on-exec */
    fcntl(fd, F_SETFD, FD_CLOEXEC);

    return 0;
}

/* Gulm library handle */
static gulm_interface_p gulm_if;
static lg_core_callbacks_t core_callbacks;
static lg_lockspace_callbacks_t lock_callbacks;

static void badsig_handler(int sig)
{
    DEBUGLOG("got sig %d\n", sig);
    _cluster_closedown();
    exit(0);
}

static void _reread_config(void)
{
    /* Re-read CCS node list */
    DEBUGLOG("Re-reading CCS config\n");
    get_all_cluster_nodes();
}
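/*
 * Start-up sequence, in the order performed by _init_cluster() below:
 * read the cluster name and optional clvmd port from CCS, take
 * lock_start_mutex so that lock requests block until the lock server has
 * answered our login, build node_hash from the CCS node list, initialise
 * the GULM library and log in to core, start the inter-node comms
 * (init_comms) on the port read from CCS, add the core and lockspace
 * selectors to the main loop, then ask core for a node list.
 * clvmd_cluster_init_completed() is only called once that node list has
 * arrived and we are quorate.
 */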
static int _init_cluster(void)
{
    int status;
    int ccs_h;
    int port = 0;
    char *portstr;

    /* Get cluster name from CCS */
    ccs_h = ccs_force_connect(NULL, 0);
    if (ccs_h < 0)
    {
        syslog(LOG_ERR, "Cannot log in to CCSD server\n");
        return -1;
    }

    ccs_get(ccs_h, "//cluster/@name", &cluster_name);
    DEBUGLOG("got cluster name %s\n", cluster_name);

    if (!ccs_get(ccs_h, "//cluster/clvm/@port", &portstr))
    {
        port = atoi(portstr);
        free(portstr);
        DEBUGLOG("got port number %d\n", port);

        if (port <= 0 || port >= 65536)
            port = 0;
    }

    ccs_disconnect(ccs_h);

    /* Block locking until we are logged in */
    pthread_mutex_init(&lock_start_mutex, NULL);
    pthread_mutex_lock(&lock_start_mutex);
    lock_start_flag = 1;

    node_hash = dm_hash_create(100);
    lock_hash = dm_hash_create(10);

    /* Get all nodes from CCS */
    if (get_all_cluster_nodes())
        return -1;

    /* Initialise GULM library */
    status = lg_initialize(&gulm_if, cluster_name, "clvmd");
    if (status)
    {
        DEBUGLOG("lg_initialize failed: %d\n", status);
        return status;
    }

    /* Connect to core - we are not "important" :-) */
    status = lg_core_login(gulm_if, 0);
    if (status)
    {
        DEBUGLOG("lg_core_login failed: %d\n", status);
        return status;
    }

    /* Initialise the inter-node comms */
    status = init_comms(port);
    if (status)
        return status;

    /* Add core FD to the list */
    status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
    if (status)
    {
        DEBUGLOG("can't allocate client space\n");
        return status;
    }

    /* Connect to the lock server */
    if (lg_lock_login(gulm_if, "CLVM"))
    {
        syslog(LOG_ERR, "Cannot log in to LOCK server\n");
        DEBUGLOG("Cannot log in to LOCK server\n");
        exit(88);
    }

    /* Add lockspace FD to the list */
    status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
    if (status)
    {
        DEBUGLOG("can't allocate client space\n");
        exit(status);
    }

    /* Request a list of nodes; we can't really do anything until
       this comes back */
    status = lg_core_nodelist(gulm_if);
    if (status)
    {
        DEBUGLOG("lg_core_nodelist failed: %d\n", status);
        return status;
    }

    /* So I can kill it without taking GULM down too */
    signal(SIGINT, badsig_handler);
    signal(SIGTERM, badsig_handler);

    return 0;
}

static void _cluster_closedown(void)
{
    DEBUGLOG("cluster_closedown\n");
    in_shutdown = 1;
    destroy_lvhash();
    lg_lock_logout(gulm_if);
    lg_core_logout(gulm_if);
    lg_release(gulm_if);
}
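/*
 * Expired-lock handling: when a node is seen to go down (core_nodechange,
 * add_down_node) we ask the lock server to drop any locks it still held;
 * we also do this for ourselves right after logging in to the lock server,
 * in case a previous incarnation of this clvmd left locks behind.  The
 * all-0xff key mask passed to lg_lock_drop_exp() below is intended to
 * match every key held by the named node.
 */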
/* Expire locks for a named node, or us */
#define GIO_KEY_SIZE 46
static void drop_expired_locks(char *nodename)
{
    struct utsname nodeinfo;
    uint8_t mask[GIO_KEY_SIZE];

    DEBUGLOG("Dropping expired locks for %s\n", nodename?nodename:"(null)");
    memset(mask, 0xff, GIO_KEY_SIZE);

    if (!nodename)
    {
        uname(&nodeinfo);
        nodename = nodeinfo.nodename;
    }

    if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
    {
        DEBUGLOG("Error calling lg_lock_drop_exp()\n");
    }
}


static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
                               struct local_client **new_client)
{
    int status;

    *new_client = NULL;
    status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
    return status < 0 ? status : 1;
}

static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
                               struct local_client **new_client)
{
    int status;

    *new_client = NULL;
    status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
    return status < 0 ? status : 1;
}


/* CORE callback routines */
static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
{
    DEBUGLOG("CORE Got a Login reply. gen:%llu err:%d rank:%d corestate:%d\n",
             (unsigned long long)gen, error, rank, corestate);

    if (error)
        exit(error);

    /* Get the current core state (for quorum) */
    lg_core_corestate(gulm_if);

    return 0;
}
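/*
 * Node state tracking: every node read from CCS starts as NODE_DOWN,
 * moves to NODE_UP when GULM core reports it logged in, and to NODE_CLVMD
 * once we know a clvmd is actually running there (gulm_add_up_node).
 * Expired, fenced or logged-out nodes drop back to NODE_DOWN.  num_nodes
 * counts the nodes that are currently not DOWN and is adjusted in
 * set_node_state() and gulm_add_up_node().
 */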
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
    if (nodestate == lg_core_Logged_in)
    {
        /* Don't clobber NODE_CLVMD state */
        if (ninfo->state != NODE_CLVMD)
        {
            if (ninfo->state == NODE_UNKNOWN ||
                ninfo->state == NODE_DOWN)
                num_nodes++;

            ninfo->state = NODE_UP;
        }
    }
    else
    {
        if (nodestate == lg_core_Expired ||
            nodestate == lg_core_Fenced ||
            nodestate == lg_core_Logged_out)
        {
            if (ninfo->state != NODE_DOWN)
                num_nodes--;
            ninfo->state = NODE_DOWN;
        }
    }
    /* Gulm doesn't always send node DOWN events, so even if this is a node UP we must
     * assume (ahem) that it previously went down at some time. So we close
     * the sockets here to make sure that we don't have any dead connections
     * to that node.
     */
    tcp_remove_client(csid);

    DEBUGLOG("set_node_state, '%s' state = %d num_nodes=%d\n",
             ninfo->name, ninfo->state, num_nodes);
}

static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
    if (!ninfo)
    {
        /* If we can't find that node then re-read the config file in case it
           was added after we were started */
        DEBUGLOG("Node %s not found, re-reading config file\n", name);
        get_all_cluster_nodes();

        /* Now try again */
        ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
        if (!ninfo)
        {
            DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
            return NULL;
        }
    }

    set_node_state(ninfo, (char *)ip, state);

    return ninfo;
}

static void _get_our_csid(char *csid)
{
    get_our_gulm_csid(csid);
}

static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *ip, uint8_t state)
{
    DEBUGLOG("CORE nodelist\n");

    if (type == lglcb_start)
    {
        DEBUGLOG("Got Nodelist, start\n");
    }
    else
    {
        if (type == lglcb_item)
        {
            DEBUGLOG("Got nodelist, item: %s, %#x\n", name, state);

            add_or_set_node(name, ip, state);
        }
        else
        {
            if (type == lglcb_stop)
            {
                char ourcsid[GULM_MAX_CSID_LEN];

                DEBUGLOG("Got Nodelist, stop\n");
                if (gulm_quorate)
                {
                    clvmd_cluster_init_completed();
                    init_state = INIT_DONE;
                }
                else
                {
                    if (init_state == INIT_NOTDONE)
                        init_state = INIT_WAITQUORATE;
                }

                /* Mark ourself as up */
                _get_our_csid(ourcsid);
                gulm_add_up_node(ourcsid);
            }
            else
            {
                DEBUGLOG("Unknown lglcb_t %#x\n", type);
            }
        }
    }

    return 0;
}

static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
{
    DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
             quorate, corestate, mastername);

    gulm_quorate = quorate;
    if (quorate && init_state == INIT_WAITQUORATE)
    {
        clvmd_cluster_init_completed();
        init_state = INIT_DONE;
    }
    return 0;
}
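/*
 * Note on csids: throughout this file a "csid" is the GULM_MAX_CSID_LEN
 * byte node address filled in by get_ip_address() in tcp-comms.c.  It
 * doubles as the key into node_hash here and into tcp-comms' sock_hash,
 * which is why struct in6_addr pointers from GULM and csid buffers are
 * used interchangeably.  _csid_from_name() provides the reverse lookup
 * for the cases where GULM only gives us a node name.
 */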
static int core_nodechange(void *misc, char *nodename, struct in6_addr *nodeip, uint8_t nodestate)
{
    struct node_info *ninfo;
    char name_csid[GULM_MAX_CSID_LEN];

    DEBUGLOG("CORE node change, name=%s, state = %d\n", nodename, nodestate);

    /* If we don't get nodeip here, try a lookup by name */
    if (!nodeip)
    {
        if (_csid_from_name(name_csid, nodename))
            return 0;
        /* The csid is only ever used as a hash key, so this cast is safe */
        nodeip = (struct in6_addr *)name_csid;
    }

    ninfo = add_or_set_node(nodename, nodeip, nodestate);
    if (!ninfo)
        return 0;

    /* Check if we need to drop any expired locks */
    if (ninfo->state == NODE_DOWN)
    {
        drop_expired_locks(nodename);
    }

    return 0;
}

static int core_error(void *misc, uint32_t err)
{
    DEBUGLOG("CORE error: %d\n", err);
    // Not sure what happens here
    return 0;
}

/* LOCK callback routines */
static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
{
    DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n",
             error, which);

    if (error)
        exit(error);

    /* Drop any expired locks for us that might be hanging around */
    drop_expired_locks(NULL);

    /* Enable locking operations in other threads */
    if (lock_start_flag)
    {
        lock_start_flag = 0;
        pthread_mutex_unlock(&lock_start_mutex);
    }

    return 0;
}

static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen,
                           uint64_t subid, uint64_t start, uint64_t stop,
                           uint8_t state, uint32_t flags, uint32_t error,
                           uint8_t *LVB, uint16_t LVBlen)
{
    struct lock_wait *lwait;

    DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);

    /* No waiting process to wake up when we are shutting down */
    if (in_shutdown)
        return 0;

    lwait = dm_hash_lookup(lock_hash, (const char *)key);
    if (!lwait)
    {
        DEBUGLOG("Can't find hash entry for resource %s\n", key);
        return 0;
    }
    lwait->status = error;
    pthread_mutex_lock(&lwait->mutex);
    pthread_cond_signal(&lwait->cond);
    pthread_mutex_unlock(&lwait->mutex);

    return 0;
}

static int lock_error(void *misc, uint32_t err)
{
    DEBUGLOG("LOCK error: %d\n", err);
    // Not sure what happens here
    return 0;
}


/* CORE callbacks */
static lg_core_callbacks_t core_callbacks = {
    .login_reply = core_login_reply,
    .nodelist    = core_nodelist,
    .statechange = core_statechange,
    .nodechange  = core_nodechange,
    .error       = core_error,
};

/* LOCK callbacks */
static lg_lockspace_callbacks_t lock_callbacks = {
    .login_reply = lock_login_reply,
    .lock_state  = lock_lock_state,
    .error       = lock_error,
};

/* Allow tcp-comms to loop round the list of active nodes */
int get_next_node_csid(void **context, char *csid)
{
    struct node_info *ninfo = NULL;

    /* First node */
    if (!*context)
    {
        *context = dm_hash_get_first(node_hash);
    }
    else
    {
        *context = dm_hash_get_next(node_hash, *context);
    }
    if (*context)
        ninfo = dm_hash_get_data(node_hash, *context);

    /* Find a node that is UP */
    while (*context && ninfo->state == NODE_DOWN)
    {
        *context = dm_hash_get_next(node_hash, *context);
        if (*context)
        {
            ninfo = dm_hash_get_data(node_hash, *context);
        }
    }

    if (!*context || ninfo->state == NODE_DOWN)
    {
        return 0;
    }

    memcpy(csid, dm_hash_get_key(node_hash, *context), GULM_MAX_CSID_LEN);
    return 1;
}

int gulm_name_from_csid(const char *csid, char *name)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
    if (!ninfo)
    {
        sprintf(name, "UNKNOWN %s", print_csid(csid));
        return -1;
    }

    strcpy(name, ninfo->name);
    return 0;
}


static int _csid_from_name(char *csid, const char *name)
{
    struct dm_hash_node *hn;
    struct node_info *ninfo;

    dm_hash_iterate(hn, node_hash)
    {
        ninfo = dm_hash_get_data(node_hash, hn);
        if (strcmp(ninfo->name, name) == 0)
        {
            memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);
            return 0;
        }
    }
    return -1;
}

static int _get_num_nodes(void)
{
    DEBUGLOG("num_nodes = %d\n", num_nodes);
    return num_nodes;
}

/* Node is now known to be running a clvmd */
void gulm_add_up_node(const char *csid)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
    if (!ninfo) {
        DEBUGLOG("gulm_add_up_node no node_hash entry for csid %s\n", print_csid(csid));
        return;
    }

    DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);

    if (ninfo->state == NODE_DOWN)
        num_nodes++;
    ninfo->state = NODE_CLVMD;
}

/* Node is now known to be NOT running a clvmd */
void add_down_node(char *csid)
{
    struct node_info *ninfo;

    ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
    if (!ninfo)
        return;

    /* Only set it to UP if it was previously known to be
       running clvmd - gulm may set it DOWN quite soon */
    if (ninfo->state == NODE_CLVMD)
        ninfo->state = NODE_UP;
    drop_expired_locks(ninfo->name);
}
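/*
 * _cluster_do_node_callback(): the callback is invoked once for every
 * known node that is not DOWN, with node_up saying whether that node is
 * running a clvmd (NODE_CLVMD).  Nodes that are UP but not yet connected
 * are given a chance to connect first.  The return value is -1 if any
 * known node is not running clvmd (including DOWN nodes), 0 if they all
 * are.
 */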
/* Call a callback for each node, so the caller knows whether it's up or down */
static int _cluster_do_node_callback(struct local_client *master_client,
                                     void (*callback)(struct local_client *, const char *csid, int node_up))
{
    struct dm_hash_node *hn;
    struct node_info *ninfo;
    int somedown = 0;

    dm_hash_iterate(hn, node_hash)
    {
        char csid[GULM_MAX_CSID_LEN];
        struct local_client *client;

        ninfo = dm_hash_get_data(node_hash, hn);
        memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);

        DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state);

        client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
        if (!client)
        {
            /* If it's up but not connected, try to make contact */
            if (ninfo->state == NODE_UP)
                gulm_connect_csid(csid, &client);

            client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
        }
        DEBUGLOG("down_callback2. node %s, state = %d\n", ninfo->name, ninfo->state);
        if (ninfo->state != NODE_DOWN)
            callback(master_client, csid, ninfo->state == NODE_CLVMD);

        if (ninfo->state != NODE_CLVMD)
            somedown = -1;
    }
    return somedown;
}

/* Convert gulm error codes to unix errno numbers */
static int gulm_to_errno(int gulm_ret)
{
    switch (gulm_ret)
    {
    case lg_err_TryFailed:
    case lg_err_AlreadyPend:
        errno = EAGAIN;
        break;

        /* More?? */
    default:
        errno = EINVAL;
    }

    return gulm_ret ? -1 : 0;
}

/* Real locking */
static int _lock_resource(char *resource, int mode, int flags, int *lockid)
{
    int status;
    struct lock_wait lwait;

    /* Wait until the lock module is ready */
    if (lock_start_flag)
    {
        pthread_mutex_lock(&lock_start_mutex);
        pthread_mutex_unlock(&lock_start_mutex);
    }

    pthread_cond_init(&lwait.cond, NULL);
    pthread_mutex_init(&lwait.mutex, NULL);
    pthread_mutex_lock(&lwait.mutex);

    /* This needs to be converted from the DLM/LVM2 value to a GULM flag */
    if (flags & LKF_NOQUEUE)
        flags = lg_lock_flag_Try;

    dm_hash_insert(lock_hash, resource, &lwait);
    DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);

    status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
                               0, 0, 0,
                               mode, flags, NULL, 0);
    if (status)
    {
        DEBUGLOG("lg_lock_state_req returned %d\n", status);
        /* Don't leave a stale entry pointing at our stack */
        dm_hash_remove(lock_hash, resource);
        pthread_mutex_unlock(&lwait.mutex);
        return status;
    }

    /* Wait for it to complete */
    pthread_cond_wait(&lwait.cond, &lwait.mutex);
    pthread_mutex_unlock(&lwait.mutex);

    dm_hash_remove(lock_hash, resource);
    DEBUGLOG("lock-resource returning %d\n", lwait.status);

    return gulm_to_errno(lwait.status);
}


static int _unlock_resource(char *resource, int lockid)
{
    int status;
    struct lock_wait lwait;

    pthread_cond_init(&lwait.cond, NULL);
    pthread_mutex_init(&lwait.mutex, NULL);
    pthread_mutex_lock(&lwait.mutex);

    dm_hash_insert(lock_hash, resource, &lwait);

    DEBUGLOG("unlock_resource %s\n", resource);
    status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
                               0, 0, 0,
                               lg_lock_state_Unlock, 0, NULL, 0);

    if (status)
    {
        DEBUGLOG("lg_lock_state_req(unlock) returned %d\n", status);
        /* Don't leave a stale entry pointing at our stack */
        dm_hash_remove(lock_hash, resource);
        pthread_mutex_unlock(&lwait.mutex);
        return status;
    }

    /* When we are shutting down, don't wait for unlocks
       to be acknowledged, just do it. */
    if (in_shutdown)
        return status;

    /* Wait for it to complete */
    pthread_cond_wait(&lwait.cond, &lwait.mutex);
    pthread_mutex_unlock(&lwait.mutex);

    dm_hash_remove(lock_hash, resource);

    return gulm_to_errno(lwait.status);
}


/* These two locking functions MUST be called in a separate thread from
   the clvmd main loop because they expect to be woken up by it.

   These are abstractions around the real locking functions (above)
   as we need to emulate the DLM's EX/PW/CW interaction with GULM using
   two locks.
   To aid unlocking, we store the lock mode in the lockid (as GULM
   doesn't use this).
*/
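/*
 * Mapping used below; "<resource>-1" and "<resource>-2" are the two GULM
 * locks that together stand in for one DLM-style lock on <resource>:
 *
 *   LCK_EXCL             lock1 Exclusive + lock2 Exclusive (try; EAGAIN on failure)
 *   LCK_READ, LCK_PREAD  lock1 Shared,     lock2 unlocked
 *   LCK_WRITE            lock2 Exclusive,  lock1 unlocked
 *
 * Hence EXCL excludes everything, READ/PREAD only excludes EXCL, and
 * WRITE excludes EXCL and other WRITE holders.
 */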
static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
{
    int status;
    char lock1[strlen(resource)+3];
    char lock2[strlen(resource)+3];

    snprintf(lock1, sizeof(lock1), "%s-1", resource);
    snprintf(lock2, sizeof(lock2), "%s-2", resource);

    switch (mode)
    {
    case LCK_EXCL:
        status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
        if (status)
            goto out;

        /* If we can't get this lock too then bail out */
        status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
        if (status == lg_err_TryFailed)
        {
            _unlock_resource(lock1, *lockid);
            status = -1;
            errno = EAGAIN;
        }
        break;

    case LCK_PREAD:
    case LCK_READ:
        status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
        if (status)
            goto out;
        status = _unlock_resource(lock2, *lockid);
        break;

    case LCK_WRITE:
        status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
        if (status)
            goto out;
        status = _unlock_resource(lock1, *lockid);
        break;

    default:
        status = -1;
        errno = EINVAL;
        break;
    }
 out:
    *lockid = mode;
    return status;
}

static int _sync_unlock(const char *resource, int lockid)
{
    int status = 0;
    char lock1[strlen(resource)+3];
    char lock2[strlen(resource)+3];

    snprintf(lock1, sizeof(lock1), "%s-1", resource);
    snprintf(lock2, sizeof(lock2), "%s-2", resource);

    /* The held lock mode is in the lock id */
    assert(lockid == LCK_EXCL ||
           lockid == LCK_READ ||
           lockid == LCK_PREAD ||
           lockid == LCK_WRITE);

    status = _unlock_resource(lock1, lockid);
    if (!status)
        status = _unlock_resource(lock2, lockid);

    return status;
}

static int _is_quorate(void)
{
    return gulm_quorate;
}

/* Get all the cluster node names & IPs from CCS and
   add them to our node list so we know who to talk to.
   Called when we start up and if we get sent SIGHUP.
*/
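/*
 * For illustration only (the real schema lives in cluster.conf / CCS and
 * the names and values here are made up), the queries below expect
 * something along these lines:
 *
 *   <cluster name="alpha">
 *     <clvm port="21064"/>
 *     <clusternodes>
 *       <clusternode name="node1"> <clvm>1</clvm> </clusternode>
 *       <clusternode name="node2"> <clvm>0</clvm> </clusternode>
 *     </clusternodes>
 *   </cluster>
 *
 * "//cluster/clusternodes/clusternode[%d]/@name" walks the node names and
 * ".../clusternode[@name=...]/clvm" reads the optional per-node clvm flag
 * (missing means enabled); //cluster/clvm/@port is read in _init_cluster().
 */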
static int get_all_cluster_nodes(void)
{
    int ctree;
    char *nodename;
    int error;
    int i;

    /* Open the config file */
    ctree = ccs_force_connect(NULL, 1);
    if (ctree < 0)
    {
        log_error("Error connecting to CCS");
        return -1;
    }

    for (i=1;;i++)
    {
        char nodekey[256];
        char nodeip[GULM_MAX_CSID_LEN];
        int clvmflag = 1;
        char *clvmflagstr;
        char key[256];

        sprintf(nodekey, "//cluster/clusternodes/clusternode[%d]/@name", i);
        error = ccs_get(ctree, nodekey, &nodename);
        if (error)
            break;

        sprintf(key, "//cluster/clusternodes/clusternode[@name=\"%s\"]/clvm", nodename);
        if (!ccs_get(ctree, key, &clvmflagstr))
        {
            clvmflag = atoi(clvmflagstr);
            free(clvmflagstr);
        }

        DEBUGLOG("Got node %s from ccs (clvmflag = %d)\n", nodename, clvmflag);
        if ((get_ip_address(nodename, nodeip) == 0) && clvmflag)
        {
            struct node_info *ninfo;

            /* If it's not in the list, then add it */
            ninfo = dm_hash_lookup_binary(node_hash, nodeip, GULM_MAX_CSID_LEN);
            if (!ninfo)
            {
                ninfo = malloc(sizeof(struct node_info));
                if (!ninfo)
                {
                    syslog(LOG_ERR, "Cannot alloc memory for node info\n");
                    ccs_disconnect(ctree);
                    return -1;
                }
                strcpy(ninfo->name, nodename);

                ninfo->state = NODE_DOWN;
                dm_hash_insert_binary(node_hash, nodeip, GULM_MAX_CSID_LEN, ninfo);
            }
        }
        else
        {
            if (!clvmflag) {
                DEBUGLOG("node %s has clvm disabled\n", nodename);
            }
            else {
                DEBUGLOG("Cannot resolve host name %s\n", nodename);
                log_error("Cannot resolve host name %s\n", nodename);
            }
        }
        free(nodename);
    }

    /* Finished with config file */
    ccs_disconnect(ctree);

    return 0;
}

static int _get_main_cluster_fd(void)
{
    return get_main_gulm_cluster_fd();
}

static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client)
{
    return cluster_fd_gulm_callback(fd, buf, len, csid, new_client);
}

static int _cluster_send_message(const void *buf, int msglen, const char *csid, const char *errtext)
{
    return gulm_cluster_send_message((char *)buf, msglen, csid, errtext);
}

static int _get_cluster_name(char *buf, int buflen)
{
    strncpy(buf, cluster_name, buflen);
    /* Ensure the name is always NUL-terminated */
    buf[buflen - 1] = '\0';
    return 0;
}

static struct cluster_ops _cluster_gulm_ops = {
    .cluster_init_completed   = NULL,
    .cluster_send_message     = _cluster_send_message,
    .name_from_csid           = gulm_name_from_csid,
    .csid_from_name           = _csid_from_name,
    .get_num_nodes            = _get_num_nodes,
    .cluster_fd_callback      = _cluster_fd_callback,
    .get_main_cluster_fd      = _get_main_cluster_fd,
    .cluster_do_node_callback = _cluster_do_node_callback,
    .is_quorate               = _is_quorate,
    .get_our_csid             = _get_our_csid,
    .add_up_node              = gulm_add_up_node,
    .reread_config            = _reread_config,
    .cluster_closedown        = _cluster_closedown,
    .get_cluster_name         = _get_cluster_name,
    .sync_lock                = _sync_lock,
    .sync_unlock              = _sync_unlock,
};

struct cluster_ops *init_gulm_cluster(void)
{
    if (!_init_cluster())
        return &_cluster_gulm_ops;
    else
        return NULL;
}