1 /* $NetBSD: clvmd-openais.c,v 1.1.1.2 2009/12/02 00:27:03 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2007-2009 Red Hat, Inc. All rights reserved. 5 * 6 * This file is part of LVM2. 7 * 8 * This copyrighted material is made available to anyone wishing to use, 9 * modify, copy, or redistribute it subject to the terms and conditions 10 * of the GNU Lesser General Public License v.2.1. 11 * 12 * You should have received a copy of the GNU Lesser General Public License 13 * along with this program; if not, write to the Free Software Foundation, 14 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 15 */ 16 17 /* 18 * This provides the interface between clvmd and OpenAIS as the cluster 19 * and lock manager. 20 */ 21 22 #define _GNU_SOURCE 23 #define _FILE_OFFSET_BITS 64 24 25 #include <configure.h> 26 #include <pthread.h> 27 #include <sys/types.h> 28 #include <sys/utsname.h> 29 #include <sys/ioctl.h> 30 #include <sys/socket.h> 31 #include <sys/stat.h> 32 #include <sys/file.h> 33 #include <sys/socket.h> 34 #include <netinet/in.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <stdint.h> 38 #include <signal.h> 39 #include <fcntl.h> 40 #include <string.h> 41 #include <stddef.h> 42 #include <stdint.h> 43 #include <unistd.h> 44 #include <errno.h> 45 #include <utmpx.h> 46 #include <syslog.h> 47 #include <assert.h> 48 #include <libdevmapper.h> 49 50 #include <openais/saAis.h> 51 #include <openais/saLck.h> 52 53 #include <corosync/corotypes.h> 54 #include <corosync/cpg.h> 55 56 #include "locking.h" 57 #include "lvm-logging.h" 58 #include "clvm.h" 59 #include "clvmd-comms.h" 60 #include "lvm-functions.h" 61 #include "clvmd.h" 62 63 /* Timeout value for several openais calls */ 64 #define TIMEOUT 10 65 66 static void openais_cpg_deliver_callback (cpg_handle_t handle, 67 const struct cpg_name *groupName, 68 uint32_t nodeid, 69 uint32_t pid, 70 void *msg, 71 size_t msg_len); 72 static void openais_cpg_confchg_callback(cpg_handle_t handle, 73 const struct cpg_name *groupName, 74 const struct cpg_address *member_list, size_t member_list_entries, 75 const struct cpg_address *left_list, size_t left_list_entries, 76 const struct cpg_address *joined_list, size_t joined_list_entries); 77 78 static void _cluster_closedown(void); 79 80 /* Hash list of nodes in the cluster */ 81 static struct dm_hash_table *node_hash; 82 83 /* For associating lock IDs & resource handles */ 84 static struct dm_hash_table *lock_hash; 85 86 /* Number of active nodes */ 87 static int num_nodes; 88 static unsigned int our_nodeid; 89 90 static struct local_client *cluster_client; 91 92 /* OpenAIS handles */ 93 static cpg_handle_t cpg_handle; 94 static SaLckHandleT lck_handle; 95 96 static struct cpg_name cpg_group_name; 97 98 /* Openais callback structs */ 99 cpg_callbacks_t openais_cpg_callbacks = { 100 .cpg_deliver_fn = openais_cpg_deliver_callback, 101 .cpg_confchg_fn = openais_cpg_confchg_callback, 102 }; 103 104 struct node_info 105 { 106 enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state; 107 int nodeid; 108 }; 109 110 struct lock_info 111 { 112 SaLckResourceHandleT res_handle; 113 SaLckLockIdT lock_id; 114 SaNameT lock_name; 115 }; 116 117 /* Set errno to something approximating the right value and return 0 or -1 */ 118 static int ais_to_errno(SaAisErrorT err) 119 { 120 switch(err) 121 { 122 case SA_AIS_OK: 123 return 0; 124 case SA_AIS_ERR_LIBRARY: 125 errno = EINVAL; 126 break; 127 case SA_AIS_ERR_VERSION: 128 errno = EINVAL; 129 break; 130 case SA_AIS_ERR_INIT: 131 errno = EINVAL; 132 break; 133 case SA_AIS_ERR_TIMEOUT: 134 errno = ETIME; 135 break; 136 case SA_AIS_ERR_TRY_AGAIN: 137 errno = EAGAIN; 138 break; 139 case SA_AIS_ERR_INVALID_PARAM: 140 errno = EINVAL; 141 break; 142 case SA_AIS_ERR_NO_MEMORY: 143 errno = ENOMEM; 144 break; 145 case SA_AIS_ERR_BAD_HANDLE: 146 errno = EINVAL; 147 break; 148 case SA_AIS_ERR_BUSY: 149 errno = EBUSY; 150 break; 151 case SA_AIS_ERR_ACCESS: 152 errno = EPERM; 153 break; 154 case SA_AIS_ERR_NOT_EXIST: 155 errno = ENOENT; 156 break; 157 case SA_AIS_ERR_NAME_TOO_LONG: 158 errno = ENAMETOOLONG; 159 break; 160 case SA_AIS_ERR_EXIST: 161 errno = EEXIST; 162 break; 163 case SA_AIS_ERR_NO_SPACE: 164 errno = ENOSPC; 165 break; 166 case SA_AIS_ERR_INTERRUPT: 167 errno = EINTR; 168 break; 169 case SA_AIS_ERR_NAME_NOT_FOUND: 170 errno = ENOENT; 171 break; 172 case SA_AIS_ERR_NO_RESOURCES: 173 errno = ENOMEM; 174 break; 175 case SA_AIS_ERR_NOT_SUPPORTED: 176 errno = EOPNOTSUPP; 177 break; 178 case SA_AIS_ERR_BAD_OPERATION: 179 errno = EINVAL; 180 break; 181 case SA_AIS_ERR_FAILED_OPERATION: 182 errno = EIO; 183 break; 184 case SA_AIS_ERR_MESSAGE_ERROR: 185 errno = EIO; 186 break; 187 case SA_AIS_ERR_QUEUE_FULL: 188 errno = EXFULL; 189 break; 190 case SA_AIS_ERR_QUEUE_NOT_AVAILABLE: 191 errno = EINVAL; 192 break; 193 case SA_AIS_ERR_BAD_FLAGS: 194 errno = EINVAL; 195 break; 196 case SA_AIS_ERR_TOO_BIG: 197 errno = E2BIG; 198 break; 199 case SA_AIS_ERR_NO_SECTIONS: 200 errno = ENOMEM; 201 break; 202 default: 203 errno = EINVAL; 204 break; 205 } 206 return -1; 207 } 208 209 static char *print_openais_csid(const char *csid) 210 { 211 static char buf[128]; 212 int id; 213 214 memcpy(&id, csid, sizeof(int)); 215 sprintf(buf, "%d", id); 216 return buf; 217 } 218 219 static int add_internal_client(int fd, fd_callback_t callback) 220 { 221 struct local_client *client; 222 223 DEBUGLOG("Add_internal_client, fd = %d\n", fd); 224 225 client = malloc(sizeof(struct local_client)); 226 if (!client) 227 { 228 DEBUGLOG("malloc failed\n"); 229 return -1; 230 } 231 232 memset(client, 0, sizeof(struct local_client)); 233 client->fd = fd; 234 client->type = CLUSTER_INTERNAL; 235 client->callback = callback; 236 add_client(client); 237 238 /* Set Close-on-exec */ 239 fcntl(fd, F_SETFD, 1); 240 241 return 0; 242 } 243 244 static void openais_cpg_deliver_callback (cpg_handle_t handle, 245 const struct cpg_name *groupName, 246 uint32_t nodeid, 247 uint32_t pid, 248 void *msg, 249 size_t msg_len) 250 { 251 int target_nodeid; 252 253 memcpy(&target_nodeid, msg, OPENAIS_CSID_LEN); 254 255 DEBUGLOG("%u got message from nodeid %d for %d. len %d\n", 256 our_nodeid, nodeid, target_nodeid, msg_len-4); 257 258 if (nodeid != our_nodeid) 259 if (target_nodeid == our_nodeid || target_nodeid == 0) 260 process_message(cluster_client, (char *)msg+OPENAIS_CSID_LEN, 261 msg_len-OPENAIS_CSID_LEN, (char*)&nodeid); 262 } 263 264 static void openais_cpg_confchg_callback(cpg_handle_t handle, 265 const struct cpg_name *groupName, 266 const struct cpg_address *member_list, size_t member_list_entries, 267 const struct cpg_address *left_list, size_t left_list_entries, 268 const struct cpg_address *joined_list, size_t joined_list_entries) 269 { 270 int i; 271 struct node_info *ninfo; 272 273 DEBUGLOG("confchg callback. %d joined, %d left, %d members\n", 274 joined_list_entries, left_list_entries, member_list_entries); 275 276 for (i=0; i<joined_list_entries; i++) { 277 ninfo = dm_hash_lookup_binary(node_hash, 278 (char *)&joined_list[i].nodeid, 279 OPENAIS_CSID_LEN); 280 if (!ninfo) { 281 ninfo = malloc(sizeof(struct node_info)); 282 if (!ninfo) { 283 break; 284 } 285 else { 286 ninfo->nodeid = joined_list[i].nodeid; 287 dm_hash_insert_binary(node_hash, 288 (char *)&ninfo->nodeid, 289 OPENAIS_CSID_LEN, ninfo); 290 } 291 } 292 ninfo->state = NODE_CLVMD; 293 } 294 295 for (i=0; i<left_list_entries; i++) { 296 ninfo = dm_hash_lookup_binary(node_hash, 297 (char *)&left_list[i].nodeid, 298 OPENAIS_CSID_LEN); 299 if (ninfo) 300 ninfo->state = NODE_DOWN; 301 } 302 303 for (i=0; i<member_list_entries; i++) { 304 if (member_list[i].nodeid == 0) continue; 305 ninfo = dm_hash_lookup_binary(node_hash, 306 (char *)&member_list[i].nodeid, 307 OPENAIS_CSID_LEN); 308 if (!ninfo) { 309 ninfo = malloc(sizeof(struct node_info)); 310 if (!ninfo) { 311 break; 312 } 313 else { 314 ninfo->nodeid = member_list[i].nodeid; 315 dm_hash_insert_binary(node_hash, 316 (char *)&ninfo->nodeid, 317 OPENAIS_CSID_LEN, ninfo); 318 } 319 } 320 ninfo->state = NODE_CLVMD; 321 } 322 323 num_nodes = member_list_entries; 324 } 325 326 static int lck_dispatch(struct local_client *client, char *buf, int len, 327 const char *csid, struct local_client **new_client) 328 { 329 *new_client = NULL; 330 saLckDispatch(lck_handle, SA_DISPATCH_ONE); 331 return 1; 332 } 333 334 static int _init_cluster(void) 335 { 336 SaAisErrorT err; 337 SaVersionT ver = { 'B', 1, 1 }; 338 int select_fd; 339 340 node_hash = dm_hash_create(100); 341 lock_hash = dm_hash_create(10); 342 343 err = cpg_initialize(&cpg_handle, 344 &openais_cpg_callbacks); 345 if (err != SA_AIS_OK) { 346 syslog(LOG_ERR, "Cannot initialise OpenAIS CPG service: %d", 347 err); 348 DEBUGLOG("Cannot initialise OpenAIS CPG service: %d", err); 349 return ais_to_errno(err); 350 } 351 352 err = saLckInitialize(&lck_handle, 353 NULL, 354 &ver); 355 if (err != SA_AIS_OK) { 356 cpg_initialize(&cpg_handle, &openais_cpg_callbacks); 357 syslog(LOG_ERR, "Cannot initialise OpenAIS lock service: %d", 358 err); 359 DEBUGLOG("Cannot initialise OpenAIS lock service: %d\n\n", err); 360 return ais_to_errno(err); 361 } 362 363 /* Connect to the clvmd group */ 364 strcpy((char *)cpg_group_name.value, "clvmd"); 365 cpg_group_name.length = strlen((char *)cpg_group_name.value); 366 err = cpg_join(cpg_handle, &cpg_group_name); 367 if (err != SA_AIS_OK) { 368 cpg_finalize(cpg_handle); 369 saLckFinalize(lck_handle); 370 syslog(LOG_ERR, "Cannot join clvmd process group"); 371 DEBUGLOG("Cannot join clvmd process group: %d\n", err); 372 return ais_to_errno(err); 373 } 374 375 err = cpg_local_get(cpg_handle, 376 &our_nodeid); 377 if (err != SA_AIS_OK) { 378 cpg_finalize(cpg_handle); 379 saLckFinalize(lck_handle); 380 syslog(LOG_ERR, "Cannot get local node id\n"); 381 return ais_to_errno(err); 382 } 383 DEBUGLOG("Our local node id is %d\n", our_nodeid); 384 385 saLckSelectionObjectGet(lck_handle, (SaSelectionObjectT *)&select_fd); 386 add_internal_client(select_fd, lck_dispatch); 387 388 DEBUGLOG("Connected to OpenAIS\n"); 389 390 return 0; 391 } 392 393 static void _cluster_closedown(void) 394 { 395 DEBUGLOG("cluster_closedown\n"); 396 destroy_lvhash(); 397 398 saLckFinalize(lck_handle); 399 cpg_finalize(cpg_handle); 400 } 401 402 static void _get_our_csid(char *csid) 403 { 404 memcpy(csid, &our_nodeid, sizeof(int)); 405 } 406 407 /* OpenAIS doesn't really have nmode names so we 408 just use the node ID in hex instead */ 409 static int _csid_from_name(char *csid, const char *name) 410 { 411 int nodeid; 412 struct node_info *ninfo; 413 414 if (sscanf(name, "%x", &nodeid) == 1) { 415 ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN); 416 if (ninfo) 417 return nodeid; 418 } 419 return -1; 420 } 421 422 static int _name_from_csid(const char *csid, char *name) 423 { 424 struct node_info *ninfo; 425 426 ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN); 427 if (!ninfo) 428 { 429 sprintf(name, "UNKNOWN %s", print_openais_csid(csid)); 430 return -1; 431 } 432 433 sprintf(name, "%x", ninfo->nodeid); 434 return 0; 435 } 436 437 static int _get_num_nodes() 438 { 439 DEBUGLOG("num_nodes = %d\n", num_nodes); 440 return num_nodes; 441 } 442 443 /* Node is now known to be running a clvmd */ 444 static void _add_up_node(const char *csid) 445 { 446 struct node_info *ninfo; 447 448 ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN); 449 if (!ninfo) { 450 DEBUGLOG("openais_add_up_node no node_hash entry for csid %s\n", 451 print_openais_csid(csid)); 452 return; 453 } 454 455 DEBUGLOG("openais_add_up_node %d\n", ninfo->nodeid); 456 457 ninfo->state = NODE_CLVMD; 458 459 return; 460 } 461 462 /* Call a callback for each node, so the caller knows whether it's up or down */ 463 static int _cluster_do_node_callback(struct local_client *master_client, 464 void (*callback)(struct local_client *, 465 const char *csid, int node_up)) 466 { 467 struct dm_hash_node *hn; 468 struct node_info *ninfo; 469 int somedown = 0; 470 471 dm_hash_iterate(hn, node_hash) 472 { 473 char csid[OPENAIS_CSID_LEN]; 474 475 ninfo = dm_hash_get_data(node_hash, hn); 476 memcpy(csid, dm_hash_get_key(node_hash, hn), OPENAIS_CSID_LEN); 477 478 DEBUGLOG("down_callback. node %d, state = %d\n", ninfo->nodeid, 479 ninfo->state); 480 481 if (ninfo->state != NODE_DOWN) 482 callback(master_client, csid, ninfo->state == NODE_CLVMD); 483 if (ninfo->state != NODE_CLVMD) 484 somedown = -1; 485 } 486 return somedown; 487 } 488 489 /* Real locking */ 490 static int _lock_resource(char *resource, int mode, int flags, int *lockid) 491 { 492 struct lock_info *linfo; 493 SaLckResourceHandleT res_handle; 494 SaAisErrorT err; 495 SaLckLockIdT lock_id; 496 SaLckLockStatusT lockStatus; 497 498 /* This needs to be converted from DLM/LVM2 value for OpenAIS LCK */ 499 if (flags & LCK_NONBLOCK) flags = SA_LCK_LOCK_NO_QUEUE; 500 501 linfo = malloc(sizeof(struct lock_info)); 502 if (!linfo) 503 return -1; 504 505 DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode); 506 507 linfo->lock_name.length = strlen(resource)+1; 508 strcpy((char *)linfo->lock_name.value, resource); 509 510 err = saLckResourceOpen(lck_handle, &linfo->lock_name, 511 SA_LCK_RESOURCE_CREATE, TIMEOUT, &res_handle); 512 if (err != SA_AIS_OK) 513 { 514 DEBUGLOG("ResourceOpen returned %d\n", err); 515 free(linfo); 516 return ais_to_errno(err); 517 } 518 519 err = saLckResourceLock( 520 res_handle, 521 &lock_id, 522 mode, 523 flags, 524 0, 525 SA_TIME_END, 526 &lockStatus); 527 if (err != SA_AIS_OK && lockStatus != SA_LCK_LOCK_GRANTED) 528 { 529 free(linfo); 530 saLckResourceClose(res_handle); 531 return ais_to_errno(err); 532 } 533 534 /* Wait for it to complete */ 535 536 DEBUGLOG("lock_resource returning %d, lock_id=%llx\n", err, 537 lock_id); 538 539 linfo->lock_id = lock_id; 540 linfo->res_handle = res_handle; 541 542 dm_hash_insert(lock_hash, resource, linfo); 543 544 return ais_to_errno(err); 545 } 546 547 548 static int _unlock_resource(char *resource, int lockid) 549 { 550 SaAisErrorT err; 551 struct lock_info *linfo; 552 553 DEBUGLOG("unlock_resource %s\n", resource); 554 linfo = dm_hash_lookup(lock_hash, resource); 555 if (!linfo) 556 return 0; 557 558 DEBUGLOG("unlock_resource: lockid: %llx\n", linfo->lock_id); 559 err = saLckResourceUnlock(linfo->lock_id, SA_TIME_END); 560 if (err != SA_AIS_OK) 561 { 562 DEBUGLOG("Unlock returned %d\n", err); 563 return ais_to_errno(err); 564 } 565 566 /* Release the resource */ 567 dm_hash_remove(lock_hash, resource); 568 saLckResourceClose(linfo->res_handle); 569 free(linfo); 570 571 return ais_to_errno(err); 572 } 573 574 static int _sync_lock(const char *resource, int mode, int flags, int *lockid) 575 { 576 int status; 577 char lock1[strlen(resource)+3]; 578 char lock2[strlen(resource)+3]; 579 580 snprintf(lock1, sizeof(lock1), "%s-1", resource); 581 snprintf(lock2, sizeof(lock2), "%s-2", resource); 582 583 switch (mode) 584 { 585 case LCK_EXCL: 586 status = _lock_resource(lock1, SA_LCK_EX_LOCK_MODE, flags, lockid); 587 if (status) 588 goto out; 589 590 /* If we can't get this lock too then bail out */ 591 status = _lock_resource(lock2, SA_LCK_EX_LOCK_MODE, LCK_NONBLOCK, 592 lockid); 593 if (status == SA_LCK_LOCK_NOT_QUEUED) 594 { 595 _unlock_resource(lock1, *lockid); 596 status = -1; 597 errno = EAGAIN; 598 } 599 break; 600 601 case LCK_PREAD: 602 case LCK_READ: 603 status = _lock_resource(lock1, SA_LCK_PR_LOCK_MODE, flags, lockid); 604 if (status) 605 goto out; 606 _unlock_resource(lock2, *lockid); 607 break; 608 609 case LCK_WRITE: 610 status = _lock_resource(lock2, SA_LCK_EX_LOCK_MODE, flags, lockid); 611 if (status) 612 goto out; 613 _unlock_resource(lock1, *lockid); 614 break; 615 616 default: 617 status = -1; 618 errno = EINVAL; 619 break; 620 } 621 out: 622 *lockid = mode; 623 return status; 624 } 625 626 static int _sync_unlock(const char *resource, int lockid) 627 { 628 int status = 0; 629 char lock1[strlen(resource)+3]; 630 char lock2[strlen(resource)+3]; 631 632 snprintf(lock1, sizeof(lock1), "%s-1", resource); 633 snprintf(lock2, sizeof(lock2), "%s-2", resource); 634 635 _unlock_resource(lock1, lockid); 636 _unlock_resource(lock2, lockid); 637 638 return status; 639 } 640 641 /* We are always quorate ! */ 642 static int _is_quorate() 643 { 644 return 1; 645 } 646 647 static int _get_main_cluster_fd(void) 648 { 649 int select_fd; 650 651 cpg_fd_get(cpg_handle, &select_fd); 652 return select_fd; 653 } 654 655 static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, 656 const char *csid, 657 struct local_client **new_client) 658 { 659 cluster_client = fd; 660 *new_client = NULL; 661 cpg_dispatch(cpg_handle, SA_DISPATCH_ONE); 662 return 1; 663 } 664 665 static int _cluster_send_message(const void *buf, int msglen, const char *csid, 666 const char *errtext) 667 { 668 struct iovec iov[2]; 669 SaAisErrorT err; 670 int target_node; 671 672 if (csid) 673 memcpy(&target_node, csid, OPENAIS_CSID_LEN); 674 else 675 target_node = 0; 676 677 iov[0].iov_base = &target_node; 678 iov[0].iov_len = sizeof(int); 679 iov[1].iov_base = (char *)buf; 680 iov[1].iov_len = msglen; 681 682 err = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 2); 683 return ais_to_errno(err); 684 } 685 686 /* We don't have a cluster name to report here */ 687 static int _get_cluster_name(char *buf, int buflen) 688 { 689 strncpy(buf, "OpenAIS", buflen); 690 return 0; 691 } 692 693 static struct cluster_ops _cluster_openais_ops = { 694 .cluster_init_completed = NULL, 695 .cluster_send_message = _cluster_send_message, 696 .name_from_csid = _name_from_csid, 697 .csid_from_name = _csid_from_name, 698 .get_num_nodes = _get_num_nodes, 699 .cluster_fd_callback = _cluster_fd_callback, 700 .get_main_cluster_fd = _get_main_cluster_fd, 701 .cluster_do_node_callback = _cluster_do_node_callback, 702 .is_quorate = _is_quorate, 703 .get_our_csid = _get_our_csid, 704 .add_up_node = _add_up_node, 705 .reread_config = NULL, 706 .cluster_closedown = _cluster_closedown, 707 .get_cluster_name = _get_cluster_name, 708 .sync_lock = _sync_lock, 709 .sync_unlock = _sync_unlock, 710 }; 711 712 struct cluster_ops *init_openais_cluster(void) 713 { 714 if (!_init_cluster()) 715 return &_cluster_openais_ops; 716 else 717 return NULL; 718 } 719