1 /* $NetBSD: cluster_locking.c,v 1.1.1.3 2009/12/02 00:26:24 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of LVM2. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 /* 19 * Locking functions for LVM. 20 * The main purpose of this part of the library is to serialise LVM 21 * management operations across a cluster. 22 */ 23 24 #include "lib.h" 25 #include "clvm.h" 26 #include "lvm-string.h" 27 #include "locking.h" 28 #include "locking_types.h" 29 #include "toolcontext.h" 30 31 #include <assert.h> 32 #include <stddef.h> 33 #include <sys/socket.h> 34 #include <sys/un.h> 35 #include <unistd.h> 36 37 #ifndef CLUSTER_LOCKING_INTERNAL 38 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags); 39 int query_resource(const char *resource, int *mode); 40 void locking_end(void); 41 int locking_init(int type, struct config_tree *cf, uint32_t *flags); 42 #endif 43 44 typedef struct lvm_response { 45 char node[255]; 46 char *response; 47 int status; 48 int len; 49 } lvm_response_t; 50 51 /* 52 * This gets stuck at the start of memory we allocate so we 53 * can sanity-check it at deallocation time 54 */ 55 #define LVM_SIGNATURE 0x434C564D 56 57 /* 58 * NOTE: the LVMD uses the socket FD as the client ID, this means 59 * that any client that calls fork() will inherit the context of 60 * it's parent. 61 */ 62 static int _clvmd_sock = -1; 63 64 /* FIXME Install SIGPIPE handler? */ 65 66 /* Open connection to the Cluster Manager daemon */ 67 static int _open_local_sock(void) 68 { 69 int local_socket; 70 struct sockaddr_un sockaddr; 71 72 /* Open local socket */ 73 if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { 74 log_error("Local socket creation failed: %s", strerror(errno)); 75 return -1; 76 } 77 78 memset(&sockaddr, 0, sizeof(sockaddr)); 79 memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); 80 81 sockaddr.sun_family = AF_UNIX; 82 83 if (connect(local_socket,(struct sockaddr *) &sockaddr, 84 sizeof(sockaddr))) { 85 int saved_errno = errno; 86 87 log_error("connect() failed on local socket: %s", 88 strerror(errno)); 89 if (close(local_socket)) 90 stack; 91 92 errno = saved_errno; 93 return -1; 94 } 95 96 return local_socket; 97 } 98 99 /* Send a request and return the status */ 100 static int _send_request(char *inbuf, int inlen, char **retbuf) 101 { 102 char outbuf[PIPE_BUF] __attribute((aligned(8))); 103 struct clvm_header *outheader = (struct clvm_header *) outbuf; 104 int len; 105 int off; 106 int buflen; 107 int err; 108 109 /* Send it to CLVMD */ 110 rewrite: 111 if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) { 112 if (err == -1 && errno == EINTR) 113 goto rewrite; 114 log_error("Error writing data to clvmd: %s", strerror(errno)); 115 return 0; 116 } 117 118 /* Get the response */ 119 reread: 120 if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) { 121 if (errno == EINTR) 122 goto reread; 123 log_error("Error reading data from clvmd: %s", strerror(errno)); 124 return 0; 125 } 126 127 if (len == 0) { 128 log_error("EOF reading CLVMD"); 129 errno = ENOTCONN; 130 return 0; 131 } 132 133 /* Allocate buffer */ 134 buflen = len + outheader->arglen; 135 *retbuf = dm_malloc(buflen); 136 if (!*retbuf) { 137 errno = ENOMEM; 138 return 0; 139 } 140 141 /* Copy the header */ 142 memcpy(*retbuf, outbuf, len); 143 outheader = (struct clvm_header *) *retbuf; 144 145 /* Read the returned values */ 146 off = 1; /* we've already read the first byte */ 147 while (off <= outheader->arglen && len > 0) { 148 len = read(_clvmd_sock, outheader->args + off, 149 buflen - off - offsetof(struct clvm_header, args)); 150 if (len > 0) 151 off += len; 152 } 153 154 /* Was it an error ? */ 155 if (outheader->status != 0) { 156 errno = outheader->status; 157 158 /* Only return an error here if there are no node-specific 159 errors present in the message that might have more detail */ 160 if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) { 161 log_error("cluster request failed: %s", strerror(errno)); 162 return 0; 163 } 164 165 } 166 167 return 1; 168 } 169 170 /* Build the structure header and parse-out wildcard node names */ 171 /* FIXME: Cleanup implicit casts of clvmd_cmd (int, char, uint8_t, etc). */ 172 static void _build_header(struct clvm_header *head, int clvmd_cmd, const char *node, 173 int len) 174 { 175 head->cmd = clvmd_cmd; 176 head->status = 0; 177 head->flags = 0; 178 head->clientid = 0; 179 head->arglen = len; 180 181 if (node) { 182 /* 183 * Allow a couple of special node names: 184 * "*" for all nodes, 185 * "." for the local node only 186 */ 187 if (strcmp(node, "*") == 0) { 188 head->node[0] = '\0'; 189 } else if (strcmp(node, ".") == 0) { 190 head->node[0] = '\0'; 191 head->flags = CLVMD_FLAG_LOCAL; 192 } else 193 strcpy(head->node, node); 194 } else 195 head->node[0] = '\0'; 196 } 197 198 /* 199 * Send a message to a(or all) node(s) in the cluster and wait for replies 200 */ 201 static int _cluster_request(char clvmd_cmd, const char *node, void *data, int len, 202 lvm_response_t ** response, int *num) 203 { 204 char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8))); 205 char *inptr; 206 char *retbuf = NULL; 207 int status; 208 int i; 209 int num_responses = 0; 210 struct clvm_header *head = (struct clvm_header *) outbuf; 211 lvm_response_t *rarray; 212 213 *num = 0; 214 215 if (_clvmd_sock == -1) 216 _clvmd_sock = _open_local_sock(); 217 218 if (_clvmd_sock == -1) 219 return 0; 220 221 _build_header(head, clvmd_cmd, node, len); 222 memcpy(head->node + strlen(head->node) + 1, data, len); 223 224 status = _send_request(outbuf, sizeof(struct clvm_header) + 225 strlen(head->node) + len, &retbuf); 226 if (!status) 227 goto out; 228 229 /* Count the number of responses we got */ 230 head = (struct clvm_header *) retbuf; 231 inptr = head->args; 232 while (inptr[0]) { 233 num_responses++; 234 inptr += strlen(inptr) + 1; 235 inptr += sizeof(int); 236 inptr += strlen(inptr) + 1; 237 } 238 239 /* 240 * Allocate response array. 241 * With an extra pair of INTs on the front to sanity 242 * check the pointer when we are given it back to free 243 */ 244 *response = dm_malloc(sizeof(lvm_response_t) * num_responses); 245 if (!*response) { 246 errno = ENOMEM; 247 status = 0; 248 goto out; 249 } 250 251 rarray = *response; 252 253 /* Unpack the response into an lvm_response_t array */ 254 inptr = head->args; 255 i = 0; 256 while (inptr[0]) { 257 strcpy(rarray[i].node, inptr); 258 inptr += strlen(inptr) + 1; 259 260 memcpy(&rarray[i].status, inptr, sizeof(int)); 261 inptr += sizeof(int); 262 263 rarray[i].response = dm_malloc(strlen(inptr) + 1); 264 if (rarray[i].response == NULL) { 265 /* Free up everything else and return error */ 266 int j; 267 for (j = 0; j < i; j++) 268 dm_free(rarray[j].response); 269 free(*response); 270 errno = ENOMEM; 271 status = -1; 272 goto out; 273 } 274 275 strcpy(rarray[i].response, inptr); 276 rarray[i].len = strlen(inptr); 277 inptr += strlen(inptr) + 1; 278 i++; 279 } 280 *num = num_responses; 281 *response = rarray; 282 283 out: 284 if (retbuf) 285 dm_free(retbuf); 286 287 return status; 288 } 289 290 /* Free reply array */ 291 static int _cluster_free_request(lvm_response_t * response, int num) 292 { 293 int i; 294 295 for (i = 0; i < num; i++) { 296 dm_free(response[i].response); 297 } 298 299 dm_free(response); 300 301 return 1; 302 } 303 304 static int _lock_for_cluster(struct cmd_context *cmd, unsigned char clvmd_cmd, 305 uint32_t flags, const char *name) 306 { 307 int status; 308 int i; 309 char *args; 310 const char *node = ""; 311 int len; 312 int saved_errno = errno; 313 lvm_response_t *response = NULL; 314 int num_responses; 315 316 assert(name); 317 318 len = strlen(name) + 3; 319 args = alloca(len); 320 strcpy(args + 2, name); 321 322 args[0] = flags & 0x7F; /* Maskoff lock flags */ 323 args[1] = flags & 0xC0; /* Bitmap flags */ 324 325 if (mirror_in_sync()) 326 args[1] |= LCK_MIRROR_NOSYNC_MODE; 327 328 if (dmeventd_monitor_mode()) 329 args[1] |= LCK_DMEVENTD_MONITOR_MODE; 330 331 if (cmd->partial_activation) 332 args[1] |= LCK_PARTIAL_MODE; 333 334 /* 335 * VG locks are just that: locks, and have no side effects 336 * so we only need to do them on the local node because all 337 * locks are cluster-wide. 338 * Also, if the lock is exclusive it makes no sense to try to 339 * acquire it on all nodes, so just do that on the local node too. 340 * One exception, is that P_ locks /do/ get distributed across 341 * the cluster because they might have side-effects. 342 */ 343 if (strncmp(name, "P_", 2) && 344 (clvmd_cmd == CLVMD_CMD_LOCK_VG || 345 (flags & LCK_TYPE_MASK) == LCK_EXCL || 346 (flags & LCK_LOCAL) || 347 !(flags & LCK_CLUSTER_VG))) 348 node = "."; 349 350 status = _cluster_request(clvmd_cmd, node, args, len, 351 &response, &num_responses); 352 353 /* If any nodes were down then display them and return an error */ 354 for (i = 0; i < num_responses; i++) { 355 if (response[i].status == EHOSTDOWN) { 356 log_error("clvmd not running on node %s", 357 response[i].node); 358 status = 0; 359 errno = response[i].status; 360 } else if (response[i].status) { 361 log_error("Error locking on node %s: %s", 362 response[i].node, 363 response[i].response[0] ? 364 response[i].response : 365 strerror(response[i].status)); 366 status = 0; 367 errno = response[i].status; 368 } 369 } 370 371 saved_errno = errno; 372 _cluster_free_request(response, num_responses); 373 errno = saved_errno; 374 375 return status; 376 } 377 378 /* API entry point for LVM */ 379 #ifdef CLUSTER_LOCKING_INTERNAL 380 static int _lock_resource(struct cmd_context *cmd, const char *resource, 381 uint32_t flags) 382 #else 383 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags) 384 #endif 385 { 386 char lockname[PATH_MAX]; 387 int clvmd_cmd = 0; 388 const char *lock_scope; 389 const char *lock_type = ""; 390 391 assert(strlen(resource) < sizeof(lockname)); 392 assert(resource); 393 394 switch (flags & LCK_SCOPE_MASK) { 395 case LCK_VG: 396 if (flags == LCK_VG_BACKUP) { 397 log_very_verbose("Requesting backup of VG metadata for %s", 398 resource); 399 return _lock_for_cluster(cmd, CLVMD_CMD_VG_BACKUP, 400 LCK_CLUSTER_VG, resource); 401 } 402 403 /* If the VG name is empty then lock the unused PVs */ 404 if (*resource == '#' || (flags & LCK_CACHE)) 405 dm_snprintf(lockname, sizeof(lockname), "P_%s", 406 resource); 407 else 408 dm_snprintf(lockname, sizeof(lockname), "V_%s", 409 resource); 410 411 lock_scope = "VG"; 412 clvmd_cmd = CLVMD_CMD_LOCK_VG; 413 flags &= LCK_TYPE_MASK; 414 break; 415 416 case LCK_LV: 417 clvmd_cmd = CLVMD_CMD_LOCK_LV; 418 strcpy(lockname, resource); 419 lock_scope = "LV"; 420 flags &= 0xffdf; /* Mask off HOLD flag */ 421 break; 422 423 default: 424 log_error("Unrecognised lock scope: %d", 425 flags & LCK_SCOPE_MASK); 426 return 0; 427 } 428 429 switch(flags & LCK_TYPE_MASK) { 430 case LCK_UNLOCK: 431 lock_type = "UN"; 432 break; 433 case LCK_NULL: 434 lock_type = "NL"; 435 break; 436 case LCK_READ: 437 lock_type = "CR"; 438 break; 439 case LCK_PREAD: 440 lock_type = "PR"; 441 break; 442 case LCK_WRITE: 443 lock_type = "PW"; 444 break; 445 case LCK_EXCL: 446 lock_type = "EX"; 447 break; 448 default: 449 log_error("Unrecognised lock type: %u", 450 flags & LCK_TYPE_MASK); 451 return 0; 452 } 453 454 log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname, 455 lock_type, 456 flags & LCK_NONBLOCK ? "" : "B", 457 flags & LCK_HOLD ? "H" : "", 458 flags & LCK_LOCAL ? "L" : "", 459 flags & LCK_CLUSTER_VG ? "C" : "", 460 flags); 461 462 /* Send a message to the cluster manager */ 463 return _lock_for_cluster(cmd, clvmd_cmd, flags, lockname); 464 } 465 466 static int decode_lock_type(const char *response) 467 { 468 if (!response) 469 return LCK_NULL; 470 else if (strcmp(response, "EX")) 471 return LCK_EXCL; 472 else if (strcmp(response, "CR")) 473 return LCK_READ; 474 else if (strcmp(response, "PR")) 475 return LCK_PREAD; 476 477 stack; 478 return 0; 479 } 480 481 #ifdef CLUSTER_LOCKING_INTERNAL 482 static int _query_resource(const char *resource, int *mode) 483 #else 484 int query_resource(const char *resource, int *mode) 485 #endif 486 { 487 int i, status, len, num_responses, saved_errno; 488 const char *node = ""; 489 char *args; 490 lvm_response_t *response = NULL; 491 492 saved_errno = errno; 493 len = strlen(resource) + 3; 494 args = alloca(len); 495 strcpy(args + 2, resource); 496 497 args[0] = 0; 498 args[1] = LCK_CLUSTER_VG; 499 500 status = _cluster_request(CLVMD_CMD_LOCK_QUERY, node, args, len, 501 &response, &num_responses); 502 *mode = LCK_NULL; 503 for (i = 0; i < num_responses; i++) { 504 if (response[i].status == EHOSTDOWN) 505 continue; 506 507 if (!response[i].response[0]) 508 continue; 509 510 /* 511 * All nodes should use CR, or exactly one node 512 * should held EX. (PR is obsolete) 513 * If two nodes node reports different locks, 514 * something is broken - just return more important mode. 515 */ 516 if (decode_lock_type(response[i].response) > *mode) 517 *mode = decode_lock_type(response[i].response); 518 519 log_debug("Lock held for %s, node %s : %s", resource, 520 response[i].node, response[i].response); 521 } 522 523 _cluster_free_request(response, num_responses); 524 errno = saved_errno; 525 526 return status; 527 } 528 529 #ifdef CLUSTER_LOCKING_INTERNAL 530 static void _locking_end(void) 531 #else 532 void locking_end(void) 533 #endif 534 { 535 if (_clvmd_sock != -1 && close(_clvmd_sock)) 536 stack; 537 538 _clvmd_sock = -1; 539 } 540 541 #ifdef CLUSTER_LOCKING_INTERNAL 542 static void _reset_locking(void) 543 #else 544 void reset_locking(void) 545 #endif 546 { 547 if (close(_clvmd_sock)) 548 stack; 549 550 _clvmd_sock = _open_local_sock(); 551 if (_clvmd_sock == -1) 552 stack; 553 } 554 555 #ifdef CLUSTER_LOCKING_INTERNAL 556 int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd) 557 { 558 locking->lock_resource = _lock_resource; 559 locking->query_resource = _query_resource; 560 locking->fin_locking = _locking_end; 561 locking->reset_locking = _reset_locking; 562 locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED; 563 564 _clvmd_sock = _open_local_sock(); 565 if (_clvmd_sock == -1) 566 return 0; 567 568 return 1; 569 } 570 #else 571 int locking_init(int type, struct config_tree *cf, uint32_t *flags) 572 { 573 _clvmd_sock = _open_local_sock(); 574 if (_clvmd_sock == -1) 575 return 0; 576 577 /* Ask LVM to lock memory before calling us */ 578 *flags |= LCK_PRE_MEMLOCK; 579 *flags |= LCK_CLUSTERED; 580 581 return 1; 582 } 583 #endif 584