/*	$NetBSD: cluster_locking.c,v 1.1.1.3 2009/12/02 00:26:24 haad Exp $	*/

/*
 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * Locking functions for LVM.
 * The main purpose of this part of the library is to serialise LVM
 * management operations across a cluster.
 */

#include "lib.h"
#include "clvm.h"
#include "lvm-string.h"
#include "locking.h"
#include "locking_types.h"
#include "toolcontext.h"

#include <assert.h>
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

#ifndef CLUSTER_LOCKING_INTERNAL
int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags);
int query_resource(const char *resource, int *mode);
void locking_end(void);
int locking_init(int type, struct config_tree *cf, uint32_t *flags);
#endif

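/*
 * One entry per replying node: the node name, its status code and
 * the text of its reply (with its length).
 */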
typedef struct lvm_response {
	char node[255];
	char *response;
	int status;
	int len;
} lvm_response_t;

/*
 * This gets stuck at the start of memory we allocate so we
 * can sanity-check it at deallocation time
 */
#define LVM_SIGNATURE 0x434C564D

/*
 * NOTE: clvmd uses the socket FD as the client ID, so any client
 * that calls fork() will inherit its parent's context.
 */
static int _clvmd_sock = -1;

/* FIXME Install SIGPIPE handler? */

/* Open connection to the Cluster Manager daemon */
static int _open_local_sock(void)
{
	int local_socket;
	struct sockaddr_un sockaddr;

	/* Open local socket */
	if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
		log_error("Local socket creation failed: %s", strerror(errno));
		return -1;
	}

	memset(&sockaddr, 0, sizeof(sockaddr));
	memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));

	sockaddr.sun_family = AF_UNIX;

	if (connect(local_socket,(struct sockaddr *) &sockaddr,
		    sizeof(sockaddr))) {
		int saved_errno = errno;

		log_error("connect() failed on local socket: %s",
			  strerror(errno));
		if (close(local_socket))
			stack;

		errno = saved_errno;
		return -1;
	}

	return local_socket;
}

/* Send a request and return the status */
static int _send_request(char *inbuf, int inlen, char **retbuf)
{
	char outbuf[PIPE_BUF] __attribute((aligned(8)));
	struct clvm_header *outheader = (struct clvm_header *) outbuf;
	int len;
	int off;
	int buflen;
	int err;

	/* Send it to CLVMD */
 rewrite:
	if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
		if (err == -1 && errno == EINTR)
			goto rewrite;
		log_error("Error writing data to clvmd: %s", strerror(errno));
		return 0;
	}

	/* Get the response */
 reread:
	if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
		if (errno == EINTR)
			goto reread;
		log_error("Error reading data from clvmd: %s", strerror(errno));
		return 0;
	}

	if (len == 0) {
		log_error("EOF reading CLVMD");
		errno = ENOTCONN;
		return 0;
	}

	/* Allocate buffer */
	buflen = len + outheader->arglen;
	*retbuf = dm_malloc(buflen);
	if (!*retbuf) {
		errno = ENOMEM;
		return 0;
	}

	/* Copy the header */
	memcpy(*retbuf, outbuf, len);
	outheader = (struct clvm_header *) *retbuf;

	/* Read the returned values */
	off = 1;		/* we've already read the first byte */
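	/*
	 * Keep reading until all 'arglen' argument bytes have arrived;
	 * the first byte came in with the header above.
	 */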
	while (off <= outheader->arglen && len > 0) {
		len = read(_clvmd_sock, outheader->args + off,
			   buflen - off - offsetof(struct clvm_header, args));
		if (len > 0)
			off += len;
	}

	/* Was it an error? */
	if (outheader->status != 0) {
		errno = outheader->status;

		/* Only return an error here if there are no node-specific
		   errors present in the message that might have more detail */
		if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) {
			log_error("cluster request failed: %s", strerror(errno));
			return 0;
		}

	}

	return 1;
}

/* Build the structure header and parse out wildcard node names */
/* FIXME: Clean up implicit casts of clvmd_cmd (int, char, uint8_t, etc). */
static void _build_header(struct clvm_header *head, int clvmd_cmd, const char *node,
			  int len)
{
	head->cmd = clvmd_cmd;
	head->status = 0;
	head->flags = 0;
	head->clientid = 0;
	head->arglen = len;

	if (node) {
		/*
		 * Allow a couple of special node names:
		 * "*" for all nodes,
		 * "." for the local node only
		 */
		if (strcmp(node, "*") == 0) {
			head->node[0] = '\0';
		} else if (strcmp(node, ".") == 0) {
			head->node[0] = '\0';
			head->flags = CLVMD_FLAG_LOCAL;
		} else
			strcpy(head->node, node);
	} else
		head->node[0] = '\0';
}

/*
 * Send a message to a (or all) node(s) in the cluster and wait for replies
 */
static int _cluster_request(char clvmd_cmd, const char *node, void *data, int len,
			   lvm_response_t ** response, int *num)
{
	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8)));
	char *inptr;
	char *retbuf = NULL;
	int status;
	int i;
	int num_responses = 0;
	struct clvm_header *head = (struct clvm_header *) outbuf;
	lvm_response_t *rarray;

	*num = 0;

	if (_clvmd_sock == -1)
		_clvmd_sock = _open_local_sock();

	if (_clvmd_sock == -1)
		return 0;

	_build_header(head, clvmd_cmd, node, len);
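	/* The request data follows the NUL-terminated node name at the end of the header. */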
	memcpy(head->node + strlen(head->node) + 1, data, len);

	status = _send_request(outbuf, sizeof(struct clvm_header) +
			      strlen(head->node) + len, &retbuf);
	if (!status)
		goto out;

	/* Count the number of responses we got */
	head = (struct clvm_header *) retbuf;
	inptr = head->args;
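	/*
	 * Each reply record is laid out as: node name (NUL-terminated),
	 * int status, then the reply text (NUL-terminated).
	 */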
	while (inptr[0]) {
		num_responses++;
		inptr += strlen(inptr) + 1;
		inptr += sizeof(int);
		inptr += strlen(inptr) + 1;
	}

	/* Allocate the response array */
	*response = dm_malloc(sizeof(lvm_response_t) * num_responses);
	if (!*response) {
		errno = ENOMEM;
		status = 0;
		goto out;
	}

	rarray = *response;

	/* Unpack the response into an lvm_response_t array */
	inptr = head->args;
	i = 0;
	while (inptr[0]) {
		strcpy(rarray[i].node, inptr);
		inptr += strlen(inptr) + 1;

		memcpy(&rarray[i].status, inptr, sizeof(int));
		inptr += sizeof(int);

		rarray[i].response = dm_malloc(strlen(inptr) + 1);
		if (rarray[i].response == NULL) {
			/* Free up everything else and return an error */
			int j;
			for (j = 0; j < i; j++)
				dm_free(rarray[j].response);
			dm_free(*response);
			*response = NULL;	/* Don't leave a dangling pointer for the caller to free again */
			errno = ENOMEM;
			status = 0;
			goto out;
		}

		strcpy(rarray[i].response, inptr);
		rarray[i].len = strlen(inptr);
		inptr += strlen(inptr) + 1;
		i++;
	}
	*num = num_responses;
	*response = rarray;

      out:
	if (retbuf)
		dm_free(retbuf);

	return status;
}

/* Free reply array */
static int _cluster_free_request(lvm_response_t * response, int num)
{
	int i;

	for (i = 0; i < num; i++) {
		dm_free(response[i].response);
	}

	dm_free(response);

	return 1;
}

static int _lock_for_cluster(struct cmd_context *cmd, unsigned char clvmd_cmd,
			     uint32_t flags, const char *name)
{
	int status;
	int i;
	char *args;
	const char *node = "";
	int len;
	int saved_errno = errno;
	lvm_response_t *response = NULL;
	int num_responses;

	assert(name);

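	/*
	 * Request payload: byte 0 holds the lock flags, byte 1 an options
	 * bitmap, followed by the NUL-terminated resource name.
	 */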
	len = strlen(name) + 3;
	args = alloca(len);
	strcpy(args + 2, name);

	args[0] = flags & 0x7F; /* Mask off lock flags */
	args[1] = flags & 0xC0; /* Bitmap flags */

	if (mirror_in_sync())
		args[1] |= LCK_MIRROR_NOSYNC_MODE;

	if (dmeventd_monitor_mode())
		args[1] |= LCK_DMEVENTD_MONITOR_MODE;

	if (cmd->partial_activation)
		args[1] |= LCK_PARTIAL_MODE;

	/*
	 * VG locks are just that: locks, and have no side effects
	 * so we only need to do them on the local node because all
	 * locks are cluster-wide.
	 * Also, if the lock is exclusive it makes no sense to try to
	 * acquire it on all nodes, so just do that on the local node too.
	 * One exception is that P_ locks /do/ get distributed across
	 * the cluster because they might have side-effects.
	 */
	if (strncmp(name, "P_", 2) &&
	    (clvmd_cmd == CLVMD_CMD_LOCK_VG ||
	     (flags & LCK_TYPE_MASK) == LCK_EXCL ||
	     (flags & LCK_LOCAL) ||
	     !(flags & LCK_CLUSTER_VG)))
		node = ".";

	status = _cluster_request(clvmd_cmd, node, args, len,
				  &response, &num_responses);

	/* If any nodes were down then display them and return an error */
	for (i = 0; i < num_responses; i++) {
		if (response[i].status == EHOSTDOWN) {
			log_error("clvmd not running on node %s",
				  response[i].node);
			status = 0;
			errno = response[i].status;
		} else if (response[i].status) {
			log_error("Error locking on node %s: %s",
				  response[i].node,
				  response[i].response[0] ?
				  	response[i].response :
				  	strerror(response[i].status));
			status = 0;
			errno = response[i].status;
		}
	}

	saved_errno = errno;
	_cluster_free_request(response, num_responses);
	errno = saved_errno;

	return status;
}

/* API entry point for LVM */
#ifdef CLUSTER_LOCKING_INTERNAL
static int _lock_resource(struct cmd_context *cmd, const char *resource,
			  uint32_t flags)
#else
int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags)
#endif
{
	char lockname[PATH_MAX];
	int clvmd_cmd = 0;
	const char *lock_scope;
	const char *lock_type = "";

	assert(resource);
	assert(strlen(resource) < sizeof(lockname));

	switch (flags & LCK_SCOPE_MASK) {
	case LCK_VG:
		if (flags == LCK_VG_BACKUP) {
			log_very_verbose("Requesting backup of VG metadata for %s",
					 resource);
			return _lock_for_cluster(cmd, CLVMD_CMD_VG_BACKUP,
						 LCK_CLUSTER_VG, resource);
		}

		/* If the VG name is empty then lock the unused PVs */
		if (*resource == '#' || (flags & LCK_CACHE))
			dm_snprintf(lockname, sizeof(lockname), "P_%s",
				    resource);
		else
			dm_snprintf(lockname, sizeof(lockname), "V_%s",
				    resource);

		lock_scope = "VG";
		clvmd_cmd = CLVMD_CMD_LOCK_VG;
		flags &= LCK_TYPE_MASK;
		break;

	case LCK_LV:
		clvmd_cmd = CLVMD_CMD_LOCK_LV;
		strcpy(lockname, resource);
		lock_scope = "LV";
		flags &= 0xffdf;	/* Mask off HOLD flag */
		break;

	default:
		log_error("Unrecognised lock scope: %d",
			  flags & LCK_SCOPE_MASK);
		return 0;
	}

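	/* Translate the lock type into the short mode name used in the log message below. */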
	switch(flags & LCK_TYPE_MASK) {
	case LCK_UNLOCK:
		lock_type = "UN";
		break;
	case LCK_NULL:
		lock_type = "NL";
		break;
	case LCK_READ:
		lock_type = "CR";
		break;
	case LCK_PREAD:
		lock_type = "PR";
		break;
	case LCK_WRITE:
		lock_type = "PW";
		break;
	case LCK_EXCL:
		lock_type = "EX";
		break;
	default:
		log_error("Unrecognised lock type: %u",
			  flags & LCK_TYPE_MASK);
		return 0;
	}

	log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname,
			 lock_type,
			 flags & LCK_NONBLOCK ? "" : "B",
			 flags & LCK_HOLD ? "H" : "",
			 flags & LCK_LOCAL ? "L" : "",
			 flags & LCK_CLUSTER_VG ? "C" : "",
			 flags);

	/* Send a message to the cluster manager */
	return _lock_for_cluster(cmd, clvmd_cmd, flags, lockname);
}

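/* Convert the textual lock mode reported by a node back into an LCK_ constant. */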
static int decode_lock_type(const char *response)
{
	if (!response)
		return LCK_NULL;
	else if (!strcmp(response, "EX"))
		return LCK_EXCL;
	else if (!strcmp(response, "CR"))
		return LCK_READ;
	else if (!strcmp(response, "PR"))
		return LCK_PREAD;

	stack;
	return 0;
}

#ifdef CLUSTER_LOCKING_INTERNAL
static int _query_resource(const char *resource, int *mode)
#else
int query_resource(const char *resource, int *mode)
#endif
{
	int i, status, len, num_responses, saved_errno;
	const char *node = "";
	char *args;
	lvm_response_t *response = NULL;

	saved_errno = errno;
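	/* Same payload layout as _lock_for_cluster(): flags byte, options byte, then the resource name. */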
	len = strlen(resource) + 3;
	args = alloca(len);
	strcpy(args + 2, resource);

	args[0] = 0;
	args[1] = LCK_CLUSTER_VG;

	status = _cluster_request(CLVMD_CMD_LOCK_QUERY, node, args, len,
				  &response, &num_responses);
	*mode = LCK_NULL;
	for (i = 0; i < num_responses; i++) {
		if (response[i].status == EHOSTDOWN)
			continue;

		if (!response[i].response[0])
			continue;

		/*
		 * All nodes should use CR, or exactly one node
		 * should hold EX. (PR is obsolete.)
		 * If nodes report different lock modes, something is
		 * broken - just return the more important mode.
		 */
		if (decode_lock_type(response[i].response) > *mode)
			*mode = decode_lock_type(response[i].response);

		log_debug("Lock held for %s, node %s : %s", resource,
			  response[i].node, response[i].response);
	}

	_cluster_free_request(response, num_responses);
	errno = saved_errno;

	return status;
}

#ifdef CLUSTER_LOCKING_INTERNAL
static void _locking_end(void)
#else
void locking_end(void)
#endif
{
	if (_clvmd_sock != -1 && close(_clvmd_sock))
		stack;

	_clvmd_sock = -1;
}

#ifdef CLUSTER_LOCKING_INTERNAL
static void _reset_locking(void)
#else
void reset_locking(void)
#endif
{
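	/* Close the existing daemon connection and establish a fresh one. */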
	if (close(_clvmd_sock))
		stack;

	_clvmd_sock = _open_local_sock();
	if (_clvmd_sock == -1)
		stack;
}

#ifdef CLUSTER_LOCKING_INTERNAL
int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd)
{
	locking->lock_resource = _lock_resource;
	locking->query_resource = _query_resource;
	locking->fin_locking = _locking_end;
	locking->reset_locking = _reset_locking;
	locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED;

	_clvmd_sock = _open_local_sock();
	if (_clvmd_sock == -1)
		return 0;

	return 1;
}
#else
int locking_init(int type, struct config_tree *cf, uint32_t *flags)
{
	_clvmd_sock = _open_local_sock();
	if (_clvmd_sock == -1)
		return 0;

	/* Ask LVM to lock memory before calling us */
	*flags |= LCK_PRE_MEMLOCK;
	*flags |= LCK_CLUSTERED;

	return 1;
}
#endif