1 /*	$NetBSD: local.c,v 1.1.1.1 2009/12/02 00:27:10 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
5  *
6  * This copyrighted material is made available to anyone wishing to use,
7  * modify, copy, or redistribute it subject to the terms and conditions
8  * of the GNU Lesser General Public License v.2.1.
9  *
10  * You should have received a copy of the GNU Lesser General Public License
11  * along with this program; if not, write to the Free Software Foundation,
12  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
13  */
14 #include <unistd.h>
15 #include <errno.h>
16 #include <string.h>
17 #include <stdint.h>
18 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <sys/poll.h>
21 #include <linux/connector.h>
22 #include <linux/netlink.h>
23 
24 #include "dm-log-userspace.h"
25 #include "functions.h"
26 #include "cluster.h"
27 #include "common.h"
28 #include "logging.h"
29 #include "link_mon.h"
30 #include "local.h"
31 
/*
 * Fallback connector identifiers for builds whose kernel headers do not
 * provide the device-mapper connector index.
 */
#if !defined(CN_IDX_DM)
#warning Kernel should be at least 2.6.31
#define CN_IDX_DM                       0x7     /* Device Mapper */
#define CN_VAL_DM_USERSPACE_LOG         0x1
#endif

/*
 * File-scope communication state.  Both buffers are reused for every
 * message, so their contents are only valid until the next send/recv.
 */
static int cn_fd;		/* Connector (netlink) socket fd */
static char recv_buf[2048];	/* Raw datagram most recently received */
static char send_buf[2048];	/* Scratch space for the outgoing message */
41 
42 
43 /* FIXME: merge this function with kernel_send_helper */
44 static int kernel_ack(uint32_t seq, int error)
45 {
46 	int r;
47 	struct nlmsghdr *nlh = (struct nlmsghdr *)send_buf;
48 	struct cn_msg *msg = NLMSG_DATA(nlh);
49 
50 	if (error < 0) {
51 		LOG_ERROR("Programmer error: error codes must be positive");
52 		return -EINVAL;
53 	}
54 
55 	memset(send_buf, 0, sizeof(send_buf));
56 
57 	nlh->nlmsg_seq = 0;
58 	nlh->nlmsg_pid = getpid();
59 	nlh->nlmsg_type = NLMSG_DONE;
60 	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct cn_msg));
61 	nlh->nlmsg_flags = 0;
62 
63 	msg->len = 0;
64 	msg->id.idx = CN_IDX_DM;
65 	msg->id.val = CN_VAL_DM_USERSPACE_LOG;
66 	msg->seq = seq;
67 	msg->ack = error;
68 
69 	r = send(cn_fd, nlh, NLMSG_LENGTH(sizeof(struct cn_msg)), 0);
70 	/* FIXME: do better error processing */
71 	if (r <= 0)
72 		return -EBADE;
73 
74 	return 0;
75 }
76 
77 
78 /*
79  * kernel_recv
80  * @rq: the newly allocated request from kernel
81  *
82  * Read requests from the kernel and allocate space for the new request.
83  * If there is no request from the kernel, *rq is NULL.
84  *
85  * This function is not thread safe due to returned stack pointer.  In fact,
86  * the returned pointer must not be in-use when this function is called again.
87  *
88  * Returns: 0 on success, -EXXX on error
89  */
90 static int kernel_recv(struct clog_request **rq)
91 {
92 	int r = 0;
93 	int len;
94 	struct cn_msg *msg;
95 	struct dm_ulog_request *u_rq;
96 
97 	*rq = NULL;
98 	memset(recv_buf, 0, sizeof(recv_buf));
99 
100 	len = recv(cn_fd, recv_buf, sizeof(recv_buf), 0);
101 	if (len < 0) {
102 		LOG_ERROR("Failed to recv message from kernel");
103 		r = -errno;
104 		goto fail;
105 	}
106 
107 	switch (((struct nlmsghdr *)recv_buf)->nlmsg_type) {
108 	case NLMSG_ERROR:
109 		LOG_ERROR("Unable to recv message from kernel: NLMSG_ERROR");
110 		r = -EBADE;
111 		goto fail;
112 	case NLMSG_DONE:
113 		msg = (struct cn_msg *)NLMSG_DATA((struct nlmsghdr *)recv_buf);
114 		len -= sizeof(struct nlmsghdr);
115 
116 		if (len < sizeof(struct cn_msg)) {
117 			LOG_ERROR("Incomplete request from kernel received");
118 			r = -EBADE;
119 			goto fail;
120 		}
121 
122 		if (msg->len > DM_ULOG_REQUEST_SIZE) {
123 			LOG_ERROR("Not enough space to receive kernel request (%d/%d)",
124 				  msg->len, DM_ULOG_REQUEST_SIZE);
125 			r = -EBADE;
126 			goto fail;
127 		}
128 
129 		if (!msg->len)
130 			LOG_ERROR("Zero length message received");
131 
132 		len -= sizeof(struct cn_msg);
133 
134 		if (len < msg->len)
135 			LOG_ERROR("len = %d, msg->len = %d", len, msg->len);
136 
137 		msg->data[msg->len] = '\0'; /* Cleaner way to ensure this? */
138 		u_rq = (struct dm_ulog_request *)msg->data;
139 
140 		if (!u_rq->request_type) {
141 			LOG_DBG("Bad transmission, requesting resend [%u]",
142 				msg->seq);
143 			r = -EAGAIN;
144 
145 			if (kernel_ack(msg->seq, EAGAIN)) {
146 				LOG_ERROR("Failed to NACK kernel transmission [%u]",
147 					  msg->seq);
148 				r = -EBADE;
149 			}
150 		}
151 
152 		/*
153 		 * Now we've got sizeof(struct cn_msg) + sizeof(struct nlmsghdr)
154 		 * worth of space that precede the request structure from the
155 		 * kernel.  Since that space isn't going to be used again, we
156 		 * can take it for our purposes; rather than allocating a whole
157 		 * new structure and doing a memcpy.
158 		 *
159 		 * We should really make sure 'clog_request' doesn't grow
160 		 * beyond what is available to us, but we need only check it
161 		 * once... perhaps at compile time?
162 		 */
163 //		*rq = container_of(u_rq, struct clog_request, u_rq);
164 		*rq = (void *)u_rq -
165 			(sizeof(struct clog_request) -
166 			 sizeof(struct dm_ulog_request));
167 
168 		/* Clear the wrapper container fields */
169 		memset(*rq, 0, (void *)u_rq - (void *)(*rq));
170 		break;
171 	default:
172 		LOG_ERROR("Unknown nlmsg_type");
173 		r = -EBADE;
174 	}
175 
176 fail:
177 	if (r)
178 		*rq = NULL;
179 
180 	return (r == -EAGAIN) ? 0 : r;
181 }
182 
183 static int kernel_send_helper(void *data, int out_size)
184 {
185 	int r;
186 	struct nlmsghdr *nlh;
187 	struct cn_msg *msg;
188 
189 	memset(send_buf, 0, sizeof(send_buf));
190 
191 	nlh = (struct nlmsghdr *)send_buf;
192 	nlh->nlmsg_seq = 0;  /* FIXME: Is this used? */
193 	nlh->nlmsg_pid = getpid();
194 	nlh->nlmsg_type = NLMSG_DONE;
195 	nlh->nlmsg_len = NLMSG_LENGTH(out_size + sizeof(struct cn_msg));
196 	nlh->nlmsg_flags = 0;
197 
198 	msg = NLMSG_DATA(nlh);
199 	memcpy(msg->data, data, out_size);
200 	msg->len = out_size;
201 	msg->id.idx = CN_IDX_DM;
202 	msg->id.val = CN_VAL_DM_USERSPACE_LOG;
203 	msg->seq = 0;
204 
205 	r = send(cn_fd, nlh, NLMSG_LENGTH(out_size + sizeof(struct cn_msg)), 0);
206 	/* FIXME: do better error processing */
207 	if (r <= 0)
208 		return -EBADE;
209 
210 	return 0;
211 }
212 
213 /*
214  * do_local_work
215  *
216  * Any processing errors are placed in the 'rq'
217  * structure to be reported back to the kernel.
218  * It may be pointless for this function to
219  * return an int.
220  *
221  * Returns: 0 on success, -EXXX on failure
222  */
223 static int do_local_work(void *data)
224 {
225 	int r;
226 	struct clog_request *rq;
227 	struct dm_ulog_request *u_rq = NULL;
228 
229 	r = kernel_recv(&rq);
230 	if (r)
231 		return r;
232 
233 	if (!rq)
234 		return 0;
235 
236 	u_rq = &rq->u_rq;
237 	LOG_DBG("[%s]  Request from kernel received: [%s/%u]",
238 		SHORT_UUID(u_rq->uuid), RQ_TYPE(u_rq->request_type),
239 		u_rq->seq);
240 	switch (u_rq->request_type) {
241 	case DM_ULOG_CTR:
242 	case DM_ULOG_DTR:
243 	case DM_ULOG_GET_REGION_SIZE:
244 	case DM_ULOG_IN_SYNC:
245 	case DM_ULOG_GET_SYNC_COUNT:
246 	case DM_ULOG_STATUS_INFO:
247 	case DM_ULOG_STATUS_TABLE:
248 	case DM_ULOG_PRESUSPEND:
249 		/* We do not specify ourselves as server here */
250 		r = do_request(rq, 0);
251 		if (r)
252 			LOG_DBG("Returning failed request to kernel [%s]",
253 				RQ_TYPE(u_rq->request_type));
254 		r = kernel_send(u_rq);
255 		if (r)
256 			LOG_ERROR("Failed to respond to kernel [%s]",
257 				  RQ_TYPE(u_rq->request_type));
258 
259 		break;
260 	case DM_ULOG_RESUME:
261 		/*
262 		 * Resume is a special case that requires a local
263 		 * component to join the CPG, and a cluster component
264 		 * to handle the request.
265 		 */
266 		r = local_resume(u_rq);
267 		if (r) {
268 			LOG_DBG("Returning failed request to kernel [%s]",
269 				RQ_TYPE(u_rq->request_type));
270 			r = kernel_send(u_rq);
271 			if (r)
272 				LOG_ERROR("Failed to respond to kernel [%s]",
273 					  RQ_TYPE(u_rq->request_type));
274 			break;
275 		}
276 		/* ELSE, fall through */
277 	case DM_ULOG_IS_CLEAN:
278 	case DM_ULOG_FLUSH:
279 	case DM_ULOG_MARK_REGION:
280 	case DM_ULOG_GET_RESYNC_WORK:
281 	case DM_ULOG_SET_REGION_SYNC:
282 	case DM_ULOG_IS_REMOTE_RECOVERING:
283 	case DM_ULOG_POSTSUSPEND:
284 		r = cluster_send(rq);
285 		if (r) {
286 			u_rq->data_size = 0;
287 			u_rq->error = r;
288 			kernel_send(u_rq);
289 		}
290 
291 		break;
292 	case DM_ULOG_CLEAR_REGION:
293 		r = kernel_ack(u_rq->seq, 0);
294 
295 		r = cluster_send(rq);
296 		if (r) {
297 			/*
298 			 * FIXME: store error for delivery on flush
299 			 *        This would allow us to optimize MARK_REGION
300 			 *        too.
301 			 */
302 		}
303 
304 		break;
305 	default:
306 		LOG_ERROR("Invalid log request received (%u), ignoring.",
307 			  u_rq->request_type);
308 
309 		return 0;
310 	}
311 
312 	if (r && !u_rq->error)
313 		u_rq->error = r;
314 
315 	return r;
316 }
317 
318 /*
319  * kernel_send
320  * @u_rq: result to pass back to kernel
321  *
322  * This function returns the u_rq structure
323  * (containing the results) to the kernel.
324  * It then frees the structure.
325  *
326  * WARNING: should the structure be freed if
327  * there is an error?  I vote 'yes'.  If the
328  * kernel doesn't get the response, it should
329  * resend the request.
330  *
331  * Returns: 0 on success, -EXXX on failure
332  */
333 int kernel_send(struct dm_ulog_request *u_rq)
334 {
335 	int r;
336 	int size;
337 
338 	if (!u_rq)
339 		return -EINVAL;
340 
341 	size = sizeof(struct dm_ulog_request) + u_rq->data_size;
342 
343 	if (!u_rq->data_size && !u_rq->error) {
344 		/* An ACK is all that is needed */
345 
346 		/* FIXME: add ACK code */
347 	} else if (size > DM_ULOG_REQUEST_SIZE) {
348 		/*
349 		 * If we gotten here, we've already overrun
350 		 * our allotted space somewhere.
351 		 *
352 		 * We must do something, because the kernel
353 		 * is waiting for a response.
354 		 */
355 		LOG_ERROR("Not enough space to respond to server");
356 		u_rq->error = -ENOSPC;
357 		size = sizeof(struct dm_ulog_request);
358 	}
359 
360 	r = kernel_send_helper(u_rq, size);
361 	if (r)
362 		LOG_ERROR("Failed to send msg to kernel.");
363 
364 	return r;
365 }
366 
367 /*
368  * init_local
369  *
370  * Initialize kernel communication socket (netlink)
371  *
372  * Returns: 0 on success, values from common.h on failure
373  */
374 int init_local(void)
375 {
376 	int r = 0;
377 	int opt;
378 	struct sockaddr_nl addr;
379 
380 	cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
381 	if (cn_fd < 0)
382 		return EXIT_KERNEL_SOCKET;
383 
384 	/* memset to fix valgrind complaint */
385 	memset(&addr, 0, sizeof(struct sockaddr_nl));
386 
387 	addr.nl_family = AF_NETLINK;
388 	addr.nl_groups = CN_IDX_DM;
389 	addr.nl_pid = 0;
390 
391 	r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr));
392 	if (r < 0) {
393 		close(cn_fd);
394 		return EXIT_KERNEL_BIND;
395 	}
396 
397 	opt = addr.nl_groups;
398 	r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
399 	if (r) {
400 		close(cn_fd);
401 		return EXIT_KERNEL_SETSOCKOPT;
402 	}
403 
404 	/*
405 	r = fcntl(cn_fd, F_SETFL, FNDELAY);
406 	*/
407 
408 	links_register(cn_fd, "local", do_local_work, NULL);
409 
410 	return 0;
411 }
412 
413 /*
414  * cleanup_local
415  *
416  * Clean up before exiting
417  */
418 void cleanup_local(void)
419 {
420 	links_unregister(cn_fd);
421 	close(cn_fd);
422 }
423