1 /* $NetBSD: local.c,v 1.1.1.1 2009/12/02 00:27:10 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. 5 * 6 * This copyrighted material is made available to anyone wishing to use, 7 * modify, copy, or redistribute it subject to the terms and conditions 8 * of the GNU Lesser General Public License v.2.1. 9 * 10 * You should have received a copy of the GNU Lesser General Public License 11 * along with this program; if not, write to the Free Software Foundation, 12 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 13 */ 14 #include <unistd.h> 15 #include <errno.h> 16 #include <string.h> 17 #include <stdint.h> 18 #include <sys/types.h> 19 #include <sys/socket.h> 20 #include <sys/poll.h> 21 #include <linux/connector.h> 22 #include <linux/netlink.h> 23 24 #include "dm-log-userspace.h" 25 #include "functions.h" 26 #include "cluster.h" 27 #include "common.h" 28 #include "logging.h" 29 #include "link_mon.h" 30 #include "local.h" 31 32 #ifndef CN_IDX_DM 33 #warning Kernel should be at least 2.6.31 34 #define CN_IDX_DM 0x7 /* Device Mapper */ 35 #define CN_VAL_DM_USERSPACE_LOG 0x1 36 #endif 37 38 static int cn_fd; /* Connector (netlink) socket fd */ 39 static char recv_buf[2048]; 40 static char send_buf[2048]; 41 42 43 /* FIXME: merge this function with kernel_send_helper */ 44 static int kernel_ack(uint32_t seq, int error) 45 { 46 int r; 47 struct nlmsghdr *nlh = (struct nlmsghdr *)send_buf; 48 struct cn_msg *msg = NLMSG_DATA(nlh); 49 50 if (error < 0) { 51 LOG_ERROR("Programmer error: error codes must be positive"); 52 return -EINVAL; 53 } 54 55 memset(send_buf, 0, sizeof(send_buf)); 56 57 nlh->nlmsg_seq = 0; 58 nlh->nlmsg_pid = getpid(); 59 nlh->nlmsg_type = NLMSG_DONE; 60 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct cn_msg)); 61 nlh->nlmsg_flags = 0; 62 63 msg->len = 0; 64 msg->id.idx = CN_IDX_DM; 65 msg->id.val = CN_VAL_DM_USERSPACE_LOG; 66 msg->seq = seq; 67 msg->ack = error; 68 69 r = send(cn_fd, nlh, NLMSG_LENGTH(sizeof(struct cn_msg)), 0); 70 /* FIXME: do better error processing */ 71 if (r <= 0) 72 return -EBADE; 73 74 return 0; 75 } 76 77 78 /* 79 * kernel_recv 80 * @rq: the newly allocated request from kernel 81 * 82 * Read requests from the kernel and allocate space for the new request. 83 * If there is no request from the kernel, *rq is NULL. 84 * 85 * This function is not thread safe due to returned stack pointer. In fact, 86 * the returned pointer must not be in-use when this function is called again. 87 * 88 * Returns: 0 on success, -EXXX on error 89 */ 90 static int kernel_recv(struct clog_request **rq) 91 { 92 int r = 0; 93 int len; 94 struct cn_msg *msg; 95 struct dm_ulog_request *u_rq; 96 97 *rq = NULL; 98 memset(recv_buf, 0, sizeof(recv_buf)); 99 100 len = recv(cn_fd, recv_buf, sizeof(recv_buf), 0); 101 if (len < 0) { 102 LOG_ERROR("Failed to recv message from kernel"); 103 r = -errno; 104 goto fail; 105 } 106 107 switch (((struct nlmsghdr *)recv_buf)->nlmsg_type) { 108 case NLMSG_ERROR: 109 LOG_ERROR("Unable to recv message from kernel: NLMSG_ERROR"); 110 r = -EBADE; 111 goto fail; 112 case NLMSG_DONE: 113 msg = (struct cn_msg *)NLMSG_DATA((struct nlmsghdr *)recv_buf); 114 len -= sizeof(struct nlmsghdr); 115 116 if (len < sizeof(struct cn_msg)) { 117 LOG_ERROR("Incomplete request from kernel received"); 118 r = -EBADE; 119 goto fail; 120 } 121 122 if (msg->len > DM_ULOG_REQUEST_SIZE) { 123 LOG_ERROR("Not enough space to receive kernel request (%d/%d)", 124 msg->len, DM_ULOG_REQUEST_SIZE); 125 r = -EBADE; 126 goto fail; 127 } 128 129 if (!msg->len) 130 LOG_ERROR("Zero length message received"); 131 132 len -= sizeof(struct cn_msg); 133 134 if (len < msg->len) 135 LOG_ERROR("len = %d, msg->len = %d", len, msg->len); 136 137 msg->data[msg->len] = '\0'; /* Cleaner way to ensure this? */ 138 u_rq = (struct dm_ulog_request *)msg->data; 139 140 if (!u_rq->request_type) { 141 LOG_DBG("Bad transmission, requesting resend [%u]", 142 msg->seq); 143 r = -EAGAIN; 144 145 if (kernel_ack(msg->seq, EAGAIN)) { 146 LOG_ERROR("Failed to NACK kernel transmission [%u]", 147 msg->seq); 148 r = -EBADE; 149 } 150 } 151 152 /* 153 * Now we've got sizeof(struct cn_msg) + sizeof(struct nlmsghdr) 154 * worth of space that precede the request structure from the 155 * kernel. Since that space isn't going to be used again, we 156 * can take it for our purposes; rather than allocating a whole 157 * new structure and doing a memcpy. 158 * 159 * We should really make sure 'clog_request' doesn't grow 160 * beyond what is available to us, but we need only check it 161 * once... perhaps at compile time? 162 */ 163 // *rq = container_of(u_rq, struct clog_request, u_rq); 164 *rq = (void *)u_rq - 165 (sizeof(struct clog_request) - 166 sizeof(struct dm_ulog_request)); 167 168 /* Clear the wrapper container fields */ 169 memset(*rq, 0, (void *)u_rq - (void *)(*rq)); 170 break; 171 default: 172 LOG_ERROR("Unknown nlmsg_type"); 173 r = -EBADE; 174 } 175 176 fail: 177 if (r) 178 *rq = NULL; 179 180 return (r == -EAGAIN) ? 0 : r; 181 } 182 183 static int kernel_send_helper(void *data, int out_size) 184 { 185 int r; 186 struct nlmsghdr *nlh; 187 struct cn_msg *msg; 188 189 memset(send_buf, 0, sizeof(send_buf)); 190 191 nlh = (struct nlmsghdr *)send_buf; 192 nlh->nlmsg_seq = 0; /* FIXME: Is this used? */ 193 nlh->nlmsg_pid = getpid(); 194 nlh->nlmsg_type = NLMSG_DONE; 195 nlh->nlmsg_len = NLMSG_LENGTH(out_size + sizeof(struct cn_msg)); 196 nlh->nlmsg_flags = 0; 197 198 msg = NLMSG_DATA(nlh); 199 memcpy(msg->data, data, out_size); 200 msg->len = out_size; 201 msg->id.idx = CN_IDX_DM; 202 msg->id.val = CN_VAL_DM_USERSPACE_LOG; 203 msg->seq = 0; 204 205 r = send(cn_fd, nlh, NLMSG_LENGTH(out_size + sizeof(struct cn_msg)), 0); 206 /* FIXME: do better error processing */ 207 if (r <= 0) 208 return -EBADE; 209 210 return 0; 211 } 212 213 /* 214 * do_local_work 215 * 216 * Any processing errors are placed in the 'rq' 217 * structure to be reported back to the kernel. 218 * It may be pointless for this function to 219 * return an int. 220 * 221 * Returns: 0 on success, -EXXX on failure 222 */ 223 static int do_local_work(void *data) 224 { 225 int r; 226 struct clog_request *rq; 227 struct dm_ulog_request *u_rq = NULL; 228 229 r = kernel_recv(&rq); 230 if (r) 231 return r; 232 233 if (!rq) 234 return 0; 235 236 u_rq = &rq->u_rq; 237 LOG_DBG("[%s] Request from kernel received: [%s/%u]", 238 SHORT_UUID(u_rq->uuid), RQ_TYPE(u_rq->request_type), 239 u_rq->seq); 240 switch (u_rq->request_type) { 241 case DM_ULOG_CTR: 242 case DM_ULOG_DTR: 243 case DM_ULOG_GET_REGION_SIZE: 244 case DM_ULOG_IN_SYNC: 245 case DM_ULOG_GET_SYNC_COUNT: 246 case DM_ULOG_STATUS_INFO: 247 case DM_ULOG_STATUS_TABLE: 248 case DM_ULOG_PRESUSPEND: 249 /* We do not specify ourselves as server here */ 250 r = do_request(rq, 0); 251 if (r) 252 LOG_DBG("Returning failed request to kernel [%s]", 253 RQ_TYPE(u_rq->request_type)); 254 r = kernel_send(u_rq); 255 if (r) 256 LOG_ERROR("Failed to respond to kernel [%s]", 257 RQ_TYPE(u_rq->request_type)); 258 259 break; 260 case DM_ULOG_RESUME: 261 /* 262 * Resume is a special case that requires a local 263 * component to join the CPG, and a cluster component 264 * to handle the request. 265 */ 266 r = local_resume(u_rq); 267 if (r) { 268 LOG_DBG("Returning failed request to kernel [%s]", 269 RQ_TYPE(u_rq->request_type)); 270 r = kernel_send(u_rq); 271 if (r) 272 LOG_ERROR("Failed to respond to kernel [%s]", 273 RQ_TYPE(u_rq->request_type)); 274 break; 275 } 276 /* ELSE, fall through */ 277 case DM_ULOG_IS_CLEAN: 278 case DM_ULOG_FLUSH: 279 case DM_ULOG_MARK_REGION: 280 case DM_ULOG_GET_RESYNC_WORK: 281 case DM_ULOG_SET_REGION_SYNC: 282 case DM_ULOG_IS_REMOTE_RECOVERING: 283 case DM_ULOG_POSTSUSPEND: 284 r = cluster_send(rq); 285 if (r) { 286 u_rq->data_size = 0; 287 u_rq->error = r; 288 kernel_send(u_rq); 289 } 290 291 break; 292 case DM_ULOG_CLEAR_REGION: 293 r = kernel_ack(u_rq->seq, 0); 294 295 r = cluster_send(rq); 296 if (r) { 297 /* 298 * FIXME: store error for delivery on flush 299 * This would allow us to optimize MARK_REGION 300 * too. 301 */ 302 } 303 304 break; 305 default: 306 LOG_ERROR("Invalid log request received (%u), ignoring.", 307 u_rq->request_type); 308 309 return 0; 310 } 311 312 if (r && !u_rq->error) 313 u_rq->error = r; 314 315 return r; 316 } 317 318 /* 319 * kernel_send 320 * @u_rq: result to pass back to kernel 321 * 322 * This function returns the u_rq structure 323 * (containing the results) to the kernel. 324 * It then frees the structure. 325 * 326 * WARNING: should the structure be freed if 327 * there is an error? I vote 'yes'. If the 328 * kernel doesn't get the response, it should 329 * resend the request. 330 * 331 * Returns: 0 on success, -EXXX on failure 332 */ 333 int kernel_send(struct dm_ulog_request *u_rq) 334 { 335 int r; 336 int size; 337 338 if (!u_rq) 339 return -EINVAL; 340 341 size = sizeof(struct dm_ulog_request) + u_rq->data_size; 342 343 if (!u_rq->data_size && !u_rq->error) { 344 /* An ACK is all that is needed */ 345 346 /* FIXME: add ACK code */ 347 } else if (size > DM_ULOG_REQUEST_SIZE) { 348 /* 349 * If we gotten here, we've already overrun 350 * our allotted space somewhere. 351 * 352 * We must do something, because the kernel 353 * is waiting for a response. 354 */ 355 LOG_ERROR("Not enough space to respond to server"); 356 u_rq->error = -ENOSPC; 357 size = sizeof(struct dm_ulog_request); 358 } 359 360 r = kernel_send_helper(u_rq, size); 361 if (r) 362 LOG_ERROR("Failed to send msg to kernel."); 363 364 return r; 365 } 366 367 /* 368 * init_local 369 * 370 * Initialize kernel communication socket (netlink) 371 * 372 * Returns: 0 on success, values from common.h on failure 373 */ 374 int init_local(void) 375 { 376 int r = 0; 377 int opt; 378 struct sockaddr_nl addr; 379 380 cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); 381 if (cn_fd < 0) 382 return EXIT_KERNEL_SOCKET; 383 384 /* memset to fix valgrind complaint */ 385 memset(&addr, 0, sizeof(struct sockaddr_nl)); 386 387 addr.nl_family = AF_NETLINK; 388 addr.nl_groups = CN_IDX_DM; 389 addr.nl_pid = 0; 390 391 r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr)); 392 if (r < 0) { 393 close(cn_fd); 394 return EXIT_KERNEL_BIND; 395 } 396 397 opt = addr.nl_groups; 398 r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt)); 399 if (r) { 400 close(cn_fd); 401 return EXIT_KERNEL_SETSOCKOPT; 402 } 403 404 /* 405 r = fcntl(cn_fd, F_SETFL, FNDELAY); 406 */ 407 408 links_register(cn_fd, "local", do_local_work, NULL); 409 410 return 0; 411 } 412 413 /* 414 * cleanup_local 415 * 416 * Clean up before exiting 417 */ 418 void cleanup_local(void) 419 { 420 links_unregister(cn_fd); 421 close(cn_fd); 422 } 423