1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device Client Side 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "nbd-internal.h" 22 23 static int nbd_errno_to_system_errno(int err) 24 { 25 switch (err) { 26 case NBD_SUCCESS: 27 return 0; 28 case NBD_EPERM: 29 return EPERM; 30 case NBD_EIO: 31 return EIO; 32 case NBD_ENOMEM: 33 return ENOMEM; 34 case NBD_ENOSPC: 35 return ENOSPC; 36 case NBD_EINVAL: 37 default: 38 return EINVAL; 39 } 40 } 41 42 /* Definitions for opaque data types */ 43 44 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 45 46 /* That's all folks */ 47 48 /* Basic flow for negotiation 49 50 Server Client 51 Negotiate 52 53 or 54 55 Server Client 56 Negotiate #1 57 Option 58 Negotiate #2 59 60 ---- 61 62 followed by 63 64 Server Client 65 Request 66 Response 67 Request 68 Response 69 ... 70 ... 71 Request (type == 2) 72 73 */ 74 75 76 /* If type represents success, return 1 without further action. 77 * If type represents an error reply, consume the rest of the packet on ioc. 78 * Then return 0 for unsupported (so the client can fall back to 79 * other approaches), or -1 with errp set for other errors. 80 */ 81 static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, 82 Error **errp) 83 { 84 uint32_t len; 85 char *msg = NULL; 86 int result = -1; 87 88 if (!(type & (1 << 31))) { 89 return 1; 90 } 91 92 if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { 93 error_setg(errp, "failed to read option length"); 94 return -1; 95 } 96 len = be32_to_cpu(len); 97 if (len) { 98 if (len > NBD_MAX_BUFFER_SIZE) { 99 error_setg(errp, "server's error message is too long"); 100 goto cleanup; 101 } 102 msg = g_malloc(len + 1); 103 if (read_sync(ioc, msg, len) != len) { 104 error_setg(errp, "failed to read option error message"); 105 goto cleanup; 106 } 107 msg[len] = '\0'; 108 } 109 110 switch (type) { 111 case NBD_REP_ERR_UNSUP: 112 TRACE("server doesn't understand request %d, attempting fallback", 113 opt); 114 result = 0; 115 goto cleanup; 116 117 case NBD_REP_ERR_POLICY: 118 error_setg(errp, "Denied by server for option %x", opt); 119 break; 120 121 case NBD_REP_ERR_INVALID: 122 error_setg(errp, "Invalid data length for option %x", opt); 123 break; 124 125 case NBD_REP_ERR_TLS_REQD: 126 error_setg(errp, "TLS negotiation required before option %x", opt); 127 break; 128 129 default: 130 error_setg(errp, "Unknown error code when asking for option %x", opt); 131 break; 132 } 133 134 if (msg) { 135 error_append_hint(errp, "%s\n", msg); 136 } 137 138 cleanup: 139 g_free(msg); 140 return result; 141 } 142 143 static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) 144 { 145 uint64_t magic; 146 uint32_t opt; 147 uint32_t type; 148 uint32_t len; 149 uint32_t namelen; 150 int error; 151 152 *name = NULL; 153 if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 154 error_setg(errp, "failed to read list option magic"); 155 return -1; 156 } 157 magic = be64_to_cpu(magic); 158 if (magic != NBD_REP_MAGIC) { 159 error_setg(errp, "Unexpected option list magic"); 160 return -1; 161 } 162 if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 163 error_setg(errp, "failed to read list option"); 164 return -1; 165 } 166 opt = be32_to_cpu(opt); 167 if (opt != NBD_OPT_LIST) { 168 error_setg(errp, "Unexpected option type %x expected %x", 169 opt, NBD_OPT_LIST); 170 return -1; 171 } 172 173 if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { 174 error_setg(errp, "failed to read list option type"); 175 return -1; 176 } 177 type = be32_to_cpu(type); 178 error = nbd_handle_reply_err(ioc, opt, type, errp); 179 if (error <= 0) { 180 return error; 181 } 182 183 if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { 184 error_setg(errp, "failed to read option length"); 185 return -1; 186 } 187 len = be32_to_cpu(len); 188 189 if (type == NBD_REP_ACK) { 190 if (len != 0) { 191 error_setg(errp, "length too long for option end"); 192 return -1; 193 } 194 } else if (type == NBD_REP_SERVER) { 195 if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { 196 error_setg(errp, "failed to read option name length"); 197 return -1; 198 } 199 namelen = be32_to_cpu(namelen); 200 if (len != (namelen + sizeof(namelen))) { 201 error_setg(errp, "incorrect option mame length"); 202 return -1; 203 } 204 if (namelen > 255) { 205 error_setg(errp, "export name length too long %d", namelen); 206 return -1; 207 } 208 209 *name = g_new0(char, namelen + 1); 210 if (read_sync(ioc, *name, namelen) != namelen) { 211 error_setg(errp, "failed to read export name"); 212 g_free(*name); 213 *name = NULL; 214 return -1; 215 } 216 (*name)[namelen] = '\0'; 217 } else { 218 error_setg(errp, "Unexpected reply type %x expected %x", 219 type, NBD_REP_SERVER); 220 return -1; 221 } 222 return 1; 223 } 224 225 226 static int nbd_receive_query_exports(QIOChannel *ioc, 227 const char *wantname, 228 Error **errp) 229 { 230 uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); 231 uint32_t opt = cpu_to_be32(NBD_OPT_LIST); 232 uint32_t length = 0; 233 bool foundExport = false; 234 235 TRACE("Querying export list"); 236 if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 237 error_setg(errp, "Failed to send list option magic"); 238 return -1; 239 } 240 241 if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 242 error_setg(errp, "Failed to send list option number"); 243 return -1; 244 } 245 246 if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { 247 error_setg(errp, "Failed to send list option length"); 248 return -1; 249 } 250 251 TRACE("Reading available export names"); 252 while (1) { 253 char *name = NULL; 254 int ret = nbd_receive_list(ioc, &name, errp); 255 256 if (ret < 0) { 257 g_free(name); 258 name = NULL; 259 return -1; 260 } 261 if (ret == 0) { 262 /* Server doesn't support export listing, so 263 * we will just assume an export with our 264 * wanted name exists */ 265 foundExport = true; 266 break; 267 } 268 if (name == NULL) { 269 TRACE("End of export name list"); 270 break; 271 } 272 if (g_str_equal(name, wantname)) { 273 foundExport = true; 274 TRACE("Found desired export name '%s'", name); 275 } else { 276 TRACE("Ignored export name '%s'", name); 277 } 278 g_free(name); 279 } 280 281 if (!foundExport) { 282 error_setg(errp, "No export with name '%s' available", wantname); 283 return -1; 284 } 285 286 return 0; 287 } 288 289 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, 290 QCryptoTLSCreds *tlscreds, 291 const char *hostname, Error **errp) 292 { 293 uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); 294 uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS); 295 uint32_t length = 0; 296 uint32_t type; 297 QIOChannelTLS *tioc; 298 struct NBDTLSHandshakeData data = { 0 }; 299 300 TRACE("Requesting TLS from server"); 301 if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 302 error_setg(errp, "Failed to send option magic"); 303 return NULL; 304 } 305 306 if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 307 error_setg(errp, "Failed to send option number"); 308 return NULL; 309 } 310 311 if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { 312 error_setg(errp, "Failed to send option length"); 313 return NULL; 314 } 315 316 TRACE("Getting TLS reply from server1"); 317 if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 318 error_setg(errp, "failed to read option magic"); 319 return NULL; 320 } 321 magic = be64_to_cpu(magic); 322 if (magic != NBD_REP_MAGIC) { 323 error_setg(errp, "Unexpected option magic"); 324 return NULL; 325 } 326 TRACE("Getting TLS reply from server2"); 327 if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 328 error_setg(errp, "failed to read option"); 329 return NULL; 330 } 331 opt = be32_to_cpu(opt); 332 if (opt != NBD_OPT_STARTTLS) { 333 error_setg(errp, "Unexpected option type %x expected %x", 334 opt, NBD_OPT_STARTTLS); 335 return NULL; 336 } 337 338 TRACE("Getting TLS reply from server"); 339 if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { 340 error_setg(errp, "failed to read option type"); 341 return NULL; 342 } 343 type = be32_to_cpu(type); 344 if (type != NBD_REP_ACK) { 345 error_setg(errp, "Server rejected request to start TLS %x", 346 type); 347 return NULL; 348 } 349 350 TRACE("Getting TLS reply from server"); 351 if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) { 352 error_setg(errp, "failed to read option length"); 353 return NULL; 354 } 355 length = be32_to_cpu(length); 356 if (length != 0) { 357 error_setg(errp, "Start TLS reponse was not zero %x", 358 length); 359 return NULL; 360 } 361 362 TRACE("TLS request approved, setting up TLS"); 363 tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp); 364 if (!tioc) { 365 return NULL; 366 } 367 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 368 TRACE("Starting TLS hanshake"); 369 qio_channel_tls_handshake(tioc, 370 nbd_tls_handshake, 371 &data, 372 NULL); 373 374 if (!data.complete) { 375 g_main_loop_run(data.loop); 376 } 377 g_main_loop_unref(data.loop); 378 if (data.error) { 379 error_propagate(errp, data.error); 380 object_unref(OBJECT(tioc)); 381 return NULL; 382 } 383 384 return QIO_CHANNEL(tioc); 385 } 386 387 388 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, 389 QCryptoTLSCreds *tlscreds, const char *hostname, 390 QIOChannel **outioc, 391 off_t *size, Error **errp) 392 { 393 char buf[256]; 394 uint64_t magic, s; 395 int rc; 396 397 TRACE("Receiving negotiation tlscreds=%p hostname=%s.", 398 tlscreds, hostname ? hostname : "<null>"); 399 400 rc = -EINVAL; 401 402 if (outioc) { 403 *outioc = NULL; 404 } 405 if (tlscreds && !outioc) { 406 error_setg(errp, "Output I/O channel required for TLS"); 407 goto fail; 408 } 409 410 if (read_sync(ioc, buf, 8) != 8) { 411 error_setg(errp, "Failed to read data"); 412 goto fail; 413 } 414 415 buf[8] = '\0'; 416 if (strlen(buf) == 0) { 417 error_setg(errp, "Server connection closed unexpectedly"); 418 goto fail; 419 } 420 421 TRACE("Magic is %c%c%c%c%c%c%c%c", 422 qemu_isprint(buf[0]) ? buf[0] : '.', 423 qemu_isprint(buf[1]) ? buf[1] : '.', 424 qemu_isprint(buf[2]) ? buf[2] : '.', 425 qemu_isprint(buf[3]) ? buf[3] : '.', 426 qemu_isprint(buf[4]) ? buf[4] : '.', 427 qemu_isprint(buf[5]) ? buf[5] : '.', 428 qemu_isprint(buf[6]) ? buf[6] : '.', 429 qemu_isprint(buf[7]) ? buf[7] : '.'); 430 431 if (memcmp(buf, "NBDMAGIC", 8) != 0) { 432 error_setg(errp, "Invalid magic received"); 433 goto fail; 434 } 435 436 if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 437 error_setg(errp, "Failed to read magic"); 438 goto fail; 439 } 440 magic = be64_to_cpu(magic); 441 TRACE("Magic is 0x%" PRIx64, magic); 442 443 if (magic == NBD_OPTS_MAGIC) { 444 uint32_t clientflags = 0; 445 uint32_t opt; 446 uint32_t namesize; 447 uint16_t globalflags; 448 uint16_t exportflags; 449 bool fixedNewStyle = false; 450 451 if (read_sync(ioc, &globalflags, sizeof(globalflags)) != 452 sizeof(globalflags)) { 453 error_setg(errp, "Failed to read server flags"); 454 goto fail; 455 } 456 globalflags = be16_to_cpu(globalflags); 457 *flags = globalflags << 16; 458 TRACE("Global flags are %x", globalflags); 459 if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) { 460 fixedNewStyle = true; 461 TRACE("Server supports fixed new style"); 462 clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; 463 } 464 /* client requested flags */ 465 clientflags = cpu_to_be32(clientflags); 466 if (write_sync(ioc, &clientflags, sizeof(clientflags)) != 467 sizeof(clientflags)) { 468 error_setg(errp, "Failed to send clientflags field"); 469 goto fail; 470 } 471 if (tlscreds) { 472 if (fixedNewStyle) { 473 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp); 474 if (!*outioc) { 475 goto fail; 476 } 477 ioc = *outioc; 478 } else { 479 error_setg(errp, "Server does not support STARTTLS"); 480 goto fail; 481 } 482 } 483 if (!name) { 484 TRACE("Using default NBD export name \"\""); 485 name = ""; 486 } 487 if (fixedNewStyle) { 488 /* Check our desired export is present in the 489 * server export list. Since NBD_OPT_EXPORT_NAME 490 * cannot return an error message, running this 491 * query gives us good error reporting if the 492 * server required TLS 493 */ 494 if (nbd_receive_query_exports(ioc, name, errp) < 0) { 495 goto fail; 496 } 497 } 498 /* write the export name */ 499 magic = cpu_to_be64(magic); 500 if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 501 error_setg(errp, "Failed to send export name magic"); 502 goto fail; 503 } 504 opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); 505 if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 506 error_setg(errp, "Failed to send export name option number"); 507 goto fail; 508 } 509 namesize = cpu_to_be32(strlen(name)); 510 if (write_sync(ioc, &namesize, sizeof(namesize)) != 511 sizeof(namesize)) { 512 error_setg(errp, "Failed to send export name length"); 513 goto fail; 514 } 515 if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) { 516 error_setg(errp, "Failed to send export name"); 517 goto fail; 518 } 519 520 if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { 521 error_setg(errp, "Failed to read export length"); 522 goto fail; 523 } 524 *size = be64_to_cpu(s); 525 TRACE("Size is %" PRIu64, *size); 526 527 if (read_sync(ioc, &exportflags, sizeof(exportflags)) != 528 sizeof(exportflags)) { 529 error_setg(errp, "Failed to read export flags"); 530 goto fail; 531 } 532 exportflags = be16_to_cpu(exportflags); 533 *flags |= exportflags; 534 TRACE("Export flags are %x", exportflags); 535 } else if (magic == NBD_CLIENT_MAGIC) { 536 if (name) { 537 error_setg(errp, "Server does not support export names"); 538 goto fail; 539 } 540 if (tlscreds) { 541 error_setg(errp, "Server does not support STARTTLS"); 542 goto fail; 543 } 544 545 if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { 546 error_setg(errp, "Failed to read export length"); 547 goto fail; 548 } 549 *size = be64_to_cpu(s); 550 TRACE("Size is %" PRIu64, *size); 551 552 if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) { 553 error_setg(errp, "Failed to read export flags"); 554 goto fail; 555 } 556 *flags = be32_to_cpup(flags); 557 } else { 558 error_setg(errp, "Bad magic received"); 559 goto fail; 560 } 561 562 if (read_sync(ioc, &buf, 124) != 124) { 563 error_setg(errp, "Failed to read reserved block"); 564 goto fail; 565 } 566 rc = 0; 567 568 fail: 569 return rc; 570 } 571 572 #ifdef __linux__ 573 int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size) 574 { 575 TRACE("Setting NBD socket"); 576 577 if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) { 578 int serrno = errno; 579 LOG("Failed to set NBD socket"); 580 return -serrno; 581 } 582 583 TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE); 584 585 if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) { 586 int serrno = errno; 587 LOG("Failed setting NBD block size"); 588 return -serrno; 589 } 590 591 TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE)); 592 593 if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) { 594 int serrno = errno; 595 LOG("Failed setting size (in blocks)"); 596 return -serrno; 597 } 598 599 if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) { 600 if (errno == ENOTTY) { 601 int read_only = (flags & NBD_FLAG_READ_ONLY) != 0; 602 TRACE("Setting readonly attribute"); 603 604 if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { 605 int serrno = errno; 606 LOG("Failed setting read-only attribute"); 607 return -serrno; 608 } 609 } else { 610 int serrno = errno; 611 LOG("Failed setting flags"); 612 return -serrno; 613 } 614 } 615 616 TRACE("Negotiation ended"); 617 618 return 0; 619 } 620 621 int nbd_client(int fd) 622 { 623 int ret; 624 int serrno; 625 626 TRACE("Doing NBD loop"); 627 628 ret = ioctl(fd, NBD_DO_IT); 629 if (ret < 0 && errno == EPIPE) { 630 /* NBD_DO_IT normally returns EPIPE when someone has disconnected 631 * the socket via NBD_DISCONNECT. We do not want to return 1 in 632 * that case. 633 */ 634 ret = 0; 635 } 636 serrno = errno; 637 638 TRACE("NBD loop returned %d: %s", ret, strerror(serrno)); 639 640 TRACE("Clearing NBD queue"); 641 ioctl(fd, NBD_CLEAR_QUE); 642 643 TRACE("Clearing NBD socket"); 644 ioctl(fd, NBD_CLEAR_SOCK); 645 646 errno = serrno; 647 return ret; 648 } 649 #else 650 int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size) 651 { 652 return -ENOTSUP; 653 } 654 655 int nbd_client(int fd) 656 { 657 return -ENOTSUP; 658 } 659 #endif 660 661 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) 662 { 663 uint8_t buf[NBD_REQUEST_SIZE]; 664 ssize_t ret; 665 666 TRACE("Sending request to server: " 667 "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}", 668 request->from, request->len, request->handle, request->type); 669 670 cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); 671 cpu_to_be32w((uint32_t*)(buf + 4), request->type); 672 cpu_to_be64w((uint64_t*)(buf + 8), request->handle); 673 cpu_to_be64w((uint64_t*)(buf + 16), request->from); 674 cpu_to_be32w((uint32_t*)(buf + 24), request->len); 675 676 ret = write_sync(ioc, buf, sizeof(buf)); 677 if (ret < 0) { 678 return ret; 679 } 680 681 if (ret != sizeof(buf)) { 682 LOG("writing to socket failed"); 683 return -EINVAL; 684 } 685 return 0; 686 } 687 688 ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) 689 { 690 uint8_t buf[NBD_REPLY_SIZE]; 691 uint32_t magic; 692 ssize_t ret; 693 694 ret = read_sync(ioc, buf, sizeof(buf)); 695 if (ret < 0) { 696 return ret; 697 } 698 699 if (ret != sizeof(buf)) { 700 LOG("read failed"); 701 return -EINVAL; 702 } 703 704 /* Reply 705 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 706 [ 4 .. 7] error (0 == no error) 707 [ 7 .. 15] handle 708 */ 709 710 magic = be32_to_cpup((uint32_t*)buf); 711 reply->error = be32_to_cpup((uint32_t*)(buf + 4)); 712 reply->handle = be64_to_cpup((uint64_t*)(buf + 8)); 713 714 reply->error = nbd_errno_to_system_errno(reply->error); 715 716 TRACE("Got reply: " 717 "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }", 718 magic, reply->error, reply->handle); 719 720 if (magic != NBD_REPLY_MAGIC) { 721 LOG("invalid magic (got 0x%x)", magic); 722 return -EINVAL; 723 } 724 return 0; 725 } 726 727