xref: /qemu/nbd/client.c (revision 3d100d0f)
1 /*
2  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
3  *
4  *  Network Block Device Client Side
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; under version 2 of the License.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #include "nbd-internal.h"
22 
23 static int nbd_errno_to_system_errno(int err)
24 {
25     switch (err) {
26     case NBD_SUCCESS:
27         return 0;
28     case NBD_EPERM:
29         return EPERM;
30     case NBD_EIO:
31         return EIO;
32     case NBD_ENOMEM:
33         return ENOMEM;
34     case NBD_ENOSPC:
35         return ENOSPC;
36     case NBD_EINVAL:
37     default:
38         return EINVAL;
39     }
40 }
41 
/* Definitions for opaque data types */

/*
 * File-local tail queue head for NBDExport entries, statically
 * initialised to the empty list.
 * NOTE(review): nothing in the visible part of this file references
 * `exports`; it looks like a leftover -- confirm before removing.
 */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);

/* That's all folks */
47 
48 /* Basic flow for negotiation
49 
50    Server         Client
51    Negotiate
52 
53    or
54 
55    Server         Client
56    Negotiate #1
57                   Option
58    Negotiate #2
59 
60    ----
61 
62    followed by
63 
64    Server         Client
65                   Request
66    Response
67                   Request
68    Response
69                   ...
70    ...
71                   Request (type == 2)
72 
73 */
74 
75 
76 static int nbd_handle_reply_err(uint32_t opt, uint32_t type, Error **errp)
77 {
78     if (!(type & (1 << 31))) {
79         return 0;
80     }
81 
82     switch (type) {
83     case NBD_REP_ERR_UNSUP:
84         error_setg(errp, "Unsupported option type %x", opt);
85         break;
86 
87     case NBD_REP_ERR_POLICY:
88         error_setg(errp, "Denied by server for option %x", opt);
89         break;
90 
91     case NBD_REP_ERR_INVALID:
92         error_setg(errp, "Invalid data length for option %x", opt);
93         break;
94 
95     case NBD_REP_ERR_TLS_REQD:
96         error_setg(errp, "TLS negotiation required before option %x", opt);
97         break;
98 
99     default:
100         error_setg(errp, "Unknown error code when asking for option %x", opt);
101         break;
102     }
103 
104     return -1;
105 }
106 
107 static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
108 {
109     uint64_t magic;
110     uint32_t opt;
111     uint32_t type;
112     uint32_t len;
113     uint32_t namelen;
114 
115     *name = NULL;
116     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
117         error_setg(errp, "failed to read list option magic");
118         return -1;
119     }
120     magic = be64_to_cpu(magic);
121     if (magic != NBD_REP_MAGIC) {
122         error_setg(errp, "Unexpected option list magic");
123         return -1;
124     }
125     if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
126         error_setg(errp, "failed to read list option");
127         return -1;
128     }
129     opt = be32_to_cpu(opt);
130     if (opt != NBD_OPT_LIST) {
131         error_setg(errp, "Unexpected option type %x expected %x",
132                    opt, NBD_OPT_LIST);
133         return -1;
134     }
135 
136     if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
137         error_setg(errp, "failed to read list option type");
138         return -1;
139     }
140     type = be32_to_cpu(type);
141     if (type == NBD_REP_ERR_UNSUP) {
142         return 0;
143     }
144     if (nbd_handle_reply_err(opt, type, errp) < 0) {
145         return -1;
146     }
147 
148     if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
149         error_setg(errp, "failed to read option length");
150         return -1;
151     }
152     len = be32_to_cpu(len);
153 
154     if (type == NBD_REP_ACK) {
155         if (len != 0) {
156             error_setg(errp, "length too long for option end");
157             return -1;
158         }
159     } else if (type == NBD_REP_SERVER) {
160         if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
161             error_setg(errp, "failed to read option name length");
162             return -1;
163         }
164         namelen = be32_to_cpu(namelen);
165         if (len != (namelen + sizeof(namelen))) {
166             error_setg(errp, "incorrect option mame length");
167             return -1;
168         }
169         if (namelen > 255) {
170             error_setg(errp, "export name length too long %d", namelen);
171             return -1;
172         }
173 
174         *name = g_new0(char, namelen + 1);
175         if (read_sync(ioc, *name, namelen) != namelen) {
176             error_setg(errp, "failed to read export name");
177             g_free(*name);
178             *name = NULL;
179             return -1;
180         }
181         (*name)[namelen] = '\0';
182     } else {
183         error_setg(errp, "Unexpected reply type %x expected %x",
184                    type, NBD_REP_SERVER);
185         return -1;
186     }
187     return 1;
188 }
189 
190 
191 static int nbd_receive_query_exports(QIOChannel *ioc,
192                                      const char *wantname,
193                                      Error **errp)
194 {
195     uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
196     uint32_t opt = cpu_to_be32(NBD_OPT_LIST);
197     uint32_t length = 0;
198     bool foundExport = false;
199 
200     TRACE("Querying export list");
201     if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
202         error_setg(errp, "Failed to send list option magic");
203         return -1;
204     }
205 
206     if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
207         error_setg(errp, "Failed to send list option number");
208         return -1;
209     }
210 
211     if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
212         error_setg(errp, "Failed to send list option length");
213         return -1;
214     }
215 
216     TRACE("Reading available export names");
217     while (1) {
218         char *name = NULL;
219         int ret = nbd_receive_list(ioc, &name, errp);
220 
221         if (ret < 0) {
222             g_free(name);
223             name = NULL;
224             return -1;
225         }
226         if (ret == 0) {
227             /* Server doesn't support export listing, so
228              * we will just assume an export with our
229              * wanted name exists */
230             foundExport = true;
231             break;
232         }
233         if (name == NULL) {
234             TRACE("End of export name list");
235             break;
236         }
237         if (g_str_equal(name, wantname)) {
238             foundExport = true;
239             TRACE("Found desired export name '%s'", name);
240         } else {
241             TRACE("Ignored export name '%s'", name);
242         }
243         g_free(name);
244     }
245 
246     if (!foundExport) {
247         error_setg(errp, "No export with name '%s' available", wantname);
248         return -1;
249     }
250 
251     return 0;
252 }
253 
254 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
255                                         QCryptoTLSCreds *tlscreds,
256                                         const char *hostname, Error **errp)
257 {
258     uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
259     uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS);
260     uint32_t length = 0;
261     uint32_t type;
262     QIOChannelTLS *tioc;
263     struct NBDTLSHandshakeData data = { 0 };
264 
265     TRACE("Requesting TLS from server");
266     if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
267         error_setg(errp, "Failed to send option magic");
268         return NULL;
269     }
270 
271     if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
272         error_setg(errp, "Failed to send option number");
273         return NULL;
274     }
275 
276     if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
277         error_setg(errp, "Failed to send option length");
278         return NULL;
279     }
280 
281     TRACE("Getting TLS reply from server1");
282     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
283         error_setg(errp, "failed to read option magic");
284         return NULL;
285     }
286     magic = be64_to_cpu(magic);
287     if (magic != NBD_REP_MAGIC) {
288         error_setg(errp, "Unexpected option magic");
289         return NULL;
290     }
291     TRACE("Getting TLS reply from server2");
292     if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
293         error_setg(errp, "failed to read option");
294         return NULL;
295     }
296     opt = be32_to_cpu(opt);
297     if (opt != NBD_OPT_STARTTLS) {
298         error_setg(errp, "Unexpected option type %x expected %x",
299                    opt, NBD_OPT_STARTTLS);
300         return NULL;
301     }
302 
303     TRACE("Getting TLS reply from server");
304     if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
305         error_setg(errp, "failed to read option type");
306         return NULL;
307     }
308     type = be32_to_cpu(type);
309     if (type != NBD_REP_ACK) {
310         error_setg(errp, "Server rejected request to start TLS %x",
311                    type);
312         return NULL;
313     }
314 
315     TRACE("Getting TLS reply from server");
316     if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
317         error_setg(errp, "failed to read option length");
318         return NULL;
319     }
320     length = be32_to_cpu(length);
321     if (length != 0) {
322         error_setg(errp, "Start TLS reponse was not zero %x",
323                    length);
324         return NULL;
325     }
326 
327     TRACE("TLS request approved, setting up TLS");
328     tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
329     if (!tioc) {
330         return NULL;
331     }
332     data.loop = g_main_loop_new(g_main_context_default(), FALSE);
333     TRACE("Starting TLS hanshake");
334     qio_channel_tls_handshake(tioc,
335                               nbd_tls_handshake,
336                               &data,
337                               NULL);
338 
339     if (!data.complete) {
340         g_main_loop_run(data.loop);
341     }
342     g_main_loop_unref(data.loop);
343     if (data.error) {
344         error_propagate(errp, data.error);
345         object_unref(OBJECT(tioc));
346         return NULL;
347     }
348 
349     return QIO_CHANNEL(tioc);
350 }
351 
352 
353 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
354                           QCryptoTLSCreds *tlscreds, const char *hostname,
355                           QIOChannel **outioc,
356                           off_t *size, Error **errp)
357 {
358     char buf[256];
359     uint64_t magic, s;
360     int rc;
361 
362     TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
363           tlscreds, hostname ? hostname : "<null>");
364 
365     rc = -EINVAL;
366 
367     if (outioc) {
368         *outioc = NULL;
369     }
370     if (tlscreds && !outioc) {
371         error_setg(errp, "Output I/O channel required for TLS");
372         goto fail;
373     }
374 
375     if (read_sync(ioc, buf, 8) != 8) {
376         error_setg(errp, "Failed to read data");
377         goto fail;
378     }
379 
380     buf[8] = '\0';
381     if (strlen(buf) == 0) {
382         error_setg(errp, "Server connection closed unexpectedly");
383         goto fail;
384     }
385 
386     TRACE("Magic is %c%c%c%c%c%c%c%c",
387           qemu_isprint(buf[0]) ? buf[0] : '.',
388           qemu_isprint(buf[1]) ? buf[1] : '.',
389           qemu_isprint(buf[2]) ? buf[2] : '.',
390           qemu_isprint(buf[3]) ? buf[3] : '.',
391           qemu_isprint(buf[4]) ? buf[4] : '.',
392           qemu_isprint(buf[5]) ? buf[5] : '.',
393           qemu_isprint(buf[6]) ? buf[6] : '.',
394           qemu_isprint(buf[7]) ? buf[7] : '.');
395 
396     if (memcmp(buf, "NBDMAGIC", 8) != 0) {
397         error_setg(errp, "Invalid magic received");
398         goto fail;
399     }
400 
401     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
402         error_setg(errp, "Failed to read magic");
403         goto fail;
404     }
405     magic = be64_to_cpu(magic);
406     TRACE("Magic is 0x%" PRIx64, magic);
407 
408     if (magic == NBD_OPTS_MAGIC) {
409         uint32_t clientflags = 0;
410         uint32_t opt;
411         uint32_t namesize;
412         uint16_t globalflags;
413         uint16_t exportflags;
414         bool fixedNewStyle = false;
415 
416         if (read_sync(ioc, &globalflags, sizeof(globalflags)) !=
417             sizeof(globalflags)) {
418             error_setg(errp, "Failed to read server flags");
419             goto fail;
420         }
421         globalflags = be16_to_cpu(globalflags);
422         *flags = globalflags << 16;
423         TRACE("Global flags are %x", globalflags);
424         if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
425             fixedNewStyle = true;
426             TRACE("Server supports fixed new style");
427             clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
428         }
429         /* client requested flags */
430         clientflags = cpu_to_be32(clientflags);
431         if (write_sync(ioc, &clientflags, sizeof(clientflags)) !=
432             sizeof(clientflags)) {
433             error_setg(errp, "Failed to send clientflags field");
434             goto fail;
435         }
436         if (tlscreds) {
437             if (fixedNewStyle) {
438                 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
439                 if (!*outioc) {
440                     goto fail;
441                 }
442                 ioc = *outioc;
443             } else {
444                 error_setg(errp, "Server does not support STARTTLS");
445                 goto fail;
446             }
447         }
448         if (!name) {
449             TRACE("Using default NBD export name \"\"");
450             name = "";
451         }
452         if (fixedNewStyle) {
453             /* Check our desired export is present in the
454              * server export list. Since NBD_OPT_EXPORT_NAME
455              * cannot return an error message, running this
456              * query gives us good error reporting if the
457              * server required TLS
458              */
459             if (nbd_receive_query_exports(ioc, name, errp) < 0) {
460                 goto fail;
461             }
462         }
463         /* write the export name */
464         magic = cpu_to_be64(magic);
465         if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
466             error_setg(errp, "Failed to send export name magic");
467             goto fail;
468         }
469         opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
470         if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
471             error_setg(errp, "Failed to send export name option number");
472             goto fail;
473         }
474         namesize = cpu_to_be32(strlen(name));
475         if (write_sync(ioc, &namesize, sizeof(namesize)) !=
476             sizeof(namesize)) {
477             error_setg(errp, "Failed to send export name length");
478             goto fail;
479         }
480         if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) {
481             error_setg(errp, "Failed to send export name");
482             goto fail;
483         }
484 
485         if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
486             error_setg(errp, "Failed to read export length");
487             goto fail;
488         }
489         *size = be64_to_cpu(s);
490         TRACE("Size is %" PRIu64, *size);
491 
492         if (read_sync(ioc, &exportflags, sizeof(exportflags)) !=
493             sizeof(exportflags)) {
494             error_setg(errp, "Failed to read export flags");
495             goto fail;
496         }
497         exportflags = be16_to_cpu(exportflags);
498         *flags |= exportflags;
499         TRACE("Export flags are %x", exportflags);
500     } else if (magic == NBD_CLIENT_MAGIC) {
501         if (name) {
502             error_setg(errp, "Server does not support export names");
503             goto fail;
504         }
505         if (tlscreds) {
506             error_setg(errp, "Server does not support STARTTLS");
507             goto fail;
508         }
509 
510         if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
511             error_setg(errp, "Failed to read export length");
512             goto fail;
513         }
514         *size = be64_to_cpu(s);
515         TRACE("Size is %" PRIu64, *size);
516 
517         if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) {
518             error_setg(errp, "Failed to read export flags");
519             goto fail;
520         }
521         *flags = be32_to_cpup(flags);
522     } else {
523         error_setg(errp, "Bad magic received");
524         goto fail;
525     }
526 
527     if (read_sync(ioc, &buf, 124) != 124) {
528         error_setg(errp, "Failed to read reserved block");
529         goto fail;
530     }
531     rc = 0;
532 
533 fail:
534     return rc;
535 }
536 
537 #ifdef __linux__
538 int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size)
539 {
540     TRACE("Setting NBD socket");
541 
542     if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) {
543         int serrno = errno;
544         LOG("Failed to set NBD socket");
545         return -serrno;
546     }
547 
548     TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
549 
550     if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) {
551         int serrno = errno;
552         LOG("Failed setting NBD block size");
553         return -serrno;
554     }
555 
556     TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE));
557 
558     if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) {
559         int serrno = errno;
560         LOG("Failed setting size (in blocks)");
561         return -serrno;
562     }
563 
564     if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) {
565         if (errno == ENOTTY) {
566             int read_only = (flags & NBD_FLAG_READ_ONLY) != 0;
567             TRACE("Setting readonly attribute");
568 
569             if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
570                 int serrno = errno;
571                 LOG("Failed setting read-only attribute");
572                 return -serrno;
573             }
574         } else {
575             int serrno = errno;
576             LOG("Failed setting flags");
577             return -serrno;
578         }
579     }
580 
581     TRACE("Negotiation ended");
582 
583     return 0;
584 }
585 
586 int nbd_client(int fd)
587 {
588     int ret;
589     int serrno;
590 
591     TRACE("Doing NBD loop");
592 
593     ret = ioctl(fd, NBD_DO_IT);
594     if (ret < 0 && errno == EPIPE) {
595         /* NBD_DO_IT normally returns EPIPE when someone has disconnected
596          * the socket via NBD_DISCONNECT.  We do not want to return 1 in
597          * that case.
598          */
599         ret = 0;
600     }
601     serrno = errno;
602 
603     TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
604 
605     TRACE("Clearing NBD queue");
606     ioctl(fd, NBD_CLEAR_QUE);
607 
608     TRACE("Clearing NBD socket");
609     ioctl(fd, NBD_CLEAR_SOCK);
610 
611     errno = serrno;
612     return ret;
613 }
614 #else
615 int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size)
616 {
617     return -ENOTSUP;
618 }
619 
/*
 * Variant built when __linux__ is not defined: @fd is ignored and the
 * call always fails with -ENOTSUP.
 */
int nbd_client(int fd)
{
    (void)fd;

    return -ENOTSUP;
}
624 #endif
625 
626 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
627 {
628     uint8_t buf[NBD_REQUEST_SIZE];
629     ssize_t ret;
630 
631     cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
632     cpu_to_be32w((uint32_t*)(buf + 4), request->type);
633     cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
634     cpu_to_be64w((uint64_t*)(buf + 16), request->from);
635     cpu_to_be32w((uint32_t*)(buf + 24), request->len);
636 
637     TRACE("Sending request to server: "
638           "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
639           request->from, request->len, request->handle, request->type);
640 
641     ret = write_sync(ioc, buf, sizeof(buf));
642     if (ret < 0) {
643         return ret;
644     }
645 
646     if (ret != sizeof(buf)) {
647         LOG("writing to socket failed");
648         return -EINVAL;
649     }
650     return 0;
651 }
652 
653 ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
654 {
655     uint8_t buf[NBD_REPLY_SIZE];
656     uint32_t magic;
657     ssize_t ret;
658 
659     ret = read_sync(ioc, buf, sizeof(buf));
660     if (ret < 0) {
661         return ret;
662     }
663 
664     if (ret != sizeof(buf)) {
665         LOG("read failed");
666         return -EINVAL;
667     }
668 
669     /* Reply
670        [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
671        [ 4 ..  7]    error   (0 == no error)
672        [ 7 .. 15]    handle
673      */
674 
675     magic = be32_to_cpup((uint32_t*)buf);
676     reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
677     reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
678 
679     reply->error = nbd_errno_to_system_errno(reply->error);
680 
681     TRACE("Got reply: "
682           "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
683           magic, reply->error, reply->handle);
684 
685     if (magic != NBD_REPLY_MAGIC) {
686         LOG("invalid magic (got 0x%x)", magic);
687         return -EINVAL;
688     }
689     return 0;
690 }
691 
692