xref: /qemu/nbd/client.c (revision 8d86ada2)
1 /*
2  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
3  *
4  *  Network Block Device Client Side
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; under version 2 of the License.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "nbd-internal.h"
21 
22 static int nbd_errno_to_system_errno(int err)
23 {
24     switch (err) {
25     case NBD_SUCCESS:
26         return 0;
27     case NBD_EPERM:
28         return EPERM;
29     case NBD_EIO:
30         return EIO;
31     case NBD_ENOMEM:
32         return ENOMEM;
33     case NBD_ENOSPC:
34         return ENOSPC;
35     case NBD_EINVAL:
36     default:
37         return EINVAL;
38     }
39 }
40 
/* Definitions for opaque data types */

/* File-local list head of NBDExport entries, initialised empty.
 * NOTE(review): nothing else in this file refers to this list; it looks
 * like a leftover that could be dropped -- confirm before removing. */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);

/* That's all folks */
46 
47 /* Basic flow for negotiation
48 
49    Server         Client
50    Negotiate
51 
52    or
53 
54    Server         Client
55    Negotiate #1
56                   Option
57    Negotiate #2
58 
59    ----
60 
61    followed by
62 
63    Server         Client
64                   Request
65    Response
66                   Request
67    Response
68                   ...
69    ...
70                   Request (type == 2)
71 
72 */
73 
74 
75 static int nbd_handle_reply_err(uint32_t opt, uint32_t type, Error **errp)
76 {
77     if (!(type & (1 << 31))) {
78         return 0;
79     }
80 
81     switch (type) {
82     case NBD_REP_ERR_UNSUP:
83         error_setg(errp, "Unsupported option type %x", opt);
84         break;
85 
86     case NBD_REP_ERR_POLICY:
87         error_setg(errp, "Denied by server for option %x", opt);
88         break;
89 
90     case NBD_REP_ERR_INVALID:
91         error_setg(errp, "Invalid data length for option %x", opt);
92         break;
93 
94     case NBD_REP_ERR_TLS_REQD:
95         error_setg(errp, "TLS negotiation required before option %x", opt);
96         break;
97 
98     default:
99         error_setg(errp, "Unknown error code when asking for option %x", opt);
100         break;
101     }
102 
103     return -1;
104 }
105 
106 static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
107 {
108     uint64_t magic;
109     uint32_t opt;
110     uint32_t type;
111     uint32_t len;
112     uint32_t namelen;
113 
114     *name = NULL;
115     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
116         error_setg(errp, "failed to read list option magic");
117         return -1;
118     }
119     magic = be64_to_cpu(magic);
120     if (magic != NBD_REP_MAGIC) {
121         error_setg(errp, "Unexpected option list magic");
122         return -1;
123     }
124     if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
125         error_setg(errp, "failed to read list option");
126         return -1;
127     }
128     opt = be32_to_cpu(opt);
129     if (opt != NBD_OPT_LIST) {
130         error_setg(errp, "Unexpected option type %x expected %x",
131                    opt, NBD_OPT_LIST);
132         return -1;
133     }
134 
135     if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
136         error_setg(errp, "failed to read list option type");
137         return -1;
138     }
139     type = be32_to_cpu(type);
140     if (type == NBD_REP_ERR_UNSUP) {
141         return 0;
142     }
143     if (nbd_handle_reply_err(opt, type, errp) < 0) {
144         return -1;
145     }
146 
147     if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
148         error_setg(errp, "failed to read option length");
149         return -1;
150     }
151     len = be32_to_cpu(len);
152 
153     if (type == NBD_REP_ACK) {
154         if (len != 0) {
155             error_setg(errp, "length too long for option end");
156             return -1;
157         }
158     } else if (type == NBD_REP_SERVER) {
159         if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
160             error_setg(errp, "failed to read option name length");
161             return -1;
162         }
163         namelen = be32_to_cpu(namelen);
164         if (len != (namelen + sizeof(namelen))) {
165             error_setg(errp, "incorrect option mame length");
166             return -1;
167         }
168         if (namelen > 255) {
169             error_setg(errp, "export name length too long %d", namelen);
170             return -1;
171         }
172 
173         *name = g_new0(char, namelen + 1);
174         if (read_sync(ioc, *name, namelen) != namelen) {
175             error_setg(errp, "failed to read export name");
176             g_free(*name);
177             *name = NULL;
178             return -1;
179         }
180         (*name)[namelen] = '\0';
181     } else {
182         error_setg(errp, "Unexpected reply type %x expected %x",
183                    type, NBD_REP_SERVER);
184         return -1;
185     }
186     return 1;
187 }
188 
189 
190 static int nbd_receive_query_exports(QIOChannel *ioc,
191                                      const char *wantname,
192                                      Error **errp)
193 {
194     uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
195     uint32_t opt = cpu_to_be32(NBD_OPT_LIST);
196     uint32_t length = 0;
197     bool foundExport = false;
198 
199     TRACE("Querying export list");
200     if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
201         error_setg(errp, "Failed to send list option magic");
202         return -1;
203     }
204 
205     if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
206         error_setg(errp, "Failed to send list option number");
207         return -1;
208     }
209 
210     if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
211         error_setg(errp, "Failed to send list option length");
212         return -1;
213     }
214 
215     TRACE("Reading available export names");
216     while (1) {
217         char *name = NULL;
218         int ret = nbd_receive_list(ioc, &name, errp);
219 
220         if (ret < 0) {
221             g_free(name);
222             name = NULL;
223             return -1;
224         }
225         if (ret == 0) {
226             /* Server doesn't support export listing, so
227              * we will just assume an export with our
228              * wanted name exists */
229             foundExport = true;
230             break;
231         }
232         if (name == NULL) {
233             TRACE("End of export name list");
234             break;
235         }
236         if (g_str_equal(name, wantname)) {
237             foundExport = true;
238             TRACE("Found desired export name '%s'", name);
239         } else {
240             TRACE("Ignored export name '%s'", name);
241         }
242         g_free(name);
243     }
244 
245     if (!foundExport) {
246         error_setg(errp, "No export with name '%s' available", wantname);
247         return -1;
248     }
249 
250     return 0;
251 }
252 
253 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
254                                         QCryptoTLSCreds *tlscreds,
255                                         const char *hostname, Error **errp)
256 {
257     uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
258     uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS);
259     uint32_t length = 0;
260     uint32_t type;
261     QIOChannelTLS *tioc;
262     struct NBDTLSHandshakeData data = { 0 };
263 
264     TRACE("Requesting TLS from server");
265     if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
266         error_setg(errp, "Failed to send option magic");
267         return NULL;
268     }
269 
270     if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
271         error_setg(errp, "Failed to send option number");
272         return NULL;
273     }
274 
275     if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
276         error_setg(errp, "Failed to send option length");
277         return NULL;
278     }
279 
280     TRACE("Getting TLS reply from server1");
281     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
282         error_setg(errp, "failed to read option magic");
283         return NULL;
284     }
285     magic = be64_to_cpu(magic);
286     if (magic != NBD_REP_MAGIC) {
287         error_setg(errp, "Unexpected option magic");
288         return NULL;
289     }
290     TRACE("Getting TLS reply from server2");
291     if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
292         error_setg(errp, "failed to read option");
293         return NULL;
294     }
295     opt = be32_to_cpu(opt);
296     if (opt != NBD_OPT_STARTTLS) {
297         error_setg(errp, "Unexpected option type %x expected %x",
298                    opt, NBD_OPT_STARTTLS);
299         return NULL;
300     }
301 
302     TRACE("Getting TLS reply from server");
303     if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
304         error_setg(errp, "failed to read option type");
305         return NULL;
306     }
307     type = be32_to_cpu(type);
308     if (type != NBD_REP_ACK) {
309         error_setg(errp, "Server rejected request to start TLS %x",
310                    type);
311         return NULL;
312     }
313 
314     TRACE("Getting TLS reply from server");
315     if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
316         error_setg(errp, "failed to read option length");
317         return NULL;
318     }
319     length = be32_to_cpu(length);
320     if (length != 0) {
321         error_setg(errp, "Start TLS reponse was not zero %x",
322                    length);
323         return NULL;
324     }
325 
326     TRACE("TLS request approved, setting up TLS");
327     tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
328     if (!tioc) {
329         return NULL;
330     }
331     data.loop = g_main_loop_new(g_main_context_default(), FALSE);
332     TRACE("Starting TLS hanshake");
333     qio_channel_tls_handshake(tioc,
334                               nbd_tls_handshake,
335                               &data,
336                               NULL);
337 
338     if (!data.complete) {
339         g_main_loop_run(data.loop);
340     }
341     g_main_loop_unref(data.loop);
342     if (data.error) {
343         error_propagate(errp, data.error);
344         object_unref(OBJECT(tioc));
345         return NULL;
346     }
347 
348     return QIO_CHANNEL(tioc);
349 }
350 
351 
352 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
353                           QCryptoTLSCreds *tlscreds, const char *hostname,
354                           QIOChannel **outioc,
355                           off_t *size, Error **errp)
356 {
357     char buf[256];
358     uint64_t magic, s;
359     int rc;
360 
361     TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
362           tlscreds, hostname ? hostname : "<null>");
363 
364     rc = -EINVAL;
365 
366     if (outioc) {
367         *outioc = NULL;
368     }
369     if (tlscreds && !outioc) {
370         error_setg(errp, "Output I/O channel required for TLS");
371         goto fail;
372     }
373 
374     if (read_sync(ioc, buf, 8) != 8) {
375         error_setg(errp, "Failed to read data");
376         goto fail;
377     }
378 
379     buf[8] = '\0';
380     if (strlen(buf) == 0) {
381         error_setg(errp, "Server connection closed unexpectedly");
382         goto fail;
383     }
384 
385     TRACE("Magic is %c%c%c%c%c%c%c%c",
386           qemu_isprint(buf[0]) ? buf[0] : '.',
387           qemu_isprint(buf[1]) ? buf[1] : '.',
388           qemu_isprint(buf[2]) ? buf[2] : '.',
389           qemu_isprint(buf[3]) ? buf[3] : '.',
390           qemu_isprint(buf[4]) ? buf[4] : '.',
391           qemu_isprint(buf[5]) ? buf[5] : '.',
392           qemu_isprint(buf[6]) ? buf[6] : '.',
393           qemu_isprint(buf[7]) ? buf[7] : '.');
394 
395     if (memcmp(buf, "NBDMAGIC", 8) != 0) {
396         error_setg(errp, "Invalid magic received");
397         goto fail;
398     }
399 
400     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
401         error_setg(errp, "Failed to read magic");
402         goto fail;
403     }
404     magic = be64_to_cpu(magic);
405     TRACE("Magic is 0x%" PRIx64, magic);
406 
407     if (magic == NBD_OPTS_MAGIC) {
408         uint32_t clientflags = 0;
409         uint32_t opt;
410         uint32_t namesize;
411         uint16_t globalflags;
412         uint16_t exportflags;
413         bool fixedNewStyle = false;
414 
415         if (read_sync(ioc, &globalflags, sizeof(globalflags)) !=
416             sizeof(globalflags)) {
417             error_setg(errp, "Failed to read server flags");
418             goto fail;
419         }
420         globalflags = be16_to_cpu(globalflags);
421         *flags = globalflags << 16;
422         TRACE("Global flags are %x", globalflags);
423         if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
424             fixedNewStyle = true;
425             TRACE("Server supports fixed new style");
426             clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
427         }
428         /* client requested flags */
429         clientflags = cpu_to_be32(clientflags);
430         if (write_sync(ioc, &clientflags, sizeof(clientflags)) !=
431             sizeof(clientflags)) {
432             error_setg(errp, "Failed to send clientflags field");
433             goto fail;
434         }
435         if (tlscreds) {
436             if (fixedNewStyle) {
437                 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
438                 if (!*outioc) {
439                     goto fail;
440                 }
441                 ioc = *outioc;
442             } else {
443                 error_setg(errp, "Server does not support STARTTLS");
444                 goto fail;
445             }
446         }
447         if (!name) {
448             TRACE("Using default NBD export name \"\"");
449             name = "";
450         }
451         if (fixedNewStyle) {
452             /* Check our desired export is present in the
453              * server export list. Since NBD_OPT_EXPORT_NAME
454              * cannot return an error message, running this
455              * query gives us good error reporting if the
456              * server required TLS
457              */
458             if (nbd_receive_query_exports(ioc, name, errp) < 0) {
459                 goto fail;
460             }
461         }
462         /* write the export name */
463         magic = cpu_to_be64(magic);
464         if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
465             error_setg(errp, "Failed to send export name magic");
466             goto fail;
467         }
468         opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
469         if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
470             error_setg(errp, "Failed to send export name option number");
471             goto fail;
472         }
473         namesize = cpu_to_be32(strlen(name));
474         if (write_sync(ioc, &namesize, sizeof(namesize)) !=
475             sizeof(namesize)) {
476             error_setg(errp, "Failed to send export name length");
477             goto fail;
478         }
479         if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) {
480             error_setg(errp, "Failed to send export name");
481             goto fail;
482         }
483 
484         if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
485             error_setg(errp, "Failed to read export length");
486             goto fail;
487         }
488         *size = be64_to_cpu(s);
489         TRACE("Size is %" PRIu64, *size);
490 
491         if (read_sync(ioc, &exportflags, sizeof(exportflags)) !=
492             sizeof(exportflags)) {
493             error_setg(errp, "Failed to read export flags");
494             goto fail;
495         }
496         exportflags = be16_to_cpu(exportflags);
497         *flags |= exportflags;
498         TRACE("Export flags are %x", exportflags);
499     } else if (magic == NBD_CLIENT_MAGIC) {
500         if (name) {
501             error_setg(errp, "Server does not support export names");
502             goto fail;
503         }
504         if (tlscreds) {
505             error_setg(errp, "Server does not support STARTTLS");
506             goto fail;
507         }
508 
509         if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
510             error_setg(errp, "Failed to read export length");
511             goto fail;
512         }
513         *size = be64_to_cpu(s);
514         TRACE("Size is %" PRIu64, *size);
515 
516         if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) {
517             error_setg(errp, "Failed to read export flags");
518             goto fail;
519         }
520         *flags = be32_to_cpup(flags);
521     } else {
522         error_setg(errp, "Bad magic received");
523         goto fail;
524     }
525 
526     if (read_sync(ioc, &buf, 124) != 124) {
527         error_setg(errp, "Failed to read reserved block");
528         goto fail;
529     }
530     rc = 0;
531 
532 fail:
533     return rc;
534 }
535 
536 #ifdef __linux__
537 int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size)
538 {
539     TRACE("Setting NBD socket");
540 
541     if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) {
542         int serrno = errno;
543         LOG("Failed to set NBD socket");
544         return -serrno;
545     }
546 
547     TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
548 
549     if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) {
550         int serrno = errno;
551         LOG("Failed setting NBD block size");
552         return -serrno;
553     }
554 
555     TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE));
556 
557     if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) {
558         int serrno = errno;
559         LOG("Failed setting size (in blocks)");
560         return -serrno;
561     }
562 
563     if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) {
564         if (errno == ENOTTY) {
565             int read_only = (flags & NBD_FLAG_READ_ONLY) != 0;
566             TRACE("Setting readonly attribute");
567 
568             if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
569                 int serrno = errno;
570                 LOG("Failed setting read-only attribute");
571                 return -serrno;
572             }
573         } else {
574             int serrno = errno;
575             LOG("Failed setting flags");
576             return -serrno;
577         }
578     }
579 
580     TRACE("Negotiation ended");
581 
582     return 0;
583 }
584 
585 int nbd_client(int fd)
586 {
587     int ret;
588     int serrno;
589 
590     TRACE("Doing NBD loop");
591 
592     ret = ioctl(fd, NBD_DO_IT);
593     if (ret < 0 && errno == EPIPE) {
594         /* NBD_DO_IT normally returns EPIPE when someone has disconnected
595          * the socket via NBD_DISCONNECT.  We do not want to return 1 in
596          * that case.
597          */
598         ret = 0;
599     }
600     serrno = errno;
601 
602     TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
603 
604     TRACE("Clearing NBD queue");
605     ioctl(fd, NBD_CLEAR_QUE);
606 
607     TRACE("Clearing NBD socket");
608     ioctl(fd, NBD_CLEAR_SOCK);
609 
610     errno = serrno;
611     return ret;
612 }
613 #else
614 int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size)
615 {
616     return -ENOTSUP;
617 }
618 
/*
 * Variant of nbd_client() built when __linux__ is not defined: it ignores
 * @fd and always reports -ENOTSUP.
 */
int nbd_client(int fd)
{
    const int unsupported = -ENOTSUP;

    return unsupported;
}
623 #endif
624 
625 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
626 {
627     uint8_t buf[NBD_REQUEST_SIZE];
628     ssize_t ret;
629 
630     cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
631     cpu_to_be32w((uint32_t*)(buf + 4), request->type);
632     cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
633     cpu_to_be64w((uint64_t*)(buf + 16), request->from);
634     cpu_to_be32w((uint32_t*)(buf + 24), request->len);
635 
636     TRACE("Sending request to client: "
637           "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
638           request->from, request->len, request->handle, request->type);
639 
640     ret = write_sync(ioc, buf, sizeof(buf));
641     if (ret < 0) {
642         return ret;
643     }
644 
645     if (ret != sizeof(buf)) {
646         LOG("writing to socket failed");
647         return -EINVAL;
648     }
649     return 0;
650 }
651 
652 ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
653 {
654     uint8_t buf[NBD_REPLY_SIZE];
655     uint32_t magic;
656     ssize_t ret;
657 
658     ret = read_sync(ioc, buf, sizeof(buf));
659     if (ret < 0) {
660         return ret;
661     }
662 
663     if (ret != sizeof(buf)) {
664         LOG("read failed");
665         return -EINVAL;
666     }
667 
668     /* Reply
669        [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
670        [ 4 ..  7]    error   (0 == no error)
671        [ 7 .. 15]    handle
672      */
673 
674     magic = be32_to_cpup((uint32_t*)buf);
675     reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
676     reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
677 
678     reply->error = nbd_errno_to_system_errno(reply->error);
679 
680     TRACE("Got reply: "
681           "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
682           magic, reply->error, reply->handle);
683 
684     if (magic != NBD_REPLY_MAGIC) {
685         LOG("invalid magic (got 0x%x)", magic);
686         return -EINVAL;
687     }
688     return 0;
689 }
690 
691