xref: /qemu/nbd/client.c (revision afc47486)
1 /*
2  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
3  *
4  *  Network Block Device Client Side
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; under version 2 of the License.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #include "nbd-internal.h"
22 
23 static int nbd_errno_to_system_errno(int err)
24 {
25     switch (err) {
26     case NBD_SUCCESS:
27         return 0;
28     case NBD_EPERM:
29         return EPERM;
30     case NBD_EIO:
31         return EIO;
32     case NBD_ENOMEM:
33         return ENOMEM;
34     case NBD_ENOSPC:
35         return ENOSPC;
36     case NBD_EINVAL:
37     default:
38         return EINVAL;
39     }
40 }
41 
42 /* Definitions for opaque data types */
43 
/* File-local QTAILQ list head for NBDExport entries, set up with
 * QTAILQ_HEAD_INITIALIZER.
 * NOTE(review): no code visible in this file refers to 'exports' --
 * confirm whether this definition is still needed here.
 */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
45 
46 /* That's all folks */
47 
48 /* Basic flow for negotiation
49 
50    Server         Client
51    Negotiate
52 
53    or
54 
55    Server         Client
56    Negotiate #1
57                   Option
58    Negotiate #2
59 
60    ----
61 
62    followed by
63 
64    Server         Client
65                   Request
66    Response
67                   Request
68    Response
69                   ...
70    ...
71                   Request (type == 2)
72 
73 */
74 
75 
76 /* If type represents success, return 1 without further action.
77  * If type represents an error reply, consume the rest of the packet on ioc.
78  * Then return 0 for unsupported (so the client can fall back to
79  * other approaches), or -1 with errp set for other errors.
80  */
81 static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
82                                 Error **errp)
83 {
84     uint32_t len;
85     char *msg = NULL;
86     int result = -1;
87 
88     if (!(type & (1 << 31))) {
89         return 1;
90     }
91 
92     if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
93         error_setg(errp, "failed to read option length");
94         return -1;
95     }
96     len = be32_to_cpu(len);
97     if (len) {
98         if (len > NBD_MAX_BUFFER_SIZE) {
99             error_setg(errp, "server's error message is too long");
100             goto cleanup;
101         }
102         msg = g_malloc(len + 1);
103         if (read_sync(ioc, msg, len) != len) {
104             error_setg(errp, "failed to read option error message");
105             goto cleanup;
106         }
107         msg[len] = '\0';
108     }
109 
110     switch (type) {
111     case NBD_REP_ERR_UNSUP:
112         TRACE("server doesn't understand request %d, attempting fallback",
113               opt);
114         result = 0;
115         goto cleanup;
116 
117     case NBD_REP_ERR_POLICY:
118         error_setg(errp, "Denied by server for option %x", opt);
119         break;
120 
121     case NBD_REP_ERR_INVALID:
122         error_setg(errp, "Invalid data length for option %x", opt);
123         break;
124 
125     case NBD_REP_ERR_TLS_REQD:
126         error_setg(errp, "TLS negotiation required before option %x", opt);
127         break;
128 
129     default:
130         error_setg(errp, "Unknown error code when asking for option %x", opt);
131         break;
132     }
133 
134     if (msg) {
135         error_append_hint(errp, "%s\n", msg);
136     }
137 
138  cleanup:
139     g_free(msg);
140     return result;
141 }
142 
143 static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
144 {
145     uint64_t magic;
146     uint32_t opt;
147     uint32_t type;
148     uint32_t len;
149     uint32_t namelen;
150     int error;
151 
152     *name = NULL;
153     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
154         error_setg(errp, "failed to read list option magic");
155         return -1;
156     }
157     magic = be64_to_cpu(magic);
158     if (magic != NBD_REP_MAGIC) {
159         error_setg(errp, "Unexpected option list magic");
160         return -1;
161     }
162     if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
163         error_setg(errp, "failed to read list option");
164         return -1;
165     }
166     opt = be32_to_cpu(opt);
167     if (opt != NBD_OPT_LIST) {
168         error_setg(errp, "Unexpected option type %x expected %x",
169                    opt, NBD_OPT_LIST);
170         return -1;
171     }
172 
173     if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
174         error_setg(errp, "failed to read list option type");
175         return -1;
176     }
177     type = be32_to_cpu(type);
178     error = nbd_handle_reply_err(ioc, opt, type, errp);
179     if (error <= 0) {
180         return error;
181     }
182 
183     if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
184         error_setg(errp, "failed to read option length");
185         return -1;
186     }
187     len = be32_to_cpu(len);
188 
189     if (type == NBD_REP_ACK) {
190         if (len != 0) {
191             error_setg(errp, "length too long for option end");
192             return -1;
193         }
194     } else if (type == NBD_REP_SERVER) {
195         if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
196             error_setg(errp, "failed to read option name length");
197             return -1;
198         }
199         namelen = be32_to_cpu(namelen);
200         if (len != (namelen + sizeof(namelen))) {
201             error_setg(errp, "incorrect option mame length");
202             return -1;
203         }
204         if (namelen > 255) {
205             error_setg(errp, "export name length too long %d", namelen);
206             return -1;
207         }
208 
209         *name = g_new0(char, namelen + 1);
210         if (read_sync(ioc, *name, namelen) != namelen) {
211             error_setg(errp, "failed to read export name");
212             g_free(*name);
213             *name = NULL;
214             return -1;
215         }
216         (*name)[namelen] = '\0';
217     } else {
218         error_setg(errp, "Unexpected reply type %x expected %x",
219                    type, NBD_REP_SERVER);
220         return -1;
221     }
222     return 1;
223 }
224 
225 
226 static int nbd_receive_query_exports(QIOChannel *ioc,
227                                      const char *wantname,
228                                      Error **errp)
229 {
230     uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
231     uint32_t opt = cpu_to_be32(NBD_OPT_LIST);
232     uint32_t length = 0;
233     bool foundExport = false;
234 
235     TRACE("Querying export list");
236     if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
237         error_setg(errp, "Failed to send list option magic");
238         return -1;
239     }
240 
241     if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
242         error_setg(errp, "Failed to send list option number");
243         return -1;
244     }
245 
246     if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
247         error_setg(errp, "Failed to send list option length");
248         return -1;
249     }
250 
251     TRACE("Reading available export names");
252     while (1) {
253         char *name = NULL;
254         int ret = nbd_receive_list(ioc, &name, errp);
255 
256         if (ret < 0) {
257             g_free(name);
258             name = NULL;
259             return -1;
260         }
261         if (ret == 0) {
262             /* Server doesn't support export listing, so
263              * we will just assume an export with our
264              * wanted name exists */
265             foundExport = true;
266             break;
267         }
268         if (name == NULL) {
269             TRACE("End of export name list");
270             break;
271         }
272         if (g_str_equal(name, wantname)) {
273             foundExport = true;
274             TRACE("Found desired export name '%s'", name);
275         } else {
276             TRACE("Ignored export name '%s'", name);
277         }
278         g_free(name);
279     }
280 
281     if (!foundExport) {
282         error_setg(errp, "No export with name '%s' available", wantname);
283         return -1;
284     }
285 
286     return 0;
287 }
288 
289 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
290                                         QCryptoTLSCreds *tlscreds,
291                                         const char *hostname, Error **errp)
292 {
293     uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
294     uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS);
295     uint32_t length = 0;
296     uint32_t type;
297     QIOChannelTLS *tioc;
298     struct NBDTLSHandshakeData data = { 0 };
299 
300     TRACE("Requesting TLS from server");
301     if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
302         error_setg(errp, "Failed to send option magic");
303         return NULL;
304     }
305 
306     if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
307         error_setg(errp, "Failed to send option number");
308         return NULL;
309     }
310 
311     if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
312         error_setg(errp, "Failed to send option length");
313         return NULL;
314     }
315 
316     TRACE("Getting TLS reply from server1");
317     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
318         error_setg(errp, "failed to read option magic");
319         return NULL;
320     }
321     magic = be64_to_cpu(magic);
322     if (magic != NBD_REP_MAGIC) {
323         error_setg(errp, "Unexpected option magic");
324         return NULL;
325     }
326     TRACE("Getting TLS reply from server2");
327     if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
328         error_setg(errp, "failed to read option");
329         return NULL;
330     }
331     opt = be32_to_cpu(opt);
332     if (opt != NBD_OPT_STARTTLS) {
333         error_setg(errp, "Unexpected option type %x expected %x",
334                    opt, NBD_OPT_STARTTLS);
335         return NULL;
336     }
337 
338     TRACE("Getting TLS reply from server");
339     if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
340         error_setg(errp, "failed to read option type");
341         return NULL;
342     }
343     type = be32_to_cpu(type);
344     if (type != NBD_REP_ACK) {
345         error_setg(errp, "Server rejected request to start TLS %x",
346                    type);
347         return NULL;
348     }
349 
350     TRACE("Getting TLS reply from server");
351     if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
352         error_setg(errp, "failed to read option length");
353         return NULL;
354     }
355     length = be32_to_cpu(length);
356     if (length != 0) {
357         error_setg(errp, "Start TLS reponse was not zero %x",
358                    length);
359         return NULL;
360     }
361 
362     TRACE("TLS request approved, setting up TLS");
363     tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
364     if (!tioc) {
365         return NULL;
366     }
367     data.loop = g_main_loop_new(g_main_context_default(), FALSE);
368     TRACE("Starting TLS hanshake");
369     qio_channel_tls_handshake(tioc,
370                               nbd_tls_handshake,
371                               &data,
372                               NULL);
373 
374     if (!data.complete) {
375         g_main_loop_run(data.loop);
376     }
377     g_main_loop_unref(data.loop);
378     if (data.error) {
379         error_propagate(errp, data.error);
380         object_unref(OBJECT(tioc));
381         return NULL;
382     }
383 
384     return QIO_CHANNEL(tioc);
385 }
386 
387 
388 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
389                           QCryptoTLSCreds *tlscreds, const char *hostname,
390                           QIOChannel **outioc,
391                           off_t *size, Error **errp)
392 {
393     char buf[256];
394     uint64_t magic, s;
395     int rc;
396 
397     TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
398           tlscreds, hostname ? hostname : "<null>");
399 
400     rc = -EINVAL;
401 
402     if (outioc) {
403         *outioc = NULL;
404     }
405     if (tlscreds && !outioc) {
406         error_setg(errp, "Output I/O channel required for TLS");
407         goto fail;
408     }
409 
410     if (read_sync(ioc, buf, 8) != 8) {
411         error_setg(errp, "Failed to read data");
412         goto fail;
413     }
414 
415     buf[8] = '\0';
416     if (strlen(buf) == 0) {
417         error_setg(errp, "Server connection closed unexpectedly");
418         goto fail;
419     }
420 
421     TRACE("Magic is %c%c%c%c%c%c%c%c",
422           qemu_isprint(buf[0]) ? buf[0] : '.',
423           qemu_isprint(buf[1]) ? buf[1] : '.',
424           qemu_isprint(buf[2]) ? buf[2] : '.',
425           qemu_isprint(buf[3]) ? buf[3] : '.',
426           qemu_isprint(buf[4]) ? buf[4] : '.',
427           qemu_isprint(buf[5]) ? buf[5] : '.',
428           qemu_isprint(buf[6]) ? buf[6] : '.',
429           qemu_isprint(buf[7]) ? buf[7] : '.');
430 
431     if (memcmp(buf, "NBDMAGIC", 8) != 0) {
432         error_setg(errp, "Invalid magic received");
433         goto fail;
434     }
435 
436     if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
437         error_setg(errp, "Failed to read magic");
438         goto fail;
439     }
440     magic = be64_to_cpu(magic);
441     TRACE("Magic is 0x%" PRIx64, magic);
442 
443     if (magic == NBD_OPTS_MAGIC) {
444         uint32_t clientflags = 0;
445         uint32_t opt;
446         uint32_t namesize;
447         uint16_t globalflags;
448         uint16_t exportflags;
449         bool fixedNewStyle = false;
450 
451         if (read_sync(ioc, &globalflags, sizeof(globalflags)) !=
452             sizeof(globalflags)) {
453             error_setg(errp, "Failed to read server flags");
454             goto fail;
455         }
456         globalflags = be16_to_cpu(globalflags);
457         *flags = globalflags << 16;
458         TRACE("Global flags are %x", globalflags);
459         if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
460             fixedNewStyle = true;
461             TRACE("Server supports fixed new style");
462             clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
463         }
464         /* client requested flags */
465         clientflags = cpu_to_be32(clientflags);
466         if (write_sync(ioc, &clientflags, sizeof(clientflags)) !=
467             sizeof(clientflags)) {
468             error_setg(errp, "Failed to send clientflags field");
469             goto fail;
470         }
471         if (tlscreds) {
472             if (fixedNewStyle) {
473                 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
474                 if (!*outioc) {
475                     goto fail;
476                 }
477                 ioc = *outioc;
478             } else {
479                 error_setg(errp, "Server does not support STARTTLS");
480                 goto fail;
481             }
482         }
483         if (!name) {
484             TRACE("Using default NBD export name \"\"");
485             name = "";
486         }
487         if (fixedNewStyle) {
488             /* Check our desired export is present in the
489              * server export list. Since NBD_OPT_EXPORT_NAME
490              * cannot return an error message, running this
491              * query gives us good error reporting if the
492              * server required TLS
493              */
494             if (nbd_receive_query_exports(ioc, name, errp) < 0) {
495                 goto fail;
496             }
497         }
498         /* write the export name */
499         magic = cpu_to_be64(magic);
500         if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
501             error_setg(errp, "Failed to send export name magic");
502             goto fail;
503         }
504         opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
505         if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
506             error_setg(errp, "Failed to send export name option number");
507             goto fail;
508         }
509         namesize = cpu_to_be32(strlen(name));
510         if (write_sync(ioc, &namesize, sizeof(namesize)) !=
511             sizeof(namesize)) {
512             error_setg(errp, "Failed to send export name length");
513             goto fail;
514         }
515         if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) {
516             error_setg(errp, "Failed to send export name");
517             goto fail;
518         }
519 
520         if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
521             error_setg(errp, "Failed to read export length");
522             goto fail;
523         }
524         *size = be64_to_cpu(s);
525         TRACE("Size is %" PRIu64, *size);
526 
527         if (read_sync(ioc, &exportflags, sizeof(exportflags)) !=
528             sizeof(exportflags)) {
529             error_setg(errp, "Failed to read export flags");
530             goto fail;
531         }
532         exportflags = be16_to_cpu(exportflags);
533         *flags |= exportflags;
534         TRACE("Export flags are %x", exportflags);
535     } else if (magic == NBD_CLIENT_MAGIC) {
536         if (name) {
537             error_setg(errp, "Server does not support export names");
538             goto fail;
539         }
540         if (tlscreds) {
541             error_setg(errp, "Server does not support STARTTLS");
542             goto fail;
543         }
544 
545         if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
546             error_setg(errp, "Failed to read export length");
547             goto fail;
548         }
549         *size = be64_to_cpu(s);
550         TRACE("Size is %" PRIu64, *size);
551 
552         if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) {
553             error_setg(errp, "Failed to read export flags");
554             goto fail;
555         }
556         *flags = be32_to_cpup(flags);
557     } else {
558         error_setg(errp, "Bad magic received");
559         goto fail;
560     }
561 
562     if (read_sync(ioc, &buf, 124) != 124) {
563         error_setg(errp, "Failed to read reserved block");
564         goto fail;
565     }
566     rc = 0;
567 
568 fail:
569     return rc;
570 }
571 
572 #ifdef __linux__
573 int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size)
574 {
575     TRACE("Setting NBD socket");
576 
577     if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) {
578         int serrno = errno;
579         LOG("Failed to set NBD socket");
580         return -serrno;
581     }
582 
583     TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
584 
585     if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) {
586         int serrno = errno;
587         LOG("Failed setting NBD block size");
588         return -serrno;
589     }
590 
591     TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE));
592 
593     if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) {
594         int serrno = errno;
595         LOG("Failed setting size (in blocks)");
596         return -serrno;
597     }
598 
599     if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) {
600         if (errno == ENOTTY) {
601             int read_only = (flags & NBD_FLAG_READ_ONLY) != 0;
602             TRACE("Setting readonly attribute");
603 
604             if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
605                 int serrno = errno;
606                 LOG("Failed setting read-only attribute");
607                 return -serrno;
608             }
609         } else {
610             int serrno = errno;
611             LOG("Failed setting flags");
612             return -serrno;
613         }
614     }
615 
616     TRACE("Negotiation ended");
617 
618     return 0;
619 }
620 
621 int nbd_client(int fd)
622 {
623     int ret;
624     int serrno;
625 
626     TRACE("Doing NBD loop");
627 
628     ret = ioctl(fd, NBD_DO_IT);
629     if (ret < 0 && errno == EPIPE) {
630         /* NBD_DO_IT normally returns EPIPE when someone has disconnected
631          * the socket via NBD_DISCONNECT.  We do not want to return 1 in
632          * that case.
633          */
634         ret = 0;
635     }
636     serrno = errno;
637 
638     TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
639 
640     TRACE("Clearing NBD queue");
641     ioctl(fd, NBD_CLEAR_QUE);
642 
643     TRACE("Clearing NBD socket");
644     ioctl(fd, NBD_CLEAR_SOCK);
645 
646     errno = serrno;
647     return ret;
648 }
649 #else
650 int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size)
651 {
652     return -ENOTSUP;
653 }
654 
/* Fallback used when __linux__ is not defined: there is no NBD loop to
 * run, so always report -ENOTSUP.  fd is ignored.
 */
int nbd_client(int fd)
{
    const int unsupported = -ENOTSUP;

    return unsupported;
}
659 #endif
660 
661 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
662 {
663     uint8_t buf[NBD_REQUEST_SIZE];
664     ssize_t ret;
665 
666     TRACE("Sending request to server: "
667           "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
668           request->from, request->len, request->handle, request->type);
669 
670     cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
671     cpu_to_be32w((uint32_t*)(buf + 4), request->type);
672     cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
673     cpu_to_be64w((uint64_t*)(buf + 16), request->from);
674     cpu_to_be32w((uint32_t*)(buf + 24), request->len);
675 
676     ret = write_sync(ioc, buf, sizeof(buf));
677     if (ret < 0) {
678         return ret;
679     }
680 
681     if (ret != sizeof(buf)) {
682         LOG("writing to socket failed");
683         return -EINVAL;
684     }
685     return 0;
686 }
687 
688 ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
689 {
690     uint8_t buf[NBD_REPLY_SIZE];
691     uint32_t magic;
692     ssize_t ret;
693 
694     ret = read_sync(ioc, buf, sizeof(buf));
695     if (ret < 0) {
696         return ret;
697     }
698 
699     if (ret != sizeof(buf)) {
700         LOG("read failed");
701         return -EINVAL;
702     }
703 
704     /* Reply
705        [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
706        [ 4 ..  7]    error   (0 == no error)
707        [ 7 .. 15]    handle
708      */
709 
710     magic = be32_to_cpup((uint32_t*)buf);
711     reply->error  = be32_to_cpup((uint32_t*)(buf + 4));
712     reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
713 
714     reply->error = nbd_errno_to_system_errno(reply->error);
715 
716     TRACE("Got reply: "
717           "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
718           magic, reply->error, reply->handle);
719 
720     if (magic != NBD_REPLY_MAGIC) {
721         LOG("invalid magic (got 0x%x)", magic);
722         return -EINVAL;
723     }
724     return 0;
725 }
726 
727