xref: /qemu/block/blkio.c (revision 10b2393e)
1fd66dbd4SStefan Hajnoczi /* SPDX-License-Identifier: LGPL-2.1-or-later */
2fd66dbd4SStefan Hajnoczi /*
3fd66dbd4SStefan Hajnoczi  * libblkio BlockDriver
4fd66dbd4SStefan Hajnoczi  *
5fd66dbd4SStefan Hajnoczi  * Copyright Red Hat, Inc.
6fd66dbd4SStefan Hajnoczi  *
7fd66dbd4SStefan Hajnoczi  * Author:
8fd66dbd4SStefan Hajnoczi  *   Stefan Hajnoczi <stefanha@redhat.com>
9fd66dbd4SStefan Hajnoczi  */
10fd66dbd4SStefan Hajnoczi 
11fd66dbd4SStefan Hajnoczi #include "qemu/osdep.h"
12fd66dbd4SStefan Hajnoczi #include <blkio.h>
13fd66dbd4SStefan Hajnoczi #include "block/block_int.h"
14c5640b3eSStefan Hajnoczi #include "exec/memory.h"
15c5640b3eSStefan Hajnoczi #include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
16433fcea4SStefan Hajnoczi #include "qemu/defer-call.h"
17fd66dbd4SStefan Hajnoczi #include "qapi/error.h"
18c5640b3eSStefan Hajnoczi #include "qemu/error-report.h"
19fd66dbd4SStefan Hajnoczi #include "qapi/qmp/qdict.h"
20fd66dbd4SStefan Hajnoczi #include "qemu/module.h"
2128ff7b4dSStefan Hajnoczi #include "sysemu/block-backend.h"
22c5640b3eSStefan Hajnoczi #include "exec/memory.h" /* for ram_block_discard_disable() */
23fd66dbd4SStefan Hajnoczi 
244f01a9bbSPeter Krempa #include "block/block-io.h"
254f01a9bbSPeter Krempa 
26fd66dbd4SStefan Hajnoczi /*
27fd66dbd4SStefan Hajnoczi  * Allocated bounce buffers are kept in a list sorted by buffer address.
28fd66dbd4SStefan Hajnoczi  */
29fd66dbd4SStefan Hajnoczi typedef struct BlkioBounceBuf {
30fd66dbd4SStefan Hajnoczi     QLIST_ENTRY(BlkioBounceBuf) next;
31fd66dbd4SStefan Hajnoczi 
32fd66dbd4SStefan Hajnoczi     /* The bounce buffer */
33fd66dbd4SStefan Hajnoczi     struct iovec buf;
34fd66dbd4SStefan Hajnoczi } BlkioBounceBuf;
35fd66dbd4SStefan Hajnoczi 
36fd66dbd4SStefan Hajnoczi typedef struct {
37fd66dbd4SStefan Hajnoczi     /*
38fd66dbd4SStefan Hajnoczi      * libblkio is not thread-safe so this lock protects ->blkio and
39fd66dbd4SStefan Hajnoczi      * ->blkioq.
40fd66dbd4SStefan Hajnoczi      */
41fd66dbd4SStefan Hajnoczi     QemuMutex blkio_lock;
42fd66dbd4SStefan Hajnoczi     struct blkio *blkio;
43fd66dbd4SStefan Hajnoczi     struct blkioq *blkioq; /* make this multi-queue in the future... */
44fd66dbd4SStefan Hajnoczi     int completion_fd;
45fd66dbd4SStefan Hajnoczi 
46fd66dbd4SStefan Hajnoczi     /*
47fd66dbd4SStefan Hajnoczi      * Polling fetches the next completion into this field.
48fd66dbd4SStefan Hajnoczi      *
49fd66dbd4SStefan Hajnoczi      * No lock is necessary since only one thread calls aio_poll() and invokes
50fd66dbd4SStefan Hajnoczi      * fd and poll handlers.
51fd66dbd4SStefan Hajnoczi      */
52fd66dbd4SStefan Hajnoczi     struct blkio_completion poll_completion;
53fd66dbd4SStefan Hajnoczi 
54fd66dbd4SStefan Hajnoczi     /*
55fd66dbd4SStefan Hajnoczi      * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
56fd66dbd4SStefan Hajnoczi      *
57fd66dbd4SStefan Hajnoczi      * Lock ordering: ->bounce_lock before ->blkio_lock.
58fd66dbd4SStefan Hajnoczi      */
59fd66dbd4SStefan Hajnoczi     CoMutex bounce_lock;
60fd66dbd4SStefan Hajnoczi 
61fd66dbd4SStefan Hajnoczi     /* Bounce buffer pool */
62fd66dbd4SStefan Hajnoczi     struct blkio_mem_region bounce_pool;
63fd66dbd4SStefan Hajnoczi 
64fd66dbd4SStefan Hajnoczi     /* Sorted list of allocated bounce buffers */
65fd66dbd4SStefan Hajnoczi     QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;
66fd66dbd4SStefan Hajnoczi 
67fd66dbd4SStefan Hajnoczi     /* Queue for coroutines waiting for bounce buffer space */
68fd66dbd4SStefan Hajnoczi     CoQueue bounce_available;
69fd66dbd4SStefan Hajnoczi 
70fd66dbd4SStefan Hajnoczi     /* The value of the "mem-region-alignment" property */
71615eaeabSRichard W.M. Jones     uint64_t mem_region_alignment;
72fd66dbd4SStefan Hajnoczi 
73fd66dbd4SStefan Hajnoczi     /* Can we skip adding/deleting blkio_mem_regions? */
74fd66dbd4SStefan Hajnoczi     bool needs_mem_regions;
75c5640b3eSStefan Hajnoczi 
76c5640b3eSStefan Hajnoczi     /* Are file descriptors necessary for blkio_mem_regions? */
77c5640b3eSStefan Hajnoczi     bool needs_mem_region_fd;
78c5640b3eSStefan Hajnoczi 
79c5640b3eSStefan Hajnoczi     /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
80c5640b3eSStefan Hajnoczi     bool may_pin_mem_regions;
81fd66dbd4SStefan Hajnoczi } BDRVBlkioState;
82fd66dbd4SStefan Hajnoczi 
83fd66dbd4SStefan Hajnoczi /* Called with s->bounce_lock held */
blkio_resize_bounce_pool(BDRVBlkioState * s,int64_t bytes)84fd66dbd4SStefan Hajnoczi static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
85fd66dbd4SStefan Hajnoczi {
86fd66dbd4SStefan Hajnoczi     /* There can be no allocated bounce buffers during resize */
87fd66dbd4SStefan Hajnoczi     assert(QLIST_EMPTY(&s->bounce_bufs));
88fd66dbd4SStefan Hajnoczi 
89fd66dbd4SStefan Hajnoczi     /* Pad size to reduce frequency of resize calls */
90fd66dbd4SStefan Hajnoczi     bytes += 128 * 1024;
91fd66dbd4SStefan Hajnoczi 
92*10b2393eSKevin Wolf     /* Align the pool size to avoid blkio_alloc_mem_region() failure */
93*10b2393eSKevin Wolf     bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment);
94*10b2393eSKevin Wolf 
95fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
96fd66dbd4SStefan Hajnoczi         int ret;
97fd66dbd4SStefan Hajnoczi 
98fd66dbd4SStefan Hajnoczi         if (s->bounce_pool.addr) {
99fd66dbd4SStefan Hajnoczi             blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
100fd66dbd4SStefan Hajnoczi             blkio_free_mem_region(s->blkio, &s->bounce_pool);
101fd66dbd4SStefan Hajnoczi             memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
102fd66dbd4SStefan Hajnoczi         }
103fd66dbd4SStefan Hajnoczi 
104fd66dbd4SStefan Hajnoczi         /* Automatically freed when s->blkio is destroyed */
105fd66dbd4SStefan Hajnoczi         ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
106fd66dbd4SStefan Hajnoczi         if (ret < 0) {
107fd66dbd4SStefan Hajnoczi             return ret;
108fd66dbd4SStefan Hajnoczi         }
109fd66dbd4SStefan Hajnoczi 
110fd66dbd4SStefan Hajnoczi         ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
111fd66dbd4SStefan Hajnoczi         if (ret < 0) {
112fd66dbd4SStefan Hajnoczi             blkio_free_mem_region(s->blkio, &s->bounce_pool);
113fd66dbd4SStefan Hajnoczi             memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
114fd66dbd4SStefan Hajnoczi             return ret;
115fd66dbd4SStefan Hajnoczi         }
116fd66dbd4SStefan Hajnoczi     }
117fd66dbd4SStefan Hajnoczi 
118fd66dbd4SStefan Hajnoczi     return 0;
119fd66dbd4SStefan Hajnoczi }
120fd66dbd4SStefan Hajnoczi 
121fd66dbd4SStefan Hajnoczi /* Called with s->bounce_lock held */
122fd66dbd4SStefan Hajnoczi static bool
blkio_do_alloc_bounce_buffer(BDRVBlkioState * s,BlkioBounceBuf * bounce,int64_t bytes)123fd66dbd4SStefan Hajnoczi blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
124fd66dbd4SStefan Hajnoczi                              int64_t bytes)
125fd66dbd4SStefan Hajnoczi {
126fd66dbd4SStefan Hajnoczi     void *addr = s->bounce_pool.addr;
127fd66dbd4SStefan Hajnoczi     BlkioBounceBuf *cur = NULL;
128fd66dbd4SStefan Hajnoczi     BlkioBounceBuf *prev = NULL;
129fd66dbd4SStefan Hajnoczi     ptrdiff_t space;
130fd66dbd4SStefan Hajnoczi 
131fd66dbd4SStefan Hajnoczi     /*
132fd66dbd4SStefan Hajnoczi      * This is just a linear search over the holes between requests. An
133fd66dbd4SStefan Hajnoczi      * efficient allocator would be nice.
134fd66dbd4SStefan Hajnoczi      */
135fd66dbd4SStefan Hajnoczi     QLIST_FOREACH(cur, &s->bounce_bufs, next) {
136fd66dbd4SStefan Hajnoczi         space = cur->buf.iov_base - addr;
137fd66dbd4SStefan Hajnoczi         if (bytes <= space) {
138fd66dbd4SStefan Hajnoczi             QLIST_INSERT_BEFORE(cur, bounce, next);
139fd66dbd4SStefan Hajnoczi             bounce->buf.iov_base = addr;
140fd66dbd4SStefan Hajnoczi             bounce->buf.iov_len = bytes;
141fd66dbd4SStefan Hajnoczi             return true;
142fd66dbd4SStefan Hajnoczi         }
143fd66dbd4SStefan Hajnoczi 
144fd66dbd4SStefan Hajnoczi         addr = cur->buf.iov_base + cur->buf.iov_len;
145fd66dbd4SStefan Hajnoczi         prev = cur;
146fd66dbd4SStefan Hajnoczi     }
147fd66dbd4SStefan Hajnoczi 
148fd66dbd4SStefan Hajnoczi     /* Is there space after the last request? */
149fd66dbd4SStefan Hajnoczi     space = s->bounce_pool.addr + s->bounce_pool.len - addr;
150fd66dbd4SStefan Hajnoczi     if (bytes > space) {
151fd66dbd4SStefan Hajnoczi         return false;
152fd66dbd4SStefan Hajnoczi     }
153fd66dbd4SStefan Hajnoczi     if (prev) {
154fd66dbd4SStefan Hajnoczi         QLIST_INSERT_AFTER(prev, bounce, next);
155fd66dbd4SStefan Hajnoczi     } else {
156fd66dbd4SStefan Hajnoczi         QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
157fd66dbd4SStefan Hajnoczi     }
158fd66dbd4SStefan Hajnoczi     bounce->buf.iov_base = addr;
159fd66dbd4SStefan Hajnoczi     bounce->buf.iov_len = bytes;
160fd66dbd4SStefan Hajnoczi     return true;
161fd66dbd4SStefan Hajnoczi }
162fd66dbd4SStefan Hajnoczi 
163fd66dbd4SStefan Hajnoczi static int coroutine_fn
blkio_alloc_bounce_buffer(BDRVBlkioState * s,BlkioBounceBuf * bounce,int64_t bytes)164fd66dbd4SStefan Hajnoczi blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
165fd66dbd4SStefan Hajnoczi                           int64_t bytes)
166fd66dbd4SStefan Hajnoczi {
167fd66dbd4SStefan Hajnoczi     /*
168fd66dbd4SStefan Hajnoczi      * Ensure fairness: first time around we join the back of the queue,
169fd66dbd4SStefan Hajnoczi      * subsequently we join the front so we don't lose our place.
170fd66dbd4SStefan Hajnoczi      */
171fd66dbd4SStefan Hajnoczi     CoQueueWaitFlags wait_flags = 0;
172fd66dbd4SStefan Hajnoczi 
173fd66dbd4SStefan Hajnoczi     QEMU_LOCK_GUARD(&s->bounce_lock);
174fd66dbd4SStefan Hajnoczi 
175fd66dbd4SStefan Hajnoczi     /* Ensure fairness: don't even try if other requests are already waiting */
176fd66dbd4SStefan Hajnoczi     if (!qemu_co_queue_empty(&s->bounce_available)) {
177fd66dbd4SStefan Hajnoczi         qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
178fd66dbd4SStefan Hajnoczi                                  wait_flags);
179fd66dbd4SStefan Hajnoczi         wait_flags = CO_QUEUE_WAIT_FRONT;
180fd66dbd4SStefan Hajnoczi     }
181fd66dbd4SStefan Hajnoczi 
182fd66dbd4SStefan Hajnoczi     while (true) {
183fd66dbd4SStefan Hajnoczi         if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
184fd66dbd4SStefan Hajnoczi             /* Kick the next queued request since there may be space */
185fd66dbd4SStefan Hajnoczi             qemu_co_queue_next(&s->bounce_available);
186fd66dbd4SStefan Hajnoczi             return 0;
187fd66dbd4SStefan Hajnoczi         }
188fd66dbd4SStefan Hajnoczi 
189fd66dbd4SStefan Hajnoczi         /*
190fd66dbd4SStefan Hajnoczi          * If there are no in-flight requests then the pool was simply too
191fd66dbd4SStefan Hajnoczi          * small.
192fd66dbd4SStefan Hajnoczi          */
193fd66dbd4SStefan Hajnoczi         if (QLIST_EMPTY(&s->bounce_bufs)) {
194fd66dbd4SStefan Hajnoczi             bool ok;
195fd66dbd4SStefan Hajnoczi             int ret;
196fd66dbd4SStefan Hajnoczi 
197fd66dbd4SStefan Hajnoczi             ret = blkio_resize_bounce_pool(s, bytes);
198fd66dbd4SStefan Hajnoczi             if (ret < 0) {
199fd66dbd4SStefan Hajnoczi                 /* Kick the next queued request since that may fail too */
200fd66dbd4SStefan Hajnoczi                 qemu_co_queue_next(&s->bounce_available);
201fd66dbd4SStefan Hajnoczi                 return ret;
202fd66dbd4SStefan Hajnoczi             }
203fd66dbd4SStefan Hajnoczi 
204fd66dbd4SStefan Hajnoczi             ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
205fd66dbd4SStefan Hajnoczi             assert(ok); /* must have space this time */
206fd66dbd4SStefan Hajnoczi             return 0;
207fd66dbd4SStefan Hajnoczi         }
208fd66dbd4SStefan Hajnoczi 
209fd66dbd4SStefan Hajnoczi         qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
210fd66dbd4SStefan Hajnoczi                                  wait_flags);
211fd66dbd4SStefan Hajnoczi         wait_flags = CO_QUEUE_WAIT_FRONT;
212fd66dbd4SStefan Hajnoczi     }
213fd66dbd4SStefan Hajnoczi }
214fd66dbd4SStefan Hajnoczi 
blkio_free_bounce_buffer(BDRVBlkioState * s,BlkioBounceBuf * bounce)215fd66dbd4SStefan Hajnoczi static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
216fd66dbd4SStefan Hajnoczi                                                   BlkioBounceBuf *bounce)
217fd66dbd4SStefan Hajnoczi {
218fd66dbd4SStefan Hajnoczi     QEMU_LOCK_GUARD(&s->bounce_lock);
219fd66dbd4SStefan Hajnoczi 
220fd66dbd4SStefan Hajnoczi     QLIST_REMOVE(bounce, next);
221fd66dbd4SStefan Hajnoczi 
222fd66dbd4SStefan Hajnoczi     /* Wake up waiting coroutines since space may now be available */
223fd66dbd4SStefan Hajnoczi     qemu_co_queue_next(&s->bounce_available);
224fd66dbd4SStefan Hajnoczi }
225fd66dbd4SStefan Hajnoczi 
226fd66dbd4SStefan Hajnoczi /* For async to .bdrv_co_*() conversion */
227fd66dbd4SStefan Hajnoczi typedef struct {
228fd66dbd4SStefan Hajnoczi     Coroutine *coroutine;
229fd66dbd4SStefan Hajnoczi     int ret;
230fd66dbd4SStefan Hajnoczi } BlkioCoData;
231fd66dbd4SStefan Hajnoczi 
blkio_completion_fd_read(void * opaque)232fd66dbd4SStefan Hajnoczi static void blkio_completion_fd_read(void *opaque)
233fd66dbd4SStefan Hajnoczi {
234fd66dbd4SStefan Hajnoczi     BlockDriverState *bs = opaque;
235fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
236fd66dbd4SStefan Hajnoczi     uint64_t val;
237fd66dbd4SStefan Hajnoczi     int ret;
238fd66dbd4SStefan Hajnoczi 
239fd66dbd4SStefan Hajnoczi     /* Polling may have already fetched a completion */
240fd66dbd4SStefan Hajnoczi     if (s->poll_completion.user_data != NULL) {
241fd66dbd4SStefan Hajnoczi         BlkioCoData *cod = s->poll_completion.user_data;
242fd66dbd4SStefan Hajnoczi         cod->ret = s->poll_completion.ret;
243fd66dbd4SStefan Hajnoczi 
244fd66dbd4SStefan Hajnoczi         /* Clear it in case aio_co_wake() enters a nested event loop */
245fd66dbd4SStefan Hajnoczi         s->poll_completion.user_data = NULL;
246fd66dbd4SStefan Hajnoczi 
247fd66dbd4SStefan Hajnoczi         aio_co_wake(cod->coroutine);
248fd66dbd4SStefan Hajnoczi     }
249fd66dbd4SStefan Hajnoczi 
250fd66dbd4SStefan Hajnoczi     /* Reset completion fd status */
251fd66dbd4SStefan Hajnoczi     ret = read(s->completion_fd, &val, sizeof(val));
252fd66dbd4SStefan Hajnoczi 
253fd66dbd4SStefan Hajnoczi     /* Ignore errors, there's nothing we can do */
254fd66dbd4SStefan Hajnoczi     (void)ret;
255fd66dbd4SStefan Hajnoczi 
256fd66dbd4SStefan Hajnoczi     /*
257fd66dbd4SStefan Hajnoczi      * Reading one completion at a time makes nested event loop re-entrancy
258fd66dbd4SStefan Hajnoczi      * simple. Change this loop to get multiple completions in one go if it
259fd66dbd4SStefan Hajnoczi      * becomes a performance bottleneck.
260fd66dbd4SStefan Hajnoczi      */
261fd66dbd4SStefan Hajnoczi     while (true) {
262fd66dbd4SStefan Hajnoczi         struct blkio_completion completion;
263fd66dbd4SStefan Hajnoczi 
264fd66dbd4SStefan Hajnoczi         WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
265fd66dbd4SStefan Hajnoczi             ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
266fd66dbd4SStefan Hajnoczi         }
267fd66dbd4SStefan Hajnoczi         if (ret != 1) {
268fd66dbd4SStefan Hajnoczi             break;
269fd66dbd4SStefan Hajnoczi         }
270fd66dbd4SStefan Hajnoczi 
271fd66dbd4SStefan Hajnoczi         BlkioCoData *cod = completion.user_data;
272fd66dbd4SStefan Hajnoczi         cod->ret = completion.ret;
273fd66dbd4SStefan Hajnoczi         aio_co_wake(cod->coroutine);
274fd66dbd4SStefan Hajnoczi     }
275fd66dbd4SStefan Hajnoczi }
276fd66dbd4SStefan Hajnoczi 
blkio_completion_fd_poll(void * opaque)277fd66dbd4SStefan Hajnoczi static bool blkio_completion_fd_poll(void *opaque)
278fd66dbd4SStefan Hajnoczi {
279fd66dbd4SStefan Hajnoczi     BlockDriverState *bs = opaque;
280fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
281fd66dbd4SStefan Hajnoczi     int ret;
282fd66dbd4SStefan Hajnoczi 
283fd66dbd4SStefan Hajnoczi     /* Just in case we already fetched a completion */
284fd66dbd4SStefan Hajnoczi     if (s->poll_completion.user_data != NULL) {
285fd66dbd4SStefan Hajnoczi         return true;
286fd66dbd4SStefan Hajnoczi     }
287fd66dbd4SStefan Hajnoczi 
288fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
289fd66dbd4SStefan Hajnoczi         ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
290fd66dbd4SStefan Hajnoczi     }
291fd66dbd4SStefan Hajnoczi     return ret == 1;
292fd66dbd4SStefan Hajnoczi }
293fd66dbd4SStefan Hajnoczi 
/*
 * Poll-ready handler: processing is identical to the fd becoming readable,
 * so simply forward @opaque (the BlockDriverState) to the read handler.
 */
static void blkio_completion_fd_poll_ready(void *opaque)
{
    blkio_completion_fd_read(opaque);
}
298fd66dbd4SStefan Hajnoczi 
/*
 * Start watching the completion fd in @new_context, with @bs as the opaque
 * value handed to the read, poll and poll-ready handlers.
 */
static void blkio_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    BDRVBlkioState *s = bs->opaque;

    aio_set_fd_handler(new_context,
                       s->completion_fd,
                       blkio_completion_fd_read,       /* io_read */
                       NULL,                           /* no write handler */
                       blkio_completion_fd_poll,
                       blkio_completion_fd_poll_ready,
                       bs);
}
309fd66dbd4SStefan Hajnoczi 
/*
 * Stop watching the completion fd in the AioContext @bs currently belongs to
 * by clearing all of its handlers.
 */
static void blkio_detach_aio_context(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_set_fd_handler(ctx, s->completion_fd,
                       NULL, NULL, NULL, NULL, NULL);
}
317fd66dbd4SStefan Hajnoczi 
31828ff7b4dSStefan Hajnoczi /*
319ccee48aaSStefan Hajnoczi  * Called by defer_call_end() or immediately if not in a deferred section.
320ccee48aaSStefan Hajnoczi  * Called without blkio_lock.
32128ff7b4dSStefan Hajnoczi  */
blkio_deferred_fn(void * opaque)322ccee48aaSStefan Hajnoczi static void blkio_deferred_fn(void *opaque)
323fd66dbd4SStefan Hajnoczi {
32428ff7b4dSStefan Hajnoczi     BDRVBlkioState *s = opaque;
325fd66dbd4SStefan Hajnoczi 
32628ff7b4dSStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
327fd66dbd4SStefan Hajnoczi         blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
328fd66dbd4SStefan Hajnoczi     }
329fd66dbd4SStefan Hajnoczi }
330fd66dbd4SStefan Hajnoczi 
33128ff7b4dSStefan Hajnoczi /*
33228ff7b4dSStefan Hajnoczi  * Schedule I/O submission after enqueuing a new request. Called without
33328ff7b4dSStefan Hajnoczi  * blkio_lock.
33428ff7b4dSStefan Hajnoczi  */
blkio_submit_io(BlockDriverState * bs)33528ff7b4dSStefan Hajnoczi static void blkio_submit_io(BlockDriverState *bs)
33628ff7b4dSStefan Hajnoczi {
33728ff7b4dSStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
33828ff7b4dSStefan Hajnoczi 
339ccee48aaSStefan Hajnoczi     defer_call(blkio_deferred_fn, s);
34028ff7b4dSStefan Hajnoczi }
34128ff7b4dSStefan Hajnoczi 
342fd66dbd4SStefan Hajnoczi static int coroutine_fn
blkio_co_pdiscard(BlockDriverState * bs,int64_t offset,int64_t bytes)343fd66dbd4SStefan Hajnoczi blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
344fd66dbd4SStefan Hajnoczi {
345fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
346fd66dbd4SStefan Hajnoczi     BlkioCoData cod = {
347fd66dbd4SStefan Hajnoczi         .coroutine = qemu_coroutine_self(),
348fd66dbd4SStefan Hajnoczi     };
349fd66dbd4SStefan Hajnoczi 
350fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
351fd66dbd4SStefan Hajnoczi         blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
352fd66dbd4SStefan Hajnoczi     }
353fd66dbd4SStefan Hajnoczi 
35428ff7b4dSStefan Hajnoczi     blkio_submit_io(bs);
355fd66dbd4SStefan Hajnoczi     qemu_coroutine_yield();
356fd66dbd4SStefan Hajnoczi     return cod.ret;
357fd66dbd4SStefan Hajnoczi }
358fd66dbd4SStefan Hajnoczi 
359fd66dbd4SStefan Hajnoczi static int coroutine_fn
blkio_co_preadv(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)360fd66dbd4SStefan Hajnoczi blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
361fd66dbd4SStefan Hajnoczi                 QEMUIOVector *qiov, BdrvRequestFlags flags)
362fd66dbd4SStefan Hajnoczi {
363fd66dbd4SStefan Hajnoczi     BlkioCoData cod = {
364fd66dbd4SStefan Hajnoczi         .coroutine = qemu_coroutine_self(),
365fd66dbd4SStefan Hajnoczi     };
366fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
367c5640b3eSStefan Hajnoczi     bool use_bounce_buffer =
368c5640b3eSStefan Hajnoczi         s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
369fd66dbd4SStefan Hajnoczi     BlkioBounceBuf bounce;
370fd66dbd4SStefan Hajnoczi     struct iovec *iov = qiov->iov;
371fd66dbd4SStefan Hajnoczi     int iovcnt = qiov->niov;
372fd66dbd4SStefan Hajnoczi 
373fd66dbd4SStefan Hajnoczi     if (use_bounce_buffer) {
374fd66dbd4SStefan Hajnoczi         int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
375fd66dbd4SStefan Hajnoczi         if (ret < 0) {
376fd66dbd4SStefan Hajnoczi             return ret;
377fd66dbd4SStefan Hajnoczi         }
378fd66dbd4SStefan Hajnoczi 
379fd66dbd4SStefan Hajnoczi         iov = &bounce.buf;
380fd66dbd4SStefan Hajnoczi         iovcnt = 1;
381fd66dbd4SStefan Hajnoczi     }
382fd66dbd4SStefan Hajnoczi 
383fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
384fd66dbd4SStefan Hajnoczi         blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
385fd66dbd4SStefan Hajnoczi     }
386fd66dbd4SStefan Hajnoczi 
38728ff7b4dSStefan Hajnoczi     blkio_submit_io(bs);
388fd66dbd4SStefan Hajnoczi     qemu_coroutine_yield();
389fd66dbd4SStefan Hajnoczi 
390fd66dbd4SStefan Hajnoczi     if (use_bounce_buffer) {
391fd66dbd4SStefan Hajnoczi         if (cod.ret == 0) {
392fd66dbd4SStefan Hajnoczi             qemu_iovec_from_buf(qiov, 0,
393fd66dbd4SStefan Hajnoczi                                 bounce.buf.iov_base,
394fd66dbd4SStefan Hajnoczi                                 bounce.buf.iov_len);
395fd66dbd4SStefan Hajnoczi         }
396fd66dbd4SStefan Hajnoczi 
397fd66dbd4SStefan Hajnoczi         blkio_free_bounce_buffer(s, &bounce);
398fd66dbd4SStefan Hajnoczi     }
399fd66dbd4SStefan Hajnoczi 
400fd66dbd4SStefan Hajnoczi     return cod.ret;
401fd66dbd4SStefan Hajnoczi }
402fd66dbd4SStefan Hajnoczi 
blkio_co_pwritev(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)403fd66dbd4SStefan Hajnoczi static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
404fd66dbd4SStefan Hajnoczi         int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
405fd66dbd4SStefan Hajnoczi {
406fd66dbd4SStefan Hajnoczi     uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
407fd66dbd4SStefan Hajnoczi     BlkioCoData cod = {
408fd66dbd4SStefan Hajnoczi         .coroutine = qemu_coroutine_self(),
409fd66dbd4SStefan Hajnoczi     };
410fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
411c5640b3eSStefan Hajnoczi     bool use_bounce_buffer =
412c5640b3eSStefan Hajnoczi         s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
413fd66dbd4SStefan Hajnoczi     BlkioBounceBuf bounce;
414fd66dbd4SStefan Hajnoczi     struct iovec *iov = qiov->iov;
415fd66dbd4SStefan Hajnoczi     int iovcnt = qiov->niov;
416fd66dbd4SStefan Hajnoczi 
417fd66dbd4SStefan Hajnoczi     if (use_bounce_buffer) {
418fd66dbd4SStefan Hajnoczi         int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
419fd66dbd4SStefan Hajnoczi         if (ret < 0) {
420fd66dbd4SStefan Hajnoczi             return ret;
421fd66dbd4SStefan Hajnoczi         }
422fd66dbd4SStefan Hajnoczi 
423fd66dbd4SStefan Hajnoczi         qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
424fd66dbd4SStefan Hajnoczi         iov = &bounce.buf;
425fd66dbd4SStefan Hajnoczi         iovcnt = 1;
426fd66dbd4SStefan Hajnoczi     }
427fd66dbd4SStefan Hajnoczi 
428fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
429fd66dbd4SStefan Hajnoczi         blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
430fd66dbd4SStefan Hajnoczi     }
431fd66dbd4SStefan Hajnoczi 
43228ff7b4dSStefan Hajnoczi     blkio_submit_io(bs);
433fd66dbd4SStefan Hajnoczi     qemu_coroutine_yield();
434fd66dbd4SStefan Hajnoczi 
435fd66dbd4SStefan Hajnoczi     if (use_bounce_buffer) {
436fd66dbd4SStefan Hajnoczi         blkio_free_bounce_buffer(s, &bounce);
437fd66dbd4SStefan Hajnoczi     }
438fd66dbd4SStefan Hajnoczi 
439fd66dbd4SStefan Hajnoczi     return cod.ret;
440fd66dbd4SStefan Hajnoczi }
441fd66dbd4SStefan Hajnoczi 
blkio_co_flush(BlockDriverState * bs)442fd66dbd4SStefan Hajnoczi static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
443fd66dbd4SStefan Hajnoczi {
444fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
445fd66dbd4SStefan Hajnoczi     BlkioCoData cod = {
446fd66dbd4SStefan Hajnoczi         .coroutine = qemu_coroutine_self(),
447fd66dbd4SStefan Hajnoczi     };
448fd66dbd4SStefan Hajnoczi 
449fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
450fd66dbd4SStefan Hajnoczi         blkioq_flush(s->blkioq, &cod, 0);
451fd66dbd4SStefan Hajnoczi     }
452fd66dbd4SStefan Hajnoczi 
45328ff7b4dSStefan Hajnoczi     blkio_submit_io(bs);
454fd66dbd4SStefan Hajnoczi     qemu_coroutine_yield();
455fd66dbd4SStefan Hajnoczi     return cod.ret;
456fd66dbd4SStefan Hajnoczi }
457fd66dbd4SStefan Hajnoczi 
blkio_co_pwrite_zeroes(BlockDriverState * bs,int64_t offset,int64_t bytes,BdrvRequestFlags flags)458fd66dbd4SStefan Hajnoczi static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
459fd66dbd4SStefan Hajnoczi     int64_t offset, int64_t bytes, BdrvRequestFlags flags)
460fd66dbd4SStefan Hajnoczi {
461fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
462fd66dbd4SStefan Hajnoczi     BlkioCoData cod = {
463fd66dbd4SStefan Hajnoczi         .coroutine = qemu_coroutine_self(),
464fd66dbd4SStefan Hajnoczi     };
465fd66dbd4SStefan Hajnoczi     uint32_t blkio_flags = 0;
466fd66dbd4SStefan Hajnoczi 
467fd66dbd4SStefan Hajnoczi     if (flags & BDRV_REQ_FUA) {
468fd66dbd4SStefan Hajnoczi         blkio_flags |= BLKIO_REQ_FUA;
469fd66dbd4SStefan Hajnoczi     }
470fd66dbd4SStefan Hajnoczi     if (!(flags & BDRV_REQ_MAY_UNMAP)) {
471fd66dbd4SStefan Hajnoczi         blkio_flags |= BLKIO_REQ_NO_UNMAP;
472fd66dbd4SStefan Hajnoczi     }
473fd66dbd4SStefan Hajnoczi     if (flags & BDRV_REQ_NO_FALLBACK) {
474fd66dbd4SStefan Hajnoczi         blkio_flags |= BLKIO_REQ_NO_FALLBACK;
475fd66dbd4SStefan Hajnoczi     }
476fd66dbd4SStefan Hajnoczi 
477fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
478fd66dbd4SStefan Hajnoczi         blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
479fd66dbd4SStefan Hajnoczi     }
480fd66dbd4SStefan Hajnoczi 
48128ff7b4dSStefan Hajnoczi     blkio_submit_io(bs);
482fd66dbd4SStefan Hajnoczi     qemu_coroutine_yield();
483fd66dbd4SStefan Hajnoczi     return cod.ret;
484fd66dbd4SStefan Hajnoczi }
485fd66dbd4SStefan Hajnoczi 
/* Outcome of blkio_mem_region_from_host() */
typedef enum BlkioMemRegionResult {
    BMRR_OK,   /* *region was filled in */
    BMRR_SKIP, /* no fd for the memory; the buffer is silently ignored */
    BMRR_FAIL, /* the buffer is unusable and *errp was set */
} BlkioMemRegionResult;
491c5640b3eSStefan Hajnoczi 
492c5640b3eSStefan Hajnoczi /*
493c5640b3eSStefan Hajnoczi  * Produce a struct blkio_mem_region for a given address and size.
494c5640b3eSStefan Hajnoczi  *
495c5640b3eSStefan Hajnoczi  * This function produces identical results when called multiple times with the
496c5640b3eSStefan Hajnoczi  * same arguments. This property is necessary because blkio_unmap_mem_region()
497c5640b3eSStefan Hajnoczi  * must receive the same struct blkio_mem_region field values that were passed
498c5640b3eSStefan Hajnoczi  * to blkio_map_mem_region().
499c5640b3eSStefan Hajnoczi  */
500c5640b3eSStefan Hajnoczi static BlkioMemRegionResult
blkio_mem_region_from_host(BlockDriverState * bs,void * host,size_t size,struct blkio_mem_region * region,Error ** errp)501c5640b3eSStefan Hajnoczi blkio_mem_region_from_host(BlockDriverState *bs,
502c5640b3eSStefan Hajnoczi                            void *host, size_t size,
503c5640b3eSStefan Hajnoczi                            struct blkio_mem_region *region,
504c5640b3eSStefan Hajnoczi                            Error **errp)
505c5640b3eSStefan Hajnoczi {
506c5640b3eSStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
507c5640b3eSStefan Hajnoczi     int fd = -1;
508c5640b3eSStefan Hajnoczi     ram_addr_t fd_offset = 0;
509c5640b3eSStefan Hajnoczi 
510c5640b3eSStefan Hajnoczi     if (((uintptr_t)host | size) % s->mem_region_alignment) {
511c5640b3eSStefan Hajnoczi         error_setg(errp, "unaligned buf %p with size %zu", host, size);
512c5640b3eSStefan Hajnoczi         return BMRR_FAIL;
513c5640b3eSStefan Hajnoczi     }
514c5640b3eSStefan Hajnoczi 
515c5640b3eSStefan Hajnoczi     /* Attempt to find the fd for the underlying memory */
516c5640b3eSStefan Hajnoczi     if (s->needs_mem_region_fd) {
517c5640b3eSStefan Hajnoczi         RAMBlock *ram_block;
518c5640b3eSStefan Hajnoczi         RAMBlock *end_block;
519c5640b3eSStefan Hajnoczi         ram_addr_t offset;
520c5640b3eSStefan Hajnoczi 
521c5640b3eSStefan Hajnoczi         /*
522c5640b3eSStefan Hajnoczi          * bdrv_register_buf() is called with the BQL held so mr lives at least
523c5640b3eSStefan Hajnoczi          * until this function returns.
524c5640b3eSStefan Hajnoczi          */
525c5640b3eSStefan Hajnoczi         ram_block = qemu_ram_block_from_host(host, false, &fd_offset);
526c5640b3eSStefan Hajnoczi         if (ram_block) {
527c5640b3eSStefan Hajnoczi             fd = qemu_ram_get_fd(ram_block);
528c5640b3eSStefan Hajnoczi         }
529c5640b3eSStefan Hajnoczi         if (fd == -1) {
530c5640b3eSStefan Hajnoczi             /*
531c5640b3eSStefan Hajnoczi              * Ideally every RAMBlock would have an fd. pc-bios and other
532c5640b3eSStefan Hajnoczi              * things don't. Luckily they are usually not I/O buffers and we
533c5640b3eSStefan Hajnoczi              * can just ignore them.
534c5640b3eSStefan Hajnoczi              */
535c5640b3eSStefan Hajnoczi             return BMRR_SKIP;
536c5640b3eSStefan Hajnoczi         }
537c5640b3eSStefan Hajnoczi 
538c5640b3eSStefan Hajnoczi         /* Make sure the fd covers the entire range */
539c5640b3eSStefan Hajnoczi         end_block = qemu_ram_block_from_host(host + size - 1, false, &offset);
540c5640b3eSStefan Hajnoczi         if (ram_block != end_block) {
541c5640b3eSStefan Hajnoczi             error_setg(errp, "registered buffer at %p with size %zu extends "
542c5640b3eSStefan Hajnoczi                        "beyond RAMBlock", host, size);
543c5640b3eSStefan Hajnoczi             return BMRR_FAIL;
544c5640b3eSStefan Hajnoczi         }
545c5640b3eSStefan Hajnoczi     }
546c5640b3eSStefan Hajnoczi 
547c5640b3eSStefan Hajnoczi     *region = (struct blkio_mem_region){
548c5640b3eSStefan Hajnoczi         .addr = host,
549c5640b3eSStefan Hajnoczi         .len = size,
550c5640b3eSStefan Hajnoczi         .fd = fd,
551c5640b3eSStefan Hajnoczi         .fd_offset = fd_offset,
552c5640b3eSStefan Hajnoczi     };
553c5640b3eSStefan Hajnoczi     return BMRR_OK;
554c5640b3eSStefan Hajnoczi }
555c5640b3eSStefan Hajnoczi 
blkio_register_buf(BlockDriverState * bs,void * host,size_t size,Error ** errp)556c5640b3eSStefan Hajnoczi static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size,
557c5640b3eSStefan Hajnoczi                                Error **errp)
558c5640b3eSStefan Hajnoczi {
559c5640b3eSStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
560c5640b3eSStefan Hajnoczi     struct blkio_mem_region region;
561c5640b3eSStefan Hajnoczi     BlkioMemRegionResult region_result;
562c5640b3eSStefan Hajnoczi     int ret;
563c5640b3eSStefan Hajnoczi 
564c5640b3eSStefan Hajnoczi     /*
565c5640b3eSStefan Hajnoczi      * Mapping memory regions conflicts with RAM discard (virtio-mem) when
566c5640b3eSStefan Hajnoczi      * there is pinning, so only do it when necessary.
567c5640b3eSStefan Hajnoczi      */
568c5640b3eSStefan Hajnoczi     if (!s->needs_mem_regions && s->may_pin_mem_regions) {
569c5640b3eSStefan Hajnoczi         return true;
570c5640b3eSStefan Hajnoczi     }
571c5640b3eSStefan Hajnoczi 
572c5640b3eSStefan Hajnoczi     region_result = blkio_mem_region_from_host(bs, host, size, &region, errp);
573c5640b3eSStefan Hajnoczi     if (region_result == BMRR_SKIP) {
574c5640b3eSStefan Hajnoczi         return true;
575c5640b3eSStefan Hajnoczi     } else if (region_result != BMRR_OK) {
576c5640b3eSStefan Hajnoczi         return false;
577c5640b3eSStefan Hajnoczi     }
578c5640b3eSStefan Hajnoczi 
579c5640b3eSStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
580c5640b3eSStefan Hajnoczi         ret = blkio_map_mem_region(s->blkio, &region);
581c5640b3eSStefan Hajnoczi     }
582c5640b3eSStefan Hajnoczi 
583c5640b3eSStefan Hajnoczi     if (ret < 0) {
584c5640b3eSStefan Hajnoczi         error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s",
585c5640b3eSStefan Hajnoczi                    host, size, blkio_get_error_msg());
586c5640b3eSStefan Hajnoczi         return false;
587c5640b3eSStefan Hajnoczi     }
588c5640b3eSStefan Hajnoczi     return true;
589c5640b3eSStefan Hajnoczi }
590c5640b3eSStefan Hajnoczi 
/*
 * Undo blkio_register_buf() for the host buffer [host, host + size).
 *
 * Nothing is unmapped when registration would have been a no-op, i.e. when
 * memory regions are not needed but may be pinned, or when the region lookup
 * does not return BMRR_OK. Lookup errors are ignored (NULL errp).
 */
static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
{
    BDRVBlkioState *s = bs->opaque;
    struct blkio_mem_region region;
    BlkioMemRegionResult result;

    /* See blkio_register_buf() */
    if (s->may_pin_mem_regions && !s->needs_mem_regions) {
        return;
    }

    result = blkio_mem_region_from_host(bs, host, size, &region, NULL);
    if (result == BMRR_OK) {
        /* libblkio is not thread-safe, so serialize access to s->blkio */
        WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
            blkio_unmap_mem_region(s->blkio, &region);
        }
    }
}
609c5640b3eSStefan Hajnoczi 
blkio_io_uring_connect(BlockDriverState * bs,QDict * options,int flags,Error ** errp)61069785d66SStefano Garzarella static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
61169785d66SStefano Garzarella                                   int flags, Error **errp)
612fd66dbd4SStefan Hajnoczi {
613fd66dbd4SStefan Hajnoczi     const char *filename = qdict_get_str(options, "filename");
614fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
615fd66dbd4SStefan Hajnoczi     int ret;
616fd66dbd4SStefan Hajnoczi 
617fd66dbd4SStefan Hajnoczi     ret = blkio_set_str(s->blkio, "path", filename);
618fd66dbd4SStefan Hajnoczi     qdict_del(options, "filename");
619fd66dbd4SStefan Hajnoczi     if (ret < 0) {
620fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to set path: %s",
621fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
622fd66dbd4SStefan Hajnoczi         return ret;
623fd66dbd4SStefan Hajnoczi     }
624fd66dbd4SStefan Hajnoczi 
625fd66dbd4SStefan Hajnoczi     if (flags & BDRV_O_NOCACHE) {
626fd66dbd4SStefan Hajnoczi         ret = blkio_set_bool(s->blkio, "direct", true);
627fd66dbd4SStefan Hajnoczi         if (ret < 0) {
628fd66dbd4SStefan Hajnoczi             error_setg_errno(errp, -ret, "failed to set direct: %s",
629fd66dbd4SStefan Hajnoczi                              blkio_get_error_msg());
630fd66dbd4SStefan Hajnoczi             return ret;
631fd66dbd4SStefan Hajnoczi         }
632fd66dbd4SStefan Hajnoczi     }
633fd66dbd4SStefan Hajnoczi 
63469785d66SStefano Garzarella     ret = blkio_connect(s->blkio);
63569785d66SStefano Garzarella     if (ret < 0) {
63669785d66SStefano Garzarella         error_setg_errno(errp, -ret, "blkio_connect failed: %s",
63769785d66SStefano Garzarella                          blkio_get_error_msg());
63869785d66SStefano Garzarella         return ret;
63969785d66SStefano Garzarella     }
64069785d66SStefano Garzarella 
641fd66dbd4SStefan Hajnoczi     return 0;
642fd66dbd4SStefan Hajnoczi }
643fd66dbd4SStefan Hajnoczi 
blkio_nvme_io_uring_connect(BlockDriverState * bs,QDict * options,int flags,Error ** errp)64469785d66SStefano Garzarella static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
64569785d66SStefano Garzarella                                        int flags, Error **errp)
646fd66dbd4SStefan Hajnoczi {
6476c32fc0dSAlberto Faria     const char *path = qdict_get_try_str(options, "path");
648fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
649fd66dbd4SStefan Hajnoczi     int ret;
650fd66dbd4SStefan Hajnoczi 
6516c32fc0dSAlberto Faria     if (!path) {
6526c32fc0dSAlberto Faria         error_setg(errp, "missing 'path' option");
6536c32fc0dSAlberto Faria         return -EINVAL;
6546c32fc0dSAlberto Faria     }
6556c32fc0dSAlberto Faria 
6566c32fc0dSAlberto Faria     ret = blkio_set_str(s->blkio, "path", path);
6576c32fc0dSAlberto Faria     qdict_del(options, "path");
658fd66dbd4SStefan Hajnoczi     if (ret < 0) {
659fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to set path: %s",
660fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
661fd66dbd4SStefan Hajnoczi         return ret;
662fd66dbd4SStefan Hajnoczi     }
663fd66dbd4SStefan Hajnoczi 
664fd66dbd4SStefan Hajnoczi     if (!(flags & BDRV_O_NOCACHE)) {
665fd66dbd4SStefan Hajnoczi         error_setg(errp, "cache.direct=off is not supported");
666fd66dbd4SStefan Hajnoczi         return -EINVAL;
667fd66dbd4SStefan Hajnoczi     }
668fd66dbd4SStefan Hajnoczi 
66969785d66SStefano Garzarella     ret = blkio_connect(s->blkio);
67069785d66SStefano Garzarella     if (ret < 0) {
67169785d66SStefano Garzarella         error_setg_errno(errp, -ret, "blkio_connect failed: %s",
67269785d66SStefano Garzarella                          blkio_get_error_msg());
67369785d66SStefano Garzarella         return ret;
67469785d66SStefano Garzarella     }
67569785d66SStefano Garzarella 
676fd66dbd4SStefan Hajnoczi     return 0;
677fd66dbd4SStefan Hajnoczi }
678fd66dbd4SStefan Hajnoczi 
blkio_virtio_blk_connect(BlockDriverState * bs,QDict * options,int flags,Error ** errp)67969785d66SStefano Garzarella static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
68069785d66SStefano Garzarella                                     int flags, Error **errp)
681fd66dbd4SStefan Hajnoczi {
682fd66dbd4SStefan Hajnoczi     const char *path = qdict_get_try_str(options, "path");
683fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
684cad2ccc3SStefano Garzarella     bool fd_supported = false;
6850b054b4cSStefano Garzarella     int fd = -1, ret;
686fd66dbd4SStefan Hajnoczi 
687fd66dbd4SStefan Hajnoczi     if (!path) {
688fd66dbd4SStefan Hajnoczi         error_setg(errp, "missing 'path' option");
689fd66dbd4SStefan Hajnoczi         return -EINVAL;
690fd66dbd4SStefan Hajnoczi     }
691fd66dbd4SStefan Hajnoczi 
692cad2ccc3SStefano Garzarella     if (!(flags & BDRV_O_NOCACHE)) {
693cad2ccc3SStefano Garzarella         error_setg(errp, "cache.direct=off is not supported");
694cad2ccc3SStefano Garzarella         return -EINVAL;
695cad2ccc3SStefano Garzarella     }
696cad2ccc3SStefano Garzarella 
6971c38fe69SStefano Garzarella     if (blkio_set_int(s->blkio, "fd", -1) == 0) {
698cad2ccc3SStefano Garzarella         fd_supported = true;
699cad2ccc3SStefano Garzarella     }
700cad2ccc3SStefano Garzarella 
701cad2ccc3SStefano Garzarella     /*
702cad2ccc3SStefano Garzarella      * If the libblkio driver supports fd passing, let's always use qemu_open()
703cad2ccc3SStefano Garzarella      * to open the `path`, so we can handle fd passing from the management
704cad2ccc3SStefano Garzarella      * layer through the "/dev/fdset/N" special path.
705cad2ccc3SStefano Garzarella      */
706cad2ccc3SStefano Garzarella     if (fd_supported) {
707a5942c17SStefano Garzarella         /*
708a5942c17SStefano Garzarella          * `path` can contain the path of a character device
709a5942c17SStefano Garzarella          * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
710a5942c17SStefano Garzarella          *
711a5942c17SStefano Garzarella          * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
712a5942c17SStefano Garzarella          * is not set in the open flags, because the exchange of IOCTL commands
713a5942c17SStefano Garzarella          * for example will fail.
714a5942c17SStefano Garzarella          *
715a5942c17SStefano Garzarella          * In order to open the device read-only, we are using the `read-only`
716a5942c17SStefano Garzarella          * property of the libblkio driver in blkio_file_open().
717a5942c17SStefano Garzarella          */
718723bea27SStefano Garzarella         fd = qemu_open(path, O_RDWR, NULL);
719cad2ccc3SStefano Garzarella         if (fd < 0) {
7209b06d0d0SStefano Garzarella             /*
7219b06d0d0SStefano Garzarella              * qemu_open() can fail if the user specifies a path that is not
7229b06d0d0SStefano Garzarella              * a file or device, for example in the case of Unix Domain Socket
7239b06d0d0SStefano Garzarella              * for the virtio-blk-vhost-user driver. In such cases let's have
7249b06d0d0SStefano Garzarella              * libblkio open the path directly.
7259b06d0d0SStefano Garzarella              */
726723bea27SStefano Garzarella             fd_supported = false;
727723bea27SStefano Garzarella         } else {
728cad2ccc3SStefano Garzarella             ret = blkio_set_int(s->blkio, "fd", fd);
729cad2ccc3SStefano Garzarella             if (ret < 0) {
730723bea27SStefano Garzarella                 fd_supported = false;
731cad2ccc3SStefano Garzarella                 qemu_close(fd);
7320b054b4cSStefano Garzarella                 fd = -1;
733cad2ccc3SStefano Garzarella             }
734723bea27SStefano Garzarella         }
735723bea27SStefano Garzarella     }
736723bea27SStefano Garzarella 
737723bea27SStefano Garzarella     if (!fd_supported) {
738fd66dbd4SStefan Hajnoczi         ret = blkio_set_str(s->blkio, "path", path);
739fd66dbd4SStefan Hajnoczi         if (ret < 0) {
740fd66dbd4SStefan Hajnoczi             error_setg_errno(errp, -ret, "failed to set path: %s",
741fd66dbd4SStefan Hajnoczi                              blkio_get_error_msg());
742fd66dbd4SStefan Hajnoczi             return ret;
743fd66dbd4SStefan Hajnoczi         }
744fd66dbd4SStefan Hajnoczi     }
745cad2ccc3SStefano Garzarella 
74669785d66SStefano Garzarella     ret = blkio_connect(s->blkio);
7470b054b4cSStefano Garzarella     if (ret < 0 && fd >= 0) {
7480b054b4cSStefano Garzarella         /* Failed to give the FD to libblkio, close it */
7490b054b4cSStefano Garzarella         qemu_close(fd);
7500b054b4cSStefano Garzarella         fd = -1;
7510b054b4cSStefano Garzarella     }
7520b054b4cSStefano Garzarella 
753809c319fSStefano Garzarella     /*
7549b06d0d0SStefano Garzarella      * Before https://gitlab.com/libblkio/libblkio/-/merge_requests/208
7559b06d0d0SStefano Garzarella      * (libblkio <= v1.3.0), setting the `fd` property is not enough to check
7569b06d0d0SStefano Garzarella      * whether the driver supports the `fd` property or not. In that case,
7579b06d0d0SStefano Garzarella      * blkio_connect() will fail with -EINVAL.
7589b06d0d0SStefano Garzarella      * So let's try calling blkio_connect() again by directly setting `path`
7599b06d0d0SStefano Garzarella      * to cover this scenario.
760809c319fSStefano Garzarella      */
761809c319fSStefano Garzarella     if (fd_supported && ret == -EINVAL) {
762809c319fSStefano Garzarella         /*
763809c319fSStefano Garzarella          * We need to clear the `fd` property we set previously by setting
764809c319fSStefano Garzarella          * it to -1.
765809c319fSStefano Garzarella          */
766809c319fSStefano Garzarella         ret = blkio_set_int(s->blkio, "fd", -1);
767809c319fSStefano Garzarella         if (ret < 0) {
768809c319fSStefano Garzarella             error_setg_errno(errp, -ret, "failed to set fd: %s",
769809c319fSStefano Garzarella                              blkio_get_error_msg());
770809c319fSStefano Garzarella             return ret;
771809c319fSStefano Garzarella         }
772809c319fSStefano Garzarella 
773809c319fSStefano Garzarella         ret = blkio_set_str(s->blkio, "path", path);
774809c319fSStefano Garzarella         if (ret < 0) {
775809c319fSStefano Garzarella             error_setg_errno(errp, -ret, "failed to set path: %s",
776809c319fSStefano Garzarella                              blkio_get_error_msg());
777809c319fSStefano Garzarella             return ret;
778809c319fSStefano Garzarella         }
779809c319fSStefano Garzarella 
780809c319fSStefano Garzarella         ret = blkio_connect(s->blkio);
781809c319fSStefano Garzarella     }
782809c319fSStefano Garzarella 
78369785d66SStefano Garzarella     if (ret < 0) {
78469785d66SStefano Garzarella         error_setg_errno(errp, -ret, "blkio_connect failed: %s",
78569785d66SStefano Garzarella                          blkio_get_error_msg());
78669785d66SStefano Garzarella         return ret;
78769785d66SStefano Garzarella     }
78869785d66SStefano Garzarella 
789cad2ccc3SStefano Garzarella     qdict_del(options, "path");
790cad2ccc3SStefano Garzarella 
791fd66dbd4SStefan Hajnoczi     return 0;
792fd66dbd4SStefan Hajnoczi }
793fd66dbd4SStefan Hajnoczi 
blkio_file_open(BlockDriverState * bs,QDict * options,int flags,Error ** errp)794fd66dbd4SStefan Hajnoczi static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
795fd66dbd4SStefan Hajnoczi                            Error **errp)
796fd66dbd4SStefan Hajnoczi {
797fd66dbd4SStefan Hajnoczi     const char *blkio_driver = bs->drv->protocol_name;
798fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
799fd66dbd4SStefan Hajnoczi     int ret;
800fd66dbd4SStefan Hajnoczi 
801fd66dbd4SStefan Hajnoczi     ret = blkio_create(blkio_driver, &s->blkio);
802fd66dbd4SStefan Hajnoczi     if (ret < 0) {
803fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "blkio_create failed: %s",
804fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
805fd66dbd4SStefan Hajnoczi         return ret;
806fd66dbd4SStefan Hajnoczi     }
807fd66dbd4SStefan Hajnoczi 
808fd66dbd4SStefan Hajnoczi     if (!(flags & BDRV_O_RDWR)) {
809fd66dbd4SStefan Hajnoczi         ret = blkio_set_bool(s->blkio, "read-only", true);
810fd66dbd4SStefan Hajnoczi         if (ret < 0) {
811fd66dbd4SStefan Hajnoczi             error_setg_errno(errp, -ret, "failed to set read-only: %s",
812fd66dbd4SStefan Hajnoczi                              blkio_get_error_msg());
813fd66dbd4SStefan Hajnoczi             blkio_destroy(&s->blkio);
814fd66dbd4SStefan Hajnoczi             return ret;
815fd66dbd4SStefan Hajnoczi         }
816fd66dbd4SStefan Hajnoczi     }
817fd66dbd4SStefan Hajnoczi 
81869785d66SStefano Garzarella     if (strcmp(blkio_driver, "io_uring") == 0) {
81969785d66SStefano Garzarella         ret = blkio_io_uring_connect(bs, options, flags, errp);
82069785d66SStefano Garzarella     } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
82169785d66SStefano Garzarella         ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
82269785d66SStefano Garzarella     } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
82369785d66SStefano Garzarella         ret = blkio_virtio_blk_connect(bs, options, flags, errp);
82469785d66SStefano Garzarella     } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
82569785d66SStefano Garzarella         ret = blkio_virtio_blk_connect(bs, options, flags, errp);
82669785d66SStefano Garzarella     } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
82769785d66SStefano Garzarella         ret = blkio_virtio_blk_connect(bs, options, flags, errp);
82869785d66SStefano Garzarella     } else {
82969785d66SStefano Garzarella         g_assert_not_reached();
83069785d66SStefano Garzarella     }
831fd66dbd4SStefan Hajnoczi     if (ret < 0) {
832fd66dbd4SStefan Hajnoczi         blkio_destroy(&s->blkio);
833fd66dbd4SStefan Hajnoczi         return ret;
834fd66dbd4SStefan Hajnoczi     }
835fd66dbd4SStefan Hajnoczi 
836fd66dbd4SStefan Hajnoczi     ret = blkio_get_bool(s->blkio,
837fd66dbd4SStefan Hajnoczi                          "needs-mem-regions",
838fd66dbd4SStefan Hajnoczi                          &s->needs_mem_regions);
839fd66dbd4SStefan Hajnoczi     if (ret < 0) {
840fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret,
841fd66dbd4SStefan Hajnoczi                          "failed to get needs-mem-regions: %s",
842fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
843fd66dbd4SStefan Hajnoczi         blkio_destroy(&s->blkio);
844fd66dbd4SStefan Hajnoczi         return ret;
845fd66dbd4SStefan Hajnoczi     }
846fd66dbd4SStefan Hajnoczi 
847c5640b3eSStefan Hajnoczi     ret = blkio_get_bool(s->blkio,
848c5640b3eSStefan Hajnoczi                          "needs-mem-region-fd",
849c5640b3eSStefan Hajnoczi                          &s->needs_mem_region_fd);
850c5640b3eSStefan Hajnoczi     if (ret < 0) {
851c5640b3eSStefan Hajnoczi         error_setg_errno(errp, -ret,
852c5640b3eSStefan Hajnoczi                          "failed to get needs-mem-region-fd: %s",
853c5640b3eSStefan Hajnoczi                          blkio_get_error_msg());
854c5640b3eSStefan Hajnoczi         blkio_destroy(&s->blkio);
855c5640b3eSStefan Hajnoczi         return ret;
856c5640b3eSStefan Hajnoczi     }
857c5640b3eSStefan Hajnoczi 
858fd66dbd4SStefan Hajnoczi     ret = blkio_get_uint64(s->blkio,
859fd66dbd4SStefan Hajnoczi                            "mem-region-alignment",
860fd66dbd4SStefan Hajnoczi                            &s->mem_region_alignment);
861fd66dbd4SStefan Hajnoczi     if (ret < 0) {
862fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret,
863fd66dbd4SStefan Hajnoczi                          "failed to get mem-region-alignment: %s",
864fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
865fd66dbd4SStefan Hajnoczi         blkio_destroy(&s->blkio);
866fd66dbd4SStefan Hajnoczi         return ret;
867fd66dbd4SStefan Hajnoczi     }
868fd66dbd4SStefan Hajnoczi 
869c5640b3eSStefan Hajnoczi     ret = blkio_get_bool(s->blkio,
870c5640b3eSStefan Hajnoczi                          "may-pin-mem-regions",
871c5640b3eSStefan Hajnoczi                          &s->may_pin_mem_regions);
872c5640b3eSStefan Hajnoczi     if (ret < 0) {
873c5640b3eSStefan Hajnoczi         /* Be conservative (assume pinning) if the property is not supported */
874c5640b3eSStefan Hajnoczi         s->may_pin_mem_regions = s->needs_mem_regions;
875c5640b3eSStefan Hajnoczi     }
876c5640b3eSStefan Hajnoczi 
877c5640b3eSStefan Hajnoczi     /*
878c5640b3eSStefan Hajnoczi      * Notify if libblkio drivers pin memory and prevent features like
879c5640b3eSStefan Hajnoczi      * virtio-mem from working.
880c5640b3eSStefan Hajnoczi      */
881c5640b3eSStefan Hajnoczi     if (s->may_pin_mem_regions) {
882c5640b3eSStefan Hajnoczi         ret = ram_block_discard_disable(true);
883c5640b3eSStefan Hajnoczi         if (ret < 0) {
884c5640b3eSStefan Hajnoczi             error_setg_errno(errp, -ret, "ram_block_discard_disable() failed");
885c5640b3eSStefan Hajnoczi             blkio_destroy(&s->blkio);
886c5640b3eSStefan Hajnoczi             return ret;
887c5640b3eSStefan Hajnoczi         }
888c5640b3eSStefan Hajnoczi     }
889c5640b3eSStefan Hajnoczi 
890fd66dbd4SStefan Hajnoczi     ret = blkio_start(s->blkio);
891fd66dbd4SStefan Hajnoczi     if (ret < 0) {
892fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "blkio_start failed: %s",
893fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
894fd66dbd4SStefan Hajnoczi         blkio_destroy(&s->blkio);
895c5640b3eSStefan Hajnoczi         if (s->may_pin_mem_regions) {
896c5640b3eSStefan Hajnoczi             ram_block_discard_disable(false);
897c5640b3eSStefan Hajnoczi         }
898fd66dbd4SStefan Hajnoczi         return ret;
899fd66dbd4SStefan Hajnoczi     }
900fd66dbd4SStefan Hajnoczi 
901c5640b3eSStefan Hajnoczi     bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
902fd66dbd4SStefan Hajnoczi     bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
903fd66dbd4SStefan Hajnoczi                                BDRV_REQ_NO_FALLBACK;
904fd66dbd4SStefan Hajnoczi 
905fd66dbd4SStefan Hajnoczi     qemu_mutex_init(&s->blkio_lock);
906fd66dbd4SStefan Hajnoczi     qemu_co_mutex_init(&s->bounce_lock);
907fd66dbd4SStefan Hajnoczi     qemu_co_queue_init(&s->bounce_available);
908fd66dbd4SStefan Hajnoczi     QLIST_INIT(&s->bounce_bufs);
909fd66dbd4SStefan Hajnoczi     s->blkioq = blkio_get_queue(s->blkio, 0);
910fd66dbd4SStefan Hajnoczi     s->completion_fd = blkioq_get_completion_fd(s->blkioq);
9119359c459SStefano Garzarella     blkioq_set_completion_fd_enabled(s->blkioq, true);
912fd66dbd4SStefan Hajnoczi 
913fd66dbd4SStefan Hajnoczi     blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
914fd66dbd4SStefan Hajnoczi     return 0;
915fd66dbd4SStefan Hajnoczi }
916fd66dbd4SStefan Hajnoczi 
/*
 * Close callback: tear down the per-device state set up by blkio_file_open().
 *
 * Destroys the libblkio lock, detaches from the AioContext, destroys the
 * libblkio instance and, if RAM discard was disabled because the driver may
 * pin memory regions, re-enables it.
 */
static void blkio_close(BlockDriverState *bs)
{
    BDRVBlkioState *state = bs->opaque;

    /* There is no destroy() API for state->bounce_lock */

    qemu_mutex_destroy(&state->blkio_lock);
    blkio_detach_aio_context(bs);
    blkio_destroy(&state->blkio);

    /* Balance the ram_block_discard_disable(true) from blkio_file_open() */
    if (state->may_pin_mem_regions) {
        ram_block_discard_disable(false);
    }
}
931fd66dbd4SStefan Hajnoczi 
blkio_co_getlength(BlockDriverState * bs)932c86422c5SEmanuele Giuseppe Esposito static int64_t coroutine_fn blkio_co_getlength(BlockDriverState *bs)
933fd66dbd4SStefan Hajnoczi {
934fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
935fd66dbd4SStefan Hajnoczi     uint64_t capacity;
936fd66dbd4SStefan Hajnoczi     int ret;
937fd66dbd4SStefan Hajnoczi 
938fd66dbd4SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
939fd66dbd4SStefan Hajnoczi         ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
940fd66dbd4SStefan Hajnoczi     }
941fd66dbd4SStefan Hajnoczi     if (ret < 0) {
942fd66dbd4SStefan Hajnoczi         return -ret;
943fd66dbd4SStefan Hajnoczi     }
944fd66dbd4SStefan Hajnoczi 
945fd66dbd4SStefan Hajnoczi     return capacity;
946fd66dbd4SStefan Hajnoczi }
947fd66dbd4SStefan Hajnoczi 
blkio_truncate(BlockDriverState * bs,int64_t offset,bool exact,PreallocMode prealloc,BdrvRequestFlags flags,Error ** errp)9484c8f4fdaSAlberto Faria static int coroutine_fn blkio_truncate(BlockDriverState *bs, int64_t offset,
9494c8f4fdaSAlberto Faria                                        bool exact, PreallocMode prealloc,
9504c8f4fdaSAlberto Faria                                        BdrvRequestFlags flags, Error **errp)
9514c8f4fdaSAlberto Faria {
9524c8f4fdaSAlberto Faria     int64_t current_length;
9534c8f4fdaSAlberto Faria 
9544c8f4fdaSAlberto Faria     if (prealloc != PREALLOC_MODE_OFF) {
9554c8f4fdaSAlberto Faria         error_setg(errp, "Unsupported preallocation mode '%s'",
9564c8f4fdaSAlberto Faria                    PreallocMode_str(prealloc));
9574c8f4fdaSAlberto Faria         return -ENOTSUP;
9584c8f4fdaSAlberto Faria     }
9594c8f4fdaSAlberto Faria 
960c86422c5SEmanuele Giuseppe Esposito     current_length = blkio_co_getlength(bs);
9614c8f4fdaSAlberto Faria 
9624c8f4fdaSAlberto Faria     if (offset > current_length) {
9634c8f4fdaSAlberto Faria         error_setg(errp, "Cannot grow device");
9644c8f4fdaSAlberto Faria         return -EINVAL;
9654c8f4fdaSAlberto Faria     } else if (exact && offset != current_length) {
9664c8f4fdaSAlberto Faria         error_setg(errp, "Cannot resize device");
9674c8f4fdaSAlberto Faria         return -ENOTSUP;
9684c8f4fdaSAlberto Faria     }
9694c8f4fdaSAlberto Faria 
9704c8f4fdaSAlberto Faria     return 0;
9714c8f4fdaSAlberto Faria }
9724c8f4fdaSAlberto Faria 
9733d47eb0aSEmanuele Giuseppe Esposito static int coroutine_fn
blkio_co_get_info(BlockDriverState * bs,BlockDriverInfo * bdi)9743d47eb0aSEmanuele Giuseppe Esposito blkio_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
975fd66dbd4SStefan Hajnoczi {
976fd66dbd4SStefan Hajnoczi     return 0;
977fd66dbd4SStefan Hajnoczi }
978fd66dbd4SStefan Hajnoczi 
blkio_refresh_limits(BlockDriverState * bs,Error ** errp)979fd66dbd4SStefan Hajnoczi static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
980fd66dbd4SStefan Hajnoczi {
981fd66dbd4SStefan Hajnoczi     BDRVBlkioState *s = bs->opaque;
982fd66dbd4SStefan Hajnoczi     QEMU_LOCK_GUARD(&s->blkio_lock);
983fd66dbd4SStefan Hajnoczi     int value;
984fd66dbd4SStefan Hajnoczi     int ret;
985fd66dbd4SStefan Hajnoczi 
986fd66dbd4SStefan Hajnoczi     ret = blkio_get_int(s->blkio, "request-alignment", &value);
987fd66dbd4SStefan Hajnoczi     if (ret < 0) {
988fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
989fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
990fd66dbd4SStefan Hajnoczi         return;
991fd66dbd4SStefan Hajnoczi     }
992fd66dbd4SStefan Hajnoczi     bs->bl.request_alignment = value;
993fd66dbd4SStefan Hajnoczi     if (bs->bl.request_alignment < 1 ||
994fd66dbd4SStefan Hajnoczi         bs->bl.request_alignment >= INT_MAX ||
995fd66dbd4SStefan Hajnoczi         !is_power_of_2(bs->bl.request_alignment)) {
996fd66dbd4SStefan Hajnoczi         error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
997fd66dbd4SStefan Hajnoczi                    "must be a power of 2 less than INT_MAX",
998fd66dbd4SStefan Hajnoczi                    bs->bl.request_alignment);
999fd66dbd4SStefan Hajnoczi         return;
1000fd66dbd4SStefan Hajnoczi     }
1001fd66dbd4SStefan Hajnoczi 
1002fd66dbd4SStefan Hajnoczi     ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
1003fd66dbd4SStefan Hajnoczi     if (ret < 0) {
1004fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
1005fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
1006fd66dbd4SStefan Hajnoczi         return;
1007fd66dbd4SStefan Hajnoczi     }
1008fd66dbd4SStefan Hajnoczi     bs->bl.opt_transfer = value;
1009fd66dbd4SStefan Hajnoczi     if (bs->bl.opt_transfer > INT_MAX ||
1010fd66dbd4SStefan Hajnoczi         (bs->bl.opt_transfer % bs->bl.request_alignment)) {
1011fd66dbd4SStefan Hajnoczi         error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
1012fd66dbd4SStefan Hajnoczi                    "be a multiple of %" PRIu32, bs->bl.opt_transfer,
1013fd66dbd4SStefan Hajnoczi                    bs->bl.request_alignment);
1014fd66dbd4SStefan Hajnoczi         return;
1015fd66dbd4SStefan Hajnoczi     }
1016fd66dbd4SStefan Hajnoczi 
1017fd66dbd4SStefan Hajnoczi     ret = blkio_get_int(s->blkio, "max-transfer", &value);
1018fd66dbd4SStefan Hajnoczi     if (ret < 0) {
1019fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
1020fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
1021fd66dbd4SStefan Hajnoczi         return;
1022fd66dbd4SStefan Hajnoczi     }
1023fd66dbd4SStefan Hajnoczi     bs->bl.max_transfer = value;
1024fd66dbd4SStefan Hajnoczi     if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
1025fd66dbd4SStefan Hajnoczi         (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
1026fd66dbd4SStefan Hajnoczi         error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
1027fd66dbd4SStefan Hajnoczi                    "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
1028fd66dbd4SStefan Hajnoczi                    bs->bl.max_transfer, bs->bl.request_alignment,
1029fd66dbd4SStefan Hajnoczi                    bs->bl.opt_transfer);
1030fd66dbd4SStefan Hajnoczi         return;
1031fd66dbd4SStefan Hajnoczi     }
1032fd66dbd4SStefan Hajnoczi 
1033fd66dbd4SStefan Hajnoczi     ret = blkio_get_int(s->blkio, "buf-alignment", &value);
1034fd66dbd4SStefan Hajnoczi     if (ret < 0) {
1035fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
1036fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
1037fd66dbd4SStefan Hajnoczi         return;
1038fd66dbd4SStefan Hajnoczi     }
1039fd66dbd4SStefan Hajnoczi     if (value < 1) {
1040fd66dbd4SStefan Hajnoczi         error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
1041fd66dbd4SStefan Hajnoczi                    "positive", value);
1042fd66dbd4SStefan Hajnoczi         return;
1043fd66dbd4SStefan Hajnoczi     }
1044fd66dbd4SStefan Hajnoczi     bs->bl.min_mem_alignment = value;
1045fd66dbd4SStefan Hajnoczi 
1046fd66dbd4SStefan Hajnoczi     ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
1047fd66dbd4SStefan Hajnoczi     if (ret < 0) {
1048fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret,
1049fd66dbd4SStefan Hajnoczi                          "failed to get \"optimal-buf-alignment\": %s",
1050fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
1051fd66dbd4SStefan Hajnoczi         return;
1052fd66dbd4SStefan Hajnoczi     }
1053fd66dbd4SStefan Hajnoczi     if (value < 1) {
1054fd66dbd4SStefan Hajnoczi         error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
1055fd66dbd4SStefan Hajnoczi                    "must be positive", value);
1056fd66dbd4SStefan Hajnoczi         return;
1057fd66dbd4SStefan Hajnoczi     }
1058fd66dbd4SStefan Hajnoczi     bs->bl.opt_mem_alignment = value;
1059fd66dbd4SStefan Hajnoczi 
1060fd66dbd4SStefan Hajnoczi     ret = blkio_get_int(s->blkio, "max-segments", &value);
1061fd66dbd4SStefan Hajnoczi     if (ret < 0) {
1062fd66dbd4SStefan Hajnoczi         error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
1063fd66dbd4SStefan Hajnoczi                          blkio_get_error_msg());
1064fd66dbd4SStefan Hajnoczi         return;
1065fd66dbd4SStefan Hajnoczi     }
1066fd66dbd4SStefan Hajnoczi     if (value < 1) {
1067fd66dbd4SStefan Hajnoczi         error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
1068fd66dbd4SStefan Hajnoczi                    value);
1069fd66dbd4SStefan Hajnoczi         return;
1070fd66dbd4SStefan Hajnoczi     }
1071fd66dbd4SStefan Hajnoczi     bs->bl.max_iov = value;
1072fd66dbd4SStefan Hajnoczi }
1073fd66dbd4SStefan Hajnoczi 
/*
 * TODO
 * Missing libblkio APIs:
 * - block_status
 * - co_invalidate_cache
 *
 * Out of scope?
 * - create
 * - truncate
 */
1084fd66dbd4SStefan Hajnoczi 
/*
 * Designated initializers shared by every libblkio BlockDriver definition in
 * this file.  The expansion ends with a trailing comma, so the macro can be
 * used as the last entry of a BlockDriver initializer without any extra
 * punctuation.
 *
 * Do not include .format_name and .protocol_name because module_block.py
 * does not parse macros in the source code.
 */
#define BLKIO_DRIVER_COMMON \
    .instance_size           = sizeof(BDRVBlkioState), \
    .bdrv_file_open          = blkio_file_open, \
    .bdrv_close              = blkio_close, \
    .bdrv_co_getlength       = blkio_co_getlength, \
    .bdrv_co_truncate        = blkio_truncate, \
    .bdrv_co_get_info        = blkio_co_get_info, \
    .bdrv_attach_aio_context = blkio_attach_aio_context, \
    .bdrv_detach_aio_context = blkio_detach_aio_context, \
    .bdrv_co_pdiscard        = blkio_co_pdiscard, \
    .bdrv_co_preadv          = blkio_co_preadv, \
    .bdrv_co_pwritev         = blkio_co_pwritev, \
    .bdrv_co_flush_to_disk   = blkio_co_flush, \
    .bdrv_co_pwrite_zeroes   = blkio_co_pwrite_zeroes, \
    .bdrv_refresh_limits     = blkio_refresh_limits, \
    .bdrv_register_buf       = blkio_register_buf, \
    .bdrv_unregister_buf     = blkio_unregister_buf,
1106fd66dbd4SStefan Hajnoczi 
/*
 * Use the same .format_name and .protocol_name as the libblkio driver name for
 * consistency.
 */
1111c21eae1cSStefan Hajnoczi 
1112c21eae1cSStefan Hajnoczi static BlockDriver bdrv_io_uring = {
1113c21eae1cSStefan Hajnoczi     .format_name         = "io_uring",
1114c21eae1cSStefan Hajnoczi     .protocol_name       = "io_uring",
1115fd66dbd4SStefan Hajnoczi     .bdrv_needs_filename = true,
1116c21eae1cSStefan Hajnoczi     BLKIO_DRIVER_COMMON
1117c21eae1cSStefan Hajnoczi };
1118fd66dbd4SStefan Hajnoczi 
1119c21eae1cSStefan Hajnoczi static BlockDriver bdrv_nvme_io_uring = {
1120c21eae1cSStefan Hajnoczi     .format_name         = "nvme-io_uring",
1121c21eae1cSStefan Hajnoczi     .protocol_name       = "nvme-io_uring",
1122c21eae1cSStefan Hajnoczi     BLKIO_DRIVER_COMMON
1123c21eae1cSStefan Hajnoczi };
1124fd66dbd4SStefan Hajnoczi 
1125c21eae1cSStefan Hajnoczi static BlockDriver bdrv_virtio_blk_vfio_pci = {
1126c21eae1cSStefan Hajnoczi     .format_name         = "virtio-blk-vfio-pci",
1127c21eae1cSStefan Hajnoczi     .protocol_name       = "virtio-blk-vfio-pci",
1128c21eae1cSStefan Hajnoczi     BLKIO_DRIVER_COMMON
1129c21eae1cSStefan Hajnoczi };
113003d9e4c0SAlberto Faria 
1131c21eae1cSStefan Hajnoczi static BlockDriver bdrv_virtio_blk_vhost_user = {
1132c21eae1cSStefan Hajnoczi     .format_name         = "virtio-blk-vhost-user",
1133c21eae1cSStefan Hajnoczi     .protocol_name       = "virtio-blk-vhost-user",
1134c21eae1cSStefan Hajnoczi     BLKIO_DRIVER_COMMON
1135c21eae1cSStefan Hajnoczi };
1136fd66dbd4SStefan Hajnoczi 
1137c21eae1cSStefan Hajnoczi static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
1138c21eae1cSStefan Hajnoczi     .format_name         = "virtio-blk-vhost-vdpa",
1139c21eae1cSStefan Hajnoczi     .protocol_name       = "virtio-blk-vhost-vdpa",
1140c21eae1cSStefan Hajnoczi     BLKIO_DRIVER_COMMON
1141c21eae1cSStefan Hajnoczi };
1142fd66dbd4SStefan Hajnoczi 
bdrv_blkio_init(void)1143fd66dbd4SStefan Hajnoczi static void bdrv_blkio_init(void)
1144fd66dbd4SStefan Hajnoczi {
1145fd66dbd4SStefan Hajnoczi     bdrv_register(&bdrv_io_uring);
1146fd66dbd4SStefan Hajnoczi     bdrv_register(&bdrv_nvme_io_uring);
114703d9e4c0SAlberto Faria     bdrv_register(&bdrv_virtio_blk_vfio_pci);
1148fd66dbd4SStefan Hajnoczi     bdrv_register(&bdrv_virtio_blk_vhost_user);
1149fd66dbd4SStefan Hajnoczi     bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
1150fd66dbd4SStefan Hajnoczi }
1151fd66dbd4SStefan Hajnoczi 
/* Hand the registration function for the libblkio drivers to block_init(). */
block_init(bdrv_blkio_init);