xref: /qemu/block/copy-on-read.c (revision c3033fd3)
1 /*
2  * Copy-on-read filter block driver
3  *
4  * Copyright (c) 2018 Red Hat, Inc.
5  *
6  * Author:
7  *   Max Reitz <mreitz@redhat.com>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 or
12  * (at your option) version 3 of the License.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include "qemu/osdep.h"
24 #include "block/block_int.h"
25 #include "qemu/module.h"
26 #include "qapi/error.h"
27 #include "qapi/qmp/qdict.h"
28 #include "block/copy-on-read.h"
29 
30 
31 typedef struct BDRVStateCOR {
32     bool active;
33     BlockDriverState *bottom_bs;
34     bool chain_frozen;
35 } BDRVStateCOR;
36 
37 
38 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
39                     Error **errp)
40 {
41     BlockDriverState *bottom_bs = NULL;
42     BDRVStateCOR *state = bs->opaque;
43     /* Find a bottom node name, if any */
44     const char *bottom_node = qdict_get_try_str(options, "bottom");
45 
46     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
47                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
48                                false, errp);
49     if (!bs->file) {
50         return -EINVAL;
51     }
52 
53     bs->supported_read_flags = BDRV_REQ_PREFETCH;
54 
55     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
56         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
57 
58     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
59         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
60             bs->file->bs->supported_zero_flags);
61 
62     if (bottom_node) {
63         bottom_bs = bdrv_find_node(bottom_node);
64         if (!bottom_bs) {
65             error_setg(errp, "Bottom node '%s' not found", bottom_node);
66             qdict_del(options, "bottom");
67             return -EINVAL;
68         }
69         qdict_del(options, "bottom");
70 
71         if (!bottom_bs->drv) {
72             error_setg(errp, "Bottom node '%s' not opened", bottom_node);
73             return -EINVAL;
74         }
75 
76         if (bottom_bs->drv->is_filter) {
77             error_setg(errp, "Bottom node '%s' is a filter", bottom_node);
78             return -EINVAL;
79         }
80 
81         if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) {
82             return -EINVAL;
83         }
84         state->chain_frozen = true;
85 
86         /*
87          * We do freeze the chain, so it shouldn't be removed. Still, storing a
88          * pointer worth bdrv_ref().
89          */
90         bdrv_ref(bottom_bs);
91     }
92     state->active = true;
93     state->bottom_bs = bottom_bs;
94 
95     /*
96      * We don't need to call bdrv_child_refresh_perms() now as the permissions
97      * will be updated later when the filter node gets its parent.
98      */
99 
100     return 0;
101 }
102 
103 
/*
 * PERM_PASSTHROUGH: permission bits that cor_child_perm() takes over from
 * the values it is given.
 * PERM_UNCHANGED: every other bit of BLK_PERM_ALL; cor_child_perm() always
 * includes these in the shared set while the filter is active.
 */
#define PERM_PASSTHROUGH \
    (BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE)
#define PERM_UNCHANGED \
    (BLK_PERM_ALL & ~PERM_PASSTHROUGH)
108 
109 static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
110                            BdrvChildRole role,
111                            BlockReopenQueue *reopen_queue,
112                            uint64_t perm, uint64_t shared,
113                            uint64_t *nperm, uint64_t *nshared)
114 {
115     BDRVStateCOR *s = bs->opaque;
116 
117     if (!s->active) {
118         /*
119          * While the filter is being removed
120          */
121         *nperm = 0;
122         *nshared = BLK_PERM_ALL;
123         return;
124     }
125 
126     *nperm = perm & PERM_PASSTHROUGH;
127     *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
128 
129     /* We must not request write permissions for an inactive node, the child
130      * cannot provide it. */
131     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
132         *nperm |= BLK_PERM_WRITE_UNCHANGED;
133     }
134 }
135 
136 
137 static int64_t cor_getlength(BlockDriverState *bs)
138 {
139     return bdrv_getlength(bs->file->bs);
140 }
141 
142 
143 static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
144                                            uint64_t offset, uint64_t bytes,
145                                            QEMUIOVector *qiov,
146                                            size_t qiov_offset,
147                                            int flags)
148 {
149     int64_t n;
150     int local_flags;
151     int ret;
152     BDRVStateCOR *state = bs->opaque;
153 
154     if (!state->bottom_bs) {
155         return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
156                                    flags | BDRV_REQ_COPY_ON_READ);
157     }
158 
159     while (bytes) {
160         local_flags = flags;
161 
162         /* In case of failure, try to copy-on-read anyway */
163         ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n);
164         if (ret <= 0) {
165             ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
166                                           state->bottom_bs, true, offset,
167                                           n, &n);
168             if (ret > 0 || ret < 0) {
169                 local_flags |= BDRV_REQ_COPY_ON_READ;
170             }
171             /* Finish earlier if the end of a backing file has been reached */
172             if (n == 0) {
173                 break;
174             }
175         }
176 
177         /* Skip if neither read nor write are needed */
178         if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
179             BDRV_REQ_PREFETCH) {
180             ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
181                                       local_flags);
182             if (ret < 0) {
183                 return ret;
184             }
185         }
186 
187         offset += n;
188         qiov_offset += n;
189         bytes -= n;
190     }
191 
192     return 0;
193 }
194 
195 
196 static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
197                                             uint64_t offset,
198                                             uint64_t bytes,
199                                             QEMUIOVector *qiov,
200                                             size_t qiov_offset, int flags)
201 {
202     return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
203                                 flags);
204 }
205 
206 
207 static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
208                                              int64_t offset, int bytes,
209                                              BdrvRequestFlags flags)
210 {
211     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
212 }
213 
214 
215 static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
216                                         int64_t offset, int bytes)
217 {
218     return bdrv_co_pdiscard(bs->file, offset, bytes);
219 }
220 
221 
222 static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
223                                                   uint64_t offset,
224                                                   uint64_t bytes,
225                                                   QEMUIOVector *qiov)
226 {
227     return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
228                            BDRV_REQ_WRITE_COMPRESSED);
229 }
230 
231 
232 static void cor_eject(BlockDriverState *bs, bool eject_flag)
233 {
234     bdrv_eject(bs->file->bs, eject_flag);
235 }
236 
237 
238 static void cor_lock_medium(BlockDriverState *bs, bool locked)
239 {
240     bdrv_lock_medium(bs->file->bs, locked);
241 }
242 
243 
244 static void cor_close(BlockDriverState *bs)
245 {
246     BDRVStateCOR *s = bs->opaque;
247 
248     if (s->chain_frozen) {
249         s->chain_frozen = false;
250         bdrv_unfreeze_backing_chain(bs, s->bottom_bs);
251     }
252 
253     bdrv_unref(s->bottom_bs);
254 }
255 
256 
257 static BlockDriver bdrv_copy_on_read = {
258     .format_name                        = "copy-on-read",
259     .instance_size                      = sizeof(BDRVStateCOR),
260 
261     .bdrv_open                          = cor_open,
262     .bdrv_close                         = cor_close,
263     .bdrv_child_perm                    = cor_child_perm,
264 
265     .bdrv_getlength                     = cor_getlength,
266 
267     .bdrv_co_preadv_part                = cor_co_preadv_part,
268     .bdrv_co_pwritev_part               = cor_co_pwritev_part,
269     .bdrv_co_pwrite_zeroes              = cor_co_pwrite_zeroes,
270     .bdrv_co_pdiscard                   = cor_co_pdiscard,
271     .bdrv_co_pwritev_compressed         = cor_co_pwritev_compressed,
272 
273     .bdrv_eject                         = cor_eject,
274     .bdrv_lock_medium                   = cor_lock_medium,
275 
276     .has_variable_length                = true,
277     .is_filter                          = true,
278 };
279 
280 
281 void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
282 {
283     BdrvChild *child;
284     BlockDriverState *bs;
285     BDRVStateCOR *s = cor_filter_bs->opaque;
286 
287     child = bdrv_filter_child(cor_filter_bs);
288     if (!child) {
289         return;
290     }
291     bs = child->bs;
292 
293     /* Retain the BDS until we complete the graph change. */
294     bdrv_ref(bs);
295     /* Hold a guest back from writing while permissions are being reset. */
296     bdrv_drained_begin(bs);
297     /* Drop permissions before the graph change. */
298     s->active = false;
299     /* unfreeze, as otherwise bdrv_replace_node() will fail */
300     if (s->chain_frozen) {
301         s->chain_frozen = false;
302         bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs);
303     }
304     bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort);
305     bdrv_replace_node(cor_filter_bs, bs, &error_abort);
306 
307     bdrv_drained_end(bs);
308     bdrv_unref(bs);
309     bdrv_unref(cor_filter_bs);
310 }
311 
312 
313 static void bdrv_copy_on_read_init(void)
314 {
315     bdrv_register(&bdrv_copy_on_read);
316 }
317 
318 block_init(bdrv_copy_on_read_init);
319