xref: /qemu/block/graph-lock.c (revision 3202d8e4)
1aead9dc9SPaolo Bonzini /*
2aead9dc9SPaolo Bonzini  * Graph lock: rwlock to protect block layer graph manipulations (add/remove
3aead9dc9SPaolo Bonzini  * edges and nodes)
4aead9dc9SPaolo Bonzini  *
5aead9dc9SPaolo Bonzini  *  Copyright (c) 2022 Red Hat
6aead9dc9SPaolo Bonzini  *
7aead9dc9SPaolo Bonzini  * This library is free software; you can redistribute it and/or
8aead9dc9SPaolo Bonzini  * modify it under the terms of the GNU Lesser General Public
9aead9dc9SPaolo Bonzini  * License as published by the Free Software Foundation; either
10aead9dc9SPaolo Bonzini  * version 2.1 of the License, or (at your option) any later version.
11aead9dc9SPaolo Bonzini  *
12aead9dc9SPaolo Bonzini  * This library is distributed in the hope that it will be useful,
13aead9dc9SPaolo Bonzini  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14aead9dc9SPaolo Bonzini  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15aead9dc9SPaolo Bonzini  * Lesser General Public License for more details.
16aead9dc9SPaolo Bonzini  *
17aead9dc9SPaolo Bonzini  * You should have received a copy of the GNU Lesser General Public
18aead9dc9SPaolo Bonzini  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19aead9dc9SPaolo Bonzini  */
20aead9dc9SPaolo Bonzini 
21aead9dc9SPaolo Bonzini #include "qemu/osdep.h"
22aead9dc9SPaolo Bonzini #include "qemu/main-loop.h"
23aead9dc9SPaolo Bonzini #include "block/graph-lock.h"
24aead9dc9SPaolo Bonzini #include "block/block.h"
25aead9dc9SPaolo Bonzini #include "block/block_int.h"
26aead9dc9SPaolo Bonzini 
274002ffdcSKevin Wolf /* Dummy lock object to use for Thread Safety Analysis (TSA) */
284002ffdcSKevin Wolf BdrvGraphLock graph_lock;
294002ffdcSKevin Wolf 
30aead9dc9SPaolo Bonzini /* Protects the list of aiocontext and orphaned_reader_count */
31aead9dc9SPaolo Bonzini static QemuMutex aio_context_list_lock;
32aead9dc9SPaolo Bonzini 
33aead9dc9SPaolo Bonzini /* Written and read with atomic operations. */
34aead9dc9SPaolo Bonzini static int has_writer;
35aead9dc9SPaolo Bonzini 
36aead9dc9SPaolo Bonzini /*
37aead9dc9SPaolo Bonzini  * A reader coroutine could move from an AioContext to another.
38aead9dc9SPaolo Bonzini  * If this happens, there is no problem from the point of view of
39aead9dc9SPaolo Bonzini  * counters. The problem is that the total count becomes
40aead9dc9SPaolo Bonzini  * unbalanced if one of the two AioContexts gets deleted.
41aead9dc9SPaolo Bonzini  * The count of readers must remain correct, so the AioContext's
42aead9dc9SPaolo Bonzini  * balance is transferred to this glboal variable.
43aead9dc9SPaolo Bonzini  * Protected by aio_context_list_lock.
44aead9dc9SPaolo Bonzini  */
45aead9dc9SPaolo Bonzini static uint32_t orphaned_reader_count;
46aead9dc9SPaolo Bonzini 
47aead9dc9SPaolo Bonzini /* Queue of readers waiting for the writer to finish */
48aead9dc9SPaolo Bonzini static CoQueue reader_queue;
49aead9dc9SPaolo Bonzini 
50aead9dc9SPaolo Bonzini struct BdrvGraphRWlock {
51aead9dc9SPaolo Bonzini     /* How many readers are currently reading the graph. */
52aead9dc9SPaolo Bonzini     uint32_t reader_count;
53aead9dc9SPaolo Bonzini 
54aead9dc9SPaolo Bonzini     /*
55aead9dc9SPaolo Bonzini      * List of BdrvGraphRWlock kept in graph-lock.c
56aead9dc9SPaolo Bonzini      * Protected by aio_context_list_lock
57aead9dc9SPaolo Bonzini      */
58aead9dc9SPaolo Bonzini     QTAILQ_ENTRY(BdrvGraphRWlock) next_aio;
59aead9dc9SPaolo Bonzini };
60aead9dc9SPaolo Bonzini 
61aead9dc9SPaolo Bonzini /*
62aead9dc9SPaolo Bonzini  * List of BdrvGraphRWlock. This list ensures that each BdrvGraphRWlock
63aead9dc9SPaolo Bonzini  * can safely modify only its own counter, avoid reading/writing
64aead9dc9SPaolo Bonzini  * others and thus improving performances by avoiding cacheline bounces.
65aead9dc9SPaolo Bonzini  */
66aead9dc9SPaolo Bonzini static QTAILQ_HEAD(, BdrvGraphRWlock) aio_context_list =
67aead9dc9SPaolo Bonzini     QTAILQ_HEAD_INITIALIZER(aio_context_list);
68aead9dc9SPaolo Bonzini 
69aead9dc9SPaolo Bonzini static void __attribute__((__constructor__)) bdrv_init_graph_lock(void)
70aead9dc9SPaolo Bonzini {
71aead9dc9SPaolo Bonzini     qemu_mutex_init(&aio_context_list_lock);
72aead9dc9SPaolo Bonzini     qemu_co_queue_init(&reader_queue);
73aead9dc9SPaolo Bonzini }
74aead9dc9SPaolo Bonzini 
75aead9dc9SPaolo Bonzini void register_aiocontext(AioContext *ctx)
76aead9dc9SPaolo Bonzini {
77aead9dc9SPaolo Bonzini     ctx->bdrv_graph = g_new0(BdrvGraphRWlock, 1);
78aead9dc9SPaolo Bonzini     QEMU_LOCK_GUARD(&aio_context_list_lock);
79aead9dc9SPaolo Bonzini     assert(ctx->bdrv_graph->reader_count == 0);
80aead9dc9SPaolo Bonzini     QTAILQ_INSERT_TAIL(&aio_context_list, ctx->bdrv_graph, next_aio);
81aead9dc9SPaolo Bonzini }
82aead9dc9SPaolo Bonzini 
83aead9dc9SPaolo Bonzini void unregister_aiocontext(AioContext *ctx)
84aead9dc9SPaolo Bonzini {
85aead9dc9SPaolo Bonzini     QEMU_LOCK_GUARD(&aio_context_list_lock);
86aead9dc9SPaolo Bonzini     orphaned_reader_count += ctx->bdrv_graph->reader_count;
87aead9dc9SPaolo Bonzini     QTAILQ_REMOVE(&aio_context_list, ctx->bdrv_graph, next_aio);
88aead9dc9SPaolo Bonzini     g_free(ctx->bdrv_graph);
89aead9dc9SPaolo Bonzini }
90aead9dc9SPaolo Bonzini 
91aead9dc9SPaolo Bonzini static uint32_t reader_count(void)
92aead9dc9SPaolo Bonzini {
93aead9dc9SPaolo Bonzini     BdrvGraphRWlock *brdv_graph;
94aead9dc9SPaolo Bonzini     uint32_t rd;
95aead9dc9SPaolo Bonzini 
96aead9dc9SPaolo Bonzini     QEMU_LOCK_GUARD(&aio_context_list_lock);
97aead9dc9SPaolo Bonzini 
983202d8e4SMichael Tokarev     /* rd can temporarily be negative, but the total will *always* be >= 0 */
99aead9dc9SPaolo Bonzini     rd = orphaned_reader_count;
100aead9dc9SPaolo Bonzini     QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) {
101aead9dc9SPaolo Bonzini         rd += qatomic_read(&brdv_graph->reader_count);
102aead9dc9SPaolo Bonzini     }
103aead9dc9SPaolo Bonzini 
104aead9dc9SPaolo Bonzini     /* shouldn't overflow unless there are 2^31 readers */
105aead9dc9SPaolo Bonzini     assert((int32_t)rd >= 0);
106aead9dc9SPaolo Bonzini     return rd;
107aead9dc9SPaolo Bonzini }
108aead9dc9SPaolo Bonzini 
10931b2ddfeSKevin Wolf void bdrv_graph_wrlock(BlockDriverState *bs)
110aead9dc9SPaolo Bonzini {
11131b2ddfeSKevin Wolf     AioContext *ctx = NULL;
11231b2ddfeSKevin Wolf 
113aead9dc9SPaolo Bonzini     GLOBAL_STATE_CODE();
114aead9dc9SPaolo Bonzini     assert(!qatomic_read(&has_writer));
115aead9dc9SPaolo Bonzini 
11631b2ddfeSKevin Wolf     /*
11731b2ddfeSKevin Wolf      * Release only non-mainloop AioContext. The mainloop often relies on the
11831b2ddfeSKevin Wolf      * BQL and doesn't lock the main AioContext before doing things.
11931b2ddfeSKevin Wolf      */
12031b2ddfeSKevin Wolf     if (bs) {
12131b2ddfeSKevin Wolf         ctx = bdrv_get_aio_context(bs);
12231b2ddfeSKevin Wolf         if (ctx != qemu_get_aio_context()) {
12331b2ddfeSKevin Wolf             aio_context_release(ctx);
12431b2ddfeSKevin Wolf         } else {
12531b2ddfeSKevin Wolf             ctx = NULL;
12631b2ddfeSKevin Wolf         }
12731b2ddfeSKevin Wolf     }
12831b2ddfeSKevin Wolf 
129aead9dc9SPaolo Bonzini     /* Make sure that constantly arriving new I/O doesn't cause starvation */
130aead9dc9SPaolo Bonzini     bdrv_drain_all_begin_nopoll();
131aead9dc9SPaolo Bonzini 
132aead9dc9SPaolo Bonzini     /*
133aead9dc9SPaolo Bonzini      * reader_count == 0: this means writer will read has_reader as 1
134aead9dc9SPaolo Bonzini      * reader_count >= 1: we don't know if writer read has_writer == 0 or 1,
135aead9dc9SPaolo Bonzini      *                    but we need to wait.
136aead9dc9SPaolo Bonzini      * Wait by allowing other coroutine (and possible readers) to continue.
137aead9dc9SPaolo Bonzini      */
138aead9dc9SPaolo Bonzini     do {
139aead9dc9SPaolo Bonzini         /*
140aead9dc9SPaolo Bonzini          * has_writer must be 0 while polling, otherwise we get a deadlock if
141aead9dc9SPaolo Bonzini          * any callback involved during AIO_WAIT_WHILE() tries to acquire the
142aead9dc9SPaolo Bonzini          * reader lock.
143aead9dc9SPaolo Bonzini          */
144aead9dc9SPaolo Bonzini         qatomic_set(&has_writer, 0);
145d805d8a2SStefan Hajnoczi         AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
146aead9dc9SPaolo Bonzini         qatomic_set(&has_writer, 1);
147aead9dc9SPaolo Bonzini 
148aead9dc9SPaolo Bonzini         /*
149aead9dc9SPaolo Bonzini          * We want to only check reader_count() after has_writer = 1 is visible
150aead9dc9SPaolo Bonzini          * to other threads. That way no more readers can sneak in after we've
151aead9dc9SPaolo Bonzini          * determined reader_count() == 0.
152aead9dc9SPaolo Bonzini          */
153aead9dc9SPaolo Bonzini         smp_mb();
154aead9dc9SPaolo Bonzini     } while (reader_count() >= 1);
155aead9dc9SPaolo Bonzini 
156aead9dc9SPaolo Bonzini     bdrv_drain_all_end();
15731b2ddfeSKevin Wolf 
15831b2ddfeSKevin Wolf     if (ctx) {
15931b2ddfeSKevin Wolf         aio_context_acquire(bdrv_get_aio_context(bs));
16031b2ddfeSKevin Wolf     }
161aead9dc9SPaolo Bonzini }
162aead9dc9SPaolo Bonzini 
163aead9dc9SPaolo Bonzini void bdrv_graph_wrunlock(void)
164aead9dc9SPaolo Bonzini {
165aead9dc9SPaolo Bonzini     GLOBAL_STATE_CODE();
166aead9dc9SPaolo Bonzini     QEMU_LOCK_GUARD(&aio_context_list_lock);
167aead9dc9SPaolo Bonzini     assert(qatomic_read(&has_writer));
168aead9dc9SPaolo Bonzini 
169aead9dc9SPaolo Bonzini     /*
170aead9dc9SPaolo Bonzini      * No need for memory barriers, this works in pair with
171aead9dc9SPaolo Bonzini      * the slow path of rdlock() and both take the lock.
172aead9dc9SPaolo Bonzini      */
173aead9dc9SPaolo Bonzini     qatomic_store_release(&has_writer, 0);
174aead9dc9SPaolo Bonzini 
175aead9dc9SPaolo Bonzini     /* Wake up all coroutine that are waiting to read the graph */
176aead9dc9SPaolo Bonzini     qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
177aead9dc9SPaolo Bonzini }
178aead9dc9SPaolo Bonzini 
179aead9dc9SPaolo Bonzini void coroutine_fn bdrv_graph_co_rdlock(void)
180aead9dc9SPaolo Bonzini {
181aead9dc9SPaolo Bonzini     BdrvGraphRWlock *bdrv_graph;
182aead9dc9SPaolo Bonzini     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
183aead9dc9SPaolo Bonzini 
184aead9dc9SPaolo Bonzini     for (;;) {
185aead9dc9SPaolo Bonzini         qatomic_set(&bdrv_graph->reader_count,
186aead9dc9SPaolo Bonzini                     bdrv_graph->reader_count + 1);
187aead9dc9SPaolo Bonzini         /* make sure writer sees reader_count before we check has_writer */
188aead9dc9SPaolo Bonzini         smp_mb();
189aead9dc9SPaolo Bonzini 
190aead9dc9SPaolo Bonzini         /*
191aead9dc9SPaolo Bonzini          * has_writer == 0: this means writer will read reader_count as >= 1
192aead9dc9SPaolo Bonzini          * has_writer == 1: we don't know if writer read reader_count == 0
193aead9dc9SPaolo Bonzini          *                  or > 0, but we need to wait anyways because
194aead9dc9SPaolo Bonzini          *                  it will write.
195aead9dc9SPaolo Bonzini          */
196aead9dc9SPaolo Bonzini         if (!qatomic_read(&has_writer)) {
197aead9dc9SPaolo Bonzini             break;
198aead9dc9SPaolo Bonzini         }
199aead9dc9SPaolo Bonzini 
200aead9dc9SPaolo Bonzini         /*
201aead9dc9SPaolo Bonzini          * Synchronize access with reader_count() in bdrv_graph_wrlock().
202aead9dc9SPaolo Bonzini          * Case 1:
203aead9dc9SPaolo Bonzini          * If this critical section gets executed first, reader_count will
204aead9dc9SPaolo Bonzini          * decrease and the reader will go to sleep.
205aead9dc9SPaolo Bonzini          * Then the writer will read reader_count that does not take into
206aead9dc9SPaolo Bonzini          * account this reader, and if there's no other reader it will
207aead9dc9SPaolo Bonzini          * enter the write section.
208aead9dc9SPaolo Bonzini          * Case 2:
209aead9dc9SPaolo Bonzini          * If reader_count() critical section gets executed first,
210aead9dc9SPaolo Bonzini          * then writer will read reader_count >= 1.
211aead9dc9SPaolo Bonzini          * It will wait in AIO_WAIT_WHILE(), but once it releases the lock
212aead9dc9SPaolo Bonzini          * we will enter this critical section and call aio_wait_kick().
213aead9dc9SPaolo Bonzini          */
214aead9dc9SPaolo Bonzini         WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
215aead9dc9SPaolo Bonzini             /*
216aead9dc9SPaolo Bonzini              * Additional check when we use the above lock to synchronize
217aead9dc9SPaolo Bonzini              * with bdrv_graph_wrunlock().
218aead9dc9SPaolo Bonzini              * Case 1:
219aead9dc9SPaolo Bonzini              * If this gets executed first, has_writer is still 1, so we reduce
220aead9dc9SPaolo Bonzini              * reader_count and go to sleep.
221aead9dc9SPaolo Bonzini              * Then the writer will set has_writer to 0 and wake up all readers,
222aead9dc9SPaolo Bonzini              * us included.
223aead9dc9SPaolo Bonzini              * Case 2:
224aead9dc9SPaolo Bonzini              * If bdrv_graph_wrunlock() critical section gets executed first,
225aead9dc9SPaolo Bonzini              * then it will set has_writer to 0 and wake up all other readers.
226aead9dc9SPaolo Bonzini              * Then we execute this critical section, and therefore must check
227aead9dc9SPaolo Bonzini              * again for has_writer, otherwise we sleep without any writer
228aead9dc9SPaolo Bonzini              * actually running.
229aead9dc9SPaolo Bonzini              */
230aead9dc9SPaolo Bonzini             if (!qatomic_read(&has_writer)) {
231aead9dc9SPaolo Bonzini                 return;
232aead9dc9SPaolo Bonzini             }
233aead9dc9SPaolo Bonzini 
234aead9dc9SPaolo Bonzini             /* slow path where reader sleeps */
235aead9dc9SPaolo Bonzini             bdrv_graph->reader_count--;
236aead9dc9SPaolo Bonzini             aio_wait_kick();
237aead9dc9SPaolo Bonzini             qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
238aead9dc9SPaolo Bonzini         }
239aead9dc9SPaolo Bonzini     }
240aead9dc9SPaolo Bonzini }
241aead9dc9SPaolo Bonzini 
242aead9dc9SPaolo Bonzini void coroutine_fn bdrv_graph_co_rdunlock(void)
243aead9dc9SPaolo Bonzini {
244aead9dc9SPaolo Bonzini     BdrvGraphRWlock *bdrv_graph;
245aead9dc9SPaolo Bonzini     bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
246aead9dc9SPaolo Bonzini 
247aead9dc9SPaolo Bonzini     qatomic_store_release(&bdrv_graph->reader_count,
248aead9dc9SPaolo Bonzini                           bdrv_graph->reader_count - 1);
249aead9dc9SPaolo Bonzini     /* make sure writer sees reader_count before we check has_writer */
250aead9dc9SPaolo Bonzini     smp_mb();
251aead9dc9SPaolo Bonzini 
252aead9dc9SPaolo Bonzini     /*
253aead9dc9SPaolo Bonzini      * has_writer == 0: this means reader will read reader_count decreased
254aead9dc9SPaolo Bonzini      * has_writer == 1: we don't know if writer read reader_count old or
255aead9dc9SPaolo Bonzini      *                  new. Therefore, kick again so on next iteration
256aead9dc9SPaolo Bonzini      *                  writer will for sure read the updated value.
257aead9dc9SPaolo Bonzini      */
258aead9dc9SPaolo Bonzini     if (qatomic_read(&has_writer)) {
259aead9dc9SPaolo Bonzini         aio_wait_kick();
260aead9dc9SPaolo Bonzini     }
261aead9dc9SPaolo Bonzini }
262aead9dc9SPaolo Bonzini 
/*
 * Main-loop counterpart of the reader lock.
 *
 * No counter is touched; the function only checks that it runs in global
 * state code (GLOBAL_STATE_CODE()) and outside of coroutine context.
 */
void bdrv_graph_rdlock_main_loop(void)
{
    GLOBAL_STATE_CODE();
    assert(!qemu_in_coroutine());
}
268aead9dc9SPaolo Bonzini 
/*
 * Main-loop counterpart of the reader unlock.
 *
 * No counter is touched; the function only checks that it runs in global
 * state code (GLOBAL_STATE_CODE()) and outside of coroutine context.
 */
void bdrv_graph_rdunlock_main_loop(void)
{
    GLOBAL_STATE_CODE();
    assert(!qemu_in_coroutine());
}
2743f35f82eSEmanuele Giuseppe Esposito 
/*
 * Assert that the caller may read the graph: it either runs in the main
 * thread or at least one reader is currently accounted for.
 *
 * Compiled to nothing unless CONFIG_DEBUG_GRAPH_LOCK is defined.
 */
void assert_bdrv_graph_readable(void)
{
    /* reader_count() is slow due to aio_context_list_lock lock contention */
#ifdef CONFIG_DEBUG_GRAPH_LOCK
    assert(qemu_in_main_thread() || reader_count() > 0);
#endif
}
2823f35f82eSEmanuele Giuseppe Esposito 
2833f35f82eSEmanuele Giuseppe Esposito void assert_bdrv_graph_writable(void)
2843f35f82eSEmanuele Giuseppe Esposito {
2853f35f82eSEmanuele Giuseppe Esposito     assert(qemu_in_main_thread());
2863f35f82eSEmanuele Giuseppe Esposito     assert(qatomic_read(&has_writer));
2873f35f82eSEmanuele Giuseppe Esposito }
288