xref: /qemu/block/qed-table.c (revision f6513529)
1 /*
2  * QEMU Enhanced Disk Format Table I/O
3  *
4  * Copyright IBM, Corp. 2010
5  *
6  * Authors:
7  *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
8  *  Anthony Liguori   <aliguori@us.ibm.com>
9  *
10  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
11  * See the COPYING.LIB file in the top-level directory.
12  *
13  */
14 
15 #include "qemu/osdep.h"
16 #include "trace.h"
17 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
18 #include "qed.h"
19 #include "qemu/bswap.h"
20 
21 static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
22 {
23     QEMUIOVector qiov;
24     int noffsets;
25     int i, ret;
26 
27     struct iovec iov = {
28         .iov_base = table->offsets,
29         .iov_len = s->header.cluster_size * s->header.table_size,
30     };
31     qemu_iovec_init_external(&qiov, &iov, 1);
32 
33     trace_qed_read_table(s, offset, table);
34 
35     ret = bdrv_preadv(s->bs->file, offset, &qiov);
36     if (ret < 0) {
37         goto out;
38     }
39 
40     /* Byteswap offsets */
41     qed_acquire(s);
42     noffsets = qiov.size / sizeof(uint64_t);
43     for (i = 0; i < noffsets; i++) {
44         table->offsets[i] = le64_to_cpu(table->offsets[i]);
45     }
46     qed_release(s);
47 
48     ret = 0;
49 out:
50     /* Completion */
51     trace_qed_read_table_cb(s, table, ret);
52     return ret;
53 }
54 
/* Per-operation state for an asynchronous table write; completed (and the
 * temporary little-endian buffer freed) in qed_write_table_cb().
 */
typedef struct {
    GenericCB gencb;        /* generic completion state; NOTE(review): presumably
                             * must be the first member for gencb_alloc()/
                             * gencb_complete() — confirm */
    BDRVQEDState *s;
    QEDTable *orig_table;   /* caller's table (host byte order); not owned here */
    QEDTable *table;        /* little-endian copy written to disk; freed on completion */
    bool flush;             /* flush after write? */

    struct iovec iov;       /* single element backing qiov */
    QEMUIOVector qiov;      /* I/O vector covering the serialized table region */
} QEDWriteTableCB;
65 
/**
 * Completion callback for asynchronous table writes
 *
 * Invoked when the table write finishes.  If a flush was requested and the
 * write succeeded, the flush flag is cleared and this function re-arms
 * itself as the callback for bdrv_aio_flush(), so it runs a second time
 * when the flush completes.  On error, or once no flush remains pending,
 * the temporary little-endian table buffer is freed and the caller's
 * completion function is invoked via gencb_complete().
 *
 * @opaque: QEDWriteTableCB for this operation
 * @ret:    0 on success, negative errno on failure
 */
static void qed_write_table_cb(void *opaque, int ret)
{
    QEDWriteTableCB *write_table_cb = opaque;
    BDRVQEDState *s = write_table_cb->s;

    trace_qed_write_table_cb(s,
                             write_table_cb->orig_table,
                             write_table_cb->flush,
                             ret);

    if (ret) {
        goto out;
    }

    if (write_table_cb->flush) {
        /* We still need to flush first */
        write_table_cb->flush = false;
        qed_acquire(s);
        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
                       write_table_cb);
        qed_release(s);
        return;
    }

out:
    qemu_vfree(write_table_cb->table);
    gencb_complete(&write_table_cb->gencb, ret);
}
94 
95 /**
96  * Write out an updated part or all of a table
97  *
98  * @s:          QED state
99  * @offset:     Offset of table in image file, in bytes
100  * @table:      Table
101  * @index:      Index of first element
102  * @n:          Number of elements
103  * @flush:      Whether or not to sync to disk
104  * @cb:         Completion function
105  * @opaque:     Argument for completion function
106  */
107 static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
108                             unsigned int index, unsigned int n, bool flush,
109                             BlockCompletionFunc *cb, void *opaque)
110 {
111     QEDWriteTableCB *write_table_cb;
112     unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
113     unsigned int start, end, i;
114     size_t len_bytes;
115 
116     trace_qed_write_table(s, offset, table, index, n);
117 
118     /* Calculate indices of the first and one after last elements */
119     start = index & ~sector_mask;
120     end = (index + n + sector_mask) & ~sector_mask;
121 
122     len_bytes = (end - start) * sizeof(uint64_t);
123 
124     write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
125     write_table_cb->s = s;
126     write_table_cb->orig_table = table;
127     write_table_cb->flush = flush;
128     write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
129     write_table_cb->iov.iov_base = write_table_cb->table->offsets;
130     write_table_cb->iov.iov_len = len_bytes;
131     qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
132 
133     /* Byteswap table */
134     for (i = start; i < end; i++) {
135         uint64_t le_offset = cpu_to_le64(table->offsets[i]);
136         write_table_cb->table->offsets[i - start] = le_offset;
137     }
138 
139     /* Adjust for offset into table */
140     offset += start * sizeof(uint64_t);
141 
142     bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
143                     &write_table_cb->qiov,
144                     write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
145                     qed_write_table_cb, write_table_cb);
146 }
147 
/**
 * Propagate return value from async callback
 *
 * @opaque: pointer to the int that receives the completion status
 * @ret:    completion status to store
 */
static void qed_sync_cb(void *opaque, int ret)
{
    int *status_out = opaque;

    *status_out = ret;
}
155 
156 int qed_read_l1_table_sync(BDRVQEDState *s)
157 {
158     return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
159 }
160 
161 void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
162                         BlockCompletionFunc *cb, void *opaque)
163 {
164     BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
165     qed_write_table(s, s->header.l1_table_offset,
166                     s->l1_table, index, n, false, cb, opaque);
167 }
168 
169 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
170                             unsigned int n)
171 {
172     int ret = -EINPROGRESS;
173 
174     qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
175     BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
176 
177     return ret;
178 }
179 
/**
 * Load an L2 table, using the L2 cache when possible
 *
 * Releases any L2 table currently held by @request, then either takes a
 * reference to the cached entry for @offset or reads the table from the
 * image file and commits it to the cache.  On success request->l2_table
 * points at the cached entry; on read failure it is set to NULL.
 *
 * @s:       QED state
 * @request: Request that will hold the L2 table reference
 * @offset:  Byte offset of the L2 table in the image file
 * @cb:      Completion function, invoked with 0 or negative errno
 * @opaque:  Argument for completion function
 */
void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
                       BlockCompletionFunc *cb, void *opaque)
{
    int ret;

    /* Drop the reference to whatever L2 table the request held before */
    qed_unref_l2_cache_entry(request->l2_table);

    /* Check for cached L2 entry */
    request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
    if (request->l2_table) {
        cb(opaque, 0);
        return;
    }

    request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
    request->l2_table->table = qed_alloc_table(s);

    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
    ret = qed_read_table(s, offset, request->l2_table->table);

    qed_acquire(s);
    if (ret) {
        /* can't trust loaded L2 table anymore */
        qed_unref_l2_cache_entry(request->l2_table);
        request->l2_table = NULL;
    } else {
        request->l2_table->offset = offset;

        qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table);

        /* This is guaranteed to succeed because we just committed the entry
         * to the cache.
         */
        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
        assert(request->l2_table != NULL);
    }
    qed_release(s);

    cb(opaque, ret);
}
220 
221 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
222 {
223     int ret = -EINPROGRESS;
224 
225     qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
226     BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
227 
228     return ret;
229 }
230 
231 void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
232                         unsigned int index, unsigned int n, bool flush,
233                         BlockCompletionFunc *cb, void *opaque)
234 {
235     BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
236     qed_write_table(s, request->l2_table->offset,
237                     request->l2_table->table, index, n, flush, cb, opaque);
238 }
239 
240 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
241                             unsigned int index, unsigned int n, bool flush)
242 {
243     int ret = -EINPROGRESS;
244 
245     qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
246     BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
247 
248     return ret;
249 }
250