1298800caSStefan Hajnoczi /* 2298800caSStefan Hajnoczi * QEMU Enhanced Disk Format Table I/O 3298800caSStefan Hajnoczi * 4298800caSStefan Hajnoczi * Copyright IBM, Corp. 2010 5298800caSStefan Hajnoczi * 6298800caSStefan Hajnoczi * Authors: 7298800caSStefan Hajnoczi * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> 8298800caSStefan Hajnoczi * Anthony Liguori <aliguori@us.ibm.com> 9298800caSStefan Hajnoczi * 10298800caSStefan Hajnoczi * This work is licensed under the terms of the GNU LGPL, version 2 or later. 11298800caSStefan Hajnoczi * See the COPYING.LIB file in the top-level directory. 12298800caSStefan Hajnoczi * 13298800caSStefan Hajnoczi */ 14298800caSStefan Hajnoczi 1580c71a24SPeter Maydell #include "qemu/osdep.h" 16298800caSStefan Hajnoczi #include "trace.h" 171de7afc9SPaolo Bonzini #include "qemu/sockets.h" /* for EINPROGRESS on Windows */ 18298800caSStefan Hajnoczi #include "qed.h" 1958369e22SPaolo Bonzini #include "qemu/bswap.h" 20298800caSStefan Hajnoczi 21f6513529SKevin Wolf static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table) 22298800caSStefan Hajnoczi { 2311273076SKevin Wolf QEMUIOVector qiov; 2411273076SKevin Wolf int noffsets; 2511273076SKevin Wolf int i, ret; 26298800caSStefan Hajnoczi 2711273076SKevin Wolf struct iovec iov = { 2811273076SKevin Wolf .iov_base = table->offsets, 2911273076SKevin Wolf .iov_len = s->header.cluster_size * s->header.table_size, 3011273076SKevin Wolf }; 3111273076SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 3211273076SKevin Wolf 3311273076SKevin Wolf trace_qed_read_table(s, offset, table); 3411273076SKevin Wolf 3511273076SKevin Wolf ret = bdrv_preadv(s->bs->file, offset, &qiov); 3611273076SKevin Wolf if (ret < 0) { 37298800caSStefan Hajnoczi goto out; 38298800caSStefan Hajnoczi } 39298800caSStefan Hajnoczi 40298800caSStefan Hajnoczi /* Byteswap offsets */ 41b9e413ddSPaolo Bonzini qed_acquire(s); 4211273076SKevin Wolf noffsets = qiov.size / sizeof(uint64_t); 43298800caSStefan Hajnoczi for (i = 0; i < noffsets; i++) { 44298800caSStefan Hajnoczi table->offsets[i] = le64_to_cpu(table->offsets[i]); 45298800caSStefan Hajnoczi } 46b9e413ddSPaolo Bonzini qed_release(s); 47298800caSStefan Hajnoczi 4811273076SKevin Wolf ret = 0; 49298800caSStefan Hajnoczi out: 50298800caSStefan Hajnoczi /* Completion */ 5111273076SKevin Wolf trace_qed_read_table_cb(s, table, ret); 52f6513529SKevin Wolf return ret; 53298800caSStefan Hajnoczi } 54298800caSStefan Hajnoczi 55298800caSStefan Hajnoczi typedef struct { 56298800caSStefan Hajnoczi GenericCB gencb; 57298800caSStefan Hajnoczi BDRVQEDState *s; 58298800caSStefan Hajnoczi QEDTable *orig_table; 59298800caSStefan Hajnoczi QEDTable *table; 60298800caSStefan Hajnoczi bool flush; /* flush after write? */ 61298800caSStefan Hajnoczi 62298800caSStefan Hajnoczi struct iovec iov; 63298800caSStefan Hajnoczi QEMUIOVector qiov; 64298800caSStefan Hajnoczi } QEDWriteTableCB; 65298800caSStefan Hajnoczi 66298800caSStefan Hajnoczi static void qed_write_table_cb(void *opaque, int ret) 67298800caSStefan Hajnoczi { 68298800caSStefan Hajnoczi QEDWriteTableCB *write_table_cb = opaque; 69b9e413ddSPaolo Bonzini BDRVQEDState *s = write_table_cb->s; 70298800caSStefan Hajnoczi 71b9e413ddSPaolo Bonzini trace_qed_write_table_cb(s, 72298800caSStefan Hajnoczi write_table_cb->orig_table, 73298800caSStefan Hajnoczi write_table_cb->flush, 74298800caSStefan Hajnoczi ret); 75298800caSStefan Hajnoczi 76298800caSStefan Hajnoczi if (ret) { 77298800caSStefan Hajnoczi goto out; 78298800caSStefan Hajnoczi } 79298800caSStefan Hajnoczi 80298800caSStefan Hajnoczi if (write_table_cb->flush) { 81298800caSStefan Hajnoczi /* We still need to flush first */ 82298800caSStefan Hajnoczi write_table_cb->flush = false; 83b9e413ddSPaolo Bonzini qed_acquire(s); 84298800caSStefan Hajnoczi bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb, 85298800caSStefan Hajnoczi write_table_cb); 86b9e413ddSPaolo Bonzini qed_release(s); 87298800caSStefan Hajnoczi return; 88298800caSStefan Hajnoczi } 89298800caSStefan Hajnoczi 90298800caSStefan Hajnoczi out: 91298800caSStefan Hajnoczi qemu_vfree(write_table_cb->table); 92298800caSStefan Hajnoczi gencb_complete(&write_table_cb->gencb, ret); 93298800caSStefan Hajnoczi } 94298800caSStefan Hajnoczi 95298800caSStefan Hajnoczi /** 96298800caSStefan Hajnoczi * Write out an updated part or all of a table 97298800caSStefan Hajnoczi * 98298800caSStefan Hajnoczi * @s: QED state 99298800caSStefan Hajnoczi * @offset: Offset of table in image file, in bytes 100298800caSStefan Hajnoczi * @table: Table 101298800caSStefan Hajnoczi * @index: Index of first element 102298800caSStefan Hajnoczi * @n: Number of elements 103298800caSStefan Hajnoczi * @flush: Whether or not to sync to disk 104298800caSStefan Hajnoczi * @cb: Completion function 105298800caSStefan Hajnoczi * @opaque: Argument for completion function 106298800caSStefan Hajnoczi */ 107298800caSStefan Hajnoczi static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, 108298800caSStefan Hajnoczi unsigned int index, unsigned int n, bool flush, 109097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 110298800caSStefan Hajnoczi { 111298800caSStefan Hajnoczi QEDWriteTableCB *write_table_cb; 112298800caSStefan Hajnoczi unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; 113298800caSStefan Hajnoczi unsigned int start, end, i; 114298800caSStefan Hajnoczi size_t len_bytes; 115298800caSStefan Hajnoczi 116298800caSStefan Hajnoczi trace_qed_write_table(s, offset, table, index, n); 117298800caSStefan Hajnoczi 118298800caSStefan Hajnoczi /* Calculate indices of the first and one after last elements */ 119298800caSStefan Hajnoczi start = index & ~sector_mask; 120298800caSStefan Hajnoczi end = (index + n + sector_mask) & ~sector_mask; 121298800caSStefan Hajnoczi 122298800caSStefan Hajnoczi len_bytes = (end - start) * sizeof(uint64_t); 123298800caSStefan Hajnoczi 124298800caSStefan Hajnoczi write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque); 125298800caSStefan Hajnoczi write_table_cb->s = s; 126298800caSStefan Hajnoczi write_table_cb->orig_table = table; 127298800caSStefan Hajnoczi write_table_cb->flush = flush; 128298800caSStefan Hajnoczi write_table_cb->table = qemu_blockalign(s->bs, len_bytes); 129298800caSStefan Hajnoczi write_table_cb->iov.iov_base = write_table_cb->table->offsets; 130298800caSStefan Hajnoczi write_table_cb->iov.iov_len = len_bytes; 131298800caSStefan Hajnoczi qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1); 132298800caSStefan Hajnoczi 133298800caSStefan Hajnoczi /* Byteswap table */ 134298800caSStefan Hajnoczi for (i = start; i < end; i++) { 135298800caSStefan Hajnoczi uint64_t le_offset = cpu_to_le64(table->offsets[i]); 136298800caSStefan Hajnoczi write_table_cb->table->offsets[i - start] = le_offset; 137298800caSStefan Hajnoczi } 138298800caSStefan Hajnoczi 139298800caSStefan Hajnoczi /* Adjust for offset into table */ 140298800caSStefan Hajnoczi offset += start * sizeof(uint64_t); 141298800caSStefan Hajnoczi 1420d1049c7SKevin Wolf bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, 143298800caSStefan Hajnoczi &write_table_cb->qiov, 14423e9a39eSZhi Yong Wu write_table_cb->qiov.size / BDRV_SECTOR_SIZE, 145298800caSStefan Hajnoczi qed_write_table_cb, write_table_cb); 146298800caSStefan Hajnoczi } 147298800caSStefan Hajnoczi 148298800caSStefan Hajnoczi /** 149298800caSStefan Hajnoczi * Propagate return value from async callback 150298800caSStefan Hajnoczi */ 151298800caSStefan Hajnoczi static void qed_sync_cb(void *opaque, int ret) 152298800caSStefan Hajnoczi { 153298800caSStefan Hajnoczi *(int *)opaque = ret; 154298800caSStefan Hajnoczi } 155298800caSStefan Hajnoczi 156298800caSStefan Hajnoczi int qed_read_l1_table_sync(BDRVQEDState *s) 157298800caSStefan Hajnoczi { 158f6513529SKevin Wolf return qed_read_table(s, s->header.l1_table_offset, s->l1_table); 159298800caSStefan Hajnoczi } 160298800caSStefan Hajnoczi 161298800caSStefan Hajnoczi void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, 162097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 163298800caSStefan Hajnoczi { 164298800caSStefan Hajnoczi BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE); 165298800caSStefan Hajnoczi qed_write_table(s, s->header.l1_table_offset, 166298800caSStefan Hajnoczi s->l1_table, index, n, false, cb, opaque); 167298800caSStefan Hajnoczi } 168298800caSStefan Hajnoczi 169298800caSStefan Hajnoczi int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, 170298800caSStefan Hajnoczi unsigned int n) 171298800caSStefan Hajnoczi { 172298800caSStefan Hajnoczi int ret = -EINPROGRESS; 173298800caSStefan Hajnoczi 174298800caSStefan Hajnoczi qed_write_l1_table(s, index, n, qed_sync_cb, &ret); 17588b062c2SPaolo Bonzini BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); 176298800caSStefan Hajnoczi 177298800caSStefan Hajnoczi return ret; 178298800caSStefan Hajnoczi } 179298800caSStefan Hajnoczi 180*a8165d2dSKevin Wolf int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset) 181298800caSStefan Hajnoczi { 182f6513529SKevin Wolf int ret; 183298800caSStefan Hajnoczi 184298800caSStefan Hajnoczi qed_unref_l2_cache_entry(request->l2_table); 185298800caSStefan Hajnoczi 186298800caSStefan Hajnoczi /* Check for cached L2 entry */ 187298800caSStefan Hajnoczi request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); 188298800caSStefan Hajnoczi if (request->l2_table) { 189*a8165d2dSKevin Wolf return 0; 190298800caSStefan Hajnoczi } 191298800caSStefan Hajnoczi 192298800caSStefan Hajnoczi request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); 193298800caSStefan Hajnoczi request->l2_table->table = qed_alloc_table(s); 194298800caSStefan Hajnoczi 195298800caSStefan Hajnoczi BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD); 196f6513529SKevin Wolf ret = qed_read_table(s, offset, request->l2_table->table); 197f6513529SKevin Wolf 198f6513529SKevin Wolf qed_acquire(s); 199f6513529SKevin Wolf if (ret) { 200f6513529SKevin Wolf /* can't trust loaded L2 table anymore */ 201f6513529SKevin Wolf qed_unref_l2_cache_entry(request->l2_table); 202f6513529SKevin Wolf request->l2_table = NULL; 203f6513529SKevin Wolf } else { 204f6513529SKevin Wolf request->l2_table->offset = offset; 205f6513529SKevin Wolf 206f6513529SKevin Wolf qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table); 207f6513529SKevin Wolf 208f6513529SKevin Wolf /* This is guaranteed to succeed because we just committed the entry 209f6513529SKevin Wolf * to the cache. 210f6513529SKevin Wolf */ 211f6513529SKevin Wolf request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); 212f6513529SKevin Wolf assert(request->l2_table != NULL); 213f6513529SKevin Wolf } 214f6513529SKevin Wolf qed_release(s); 215f6513529SKevin Wolf 216*a8165d2dSKevin Wolf return ret; 217298800caSStefan Hajnoczi } 218298800caSStefan Hajnoczi 219298800caSStefan Hajnoczi int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset) 220298800caSStefan Hajnoczi { 221*a8165d2dSKevin Wolf return qed_read_l2_table(s, request, offset); 222298800caSStefan Hajnoczi } 223298800caSStefan Hajnoczi 224298800caSStefan Hajnoczi void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, 225298800caSStefan Hajnoczi unsigned int index, unsigned int n, bool flush, 226097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 227298800caSStefan Hajnoczi { 228298800caSStefan Hajnoczi BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE); 229298800caSStefan Hajnoczi qed_write_table(s, request->l2_table->offset, 230298800caSStefan Hajnoczi request->l2_table->table, index, n, flush, cb, opaque); 231298800caSStefan Hajnoczi } 232298800caSStefan Hajnoczi 233298800caSStefan Hajnoczi int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, 234298800caSStefan Hajnoczi unsigned int index, unsigned int n, bool flush) 235298800caSStefan Hajnoczi { 236298800caSStefan Hajnoczi int ret = -EINPROGRESS; 237298800caSStefan Hajnoczi 238298800caSStefan Hajnoczi qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); 23988b062c2SPaolo Bonzini BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); 240298800caSStefan Hajnoczi 241298800caSStefan Hajnoczi return ret; 242298800caSStefan Hajnoczi } 243