16e790746SPaolo Bonzini /* 26e790746SPaolo Bonzini * Virtio Block Device 36e790746SPaolo Bonzini * 46e790746SPaolo Bonzini * Copyright IBM, Corp. 2007 56e790746SPaolo Bonzini * 66e790746SPaolo Bonzini * Authors: 76e790746SPaolo Bonzini * Anthony Liguori <aliguori@us.ibm.com> 86e790746SPaolo Bonzini * 96e790746SPaolo Bonzini * This work is licensed under the terms of the GNU GPL, version 2. See 106e790746SPaolo Bonzini * the COPYING file in the top-level directory. 116e790746SPaolo Bonzini * 126e790746SPaolo Bonzini */ 136e790746SPaolo Bonzini 1480c71a24SPeter Maydell #include "qemu/osdep.h" 15da34e65cSMarkus Armbruster #include "qapi/error.h" 16827805a2SFam Zheng #include "qemu/iov.h" 170b8fa32fSMarkus Armbruster #include "qemu/module.h" 186e790746SPaolo Bonzini #include "qemu/error-report.h" 199b92fbcfSSergio Lopez #include "qemu/main-loop.h" 204f736650SSam Li #include "block/block_int.h" 216e790746SPaolo Bonzini #include "trace.h" 226e790746SPaolo Bonzini #include "hw/block/block.h" 23a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h" 246e790746SPaolo Bonzini #include "sysemu/blockdev.h" 25baf42268SStefan Hajnoczi #include "sysemu/block-ram-registrar.h" 262f780b6aSMarkus Armbruster #include "sysemu/sysemu.h" 2754d31236SMarkus Armbruster #include "sysemu/runstate.h" 286e790746SPaolo Bonzini #include "hw/virtio/virtio-blk.h" 296e790746SPaolo Bonzini #include "dataplane/virtio-blk.h" 3008e2c9f1SPaolo Bonzini #include "scsi/constants.h" 316e790746SPaolo Bonzini #ifdef __linux__ 326e790746SPaolo Bonzini # include <scsi/sg.h> 336e790746SPaolo Bonzini #endif 346e790746SPaolo Bonzini #include "hw/virtio/virtio-bus.h" 35ca77ee28SMarkus Armbruster #include "migration/qemu-file-types.h" 36783d1897SRusty Russell #include "hw/virtio/virtio-access.h" 37d9cf55a8SDaniil Tatianin #include "hw/virtio/virtio-blk-common.h" 384c41c69eSHiroki Narukawa #include "qemu/coroutine.h" 396e790746SPaolo Bonzini 40d14dde5eSGreg Kurz static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, 41edaffd9fSStefan Hajnoczi VirtIOBlockReq *req) 42671ec3f0SFam Zheng { 43671ec3f0SFam Zheng req->dev = s; 44edaffd9fSStefan Hajnoczi req->vq = vq; 45869d66afSStefan Hajnoczi req->qiov.size = 0; 462a6cdd6dSPaolo Bonzini req->in_len = 0; 47869d66afSStefan Hajnoczi req->next = NULL; 4895f7142aSPeter Lieven req->mr_next = NULL; 49671ec3f0SFam Zheng } 50671ec3f0SFam Zheng 51d14dde5eSGreg Kurz static void virtio_blk_free_request(VirtIOBlockReq *req) 52671ec3f0SFam Zheng { 53c84b3192SPaolo Bonzini g_free(req); 54671ec3f0SFam Zheng } 55671ec3f0SFam Zheng 5603de2f52SPaolo Bonzini static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) 576e790746SPaolo Bonzini { 586e790746SPaolo Bonzini VirtIOBlock *s = req->dev; 596e790746SPaolo Bonzini VirtIODevice *vdev = VIRTIO_DEVICE(s); 606e790746SPaolo Bonzini 61a576ceacSStefan Hajnoczi trace_virtio_blk_req_complete(vdev, req, status); 626e790746SPaolo Bonzini 636e790746SPaolo Bonzini stb_p(&req->in->status, status); 647bd04a04SStefan Hajnoczi iov_discard_undo(&req->inhdr_undo); 657bd04a04SStefan Hajnoczi iov_discard_undo(&req->outhdr_undo); 66edaffd9fSStefan Hajnoczi virtqueue_push(req->vq, &req->elem, req->in_len); 67eb41cf78SPaolo Bonzini if (s->dataplane_started && !s->dataplane_disabled) { 68edaffd9fSStefan Hajnoczi virtio_blk_data_plane_notify(s->dataplane, req->vq); 6903de2f52SPaolo Bonzini } else { 70edaffd9fSStefan Hajnoczi virtio_notify(vdev, req->vq); 716e790746SPaolo Bonzini } 72bf4bd461SFam Zheng } 73bf4bd461SFam Zheng 746e790746SPaolo Bonzini static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, 7500f639fbSStefano Garzarella bool is_read, bool acct_failed) 766e790746SPaolo Bonzini { 776e790746SPaolo Bonzini VirtIOBlock *s = req->dev; 789a6719d5SStefano Garzarella BlockErrorAction action = blk_get_error_action(s->blk, is_read, error); 796e790746SPaolo Bonzini 80a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_STOP) { 81466138dcSFam Zheng /* Break the link as the next request is going to be parsed from the 82466138dcSFam Zheng * ring again. Otherwise we may end up doing a double completion! */ 83466138dcSFam Zheng req->mr_next = NULL; 846e790746SPaolo Bonzini req->next = s->rq; 856e790746SPaolo Bonzini s->rq = req; 86a589569fSWenchao Xia } else if (action == BLOCK_ERROR_ACTION_REPORT) { 876e790746SPaolo Bonzini virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 8800f639fbSStefano Garzarella if (acct_failed) { 8901762e03SAlberto Garcia block_acct_failed(blk_get_stats(s->blk), &req->acct); 9000f639fbSStefano Garzarella } 91671ec3f0SFam Zheng virtio_blk_free_request(req); 926e790746SPaolo Bonzini } 936e790746SPaolo Bonzini 944be74634SMarkus Armbruster blk_error_action(s->blk, action, is_read, error); 95a589569fSWenchao Xia return action != BLOCK_ERROR_ACTION_IGNORE; 966e790746SPaolo Bonzini } 976e790746SPaolo Bonzini 986e790746SPaolo Bonzini static void virtio_blk_rw_complete(void *opaque, int ret) 996e790746SPaolo Bonzini { 10095f7142aSPeter Lieven VirtIOBlockReq *next = opaque; 101b9e413ddSPaolo Bonzini VirtIOBlock *s = next->dev; 102a576ceacSStefan Hajnoczi VirtIODevice *vdev = VIRTIO_DEVICE(s); 1036e790746SPaolo Bonzini 104b9e413ddSPaolo Bonzini aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 10595f7142aSPeter Lieven while (next) { 10695f7142aSPeter Lieven VirtIOBlockReq *req = next; 10795f7142aSPeter Lieven next = req->mr_next; 108a576ceacSStefan Hajnoczi trace_virtio_blk_rw_complete(vdev, req, ret); 1096e790746SPaolo Bonzini 11095f7142aSPeter Lieven if (req->qiov.nalloc != -1) { 111e61809edSDongli Zhang /* If nalloc is != -1 req->qiov is a local copy of the original 1129bb192a4SYaowei Bai * external iovec. It was allocated in submit_requests to be 1139bb192a4SYaowei Bai * able to merge requests. */ 11495f7142aSPeter Lieven qemu_iovec_destroy(&req->qiov); 11595f7142aSPeter Lieven } 11695f7142aSPeter Lieven 1176e790746SPaolo Bonzini if (ret) { 118bf4069fbSAnastasiia Rusakova int p = virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type); 119783d1897SRusty Russell bool is_read = !(p & VIRTIO_BLK_T_OUT); 1202a6cdd6dSPaolo Bonzini /* Note that memory may be dirtied on read failure. If the 1212a6cdd6dSPaolo Bonzini * virtio request is not completed here, as is the case for 1222a6cdd6dSPaolo Bonzini * BLOCK_ERROR_ACTION_STOP, the memory may not be copied 1232a6cdd6dSPaolo Bonzini * correctly during live migration. While this is ugly, 1242a6cdd6dSPaolo Bonzini * it is acceptable because the device is free to write to 1252a6cdd6dSPaolo Bonzini * the memory until the request is completed (which will 1262a6cdd6dSPaolo Bonzini * happen on the other side of the migration). 1272a6cdd6dSPaolo Bonzini */ 12800f639fbSStefano Garzarella if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) { 12995f7142aSPeter Lieven continue; 13095f7142aSPeter Lieven } 1316e790746SPaolo Bonzini } 1326e790746SPaolo Bonzini 1336e790746SPaolo Bonzini virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 134bf4069fbSAnastasiia Rusakova block_acct_done(blk_get_stats(s->blk), &req->acct); 135671ec3f0SFam Zheng virtio_blk_free_request(req); 1366e790746SPaolo Bonzini } 137b9e413ddSPaolo Bonzini aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 13895f7142aSPeter Lieven } 1396e790746SPaolo Bonzini 1406e790746SPaolo Bonzini static void virtio_blk_flush_complete(void *opaque, int ret) 1416e790746SPaolo Bonzini { 1426e790746SPaolo Bonzini VirtIOBlockReq *req = opaque; 143b9e413ddSPaolo Bonzini VirtIOBlock *s = req->dev; 1446e790746SPaolo Bonzini 145b9e413ddSPaolo Bonzini aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 1466e790746SPaolo Bonzini if (ret) { 14700f639fbSStefano Garzarella if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { 148b9e413ddSPaolo Bonzini goto out; 1496e790746SPaolo Bonzini } 1506e790746SPaolo Bonzini } 1516e790746SPaolo Bonzini 1526e790746SPaolo Bonzini virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 1539a6719d5SStefano Garzarella block_acct_done(blk_get_stats(s->blk), &req->acct); 154671ec3f0SFam Zheng virtio_blk_free_request(req); 155b9e413ddSPaolo Bonzini 156b9e413ddSPaolo Bonzini out: 157b9e413ddSPaolo Bonzini aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 1586e790746SPaolo Bonzini } 1596e790746SPaolo Bonzini 16037b06f8dSStefano Garzarella static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) 16137b06f8dSStefano Garzarella { 16237b06f8dSStefano Garzarella VirtIOBlockReq *req = opaque; 16337b06f8dSStefano Garzarella VirtIOBlock *s = req->dev; 16437b06f8dSStefano Garzarella bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & 16537b06f8dSStefano Garzarella ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; 16637b06f8dSStefano Garzarella 16737b06f8dSStefano Garzarella aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 16837b06f8dSStefano Garzarella if (ret) { 16937b06f8dSStefano Garzarella if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { 17037b06f8dSStefano Garzarella goto out; 17137b06f8dSStefano Garzarella } 17237b06f8dSStefano Garzarella } 17337b06f8dSStefano Garzarella 17437b06f8dSStefano Garzarella virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 17537b06f8dSStefano Garzarella if (is_write_zeroes) { 17637b06f8dSStefano Garzarella block_acct_done(blk_get_stats(s->blk), &req->acct); 17737b06f8dSStefano Garzarella } 17837b06f8dSStefano Garzarella virtio_blk_free_request(req); 17937b06f8dSStefano Garzarella 18037b06f8dSStefano Garzarella out: 18137b06f8dSStefano Garzarella aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 18237b06f8dSStefano Garzarella } 18337b06f8dSStefano Garzarella 1841dc936aaSFam Zheng #ifdef __linux__ 1851dc936aaSFam Zheng 1861dc936aaSFam Zheng typedef struct { 1871dc936aaSFam Zheng VirtIOBlockReq *req; 1881dc936aaSFam Zheng struct sg_io_hdr hdr; 1891dc936aaSFam Zheng } VirtIOBlockIoctlReq; 1901dc936aaSFam Zheng 1911dc936aaSFam Zheng static void virtio_blk_ioctl_complete(void *opaque, int status) 1921dc936aaSFam Zheng { 1931dc936aaSFam Zheng VirtIOBlockIoctlReq *ioctl_req = opaque; 1941dc936aaSFam Zheng VirtIOBlockReq *req = ioctl_req->req; 1959d456654SPaolo Bonzini VirtIOBlock *s = req->dev; 1969d456654SPaolo Bonzini VirtIODevice *vdev = VIRTIO_DEVICE(s); 1971dc936aaSFam Zheng struct virtio_scsi_inhdr *scsi; 1981dc936aaSFam Zheng struct sg_io_hdr *hdr; 1991dc936aaSFam Zheng 2001dc936aaSFam Zheng scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base; 2011dc936aaSFam Zheng 2021dc936aaSFam Zheng if (status) { 2031dc936aaSFam Zheng status = VIRTIO_BLK_S_UNSUPP; 2041dc936aaSFam Zheng virtio_stl_p(vdev, &scsi->errors, 255); 2051dc936aaSFam Zheng goto out; 2061dc936aaSFam Zheng } 2071dc936aaSFam Zheng 2081dc936aaSFam Zheng hdr = &ioctl_req->hdr; 2091dc936aaSFam Zheng /* 2101dc936aaSFam Zheng * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi) 2111dc936aaSFam Zheng * clear the masked_status field [hence status gets cleared too, see 2121dc936aaSFam Zheng * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED 2131dc936aaSFam Zheng * status has occurred. However they do set DRIVER_SENSE in driver_status 2141dc936aaSFam Zheng * field. Also a (sb_len_wr > 0) indicates there is a sense buffer. 2151dc936aaSFam Zheng */ 2161dc936aaSFam Zheng if (hdr->status == 0 && hdr->sb_len_wr > 0) { 2171dc936aaSFam Zheng hdr->status = CHECK_CONDITION; 2181dc936aaSFam Zheng } 2191dc936aaSFam Zheng 2201dc936aaSFam Zheng virtio_stl_p(vdev, &scsi->errors, 2211dc936aaSFam Zheng hdr->status | (hdr->msg_status << 8) | 2221dc936aaSFam Zheng (hdr->host_status << 16) | (hdr->driver_status << 24)); 2231dc936aaSFam Zheng virtio_stl_p(vdev, &scsi->residual, hdr->resid); 2241dc936aaSFam Zheng virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr); 2251dc936aaSFam Zheng virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); 2261dc936aaSFam Zheng 2271dc936aaSFam Zheng out: 228b9e413ddSPaolo Bonzini aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 2291dc936aaSFam Zheng virtio_blk_req_complete(req, status); 2301dc936aaSFam Zheng virtio_blk_free_request(req); 231b9e413ddSPaolo Bonzini aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 2321dc936aaSFam Zheng g_free(ioctl_req); 2331dc936aaSFam Zheng } 2341dc936aaSFam Zheng 2351dc936aaSFam Zheng #endif 2361dc936aaSFam Zheng 237edaffd9fSStefan Hajnoczi static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq) 2386e790746SPaolo Bonzini { 239edaffd9fSStefan Hajnoczi VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq)); 2406e790746SPaolo Bonzini 24151b19ebeSPaolo Bonzini if (req) { 242edaffd9fSStefan Hajnoczi virtio_blk_init_request(s, vq, req); 2436e790746SPaolo Bonzini } 2446e790746SPaolo Bonzini return req; 2456e790746SPaolo Bonzini } 2466e790746SPaolo Bonzini 24775344fa4SFam Zheng static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) 2486e790746SPaolo Bonzini { 2496e790746SPaolo Bonzini int status = VIRTIO_BLK_S_OK; 2505a05cbeeSFam Zheng struct virtio_scsi_inhdr *scsi = NULL; 25175344fa4SFam Zheng VirtIOBlock *blk = req->dev; 252bf4069fbSAnastasiia Rusakova VirtIODevice *vdev = VIRTIO_DEVICE(blk); 253bf4069fbSAnastasiia Rusakova VirtQueueElement *elem = &req->elem; 254783d1897SRusty Russell 2555a05cbeeSFam Zheng #ifdef __linux__ 2565a05cbeeSFam Zheng int i; 2571dc936aaSFam Zheng VirtIOBlockIoctlReq *ioctl_req; 258a209f461SFam Zheng BlockAIOCB *acb; 2595a05cbeeSFam Zheng #endif 2606e790746SPaolo Bonzini 2616e790746SPaolo Bonzini /* 2626e790746SPaolo Bonzini * We require at least one output segment each for the virtio_blk_outhdr 2636e790746SPaolo Bonzini * and the SCSI command block. 2646e790746SPaolo Bonzini * 2656e790746SPaolo Bonzini * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr 2666e790746SPaolo Bonzini * and the sense buffer pointer in the input segments. 2676e790746SPaolo Bonzini */ 2685a05cbeeSFam Zheng if (elem->out_num < 2 || elem->in_num < 3) { 2695a05cbeeSFam Zheng status = VIRTIO_BLK_S_IOERR; 2705a05cbeeSFam Zheng goto fail; 2716e790746SPaolo Bonzini } 2726e790746SPaolo Bonzini 2736e790746SPaolo Bonzini /* 2746e790746SPaolo Bonzini * The scsi inhdr is placed in the second-to-last input segment, just 2756e790746SPaolo Bonzini * before the regular inhdr. 2766e790746SPaolo Bonzini */ 2775a05cbeeSFam Zheng scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; 2786e790746SPaolo Bonzini 279bbe8bd4dSStefano Garzarella if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) { 2806e790746SPaolo Bonzini status = VIRTIO_BLK_S_UNSUPP; 2816e790746SPaolo Bonzini goto fail; 2826e790746SPaolo Bonzini } 2836e790746SPaolo Bonzini 2846e790746SPaolo Bonzini /* 2856e790746SPaolo Bonzini * No support for bidirection commands yet. 2866e790746SPaolo Bonzini */ 2875a05cbeeSFam Zheng if (elem->out_num > 2 && elem->in_num > 3) { 2886e790746SPaolo Bonzini status = VIRTIO_BLK_S_UNSUPP; 2896e790746SPaolo Bonzini goto fail; 2906e790746SPaolo Bonzini } 2916e790746SPaolo Bonzini 2926e790746SPaolo Bonzini #ifdef __linux__ 2931dc936aaSFam Zheng ioctl_req = g_new0(VirtIOBlockIoctlReq, 1); 2941dc936aaSFam Zheng ioctl_req->req = req; 2951dc936aaSFam Zheng ioctl_req->hdr.interface_id = 'S'; 2961dc936aaSFam Zheng ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len; 2971dc936aaSFam Zheng ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base; 2981dc936aaSFam Zheng ioctl_req->hdr.dxfer_len = 0; 2996e790746SPaolo Bonzini 3005a05cbeeSFam Zheng if (elem->out_num > 2) { 3016e790746SPaolo Bonzini /* 3026e790746SPaolo Bonzini * If there are more than the minimally required 2 output segments 3036e790746SPaolo Bonzini * there is write payload starting from the third iovec. 3046e790746SPaolo Bonzini */ 3051dc936aaSFam Zheng ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV; 3061dc936aaSFam Zheng ioctl_req->hdr.iovec_count = elem->out_num - 2; 3076e790746SPaolo Bonzini 3081dc936aaSFam Zheng for (i = 0; i < ioctl_req->hdr.iovec_count; i++) { 3091dc936aaSFam Zheng ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len; 3101dc936aaSFam Zheng } 3116e790746SPaolo Bonzini 3121dc936aaSFam Zheng ioctl_req->hdr.dxferp = elem->out_sg + 2; 3136e790746SPaolo Bonzini 3145a05cbeeSFam Zheng } else if (elem->in_num > 3) { 3156e790746SPaolo Bonzini /* 3166e790746SPaolo Bonzini * If we have more than 3 input segments the guest wants to actually 3176e790746SPaolo Bonzini * read data. 3186e790746SPaolo Bonzini */ 3191dc936aaSFam Zheng ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV; 3201dc936aaSFam Zheng ioctl_req->hdr.iovec_count = elem->in_num - 3; 3211dc936aaSFam Zheng for (i = 0; i < ioctl_req->hdr.iovec_count; i++) { 3221dc936aaSFam Zheng ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len; 3231dc936aaSFam Zheng } 3246e790746SPaolo Bonzini 3251dc936aaSFam Zheng ioctl_req->hdr.dxferp = elem->in_sg; 3266e790746SPaolo Bonzini } else { 3276e790746SPaolo Bonzini /* 3286e790746SPaolo Bonzini * Some SCSI commands don't actually transfer any data. 3296e790746SPaolo Bonzini */ 3301dc936aaSFam Zheng ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE; 3316e790746SPaolo Bonzini } 3326e790746SPaolo Bonzini 3331dc936aaSFam Zheng ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; 3341dc936aaSFam Zheng ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; 3356e790746SPaolo Bonzini 336a209f461SFam Zheng acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr, 3371dc936aaSFam Zheng virtio_blk_ioctl_complete, ioctl_req); 338a209f461SFam Zheng if (!acb) { 339a209f461SFam Zheng g_free(ioctl_req); 340a209f461SFam Zheng status = VIRTIO_BLK_S_UNSUPP; 341a209f461SFam Zheng goto fail; 342a209f461SFam Zheng } 3431dc936aaSFam Zheng return -EINPROGRESS; 3446e790746SPaolo Bonzini #else 3456e790746SPaolo Bonzini abort(); 3466e790746SPaolo Bonzini #endif 3476e790746SPaolo Bonzini 3486e790746SPaolo Bonzini fail: 3496e790746SPaolo Bonzini /* Just put anything nonzero so that the ioctl fails in the guest. */ 3505a05cbeeSFam Zheng if (scsi) { 351783d1897SRusty Russell virtio_stl_p(vdev, &scsi->errors, 255); 3525a05cbeeSFam Zheng } 3535a05cbeeSFam Zheng return status; 3545a05cbeeSFam Zheng } 3555a05cbeeSFam Zheng 3565a05cbeeSFam Zheng static void virtio_blk_handle_scsi(VirtIOBlockReq *req) 3575a05cbeeSFam Zheng { 3585a05cbeeSFam Zheng int status; 3595a05cbeeSFam Zheng 36075344fa4SFam Zheng status = virtio_blk_handle_scsi_req(req); 3611dc936aaSFam Zheng if (status != -EINPROGRESS) { 3626e790746SPaolo Bonzini virtio_blk_req_complete(req, status); 363671ec3f0SFam Zheng virtio_blk_free_request(req); 3646e790746SPaolo Bonzini } 3651dc936aaSFam Zheng } 3666e790746SPaolo Bonzini 367baf42268SStefan Hajnoczi static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb, 36895f7142aSPeter Lieven int start, int num_reqs, int niov) 3696e790746SPaolo Bonzini { 370baf42268SStefan Hajnoczi BlockBackend *blk = s->blk; 37195f7142aSPeter Lieven QEMUIOVector *qiov = &mrb->reqs[start]->qiov; 37295f7142aSPeter Lieven int64_t sector_num = mrb->reqs[start]->sector_num; 37395f7142aSPeter Lieven bool is_write = mrb->is_write; 374baf42268SStefan Hajnoczi BdrvRequestFlags flags = 0; 3756e790746SPaolo Bonzini 37695f7142aSPeter Lieven if (num_reqs > 1) { 37795f7142aSPeter Lieven int i; 37895f7142aSPeter Lieven struct iovec *tmp_iov = qiov->iov; 37995f7142aSPeter Lieven int tmp_niov = qiov->niov; 38095f7142aSPeter Lieven 38195f7142aSPeter Lieven /* mrb->reqs[start]->qiov was initialized from external so we can't 382b5772fddSEric Blake * modify it here. We need to initialize it locally and then add the 38395f7142aSPeter Lieven * external iovecs. */ 38495f7142aSPeter Lieven qemu_iovec_init(qiov, niov); 38595f7142aSPeter Lieven 38695f7142aSPeter Lieven for (i = 0; i < tmp_niov; i++) { 38795f7142aSPeter Lieven qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len); 38895f7142aSPeter Lieven } 38995f7142aSPeter Lieven 39095f7142aSPeter Lieven for (i = start + 1; i < start + num_reqs; i++) { 39195f7142aSPeter Lieven qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0, 39295f7142aSPeter Lieven mrb->reqs[i]->qiov.size); 39395f7142aSPeter Lieven mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; 39495f7142aSPeter Lieven } 39595f7142aSPeter Lieven 396a576ceacSStefan Hajnoczi trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev), 397a576ceacSStefan Hajnoczi mrb, start, num_reqs, 398b5772fddSEric Blake sector_num << BDRV_SECTOR_BITS, 399b5772fddSEric Blake qiov->size, is_write); 40095f7142aSPeter Lieven block_acct_merge_done(blk_get_stats(blk), 40195f7142aSPeter Lieven is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ, 40295f7142aSPeter Lieven num_reqs - 1); 40395f7142aSPeter Lieven } 40495f7142aSPeter Lieven 405baf42268SStefan Hajnoczi if (blk_ram_registrar_ok(&s->blk_ram_registrar)) { 406baf42268SStefan Hajnoczi flags |= BDRV_REQ_REGISTERED_BUF; 407baf42268SStefan Hajnoczi } 408baf42268SStefan Hajnoczi 40995f7142aSPeter Lieven if (is_write) { 410baf42268SStefan Hajnoczi blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 411baf42268SStefan Hajnoczi flags, virtio_blk_rw_complete, 412baf42268SStefan Hajnoczi mrb->reqs[start]); 41395f7142aSPeter Lieven } else { 414baf42268SStefan Hajnoczi blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 415baf42268SStefan Hajnoczi flags, virtio_blk_rw_complete, 416baf42268SStefan Hajnoczi mrb->reqs[start]); 41795f7142aSPeter Lieven } 41895f7142aSPeter Lieven } 41995f7142aSPeter Lieven 42095f7142aSPeter Lieven static int multireq_compare(const void *a, const void *b) 42195f7142aSPeter Lieven { 42295f7142aSPeter Lieven const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a, 42395f7142aSPeter Lieven *req2 = *(VirtIOBlockReq **)b; 42495f7142aSPeter Lieven 42595f7142aSPeter Lieven /* 42695f7142aSPeter Lieven * Note that we can't simply subtract sector_num1 from sector_num2 42795f7142aSPeter Lieven * here as that could overflow the return value. 42895f7142aSPeter Lieven */ 42995f7142aSPeter Lieven if (req1->sector_num > req2->sector_num) { 43095f7142aSPeter Lieven return 1; 43195f7142aSPeter Lieven } else if (req1->sector_num < req2->sector_num) { 43295f7142aSPeter Lieven return -1; 43395f7142aSPeter Lieven } else { 43495f7142aSPeter Lieven return 0; 43595f7142aSPeter Lieven } 43695f7142aSPeter Lieven } 43795f7142aSPeter Lieven 438baf42268SStefan Hajnoczi static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb) 43995f7142aSPeter Lieven { 44095f7142aSPeter Lieven int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; 4415def6b80SEric Blake uint32_t max_transfer; 44295f7142aSPeter Lieven int64_t sector_num = 0; 44395f7142aSPeter Lieven 44495f7142aSPeter Lieven if (mrb->num_reqs == 1) { 445baf42268SStefan Hajnoczi submit_requests(s, mrb, 0, 1, -1); 44695f7142aSPeter Lieven mrb->num_reqs = 0; 4476e790746SPaolo Bonzini return; 4486e790746SPaolo Bonzini } 4496e790746SPaolo Bonzini 4505def6b80SEric Blake max_transfer = blk_get_max_transfer(mrb->reqs[0]->dev->blk); 45195f7142aSPeter Lieven 45295f7142aSPeter Lieven qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs), 45395f7142aSPeter Lieven &multireq_compare); 45495f7142aSPeter Lieven 45595f7142aSPeter Lieven for (i = 0; i < mrb->num_reqs; i++) { 45695f7142aSPeter Lieven VirtIOBlockReq *req = mrb->reqs[i]; 45795f7142aSPeter Lieven if (num_reqs > 0) { 45849cffbc6SGonglei /* 45949cffbc6SGonglei * NOTE: We cannot merge the requests in below situations: 46049cffbc6SGonglei * 1. requests are not sequential 46149cffbc6SGonglei * 2. merge would exceed maximum number of IOVs 46249cffbc6SGonglei * 3. merge would exceed maximum transfer length of backend device 46349cffbc6SGonglei */ 46449cffbc6SGonglei if (sector_num + nb_sectors != req->sector_num || 465baf42268SStefan Hajnoczi niov > blk_get_max_iov(s->blk) - req->qiov.niov || 4665def6b80SEric Blake req->qiov.size > max_transfer || 4675def6b80SEric Blake nb_sectors > (max_transfer - 4685def6b80SEric Blake req->qiov.size) / BDRV_SECTOR_SIZE) { 469baf42268SStefan Hajnoczi submit_requests(s, mrb, start, num_reqs, niov); 47095f7142aSPeter Lieven num_reqs = 0; 4716e790746SPaolo Bonzini } 4726e790746SPaolo Bonzini } 4736e790746SPaolo Bonzini 47495f7142aSPeter Lieven if (num_reqs == 0) { 47595f7142aSPeter Lieven sector_num = req->sector_num; 47695f7142aSPeter Lieven nb_sectors = niov = 0; 47795f7142aSPeter Lieven start = i; 47895f7142aSPeter Lieven } 47995f7142aSPeter Lieven 48095f7142aSPeter Lieven nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE; 48195f7142aSPeter Lieven niov += req->qiov.niov; 48295f7142aSPeter Lieven num_reqs++; 48395f7142aSPeter Lieven } 48495f7142aSPeter Lieven 485baf42268SStefan Hajnoczi submit_requests(s, mrb, start, num_reqs, niov); 48695f7142aSPeter Lieven mrb->num_reqs = 0; 4876e790746SPaolo Bonzini } 4886e790746SPaolo Bonzini 4896e790746SPaolo Bonzini static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) 4906e790746SPaolo Bonzini { 491bf4069fbSAnastasiia Rusakova VirtIOBlock *s = req->dev; 492bf4069fbSAnastasiia Rusakova 493bf4069fbSAnastasiia Rusakova block_acct_start(blk_get_stats(s->blk), &req->acct, 0, 4945366d0c8SBenoît Canet BLOCK_ACCT_FLUSH); 4956e790746SPaolo Bonzini 4966e790746SPaolo Bonzini /* 4976e790746SPaolo Bonzini * Make sure all outstanding writes are posted to the backing device. 4986e790746SPaolo Bonzini */ 49995f7142aSPeter Lieven if (mrb->is_write && mrb->num_reqs > 0) { 500baf42268SStefan Hajnoczi virtio_blk_submit_multireq(s, mrb); 50195f7142aSPeter Lieven } 502bf4069fbSAnastasiia Rusakova blk_aio_flush(s->blk, virtio_blk_flush_complete, req); 5036e790746SPaolo Bonzini } 5046e790746SPaolo Bonzini 505d0e14376SMarkus Armbruster static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, 506d0e14376SMarkus Armbruster uint64_t sector, size_t size) 507d0e14376SMarkus Armbruster { 5083c2daac0SMarkus Armbruster uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; 5093c2daac0SMarkus Armbruster uint64_t total_sectors; 5103c2daac0SMarkus Armbruster 51175af1f34SPeter Lieven if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 51295f7142aSPeter Lieven return false; 51395f7142aSPeter Lieven } 514d0e14376SMarkus Armbruster if (sector & dev->sector_mask) { 515d0e14376SMarkus Armbruster return false; 516d0e14376SMarkus Armbruster } 5172a30307fSMarkus Armbruster if (size % dev->conf.conf.logical_block_size) { 518d0e14376SMarkus Armbruster return false; 519d0e14376SMarkus Armbruster } 5204be74634SMarkus Armbruster blk_get_geometry(dev->blk, &total_sectors); 5213c2daac0SMarkus Armbruster if (sector > total_sectors || nb_sectors > total_sectors - sector) { 5223c2daac0SMarkus Armbruster return false; 5233c2daac0SMarkus Armbruster } 524d0e14376SMarkus Armbruster return true; 525d0e14376SMarkus Armbruster } 526d0e14376SMarkus Armbruster 52737b06f8dSStefano Garzarella static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req, 52837b06f8dSStefano Garzarella struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes) 52937b06f8dSStefano Garzarella { 53037b06f8dSStefano Garzarella VirtIOBlock *s = req->dev; 53137b06f8dSStefano Garzarella VirtIODevice *vdev = VIRTIO_DEVICE(s); 53237b06f8dSStefano Garzarella uint64_t sector; 53337b06f8dSStefano Garzarella uint32_t num_sectors, flags, max_sectors; 53437b06f8dSStefano Garzarella uint8_t err_status; 53537b06f8dSStefano Garzarella int bytes; 53637b06f8dSStefano Garzarella 53737b06f8dSStefano Garzarella sector = virtio_ldq_p(vdev, &dwz_hdr->sector); 53837b06f8dSStefano Garzarella num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors); 53937b06f8dSStefano Garzarella flags = virtio_ldl_p(vdev, &dwz_hdr->flags); 54037b06f8dSStefano Garzarella max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors : 54137b06f8dSStefano Garzarella s->conf.max_discard_sectors; 54237b06f8dSStefano Garzarella 54337b06f8dSStefano Garzarella /* 54437b06f8dSStefano Garzarella * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, this check 54537b06f8dSStefano Garzarella * make us sure that "num_sectors << BDRV_SECTOR_BITS" can fit in 54637b06f8dSStefano Garzarella * the integer variable. 54737b06f8dSStefano Garzarella */ 54837b06f8dSStefano Garzarella if (unlikely(num_sectors > max_sectors)) { 54937b06f8dSStefano Garzarella err_status = VIRTIO_BLK_S_IOERR; 55037b06f8dSStefano Garzarella goto err; 55137b06f8dSStefano Garzarella } 55237b06f8dSStefano Garzarella 55337b06f8dSStefano Garzarella bytes = num_sectors << BDRV_SECTOR_BITS; 55437b06f8dSStefano Garzarella 55537b06f8dSStefano Garzarella if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) { 55637b06f8dSStefano Garzarella err_status = VIRTIO_BLK_S_IOERR; 55737b06f8dSStefano Garzarella goto err; 55837b06f8dSStefano Garzarella } 55937b06f8dSStefano Garzarella 56037b06f8dSStefano Garzarella /* 56137b06f8dSStefano Garzarella * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard 56237b06f8dSStefano Garzarella * and write zeroes commands if any unknown flag is set. 56337b06f8dSStefano Garzarella */ 56437b06f8dSStefano Garzarella if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { 56537b06f8dSStefano Garzarella err_status = VIRTIO_BLK_S_UNSUPP; 56637b06f8dSStefano Garzarella goto err; 56737b06f8dSStefano Garzarella } 56837b06f8dSStefano Garzarella 56937b06f8dSStefano Garzarella if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */ 57037b06f8dSStefano Garzarella int blk_aio_flags = 0; 57137b06f8dSStefano Garzarella 57237b06f8dSStefano Garzarella if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 57337b06f8dSStefano Garzarella blk_aio_flags |= BDRV_REQ_MAY_UNMAP; 57437b06f8dSStefano Garzarella } 57537b06f8dSStefano Garzarella 57637b06f8dSStefano Garzarella block_acct_start(blk_get_stats(s->blk), &req->acct, bytes, 57737b06f8dSStefano Garzarella BLOCK_ACCT_WRITE); 57837b06f8dSStefano Garzarella 57937b06f8dSStefano Garzarella blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS, 58037b06f8dSStefano Garzarella bytes, blk_aio_flags, 58137b06f8dSStefano Garzarella virtio_blk_discard_write_zeroes_complete, req); 58237b06f8dSStefano Garzarella } else { /* VIRTIO_BLK_T_DISCARD */ 58337b06f8dSStefano Garzarella /* 58437b06f8dSStefano Garzarella * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for 58537b06f8dSStefano Garzarella * discard commands if the unmap flag is set. 58637b06f8dSStefano Garzarella */ 58737b06f8dSStefano Garzarella if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { 58837b06f8dSStefano Garzarella err_status = VIRTIO_BLK_S_UNSUPP; 58937b06f8dSStefano Garzarella goto err; 59037b06f8dSStefano Garzarella } 59137b06f8dSStefano Garzarella 59237b06f8dSStefano Garzarella blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes, 59337b06f8dSStefano Garzarella virtio_blk_discard_write_zeroes_complete, req); 59437b06f8dSStefano Garzarella } 59537b06f8dSStefano Garzarella 59637b06f8dSStefano Garzarella return VIRTIO_BLK_S_OK; 59737b06f8dSStefano Garzarella 59837b06f8dSStefano Garzarella err: 59937b06f8dSStefano Garzarella if (is_write_zeroes) { 60037b06f8dSStefano Garzarella block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE); 60137b06f8dSStefano Garzarella } 60237b06f8dSStefano Garzarella return err_status; 60337b06f8dSStefano Garzarella } 60437b06f8dSStefano Garzarella 6054f736650SSam Li typedef struct ZoneCmdData { 6064f736650SSam Li VirtIOBlockReq *req; 6074f736650SSam Li struct iovec *in_iov; 6084f736650SSam Li unsigned in_num; 6094f736650SSam Li union { 6104f736650SSam Li struct { 6114f736650SSam Li unsigned int nr_zones; 6124f736650SSam Li BlockZoneDescriptor *zones; 6134f736650SSam Li } zone_report_data; 6144f736650SSam Li struct { 6154f736650SSam Li int64_t offset; 6164f736650SSam Li } zone_append_data; 6174f736650SSam Li }; 6184f736650SSam Li } ZoneCmdData; 6194f736650SSam Li 6204f736650SSam Li /* 6214f736650SSam Li * check zoned_request: error checking before issuing requests. If all checks 6224f736650SSam Li * passed, return true. 6234f736650SSam Li * append: true if only zone append requests issued. 6244f736650SSam Li */ 6254f736650SSam Li static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len, 6264f736650SSam Li bool append, uint8_t *status) { 6274f736650SSam Li BlockDriverState *bs = blk_bs(s->blk); 6284f736650SSam Li int index; 6294f736650SSam Li 6304f736650SSam Li if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) { 6314f736650SSam Li *status = VIRTIO_BLK_S_UNSUPP; 6324f736650SSam Li return false; 6334f736650SSam Li } 6344f736650SSam Li 6354f736650SSam Li if (offset < 0 || len < 0 || len > (bs->total_sectors << BDRV_SECTOR_BITS) 6364f736650SSam Li || offset > (bs->total_sectors << BDRV_SECTOR_BITS) - len) { 6374f736650SSam Li *status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 6384f736650SSam Li return false; 6394f736650SSam Li } 6404f736650SSam Li 6414f736650SSam Li if (append) { 6424f736650SSam Li if (bs->bl.write_granularity) { 6434f736650SSam Li if ((offset % bs->bl.write_granularity) != 0) { 6444f736650SSam Li *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP; 6454f736650SSam Li return false; 6464f736650SSam Li } 6474f736650SSam Li } 6484f736650SSam Li 6494f736650SSam Li index = offset / bs->bl.zone_size; 6504f736650SSam Li if (BDRV_ZT_IS_CONV(bs->wps->wp[index])) { 6514f736650SSam Li *status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 6524f736650SSam Li return false; 6534f736650SSam Li } 6544f736650SSam Li 6554f736650SSam Li if (len / 512 > bs->bl.max_append_sectors) { 6564f736650SSam Li if (bs->bl.max_append_sectors == 0) { 6574f736650SSam Li *status = VIRTIO_BLK_S_UNSUPP; 6584f736650SSam Li } else { 6594f736650SSam Li *status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 6604f736650SSam Li } 6614f736650SSam Li return false; 6624f736650SSam Li } 6634f736650SSam Li } 6644f736650SSam Li return true; 6654f736650SSam Li } 6664f736650SSam Li 6674f736650SSam Li static void virtio_blk_zone_report_complete(void *opaque, int ret) 6684f736650SSam Li { 6694f736650SSam Li ZoneCmdData *data = opaque; 6704f736650SSam Li VirtIOBlockReq *req = data->req; 6714f736650SSam Li VirtIOBlock *s = req->dev; 6724f736650SSam Li VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); 6734f736650SSam Li struct iovec *in_iov = data->in_iov; 6744f736650SSam Li unsigned in_num = data->in_num; 6754f736650SSam Li int64_t zrp_size, n, j = 0; 6764f736650SSam Li int64_t nz = data->zone_report_data.nr_zones; 6774f736650SSam Li int8_t err_status = VIRTIO_BLK_S_OK; 6784f736650SSam Li 679*4e92acf7SSam Li trace_virtio_blk_zone_report_complete(vdev, req, nz, ret); 6804f736650SSam Li if (ret) { 6814f736650SSam Li err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 6824f736650SSam Li goto out; 6834f736650SSam Li } 6844f736650SSam Li 6854f736650SSam Li struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) { 6864f736650SSam Li .nr_zones = cpu_to_le64(nz), 6874f736650SSam Li }; 6884f736650SSam Li zrp_size = sizeof(struct virtio_blk_zone_report) 6894f736650SSam Li + sizeof(struct virtio_blk_zone_descriptor) * nz; 6904f736650SSam Li n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr)); 6914f736650SSam Li if (n != sizeof(zrp_hdr)) { 6924f736650SSam Li virtio_error(vdev, "Driver provided input buffer that is too small!"); 6934f736650SSam Li err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 6944f736650SSam Li goto out; 6954f736650SSam Li } 6964f736650SSam Li 6974f736650SSam Li for (size_t i = sizeof(zrp_hdr); i < zrp_size; 6984f736650SSam Li i += sizeof(struct virtio_blk_zone_descriptor), ++j) { 6994f736650SSam Li struct virtio_blk_zone_descriptor desc = 7004f736650SSam Li (struct virtio_blk_zone_descriptor) { 7014f736650SSam Li .z_start = cpu_to_le64(data->zone_report_data.zones[j].start 7024f736650SSam Li >> BDRV_SECTOR_BITS), 7034f736650SSam Li .z_cap = cpu_to_le64(data->zone_report_data.zones[j].cap 7044f736650SSam Li >> BDRV_SECTOR_BITS), 7054f736650SSam Li .z_wp = cpu_to_le64(data->zone_report_data.zones[j].wp 7064f736650SSam Li >> BDRV_SECTOR_BITS), 7074f736650SSam Li }; 7084f736650SSam Li 7094f736650SSam Li switch (data->zone_report_data.zones[j].type) { 7104f736650SSam Li case BLK_ZT_CONV: 7114f736650SSam Li desc.z_type = VIRTIO_BLK_ZT_CONV; 7124f736650SSam Li break; 7134f736650SSam Li case BLK_ZT_SWR: 7144f736650SSam Li desc.z_type = VIRTIO_BLK_ZT_SWR; 7154f736650SSam Li break; 7164f736650SSam Li case BLK_ZT_SWP: 7174f736650SSam Li desc.z_type = VIRTIO_BLK_ZT_SWP; 7184f736650SSam Li break; 7194f736650SSam Li default: 7204f736650SSam Li g_assert_not_reached(); 7214f736650SSam Li } 7224f736650SSam Li 7234f736650SSam Li switch (data->zone_report_data.zones[j].state) { 7244f736650SSam Li case BLK_ZS_RDONLY: 7254f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_RDONLY; 7264f736650SSam Li break; 7274f736650SSam Li case BLK_ZS_OFFLINE: 7284f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_OFFLINE; 7294f736650SSam Li break; 7304f736650SSam Li case BLK_ZS_EMPTY: 7314f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_EMPTY; 7324f736650SSam Li break; 7334f736650SSam Li case BLK_ZS_CLOSED: 7344f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_CLOSED; 7354f736650SSam Li break; 7364f736650SSam Li case BLK_ZS_FULL: 7374f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_FULL; 7384f736650SSam Li break; 7394f736650SSam Li case BLK_ZS_EOPEN: 7404f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_EOPEN; 7414f736650SSam Li break; 7424f736650SSam Li case BLK_ZS_IOPEN: 7434f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_IOPEN; 7444f736650SSam Li break; 7454f736650SSam Li case BLK_ZS_NOT_WP: 7464f736650SSam Li desc.z_state = VIRTIO_BLK_ZS_NOT_WP; 7474f736650SSam Li break; 7484f736650SSam Li default: 7494f736650SSam Li g_assert_not_reached(); 7504f736650SSam Li } 7514f736650SSam Li 7524f736650SSam Li /* TODO: it takes O(n^2) time complexity. Optimizations required. */ 7534f736650SSam Li n = iov_from_buf(in_iov, in_num, i, &desc, sizeof(desc)); 7544f736650SSam Li if (n != sizeof(desc)) { 7554f736650SSam Li virtio_error(vdev, "Driver provided input buffer " 7564f736650SSam Li "for descriptors that is too small!"); 7574f736650SSam Li err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 7584f736650SSam Li } 7594f736650SSam Li } 7604f736650SSam Li 7614f736650SSam Li out: 7624f736650SSam Li aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 7634f736650SSam Li virtio_blk_req_complete(req, err_status); 7644f736650SSam Li virtio_blk_free_request(req); 7654f736650SSam Li aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 7664f736650SSam Li g_free(data->zone_report_data.zones); 7674f736650SSam Li g_free(data); 7684f736650SSam Li } 7694f736650SSam Li 7704f736650SSam Li static void virtio_blk_handle_zone_report(VirtIOBlockReq *req, 7714f736650SSam Li struct iovec *in_iov, 7724f736650SSam Li unsigned in_num) 7734f736650SSam Li { 7744f736650SSam Li VirtIOBlock *s = req->dev; 7754f736650SSam Li VirtIODevice *vdev = VIRTIO_DEVICE(s); 7764f736650SSam Li unsigned int nr_zones; 7774f736650SSam Li ZoneCmdData *data; 7784f736650SSam Li int64_t zone_size, offset; 7794f736650SSam Li uint8_t err_status; 7804f736650SSam Li 7814f736650SSam Li if (req->in_len < sizeof(struct virtio_blk_inhdr) + 7824f736650SSam Li sizeof(struct virtio_blk_zone_report) + 7834f736650SSam Li sizeof(struct virtio_blk_zone_descriptor)) { 7844f736650SSam Li virtio_error(vdev, "in buffer too small for zone report"); 7854f736650SSam Li return; 7864f736650SSam Li } 7874f736650SSam Li 7884f736650SSam Li /* start byte offset of the zone report */ 7894f736650SSam Li offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS; 7904f736650SSam Li if (!check_zoned_request(s, offset, 0, false, &err_status)) { 7914f736650SSam Li goto out; 7924f736650SSam Li } 7934f736650SSam Li nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) - 7944f736650SSam Li sizeof(struct virtio_blk_zone_report)) / 7954f736650SSam Li sizeof(struct virtio_blk_zone_descriptor); 796*4e92acf7SSam Li trace_virtio_blk_handle_zone_report(vdev, req, 797*4e92acf7SSam Li offset >> BDRV_SECTOR_BITS, nr_zones); 7984f736650SSam Li 7994f736650SSam Li zone_size = sizeof(BlockZoneDescriptor) * nr_zones; 8004f736650SSam Li data = g_malloc(sizeof(ZoneCmdData)); 8014f736650SSam Li data->req = req; 8024f736650SSam Li data->in_iov = in_iov; 8034f736650SSam Li data->in_num = in_num; 8044f736650SSam Li data->zone_report_data.nr_zones = nr_zones; 8054f736650SSam Li data->zone_report_data.zones = g_malloc(zone_size), 8064f736650SSam Li 8074f736650SSam Li blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones, 8084f736650SSam Li data->zone_report_data.zones, 8094f736650SSam Li virtio_blk_zone_report_complete, data); 8104f736650SSam Li return; 8114f736650SSam Li out: 8124f736650SSam Li virtio_blk_req_complete(req, err_status); 8134f736650SSam Li virtio_blk_free_request(req); 8144f736650SSam Li } 8154f736650SSam Li 8164f736650SSam Li static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) 8174f736650SSam Li { 8184f736650SSam Li VirtIOBlockReq *req = opaque; 8194f736650SSam Li VirtIOBlock *s = req->dev; 820*4e92acf7SSam Li VirtIODevice *vdev = VIRTIO_DEVICE(s); 8214f736650SSam Li int8_t err_status = VIRTIO_BLK_S_OK; 822*4e92acf7SSam Li trace_virtio_blk_zone_mgmt_complete(vdev, req,ret); 8234f736650SSam Li 8244f736650SSam Li if (ret) { 8254f736650SSam Li err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 8264f736650SSam Li } 8274f736650SSam Li 8284f736650SSam Li aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 8294f736650SSam Li virtio_blk_req_complete(req, err_status); 8304f736650SSam Li virtio_blk_free_request(req); 8314f736650SSam Li aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 8324f736650SSam Li } 8334f736650SSam Li 8344f736650SSam Li static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) 8354f736650SSam Li { 8364f736650SSam Li VirtIOBlock *s = req->dev; 8374f736650SSam Li VirtIODevice *vdev = VIRTIO_DEVICE(s); 8384f736650SSam Li BlockDriverState *bs = blk_bs(s->blk); 8394f736650SSam Li int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS; 8404f736650SSam Li uint64_t len; 8414f736650SSam Li uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS; 8424f736650SSam Li uint8_t err_status = VIRTIO_BLK_S_OK; 8434f736650SSam Li 8444f736650SSam Li uint32_t type = virtio_ldl_p(vdev, &req->out.type); 8454f736650SSam Li if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) { 8464f736650SSam Li /* Entire drive capacity */ 8474f736650SSam Li offset = 0; 8484f736650SSam Li len = capacity; 849*4e92acf7SSam Li trace_virtio_blk_handle_zone_reset_all(vdev, req, 0, 850*4e92acf7SSam Li bs->total_sectors); 8514f736650SSam Li } else { 8524f736650SSam Li if (bs->bl.zone_size > capacity - offset) { 8534f736650SSam Li /* The zoned device allows the last smaller zone. */ 8544f736650SSam Li len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1); 8554f736650SSam Li } else { 8564f736650SSam Li len = bs->bl.zone_size; 8574f736650SSam Li } 858*4e92acf7SSam Li trace_virtio_blk_handle_zone_mgmt(vdev, req, op, 859*4e92acf7SSam Li offset >> BDRV_SECTOR_BITS, 860*4e92acf7SSam Li len >> BDRV_SECTOR_BITS); 8614f736650SSam Li } 8624f736650SSam Li 8634f736650SSam Li if (!check_zoned_request(s, offset, len, false, &err_status)) { 8644f736650SSam Li goto out; 8654f736650SSam Li } 8664f736650SSam Li 8674f736650SSam Li blk_aio_zone_mgmt(s->blk, op, offset, len, 8684f736650SSam Li virtio_blk_zone_mgmt_complete, req); 8694f736650SSam Li 8704f736650SSam Li return 0; 8714f736650SSam Li out: 8724f736650SSam Li virtio_blk_req_complete(req, err_status); 8734f736650SSam Li virtio_blk_free_request(req); 8744f736650SSam Li return err_status; 8754f736650SSam Li } 8764f736650SSam Li 8774f736650SSam Li static void virtio_blk_zone_append_complete(void *opaque, int ret) 8784f736650SSam Li { 8794f736650SSam Li ZoneCmdData *data = opaque; 8804f736650SSam Li VirtIOBlockReq *req = data->req; 8814f736650SSam Li VirtIOBlock *s = req->dev; 8824f736650SSam Li VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); 8834f736650SSam Li int64_t append_sector, n; 8844f736650SSam Li uint8_t err_status = VIRTIO_BLK_S_OK; 8854f736650SSam Li 8864f736650SSam Li if (ret) { 8874f736650SSam Li err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 8884f736650SSam Li goto out; 8894f736650SSam Li } 8904f736650SSam Li 8914f736650SSam Li virtio_stq_p(vdev, &append_sector, 8924f736650SSam Li data->zone_append_data.offset >> BDRV_SECTOR_BITS); 8934f736650SSam Li n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector, 8944f736650SSam Li sizeof(append_sector)); 8954f736650SSam Li if (n != sizeof(append_sector)) { 8964f736650SSam Li virtio_error(vdev, "Driver provided input buffer less than size of " 8974f736650SSam Li "append_sector"); 8984f736650SSam Li err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; 8994f736650SSam Li goto out; 9004f736650SSam Li } 901*4e92acf7SSam Li trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret); 9024f736650SSam Li 9034f736650SSam Li out: 9044f736650SSam Li aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 9054f736650SSam Li virtio_blk_req_complete(req, err_status); 9064f736650SSam Li virtio_blk_free_request(req); 9074f736650SSam Li aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 9084f736650SSam Li g_free(data); 9094f736650SSam Li } 9104f736650SSam Li 9114f736650SSam Li static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, 9124f736650SSam Li struct iovec *out_iov, 9134f736650SSam Li struct iovec *in_iov, 9144f736650SSam Li uint64_t out_num, 9154f736650SSam Li unsigned in_num) { 9164f736650SSam Li VirtIOBlock *s = req->dev; 9174f736650SSam Li VirtIODevice *vdev = VIRTIO_DEVICE(s); 9184f736650SSam Li uint8_t err_status = VIRTIO_BLK_S_OK; 9194f736650SSam Li 9204f736650SSam Li int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS; 9214f736650SSam Li int64_t len = iov_size(out_iov, out_num); 9224f736650SSam Li 923*4e92acf7SSam Li trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS); 9244f736650SSam Li if (!check_zoned_request(s, offset, len, true, &err_status)) { 9254f736650SSam Li goto out; 9264f736650SSam Li } 9274f736650SSam Li 9284f736650SSam Li ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData)); 9294f736650SSam Li data->req = req; 9304f736650SSam Li data->in_iov = in_iov; 9314f736650SSam Li data->in_num = in_num; 9324f736650SSam Li data->zone_append_data.offset = offset; 9334f736650SSam Li qemu_iovec_init_external(&req->qiov, out_iov, out_num); 93452eb76f4SSam Li 93552eb76f4SSam Li block_acct_start(blk_get_stats(s->blk), &req->acct, len, 93652eb76f4SSam Li BLOCK_ACCT_ZONE_APPEND); 93752eb76f4SSam Li 9384f736650SSam Li blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0, 9394f736650SSam Li virtio_blk_zone_append_complete, data); 9404f736650SSam Li return 0; 9414f736650SSam Li 9424f736650SSam Li out: 9434f736650SSam Li aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 9444f736650SSam Li virtio_blk_req_complete(req, err_status); 9454f736650SSam Li virtio_blk_free_request(req); 9464f736650SSam Li aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 9474f736650SSam Li return err_status; 9484f736650SSam Li } 9494f736650SSam Li 95020ea686aSGreg Kurz static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) 9516e790746SPaolo Bonzini { 9526e790746SPaolo Bonzini uint32_t type; 953f897bf75SStefan Hajnoczi struct iovec *in_iov = req->elem.in_sg; 9545636da76SDongli Zhang struct iovec *out_iov = req->elem.out_sg; 955f897bf75SStefan Hajnoczi unsigned in_num = req->elem.in_num; 956f897bf75SStefan Hajnoczi unsigned out_num = req->elem.out_num; 95720ea686aSGreg Kurz VirtIOBlock *s = req->dev; 95820ea686aSGreg Kurz VirtIODevice *vdev = VIRTIO_DEVICE(s); 9596e790746SPaolo Bonzini 960f897bf75SStefan Hajnoczi if (req->elem.out_num < 1 || req->elem.in_num < 1) { 96120ea686aSGreg Kurz virtio_error(vdev, "virtio-blk missing headers"); 96220ea686aSGreg Kurz return -1; 9636e790746SPaolo Bonzini } 9646e790746SPaolo Bonzini 9655636da76SDongli Zhang if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out, 966827805a2SFam Zheng sizeof(req->out)) != sizeof(req->out))) { 96720ea686aSGreg Kurz virtio_error(vdev, "virtio-blk request outhdr too short"); 96820ea686aSGreg Kurz return -1; 969827805a2SFam Zheng } 970ee17e848SFam Zheng 9717bd04a04SStefan Hajnoczi iov_discard_front_undoable(&out_iov, &out_num, sizeof(req->out), 9727bd04a04SStefan Hajnoczi &req->outhdr_undo); 973ee17e848SFam Zheng 97412048545SGonglei if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 97520ea686aSGreg Kurz virtio_error(vdev, "virtio-blk request inhdr too short"); 9767bd04a04SStefan Hajnoczi iov_discard_undo(&req->outhdr_undo); 97720ea686aSGreg Kurz return -1; 978ee17e848SFam Zheng } 979ee17e848SFam Zheng 9802a6cdd6dSPaolo Bonzini /* We always touch the last byte, so just see how big in_iov is. */ 9812a6cdd6dSPaolo Bonzini req->in_len = iov_size(in_iov, in_num); 982ee17e848SFam Zheng req->in = (void *)in_iov[in_num - 1].iov_base 983ee17e848SFam Zheng + in_iov[in_num - 1].iov_len 984ee17e848SFam Zheng - sizeof(struct virtio_blk_inhdr); 9857bd04a04SStefan Hajnoczi iov_discard_back_undoable(in_iov, &in_num, sizeof(struct virtio_blk_inhdr), 9867bd04a04SStefan Hajnoczi &req->inhdr_undo); 9876e790746SPaolo Bonzini 9889a6719d5SStefano Garzarella type = virtio_ldl_p(vdev, &req->out.type); 9896e790746SPaolo Bonzini 99095f7142aSPeter Lieven /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER 991631b22eaSStefan Weil * is an optional flag. Although a guest should not send this flag if 99295f7142aSPeter Lieven * not negotiated we ignored it in the past. So keep ignoring it. */ 99395f7142aSPeter Lieven switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { 99495f7142aSPeter Lieven case VIRTIO_BLK_T_IN: 99595f7142aSPeter Lieven { 99695f7142aSPeter Lieven bool is_write = type & VIRTIO_BLK_T_OUT; 9979a6719d5SStefano Garzarella req->sector_num = virtio_ldq_p(vdev, &req->out.sector); 99895f7142aSPeter Lieven 99995f7142aSPeter Lieven if (is_write) { 10005636da76SDongli Zhang qemu_iovec_init_external(&req->qiov, out_iov, out_num); 1001a576ceacSStefan Hajnoczi trace_virtio_blk_handle_write(vdev, req, req->sector_num, 100295f7142aSPeter Lieven req->qiov.size / BDRV_SECTOR_SIZE); 100395f7142aSPeter Lieven } else { 100495f7142aSPeter Lieven qemu_iovec_init_external(&req->qiov, in_iov, in_num); 1005a576ceacSStefan Hajnoczi trace_virtio_blk_handle_read(vdev, req, req->sector_num, 100695f7142aSPeter Lieven req->qiov.size / BDRV_SECTOR_SIZE); 100795f7142aSPeter Lieven } 100895f7142aSPeter Lieven 10099a6719d5SStefano Garzarella if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) { 101095f7142aSPeter Lieven virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); 10119a6719d5SStefano Garzarella block_acct_invalid(blk_get_stats(s->blk), 101201762e03SAlberto Garcia is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); 101395f7142aSPeter Lieven virtio_blk_free_request(req); 101420ea686aSGreg Kurz return 0; 101595f7142aSPeter Lieven } 101695f7142aSPeter Lieven 10179a6719d5SStefano Garzarella block_acct_start(blk_get_stats(s->blk), &req->acct, req->qiov.size, 101895f7142aSPeter Lieven is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); 101995f7142aSPeter Lieven 102095f7142aSPeter Lieven /* merge would exceed maximum number of requests or IO direction 102195f7142aSPeter Lieven * changes */ 102295f7142aSPeter Lieven if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS || 1023c99495acSPeter Lieven is_write != mrb->is_write || 10249a6719d5SStefano Garzarella !s->conf.request_merging)) { 1025baf42268SStefan Hajnoczi virtio_blk_submit_multireq(s, mrb); 102695f7142aSPeter Lieven } 102795f7142aSPeter Lieven 102895f7142aSPeter Lieven assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS); 102995f7142aSPeter Lieven mrb->reqs[mrb->num_reqs++] = req; 103095f7142aSPeter Lieven mrb->is_write = is_write; 103195f7142aSPeter Lieven break; 103295f7142aSPeter Lieven } 103395f7142aSPeter Lieven case VIRTIO_BLK_T_FLUSH: 10346e790746SPaolo Bonzini virtio_blk_handle_flush(req, mrb); 103595f7142aSPeter Lieven break; 10364f736650SSam Li case VIRTIO_BLK_T_ZONE_REPORT: 10374f736650SSam Li virtio_blk_handle_zone_report(req, in_iov, in_num); 10384f736650SSam Li break; 10394f736650SSam Li case VIRTIO_BLK_T_ZONE_OPEN: 10404f736650SSam Li virtio_blk_handle_zone_mgmt(req, BLK_ZO_OPEN); 10414f736650SSam Li break; 10424f736650SSam Li case VIRTIO_BLK_T_ZONE_CLOSE: 10434f736650SSam Li virtio_blk_handle_zone_mgmt(req, BLK_ZO_CLOSE); 10444f736650SSam Li break; 10454f736650SSam Li case VIRTIO_BLK_T_ZONE_FINISH: 10464f736650SSam Li virtio_blk_handle_zone_mgmt(req, BLK_ZO_FINISH); 10474f736650SSam Li break; 10484f736650SSam Li case VIRTIO_BLK_T_ZONE_RESET: 10494f736650SSam Li virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET); 10504f736650SSam Li break; 10514f736650SSam Li case VIRTIO_BLK_T_ZONE_RESET_ALL: 10524f736650SSam Li virtio_blk_handle_zone_mgmt(req, BLK_ZO_RESET); 10534f736650SSam Li break; 105495f7142aSPeter Lieven case VIRTIO_BLK_T_SCSI_CMD: 10556e790746SPaolo Bonzini virtio_blk_handle_scsi(req); 105695f7142aSPeter Lieven break; 105795f7142aSPeter Lieven case VIRTIO_BLK_T_GET_ID: 105895f7142aSPeter Lieven { 10596e790746SPaolo Bonzini /* 10606e790746SPaolo Bonzini * NB: per existing s/n string convention the string is 10616e790746SPaolo Bonzini * terminated by '\0' only when shorter than buffer. 10626e790746SPaolo Bonzini */ 10632a30307fSMarkus Armbruster const char *serial = s->conf.serial ? s->conf.serial : ""; 1064a83ceea8SMarc Marí size_t size = MIN(strlen(serial) + 1, 1065a83ceea8SMarc Marí MIN(iov_size(in_iov, in_num), 1066a83ceea8SMarc Marí VIRTIO_BLK_ID_BYTES)); 1067a83ceea8SMarc Marí iov_from_buf(in_iov, in_num, 0, serial, size); 10686e790746SPaolo Bonzini virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); 1069671ec3f0SFam Zheng virtio_blk_free_request(req); 107095f7142aSPeter Lieven break; 107195f7142aSPeter Lieven } 10724f736650SSam Li case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT: 10734f736650SSam Li /* 10744f736650SSam Li * Passing out_iov/out_num and in_iov/in_num is not safe 10754f736650SSam Li * to access req->elem.out_sg directly because it may be 10764f736650SSam Li * modified by virtio_blk_handle_request(). 10774f736650SSam Li */ 10784f736650SSam Li virtio_blk_handle_zone_append(req, out_iov, in_iov, out_num, in_num); 10794f736650SSam Li break; 108037b06f8dSStefano Garzarella /* 108137b06f8dSStefano Garzarella * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with 108237b06f8dSStefano Garzarella * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement, 108337b06f8dSStefano Garzarella * so we must mask it for these requests, then we will check if it is set. 108437b06f8dSStefano Garzarella */ 108537b06f8dSStefano Garzarella case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT: 108637b06f8dSStefano Garzarella case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT: 108737b06f8dSStefano Garzarella { 108837b06f8dSStefano Garzarella struct virtio_blk_discard_write_zeroes dwz_hdr; 108937b06f8dSStefano Garzarella size_t out_len = iov_size(out_iov, out_num); 109037b06f8dSStefano Garzarella bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) == 109137b06f8dSStefano Garzarella VIRTIO_BLK_T_WRITE_ZEROES; 109237b06f8dSStefano Garzarella uint8_t err_status; 109337b06f8dSStefano Garzarella 109437b06f8dSStefano Garzarella /* 109537b06f8dSStefano Garzarella * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains 109637b06f8dSStefano Garzarella * more than one segment. 109737b06f8dSStefano Garzarella */ 109837b06f8dSStefano Garzarella if (unlikely(!(type & VIRTIO_BLK_T_OUT) || 109937b06f8dSStefano Garzarella out_len > sizeof(dwz_hdr))) { 110037b06f8dSStefano Garzarella virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); 110137b06f8dSStefano Garzarella virtio_blk_free_request(req); 110237b06f8dSStefano Garzarella return 0; 110337b06f8dSStefano Garzarella } 110437b06f8dSStefano Garzarella 110537b06f8dSStefano Garzarella if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr, 110637b06f8dSStefano Garzarella sizeof(dwz_hdr)) != sizeof(dwz_hdr))) { 11077bd04a04SStefan Hajnoczi iov_discard_undo(&req->inhdr_undo); 11087bd04a04SStefan Hajnoczi iov_discard_undo(&req->outhdr_undo); 110937b06f8dSStefano Garzarella virtio_error(vdev, "virtio-blk discard/write_zeroes header" 111037b06f8dSStefano Garzarella " too short"); 111137b06f8dSStefano Garzarella return -1; 111237b06f8dSStefano Garzarella } 111337b06f8dSStefano Garzarella 111437b06f8dSStefano Garzarella err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr, 111537b06f8dSStefano Garzarella is_write_zeroes); 111637b06f8dSStefano Garzarella if (err_status != VIRTIO_BLK_S_OK) { 111737b06f8dSStefano Garzarella virtio_blk_req_complete(req, err_status); 111837b06f8dSStefano Garzarella virtio_blk_free_request(req); 111937b06f8dSStefano Garzarella } 112037b06f8dSStefano Garzarella 112137b06f8dSStefano Garzarella break; 112237b06f8dSStefano Garzarella } 112395f7142aSPeter Lieven default: 11246e790746SPaolo Bonzini virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); 1125671ec3f0SFam Zheng virtio_blk_free_request(req); 11266e790746SPaolo Bonzini } 112720ea686aSGreg Kurz return 0; 11286e790746SPaolo Bonzini } 11296e790746SPaolo Bonzini 1130186b9691SStefan Hajnoczi void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) 11316e790746SPaolo Bonzini { 11326e790746SPaolo Bonzini VirtIOBlockReq *req; 113395f7142aSPeter Lieven MultiReqBuffer mrb = {}; 1134d0435bc5SStefan Hajnoczi bool suppress_notifications = virtio_queue_get_notification(vq); 11356e790746SPaolo Bonzini 11369d456654SPaolo Bonzini aio_context_acquire(blk_get_aio_context(s->blk)); 1137fc73548eSStefan Hajnoczi blk_io_plug(s->blk); 1138fc73548eSStefan Hajnoczi 11399ef9d402SStefan Hajnoczi do { 1140d0435bc5SStefan Hajnoczi if (suppress_notifications) { 11419ef9d402SStefan Hajnoczi virtio_queue_set_notification(vq, 0); 1142d0435bc5SStefan Hajnoczi } 11439ef9d402SStefan Hajnoczi 1144edaffd9fSStefan Hajnoczi while ((req = virtio_blk_get_request(s, vq))) { 114520ea686aSGreg Kurz if (virtio_blk_handle_request(req, &mrb)) { 114620ea686aSGreg Kurz virtqueue_detach_element(req->vq, &req->elem, 0); 114720ea686aSGreg Kurz virtio_blk_free_request(req); 114820ea686aSGreg Kurz break; 114920ea686aSGreg Kurz } 11506e790746SPaolo Bonzini } 11516e790746SPaolo Bonzini 1152d0435bc5SStefan Hajnoczi if (suppress_notifications) { 11539ef9d402SStefan Hajnoczi virtio_queue_set_notification(vq, 1); 1154d0435bc5SStefan Hajnoczi } 11559ef9d402SStefan Hajnoczi } while (!virtio_queue_empty(vq)); 11569ef9d402SStefan Hajnoczi 115795f7142aSPeter Lieven if (mrb.num_reqs) { 1158baf42268SStefan Hajnoczi virtio_blk_submit_multireq(s, &mrb); 115995f7142aSPeter Lieven } 1160fc73548eSStefan Hajnoczi 1161fc73548eSStefan Hajnoczi blk_io_unplug(s->blk); 11629d456654SPaolo Bonzini aio_context_release(blk_get_aio_context(s->blk)); 11636e790746SPaolo Bonzini } 11646e790746SPaolo Bonzini 11658a2fad57SMichael S. Tsirkin static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) 11668a2fad57SMichael S. Tsirkin { 11678a2fad57SMichael S. Tsirkin VirtIOBlock *s = (VirtIOBlock *)vdev; 11688a2fad57SMichael S. Tsirkin 1169186b9691SStefan Hajnoczi if (s->dataplane && !s->dataplane_started) { 11708a2fad57SMichael S. Tsirkin /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start 11718a2fad57SMichael S. Tsirkin * dataplane here instead of waiting for .set_status(). 11728a2fad57SMichael S. Tsirkin */ 11739ffe337cSPaolo Bonzini virtio_device_start_ioeventfd(vdev); 11748a2fad57SMichael S. Tsirkin if (!s->dataplane_disabled) { 11758a2fad57SMichael S. Tsirkin return; 11768a2fad57SMichael S. Tsirkin } 11778a2fad57SMichael S. Tsirkin } 1178186b9691SStefan Hajnoczi virtio_blk_handle_vq(s, vq); 11798a2fad57SMichael S. Tsirkin } 11808a2fad57SMichael S. Tsirkin 1181a937f8e8SStefan Hajnoczi static void virtio_blk_dma_restart_bh(void *opaque) 11826e790746SPaolo Bonzini { 1183a937f8e8SStefan Hajnoczi VirtIOBlock *s = opaque; 1184a937f8e8SStefan Hajnoczi 11856e790746SPaolo Bonzini VirtIOBlockReq *req = s->rq; 118695f7142aSPeter Lieven MultiReqBuffer mrb = {}; 11876e790746SPaolo Bonzini 11886e790746SPaolo Bonzini s->rq = NULL; 11896e790746SPaolo Bonzini 11901919631eSPaolo Bonzini aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); 11916e790746SPaolo Bonzini while (req) { 11921bdb176aSzhanghailiang VirtIOBlockReq *next = req->next; 119320ea686aSGreg Kurz if (virtio_blk_handle_request(req, &mrb)) { 119420ea686aSGreg Kurz /* Device is now broken and won't do any processing until it gets 119520ea686aSGreg Kurz * reset. Already queued requests will be lost: let's purge them. 119620ea686aSGreg Kurz */ 119720ea686aSGreg Kurz while (req) { 119820ea686aSGreg Kurz next = req->next; 119920ea686aSGreg Kurz virtqueue_detach_element(req->vq, &req->elem, 0); 120020ea686aSGreg Kurz virtio_blk_free_request(req); 120120ea686aSGreg Kurz req = next; 120220ea686aSGreg Kurz } 120320ea686aSGreg Kurz break; 120420ea686aSGreg Kurz } 12051bdb176aSzhanghailiang req = next; 12066e790746SPaolo Bonzini } 12076e790746SPaolo Bonzini 120895f7142aSPeter Lieven if (mrb.num_reqs) { 1209baf42268SStefan Hajnoczi virtio_blk_submit_multireq(s, &mrb); 121095f7142aSPeter Lieven } 1211a937f8e8SStefan Hajnoczi 1212a937f8e8SStefan Hajnoczi /* Paired with inc in virtio_blk_dma_restart_cb() */ 1213680f2002SKevin Wolf blk_dec_in_flight(s->conf.conf.blk); 1214a937f8e8SStefan Hajnoczi 12151919631eSPaolo Bonzini aio_context_release(blk_get_aio_context(s->conf.conf.blk)); 12166e790746SPaolo Bonzini } 12176e790746SPaolo Bonzini 1218538f0497SPhilippe Mathieu-Daudé static void virtio_blk_dma_restart_cb(void *opaque, bool running, 12196e790746SPaolo Bonzini RunState state) 12206e790746SPaolo Bonzini { 12216e790746SPaolo Bonzini VirtIOBlock *s = opaque; 12226e790746SPaolo Bonzini 12236e790746SPaolo Bonzini if (!running) { 12246e790746SPaolo Bonzini return; 12256e790746SPaolo Bonzini } 12266e790746SPaolo Bonzini 1227a937f8e8SStefan Hajnoczi /* Paired with dec in virtio_blk_dma_restart_bh() */ 1228680f2002SKevin Wolf blk_inc_in_flight(s->conf.conf.blk); 1229a937f8e8SStefan Hajnoczi 1230a937f8e8SStefan Hajnoczi aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk), 1231a937f8e8SStefan Hajnoczi virtio_blk_dma_restart_bh, s); 12326e790746SPaolo Bonzini } 12336e790746SPaolo Bonzini 12346e790746SPaolo Bonzini static void virtio_blk_reset(VirtIODevice *vdev) 12356e790746SPaolo Bonzini { 12366e790746SPaolo Bonzini VirtIOBlock *s = VIRTIO_BLK(vdev); 12376e40b3bfSAlexander Yarygin AioContext *ctx; 123826307f6aSFam Zheng VirtIOBlockReq *req; 12396e790746SPaolo Bonzini 12406e40b3bfSAlexander Yarygin ctx = blk_get_aio_context(s->blk); 12416e40b3bfSAlexander Yarygin aio_context_acquire(ctx); 12426e40b3bfSAlexander Yarygin blk_drain(s->blk); 12436e40b3bfSAlexander Yarygin 124426307f6aSFam Zheng /* We drop queued requests after blk_drain() because blk_drain() itself can 124526307f6aSFam Zheng * produce them. */ 124626307f6aSFam Zheng while (s->rq) { 124726307f6aSFam Zheng req = s->rq; 124826307f6aSFam Zheng s->rq = req->next; 124997b93c8aSStefan Hajnoczi virtqueue_detach_element(req->vq, &req->elem, 0); 125026307f6aSFam Zheng virtio_blk_free_request(req); 125126307f6aSFam Zheng } 125226307f6aSFam Zheng 12536e40b3bfSAlexander Yarygin aio_context_release(ctx); 12546e40b3bfSAlexander Yarygin 12559ffe337cSPaolo Bonzini assert(!s->dataplane_started); 12564be74634SMarkus Armbruster blk_set_enable_write_cache(s->blk, s->original_wce); 12576e790746SPaolo Bonzini } 12586e790746SPaolo Bonzini 12596e790746SPaolo Bonzini /* coalesce internal state, copy to pci i/o region 0 12606e790746SPaolo Bonzini */ 12616e790746SPaolo Bonzini static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) 12626e790746SPaolo Bonzini { 12636e790746SPaolo Bonzini VirtIOBlock *s = VIRTIO_BLK(vdev); 12642a30307fSMarkus Armbruster BlockConf *conf = &s->conf.conf; 12654f736650SSam Li BlockDriverState *bs = blk_bs(s->blk); 12666e790746SPaolo Bonzini struct virtio_blk_config blkcfg; 12676e790746SPaolo Bonzini uint64_t capacity; 126817d0bc01SStefan Hajnoczi int64_t length; 1269f7516731SMarkus Armbruster int blk_size = conf->logical_block_size; 12701f433e84SEmanuele Giuseppe Esposito AioContext *ctx; 12711f433e84SEmanuele Giuseppe Esposito 12721f433e84SEmanuele Giuseppe Esposito ctx = blk_get_aio_context(s->blk); 12731f433e84SEmanuele Giuseppe Esposito aio_context_acquire(ctx); 12746e790746SPaolo Bonzini 12754be74634SMarkus Armbruster blk_get_geometry(s->blk, &capacity); 12766e790746SPaolo Bonzini memset(&blkcfg, 0, sizeof(blkcfg)); 1277783d1897SRusty Russell virtio_stq_p(vdev, &blkcfg.capacity, capacity); 12781bf8a989SDenis Plotnikov virtio_stl_p(vdev, &blkcfg.seg_max, 12791bf8a989SDenis Plotnikov s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2); 1280907eb3e5SMichael S. Tsirkin virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls); 1281783d1897SRusty Russell virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); 1282f7516731SMarkus Armbruster virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size); 12836abee260SRoman Kagan virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size); 1284907eb3e5SMichael S. Tsirkin blkcfg.geometry.heads = conf->heads; 12856e790746SPaolo Bonzini /* 12866e790746SPaolo Bonzini * We must ensure that the block device capacity is a multiple of 1287e03ba136SPeter Maydell * the logical block size. If that is not the case, let's use 12886e790746SPaolo Bonzini * sector_mask to adopt the geometry to have a correct picture. 12896e790746SPaolo Bonzini * For those devices where the capacity is ok for the given geometry 1290e03ba136SPeter Maydell * we don't touch the sector value of the geometry, since some devices 12916e790746SPaolo Bonzini * (like s390 dasd) need a specific value. Here the capacity is already 12926e790746SPaolo Bonzini * cyls*heads*secs*blk_size and the sector value is not block size 12936e790746SPaolo Bonzini * divided by 512 - instead it is the amount of blk_size blocks 12946e790746SPaolo Bonzini * per track (cylinder). 12956e790746SPaolo Bonzini */ 129617d0bc01SStefan Hajnoczi length = blk_getlength(s->blk); 12971f433e84SEmanuele Giuseppe Esposito aio_context_release(ctx); 129817d0bc01SStefan Hajnoczi if (length > 0 && length / conf->heads / conf->secs % blk_size) { 1299907eb3e5SMichael S. Tsirkin blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; 13006e790746SPaolo Bonzini } else { 1301907eb3e5SMichael S. Tsirkin blkcfg.geometry.sectors = conf->secs; 13026e790746SPaolo Bonzini } 13036e790746SPaolo Bonzini blkcfg.size_max = 0; 1304f7516731SMarkus Armbruster blkcfg.physical_block_exp = get_physical_block_exp(conf); 13056e790746SPaolo Bonzini blkcfg.alignment_offset = 0; 13064be74634SMarkus Armbruster blkcfg.wce = blk_enable_write_cache(s->blk); 13072f270590SStefan Hajnoczi virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues); 130837b06f8dSStefano Garzarella if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) { 1309fb0b154cSAkihiko Odaki uint32_t discard_granularity = conf->discard_granularity; 1310fb0b154cSAkihiko Odaki if (discard_granularity == -1 || !s->conf.report_discard_granularity) { 1311fb0b154cSAkihiko Odaki discard_granularity = blk_size; 1312fb0b154cSAkihiko Odaki } 131337b06f8dSStefano Garzarella virtio_stl_p(vdev, &blkcfg.max_discard_sectors, 131437b06f8dSStefano Garzarella s->conf.max_discard_sectors); 131537b06f8dSStefano Garzarella virtio_stl_p(vdev, &blkcfg.discard_sector_alignment, 1316fb0b154cSAkihiko Odaki discard_granularity >> BDRV_SECTOR_BITS); 131737b06f8dSStefano Garzarella /* 131837b06f8dSStefano Garzarella * We support only one segment per request since multiple segments 131937b06f8dSStefano Garzarella * are not widely used and there are no userspace APIs that allow 132037b06f8dSStefano Garzarella * applications to submit multiple segments in a single call. 132137b06f8dSStefano Garzarella */ 132237b06f8dSStefano Garzarella virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1); 132337b06f8dSStefano Garzarella } 132437b06f8dSStefano Garzarella if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) { 132537b06f8dSStefano Garzarella virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors, 132637b06f8dSStefano Garzarella s->conf.max_write_zeroes_sectors); 132737b06f8dSStefano Garzarella blkcfg.write_zeroes_may_unmap = 1; 132837b06f8dSStefano Garzarella virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1); 132937b06f8dSStefano Garzarella } 13304f736650SSam Li if (bs->bl.zoned != BLK_Z_NONE) { 13314f736650SSam Li switch (bs->bl.zoned) { 13324f736650SSam Li case BLK_Z_HM: 13334f736650SSam Li blkcfg.zoned.model = VIRTIO_BLK_Z_HM; 13344f736650SSam Li break; 13354f736650SSam Li case BLK_Z_HA: 13364f736650SSam Li blkcfg.zoned.model = VIRTIO_BLK_Z_HA; 13374f736650SSam Li break; 13384f736650SSam Li default: 13394f736650SSam Li g_assert_not_reached(); 13404f736650SSam Li } 13414f736650SSam Li 13424f736650SSam Li virtio_stl_p(vdev, &blkcfg.zoned.zone_sectors, 13434f736650SSam Li bs->bl.zone_size / 512); 13444f736650SSam Li virtio_stl_p(vdev, &blkcfg.zoned.max_active_zones, 13454f736650SSam Li bs->bl.max_active_zones); 13464f736650SSam Li virtio_stl_p(vdev, &blkcfg.zoned.max_open_zones, 13474f736650SSam Li bs->bl.max_open_zones); 13484f736650SSam Li virtio_stl_p(vdev, &blkcfg.zoned.write_granularity, blk_size); 13494f736650SSam Li virtio_stl_p(vdev, &blkcfg.zoned.max_append_sectors, 13504f736650SSam Li bs->bl.max_append_sectors); 13514f736650SSam Li } else { 13524f736650SSam Li blkcfg.zoned.model = VIRTIO_BLK_Z_NONE; 13534f736650SSam Li } 135420764be0SStefano Garzarella memcpy(config, &blkcfg, s->config_size); 13556e790746SPaolo Bonzini } 13566e790746SPaolo Bonzini 13576e790746SPaolo Bonzini static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) 13586e790746SPaolo Bonzini { 13596e790746SPaolo Bonzini VirtIOBlock *s = VIRTIO_BLK(vdev); 13606e790746SPaolo Bonzini struct virtio_blk_config blkcfg; 13616e790746SPaolo Bonzini 136220764be0SStefano Garzarella memcpy(&blkcfg, config, s->config_size); 13636d7e73d6SFam Zheng 13644be74634SMarkus Armbruster aio_context_acquire(blk_get_aio_context(s->blk)); 13654be74634SMarkus Armbruster blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); 13664be74634SMarkus Armbruster aio_context_release(blk_get_aio_context(s->blk)); 13676e790746SPaolo Bonzini } 13686e790746SPaolo Bonzini 13699d5b731dSJason Wang static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, 13709d5b731dSJason Wang Error **errp) 13716e790746SPaolo Bonzini { 13726e790746SPaolo Bonzini VirtIOBlock *s = VIRTIO_BLK(vdev); 13736e790746SPaolo Bonzini 1374bbe8bd4dSStefano Garzarella /* Firstly sync all virtio-blk possible supported features */ 1375bbe8bd4dSStefano Garzarella features |= s->host_features; 1376bbe8bd4dSStefano Garzarella 13770cd09c3aSCornelia Huck virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); 13780cd09c3aSCornelia Huck virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); 13790cd09c3aSCornelia Huck virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); 13800cd09c3aSCornelia Huck virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); 138195129d6fSCornelia Huck if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) { 1382bbe8bd4dSStefano Garzarella if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) { 1383efb8206cSJason Wang error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); 1384efb8206cSJason Wang return 0; 1385efb8206cSJason Wang } 1386efb8206cSJason Wang } else { 1387c9b11f97SJason Wang virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT); 1388efb8206cSJason Wang virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); 1389efb8206cSJason Wang } 13906e790746SPaolo Bonzini 13915f258577SEvgeny Yakovlev if (blk_enable_write_cache(s->blk) || 13925f258577SEvgeny Yakovlev (s->conf.x_enable_wce_if_config_wce && 13935f258577SEvgeny Yakovlev virtio_has_feature(features, VIRTIO_BLK_F_CONFIG_WCE))) { 13940cd09c3aSCornelia Huck virtio_add_feature(&features, VIRTIO_BLK_F_WCE); 13954be74634SMarkus Armbruster } 139686b1cf32SKevin Wolf if (!blk_is_writable(s->blk)) { 13970cd09c3aSCornelia Huck virtio_add_feature(&features, VIRTIO_BLK_F_RO); 13984be74634SMarkus Armbruster } 13992f270590SStefan Hajnoczi if (s->conf.num_queues > 1) { 14002f270590SStefan Hajnoczi virtio_add_feature(&features, VIRTIO_BLK_F_MQ); 14012f270590SStefan Hajnoczi } 14026e790746SPaolo Bonzini 14036e790746SPaolo Bonzini return features; 14046e790746SPaolo Bonzini } 14056e790746SPaolo Bonzini 14066e790746SPaolo Bonzini static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) 14076e790746SPaolo Bonzini { 14086e790746SPaolo Bonzini VirtIOBlock *s = VIRTIO_BLK(vdev); 14096e790746SPaolo Bonzini 14109ffe337cSPaolo Bonzini if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { 14119ffe337cSPaolo Bonzini assert(!s->dataplane_started); 14126e790746SPaolo Bonzini } 14136e790746SPaolo Bonzini 14146e790746SPaolo Bonzini if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { 14156e790746SPaolo Bonzini return; 14166e790746SPaolo Bonzini } 14176e790746SPaolo Bonzini 1418ef5bc962SPaolo Bonzini /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send 1419ef5bc962SPaolo Bonzini * cache flushes. Thus, the "auto writethrough" behavior is never 1420ef5bc962SPaolo Bonzini * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. 1421ef5bc962SPaolo Bonzini * Leaving it enabled would break the following sequence: 1422ef5bc962SPaolo Bonzini * 1423ef5bc962SPaolo Bonzini * Guest started with "-drive cache=writethrough" 1424ef5bc962SPaolo Bonzini * Guest sets status to 0 1425ef5bc962SPaolo Bonzini * Guest sets DRIVER bit in status field 1426ef5bc962SPaolo Bonzini * Guest reads host features (WCE=0, CONFIG_WCE=1) 1427ef5bc962SPaolo Bonzini * Guest writes guest features (WCE=0, CONFIG_WCE=1) 1428ef5bc962SPaolo Bonzini * Guest writes 1 to the WCE configuration field (writeback mode) 1429ef5bc962SPaolo Bonzini * Guest sets DRIVER_OK bit in status field 1430ef5bc962SPaolo Bonzini * 14314be74634SMarkus Armbruster * s->blk would erroneously be placed in writethrough mode. 1432ef5bc962SPaolo Bonzini */ 143395129d6fSCornelia Huck if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { 14344be74634SMarkus Armbruster aio_context_acquire(blk_get_aio_context(s->blk)); 14354be74634SMarkus Armbruster blk_set_enable_write_cache(s->blk, 143695129d6fSCornelia Huck virtio_vdev_has_feature(vdev, 143795129d6fSCornelia Huck VIRTIO_BLK_F_WCE)); 14384be74634SMarkus Armbruster aio_context_release(blk_get_aio_context(s->blk)); 14396e790746SPaolo Bonzini } 1440ef5bc962SPaolo Bonzini } 14416e790746SPaolo Bonzini 1442b2b295a7SGreg Kurz static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) 1443b2b295a7SGreg Kurz { 1444b2b295a7SGreg Kurz VirtIOBlock *s = VIRTIO_BLK(vdev); 1445b2b295a7SGreg Kurz VirtIOBlockReq *req = s->rq; 14466e790746SPaolo Bonzini 14476e790746SPaolo Bonzini while (req) { 14486e790746SPaolo Bonzini qemu_put_sbyte(f, 1); 144930d8bf6dSStefan Hajnoczi 145030d8bf6dSStefan Hajnoczi if (s->conf.num_queues > 1) { 145130d8bf6dSStefan Hajnoczi qemu_put_be32(f, virtio_get_queue_index(req->vq)); 145230d8bf6dSStefan Hajnoczi } 145330d8bf6dSStefan Hajnoczi 145486044b24SJason Wang qemu_put_virtqueue_element(vdev, f, &req->elem); 14556e790746SPaolo Bonzini req = req->next; 14566e790746SPaolo Bonzini } 14576e790746SPaolo Bonzini qemu_put_sbyte(f, 0); 14586e790746SPaolo Bonzini } 14596e790746SPaolo Bonzini 1460b2b295a7SGreg Kurz static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, 1461b2b295a7SGreg Kurz int version_id) 1462b2b295a7SGreg Kurz { 1463b2b295a7SGreg Kurz VirtIOBlock *s = VIRTIO_BLK(vdev); 1464b2b295a7SGreg Kurz 14656e790746SPaolo Bonzini while (qemu_get_sbyte(f)) { 146630d8bf6dSStefan Hajnoczi unsigned nvqs = s->conf.num_queues; 146730d8bf6dSStefan Hajnoczi unsigned vq_idx = 0; 1468ab281c17SPaolo Bonzini VirtIOBlockReq *req; 146930d8bf6dSStefan Hajnoczi 147030d8bf6dSStefan Hajnoczi if (nvqs > 1) { 147130d8bf6dSStefan Hajnoczi vq_idx = qemu_get_be32(f); 147230d8bf6dSStefan Hajnoczi 147330d8bf6dSStefan Hajnoczi if (vq_idx >= nvqs) { 147430d8bf6dSStefan Hajnoczi error_report("Invalid virtqueue index in request list: %#x", 147530d8bf6dSStefan Hajnoczi vq_idx); 147630d8bf6dSStefan Hajnoczi return -EINVAL; 147730d8bf6dSStefan Hajnoczi } 147830d8bf6dSStefan Hajnoczi } 147930d8bf6dSStefan Hajnoczi 14808607f5c3SJason Wang req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); 148130d8bf6dSStefan Hajnoczi virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); 14826e790746SPaolo Bonzini req->next = s->rq; 14836e790746SPaolo Bonzini s->rq = req; 14846e790746SPaolo Bonzini } 14856e790746SPaolo Bonzini 14866e790746SPaolo Bonzini return 0; 14876e790746SPaolo Bonzini } 14886e790746SPaolo Bonzini 14899b92fbcfSSergio Lopez static void virtio_resize_cb(void *opaque) 14909b92fbcfSSergio Lopez { 14919b92fbcfSSergio Lopez VirtIODevice *vdev = opaque; 14929b92fbcfSSergio Lopez 14939b92fbcfSSergio Lopez assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 14949b92fbcfSSergio Lopez virtio_notify_config(vdev); 14959b92fbcfSSergio Lopez } 14969b92fbcfSSergio Lopez 14976e790746SPaolo Bonzini static void virtio_blk_resize(void *opaque) 14986e790746SPaolo Bonzini { 14996e790746SPaolo Bonzini VirtIODevice *vdev = VIRTIO_DEVICE(opaque); 15006e790746SPaolo Bonzini 15019b92fbcfSSergio Lopez /* 15029b92fbcfSSergio Lopez * virtio_notify_config() needs to acquire the global mutex, 15039b92fbcfSSergio Lopez * so it can't be called from an iothread. Instead, schedule 15049b92fbcfSSergio Lopez * it to be run in the main context BH. 15059b92fbcfSSergio Lopez */ 15069b92fbcfSSergio Lopez aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); 15076e790746SPaolo Bonzini } 15086e790746SPaolo Bonzini 15096e790746SPaolo Bonzini static const BlockDevOps virtio_block_ops = { 15106e790746SPaolo Bonzini .resize_cb = virtio_blk_resize, 15116e790746SPaolo Bonzini }; 15126e790746SPaolo Bonzini 151375884afdSAndreas Färber static void virtio_blk_device_realize(DeviceState *dev, Error **errp) 15146e790746SPaolo Bonzini { 151575884afdSAndreas Färber VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1516179b417eSAndreas Färber VirtIOBlock *s = VIRTIO_BLK(dev); 15172a30307fSMarkus Armbruster VirtIOBlkConf *conf = &s->conf; 15183ffeeef7SAndreas Färber Error *err = NULL; 15192f270590SStefan Hajnoczi unsigned i; 15206e790746SPaolo Bonzini 15214be74634SMarkus Armbruster if (!conf->conf.blk) { 152275884afdSAndreas Färber error_setg(errp, "drive property not set"); 152375884afdSAndreas Färber return; 15246e790746SPaolo Bonzini } 15254be74634SMarkus Armbruster if (!blk_is_inserted(conf->conf.blk)) { 152675884afdSAndreas Färber error_setg(errp, "Device needs media, but drive is empty"); 152775884afdSAndreas Färber return; 15286e790746SPaolo Bonzini } 15299445e1e1SStefan Hajnoczi if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { 15309445e1e1SStefan Hajnoczi conf->num_queues = 1; 15319445e1e1SStefan Hajnoczi } 15322f270590SStefan Hajnoczi if (!conf->num_queues) { 15332f270590SStefan Hajnoczi error_setg(errp, "num-queues property must be larger than 0"); 15342f270590SStefan Hajnoczi return; 15352f270590SStefan Hajnoczi } 15361bf8a989SDenis Plotnikov if (conf->queue_size <= 2) { 15371bf8a989SDenis Plotnikov error_setg(errp, "invalid queue-size property (%" PRIu16 "), " 15381bf8a989SDenis Plotnikov "must be > 2", conf->queue_size); 15391bf8a989SDenis Plotnikov return; 15401bf8a989SDenis Plotnikov } 15416040aeddSMark Kanda if (!is_power_of_2(conf->queue_size) || 15426040aeddSMark Kanda conf->queue_size > VIRTQUEUE_MAX_SIZE) { 15436040aeddSMark Kanda error_setg(errp, "invalid queue-size property (%" PRIu16 "), " 15446040aeddSMark Kanda "must be a power of 2 (max %d)", 15456040aeddSMark Kanda conf->queue_size, VIRTQUEUE_MAX_SIZE); 15466040aeddSMark Kanda return; 15476040aeddSMark Kanda } 15486e790746SPaolo Bonzini 1549ceff3e1fSMao Zhongyi if (!blkconf_apply_backend_options(&conf->conf, 155086b1cf32SKevin Wolf !blk_supports_write_perm(conf->conf.blk), 155186b1cf32SKevin Wolf true, errp)) { 1552a17c17a2SKevin Wolf return; 1553a17c17a2SKevin Wolf } 15544be74634SMarkus Armbruster s->original_wce = blk_enable_write_cache(conf->conf.blk); 1555ceff3e1fSMao Zhongyi if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) { 155675884afdSAndreas Färber return; 15576e790746SPaolo Bonzini } 1558ceff3e1fSMao Zhongyi 1559c56ee92fSRoman Kagan if (!blkconf_blocksizes(&conf->conf, errp)) { 15600a75b60cSMark Kanda return; 15610a75b60cSMark Kanda } 15620a75b60cSMark Kanda 15634f736650SSam Li BlockDriverState *bs = blk_bs(conf->conf.blk); 15644f736650SSam Li if (bs->bl.zoned != BLK_Z_NONE) { 15654f736650SSam Li virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED); 15664f736650SSam Li if (bs->bl.zoned == BLK_Z_HM) { 15674f736650SSam Li virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD); 15684f736650SSam Li } 15694f736650SSam Li } 15704f736650SSam Li 157137b06f8dSStefano Garzarella if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) && 157237b06f8dSStefano Garzarella (!conf->max_discard_sectors || 157337b06f8dSStefano Garzarella conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) { 157437b06f8dSStefano Garzarella error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")" 157537b06f8dSStefano Garzarella ", must be between 1 and %d", 157637b06f8dSStefano Garzarella conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS); 157737b06f8dSStefano Garzarella return; 157837b06f8dSStefano Garzarella } 157937b06f8dSStefano Garzarella 158037b06f8dSStefano Garzarella if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) && 158137b06f8dSStefano Garzarella (!conf->max_write_zeroes_sectors || 158237b06f8dSStefano Garzarella conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) { 158337b06f8dSStefano Garzarella error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32 158437b06f8dSStefano Garzarella "), must be between 1 and %d", 158537b06f8dSStefano Garzarella conf->max_write_zeroes_sectors, 158637b06f8dSStefano Garzarella (int)BDRV_REQUEST_MAX_SECTORS); 158737b06f8dSStefano Garzarella return; 158837b06f8dSStefano Garzarella } 158937b06f8dSStefano Garzarella 1590d9cf55a8SDaniil Tatianin s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, 1591d74c30c8SDaniil Tatianin s->host_features); 15923857cd5cSJonah Palmer virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); 15936e790746SPaolo Bonzini 15944be74634SMarkus Armbruster s->blk = conf->conf.blk; 15956e790746SPaolo Bonzini s->rq = NULL; 15962a30307fSMarkus Armbruster s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; 15976e790746SPaolo Bonzini 15982f270590SStefan Hajnoczi for (i = 0; i < conf->num_queues; i++) { 15996040aeddSMark Kanda virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); 16002f270590SStefan Hajnoczi } 160198e3ab35SKevin Wolf qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); 16022a30307fSMarkus Armbruster virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); 16033ffeeef7SAndreas Färber if (err != NULL) { 160475884afdSAndreas Färber error_propagate(errp, err); 1605cfaf757eSPan Nengyuan for (i = 0; i < conf->num_queues; i++) { 1606cfaf757eSPan Nengyuan virtio_del_queue(vdev, i); 1607cfaf757eSPan Nengyuan } 16086a1a8cc7SKONRAD Frederic virtio_cleanup(vdev); 160975884afdSAndreas Färber return; 16106e790746SPaolo Bonzini } 16116e790746SPaolo Bonzini 1612a937f8e8SStefan Hajnoczi /* 1613a937f8e8SStefan Hajnoczi * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets 1614a937f8e8SStefan Hajnoczi * called after ->start_ioeventfd() has already set blk's AioContext. 1615a937f8e8SStefan Hajnoczi */ 1616a937f8e8SStefan Hajnoczi s->change = 1617a937f8e8SStefan Hajnoczi qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s); 1618a937f8e8SStefan Hajnoczi 1619baf42268SStefan Hajnoczi blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); 16204be74634SMarkus Armbruster blk_set_dev_ops(s->blk, &virtio_block_ops, s); 16216e790746SPaolo Bonzini 16224be74634SMarkus Armbruster blk_iostatus_enable(s->blk); 162371f571a2SSam Eiderman 162471f571a2SSam Eiderman add_boot_device_lchs(dev, "/disk@0,0", 162571f571a2SSam Eiderman conf->conf.lcyls, 162671f571a2SSam Eiderman conf->conf.lheads, 162771f571a2SSam Eiderman conf->conf.lsecs); 16286e790746SPaolo Bonzini } 16296e790746SPaolo Bonzini 1630b69c3c21SMarkus Armbruster static void virtio_blk_device_unrealize(DeviceState *dev) 16316e790746SPaolo Bonzini { 1632306ec6c3SAndreas Färber VirtIODevice *vdev = VIRTIO_DEVICE(dev); 1633306ec6c3SAndreas Färber VirtIOBlock *s = VIRTIO_BLK(dev); 16344a0117cfSEugenio Pérez VirtIOBlkConf *conf = &s->conf; 16354a0117cfSEugenio Pérez unsigned i; 1636306ec6c3SAndreas Färber 16377bfde688SJulia Suvorova blk_drain(s->blk); 163871f571a2SSam Eiderman del_boot_device_lchs(dev, "/disk@0,0"); 16396e790746SPaolo Bonzini virtio_blk_data_plane_destroy(s->dataplane); 16406e790746SPaolo Bonzini s->dataplane = NULL; 16414a0117cfSEugenio Pérez for (i = 0; i < conf->num_queues; i++) { 16424a0117cfSEugenio Pérez virtio_del_queue(vdev, i); 16434a0117cfSEugenio Pérez } 164498e3ab35SKevin Wolf qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); 1645baf42268SStefan Hajnoczi blk_ram_registrar_destroy(&s->blk_ram_registrar); 16466e790746SPaolo Bonzini qemu_del_vm_change_state_handler(s->change); 16474be74634SMarkus Armbruster blockdev_mark_auto_del(s->blk); 16486a1a8cc7SKONRAD Frederic virtio_cleanup(vdev); 16496e790746SPaolo Bonzini } 16506e790746SPaolo Bonzini 1651467b3f33SStefan Hajnoczi static void virtio_blk_instance_init(Object *obj) 1652467b3f33SStefan Hajnoczi { 1653467b3f33SStefan Hajnoczi VirtIOBlock *s = VIRTIO_BLK(obj); 1654467b3f33SStefan Hajnoczi 16552a30307fSMarkus Armbruster device_add_bootindex_property(obj, &s->conf.conf.bootindex, 16563342ec32SGonglei "bootindex", "/disk@0,0", 165740c2281cSMarkus Armbruster DEVICE(obj)); 1658467b3f33SStefan Hajnoczi } 1659467b3f33SStefan Hajnoczi 1660977a117fSHalil Pasic static const VMStateDescription vmstate_virtio_blk = { 1661977a117fSHalil Pasic .name = "virtio-blk", 1662977a117fSHalil Pasic .minimum_version_id = 2, 1663977a117fSHalil Pasic .version_id = 2, 1664977a117fSHalil Pasic .fields = (VMStateField[]) { 1665977a117fSHalil Pasic VMSTATE_VIRTIO_DEVICE, 1666977a117fSHalil Pasic VMSTATE_END_OF_LIST() 1667977a117fSHalil Pasic }, 1668977a117fSHalil Pasic }; 1669bbded32cSDr. David Alan Gilbert 16706e790746SPaolo Bonzini static Property virtio_blk_properties[] = { 16712a30307fSMarkus Armbruster DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), 16728c398252SKevin Wolf DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), 16732a30307fSMarkus Armbruster DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), 16742a30307fSMarkus Armbruster DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), 1675bbe8bd4dSStefano Garzarella DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features, 1676bbe8bd4dSStefano Garzarella VIRTIO_BLK_F_CONFIG_WCE, true), 167732a877e4SStefan Hajnoczi #ifdef __linux__ 1678bbe8bd4dSStefano Garzarella DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features, 1679bbe8bd4dSStefano Garzarella VIRTIO_BLK_F_SCSI, false), 168032a877e4SStefan Hajnoczi #endif 1681c99495acSPeter Lieven DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, 1682c99495acSPeter Lieven true), 16839445e1e1SStefan Hajnoczi DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 16849445e1e1SStefan Hajnoczi VIRTIO_BLK_AUTO_NUM_QUEUES), 1685c9b7d9ecSDenis Plotnikov DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), 16861bf8a989SDenis Plotnikov DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), 1687d679ac09SFam Zheng DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, 1688d679ac09SFam Zheng IOThread *), 16895c81161fSStefano Garzarella DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, 16905c81161fSStefano Garzarella VIRTIO_BLK_F_DISCARD, true), 1691fb0b154cSAkihiko Odaki DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, 1692fb0b154cSAkihiko Odaki conf.report_discard_granularity, true), 16935c81161fSStefano Garzarella DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features, 16945c81161fSStefano Garzarella VIRTIO_BLK_F_WRITE_ZEROES, true), 169537b06f8dSStefano Garzarella DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock, 169637b06f8dSStefano Garzarella conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS), 169737b06f8dSStefano Garzarella DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock, 169837b06f8dSStefano Garzarella conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS), 16995f258577SEvgeny Yakovlev DEFINE_PROP_BOOL("x-enable-wce-if-config-wce", VirtIOBlock, 17005f258577SEvgeny Yakovlev conf.x_enable_wce_if_config_wce, true), 17016e790746SPaolo Bonzini DEFINE_PROP_END_OF_LIST(), 17026e790746SPaolo Bonzini }; 17036e790746SPaolo Bonzini 17046e790746SPaolo Bonzini static void virtio_blk_class_init(ObjectClass *klass, void *data) 17056e790746SPaolo Bonzini { 17066e790746SPaolo Bonzini DeviceClass *dc = DEVICE_CLASS(klass); 17076e790746SPaolo Bonzini VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 170875884afdSAndreas Färber 17094f67d30bSMarc-André Lureau device_class_set_props(dc, virtio_blk_properties); 1710bbded32cSDr. David Alan Gilbert dc->vmsd = &vmstate_virtio_blk; 1711125ee0edSMarcel Apfelbaum set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 171275884afdSAndreas Färber vdc->realize = virtio_blk_device_realize; 1713306ec6c3SAndreas Färber vdc->unrealize = virtio_blk_device_unrealize; 17146e790746SPaolo Bonzini vdc->get_config = virtio_blk_update_config; 17156e790746SPaolo Bonzini vdc->set_config = virtio_blk_set_config; 17166e790746SPaolo Bonzini vdc->get_features = virtio_blk_get_features; 17176e790746SPaolo Bonzini vdc->set_status = virtio_blk_set_status; 17186e790746SPaolo Bonzini vdc->reset = virtio_blk_reset; 1719b2b295a7SGreg Kurz vdc->save = virtio_blk_save_device; 1720b2b295a7SGreg Kurz vdc->load = virtio_blk_load_device; 17219ffe337cSPaolo Bonzini vdc->start_ioeventfd = virtio_blk_data_plane_start; 17229ffe337cSPaolo Bonzini vdc->stop_ioeventfd = virtio_blk_data_plane_stop; 17236e790746SPaolo Bonzini } 17246e790746SPaolo Bonzini 1725b5c7ceafSChanglong Xie static const TypeInfo virtio_blk_info = { 17266e790746SPaolo Bonzini .name = TYPE_VIRTIO_BLK, 17276e790746SPaolo Bonzini .parent = TYPE_VIRTIO_DEVICE, 17286e790746SPaolo Bonzini .instance_size = sizeof(VirtIOBlock), 1729467b3f33SStefan Hajnoczi .instance_init = virtio_blk_instance_init, 17306e790746SPaolo Bonzini .class_init = virtio_blk_class_init, 17316e790746SPaolo Bonzini }; 17326e790746SPaolo Bonzini 17336e790746SPaolo Bonzini static void virtio_register_types(void) 17346e790746SPaolo Bonzini { 1735b5c7ceafSChanglong Xie type_register_static(&virtio_blk_info); 17366e790746SPaolo Bonzini } 17376e790746SPaolo Bonzini 17386e790746SPaolo Bonzini type_init(virtio_register_types) 1739