xref: /qemu/hw/scsi/scsi-disk.c (revision 3e4c67c9)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
9  *  2009-Dec-12 Artyom Tarasenko : implemented standard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
/* Uncomment the next line to enable the DPRINTF debug traces below. */
//#define DEBUG_SCSI

/*
 * DPRINTF: printf-style debug trace prefixed with "scsi-disk: ".
 * It expands to an empty statement unless DEBUG_SCSI is defined.
 */
#ifdef DEBUG_SCSI
#define DPRINTF(fmt, ...) \
do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) do {} while(0)
#endif
30 
31 #include "qemu-common.h"
32 #include "qemu/error-report.h"
33 #include "hw/scsi/scsi.h"
34 #include "block/scsi.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/block-backend.h"
37 #include "sysemu/blockdev.h"
38 #include "hw/block/block.h"
39 #include "sysemu/dma.h"
40 
41 #ifdef __linux
42 #include <scsi/sg.h>
43 #endif
44 
#define SCSI_WRITE_SAME_MAX         524288
/* Buffer size requested from scsi_init_iovec() for buffered reads/writes. */
#define SCSI_DMA_BUF_SIZE           131072
/* Upper bound applied to the length of a standard INQUIRY response. */
#define SCSI_MAX_INQUIRY_LEN        256
#define SCSI_MAX_MODE_LEN           256

#define DEFAULT_DISCARD_GRANULARITY 4096
#define DEFAULT_MAX_UNMAP_SIZE      (1 << 30)   /* 1 GB */
#define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
53 
54 typedef struct SCSIDiskState SCSIDiskState;
55 
/*
 * Per-request state of the emulated disk.
 *
 * The generic SCSIRequest is embedded as the first member so that
 * DO_UPCAST() can convert a SCSIRequest pointer back to a SCSIDiskReq.
 */
typedef struct SCSIDiskReq {
    SCSIRequest req;
    /* Both sector and sector_count are in terms of qemu 512 byte blocks.  */
    uint64_t sector;        /* next sector to transfer */
    uint32_t sector_count;  /* sectors still to be transferred */
    uint32_t buflen;        /* size in bytes of the buffer at iov.iov_base */
    bool started;           /* set once the first data phase has begun */
    struct iovec iov;       /* single element describing the data buffer */
    QEMUIOVector qiov;      /* external I/O vector wrapping iov */
    BlockAcctCookie acct;   /* cookie for block_acct_start()/block_acct_done() */
} SCSIDiskReq;
67 
/*
 * Bit positions (not masks) within SCSIDiskState.features; test them with
 * (features & (1 << SCSI_DISK_F_xxx)).
 */
#define SCSI_DISK_F_REMOVABLE             0
#define SCSI_DISK_F_DPOFUA                1
#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2
71 
/*
 * Per-device state of the emulated SCSI disk / CD-ROM.
 *
 * The generic SCSIDevice is embedded as the first member so that
 * DO_UPCAST() can convert a SCSIDevice pointer back to a SCSIDiskState.
 */
struct SCSIDiskState
{
    SCSIDevice qdev;
    uint32_t features;      /* bitmask built from the SCSI_DISK_F_* bit numbers */
    bool media_changed;
    bool media_event;       /* pending "new media" event for GESN */
    bool eject_request;     /* pending "eject requested" event for GESN */
    uint64_t wwn;           /* reported in VPD page 0x83 when non-zero */
    uint64_t port_wwn;      /* reported in VPD page 0x83 when non-zero */
    uint16_t port_index;    /* reported in VPD page 0x83 when non-zero */
    uint64_t max_unmap_size; /* bytes; reported in the block limits VPD page */
    uint64_t max_io_size;    /* bytes; reported in the block limits VPD page */
    QEMUBH *bh;
    char *version;          /* product revision for standard INQUIRY data */
    char *serial;           /* optional; enables VPD page 0x80 when set */
    char *vendor;           /* vendor identification for standard INQUIRY data */
    char *product;          /* product identification for standard INQUIRY data */
    bool tray_open;         /* data transfers fail with ENOMEDIUM while set */
    bool tray_locked;
};
92 
93 static int scsi_handle_rw_error(SCSIDiskReq *r, int error);
94 
95 static void scsi_free_request(SCSIRequest *req)
96 {
97     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
98 
99     qemu_vfree(r->iov.iov_base);
100 }
101 
102 /* Helper function for command completion with sense.  */
103 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
104 {
105     DPRINTF("Command complete tag=0x%x sense=%d/%d/%d\n",
106             r->req.tag, sense.key, sense.asc, sense.ascq);
107     scsi_req_build_sense(&r->req, sense);
108     scsi_req_complete(&r->req, CHECK_CONDITION);
109 }
110 
111 static uint32_t scsi_init_iovec(SCSIDiskReq *r, size_t size)
112 {
113     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
114 
115     if (!r->iov.iov_base) {
116         r->buflen = size;
117         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
118     }
119     r->iov.iov_len = MIN(r->sector_count * 512, r->buflen);
120     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
121     return r->qiov.size / 512;
122 }
123 
124 static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
125 {
126     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
127 
128     qemu_put_be64s(f, &r->sector);
129     qemu_put_be32s(f, &r->sector_count);
130     qemu_put_be32s(f, &r->buflen);
131     if (r->buflen) {
132         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
133             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
134         } else if (!req->retry) {
135             uint32_t len = r->iov.iov_len;
136             qemu_put_be32s(f, &len);
137             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
138         }
139     }
140 }
141 
142 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
143 {
144     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
145 
146     qemu_get_be64s(f, &r->sector);
147     qemu_get_be32s(f, &r->sector_count);
148     qemu_get_be32s(f, &r->buflen);
149     if (r->buflen) {
150         scsi_init_iovec(r, r->buflen);
151         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
152             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
153         } else if (!r->req.retry) {
154             uint32_t len;
155             qemu_get_be32s(f, &len);
156             r->iov.iov_len = len;
157             assert(r->iov.iov_len <= r->buflen);
158             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
159         }
160     }
161 
162     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
163 }
164 
165 static void scsi_aio_complete(void *opaque, int ret)
166 {
167     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
168     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
169 
170     assert(r->req.aiocb != NULL);
171     r->req.aiocb = NULL;
172     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
173     if (r->req.io_canceled) {
174         scsi_req_cancel_complete(&r->req);
175         goto done;
176     }
177 
178     if (ret < 0) {
179         if (scsi_handle_rw_error(r, -ret)) {
180             goto done;
181         }
182     }
183 
184     scsi_req_complete(&r->req, GOOD);
185 
186 done:
187     scsi_req_unref(&r->req);
188 }
189 
190 static bool scsi_is_cmd_fua(SCSICommand *cmd)
191 {
192     switch (cmd->buf[0]) {
193     case READ_10:
194     case READ_12:
195     case READ_16:
196     case WRITE_10:
197     case WRITE_12:
198     case WRITE_16:
199         return (cmd->buf[1] & 8) != 0;
200 
201     case VERIFY_10:
202     case VERIFY_12:
203     case VERIFY_16:
204     case WRITE_VERIFY_10:
205     case WRITE_VERIFY_12:
206     case WRITE_VERIFY_16:
207         return true;
208 
209     case READ_6:
210     case WRITE_6:
211     default:
212         return false;
213     }
214 }
215 
216 static void scsi_write_do_fua(SCSIDiskReq *r)
217 {
218     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
219 
220     if (r->req.io_canceled) {
221         scsi_req_cancel_complete(&r->req);
222         goto done;
223     }
224 
225     if (scsi_is_cmd_fua(&r->req.cmd)) {
226         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
227                          BLOCK_ACCT_FLUSH);
228         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
229         return;
230     }
231 
232     scsi_req_complete(&r->req, GOOD);
233 
234 done:
235     scsi_req_unref(&r->req);
236 }
237 
238 static void scsi_dma_complete_noio(void *opaque, int ret)
239 {
240     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
241     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
242 
243     if (r->req.aiocb != NULL) {
244         r->req.aiocb = NULL;
245         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
246     }
247     if (r->req.io_canceled) {
248         scsi_req_cancel_complete(&r->req);
249         goto done;
250     }
251 
252     if (ret < 0) {
253         if (scsi_handle_rw_error(r, -ret)) {
254             goto done;
255         }
256     }
257 
258     r->sector += r->sector_count;
259     r->sector_count = 0;
260     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
261         scsi_write_do_fua(r);
262         return;
263     } else {
264         scsi_req_complete(&r->req, GOOD);
265     }
266 
267 done:
268     scsi_req_unref(&r->req);
269 }
270 
271 static void scsi_dma_complete(void *opaque, int ret)
272 {
273     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
274 
275     assert(r->req.aiocb != NULL);
276     scsi_dma_complete_noio(opaque, ret);
277 }
278 
279 static void scsi_read_complete(void * opaque, int ret)
280 {
281     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
282     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
283     int n;
284 
285     assert(r->req.aiocb != NULL);
286     r->req.aiocb = NULL;
287     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
288     if (r->req.io_canceled) {
289         scsi_req_cancel_complete(&r->req);
290         goto done;
291     }
292 
293     if (ret < 0) {
294         if (scsi_handle_rw_error(r, -ret)) {
295             goto done;
296         }
297     }
298 
299     DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size);
300 
301     n = r->qiov.size / 512;
302     r->sector += n;
303     r->sector_count -= n;
304     scsi_req_data(&r->req, r->qiov.size);
305 
306 done:
307     scsi_req_unref(&r->req);
308 }
309 
/*
 * Actually issue a read to the block device.
 *
 * Has the AIO callback signature because scsi_read_data() either chains it
 * after a flush (aiocb set, accounting running) or calls it directly with
 * ret == 0 (aiocb NULL).  Scatter/gather requests are submitted through
 * dma_blk_read(); otherwise the next chunk goes through the request's own
 * buffer and completes in scsi_read_complete().
 */
static void scsi_do_read(void *opaque, int ret)
{
    SCSIDiskReq *r = opaque;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint32_t n;

    /* Finish the preceding flush, if this is its completion. */
    if (r->req.aiocb != NULL) {
        r->req.aiocb = NULL;
        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
    }
    if (r->req.io_canceled) {
        scsi_req_cancel_complete(&r->req);
        goto done;
    }

    if (ret < 0) {
        if (scsi_handle_rw_error(r, -ret)) {
            goto done;
        }
    }

    /* The request is used as the AIO opaque value, so add a ref.  */
    scsi_req_ref(&r->req);

    if (r->req.sg) {
        dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
        r->req.resid -= r->req.sg->size;
        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, r->sector,
                                    scsi_dma_complete, r);
    } else {
        n = scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                         n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
        r->req.aiocb = blk_aio_readv(s->qdev.conf.blk, r->sector, &r->qiov, n,
                                     scsi_read_complete, r);
    }

done:
    /* Drops the reference this invocation was entered with. */
    scsi_req_unref(&r->req);
}
351 
/*
 * Read more data from scsi device into buffer.
 *
 * Completes the request once no sectors remain.  Otherwise takes a
 * reference for the upcoming operation, rejects requests whose direction is
 * to-device (-EINVAL) or whose tray is open (-ENOMEDIUM), and starts the
 * read.  On the first call for a FUA command a flush is issued first and
 * the read is chained from its completion.
 */
static void scsi_read_data(SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    bool first;

    DPRINTF("Read sector_count=%d\n", r->sector_count);
    if (r->sector_count == 0) {
        /* This also clears the sense buffer for REQUEST SENSE.  */
        scsi_req_complete(&r->req, GOOD);
        return;
    }

    /* No data transfer may already be in progress */
    assert(r->req.aiocb == NULL);

    /* The request is used as the AIO opaque value, so add a ref.  */
    scsi_req_ref(&r->req);
    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
        DPRINTF("Data transfer direction invalid\n");
        scsi_read_complete(r, -EINVAL);
        return;
    }

    if (s->tray_open) {
        scsi_read_complete(r, -ENOMEDIUM);
        return;
    }

    /* Only the first data phase of a FUA read is preceded by a flush. */
    first = !r->started;
    r->started = true;
    if (first && scsi_is_cmd_fua(&r->req.cmd)) {
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                         BLOCK_ACCT_FLUSH);
        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read, r);
    } else {
        scsi_do_read(r, 0);
    }
}
392 
393 /*
394  * scsi_handle_rw_error has two return values.  0 means that the error
395  * must be ignored, 1 means that the error has been processed and the
396  * caller should not do anything else for this request.  Note that
397  * scsi_handle_rw_error always manages its reference counts, independent
398  * of the return value.
399  */
400 static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
401 {
402     bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
403     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
404     BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk,
405                                                    is_read, error);
406 
407     if (action == BLOCK_ERROR_ACTION_REPORT) {
408         switch (error) {
409         case ENOMEDIUM:
410             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
411             break;
412         case ENOMEM:
413             scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE));
414             break;
415         case EINVAL:
416             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
417             break;
418         case ENOSPC:
419             scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED));
420             break;
421         default:
422             scsi_check_condition(r, SENSE_CODE(IO_ERROR));
423             break;
424         }
425     }
426     blk_error_action(s->qdev.conf.blk, action, is_read, error);
427     if (action == BLOCK_ERROR_ACTION_STOP) {
428         scsi_req_retry(&r->req);
429     }
430     return action != BLOCK_ERROR_ACTION_IGNORE;
431 }
432 
/*
 * Completion of one buffered write chunk.
 *
 * Used as the blk_aio_writev() callback and also called directly by
 * scsi_write_data() (with aiocb NULL), which is why the AIO teardown is
 * conditional.  After advancing the request position it either finishes the
 * write through scsi_write_do_fua() or asks the HBA for the next chunk of
 * data via scsi_req_data().
 */
static void scsi_write_complete(void * opaque, int ret)
{
    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint32_t n;

    if (r->req.aiocb != NULL) {
        r->req.aiocb = NULL;
        block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
    }
    if (r->req.io_canceled) {
        scsi_req_cancel_complete(&r->req);
        goto done;
    }

    if (ret < 0) {
        if (scsi_handle_rw_error(r, -ret)) {
            goto done;
        }
    }

    /* Account for the sectors that were in the buffer just written. */
    n = r->qiov.size / 512;
    r->sector += n;
    r->sector_count -= n;
    if (r->sector_count == 0) {
        /* scsi_write_do_fua() takes over the reference. */
        scsi_write_do_fua(r);
        return;
    } else {
        scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
        DPRINTF("Write complete tag=0x%x more=%zd\n", r->req.tag, r->qiov.size);
        scsi_req_data(&r->req, r->qiov.size);
    }

done:
    scsi_req_unref(&r->req);
}
469 
/*
 * Write the data currently held by the request to the block device.
 *
 * Takes a reference for the operation, then:
 *  - fails with -EINVAL if the command direction is not to-device;
 *  - on the first call of a buffered transfer (no sg list, empty qiov),
 *    only sets up the buffer and requests data from the HBA;
 *  - fails with -ENOMEDIUM if the tray is open;
 *  - for VERIFY(10/12/16) completes without writing anything;
 *  - otherwise submits the write, via dma_blk_write() for scatter/gather
 *    requests or blk_aio_writev() for the request's own buffer.
 */
static void scsi_write_data(SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint32_t n;

    /* No data transfer may already be in progress */
    assert(r->req.aiocb == NULL);

    /* The request is used as the AIO opaque value, so add a ref.  */
    scsi_req_ref(&r->req);
    if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
        DPRINTF("Data transfer direction invalid\n");
        scsi_write_complete(r, -EINVAL);
        return;
    }

    if (!r->req.sg && !r->qiov.size) {
        /* Called for the first time.  Ask the driver to send us more data.  */
        r->started = true;
        scsi_write_complete(r, 0);
        return;
    }
    if (s->tray_open) {
        scsi_write_complete(r, -ENOMEDIUM);
        return;
    }

    /* VERIFY consumes the data but nothing is written to the backend. */
    if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
        r->req.cmd.buf[0] == VERIFY_16) {
        if (r->req.sg) {
            scsi_dma_complete_noio(r, 0);
        } else {
            scsi_write_complete(r, 0);
        }
        return;
    }

    if (r->req.sg) {
        dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
        r->req.resid -= r->req.sg->size;
        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector,
                                     scsi_dma_complete, r);
    } else {
        n = r->qiov.size / 512;
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                         n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
        r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, r->sector, &r->qiov, n,
                                      scsi_write_complete, r);
    }
}
521 
522 /* Return a pointer to the data buffer.  */
523 static uint8_t *scsi_get_buf(SCSIRequest *req)
524 {
525     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
526 
527     return (uint8_t *)r->iov.iov_base;
528 }
529 
/*
 * Build the response to an INQUIRY command in @outbuf.
 *
 * When the EVPD bit (CDB byte 1, bit 0) is set, the vital product data page
 * selected by CDB byte 2 is produced; pages 0x00, 0x80, 0x83, 0xb0 and 0xb2
 * are handled.  Otherwise standard INQUIRY data is produced, which requires
 * the page code to be zero.
 *
 * Returns the number of bytes placed in @outbuf, or -1 if the request is
 * not supported.
 *
 * NOTE(review): the function writes up to a few hundred bytes without
 * checking the size of @outbuf, and some bytes are left untouched; this
 * presumably relies on the caller providing a large, zeroed buffer --
 * confirm against the caller.
 */
static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
    int buflen = 0;
    int start;

    if (req->cmd.buf[1] & 0x1) {
        /* Vital product data */
        uint8_t page_code = req->cmd.buf[2];

        /* Common 4-byte page header; the length byte is patched at the end. */
        outbuf[buflen++] = s->qdev.type & 0x1f;
        outbuf[buflen++] = page_code ; // this page
        outbuf[buflen++] = 0x00;
        outbuf[buflen++] = 0x00;
        start = buflen;

        switch (page_code) {
        case 0x00: /* Supported page codes, mandatory */
        {
            DPRINTF("Inquiry EVPD[Supported pages] "
                    "buffer size %zd\n", req->cmd.xfer);
            outbuf[buflen++] = 0x00; // list of supported pages (this page)
            if (s->serial) {
                outbuf[buflen++] = 0x80; // unit serial number
            }
            outbuf[buflen++] = 0x83; // device identification
            if (s->qdev.type == TYPE_DISK) {
                outbuf[buflen++] = 0xb0; // block limits
                outbuf[buflen++] = 0xb2; // thin provisioning
            }
            break;
        }
        case 0x80: /* Device serial number, optional */
        {
            int l;

            if (!s->serial) {
                DPRINTF("Inquiry (EVPD[Serial number] not supported\n");
                return -1;
            }

            /* At most 20 characters of the serial number are reported. */
            l = strlen(s->serial);
            if (l > 20) {
                l = 20;
            }

            DPRINTF("Inquiry EVPD[Serial number] "
                    "buffer size %zd\n", req->cmd.xfer);
            memcpy(outbuf+buflen, s->serial, l);
            buflen += l;
            break;
        }

        case 0x83: /* Device identification page, mandatory */
        {
            /* Identify by serial number if set, else by block backend name. */
            const char *str = s->serial ?: blk_name(s->qdev.conf.blk);
            int max_len = s->serial ? 20 : 255 - 8;
            int id_len = strlen(str);

            if (id_len > max_len) {
                id_len = max_len;
            }
            DPRINTF("Inquiry EVPD[Device identification] "
                    "buffer size %zd\n", req->cmd.xfer);

            outbuf[buflen++] = 0x2; // ASCII
            outbuf[buflen++] = 0;   // not officially assigned
            outbuf[buflen++] = 0;   // reserved
            outbuf[buflen++] = id_len; // length of data following
            memcpy(outbuf+buflen, str, id_len);
            buflen += id_len;

            /* Optional descriptors, each emitted only when configured. */
            if (s->wwn) {
                outbuf[buflen++] = 0x1; // Binary
                outbuf[buflen++] = 0x3; // NAA
                outbuf[buflen++] = 0;   // reserved
                outbuf[buflen++] = 8;
                stq_be_p(&outbuf[buflen], s->wwn);
                buflen += 8;
            }

            if (s->port_wwn) {
                outbuf[buflen++] = 0x61; // SAS / Binary
                outbuf[buflen++] = 0x93; // PIV / Target port / NAA
                outbuf[buflen++] = 0;    // reserved
                outbuf[buflen++] = 8;
                stq_be_p(&outbuf[buflen], s->port_wwn);
                buflen += 8;
            }

            if (s->port_index) {
                outbuf[buflen++] = 0x61; // SAS / Binary
                outbuf[buflen++] = 0x94; // PIV / Target port / relative target port
                outbuf[buflen++] = 0;    // reserved
                outbuf[buflen++] = 4;
                /* The 16-bit index occupies the last two of the four bytes. */
                stw_be_p(&outbuf[buflen + 2], s->port_index);
                buflen += 4;
            }
            break;
        }
        case 0xb0: /* block limits */
        {
            /* All limits are reported in units of logical blocks. */
            unsigned int unmap_sectors =
                    s->qdev.conf.discard_granularity / s->qdev.blocksize;
            unsigned int min_io_size =
                    s->qdev.conf.min_io_size / s->qdev.blocksize;
            unsigned int opt_io_size =
                    s->qdev.conf.opt_io_size / s->qdev.blocksize;
            unsigned int max_unmap_sectors =
                    s->max_unmap_size / s->qdev.blocksize;
            unsigned int max_io_sectors =
                    s->max_io_size / s->qdev.blocksize;

            if (s->qdev.type == TYPE_ROM) {
                DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
                        page_code);
                return -1;
            }
            /* required VPD size with unmap support */
            buflen = 0x40;
            memset(outbuf + 4, 0, buflen - 4);

            outbuf[4] = 0x1; /* wsnz */

            /* optimal transfer length granularity */
            outbuf[6] = (min_io_size >> 8) & 0xff;
            outbuf[7] = min_io_size & 0xff;

            /* maximum transfer length */
            outbuf[8] = (max_io_sectors >> 24) & 0xff;
            outbuf[9] = (max_io_sectors >> 16) & 0xff;
            outbuf[10] = (max_io_sectors >> 8) & 0xff;
            outbuf[11] = max_io_sectors & 0xff;

            /* optimal transfer length */
            outbuf[12] = (opt_io_size >> 24) & 0xff;
            outbuf[13] = (opt_io_size >> 16) & 0xff;
            outbuf[14] = (opt_io_size >> 8) & 0xff;
            outbuf[15] = opt_io_size & 0xff;

            /* max unmap LBA count, default is 1GB */
            outbuf[20] = (max_unmap_sectors >> 24) & 0xff;
            outbuf[21] = (max_unmap_sectors >> 16) & 0xff;
            outbuf[22] = (max_unmap_sectors >> 8) & 0xff;
            outbuf[23] = max_unmap_sectors & 0xff;

            /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header.  */
            outbuf[24] = 0;
            outbuf[25] = 0;
            outbuf[26] = 0;
            outbuf[27] = 255;

            /* optimal unmap granularity */
            outbuf[28] = (unmap_sectors >> 24) & 0xff;
            outbuf[29] = (unmap_sectors >> 16) & 0xff;
            outbuf[30] = (unmap_sectors >> 8) & 0xff;
            outbuf[31] = unmap_sectors & 0xff;

            /* max write same size */
            /* 64-bit field: upper half zero, lower half max_io_sectors. */
            outbuf[36] = 0;
            outbuf[37] = 0;
            outbuf[38] = 0;
            outbuf[39] = 0;

            outbuf[40] = (max_io_sectors >> 24) & 0xff;
            outbuf[41] = (max_io_sectors >> 16) & 0xff;
            outbuf[42] = (max_io_sectors >> 8) & 0xff;
            outbuf[43] = max_io_sectors & 0xff;
            break;
        }
        case 0xb2: /* thin provisioning */
        {
            buflen = 8;
            outbuf[4] = 0;
            outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
            outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
            outbuf[7] = 0;
            break;
        }
        default:
            return -1;
        }
        /* done with EVPD */
        /* The page length is a single byte, so the payload must fit in it. */
        assert(buflen - start <= 255);
        outbuf[start - 1] = buflen - start;
        return buflen;
    }

    /* Standard INQUIRY data */
    if (req->cmd.buf[2] != 0) {
        return -1;
    }

    /* PAGE CODE == 0 */
    buflen = req->cmd.xfer;
    if (buflen > SCSI_MAX_INQUIRY_LEN) {
        buflen = SCSI_MAX_INQUIRY_LEN;
    }

    outbuf[0] = s->qdev.type & 0x1f;
    /* Bit 7 of byte 1 advertises removable media. */
    outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;

    /* Space-padded product (bytes 16-31) and vendor (bytes 8-15) strings. */
    strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
    strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');

    /* Revision: up to 4 characters, zero-padded. */
    memset(&outbuf[32], 0, 4);
    memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
    /*
     * We claim conformance to SPC-3, which is required for guests
     * to ask for modern features like READ CAPACITY(16) or the
     * block characteristics VPD page by default.  Not all of SPC-3
     * is actually implemented, but we're good enough.
     */
    outbuf[2] = 5;
    outbuf[3] = 2 | 0x10; /* Format 2, HiSup */

    if (buflen > 36) {
        outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
    } else {
        /* If the allocation length of CDB is too small,
               the additional length is not adjusted */
        outbuf[4] = 36 - 5;
    }

    /* Sync data transfer and TCQ.  */
    outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
    return buflen;
}
758 
759 static inline bool media_is_dvd(SCSIDiskState *s)
760 {
761     uint64_t nb_sectors;
762     if (s->qdev.type != TYPE_ROM) {
763         return false;
764     }
765     if (!blk_is_inserted(s->qdev.conf.blk)) {
766         return false;
767     }
768     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
769     return nb_sectors > CD_MAX_SECTORS;
770 }
771 
772 static inline bool media_is_cd(SCSIDiskState *s)
773 {
774     uint64_t nb_sectors;
775     if (s->qdev.type != TYPE_ROM) {
776         return false;
777     }
778     if (!blk_is_inserted(s->qdev.conf.blk)) {
779         return false;
780     }
781     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
782     return nb_sectors <= CD_MAX_SECTORS;
783 }
784 
785 static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
786                                       uint8_t *outbuf)
787 {
788     uint8_t type = r->req.cmd.buf[1] & 7;
789 
790     if (s->qdev.type != TYPE_ROM) {
791         return -1;
792     }
793 
794     /* Types 1/2 are only defined for Blu-Ray.  */
795     if (type != 0) {
796         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
797         return -1;
798     }
799 
800     memset(outbuf, 0, 34);
801     outbuf[1] = 32;
802     outbuf[2] = 0xe; /* last session complete, disc finalized */
803     outbuf[3] = 1;   /* first track on disc */
804     outbuf[4] = 1;   /* # of sessions */
805     outbuf[5] = 1;   /* first track of last session */
806     outbuf[6] = 1;   /* last track of last session */
807     outbuf[7] = 0x20; /* unrestricted use */
808     outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
809     /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
810     /* 12-23: not meaningful for CD-ROM or DVD-ROM */
811     /* 24-31: disc bar code */
812     /* 32: disc application code */
813     /* 33: number of OPC tables */
814 
815     return 34;
816 }
817 
/*
 * Build the response to READ DVD STRUCTURE for a CD-ROM device.
 *
 * CDB byte 1 selects the media type (only 0 is accepted), byte 6 the layer
 * and byte 7 the format.  Formats 0x00, 0x01 and 0x04 return data, format
 * 0xff lists the supported formats, everything else fails.
 *
 * Returns the response length, or -1 on failure.  Some failure paths have
 * already set sense data via scsi_check_condition(); others leave that to
 * the caller.
 */
static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
                                   uint8_t *outbuf)
{
    /*
     * Response size (payload plus 4-byte header) per format code; formats
     * without an entry are zero.
     */
    static const int rds_caps_size[5] = {
        [0] = 2048 + 4,
        [1] = 4 + 4,
        [3] = 188 + 4,
        [4] = 2048 + 4,
    };

    uint8_t media = r->req.cmd.buf[1];
    uint8_t layer = r->req.cmd.buf[6];
    uint8_t format = r->req.cmd.buf[7];
    int size = -1;

    if (s->qdev.type != TYPE_ROM) {
        return -1;
    }
    if (media != 0) {
        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
        return -1;
    }

    /* Every format except the capability list needs a DVD in the drive. */
    if (format != 0xff) {
        if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) {
            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
            return -1;
        }
        if (media_is_cd(s)) {
            scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
            return -1;
        }
        if (format >= ARRAY_SIZE(rds_caps_size)) {
            return -1;
        }
        size = rds_caps_size[format];
        memset(outbuf, 0, size);
    }

    switch (format) {
    case 0x00: {
        /* Physical format information */
        uint64_t nb_sectors;
        if (layer != 0) {
            goto fail;
        }
        blk_get_geometry(s->qdev.conf.blk, &nb_sectors);

        outbuf[4] = 1;   /* DVD-ROM, part version 1 */
        outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
        outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
        outbuf[7] = 0;   /* default densities */

        /* nb_sectors counts 512-byte sectors; >> 2 converts to 2048-byte. */
        stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
        stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
        break;
    }

    case 0x01: /* DVD copyright information, all zeros */
        break;

    case 0x03: /* BCA information - invalid field for no BCA info */
        return -1;

    case 0x04: /* DVD disc manufacturing information, all zeros */
        break;

    case 0xff: { /* List capabilities */
        int i;
        size = 4;
        /* One 4-byte descriptor per format that has a non-zero size. */
        for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
            if (!rds_caps_size[i]) {
                continue;
            }
            outbuf[size] = i;
            outbuf[size + 1] = 0x40; /* Not writable, readable */
            stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
            size += 4;
        }
        break;
     }

    default:
        return -1;
    }

    /* Size of buffer, not including 2 byte size field */
    stw_be_p(outbuf, size - 2);
    return size;

fail:
    return -1;
}
911 
912 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
913 {
914     uint8_t event_code, media_status;
915 
916     media_status = 0;
917     if (s->tray_open) {
918         media_status = MS_TRAY_OPEN;
919     } else if (blk_is_inserted(s->qdev.conf.blk)) {
920         media_status = MS_MEDIA_PRESENT;
921     }
922 
923     /* Event notification descriptor */
924     event_code = MEC_NO_CHANGE;
925     if (media_status != MS_TRAY_OPEN) {
926         if (s->media_event) {
927             event_code = MEC_NEW_MEDIA;
928             s->media_event = false;
929         } else if (s->eject_request) {
930             event_code = MEC_EJECT_REQUESTED;
931             s->eject_request = false;
932         }
933     }
934 
935     outbuf[0] = event_code;
936     outbuf[1] = media_status;
937 
938     /* These fields are reserved, just clear them. */
939     outbuf[2] = 0;
940     outbuf[3] = 0;
941     return 4;
942 }
943 
944 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
945                                               uint8_t *outbuf)
946 {
947     int size;
948     uint8_t *buf = r->req.cmd.buf;
949     uint8_t notification_class_request = buf[4];
950     if (s->qdev.type != TYPE_ROM) {
951         return -1;
952     }
953     if ((buf[1] & 1) == 0) {
954         /* asynchronous */
955         return -1;
956     }
957 
958     size = 4;
959     outbuf[0] = outbuf[1] = 0;
960     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
961     if (notification_class_request & (1 << GESN_MEDIA)) {
962         outbuf[2] = GESN_MEDIA;
963         size += scsi_event_status_media(s, &outbuf[size]);
964     } else {
965         outbuf[2] = 0x80;
966     }
967     stw_be_p(outbuf, size - 4);
968     return size;
969 }
970 
971 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
972 {
973     int current;
974 
975     if (s->qdev.type != TYPE_ROM) {
976         return -1;
977     }
978     current = media_is_dvd(s) ? MMC_PROFILE_DVD_ROM : MMC_PROFILE_CD_ROM;
979     memset(outbuf, 0, 40);
980     stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
981     stw_be_p(&outbuf[6], current);
982     /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
983     outbuf[10] = 0x03; /* persistent, current */
984     outbuf[11] = 8; /* two profiles */
985     stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
986     outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
987     stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
988     outbuf[18] = (current == MMC_PROFILE_CD_ROM);
989     /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
990     stw_be_p(&outbuf[20], 1);
991     outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
992     outbuf[23] = 8;
993     stl_be_p(&outbuf[24], 1); /* SCSI */
994     outbuf[28] = 1; /* DBE = 1, mandatory */
995     /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
996     stw_be_p(&outbuf[32], 3);
997     outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
998     outbuf[35] = 4;
999     outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
1000     /* TODO: Random readable, CD read, DVD read, drive serial number,
1001        power management */
1002     return 40;
1003 }
1004 
1005 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1006 {
1007     if (s->qdev.type != TYPE_ROM) {
1008         return -1;
1009     }
1010     memset(outbuf, 0, 8);
1011     outbuf[5] = 1; /* CD-ROM */
1012     return 8;
1013 }
1014 
1015 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
1016                            int page_control)
1017 {
1018     static const int mode_sense_valid[0x3f] = {
1019         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
1020         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
1021         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1022         [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1023         [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
1024         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
1025     };
1026 
1027     uint8_t *p = *p_outbuf + 2;
1028     int length;
1029 
1030     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
1031         return -1;
1032     }
1033 
1034     /*
1035      * If Changeable Values are requested, a mask denoting those mode parameters
1036      * that are changeable shall be returned. As we currently don't support
1037      * parameter changes via MODE_SELECT all bits are returned set to zero.
1038      * The buffer was already menset to zero by the caller of this function.
1039      *
1040      * The offsets here are off by two compared to the descriptions in the
1041      * SCSI specs, because those include a 2-byte header.  This is unfortunate,
1042      * but it is done so that offsets are consistent within our implementation
1043      * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
1044      * 2-byte and 4-byte headers.
1045      */
1046     switch (page) {
1047     case MODE_PAGE_HD_GEOMETRY:
1048         length = 0x16;
1049         if (page_control == 1) { /* Changeable Values */
1050             break;
1051         }
1052         /* if a geometry hint is available, use it */
1053         p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
1054         p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
1055         p[2] = s->qdev.conf.cyls & 0xff;
1056         p[3] = s->qdev.conf.heads & 0xff;
1057         /* Write precomp start cylinder, disabled */
1058         p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
1059         p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
1060         p[6] = s->qdev.conf.cyls & 0xff;
1061         /* Reduced current start cylinder, disabled */
1062         p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
1063         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1064         p[9] = s->qdev.conf.cyls & 0xff;
1065         /* Device step rate [ns], 200ns */
1066         p[10] = 0;
1067         p[11] = 200;
1068         /* Landing zone cylinder */
1069         p[12] = 0xff;
1070         p[13] =  0xff;
1071         p[14] = 0xff;
1072         /* Medium rotation rate [rpm], 5400 rpm */
1073         p[18] = (5400 >> 8) & 0xff;
1074         p[19] = 5400 & 0xff;
1075         break;
1076 
1077     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
1078         length = 0x1e;
1079         if (page_control == 1) { /* Changeable Values */
1080             break;
1081         }
1082         /* Transfer rate [kbit/s], 5Mbit/s */
1083         p[0] = 5000 >> 8;
1084         p[1] = 5000 & 0xff;
1085         /* if a geometry hint is available, use it */
1086         p[2] = s->qdev.conf.heads & 0xff;
1087         p[3] = s->qdev.conf.secs & 0xff;
1088         p[4] = s->qdev.blocksize >> 8;
1089         p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
1090         p[7] = s->qdev.conf.cyls & 0xff;
1091         /* Write precomp start cylinder, disabled */
1092         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1093         p[9] = s->qdev.conf.cyls & 0xff;
1094         /* Reduced current start cylinder, disabled */
1095         p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
1096         p[11] = s->qdev.conf.cyls & 0xff;
1097         /* Device step rate [100us], 100us */
1098         p[12] = 0;
1099         p[13] = 1;
1100         /* Device step pulse width [us], 1us */
1101         p[14] = 1;
1102         /* Device head settle delay [100us], 100us */
1103         p[15] = 0;
1104         p[16] = 1;
1105         /* Motor on delay [0.1s], 0.1s */
1106         p[17] = 1;
1107         /* Motor off delay [0.1s], 0.1s */
1108         p[18] = 1;
1109         /* Medium rotation rate [rpm], 5400 rpm */
1110         p[26] = (5400 >> 8) & 0xff;
1111         p[27] = 5400 & 0xff;
1112         break;
1113 
1114     case MODE_PAGE_CACHING:
1115         length = 0x12;
1116         if (page_control == 1 || /* Changeable Values */
1117             blk_enable_write_cache(s->qdev.conf.blk)) {
1118             p[0] = 4; /* WCE */
1119         }
1120         break;
1121 
1122     case MODE_PAGE_R_W_ERROR:
1123         length = 10;
1124         if (page_control == 1) { /* Changeable Values */
1125             break;
1126         }
1127         p[0] = 0x80; /* Automatic Write Reallocation Enabled */
1128         if (s->qdev.type == TYPE_ROM) {
1129             p[1] = 0x20; /* Read Retry Count */
1130         }
1131         break;
1132 
1133     case MODE_PAGE_AUDIO_CTL:
1134         length = 14;
1135         break;
1136 
1137     case MODE_PAGE_CAPABILITIES:
1138         length = 0x14;
1139         if (page_control == 1) { /* Changeable Values */
1140             break;
1141         }
1142 
1143         p[0] = 0x3b; /* CD-R & CD-RW read */
1144         p[1] = 0; /* Writing not supported */
1145         p[2] = 0x7f; /* Audio, composite, digital out,
1146                         mode 2 form 1&2, multi session */
1147         p[3] = 0xff; /* CD DA, DA accurate, RW supported,
1148                         RW corrected, C2 errors, ISRC,
1149                         UPC, Bar code */
1150         p[4] = 0x2d | (s->tray_locked ? 2 : 0);
1151         /* Locking supported, jumper present, eject, tray */
1152         p[5] = 0; /* no volume & mute control, no
1153                      changer */
1154         p[6] = (50 * 176) >> 8; /* 50x read speed */
1155         p[7] = (50 * 176) & 0xff;
1156         p[8] = 2 >> 8; /* Two volume levels */
1157         p[9] = 2 & 0xff;
1158         p[10] = 2048 >> 8; /* 2M buffer */
1159         p[11] = 2048 & 0xff;
1160         p[12] = (16 * 176) >> 8; /* 16x read speed current */
1161         p[13] = (16 * 176) & 0xff;
1162         p[16] = (16 * 176) >> 8; /* 16x write speed */
1163         p[17] = (16 * 176) & 0xff;
1164         p[18] = (16 * 176) >> 8; /* 16x write speed current */
1165         p[19] = (16 * 176) & 0xff;
1166         break;
1167 
1168     default:
1169         return -1;
1170     }
1171 
1172     assert(length < 256);
1173     (*p_outbuf)[0] = page;
1174     (*p_outbuf)[1] = length;
1175     *p_outbuf += length + 2;
1176     return length + 2;
1177 }
1178 
1179 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
1180 {
1181     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1182     uint64_t nb_sectors;
1183     bool dbd;
1184     int page, buflen, ret, page_control;
1185     uint8_t *p;
1186     uint8_t dev_specific_param;
1187 
1188     dbd = (r->req.cmd.buf[1] & 0x8) != 0;
1189     page = r->req.cmd.buf[2] & 0x3f;
1190     page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
1191     DPRINTF("Mode Sense(%d) (page %d, xfer %zd, page_control %d)\n",
1192         (r->req.cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, r->req.cmd.xfer, page_control);
1193     memset(outbuf, 0, r->req.cmd.xfer);
1194     p = outbuf;
1195 
1196     if (s->qdev.type == TYPE_DISK) {
1197         dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
1198         if (blk_is_read_only(s->qdev.conf.blk)) {
1199             dev_specific_param |= 0x80; /* Readonly.  */
1200         }
1201     } else {
1202         /* MMC prescribes that CD/DVD drives have no block descriptors,
1203          * and defines no device-specific parameter.  */
1204         dev_specific_param = 0x00;
1205         dbd = true;
1206     }
1207 
1208     if (r->req.cmd.buf[0] == MODE_SENSE) {
1209         p[1] = 0; /* Default media type.  */
1210         p[2] = dev_specific_param;
1211         p[3] = 0; /* Block descriptor length.  */
1212         p += 4;
1213     } else { /* MODE_SENSE_10 */
1214         p[2] = 0; /* Default media type.  */
1215         p[3] = dev_specific_param;
1216         p[6] = p[7] = 0; /* Block descriptor length.  */
1217         p += 8;
1218     }
1219 
1220     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1221     if (!dbd && nb_sectors) {
1222         if (r->req.cmd.buf[0] == MODE_SENSE) {
1223             outbuf[3] = 8; /* Block descriptor length  */
1224         } else { /* MODE_SENSE_10 */
1225             outbuf[7] = 8; /* Block descriptor length  */
1226         }
1227         nb_sectors /= (s->qdev.blocksize / 512);
1228         if (nb_sectors > 0xffffff) {
1229             nb_sectors = 0;
1230         }
1231         p[0] = 0; /* media density code */
1232         p[1] = (nb_sectors >> 16) & 0xff;
1233         p[2] = (nb_sectors >> 8) & 0xff;
1234         p[3] = nb_sectors & 0xff;
1235         p[4] = 0; /* reserved */
1236         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
1237         p[6] = s->qdev.blocksize >> 8;
1238         p[7] = 0;
1239         p += 8;
1240     }
1241 
1242     if (page_control == 3) {
1243         /* Saved Values */
1244         scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
1245         return -1;
1246     }
1247 
1248     if (page == 0x3f) {
1249         for (page = 0; page <= 0x3e; page++) {
1250             mode_sense_page(s, page, &p, page_control);
1251         }
1252     } else {
1253         ret = mode_sense_page(s, page, &p, page_control);
1254         if (ret == -1) {
1255             return -1;
1256         }
1257     }
1258 
1259     buflen = p - outbuf;
1260     /*
1261      * The mode data length field specifies the length in bytes of the
1262      * following data that is available to be transferred. The mode data
1263      * length does not include itself.
1264      */
1265     if (r->req.cmd.buf[0] == MODE_SENSE) {
1266         outbuf[0] = buflen - 1;
1267     } else { /* MODE_SENSE_10 */
1268         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
1269         outbuf[1] = (buflen - 2) & 0xff;
1270     }
1271     return buflen;
1272 }
1273 
1274 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1275 {
1276     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1277     int start_track, format, msf, toclen;
1278     uint64_t nb_sectors;
1279 
1280     msf = req->cmd.buf[1] & 2;
1281     format = req->cmd.buf[2] & 0xf;
1282     start_track = req->cmd.buf[6];
1283     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1284     DPRINTF("Read TOC (track %d format %d msf %d)\n", start_track, format, msf >> 1);
1285     nb_sectors /= s->qdev.blocksize / 512;
1286     switch (format) {
1287     case 0:
1288         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1289         break;
1290     case 1:
1291         /* multi session : only a single session defined */
1292         toclen = 12;
1293         memset(outbuf, 0, 12);
1294         outbuf[1] = 0x0a;
1295         outbuf[2] = 0x01;
1296         outbuf[3] = 0x01;
1297         break;
1298     case 2:
1299         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1300         break;
1301     default:
1302         return -1;
1303     }
1304     return toclen;
1305 }
1306 
1307 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1308 {
1309     SCSIRequest *req = &r->req;
1310     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1311     bool start = req->cmd.buf[4] & 1;
1312     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1313     int pwrcnd = req->cmd.buf[4] & 0xf0;
1314 
1315     if (pwrcnd) {
1316         /* eject/load only happens for power condition == 0 */
1317         return 0;
1318     }
1319 
1320     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1321         if (!start && !s->tray_open && s->tray_locked) {
1322             scsi_check_condition(r,
1323                                  blk_is_inserted(s->qdev.conf.blk)
1324                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1325                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1326             return -1;
1327         }
1328 
1329         if (s->tray_open != !start) {
1330             blk_eject(s->qdev.conf.blk, !start);
1331             s->tray_open = !start;
1332         }
1333     }
1334     return 0;
1335 }
1336 
1337 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1338 {
1339     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1340     int buflen = r->iov.iov_len;
1341 
1342     if (buflen) {
1343         DPRINTF("Read buf_len=%d\n", buflen);
1344         r->iov.iov_len = 0;
1345         r->started = true;
1346         scsi_req_data(&r->req, buflen);
1347         return;
1348     }
1349 
1350     /* This also clears the sense buffer for REQUEST SENSE.  */
1351     scsi_req_complete(&r->req, GOOD);
1352 }
1353 
1354 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1355                                        uint8_t *inbuf, int inlen)
1356 {
1357     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1358     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1359     uint8_t *p;
1360     int len, expected_len, changeable_len, i;
1361 
1362     /* The input buffer does not include the page header, so it is
1363      * off by 2 bytes.
1364      */
1365     expected_len = inlen + 2;
1366     if (expected_len > SCSI_MAX_MODE_LEN) {
1367         return -1;
1368     }
1369 
1370     p = mode_current;
1371     memset(mode_current, 0, inlen + 2);
1372     len = mode_sense_page(s, page, &p, 0);
1373     if (len < 0 || len != expected_len) {
1374         return -1;
1375     }
1376 
1377     p = mode_changeable;
1378     memset(mode_changeable, 0, inlen + 2);
1379     changeable_len = mode_sense_page(s, page, &p, 1);
1380     assert(changeable_len == len);
1381 
1382     /* Check that unchangeable bits are the same as what MODE SENSE
1383      * would return.
1384      */
1385     for (i = 2; i < len; i++) {
1386         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1387             return -1;
1388         }
1389     }
1390     return 0;
1391 }
1392 
1393 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1394 {
1395     switch (page) {
1396     case MODE_PAGE_CACHING:
1397         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1398         break;
1399 
1400     default:
1401         break;
1402     }
1403 }
1404 
1405 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1406 {
1407     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1408 
1409     while (len > 0) {
1410         int page, subpage, page_len;
1411 
1412         /* Parse both possible formats for the mode page headers.  */
1413         page = p[0] & 0x3f;
1414         if (p[0] & 0x40) {
1415             if (len < 4) {
1416                 goto invalid_param_len;
1417             }
1418             subpage = p[1];
1419             page_len = lduw_be_p(&p[2]);
1420             p += 4;
1421             len -= 4;
1422         } else {
1423             if (len < 2) {
1424                 goto invalid_param_len;
1425             }
1426             subpage = 0;
1427             page_len = p[1];
1428             p += 2;
1429             len -= 2;
1430         }
1431 
1432         if (subpage) {
1433             goto invalid_param;
1434         }
1435         if (page_len > len) {
1436             goto invalid_param_len;
1437         }
1438 
1439         if (!change) {
1440             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1441                 goto invalid_param;
1442             }
1443         } else {
1444             scsi_disk_apply_mode_select(s, page, p);
1445         }
1446 
1447         p += page_len;
1448         len -= page_len;
1449     }
1450     return 0;
1451 
1452 invalid_param:
1453     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1454     return -1;
1455 
1456 invalid_param_len:
1457     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1458     return -1;
1459 }
1460 
1461 static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
1462 {
1463     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1464     uint8_t *p = inbuf;
1465     int cmd = r->req.cmd.buf[0];
1466     int len = r->req.cmd.xfer;
1467     int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
1468     int bd_len;
1469     int pass;
1470 
1471     /* We only support PF=1, SP=0.  */
1472     if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
1473         goto invalid_field;
1474     }
1475 
1476     if (len < hdr_len) {
1477         goto invalid_param_len;
1478     }
1479 
1480     bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
1481     len -= hdr_len;
1482     p += hdr_len;
1483     if (len < bd_len) {
1484         goto invalid_param_len;
1485     }
1486     if (bd_len != 0 && bd_len != 8) {
1487         goto invalid_param;
1488     }
1489 
1490     len -= bd_len;
1491     p += bd_len;
1492 
1493     /* Ensure no change is made if there is an error!  */
1494     for (pass = 0; pass < 2; pass++) {
1495         if (mode_select_pages(r, p, len, pass == 1) < 0) {
1496             assert(pass == 0);
1497             return;
1498         }
1499     }
1500     if (!blk_enable_write_cache(s->qdev.conf.blk)) {
1501         /* The request is used as the AIO opaque value, so add a ref.  */
1502         scsi_req_ref(&r->req);
1503         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
1504                          BLOCK_ACCT_FLUSH);
1505         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
1506         return;
1507     }
1508 
1509     scsi_req_complete(&r->req, GOOD);
1510     return;
1511 
1512 invalid_param:
1513     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1514     return;
1515 
1516 invalid_param_len:
1517     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1518     return;
1519 
1520 invalid_field:
1521     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1522 }
1523 
1524 static inline bool check_lba_range(SCSIDiskState *s,
1525                                    uint64_t sector_num, uint32_t nb_sectors)
1526 {
1527     /*
1528      * The first line tests that no overflow happens when computing the last
1529      * sector.  The second line tests that the last accessed sector is in
1530      * range.
1531      *
1532      * Careful, the computations should not underflow for nb_sectors == 0,
1533      * and a 0-block read to the first LBA beyond the end of device is
1534      * valid.
1535      */
1536     return (sector_num <= sector_num + nb_sectors &&
1537             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1538 }
1539 
1540 typedef struct UnmapCBData {
1541     SCSIDiskReq *r;
1542     uint8_t *inbuf;
1543     int count;
1544 } UnmapCBData;
1545 
1546 static void scsi_unmap_complete(void *opaque, int ret)
1547 {
1548     UnmapCBData *data = opaque;
1549     SCSIDiskReq *r = data->r;
1550     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1551     uint64_t sector_num;
1552     uint32_t nb_sectors;
1553 
1554     r->req.aiocb = NULL;
1555     if (r->req.io_canceled) {
1556         scsi_req_cancel_complete(&r->req);
1557         goto done;
1558     }
1559 
1560     if (ret < 0) {
1561         if (scsi_handle_rw_error(r, -ret)) {
1562             goto done;
1563         }
1564     }
1565 
1566     if (data->count > 0) {
1567         sector_num = ldq_be_p(&data->inbuf[0]);
1568         nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
1569         if (!check_lba_range(s, sector_num, nb_sectors)) {
1570             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1571             goto done;
1572         }
1573 
1574         r->req.aiocb = blk_aio_discard(s->qdev.conf.blk,
1575                                        sector_num * (s->qdev.blocksize / 512),
1576                                        nb_sectors * (s->qdev.blocksize / 512),
1577                                        scsi_unmap_complete, data);
1578         data->count--;
1579         data->inbuf += 16;
1580         return;
1581     }
1582 
1583     scsi_req_complete(&r->req, GOOD);
1584 
1585 done:
1586     scsi_req_unref(&r->req);
1587     g_free(data);
1588 }
1589 
1590 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
1591 {
1592     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1593     uint8_t *p = inbuf;
1594     int len = r->req.cmd.xfer;
1595     UnmapCBData *data;
1596 
1597     /* Reject ANCHOR=1.  */
1598     if (r->req.cmd.buf[1] & 0x1) {
1599         goto invalid_field;
1600     }
1601 
1602     if (len < 8) {
1603         goto invalid_param_len;
1604     }
1605     if (len < lduw_be_p(&p[0]) + 2) {
1606         goto invalid_param_len;
1607     }
1608     if (len < lduw_be_p(&p[2]) + 8) {
1609         goto invalid_param_len;
1610     }
1611     if (lduw_be_p(&p[2]) & 15) {
1612         goto invalid_param_len;
1613     }
1614 
1615     if (blk_is_read_only(s->qdev.conf.blk)) {
1616         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1617         return;
1618     }
1619 
1620     data = g_new0(UnmapCBData, 1);
1621     data->r = r;
1622     data->inbuf = &p[8];
1623     data->count = lduw_be_p(&p[2]) >> 4;
1624 
1625     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
1626     scsi_req_ref(&r->req);
1627     scsi_unmap_complete(data, 0);
1628     return;
1629 
1630 invalid_param_len:
1631     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1632     return;
1633 
1634 invalid_field:
1635     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1636 }
1637 
1638 typedef struct WriteSameCBData {
1639     SCSIDiskReq *r;
1640     int64_t sector;
1641     int nb_sectors;
1642     QEMUIOVector qiov;
1643     struct iovec iov;
1644 } WriteSameCBData;
1645 
1646 static void scsi_write_same_complete(void *opaque, int ret)
1647 {
1648     WriteSameCBData *data = opaque;
1649     SCSIDiskReq *r = data->r;
1650     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1651 
1652     assert(r->req.aiocb != NULL);
1653     r->req.aiocb = NULL;
1654     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1655     if (r->req.io_canceled) {
1656         scsi_req_cancel_complete(&r->req);
1657         goto done;
1658     }
1659 
1660     if (ret < 0) {
1661         if (scsi_handle_rw_error(r, -ret)) {
1662             goto done;
1663         }
1664     }
1665 
1666     data->nb_sectors -= data->iov.iov_len / 512;
1667     data->sector += data->iov.iov_len / 512;
1668     data->iov.iov_len = MIN(data->nb_sectors * 512, data->iov.iov_len);
1669     if (data->iov.iov_len) {
1670         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1671                          data->iov.iov_len, BLOCK_ACCT_WRITE);
1672         r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
1673                                       &data->qiov, data->iov.iov_len / 512,
1674                                       scsi_write_same_complete, data);
1675         return;
1676     }
1677 
1678     scsi_req_complete(&r->req, GOOD);
1679 
1680 done:
1681     scsi_req_unref(&r->req);
1682     qemu_vfree(data->iov.iov_base);
1683     g_free(data);
1684 }
1685 
1686 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1687 {
1688     SCSIRequest *req = &r->req;
1689     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1690     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1691     WriteSameCBData *data;
1692     uint8_t *buf;
1693     int i;
1694 
1695     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1696     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1697         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1698         return;
1699     }
1700 
1701     if (blk_is_read_only(s->qdev.conf.blk)) {
1702         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1703         return;
1704     }
1705     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1706         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1707         return;
1708     }
1709 
1710     if (buffer_is_zero(inbuf, s->qdev.blocksize)) {
1711         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1712 
1713         /* The request is used as the AIO opaque value, so add a ref.  */
1714         scsi_req_ref(&r->req);
1715         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1716                          nb_sectors * s->qdev.blocksize,
1717                         BLOCK_ACCT_WRITE);
1718         r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk,
1719                                 r->req.cmd.lba * (s->qdev.blocksize / 512),
1720                                 nb_sectors * (s->qdev.blocksize / 512),
1721                                 flags, scsi_aio_complete, r);
1722         return;
1723     }
1724 
1725     data = g_new0(WriteSameCBData, 1);
1726     data->r = r;
1727     data->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
1728     data->nb_sectors = nb_sectors * (s->qdev.blocksize / 512);
1729     data->iov.iov_len = MIN(data->nb_sectors * 512, SCSI_WRITE_SAME_MAX);
1730     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1731                                               data->iov.iov_len);
1732     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1733 
1734     for (i = 0; i < data->iov.iov_len; i += s->qdev.blocksize) {
1735         memcpy(&buf[i], inbuf, s->qdev.blocksize);
1736     }
1737 
1738     scsi_req_ref(&r->req);
1739     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1740                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1741     r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
1742                                   &data->qiov, data->iov.iov_len / 512,
1743                                   scsi_write_same_complete, data);
1744 }
1745 
1746 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1747 {
1748     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1749 
1750     if (r->iov.iov_len) {
1751         int buflen = r->iov.iov_len;
1752         DPRINTF("Write buf_len=%d\n", buflen);
1753         r->iov.iov_len = 0;
1754         scsi_req_data(&r->req, buflen);
1755         return;
1756     }
1757 
1758     switch (req->cmd.buf[0]) {
1759     case MODE_SELECT:
1760     case MODE_SELECT_10:
1761         /* This also clears the sense buffer for REQUEST SENSE.  */
1762         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
1763         break;
1764 
1765     case UNMAP:
1766         scsi_disk_emulate_unmap(r, r->iov.iov_base);
1767         break;
1768 
1769     case VERIFY_10:
1770     case VERIFY_12:
1771     case VERIFY_16:
1772         if (r->req.status == -1) {
1773             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1774         }
1775         break;
1776 
1777     case WRITE_SAME_10:
1778     case WRITE_SAME_16:
1779         scsi_disk_emulate_write_same(r, r->iov.iov_base);
1780         break;
1781 
1782     default:
1783         abort();
1784     }
1785 }
1786 
1787 static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
1788 {
1789     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1790     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1791     uint64_t nb_sectors;
1792     uint8_t *outbuf;
1793     int buflen;
1794 
1795     switch (req->cmd.buf[0]) {
1796     case INQUIRY:
1797     case MODE_SENSE:
1798     case MODE_SENSE_10:
1799     case RESERVE:
1800     case RESERVE_10:
1801     case RELEASE:
1802     case RELEASE_10:
1803     case START_STOP:
1804     case ALLOW_MEDIUM_REMOVAL:
1805     case GET_CONFIGURATION:
1806     case GET_EVENT_STATUS_NOTIFICATION:
1807     case MECHANISM_STATUS:
1808     case REQUEST_SENSE:
1809         break;
1810 
1811     default:
1812         if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) {
1813             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
1814             return 0;
1815         }
1816         break;
1817     }
1818 
1819     /*
1820      * FIXME: we shouldn't return anything bigger than 4k, but the code
1821      * requires the buffer to be as big as req->cmd.xfer in several
1822      * places.  So, do not allow CDBs with a very large ALLOCATION
1823      * LENGTH.  The real fix would be to modify scsi_read_data and
1824      * dma_buf_read, so that they return data beyond the buflen
1825      * as all zeros.
1826      */
1827     if (req->cmd.xfer > 65536) {
1828         goto illegal_request;
1829     }
1830     r->buflen = MAX(4096, req->cmd.xfer);
1831 
1832     if (!r->iov.iov_base) {
1833         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
1834     }
1835 
1836     buflen = req->cmd.xfer;
1837     outbuf = r->iov.iov_base;
1838     memset(outbuf, 0, r->buflen);
1839     switch (req->cmd.buf[0]) {
1840     case TEST_UNIT_READY:
1841         assert(!s->tray_open && blk_is_inserted(s->qdev.conf.blk));
1842         break;
1843     case INQUIRY:
1844         buflen = scsi_disk_emulate_inquiry(req, outbuf);
1845         if (buflen < 0) {
1846             goto illegal_request;
1847         }
1848         break;
1849     case MODE_SENSE:
1850     case MODE_SENSE_10:
1851         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
1852         if (buflen < 0) {
1853             goto illegal_request;
1854         }
1855         break;
1856     case READ_TOC:
1857         buflen = scsi_disk_emulate_read_toc(req, outbuf);
1858         if (buflen < 0) {
1859             goto illegal_request;
1860         }
1861         break;
1862     case RESERVE:
1863         if (req->cmd.buf[1] & 1) {
1864             goto illegal_request;
1865         }
1866         break;
1867     case RESERVE_10:
1868         if (req->cmd.buf[1] & 3) {
1869             goto illegal_request;
1870         }
1871         break;
1872     case RELEASE:
1873         if (req->cmd.buf[1] & 1) {
1874             goto illegal_request;
1875         }
1876         break;
1877     case RELEASE_10:
1878         if (req->cmd.buf[1] & 3) {
1879             goto illegal_request;
1880         }
1881         break;
1882     case START_STOP:
1883         if (scsi_disk_emulate_start_stop(r) < 0) {
1884             return 0;
1885         }
1886         break;
1887     case ALLOW_MEDIUM_REMOVAL:
1888         s->tray_locked = req->cmd.buf[4] & 1;
1889         blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
1890         break;
1891     case READ_CAPACITY_10:
1892         /* The normal LEN field for this command is zero.  */
1893         memset(outbuf, 0, 8);
1894         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1895         if (!nb_sectors) {
1896             scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
1897             return 0;
1898         }
1899         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
1900             goto illegal_request;
1901         }
1902         nb_sectors /= s->qdev.blocksize / 512;
1903         /* Returned value is the address of the last sector.  */
1904         nb_sectors--;
1905         /* Remember the new size for read/write sanity checking. */
1906         s->qdev.max_lba = nb_sectors;
1907         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
1908         if (nb_sectors > UINT32_MAX) {
1909             nb_sectors = UINT32_MAX;
1910         }
1911         outbuf[0] = (nb_sectors >> 24) & 0xff;
1912         outbuf[1] = (nb_sectors >> 16) & 0xff;
1913         outbuf[2] = (nb_sectors >> 8) & 0xff;
1914         outbuf[3] = nb_sectors & 0xff;
1915         outbuf[4] = 0;
1916         outbuf[5] = 0;
1917         outbuf[6] = s->qdev.blocksize >> 8;
1918         outbuf[7] = 0;
1919         break;
1920     case REQUEST_SENSE:
1921         /* Just return "NO SENSE".  */
1922         buflen = scsi_build_sense(NULL, 0, outbuf, r->buflen,
1923                                   (req->cmd.buf[1] & 1) == 0);
1924         if (buflen < 0) {
1925             goto illegal_request;
1926         }
1927         break;
1928     case MECHANISM_STATUS:
1929         buflen = scsi_emulate_mechanism_status(s, outbuf);
1930         if (buflen < 0) {
1931             goto illegal_request;
1932         }
1933         break;
1934     case GET_CONFIGURATION:
1935         buflen = scsi_get_configuration(s, outbuf);
1936         if (buflen < 0) {
1937             goto illegal_request;
1938         }
1939         break;
1940     case GET_EVENT_STATUS_NOTIFICATION:
1941         buflen = scsi_get_event_status_notification(s, r, outbuf);
1942         if (buflen < 0) {
1943             goto illegal_request;
1944         }
1945         break;
1946     case READ_DISC_INFORMATION:
1947         buflen = scsi_read_disc_information(s, r, outbuf);
1948         if (buflen < 0) {
1949             goto illegal_request;
1950         }
1951         break;
1952     case READ_DVD_STRUCTURE:
1953         buflen = scsi_read_dvd_structure(s, r, outbuf);
1954         if (buflen < 0) {
1955             goto illegal_request;
1956         }
1957         break;
1958     case SERVICE_ACTION_IN_16:
1959         /* Service Action In subcommands. */
1960         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
1961             DPRINTF("SAI READ CAPACITY(16)\n");
1962             memset(outbuf, 0, req->cmd.xfer);
1963             blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1964             if (!nb_sectors) {
1965                 scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
1966                 return 0;
1967             }
1968             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
1969                 goto illegal_request;
1970             }
1971             nb_sectors /= s->qdev.blocksize / 512;
1972             /* Returned value is the address of the last sector.  */
1973             nb_sectors--;
1974             /* Remember the new size for read/write sanity checking. */
1975             s->qdev.max_lba = nb_sectors;
1976             outbuf[0] = (nb_sectors >> 56) & 0xff;
1977             outbuf[1] = (nb_sectors >> 48) & 0xff;
1978             outbuf[2] = (nb_sectors >> 40) & 0xff;
1979             outbuf[3] = (nb_sectors >> 32) & 0xff;
1980             outbuf[4] = (nb_sectors >> 24) & 0xff;
1981             outbuf[5] = (nb_sectors >> 16) & 0xff;
1982             outbuf[6] = (nb_sectors >> 8) & 0xff;
1983             outbuf[7] = nb_sectors & 0xff;
1984             outbuf[8] = 0;
1985             outbuf[9] = 0;
1986             outbuf[10] = s->qdev.blocksize >> 8;
1987             outbuf[11] = 0;
1988             outbuf[12] = 0;
1989             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
1990 
1991             /* set TPE bit if the format supports discard */
1992             if (s->qdev.conf.discard_granularity) {
1993                 outbuf[14] = 0x80;
1994             }
1995 
1996             /* Protection, exponent and lowest lba field left blank. */
1997             break;
1998         }
1999         DPRINTF("Unsupported Service Action In\n");
2000         goto illegal_request;
2001     case SYNCHRONIZE_CACHE:
2002         /* The request is used as the AIO opaque value, so add a ref.  */
2003         scsi_req_ref(&r->req);
2004         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
2005                          BLOCK_ACCT_FLUSH);
2006         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
2007         return 0;
2008     case SEEK_10:
2009         DPRINTF("Seek(10) (sector %" PRId64 ")\n", r->req.cmd.lba);
2010         if (r->req.cmd.lba > s->qdev.max_lba) {
2011             goto illegal_lba;
2012         }
2013         break;
2014     case MODE_SELECT:
2015         DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer);
2016         break;
2017     case MODE_SELECT_10:
2018         DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer);
2019         break;
2020     case UNMAP:
2021         DPRINTF("Unmap (len %lu)\n", (long)r->req.cmd.xfer);
2022         break;
2023     case VERIFY_10:
2024     case VERIFY_12:
2025     case VERIFY_16:
2026         DPRINTF("Verify (bytchk %d)\n", (req->cmd.buf[1] >> 1) & 3);
2027         if (req->cmd.buf[1] & 6) {
2028             goto illegal_request;
2029         }
2030         break;
2031     case WRITE_SAME_10:
2032     case WRITE_SAME_16:
2033         DPRINTF("WRITE SAME %d (len %lu)\n",
2034                 req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16,
2035                 (long)r->req.cmd.xfer);
2036         break;
2037     default:
2038         DPRINTF("Unknown SCSI command (%2.2x=%s)\n", buf[0],
2039                 scsi_command_name(buf[0]));
2040         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
2041         return 0;
2042     }
2043     assert(!r->req.aiocb);
2044     r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
2045     if (r->iov.iov_len == 0) {
2046         scsi_req_complete(&r->req, GOOD);
2047     }
2048     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2049         assert(r->iov.iov_len == req->cmd.xfer);
2050         return -r->iov.iov_len;
2051     } else {
2052         return r->iov.iov_len;
2053     }
2054 
2055 illegal_request:
2056     if (r->req.status == -1) {
2057         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2058     }
2059     return 0;
2060 
2061 illegal_lba:
2062     scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2063     return 0;
2064 }
2065 
2066 /* Execute a scsi command.  Returns the length of the data expected by the
2067    command.  This will be Positive for data transfers from the device
2068    (eg. disk reads), negative for transfers to the device (eg. disk writes),
2069    and zero if the command does not transfer any data.  */
2070 
2071 static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
2072 {
2073     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2074     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2075     uint32_t len;
2076     uint8_t command;
2077 
2078     command = buf[0];
2079 
2080     if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) {
2081         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2082         return 0;
2083     }
2084 
2085     len = scsi_data_cdb_xfer(r->req.cmd.buf);
2086     switch (command) {
2087     case READ_6:
2088     case READ_10:
2089     case READ_12:
2090     case READ_16:
2091         DPRINTF("Read (sector %" PRId64 ", count %u)\n", r->req.cmd.lba, len);
2092         if (r->req.cmd.buf[1] & 0xe0) {
2093             goto illegal_request;
2094         }
2095         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2096             goto illegal_lba;
2097         }
2098         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
2099         r->sector_count = len * (s->qdev.blocksize / 512);
2100         break;
2101     case WRITE_6:
2102     case WRITE_10:
2103     case WRITE_12:
2104     case WRITE_16:
2105     case WRITE_VERIFY_10:
2106     case WRITE_VERIFY_12:
2107     case WRITE_VERIFY_16:
2108         if (blk_is_read_only(s->qdev.conf.blk)) {
2109             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
2110             return 0;
2111         }
2112         DPRINTF("Write %s(sector %" PRId64 ", count %u)\n",
2113                 (command & 0xe) == 0xe ? "And Verify " : "",
2114                 r->req.cmd.lba, len);
2115         if (r->req.cmd.buf[1] & 0xe0) {
2116             goto illegal_request;
2117         }
2118         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2119             goto illegal_lba;
2120         }
2121         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
2122         r->sector_count = len * (s->qdev.blocksize / 512);
2123         break;
2124     default:
2125         abort();
2126     illegal_request:
2127         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2128         return 0;
2129     illegal_lba:
2130         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2131         return 0;
2132     }
2133     if (r->sector_count == 0) {
2134         scsi_req_complete(&r->req, GOOD);
2135     }
2136     assert(r->iov.iov_len == 0);
2137     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2138         return -r->sector_count * 512;
2139     } else {
2140         return r->sector_count * 512;
2141     }
2142 }
2143 
2144 static void scsi_disk_reset(DeviceState *dev)
2145 {
2146     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2147     uint64_t nb_sectors;
2148 
2149     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2150 
2151     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2152     nb_sectors /= s->qdev.blocksize / 512;
2153     if (nb_sectors) {
2154         nb_sectors--;
2155     }
2156     s->qdev.max_lba = nb_sectors;
2157     /* reset tray statuses */
2158     s->tray_locked = 0;
2159     s->tray_open = 0;
2160 }
2161 
2162 static void scsi_disk_resize_cb(void *opaque)
2163 {
2164     SCSIDiskState *s = opaque;
2165 
2166     /* SPC lists this sense code as available only for
2167      * direct-access devices.
2168      */
2169     if (s->qdev.type == TYPE_DISK) {
2170         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2171     }
2172 }
2173 
2174 static void scsi_cd_change_media_cb(void *opaque, bool load)
2175 {
2176     SCSIDiskState *s = opaque;
2177 
2178     /*
2179      * When a CD gets changed, we have to report an ejected state and
2180      * then a loaded state to guests so that they detect tray
2181      * open/close and media change events.  Guests that do not use
2182      * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
2183      * states rely on this behavior.
2184      *
2185      * media_changed governs the state machine used for unit attention
2186      * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
2187      */
2188     s->media_changed = load;
2189     s->tray_open = !load;
2190     scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
2191     s->media_event = true;
2192     s->eject_request = false;
2193 }
2194 
2195 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2196 {
2197     SCSIDiskState *s = opaque;
2198 
2199     s->eject_request = true;
2200     if (force) {
2201         s->tray_locked = false;
2202     }
2203 }
2204 
2205 static bool scsi_cd_is_tray_open(void *opaque)
2206 {
2207     return ((SCSIDiskState *)opaque)->tray_open;
2208 }
2209 
2210 static bool scsi_cd_is_medium_locked(void *opaque)
2211 {
2212     return ((SCSIDiskState *)opaque)->tray_locked;
2213 }
2214 
2215 static const BlockDevOps scsi_disk_removable_block_ops = {
2216     .change_media_cb = scsi_cd_change_media_cb,
2217     .eject_request_cb = scsi_cd_eject_request_cb,
2218     .is_tray_open = scsi_cd_is_tray_open,
2219     .is_medium_locked = scsi_cd_is_medium_locked,
2220 
2221     .resize_cb = scsi_disk_resize_cb,
2222 };
2223 
2224 static const BlockDevOps scsi_disk_block_ops = {
2225     .resize_cb = scsi_disk_resize_cb,
2226 };
2227 
2228 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2229 {
2230     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2231     if (s->media_changed) {
2232         s->media_changed = false;
2233         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2234     }
2235 }
2236 
2237 static void scsi_realize(SCSIDevice *dev, Error **errp)
2238 {
2239     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2240     Error *err = NULL;
2241 
2242     if (!s->qdev.conf.blk) {
2243         error_setg(errp, "drive property not set");
2244         return;
2245     }
2246 
2247     if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2248         !blk_is_inserted(s->qdev.conf.blk)) {
2249         error_setg(errp, "Device needs media, but drive is empty");
2250         return;
2251     }
2252 
2253     blkconf_serial(&s->qdev.conf, &s->serial);
2254     if (dev->type == TYPE_DISK) {
2255         blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err);
2256         if (err) {
2257             error_propagate(errp, err);
2258             return;
2259         }
2260     }
2261 
2262     if (s->qdev.conf.discard_granularity == -1) {
2263         s->qdev.conf.discard_granularity =
2264             MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
2265     }
2266 
2267     if (!s->version) {
2268         s->version = g_strdup(qemu_get_version());
2269     }
2270     if (!s->vendor) {
2271         s->vendor = g_strdup("QEMU");
2272     }
2273 
2274     if (blk_is_sg(s->qdev.conf.blk)) {
2275         error_setg(errp, "unwanted /dev/sg*");
2276         return;
2277     }
2278 
2279     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2280             !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
2281         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
2282     } else {
2283         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
2284     }
2285     blk_set_guest_block_size(s->qdev.conf.blk, s->qdev.blocksize);
2286 
2287     blk_iostatus_enable(s->qdev.conf.blk);
2288 }
2289 
2290 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2291 {
2292     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2293     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2294     s->qdev.type = TYPE_DISK;
2295     if (!s->product) {
2296         s->product = g_strdup("QEMU HARDDISK");
2297     }
2298     scsi_realize(&s->qdev, errp);
2299 }
2300 
2301 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2302 {
2303     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2304     s->qdev.blocksize = 2048;
2305     s->qdev.type = TYPE_ROM;
2306     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2307     if (!s->product) {
2308         s->product = g_strdup("QEMU CD-ROM");
2309     }
2310     scsi_realize(&s->qdev, errp);
2311 }
2312 
2313 static void scsi_disk_realize(SCSIDevice *dev, Error **errp)
2314 {
2315     DriveInfo *dinfo;
2316     Error *local_err = NULL;
2317 
2318     if (!dev->conf.blk) {
2319         scsi_realize(dev, &local_err);
2320         assert(local_err);
2321         error_propagate(errp, local_err);
2322         return;
2323     }
2324 
2325     dinfo = blk_legacy_dinfo(dev->conf.blk);
2326     if (dinfo && dinfo->media_cd) {
2327         scsi_cd_realize(dev, errp);
2328     } else {
2329         scsi_hd_realize(dev, errp);
2330     }
2331 }
2332 
2333 static const SCSIReqOps scsi_disk_emulate_reqops = {
2334     .size         = sizeof(SCSIDiskReq),
2335     .free_req     = scsi_free_request,
2336     .send_command = scsi_disk_emulate_command,
2337     .read_data    = scsi_disk_emulate_read_data,
2338     .write_data   = scsi_disk_emulate_write_data,
2339     .get_buf      = scsi_get_buf,
2340 };
2341 
2342 static const SCSIReqOps scsi_disk_dma_reqops = {
2343     .size         = sizeof(SCSIDiskReq),
2344     .free_req     = scsi_free_request,
2345     .send_command = scsi_disk_dma_command,
2346     .read_data    = scsi_read_data,
2347     .write_data   = scsi_write_data,
2348     .get_buf      = scsi_get_buf,
2349     .load_request = scsi_disk_load_request,
2350     .save_request = scsi_disk_save_request,
2351 };
2352 
2353 static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
2354     [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
2355     [INQUIRY]                         = &scsi_disk_emulate_reqops,
2356     [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
2357     [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
2358     [START_STOP]                      = &scsi_disk_emulate_reqops,
2359     [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
2360     [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
2361     [READ_TOC]                        = &scsi_disk_emulate_reqops,
2362     [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
2363     [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
2364     [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
2365     [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
2366     [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
2367     [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
2368     [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
2369     [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
2370     [SEEK_10]                         = &scsi_disk_emulate_reqops,
2371     [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
2372     [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
2373     [UNMAP]                           = &scsi_disk_emulate_reqops,
2374     [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
2375     [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
2376     [VERIFY_10]                       = &scsi_disk_emulate_reqops,
2377     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
2378     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
2379 
2380     [READ_6]                          = &scsi_disk_dma_reqops,
2381     [READ_10]                         = &scsi_disk_dma_reqops,
2382     [READ_12]                         = &scsi_disk_dma_reqops,
2383     [READ_16]                         = &scsi_disk_dma_reqops,
2384     [WRITE_6]                         = &scsi_disk_dma_reqops,
2385     [WRITE_10]                        = &scsi_disk_dma_reqops,
2386     [WRITE_12]                        = &scsi_disk_dma_reqops,
2387     [WRITE_16]                        = &scsi_disk_dma_reqops,
2388     [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
2389     [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
2390     [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
2391 };
2392 
2393 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2394                                      uint8_t *buf, void *hba_private)
2395 {
2396     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2397     SCSIRequest *req;
2398     const SCSIReqOps *ops;
2399     uint8_t command;
2400 
2401     command = buf[0];
2402     ops = scsi_disk_reqops_dispatch[command];
2403     if (!ops) {
2404         ops = &scsi_disk_emulate_reqops;
2405     }
2406     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2407 
2408 #ifdef DEBUG_SCSI
2409     DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]);
2410     {
2411         int i;
2412         for (i = 1; i < scsi_cdb_length(buf); i++) {
2413             printf(" 0x%02x", buf[i]);
2414         }
2415         printf("\n");
2416     }
2417 #endif
2418 
2419     return req;
2420 }
2421 
2422 #ifdef __linux__
2423 static int get_device_type(SCSIDiskState *s)
2424 {
2425     uint8_t cmd[16];
2426     uint8_t buf[36];
2427     uint8_t sensebuf[8];
2428     sg_io_hdr_t io_header;
2429     int ret;
2430 
2431     memset(cmd, 0, sizeof(cmd));
2432     memset(buf, 0, sizeof(buf));
2433     cmd[0] = INQUIRY;
2434     cmd[4] = sizeof(buf);
2435 
2436     memset(&io_header, 0, sizeof(io_header));
2437     io_header.interface_id = 'S';
2438     io_header.dxfer_direction = SG_DXFER_FROM_DEV;
2439     io_header.dxfer_len = sizeof(buf);
2440     io_header.dxferp = buf;
2441     io_header.cmdp = cmd;
2442     io_header.cmd_len = sizeof(cmd);
2443     io_header.mx_sb_len = sizeof(sensebuf);
2444     io_header.sbp = sensebuf;
2445     io_header.timeout = 6000; /* XXX */
2446 
2447     ret = blk_ioctl(s->qdev.conf.blk, SG_IO, &io_header);
2448     if (ret < 0 || io_header.driver_status || io_header.host_status) {
2449         return -1;
2450     }
2451     s->qdev.type = buf[0];
2452     if (buf[1] & 0x80) {
2453         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2454     }
2455     return 0;
2456 }
2457 
2458 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2459 {
2460     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2461     int sg_version;
2462     int rc;
2463 
2464     if (!s->qdev.conf.blk) {
2465         error_setg(errp, "drive property not set");
2466         return;
2467     }
2468 
2469     /* check we are using a driver managing SG_IO (version 3 and after) */
2470     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2471     if (rc < 0) {
2472         error_setg(errp, "cannot get SG_IO version number: %s.  "
2473                      "Is this a SCSI device?",
2474                      strerror(-rc));
2475         return;
2476     }
2477     if (sg_version < 30000) {
2478         error_setg(errp, "scsi generic interface too old");
2479         return;
2480     }
2481 
2482     /* get device type from INQUIRY data */
2483     rc = get_device_type(s);
2484     if (rc < 0) {
2485         error_setg(errp, "INQUIRY failed");
2486         return;
2487     }
2488 
2489     /* Make a guess for the block size, we'll fix it when the guest sends.
2490      * READ CAPACITY.  If they don't, they likely would assume these sizes
2491      * anyway. (TODO: check in /sys).
2492      */
2493     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2494         s->qdev.blocksize = 2048;
2495     } else {
2496         s->qdev.blocksize = 512;
2497     }
2498 
2499     /* Makes the scsi-block device not removable by using HMP and QMP eject
2500      * command.
2501      */
2502     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2503 
2504     scsi_realize(&s->qdev, errp);
2505 }
2506 
2507 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2508 {
2509     switch (buf[0]) {
2510     case READ_6:
2511     case READ_10:
2512     case READ_12:
2513     case READ_16:
2514     case VERIFY_10:
2515     case VERIFY_12:
2516     case VERIFY_16:
2517     case WRITE_6:
2518     case WRITE_10:
2519     case WRITE_12:
2520     case WRITE_16:
2521     case WRITE_VERIFY_10:
2522     case WRITE_VERIFY_12:
2523     case WRITE_VERIFY_16:
2524         /* If we are not using O_DIRECT, we might read stale data from the
2525          * host cache if writes were made using other commands than these
2526          * ones (such as WRITE SAME or EXTENDED COPY, etc.).  So, without
2527          * O_DIRECT everything must go through SG_IO.
2528          */
2529         if (!(blk_get_flags(s->qdev.conf.blk) & BDRV_O_NOCACHE)) {
2530             break;
2531         }
2532 
2533         /* MMC writing cannot be done via pread/pwrite, because it sometimes
2534          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
2535          * And once you do these writes, reading from the block device is
2536          * unreliable, too.  It is even possible that reads deliver random data
2537          * from the host page cache (this is probably a Linux bug).
2538          *
2539          * We might use scsi_disk_dma_reqops as long as no writing commands are
2540          * seen, but performance usually isn't paramount on optical media.  So,
2541          * just make scsi-block operate the same as scsi-generic for them.
2542          */
2543         if (s->qdev.type != TYPE_ROM) {
2544             return false;
2545         }
2546         break;
2547 
2548     default:
2549         break;
2550     }
2551 
2552     return true;
2553 }
2554 
2555 
2556 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
2557                                            uint32_t lun, uint8_t *buf,
2558                                            void *hba_private)
2559 {
2560     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2561 
2562     if (scsi_block_is_passthrough(s, buf)) {
2563         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
2564                               hba_private);
2565     } else {
2566         return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun,
2567                               hba_private);
2568     }
2569 }
2570 
2571 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
2572                                   uint8_t *buf, void *hba_private)
2573 {
2574     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2575 
2576     if (scsi_block_is_passthrough(s, buf)) {
2577         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, hba_private);
2578     } else {
2579         return scsi_req_parse_cdb(&s->qdev, cmd, buf);
2580     }
2581 }
2582 
2583 #endif
2584 
/*
 * Properties common to the scsi-hd, scsi-cd and scsi-disk property lists:
 * the block configuration plus the "ver", "serial", "vendor" and
 * "product" strings.
 */
#define DEFINE_SCSI_DISK_PROPERTIES() \
    DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \
    DEFINE_PROP_STRING("ver", SCSIDiskState, version), \
    DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \
    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \
    DEFINE_PROP_STRING("product", SCSIDiskState, product)
2591 
2592 static Property scsi_hd_properties[] = {
2593     DEFINE_SCSI_DISK_PROPERTIES(),
2594     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
2595                     SCSI_DISK_F_REMOVABLE, false),
2596     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
2597                     SCSI_DISK_F_DPOFUA, false),
2598     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
2599     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
2600     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
2601     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
2602                        DEFAULT_MAX_UNMAP_SIZE),
2603     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
2604                        DEFAULT_MAX_IO_SIZE),
2605     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
2606     DEFINE_PROP_END_OF_LIST(),
2607 };
2608 
2609 static const VMStateDescription vmstate_scsi_disk_state = {
2610     .name = "scsi-disk",
2611     .version_id = 1,
2612     .minimum_version_id = 1,
2613     .fields = (VMStateField[]) {
2614         VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
2615         VMSTATE_BOOL(media_changed, SCSIDiskState),
2616         VMSTATE_BOOL(media_event, SCSIDiskState),
2617         VMSTATE_BOOL(eject_request, SCSIDiskState),
2618         VMSTATE_BOOL(tray_open, SCSIDiskState),
2619         VMSTATE_BOOL(tray_locked, SCSIDiskState),
2620         VMSTATE_END_OF_LIST()
2621     }
2622 };
2623 
2624 static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
2625 {
2626     DeviceClass *dc = DEVICE_CLASS(klass);
2627     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2628 
2629     sc->realize      = scsi_hd_realize;
2630     sc->alloc_req    = scsi_new_request;
2631     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
2632     dc->fw_name = "disk";
2633     dc->desc = "virtual SCSI disk";
2634     dc->reset = scsi_disk_reset;
2635     dc->props = scsi_hd_properties;
2636     dc->vmsd  = &vmstate_scsi_disk_state;
2637 }
2638 
2639 static const TypeInfo scsi_hd_info = {
2640     .name          = "scsi-hd",
2641     .parent        = TYPE_SCSI_DEVICE,
2642     .instance_size = sizeof(SCSIDiskState),
2643     .class_init    = scsi_hd_class_initfn,
2644 };
2645 
2646 static Property scsi_cd_properties[] = {
2647     DEFINE_SCSI_DISK_PROPERTIES(),
2648     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
2649     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
2650     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
2651     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
2652                        DEFAULT_MAX_IO_SIZE),
2653     DEFINE_PROP_END_OF_LIST(),
2654 };
2655 
2656 static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
2657 {
2658     DeviceClass *dc = DEVICE_CLASS(klass);
2659     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2660 
2661     sc->realize      = scsi_cd_realize;
2662     sc->alloc_req    = scsi_new_request;
2663     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
2664     dc->fw_name = "disk";
2665     dc->desc = "virtual SCSI CD-ROM";
2666     dc->reset = scsi_disk_reset;
2667     dc->props = scsi_cd_properties;
2668     dc->vmsd  = &vmstate_scsi_disk_state;
2669 }
2670 
2671 static const TypeInfo scsi_cd_info = {
2672     .name          = "scsi-cd",
2673     .parent        = TYPE_SCSI_DEVICE,
2674     .instance_size = sizeof(SCSIDiskState),
2675     .class_init    = scsi_cd_class_initfn,
2676 };
2677 
2678 #ifdef __linux__
2679 static Property scsi_block_properties[] = {
2680     DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
2681     DEFINE_PROP_END_OF_LIST(),
2682 };
2683 
2684 static void scsi_block_class_initfn(ObjectClass *klass, void *data)
2685 {
2686     DeviceClass *dc = DEVICE_CLASS(klass);
2687     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2688 
2689     sc->realize      = scsi_block_realize;
2690     sc->alloc_req    = scsi_block_new_request;
2691     sc->parse_cdb    = scsi_block_parse_cdb;
2692     dc->fw_name = "disk";
2693     dc->desc = "SCSI block device passthrough";
2694     dc->reset = scsi_disk_reset;
2695     dc->props = scsi_block_properties;
2696     dc->vmsd  = &vmstate_scsi_disk_state;
2697 }
2698 
2699 static const TypeInfo scsi_block_info = {
2700     .name          = "scsi-block",
2701     .parent        = TYPE_SCSI_DEVICE,
2702     .instance_size = sizeof(SCSIDiskState),
2703     .class_init    = scsi_block_class_initfn,
2704 };
2705 #endif
2706 
2707 static Property scsi_disk_properties[] = {
2708     DEFINE_SCSI_DISK_PROPERTIES(),
2709     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
2710                     SCSI_DISK_F_REMOVABLE, false),
2711     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
2712                     SCSI_DISK_F_DPOFUA, false),
2713     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
2714     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
2715     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
2716     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
2717                        DEFAULT_MAX_UNMAP_SIZE),
2718     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
2719                        DEFAULT_MAX_IO_SIZE),
2720     DEFINE_PROP_END_OF_LIST(),
2721 };
2722 
2723 static void scsi_disk_class_initfn(ObjectClass *klass, void *data)
2724 {
2725     DeviceClass *dc = DEVICE_CLASS(klass);
2726     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2727 
2728     sc->realize      = scsi_disk_realize;
2729     sc->alloc_req    = scsi_new_request;
2730     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
2731     dc->fw_name = "disk";
2732     dc->desc = "virtual SCSI disk or CD-ROM (legacy)";
2733     dc->reset = scsi_disk_reset;
2734     dc->props = scsi_disk_properties;
2735     dc->vmsd  = &vmstate_scsi_disk_state;
2736 }
2737 
2738 static const TypeInfo scsi_disk_info = {
2739     .name          = "scsi-disk",
2740     .parent        = TYPE_SCSI_DEVICE,
2741     .instance_size = sizeof(SCSIDiskState),
2742     .class_init    = scsi_disk_class_initfn,
2743 };
2744 
2745 static void scsi_disk_register_types(void)
2746 {
2747     type_register_static(&scsi_hd_info);
2748     type_register_static(&scsi_cd_info);
2749 #ifdef __linux__
2750     type_register_static(&scsi_block_info);
2751 #endif
2752     type_register_static(&scsi_disk_info);
2753 }
2754 
2755 type_init(scsi_disk_register_types)
2756