xref: /qemu/block/iscsi.c (revision d072cdf3)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "trace.h"
38 #include "block/scsi.h"
39 #include "qemu/iov.h"
40 #include "sysemu/sysemu.h"
41 #include "qmp-commands.h"
42 
43 #include <iscsi/iscsi.h>
44 #include <iscsi/scsi-lowlevel.h>
45 
46 #ifdef __linux__
47 #include <scsi/sg.h>
48 #include <block/scsi.h>
49 #endif
50 
/*
 * Per-LUN driver state.  The block driver callbacks in this file obtain it
 * from bs->opaque.
 */
51 typedef struct IscsiLun {
52     struct iscsi_context *iscsi;    /* libiscsi context all commands are issued on */
53     AioContext *aio_context;        /* context used for fd handlers, BHs and timers */
54     int lun;                        /* LUN number passed to every libiscsi task call */
55     enum scsi_inquiry_peripheral_device_type type; /* reported via SG_GET_SCSI_ID */
56     int block_size;                 /* LUN block size in bytes */
57     uint64_t num_blocks;            /* LUN size in blocks of block_size bytes */
58     int events;                     /* poll mask last registered by iscsi_set_events() */
59     QEMUTimer *nop_timer;           /* NOTE(review): not used in this part of the file */
60     uint8_t lbpme;                  /* 0: block status always reports "allocated" */
61     uint8_t lbprz;                  /* non-zero: deallocated blocks are reported as zero */
62     uint8_t has_write_same;         /* cleared once the target rejects WRITE SAME */
63     struct scsi_inquiry_logical_block_provisioning lbp; /* lbpu/lbpws/lbpws10 flags */
64     struct scsi_inquiry_block_limits bl; /* NOTE(review): not used in this part of the file */
65     unsigned char *zeroblock;       /* lazily allocated zeroed block for WRITE SAME */
66     unsigned long *allocationmap;   /* one bit per cluster; NULL disables tracking */
67     int cluster_sectors;            /* QEMU sectors covered by one allocationmap bit */
68     bool use_16_for_rw;             /* use the 16-byte CDB variants instead of 10-byte */
69 } IscsiLun;
70 
/*
 * State of one coroutine-based request.  It lives on the stack of the
 * issuing coroutine and is filled in by iscsi_co_generic_cb().
 */
71 typedef struct IscsiTask {
72     int status;             /* SCSI status handed to the completion callback */
73     int complete;           /* set to 1 once the waiting coroutine may continue */
74     int retries;            /* number of failed attempts counted so far */
75     int do_retry;           /* set to 1 when the caller should reissue the command */
76     struct scsi_task *task; /* libiscsi task; the caller frees it */
77     Coroutine *co;          /* coroutine to re-enter on completion */
78     QEMUBH *bh;             /* bottom half used to re-enter co */
79     IscsiLun *iscsilun;     /* LUN the request was issued on */
80     QEMUTimer retry_timer;  /* delays the retry after a BUSY status */
81 } IscsiTask;
82 
/*
 * AIO control block for callback-style requests (in this part of the file:
 * the SG_IO ioctl path).
 */
83 typedef struct IscsiAIOCB {
84     BlockDriverAIOCB common; /* must stay first: iscsi_aio_cancel() casts to it */
85     QEMUIOVector *qiov;      /* NOTE(review): not used in this part of the file */
86     QEMUBH *bh;              /* completion bottom half; NULL until scheduled */
87     IscsiLun *iscsilun;      /* LUN the request was issued on */
88     struct scsi_task *task;  /* libiscsi task, freed in iscsi_bh_cb() */
89     uint8_t *buf;            /* g_free()d on completion */
90     int status;              /* -EINPROGRESS while pending, then the result */
91     int canceled;            /* non-zero suppresses the completion callback */
92     int64_t sector_num;      /* NOTE(review): not used in this part of the file */
93     int nb_sectors;          /* NOTE(review): not used in this part of the file */
94 #ifdef __linux__
95     sg_io_hdr_t *ioh;        /* caller's SG_IO header, filled in on completion */
96 #endif
97 } IscsiAIOCB;
98 
/* NOTE(review): NOP_INTERVAL and MAX_NOP_FAILURES are not used in this part
 * of the file; presumably they configure the NOP keep-alive timer. */
99 #define NOP_INTERVAL 5000
100 #define MAX_NOP_FAILURES 3
/* Maximum number of retries per command: one per entry of the table below. */
101 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* Mean delay in milliseconds before the n-th retry after a BUSY status;
 * the actual delay is drawn from exp_random() with this mean. */
102 static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
103 
104 /* This threshold is a trade-off knob to choose between
105  * the potential additional overhead of an extra GET_LBA_STATUS request
106  * and unnecessarily reading a lot of zero sectors over the wire.
107  * If a read request covers at least ISCSI_CHECKALLOC_THRES sectors,
108  * we first check the allocation status of the area covered by the
109  * request, provided the allocationmap indicates that the area might be
110  * unallocated. */
111 #define ISCSI_CHECKALLOC_THRES 64
112 
113 static void
114 iscsi_bh_cb(void *p)
115 {
116     IscsiAIOCB *acb = p;
117 
118     qemu_bh_delete(acb->bh);
119 
120     g_free(acb->buf);
121     acb->buf = NULL;
122 
123     if (acb->canceled == 0) {
124         acb->common.cb(acb->common.opaque, acb->status);
125     }
126 
127     if (acb->task != NULL) {
128         scsi_free_scsi_task(acb->task);
129         acb->task = NULL;
130     }
131 
132     qemu_aio_release(acb);
133 }
134 
135 static void
136 iscsi_schedule_bh(IscsiAIOCB *acb)
137 {
138     if (acb->bh) {
139         return;
140     }
141     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
142     qemu_bh_schedule(acb->bh);
143 }
144 
145 static void iscsi_co_generic_bh_cb(void *opaque)
146 {
147     struct IscsiTask *iTask = opaque;
148     iTask->complete = 1;
149     qemu_bh_delete(iTask->bh);
150     qemu_coroutine_enter(iTask->co, NULL);
151 }
152 
153 static void iscsi_retry_timer_expired(void *opaque)
154 {
155     struct IscsiTask *iTask = opaque;
156     iTask->complete = 1;
157     if (iTask->co) {
158         qemu_coroutine_enter(iTask->co, NULL);
159     }
160 }
161 
162 static inline unsigned exp_random(double mean)
163 {
164     return -mean * log((double)rand() / RAND_MAX);
165 }
166 
167 static void
168 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
169                         void *command_data, void *opaque)
170 {
171     struct IscsiTask *iTask = opaque;
172     struct scsi_task *task = command_data;
173 
174     iTask->status = status;
175     iTask->do_retry = 0;
176     iTask->task = task;
177 
178     if (status != SCSI_STATUS_GOOD) {
179         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
180             if (status == SCSI_STATUS_CHECK_CONDITION
181                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
182                 error_report("iSCSI CheckCondition: %s",
183                              iscsi_get_error(iscsi));
184                 iTask->do_retry = 1;
185                 goto out;
186             }
187             if (status == SCSI_STATUS_BUSY) {
188                 unsigned retry_time =
189                     exp_random(iscsi_retry_times[iTask->retries - 1]);
190                 error_report("iSCSI Busy (retry #%u in %u ms): %s",
191                              iTask->retries, retry_time,
192                              iscsi_get_error(iscsi));
193                 aio_timer_init(iTask->iscsilun->aio_context,
194                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
195                                SCALE_MS, iscsi_retry_timer_expired, iTask);
196                 timer_mod(&iTask->retry_timer,
197                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
198                 iTask->do_retry = 1;
199                 return;
200             }
201         }
202         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
203     }
204 
205 out:
206     if (iTask->co) {
207         iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
208                                iscsi_co_generic_bh_cb, iTask);
209         qemu_bh_schedule(iTask->bh);
210     } else {
211         iTask->complete = 1;
212     }
213 }
214 
215 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
216 {
217     *iTask = (struct IscsiTask) {
218         .co         = qemu_coroutine_self(),
219         .iscsilun   = iscsilun,
220     };
221 }
222 
223 static void
224 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
225                     void *private_data)
226 {
227     IscsiAIOCB *acb = private_data;
228 
229     acb->status = -ECANCELED;
230     iscsi_schedule_bh(acb);
231 }
232 
233 static void
234 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
235 {
236     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
237     IscsiLun *iscsilun = acb->iscsilun;
238 
239     if (acb->status != -EINPROGRESS) {
240         return;
241     }
242 
243     acb->canceled = 1;
244 
245     /* send a task mgmt call to the target to cancel the task on the target */
246     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
247                                      iscsi_abort_task_cb, acb);
248 
249     while (acb->status == -EINPROGRESS) {
250         aio_poll(iscsilun->aio_context, true);
251     }
252 }
253 
/* AIOCB description passed to qemu_aio_get(): size of the control block to
 * allocate and the function that cancels an in-flight request. */
254 static const AIOCBInfo iscsi_aiocb_info = {
255     .aiocb_size         = sizeof(IscsiAIOCB),
256     .cancel             = iscsi_aio_cancel,
257 };
258 
259 
260 static void iscsi_process_read(void *arg);
261 static void iscsi_process_write(void *arg);
262 
263 static void
264 iscsi_set_events(IscsiLun *iscsilun)
265 {
266     struct iscsi_context *iscsi = iscsilun->iscsi;
267     int ev;
268 
269     /* We always register a read handler.  */
270     ev = POLLIN;
271     ev |= iscsi_which_events(iscsi);
272     if (ev != iscsilun->events) {
273         aio_set_fd_handler(iscsilun->aio_context,
274                            iscsi_get_fd(iscsi),
275                            iscsi_process_read,
276                            (ev & POLLOUT) ? iscsi_process_write : NULL,
277                            iscsilun);
278 
279     }
280 
281     iscsilun->events = ev;
282 }
283 
284 static void
285 iscsi_process_read(void *arg)
286 {
287     IscsiLun *iscsilun = arg;
288     struct iscsi_context *iscsi = iscsilun->iscsi;
289 
290     iscsi_service(iscsi, POLLIN);
291     iscsi_set_events(iscsilun);
292 }
293 
294 static void
295 iscsi_process_write(void *arg)
296 {
297     IscsiLun *iscsilun = arg;
298     struct iscsi_context *iscsi = iscsilun->iscsi;
299 
300     iscsi_service(iscsi, POLLOUT);
301     iscsi_set_events(iscsilun);
302 }
303 
304 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
305 {
306     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
307 }
308 
309 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
310 {
311     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
312 }
313 
314 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
315                                       IscsiLun *iscsilun)
316 {
317     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
318         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
319             error_report("iSCSI misaligned request: "
320                          "iscsilun->block_size %u, sector_num %" PRIi64
321                          ", nb_sectors %d",
322                          iscsilun->block_size, sector_num, nb_sectors);
323             return 0;
324     }
325     return 1;
326 }
327 
328 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
329                                     int nb_sectors)
330 {
331     if (iscsilun->allocationmap == NULL) {
332         return;
333     }
334     bitmap_set(iscsilun->allocationmap,
335                sector_num / iscsilun->cluster_sectors,
336                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
337 }
338 
339 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
340                                       int nb_sectors)
341 {
342     int64_t cluster_num, nb_clusters;
343     if (iscsilun->allocationmap == NULL) {
344         return;
345     }
346     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
347     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
348                   - cluster_num;
349     if (nb_clusters > 0) {
350         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
351     }
352 }
353 
354 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
355                                         int64_t sector_num, int nb_sectors,
356                                         QEMUIOVector *iov)
357 {
358     IscsiLun *iscsilun = bs->opaque;
359     struct IscsiTask iTask;
360     uint64_t lba;
361     uint32_t num_sectors;
362 
363     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
364         return -EINVAL;
365     }
366 
367     lba = sector_qemu2lun(sector_num, iscsilun);
368     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
369     iscsi_co_init_iscsitask(iscsilun, &iTask);
370 retry:
371     if (iscsilun->use_16_for_rw) {
372         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
373                                         NULL, num_sectors * iscsilun->block_size,
374                                         iscsilun->block_size, 0, 0, 0, 0, 0,
375                                         iscsi_co_generic_cb, &iTask);
376     } else {
377         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
378                                         NULL, num_sectors * iscsilun->block_size,
379                                         iscsilun->block_size, 0, 0, 0, 0, 0,
380                                         iscsi_co_generic_cb, &iTask);
381     }
382     if (iTask.task == NULL) {
383         return -ENOMEM;
384     }
385     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
386                           iov->niov);
387     while (!iTask.complete) {
388         iscsi_set_events(iscsilun);
389         qemu_coroutine_yield();
390     }
391 
392     if (iTask.task != NULL) {
393         scsi_free_scsi_task(iTask.task);
394         iTask.task = NULL;
395     }
396 
397     if (iTask.do_retry) {
398         iTask.complete = 0;
399         goto retry;
400     }
401 
402     if (iTask.status != SCSI_STATUS_GOOD) {
403         return -EIO;
404     }
405 
406     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
407 
408     return 0;
409 }
410 
411 
412 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
413                                              int64_t sector_num, int nb_sectors)
414 {
415     unsigned long size;
416     if (iscsilun->allocationmap == NULL) {
417         return true;
418     }
419     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
420     return !(find_next_bit(iscsilun->allocationmap, size,
421                            sector_num / iscsilun->cluster_sectors) == size);
422 }
423 
424 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
425                                                   int64_t sector_num,
426                                                   int nb_sectors, int *pnum)
427 {
428     IscsiLun *iscsilun = bs->opaque;
429     struct scsi_get_lba_status *lbas = NULL;
430     struct scsi_lba_status_descriptor *lbasd = NULL;
431     struct IscsiTask iTask;
432     int64_t ret;
433 
434     iscsi_co_init_iscsitask(iscsilun, &iTask);
435 
436     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
437         ret = -EINVAL;
438         goto out;
439     }
440 
441     /* default to all sectors allocated */
442     ret = BDRV_BLOCK_DATA;
443     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
444     *pnum = nb_sectors;
445 
446     /* LUN does not support logical block provisioning */
447     if (iscsilun->lbpme == 0) {
448         goto out;
449     }
450 
451 retry:
452     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
453                                   sector_qemu2lun(sector_num, iscsilun),
454                                   8 + 16, iscsi_co_generic_cb,
455                                   &iTask) == NULL) {
456         ret = -ENOMEM;
457         goto out;
458     }
459 
460     while (!iTask.complete) {
461         iscsi_set_events(iscsilun);
462         qemu_coroutine_yield();
463     }
464 
465     if (iTask.do_retry) {
466         if (iTask.task != NULL) {
467             scsi_free_scsi_task(iTask.task);
468             iTask.task = NULL;
469         }
470         iTask.complete = 0;
471         goto retry;
472     }
473 
474     if (iTask.status != SCSI_STATUS_GOOD) {
475         /* in case the get_lba_status_callout fails (i.e.
476          * because the device is busy or the cmd is not
477          * supported) we pretend all blocks are allocated
478          * for backwards compatibility */
479         goto out;
480     }
481 
482     lbas = scsi_datain_unmarshall(iTask.task);
483     if (lbas == NULL) {
484         ret = -EIO;
485         goto out;
486     }
487 
488     lbasd = &lbas->descriptors[0];
489 
490     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
491         ret = -EIO;
492         goto out;
493     }
494 
495     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
496 
497     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
498         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
499         ret &= ~BDRV_BLOCK_DATA;
500         if (iscsilun->lbprz) {
501             ret |= BDRV_BLOCK_ZERO;
502         }
503     }
504 
505     if (ret & BDRV_BLOCK_ZERO) {
506         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
507     } else {
508         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
509     }
510 
511     if (*pnum > nb_sectors) {
512         *pnum = nb_sectors;
513     }
514 out:
515     if (iTask.task != NULL) {
516         scsi_free_scsi_task(iTask.task);
517     }
518     return ret;
519 }
520 
521 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
522                                        int64_t sector_num, int nb_sectors,
523                                        QEMUIOVector *iov)
524 {
525     IscsiLun *iscsilun = bs->opaque;
526     struct IscsiTask iTask;
527     uint64_t lba;
528     uint32_t num_sectors;
529 
530     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
531         return -EINVAL;
532     }
533 
534     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
535         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
536         int64_t ret;
537         int pnum;
538         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
539         if (ret < 0) {
540             return ret;
541         }
542         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
543             qemu_iovec_memset(iov, 0, 0x00, iov->size);
544             return 0;
545         }
546     }
547 
548     lba = sector_qemu2lun(sector_num, iscsilun);
549     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
550 
551     iscsi_co_init_iscsitask(iscsilun, &iTask);
552 retry:
553     if (iscsilun->use_16_for_rw) {
554         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
555                                        num_sectors * iscsilun->block_size,
556                                        iscsilun->block_size, 0, 0, 0, 0, 0,
557                                        iscsi_co_generic_cb, &iTask);
558     } else {
559         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
560                                        num_sectors * iscsilun->block_size,
561                                        iscsilun->block_size,
562                                        0, 0, 0, 0, 0,
563                                        iscsi_co_generic_cb, &iTask);
564     }
565     if (iTask.task == NULL) {
566         return -ENOMEM;
567     }
568     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
569 
570     while (!iTask.complete) {
571         iscsi_set_events(iscsilun);
572         qemu_coroutine_yield();
573     }
574 
575     if (iTask.task != NULL) {
576         scsi_free_scsi_task(iTask.task);
577         iTask.task = NULL;
578     }
579 
580     if (iTask.do_retry) {
581         iTask.complete = 0;
582         goto retry;
583     }
584 
585     if (iTask.status != SCSI_STATUS_GOOD) {
586         return -EIO;
587     }
588 
589     return 0;
590 }
591 
592 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
593 {
594     IscsiLun *iscsilun = bs->opaque;
595     struct IscsiTask iTask;
596 
597     if (bs->sg) {
598         return 0;
599     }
600 
601     iscsi_co_init_iscsitask(iscsilun, &iTask);
602 
603 retry:
604     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
605                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
606         return -ENOMEM;
607     }
608 
609     while (!iTask.complete) {
610         iscsi_set_events(iscsilun);
611         qemu_coroutine_yield();
612     }
613 
614     if (iTask.task != NULL) {
615         scsi_free_scsi_task(iTask.task);
616         iTask.task = NULL;
617     }
618 
619     if (iTask.do_retry) {
620         iTask.complete = 0;
621         goto retry;
622     }
623 
624     if (iTask.status != SCSI_STATUS_GOOD) {
625         return -EIO;
626     }
627 
628     return 0;
629 }
630 
631 #ifdef __linux__
632 static void
633 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
634                      void *command_data, void *opaque)
635 {
636     IscsiAIOCB *acb = opaque;
637 
638     g_free(acb->buf);
639     acb->buf = NULL;
640 
641     if (acb->canceled != 0) {
642         return;
643     }
644 
645     acb->status = 0;
646     if (status < 0) {
647         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
648                      iscsi_get_error(iscsi));
649         acb->status = -EIO;
650     }
651 
652     acb->ioh->driver_status = 0;
653     acb->ioh->host_status   = 0;
654     acb->ioh->resid         = 0;
655 
656 #define SG_ERR_DRIVER_SENSE    0x08
657 
658     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
659         int ss;
660 
661         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
662 
663         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
664         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
665              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
666         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
667     }
668 
669     iscsi_schedule_bh(acb);
670 }
671 
672 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
673         unsigned long int req, void *buf,
674         BlockDriverCompletionFunc *cb, void *opaque)
675 {
676     IscsiLun *iscsilun = bs->opaque;
677     struct iscsi_context *iscsi = iscsilun->iscsi;
678     struct iscsi_data data;
679     IscsiAIOCB *acb;
680 
681     assert(req == SG_IO);
682 
683     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
684 
685     acb->iscsilun = iscsilun;
686     acb->canceled    = 0;
687     acb->bh          = NULL;
688     acb->status      = -EINPROGRESS;
689     acb->buf         = NULL;
690     acb->ioh         = buf;
691 
692     acb->task = malloc(sizeof(struct scsi_task));
693     if (acb->task == NULL) {
694         error_report("iSCSI: Failed to allocate task for scsi command. %s",
695                      iscsi_get_error(iscsi));
696         qemu_aio_release(acb);
697         return NULL;
698     }
699     memset(acb->task, 0, sizeof(struct scsi_task));
700 
701     switch (acb->ioh->dxfer_direction) {
702     case SG_DXFER_TO_DEV:
703         acb->task->xfer_dir = SCSI_XFER_WRITE;
704         break;
705     case SG_DXFER_FROM_DEV:
706         acb->task->xfer_dir = SCSI_XFER_READ;
707         break;
708     default:
709         acb->task->xfer_dir = SCSI_XFER_NONE;
710         break;
711     }
712 
713     acb->task->cdb_size = acb->ioh->cmd_len;
714     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
715     acb->task->expxferlen = acb->ioh->dxfer_len;
716 
717     data.size = 0;
718     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
719         if (acb->ioh->iovec_count == 0) {
720             data.data = acb->ioh->dxferp;
721             data.size = acb->ioh->dxfer_len;
722         } else {
723             scsi_task_set_iov_out(acb->task,
724                                  (struct scsi_iovec *) acb->ioh->dxferp,
725                                  acb->ioh->iovec_count);
726         }
727     }
728 
729     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
730                                  iscsi_aio_ioctl_cb,
731                                  (data.size > 0) ? &data : NULL,
732                                  acb) != 0) {
733         scsi_free_scsi_task(acb->task);
734         qemu_aio_release(acb);
735         return NULL;
736     }
737 
738     /* tell libiscsi to read straight into the buffer we got from ioctl */
739     if (acb->task->xfer_dir == SCSI_XFER_READ) {
740         if (acb->ioh->iovec_count == 0) {
741             scsi_task_add_data_in_buffer(acb->task,
742                                          acb->ioh->dxfer_len,
743                                          acb->ioh->dxferp);
744         } else {
745             scsi_task_set_iov_in(acb->task,
746                                  (struct scsi_iovec *) acb->ioh->dxferp,
747                                  acb->ioh->iovec_count);
748         }
749     }
750 
751     iscsi_set_events(iscsilun);
752 
753     return &acb->common;
754 }
755 
/*
 * Completion callback used by the synchronous ioctl path: stores @status in
 * the int that @opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
761 
762 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
763 {
764     IscsiLun *iscsilun = bs->opaque;
765     int status;
766 
767     switch (req) {
768     case SG_GET_VERSION_NUM:
769         *(int *)buf = 30000;
770         break;
771     case SG_GET_SCSI_ID:
772         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
773         break;
774     case SG_IO:
775         status = -EINPROGRESS;
776         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
777 
778         while (status == -EINPROGRESS) {
779             aio_poll(iscsilun->aio_context, true);
780         }
781 
782         return 0;
783     default:
784         return -1;
785     }
786     return 0;
787 }
788 #endif
789 
790 static int64_t
791 iscsi_getlength(BlockDriverState *bs)
792 {
793     IscsiLun *iscsilun = bs->opaque;
794     int64_t len;
795 
796     len  = iscsilun->num_blocks;
797     len *= iscsilun->block_size;
798 
799     return len;
800 }
801 
802 static int
803 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
804                                    int nb_sectors)
805 {
806     IscsiLun *iscsilun = bs->opaque;
807     struct IscsiTask iTask;
808     struct unmap_list list;
809 
810     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
811         return -EINVAL;
812     }
813 
814     if (!iscsilun->lbp.lbpu) {
815         /* UNMAP is not supported by the target */
816         return 0;
817     }
818 
819     list.lba = sector_qemu2lun(sector_num, iscsilun);
820     list.num = sector_qemu2lun(nb_sectors, iscsilun);
821 
822     iscsi_co_init_iscsitask(iscsilun, &iTask);
823 retry:
824     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
825                      iscsi_co_generic_cb, &iTask) == NULL) {
826         return -ENOMEM;
827     }
828 
829     while (!iTask.complete) {
830         iscsi_set_events(iscsilun);
831         qemu_coroutine_yield();
832     }
833 
834     if (iTask.task != NULL) {
835         scsi_free_scsi_task(iTask.task);
836         iTask.task = NULL;
837     }
838 
839     if (iTask.do_retry) {
840         iTask.complete = 0;
841         goto retry;
842     }
843 
844     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
845         /* the target might fail with a check condition if it
846            is not happy with the alignment of the UNMAP request
847            we silently fail in this case */
848         return 0;
849     }
850 
851     if (iTask.status != SCSI_STATUS_GOOD) {
852         return -EIO;
853     }
854 
855     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
856 
857     return 0;
858 }
859 
860 static int
861 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
862                                    int nb_sectors, BdrvRequestFlags flags)
863 {
864     IscsiLun *iscsilun = bs->opaque;
865     struct IscsiTask iTask;
866     uint64_t lba;
867     uint32_t nb_blocks;
868     bool use_16_for_ws = iscsilun->use_16_for_rw;
869 
870     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
871         return -EINVAL;
872     }
873 
874     if (flags & BDRV_REQ_MAY_UNMAP) {
875         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
876             /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
877             use_16_for_ws = true;
878         }
879         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
880             /* WRITESAME16 with UNMAP is not supported by the target,
881              * fall back and try WRITESAME10/16 without UNMAP */
882             flags &= ~BDRV_REQ_MAY_UNMAP;
883             use_16_for_ws = iscsilun->use_16_for_rw;
884         }
885     }
886 
887     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
888         /* WRITESAME without UNMAP is not supported by the target */
889         return -ENOTSUP;
890     }
891 
892     lba = sector_qemu2lun(sector_num, iscsilun);
893     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
894 
895     if (iscsilun->zeroblock == NULL) {
896         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
897         if (iscsilun->zeroblock == NULL) {
898             return -ENOMEM;
899         }
900     }
901 
902     iscsi_co_init_iscsitask(iscsilun, &iTask);
903 retry:
904     if (use_16_for_ws) {
905         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
906                                             iscsilun->zeroblock, iscsilun->block_size,
907                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
908                                             0, 0, iscsi_co_generic_cb, &iTask);
909     } else {
910         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
911                                             iscsilun->zeroblock, iscsilun->block_size,
912                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
913                                             0, 0, iscsi_co_generic_cb, &iTask);
914     }
915     if (iTask.task == NULL) {
916         return -ENOMEM;
917     }
918 
919     while (!iTask.complete) {
920         iscsi_set_events(iscsilun);
921         qemu_coroutine_yield();
922     }
923 
924     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
925         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
926         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
927          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
928         /* WRITE SAME is not supported by the target */
929         iscsilun->has_write_same = false;
930         scsi_free_scsi_task(iTask.task);
931         return -ENOTSUP;
932     }
933 
934     if (iTask.task != NULL) {
935         scsi_free_scsi_task(iTask.task);
936         iTask.task = NULL;
937     }
938 
939     if (iTask.do_retry) {
940         iTask.complete = 0;
941         goto retry;
942     }
943 
944     if (iTask.status != SCSI_STATUS_GOOD) {
945         return -EIO;
946     }
947 
948     if (flags & BDRV_REQ_MAY_UNMAP) {
949         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
950     } else {
951         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
952     }
953 
954     return 0;
955 }
956 
957 static void parse_chap(struct iscsi_context *iscsi, const char *target,
958                        Error **errp)
959 {
960     QemuOptsList *list;
961     QemuOpts *opts;
962     const char *user = NULL;
963     const char *password = NULL;
964 
965     list = qemu_find_opts("iscsi");
966     if (!list) {
967         return;
968     }
969 
970     opts = qemu_opts_find(list, target);
971     if (opts == NULL) {
972         opts = QTAILQ_FIRST(&list->head);
973         if (!opts) {
974             return;
975         }
976     }
977 
978     user = qemu_opt_get(opts, "user");
979     if (!user) {
980         return;
981     }
982 
983     password = qemu_opt_get(opts, "password");
984     if (!password) {
985         error_setg(errp, "CHAP username specified but no password was given");
986         return;
987     }
988 
989     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
990         error_setg(errp, "Failed to set initiator username and password");
991     }
992 }
993 
994 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
995                                 Error **errp)
996 {
997     QemuOptsList *list;
998     QemuOpts *opts;
999     const char *digest = NULL;
1000 
1001     list = qemu_find_opts("iscsi");
1002     if (!list) {
1003         return;
1004     }
1005 
1006     opts = qemu_opts_find(list, target);
1007     if (opts == NULL) {
1008         opts = QTAILQ_FIRST(&list->head);
1009         if (!opts) {
1010             return;
1011         }
1012     }
1013 
1014     digest = qemu_opt_get(opts, "header-digest");
1015     if (!digest) {
1016         return;
1017     }
1018 
1019     if (!strcmp(digest, "CRC32C")) {
1020         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1021     } else if (!strcmp(digest, "NONE")) {
1022         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1023     } else if (!strcmp(digest, "CRC32C-NONE")) {
1024         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1025     } else if (!strcmp(digest, "NONE-CRC32C")) {
1026         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1027     } else {
1028         error_setg(errp, "Invalid header-digest setting : %s", digest);
1029     }
1030 }
1031 
1032 static char *parse_initiator_name(const char *target)
1033 {
1034     QemuOptsList *list;
1035     QemuOpts *opts;
1036     const char *name;
1037     char *iscsi_name;
1038     UuidInfo *uuid_info;
1039 
1040     list = qemu_find_opts("iscsi");
1041     if (list) {
1042         opts = qemu_opts_find(list, target);
1043         if (!opts) {
1044             opts = QTAILQ_FIRST(&list->head);
1045         }
1046         if (opts) {
1047             name = qemu_opt_get(opts, "initiator-name");
1048             if (name) {
1049                 return g_strdup(name);
1050             }
1051         }
1052     }
1053 
1054     uuid_info = qmp_query_uuid(NULL);
1055     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1056         name = qemu_get_vm_name();
1057     } else {
1058         name = uuid_info->UUID;
1059     }
1060     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1061                                  name ? ":" : "", name ? name : "");
1062     qapi_free_UuidInfo(uuid_info);
1063     return iscsi_name;
1064 }
1065 
1066 static void iscsi_nop_timed_event(void *opaque)
1067 {
1068     IscsiLun *iscsilun = opaque;
1069 
1070     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1071         error_report("iSCSI: NOP timeout. Reconnecting...");
1072         iscsi_reconnect(iscsilun->iscsi);
1073     }
1074 
1075     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1076         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1077         return;
1078     }
1079 
1080     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1081     iscsi_set_events(iscsilun);
1082 }
1083 
1084 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1085 {
1086     struct scsi_task *task = NULL;
1087     struct scsi_readcapacity10 *rc10 = NULL;
1088     struct scsi_readcapacity16 *rc16 = NULL;
1089     int retries = ISCSI_CMD_RETRIES;
1090 
1091     do {
1092         if (task != NULL) {
1093             scsi_free_scsi_task(task);
1094             task = NULL;
1095         }
1096 
1097         switch (iscsilun->type) {
1098         case TYPE_DISK:
1099             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1100             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1101                 rc16 = scsi_datain_unmarshall(task);
1102                 if (rc16 == NULL) {
1103                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1104                 } else {
1105                     iscsilun->block_size = rc16->block_length;
1106                     iscsilun->num_blocks = rc16->returned_lba + 1;
1107                     iscsilun->lbpme = rc16->lbpme;
1108                     iscsilun->lbprz = rc16->lbprz;
1109                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1110                 }
1111             }
1112             break;
1113         case TYPE_ROM:
1114             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1115             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1116                 rc10 = scsi_datain_unmarshall(task);
1117                 if (rc10 == NULL) {
1118                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1119                 } else {
1120                     iscsilun->block_size = rc10->block_size;
1121                     if (rc10->lba == 0) {
1122                         /* blank disk loaded */
1123                         iscsilun->num_blocks = 0;
1124                     } else {
1125                         iscsilun->num_blocks = rc10->lba + 1;
1126                     }
1127                 }
1128             }
1129             break;
1130         default:
1131             return;
1132         }
1133     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1134              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1135              && retries-- > 0);
1136 
1137     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1138         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1139     }
1140     if (task) {
1141         scsi_free_scsi_task(task);
1142     }
1143 }
1144 
1145 /* TODO Convert to fine grained options */
1146 static QemuOptsList runtime_opts = {
1147     .name = "iscsi",
1148     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1149     .desc = {
1150         {
1151             .name = "filename",
1152             .type = QEMU_OPT_STRING,
1153             .help = "URL to the iscsi image",
1154         },
1155         { /* end of list */ }
1156     },
1157 };
1158 
1159 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1160                                           int evpd, int pc, void **inq, Error **errp)
1161 {
1162     int full_size;
1163     struct scsi_task *task = NULL;
1164     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1165     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1166         goto fail;
1167     }
1168     full_size = scsi_datain_getfullsize(task);
1169     if (full_size > task->datain.size) {
1170         scsi_free_scsi_task(task);
1171 
1172         /* we need more data for the full list */
1173         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1174         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1175             goto fail;
1176         }
1177     }
1178 
1179     *inq = scsi_datain_unmarshall(task);
1180     if (*inq == NULL) {
1181         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1182         goto fail_with_err;
1183     }
1184 
1185     return task;
1186 
1187 fail:
1188     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1189                iscsi_get_error(iscsi));
1190 fail_with_err:
1191     if (task != NULL) {
1192         scsi_free_scsi_task(task);
1193     }
1194     return NULL;
1195 }
1196 
1197 static void iscsi_detach_aio_context(BlockDriverState *bs)
1198 {
1199     IscsiLun *iscsilun = bs->opaque;
1200 
1201     aio_set_fd_handler(iscsilun->aio_context,
1202                        iscsi_get_fd(iscsilun->iscsi),
1203                        NULL, NULL, NULL);
1204     iscsilun->events = 0;
1205 
1206     if (iscsilun->nop_timer) {
1207         timer_del(iscsilun->nop_timer);
1208         timer_free(iscsilun->nop_timer);
1209         iscsilun->nop_timer = NULL;
1210     }
1211 }
1212 
1213 static void iscsi_attach_aio_context(BlockDriverState *bs,
1214                                      AioContext *new_context)
1215 {
1216     IscsiLun *iscsilun = bs->opaque;
1217 
1218     iscsilun->aio_context = new_context;
1219     iscsi_set_events(iscsilun);
1220 
1221     /* Set up a timer for sending out iSCSI NOPs */
1222     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1223                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1224                                         iscsi_nop_timed_event, iscsilun);
1225     timer_mod(iscsilun->nop_timer,
1226               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1227 }
1228 
1229 /*
1230  * We support iscsi url's on the form
1231  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1232  *
1233  * Note: flags are currently not used by iscsi_open.  If this function
1234  * is changed such that flags are used, please examine iscsi_reopen_prepare()
1235  * to see if needs to be changed as well.
1236  */
1237 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1238                       Error **errp)
1239 {
1240     IscsiLun *iscsilun = bs->opaque;
1241     struct iscsi_context *iscsi = NULL;
1242     struct iscsi_url *iscsi_url = NULL;
1243     struct scsi_task *task = NULL;
1244     struct scsi_inquiry_standard *inq = NULL;
1245     struct scsi_inquiry_supported_pages *inq_vpd;
1246     char *initiator_name = NULL;
1247     QemuOpts *opts;
1248     Error *local_err = NULL;
1249     const char *filename;
1250     int i, ret;
1251 
1252     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1253         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1254                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1255                    "of 512", BDRV_SECTOR_SIZE);
1256         return -EINVAL;
1257     }
1258 
1259     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1260     qemu_opts_absorb_qdict(opts, options, &local_err);
1261     if (local_err) {
1262         error_propagate(errp, local_err);
1263         ret = -EINVAL;
1264         goto out;
1265     }
1266 
1267     filename = qemu_opt_get(opts, "filename");
1268 
1269     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1270     if (iscsi_url == NULL) {
1271         error_setg(errp, "Failed to parse URL : %s", filename);
1272         ret = -EINVAL;
1273         goto out;
1274     }
1275 
1276     memset(iscsilun, 0, sizeof(IscsiLun));
1277 
1278     initiator_name = parse_initiator_name(iscsi_url->target);
1279 
1280     iscsi = iscsi_create_context(initiator_name);
1281     if (iscsi == NULL) {
1282         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1283         ret = -ENOMEM;
1284         goto out;
1285     }
1286 
1287     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1288         error_setg(errp, "iSCSI: Failed to set target name.");
1289         ret = -EINVAL;
1290         goto out;
1291     }
1292 
1293     if (iscsi_url->user != NULL) {
1294         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1295                                               iscsi_url->passwd);
1296         if (ret != 0) {
1297             error_setg(errp, "Failed to set initiator username and password");
1298             ret = -EINVAL;
1299             goto out;
1300         }
1301     }
1302 
1303     /* check if we got CHAP username/password via the options */
1304     parse_chap(iscsi, iscsi_url->target, &local_err);
1305     if (local_err != NULL) {
1306         error_propagate(errp, local_err);
1307         ret = -EINVAL;
1308         goto out;
1309     }
1310 
1311     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1312         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1313         ret = -EINVAL;
1314         goto out;
1315     }
1316 
1317     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1318 
1319     /* check if we got HEADER_DIGEST via the options */
1320     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1321     if (local_err != NULL) {
1322         error_propagate(errp, local_err);
1323         ret = -EINVAL;
1324         goto out;
1325     }
1326 
1327     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1328         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1329             iscsi_get_error(iscsi));
1330         ret = -EINVAL;
1331         goto out;
1332     }
1333 
1334     iscsilun->iscsi = iscsi;
1335     iscsilun->aio_context = bdrv_get_aio_context(bs);
1336     iscsilun->lun   = iscsi_url->lun;
1337     iscsilun->has_write_same = true;
1338 
1339     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1340                             (void **) &inq, errp);
1341     if (task == NULL) {
1342         ret = -EINVAL;
1343         goto out;
1344     }
1345     iscsilun->type = inq->periperal_device_type;
1346     scsi_free_scsi_task(task);
1347     task = NULL;
1348 
1349     iscsi_readcapacity_sync(iscsilun, &local_err);
1350     if (local_err != NULL) {
1351         error_propagate(errp, local_err);
1352         ret = -EINVAL;
1353         goto out;
1354     }
1355     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1356     bs->request_alignment = iscsilun->block_size;
1357 
1358     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1359      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1360      * will try to read from the device to guess the image format.
1361      */
1362     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1363         bs->sg = 1;
1364     }
1365 
1366     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1367                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1368                             (void **) &inq_vpd, errp);
1369     if (task == NULL) {
1370         ret = -EINVAL;
1371         goto out;
1372     }
1373     for (i = 0; i < inq_vpd->num_pages; i++) {
1374         struct scsi_task *inq_task;
1375         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1376         struct scsi_inquiry_block_limits *inq_bl;
1377         switch (inq_vpd->pages[i]) {
1378         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1379             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1380                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1381                                         (void **) &inq_lbp, errp);
1382             if (inq_task == NULL) {
1383                 ret = -EINVAL;
1384                 goto out;
1385             }
1386             memcpy(&iscsilun->lbp, inq_lbp,
1387                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1388             scsi_free_scsi_task(inq_task);
1389             break;
1390         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1391             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1392                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1393                                     (void **) &inq_bl, errp);
1394             if (inq_task == NULL) {
1395                 ret = -EINVAL;
1396                 goto out;
1397             }
1398             memcpy(&iscsilun->bl, inq_bl,
1399                    sizeof(struct scsi_inquiry_block_limits));
1400             scsi_free_scsi_task(inq_task);
1401             break;
1402         default:
1403             break;
1404         }
1405     }
1406     scsi_free_scsi_task(task);
1407     task = NULL;
1408 
1409     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1410 
1411     /* Guess the internal cluster (page) size of the iscsi target by the means
1412      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1413      * reasonable size */
1414     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1415         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1416         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1417                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1418         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1419             iscsilun->allocationmap =
1420                 bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1421                                         iscsilun->cluster_sectors));
1422         }
1423     }
1424 
1425 out:
1426     qemu_opts_del(opts);
1427     g_free(initiator_name);
1428     if (iscsi_url != NULL) {
1429         iscsi_destroy_url(iscsi_url);
1430     }
1431     if (task != NULL) {
1432         scsi_free_scsi_task(task);
1433     }
1434 
1435     if (ret) {
1436         if (iscsi != NULL) {
1437             iscsi_destroy_context(iscsi);
1438         }
1439         memset(iscsilun, 0, sizeof(IscsiLun));
1440     }
1441     return ret;
1442 }
1443 
1444 static void iscsi_close(BlockDriverState *bs)
1445 {
1446     IscsiLun *iscsilun = bs->opaque;
1447     struct iscsi_context *iscsi = iscsilun->iscsi;
1448 
1449     iscsi_detach_aio_context(bs);
1450     iscsi_destroy_context(iscsi);
1451     g_free(iscsilun->zeroblock);
1452     g_free(iscsilun->allocationmap);
1453     memset(iscsilun, 0, sizeof(IscsiLun));
1454 }
1455 
1456 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1457 {
1458     IscsiLun *iscsilun = bs->opaque;
1459 
1460     /* We don't actually refresh here, but just return data queried in
1461      * iscsi_open(): iscsi targets don't change their limits. */
1462     if (iscsilun->lbp.lbpu) {
1463         if (iscsilun->bl.max_unmap < 0xffffffff) {
1464             bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
1465                                                  iscsilun);
1466         }
1467         bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1468                                                    iscsilun);
1469     }
1470 
1471     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1472         bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
1473                                                   iscsilun);
1474     }
1475     if (iscsilun->lbp.lbpws) {
1476         bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1477                                                         iscsilun);
1478     }
1479     bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
1480                                                  iscsilun);
1481 }
1482 
1483 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
1484  * prepare.  Note that this will not re-establish a connection with an iSCSI
1485  * target - it is effectively a NOP.  */
1486 static int iscsi_reopen_prepare(BDRVReopenState *state,
1487                                 BlockReopenQueue *queue, Error **errp)
1488 {
1489     /* NOP */
1490     return 0;
1491 }
1492 
1493 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1494 {
1495     IscsiLun *iscsilun = bs->opaque;
1496     Error *local_err = NULL;
1497 
1498     if (iscsilun->type != TYPE_DISK) {
1499         return -ENOTSUP;
1500     }
1501 
1502     iscsi_readcapacity_sync(iscsilun, &local_err);
1503     if (local_err != NULL) {
1504         error_free(local_err);
1505         return -EIO;
1506     }
1507 
1508     if (offset > iscsi_getlength(bs)) {
1509         return -EINVAL;
1510     }
1511 
1512     if (iscsilun->allocationmap != NULL) {
1513         g_free(iscsilun->allocationmap);
1514         iscsilun->allocationmap =
1515             bitmap_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
1516                                                     iscsilun),
1517                                     iscsilun->cluster_sectors));
1518     }
1519 
1520     return 0;
1521 }
1522 
1523 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1524 {
1525     int ret = 0;
1526     int64_t total_size = 0;
1527     BlockDriverState *bs;
1528     IscsiLun *iscsilun = NULL;
1529     QDict *bs_options;
1530 
1531     bs = bdrv_new("", &error_abort);
1532 
1533     /* Read out options */
1534     total_size =
1535         qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
1536     bs->opaque = g_new0(struct IscsiLun, 1);
1537     iscsilun = bs->opaque;
1538 
1539     bs_options = qdict_new();
1540     qdict_put(bs_options, "filename", qstring_from_str(filename));
1541     ret = iscsi_open(bs, bs_options, 0, NULL);
1542     QDECREF(bs_options);
1543 
1544     if (ret != 0) {
1545         goto out;
1546     }
1547     iscsi_detach_aio_context(bs);
1548     if (iscsilun->type != TYPE_DISK) {
1549         ret = -ENODEV;
1550         goto out;
1551     }
1552     if (bs->total_sectors < total_size) {
1553         ret = -ENOSPC;
1554         goto out;
1555     }
1556 
1557     ret = 0;
1558 out:
1559     if (iscsilun->iscsi != NULL) {
1560         iscsi_destroy_context(iscsilun->iscsi);
1561     }
1562     g_free(bs->opaque);
1563     bs->opaque = NULL;
1564     bdrv_unref(bs);
1565     return ret;
1566 }
1567 
1568 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1569 {
1570     IscsiLun *iscsilun = bs->opaque;
1571     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1572     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1573     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1574     return 0;
1575 }
1576 
1577 static QemuOptsList iscsi_create_opts = {
1578     .name = "iscsi-create-opts",
1579     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1580     .desc = {
1581         {
1582             .name = BLOCK_OPT_SIZE,
1583             .type = QEMU_OPT_SIZE,
1584             .help = "Virtual disk size"
1585         },
1586         { /* end of list */ }
1587     }
1588 };
1589 
1590 static BlockDriver bdrv_iscsi = {
1591     .format_name     = "iscsi",
1592     .protocol_name   = "iscsi",
1593 
1594     .instance_size   = sizeof(IscsiLun),
1595     .bdrv_needs_filename = true,
1596     .bdrv_file_open  = iscsi_open,
1597     .bdrv_close      = iscsi_close,
1598     .bdrv_create     = iscsi_create,
1599     .create_opts     = &iscsi_create_opts,
1600     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1601 
1602     .bdrv_getlength  = iscsi_getlength,
1603     .bdrv_get_info   = iscsi_get_info,
1604     .bdrv_truncate   = iscsi_truncate,
1605     .bdrv_refresh_limits = iscsi_refresh_limits,
1606 
1607     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1608     .bdrv_co_discard      = iscsi_co_discard,
1609     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1610     .bdrv_co_readv         = iscsi_co_readv,
1611     .bdrv_co_writev        = iscsi_co_writev,
1612     .bdrv_co_flush_to_disk = iscsi_co_flush,
1613 
1614 #ifdef __linux__
1615     .bdrv_ioctl       = iscsi_ioctl,
1616     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1617 #endif
1618 
1619     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1620     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1621 };
1622 
1623 static QemuOptsList qemu_iscsi_opts = {
1624     .name = "iscsi",
1625     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1626     .desc = {
1627         {
1628             .name = "user",
1629             .type = QEMU_OPT_STRING,
1630             .help = "username for CHAP authentication to target",
1631         },{
1632             .name = "password",
1633             .type = QEMU_OPT_STRING,
1634             .help = "password for CHAP authentication to target",
1635         },{
1636             .name = "header-digest",
1637             .type = QEMU_OPT_STRING,
1638             .help = "HeaderDigest setting. "
1639                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1640         },{
1641             .name = "initiator-name",
1642             .type = QEMU_OPT_STRING,
1643             .help = "Initiator iqn name to use when connecting",
1644         },
1645         { /* end of list */ }
1646     },
1647 };
1648 
1649 static void iscsi_block_init(void)
1650 {
1651     bdrv_register(&bdrv_iscsi);
1652     qemu_add_opts(&qemu_iscsi_opts);
1653 }
1654 
1655 block_init(iscsi_block_init);
1656