xref: /qemu/hw/scsi/scsi-disk.c (revision c7c513389c6cb8c6dd60e55d1c99244de4e93663)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
 *  2009-Dec-12 Artyom Tarasenko : implemented standard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
22 #include "qemu/osdep.h"
23 #include "qemu/units.h"
24 #include "qapi/error.h"
25 #include "qemu/error-report.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/module.h"
28 #include "qemu/hw-version.h"
29 #include "qemu/memalign.h"
30 #include "hw/scsi/scsi.h"
31 #include "migration/qemu-file-types.h"
32 #include "migration/vmstate.h"
33 #include "hw/scsi/emulation.h"
34 #include "scsi/constants.h"
35 #include "system/arch_init.h"
36 #include "system/block-backend.h"
37 #include "system/blockdev.h"
38 #include "hw/block/block.h"
39 #include "hw/qdev-properties.h"
40 #include "hw/qdev-properties-system.h"
41 #include "system/dma.h"
42 #include "system/system.h"
43 #include "qemu/cutils.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 #ifdef __linux
48 #include <scsi/sg.h>
49 #endif
50 
/*
 * Buffer and response size limits.
 *
 * SCSI_DMA_BUF_SIZE is the size of the per-request bounce buffer that
 * scsi_init_iovec() allocates when a request has no scatter/gather list.
 * SCSI_MAX_INQUIRY_LEN caps the length returned for standard INQUIRY data.
 */
#define SCSI_WRITE_SAME_MAX         (512 * KiB)
#define SCSI_DMA_BUF_SIZE           (128 * KiB)
#define SCSI_MAX_INQUIRY_LEN        256
#define SCSI_MAX_MODE_LEN           256

/* Default values; their consumers are outside this part of the file. */
#define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
#define DEFAULT_MAX_UNMAP_SIZE      (1 * GiB)
#define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */

/* QOM type name passed to OBJECT_DECLARE_TYPE() below. */
#define TYPE_SCSI_DISK_BASE         "scsi-disk-base"

/* Maximum number of serial-number bytes copied into VPD page 0x80. */
#define MAX_SERIAL_LEN              36
#define MAX_SERIAL_LEN_FOR_DEVID    20
64 
65 OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
66 
/*
 * Class structure for SCSI disk devices.  The I/O paths in this file fetch
 * it with object_get_class() and call through the hooks below.
 */
struct SCSIDiskClass {
    SCSIDeviceClass parent_class;
    /*
     * Callbacks receive ret == 0 for success. Errors are represented either as
     * negative errno values, or as positive SAM status codes. For host_status
     * errors, the function passes ret == -ENODEV and sets the host_status field
     * of the SCSIRequest.
     */
    /*
     * Asynchronous vectored read/write.  Callers in this file pass a byte
     * offset (sector << BDRV_SECTOR_BITS), the request's QEMUIOVector, a
     * completion callback and the SCSIDiskReq as opaque values.
     */
    DMAIOFunc       *dma_readv;
    DMAIOFunc       *dma_writev;
    /*
     * NOTE(review): not invoked in this part of the file; presumably decides
     * per command whether FUA has to be emulated with a flush -- confirm.
     */
    bool            (*need_fua_emulation)(SCSICommand *cmd);
    /*
     * Called by scsi_handle_rw_error() when a failed request already carries
     * sense data and the error is reported to the guest.
     */
    void            (*update_sense)(SCSIRequest *r);
};
80 
/*
 * Per-request state for the emulated disk.  The generic SCSIRequest must
 * stay the first member: the code converts between the two with DO_UPCAST().
 */
typedef struct SCSIDiskReq {
    SCSIRequest req;
    /* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
    uint64_t sector;        /* next sector to transfer */
    uint32_t sector_count;  /* sectors still to be transferred */
    uint32_t buflen;        /* size of the buffer behind iov.iov_base */
    bool started;           /* set once scsi_read_data/scsi_write_data ran */
    /*
     * When set, scsi_read_data() flushes before the first read and
     * scsi_write_do_fua() flushes after the last write.
     */
    bool need_fua_emulation;
    struct iovec iov;       /* bounce buffer, set up by scsi_init_iovec() */
    QEMUIOVector qiov;      /* single-element vector wrapping iov */
    BlockAcctCookie acct;   /* cookie for block_acct_start/done/failed */
} SCSIDiskReq;
93 
/*
 * Bit numbers (not masks) within SCSIDiskState.features; test them with
 * (1 << SCSI_DISK_F_xxx).
 */
#define SCSI_DISK_F_REMOVABLE             0
#define SCSI_DISK_F_DPOFUA                1
#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2

/*
 * Device state of an emulated SCSI disk.  The SCSIDevice must stay the first
 * member: the code converts between the two with DO_UPCAST().
 */
struct SCSIDiskState {
    SCSIDevice qdev;
    uint32_t features;          /* bitmap of SCSI_DISK_F_* bit numbers */
    bool media_changed;
    bool media_event;
    bool eject_request;
    uint16_t port_index;        /* relative target port in VPD page 0x83 */
    /* Divided by the logical block size when reported in VPD page 0xb0. */
    uint64_t max_unmap_size;
    uint64_t max_io_size;
    uint32_t quirks;
    /* Strings reported in standard INQUIRY data and the VPD pages. */
    char *version;
    char *serial;
    char *vendor;
    char *product;
    char *device_id;
    char *loadparm;     /* only for s390x */
    bool tray_open;
    bool tray_locked;
    /*
     * 0x0000        - rotation rate not reported
     * 0x0001        - non-rotating medium (SSD)
     * 0x0002-0x0400 - reserved
     * 0x0401-0xfffe - rotations per minute
     * 0xffff        - reserved
     */
    uint16_t rotation_rate;
    /* Gates scsi_disk_emulate_{save,load}_request(). */
    bool migrate_emulated_scsi_request;
};
126 
127 static void scsi_free_request(SCSIRequest *req)
128 {
129     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
130 
131     qemu_vfree(r->iov.iov_base);
132 }
133 
134 /* Helper function for command completion with sense.  */
135 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
136 {
137     trace_scsi_disk_check_condition(r->req.tag, sense.key, sense.asc,
138                                     sense.ascq);
139     scsi_req_build_sense(&r->req, sense);
140     scsi_req_complete(&r->req, CHECK_CONDITION);
141 }
142 
143 static void scsi_init_iovec(SCSIDiskReq *r, size_t size)
144 {
145     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
146 
147     if (!r->iov.iov_base) {
148         r->buflen = size;
149         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
150     }
151     r->iov.iov_len = MIN(r->sector_count * BDRV_SECTOR_SIZE, r->buflen);
152     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
153 }
154 
155 static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
156 {
157     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
158 
159     qemu_put_be64s(f, &r->sector);
160     qemu_put_be32s(f, &r->sector_count);
161     qemu_put_be32s(f, &r->buflen);
162     if (r->buflen) {
163         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
164             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
165         } else if (!req->retry) {
166             uint32_t len = r->iov.iov_len;
167             qemu_put_be32s(f, &len);
168             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
169         }
170     }
171 }
172 
173 static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
174 {
175     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
176 
177     if (s->migrate_emulated_scsi_request) {
178         scsi_disk_save_request(f, req);
179     }
180 }
181 
182 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
183 {
184     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
185 
186     qemu_get_be64s(f, &r->sector);
187     qemu_get_be32s(f, &r->sector_count);
188     qemu_get_be32s(f, &r->buflen);
189     if (r->buflen) {
190         scsi_init_iovec(r, r->buflen);
191         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
192             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
193         } else if (!r->req.retry) {
194             uint32_t len;
195             qemu_get_be32s(f, &len);
196             r->iov.iov_len = len;
197             assert(r->iov.iov_len <= r->buflen);
198             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
199         }
200     }
201 
202     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
203 }
204 
205 static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
206 {
207     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
208 
209     if (s->migrate_emulated_scsi_request) {
210         scsi_disk_load_request(f, req);
211     }
212 }
213 
/*
 * scsi_handle_rw_error has two return values.  False means that the error
 * must be ignored, true means that the error has been processed and the
 * caller should not do anything else for this request.  Note that
 * scsi_handle_rw_error always manages its reference counts, independent
 * of the return value.
 *
 * @r:           the failed request
 * @ret:         negative errno, or a positive SAM status code returned by a
 *               passthrough command
 * @acct_failed: whether a reported error should also be accounted as failed
 *               I/O; forced to false for errors the guest handles itself
 *
 * Depending on the selected action the request is completed with an error
 * (report), left for the caller to continue (ignore, returns false) or
 * queued for retry (stop).
 */
static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
{
    bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
    SCSISense sense = SENSE_CODE(NO_SENSE);
    int16_t host_status;
    int error;
    bool req_has_sense = false;
    BlockErrorAction action;
    int status;

    /*
     * host_status should only be set for SG_IO requests that came back with a
     * host_status error in scsi_block_sgio_complete(). This error path passes
     * -ENODEV as the return value.
     *
     * Reset host_status in the request because we may still want to complete
     * the request successfully with the 'stop' or 'ignore' error policy.
     */
    host_status = r->req.host_status;
    if (host_status != -1) {
        assert(ret == -ENODEV);
        r->req.host_status = -1;
    }

    /* Derive both a SCSI status and an errno-style code from ret. */
    if (ret < 0) {
        status = scsi_sense_from_errno(-ret, &sense);
        error = -ret;
    } else {
        /* A passthrough command has completed with nonzero status.  */
        status = ret;
        switch (status) {
        case CHECK_CONDITION:
            req_has_sense = true;
            error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
            break;
        case RESERVATION_CONFLICT:
            /*
             * Don't apply the error policy, always report to the guest.
             *
             * This is a passthrough code path, so it's not a backend error, but
             * a response to an invalid guest request.
             *
             * Windows Failover Cluster validation intentionally sends invalid
             * requests to verify that reservations work as intended. It is
             * crucial that it sees the resulting errors.
             *
             * Treating a reservation conflict as a guest-side error is obvious
             * when a pr-manager is in use. Without one, the situation is less
             * clear, but there might be nothing that can be fixed on the host
             * (like in the above example), and we don't want to be stuck in a
             * loop where resuming the VM and retrying the request immediately
             * stops it again. So always reporting is still the safer option in
             * this case, too.
             */
            error = 0;
            break;
        default:
            error = EINVAL;
            break;
        }
    }

    /*
     * Check whether the error has to be handled by the guest or should
     * rather follow the rerror=/werror= settings.  Guest-handled errors
     * are usually retried immediately, so do not post them to QMP and
     * do not account them as failed I/O.
     */
    if (!error || (req_has_sense &&
                   scsi_sense_buf_is_guest_recoverable(r->req.sense,
                                                       sizeof(r->req.sense)))) {
        action = BLOCK_ERROR_ACTION_REPORT;
        acct_failed = false;
    } else {
        action = blk_get_error_action(s->qdev.conf.blk, is_read, error);
        blk_error_action(s->qdev.conf.blk, action, is_read, error);
    }

    switch (action) {
    case BLOCK_ERROR_ACTION_REPORT:
        if (acct_failed) {
            block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
        }
        /* A saved host_status error takes precedence over status/sense. */
        if (host_status != -1) {
            scsi_req_complete_failed(&r->req, host_status);
            return true;
        }
        if (req_has_sense) {
            sdc->update_sense(&r->req);
        } else if (status == CHECK_CONDITION) {
            scsi_req_build_sense(&r->req, sense);
        }
        scsi_req_complete(&r->req, status);
        return true;

    case BLOCK_ERROR_ACTION_IGNORE:
        return false;

    case BLOCK_ERROR_ACTION_STOP:
        scsi_req_retry(&r->req);
        return true;

    default:
        g_assert_not_reached();
    }
}
329 
330 static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
331 {
332     if (r->req.io_canceled) {
333         scsi_req_cancel_complete(&r->req);
334         return true;
335     }
336 
337     if (ret != 0) {
338         return scsi_handle_rw_error(r, ret, acct_failed);
339     }
340 
341     return false;
342 }
343 
344 static void scsi_aio_complete(void *opaque, int ret)
345 {
346     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
347     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
348 
349     /* The request must run in its AioContext */
350     assert(r->req.ctx == qemu_get_current_aio_context());
351 
352     assert(r->req.aiocb != NULL);
353     r->req.aiocb = NULL;
354 
355     if (scsi_disk_req_check_error(r, ret, true)) {
356         goto done;
357     }
358 
359     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
360     scsi_req_complete(&r->req, GOOD);
361 
362 done:
363     scsi_req_unref(&r->req);
364 }
365 
366 static bool scsi_is_cmd_fua(SCSICommand *cmd)
367 {
368     switch (cmd->buf[0]) {
369     case READ_10:
370     case READ_12:
371     case READ_16:
372     case WRITE_10:
373     case WRITE_12:
374     case WRITE_16:
375         return (cmd->buf[1] & 8) != 0;
376 
377     case VERIFY_10:
378     case VERIFY_12:
379     case VERIFY_16:
380     case WRITE_VERIFY_10:
381     case WRITE_VERIFY_12:
382     case WRITE_VERIFY_16:
383         return true;
384 
385     case READ_6:
386     case WRITE_6:
387     default:
388         return false;
389     }
390 }
391 
392 static void scsi_write_do_fua(SCSIDiskReq *r)
393 {
394     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
395 
396     assert(r->req.aiocb == NULL);
397     assert(!r->req.io_canceled);
398 
399     if (r->need_fua_emulation) {
400         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
401                          BLOCK_ACCT_FLUSH);
402         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
403         return;
404     }
405 
406     scsi_req_complete(&r->req, GOOD);
407     scsi_req_unref(&r->req);
408 }
409 
410 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
411 {
412     assert(r->req.aiocb == NULL);
413     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
414         goto done;
415     }
416 
417     r->sector += r->sector_count;
418     r->sector_count = 0;
419     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
420         scsi_write_do_fua(r);
421         return;
422     } else {
423         scsi_req_complete(&r->req, GOOD);
424     }
425 
426 done:
427     scsi_req_unref(&r->req);
428 }
429 
430 static void scsi_dma_complete(void *opaque, int ret)
431 {
432     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
433     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
434 
435     assert(r->req.aiocb != NULL);
436     r->req.aiocb = NULL;
437 
438     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
439     if (ret < 0) {
440         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
441     } else if (ret == 0) {
442         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
443     }
444     scsi_dma_complete_noio(r, ret);
445 }
446 
447 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
448 {
449     uint32_t n;
450 
451     /* The request must run in its AioContext */
452     assert(r->req.ctx == qemu_get_current_aio_context());
453 
454     assert(r->req.aiocb == NULL);
455     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
456         goto done;
457     }
458 
459     n = r->qiov.size / BDRV_SECTOR_SIZE;
460     r->sector += n;
461     r->sector_count -= n;
462     scsi_req_data(&r->req, r->qiov.size);
463 
464 done:
465     scsi_req_unref(&r->req);
466 }
467 
468 static void scsi_read_complete(void *opaque, int ret)
469 {
470     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
471     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
472 
473     assert(r->req.aiocb != NULL);
474     r->req.aiocb = NULL;
475 
476     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
477     if (ret < 0) {
478         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
479     } else if (ret == 0) {
480         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
481         trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
482     }
483     scsi_read_complete_noio(r, ret);
484 }
485 
486 /* Actually issue a read to the block device.  */
487 static void scsi_do_read(SCSIDiskReq *r, int ret)
488 {
489     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
490     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
491 
492     assert (r->req.aiocb == NULL);
493     if (scsi_disk_req_check_error(r, ret, false)) {
494         goto done;
495     }
496 
497     /* The request is used as the AIO opaque value, so add a ref.  */
498     scsi_req_ref(&r->req);
499 
500     if (r->req.sg) {
501         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
502         r->req.residual -= r->req.sg->size;
503         r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
504                                   BDRV_SECTOR_SIZE,
505                                   sdc->dma_readv, r, scsi_dma_complete, r,
506                                   DMA_DIRECTION_FROM_DEVICE);
507     } else {
508         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
509         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
510                          r->qiov.size, BLOCK_ACCT_READ);
511         r->req.aiocb = sdc->dma_readv(r->sector << BDRV_SECTOR_BITS, &r->qiov,
512                                       scsi_read_complete, r, r);
513     }
514 
515 done:
516     scsi_req_unref(&r->req);
517 }
518 
519 static void scsi_do_read_cb(void *opaque, int ret)
520 {
521     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
522     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
523 
524     assert (r->req.aiocb != NULL);
525     r->req.aiocb = NULL;
526 
527     if (ret < 0) {
528         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
529     } else {
530         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
531     }
532     scsi_do_read(opaque, ret);
533 }
534 
535 /* Read more data from scsi device into buffer.  */
536 static void scsi_read_data(SCSIRequest *req)
537 {
538     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
539     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
540     bool first;
541 
542     trace_scsi_disk_read_data_count(r->sector_count);
543     if (r->sector_count == 0) {
544         /* This also clears the sense buffer for REQUEST SENSE.  */
545         scsi_req_complete(&r->req, GOOD);
546         return;
547     }
548 
549     /* No data transfer may already be in progress */
550     assert(r->req.aiocb == NULL);
551 
552     /* The request is used as the AIO opaque value, so add a ref.  */
553     scsi_req_ref(&r->req);
554     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
555         trace_scsi_disk_read_data_invalid();
556         scsi_read_complete_noio(r, -EINVAL);
557         return;
558     }
559 
560     if (!blk_is_available(req->dev->conf.blk)) {
561         scsi_read_complete_noio(r, -ENOMEDIUM);
562         return;
563     }
564 
565     first = !r->started;
566     r->started = true;
567     if (first && r->need_fua_emulation) {
568         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
569                          BLOCK_ACCT_FLUSH);
570         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
571     } else {
572         scsi_do_read(r, 0);
573     }
574 }
575 
576 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
577 {
578     uint32_t n;
579 
580     /* The request must run in its AioContext */
581     assert(r->req.ctx == qemu_get_current_aio_context());
582 
583     assert (r->req.aiocb == NULL);
584     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
585         goto done;
586     }
587 
588     n = r->qiov.size / BDRV_SECTOR_SIZE;
589     r->sector += n;
590     r->sector_count -= n;
591     if (r->sector_count == 0) {
592         scsi_write_do_fua(r);
593         return;
594     } else {
595         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
596         trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
597         scsi_req_data(&r->req, r->qiov.size);
598     }
599 
600 done:
601     scsi_req_unref(&r->req);
602 }
603 
604 static void scsi_write_complete(void * opaque, int ret)
605 {
606     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
607     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
608 
609     assert (r->req.aiocb != NULL);
610     r->req.aiocb = NULL;
611 
612     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
613     if (ret < 0) {
614         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
615     } else if (ret == 0) {
616         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
617     }
618     scsi_write_complete_noio(r, ret);
619 }
620 
621 static void scsi_write_data(SCSIRequest *req)
622 {
623     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
624     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
625     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
626 
627     /* No data transfer may already be in progress */
628     assert(r->req.aiocb == NULL);
629 
630     /* The request is used as the AIO opaque value, so add a ref.  */
631     scsi_req_ref(&r->req);
632     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
633         trace_scsi_disk_write_data_invalid();
634         scsi_write_complete_noio(r, -EINVAL);
635         return;
636     }
637 
638     if (!r->req.sg && !r->qiov.size) {
639         /* Called for the first time.  Ask the driver to send us more data.  */
640         r->started = true;
641         scsi_write_complete_noio(r, 0);
642         return;
643     }
644     if (!blk_is_available(req->dev->conf.blk)) {
645         scsi_write_complete_noio(r, -ENOMEDIUM);
646         return;
647     }
648 
649     if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
650         r->req.cmd.buf[0] == VERIFY_16) {
651         if (r->req.sg) {
652             scsi_dma_complete_noio(r, 0);
653         } else {
654             scsi_write_complete_noio(r, 0);
655         }
656         return;
657     }
658 
659     if (r->req.sg) {
660         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
661         r->req.residual -= r->req.sg->size;
662         r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
663                                   BDRV_SECTOR_SIZE,
664                                   sdc->dma_writev, r, scsi_dma_complete, r,
665                                   DMA_DIRECTION_TO_DEVICE);
666     } else {
667         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
668                          r->qiov.size, BLOCK_ACCT_WRITE);
669         r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
670                                        scsi_write_complete, r, r);
671     }
672 }
673 
674 /* Return a pointer to the data buffer.  */
675 static uint8_t *scsi_get_buf(SCSIRequest *req)
676 {
677     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
678 
679     return (uint8_t *)r->iov.iov_base;
680 }
681 
/*
 * Build the response to an INQUIRY with the EVPD bit set.
 *
 * The page code is taken from CDB byte 2.  A 4-byte header (peripheral
 * device type, page code, reserved, page length) is written first and the
 * page payload follows; the page length byte is filled in at the end.
 *
 * Returns the total number of bytes written to @outbuf, or -1 when the
 * page is not supported by this device.
 *
 * NOTE(review): the size of @outbuf is not visible here.  For page 0x83 a
 * device_id of 247 bytes plus the WWN descriptor already gives more than
 * 255 payload bytes, which would trip the assertion at the end -- confirm
 * that device_id is bounded elsewhere.
 */
static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
    uint8_t page_code = req->cmd.buf[2];
    int start, buflen = 0;

    /* Common page header; outbuf[3] (page length) is patched below. */
    outbuf[buflen++] = s->qdev.type & 0x1f;
    outbuf[buflen++] = page_code;
    outbuf[buflen++] = 0x00;
    outbuf[buflen++] = 0x00;
    start = buflen;

    switch (page_code) {
    case 0x00: /* Supported page codes, mandatory */
    {
        trace_scsi_disk_emulate_vpd_page_00(req->cmd.xfer);
        outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
        if (s->serial) {
            outbuf[buflen++] = 0x80; /* unit serial number */
        }
        outbuf[buflen++] = 0x83; /* device identification */
        if (s->qdev.type == TYPE_DISK) {
            outbuf[buflen++] = 0xb0; /* block limits */
            outbuf[buflen++] = 0xb1; /* block device characteristics */
            outbuf[buflen++] = 0xb2; /* thin provisioning */
        }
        break;
    }
    case 0x80: /* Device serial number, optional */
    {
        int l;

        if (!s->serial) {
            trace_scsi_disk_emulate_vpd_page_80_not_supported();
            return -1;
        }

        /* The serial number is truncated to MAX_SERIAL_LEN bytes. */
        l = strlen(s->serial);
        if (l > MAX_SERIAL_LEN) {
            l = MAX_SERIAL_LEN;
        }

        trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
        memcpy(outbuf + buflen, s->serial, l);
        buflen += l;
        break;
    }

    case 0x83: /* Device identification page, mandatory */
    {
        int id_len = s->device_id ? MIN(strlen(s->device_id), 255 - 8) : 0;

        trace_scsi_disk_emulate_vpd_page_83(req->cmd.xfer);

        /* Each descriptor below is a 4-byte header followed by its data. */
        if (id_len) {
            outbuf[buflen++] = 0x2; /* ASCII */
            outbuf[buflen++] = 0;   /* not officially assigned */
            outbuf[buflen++] = 0;   /* reserved */
            outbuf[buflen++] = id_len; /* length of data following */
            memcpy(outbuf + buflen, s->device_id, id_len);
            buflen += id_len;
        }

        if (s->qdev.wwn) {
            outbuf[buflen++] = 0x1; /* Binary */
            outbuf[buflen++] = 0x3; /* NAA */
            outbuf[buflen++] = 0;   /* reserved */
            outbuf[buflen++] = 8;
            stq_be_p(&outbuf[buflen], s->qdev.wwn);
            buflen += 8;
        }

        if (s->qdev.port_wwn) {
            outbuf[buflen++] = 0x61; /* SAS / Binary */
            outbuf[buflen++] = 0x93; /* PIV / Target port / NAA */
            outbuf[buflen++] = 0;    /* reserved */
            outbuf[buflen++] = 8;
            stq_be_p(&outbuf[buflen], s->qdev.port_wwn);
            buflen += 8;
        }

        if (s->port_index) {
            outbuf[buflen++] = 0x61; /* SAS / Binary */

            /* PIV/Target port/relative target port */
            outbuf[buflen++] = 0x94;

            outbuf[buflen++] = 0;    /* reserved */
            outbuf[buflen++] = 4;
            /*
             * Only the last two of the four data bytes are written; the
             * first two are presumably already zero in outbuf -- TODO
             * confirm against the caller.
             */
            stw_be_p(&outbuf[buflen + 2], s->port_index);
            buflen += 4;
        }
        break;
    }
    case 0xb0: /* block limits */
    {
        SCSIBlockLimits bl = {};

        if (s->qdev.type == TYPE_ROM) {
            trace_scsi_disk_emulate_vpd_page_b0_not_supported();
            return -1;
        }
        /* All limits are expressed in logical blocks of qdev.blocksize. */
        bl.wsnz = 1;
        bl.unmap_sectors =
            s->qdev.conf.discard_granularity / s->qdev.blocksize;
        bl.min_io_size =
            s->qdev.conf.min_io_size / s->qdev.blocksize;
        bl.opt_io_size =
            s->qdev.conf.opt_io_size / s->qdev.blocksize;
        bl.max_unmap_sectors =
            s->max_unmap_size / s->qdev.blocksize;
        bl.max_io_sectors =
            s->max_io_size / s->qdev.blocksize;
        /* 255 descriptors fit in 4 KiB with an 8-byte header */
        bl.max_unmap_descr = 255;

        if (s->qdev.type == TYPE_DISK) {
            /* Also honour the backend's maximum transfer size, if set. */
            int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
            int max_io_sectors_blk =
                max_transfer_blk / s->qdev.blocksize;

            bl.max_io_sectors =
                MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
        }
        buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
        break;
    }
    case 0xb1: /* block device characteristics */
    {
        /*
         * Fixed-size page; only bytes 4-8 are written here.  The remaining
         * bytes up to 0x40 are presumably zeroed by the caller -- TODO
         * confirm.
         */
        buflen = 0x40;
        outbuf[4] = (s->rotation_rate >> 8) & 0xff;
        outbuf[5] = s->rotation_rate & 0xff;
        outbuf[6] = 0; /* PRODUCT TYPE */
        outbuf[7] = 0; /* WABEREQ | WACEREQ | NOMINAL FORM FACTOR */
        outbuf[8] = 0; /* VBULS */
        break;
    }
    case 0xb2: /* thin provisioning */
    {
        buflen = 8;
        outbuf[4] = 0;
        outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
        outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
        outbuf[7] = 0;
        break;
    }
    default:
        return -1;
    }
    /* done with EVPD */
    /* The page length field is a single byte, hence the 255 limit. */
    assert(buflen - start <= 255);
    outbuf[start - 1] = buflen - start;
    return buflen;
}
836 
837 static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
838 {
839     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
840     int buflen = 0;
841 
842     if (req->cmd.buf[1] & 0x1) {
843         /* Vital product data */
844         return scsi_disk_emulate_vpd_page(req, outbuf);
845     }
846 
847     /* Standard INQUIRY data */
848     if (req->cmd.buf[2] != 0) {
849         return -1;
850     }
851 
852     /* PAGE CODE == 0 */
853     buflen = req->cmd.xfer;
854     if (buflen > SCSI_MAX_INQUIRY_LEN) {
855         buflen = SCSI_MAX_INQUIRY_LEN;
856     }
857 
858     outbuf[0] = s->qdev.type & 0x1f;
859     outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;
860 
861     strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
862     strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');
863 
864     memset(&outbuf[32], 0, 4);
865     memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
866     /*
867      * We claim conformance to SPC-3, which is required for guests
868      * to ask for modern features like READ CAPACITY(16) or the
869      * block characteristics VPD page by default.  Not all of SPC-3
870      * is actually implemented, but we're good enough.
871      */
872     outbuf[2] = s->qdev.default_scsi_version;
873     outbuf[3] = 2 | 0x10; /* Format 2, HiSup */
874 
875     if (buflen > 36) {
876         outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
877     } else {
878         /* If the allocation length of CDB is too small,
879                the additional length is not adjusted */
880         outbuf[4] = 36 - 5;
881     }
882 
883     /* Sync data transfer and TCQ.  */
884     outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
885     return buflen;
886 }
887 
888 static inline bool media_is_dvd(SCSIDiskState *s)
889 {
890     uint64_t nb_sectors;
891     if (s->qdev.type != TYPE_ROM) {
892         return false;
893     }
894     if (!blk_is_available(s->qdev.conf.blk)) {
895         return false;
896     }
897     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
898     return nb_sectors > CD_MAX_SECTORS;
899 }
900 
901 static inline bool media_is_cd(SCSIDiskState *s)
902 {
903     uint64_t nb_sectors;
904     if (s->qdev.type != TYPE_ROM) {
905         return false;
906     }
907     if (!blk_is_available(s->qdev.conf.blk)) {
908         return false;
909     }
910     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
911     return nb_sectors <= CD_MAX_SECTORS;
912 }
913 
914 static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
915                                       uint8_t *outbuf)
916 {
917     uint8_t type = r->req.cmd.buf[1] & 7;
918 
919     if (s->qdev.type != TYPE_ROM) {
920         return -1;
921     }
922 
923     /* Types 1/2 are only defined for Blu-Ray.  */
924     if (type != 0) {
925         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
926         return -1;
927     }
928 
929     memset(outbuf, 0, 34);
930     outbuf[1] = 32;
931     outbuf[2] = 0xe; /* last session complete, disc finalized */
932     outbuf[3] = 1;   /* first track on disc */
933     outbuf[4] = 1;   /* # of sessions */
934     outbuf[5] = 1;   /* first track of last session */
935     outbuf[6] = 1;   /* last track of last session */
936     outbuf[7] = 0x20; /* unrestricted use */
937     outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
938     /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
939     /* 12-23: not meaningful for CD-ROM or DVD-ROM */
940     /* 24-31: disc bar code */
941     /* 32: disc application code */
942     /* 33: number of OPC tables */
943 
944     return 34;
945 }
946 
947 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
948                                    uint8_t *outbuf)
949 {
950     static const int rds_caps_size[5] = {
951         [0] = 2048 + 4,
952         [1] = 4 + 4,
953         [3] = 188 + 4,
954         [4] = 2048 + 4,
955     };
956 
957     uint8_t media = r->req.cmd.buf[1];
958     uint8_t layer = r->req.cmd.buf[6];
959     uint8_t format = r->req.cmd.buf[7];
960     int size = -1;
961 
962     if (s->qdev.type != TYPE_ROM) {
963         return -1;
964     }
965     if (media != 0) {
966         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
967         return -1;
968     }
969 
970     if (format != 0xff) {
971         if (!blk_is_available(s->qdev.conf.blk)) {
972             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
973             return -1;
974         }
975         if (media_is_cd(s)) {
976             scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
977             return -1;
978         }
979         if (format >= ARRAY_SIZE(rds_caps_size)) {
980             return -1;
981         }
982         size = rds_caps_size[format];
983         memset(outbuf, 0, size);
984     }
985 
986     switch (format) {
987     case 0x00: {
988         /* Physical format information */
989         uint64_t nb_sectors;
990         if (layer != 0) {
991             goto fail;
992         }
993         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
994 
995         outbuf[4] = 1;   /* DVD-ROM, part version 1 */
996         outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
997         outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
998         outbuf[7] = 0;   /* default densities */
999 
1000         stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
1001         stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
1002         break;
1003     }
1004 
1005     case 0x01: /* DVD copyright information, all zeros */
1006         break;
1007 
1008     case 0x03: /* BCA information - invalid field for no BCA info */
1009         return -1;
1010 
1011     case 0x04: /* DVD disc manufacturing information, all zeros */
1012         break;
1013 
1014     case 0xff: { /* List capabilities */
1015         int i;
1016         size = 4;
1017         for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
1018             if (!rds_caps_size[i]) {
1019                 continue;
1020             }
1021             outbuf[size] = i;
1022             outbuf[size + 1] = 0x40; /* Not writable, readable */
1023             stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
1024             size += 4;
1025         }
1026         break;
1027      }
1028 
1029     default:
1030         return -1;
1031     }
1032 
1033     /* Size of buffer, not including 2 byte size field */
1034     stw_be_p(outbuf, size - 2);
1035     return size;
1036 
1037 fail:
1038     return -1;
1039 }
1040 
1041 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
1042 {
1043     uint8_t event_code, media_status;
1044 
1045     media_status = 0;
1046     if (s->tray_open) {
1047         media_status = MS_TRAY_OPEN;
1048     } else if (blk_is_inserted(s->qdev.conf.blk)) {
1049         media_status = MS_MEDIA_PRESENT;
1050     }
1051 
1052     /* Event notification descriptor */
1053     event_code = MEC_NO_CHANGE;
1054     if (media_status != MS_TRAY_OPEN) {
1055         if (s->media_event) {
1056             event_code = MEC_NEW_MEDIA;
1057             s->media_event = false;
1058         } else if (s->eject_request) {
1059             event_code = MEC_EJECT_REQUESTED;
1060             s->eject_request = false;
1061         }
1062     }
1063 
1064     outbuf[0] = event_code;
1065     outbuf[1] = media_status;
1066 
1067     /* These fields are reserved, just clear them. */
1068     outbuf[2] = 0;
1069     outbuf[3] = 0;
1070     return 4;
1071 }
1072 
1073 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
1074                                               uint8_t *outbuf)
1075 {
1076     int size;
1077     uint8_t *buf = r->req.cmd.buf;
1078     uint8_t notification_class_request = buf[4];
1079     if (s->qdev.type != TYPE_ROM) {
1080         return -1;
1081     }
1082     if ((buf[1] & 1) == 0) {
1083         /* asynchronous */
1084         return -1;
1085     }
1086 
1087     size = 4;
1088     outbuf[0] = outbuf[1] = 0;
1089     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
1090     if (notification_class_request & (1 << GESN_MEDIA)) {
1091         outbuf[2] = GESN_MEDIA;
1092         size += scsi_event_status_media(s, &outbuf[size]);
1093     } else {
1094         outbuf[2] = 0x80;
1095     }
1096     stw_be_p(outbuf, size - 4);
1097     return size;
1098 }
1099 
1100 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
1101 {
1102     int current;
1103 
1104     if (s->qdev.type != TYPE_ROM) {
1105         return -1;
1106     }
1107 
1108     if (media_is_dvd(s)) {
1109         current = MMC_PROFILE_DVD_ROM;
1110     } else if (media_is_cd(s)) {
1111         current = MMC_PROFILE_CD_ROM;
1112     } else {
1113         current = MMC_PROFILE_NONE;
1114     }
1115 
1116     memset(outbuf, 0, 40);
1117     stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
1118     stw_be_p(&outbuf[6], current);
1119     /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
1120     outbuf[10] = 0x03; /* persistent, current */
1121     outbuf[11] = 8; /* two profiles */
1122     stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
1123     outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
1124     stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
1125     outbuf[18] = (current == MMC_PROFILE_CD_ROM);
1126     /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
1127     stw_be_p(&outbuf[20], 1);
1128     outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
1129     outbuf[23] = 8;
1130     stl_be_p(&outbuf[24], 1); /* SCSI */
1131     outbuf[28] = 1; /* DBE = 1, mandatory */
1132     /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
1133     stw_be_p(&outbuf[32], 3);
1134     outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
1135     outbuf[35] = 4;
1136     outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
1137     /* TODO: Random readable, CD read, DVD read, drive serial number,
1138        power management */
1139     return 40;
1140 }
1141 
1142 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1143 {
1144     if (s->qdev.type != TYPE_ROM) {
1145         return -1;
1146     }
1147     memset(outbuf, 0, 8);
1148     outbuf[5] = 1; /* CD-ROM */
1149     return 8;
1150 }
1151 
1152 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
1153                            int page_control)
1154 {
1155     static const int mode_sense_valid[0x3f] = {
1156         [MODE_PAGE_VENDOR_SPECIFIC]        = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1157         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
1158         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
1159         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1160         [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1161         [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
1162         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
1163         [MODE_PAGE_APPLE_VENDOR]           = (1 << TYPE_ROM),
1164     };
1165 
1166     uint8_t *p = *p_outbuf + 2;
1167     int length;
1168 
1169     assert(page < ARRAY_SIZE(mode_sense_valid));
1170     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
1171         return -1;
1172     }
1173 
1174     /*
1175      * If Changeable Values are requested, a mask denoting those mode parameters
1176      * that are changeable shall be returned. As we currently don't support
1177      * parameter changes via MODE_SELECT all bits are returned set to zero.
1178      * The buffer was already menset to zero by the caller of this function.
1179      *
1180      * The offsets here are off by two compared to the descriptions in the
1181      * SCSI specs, because those include a 2-byte header.  This is unfortunate,
1182      * but it is done so that offsets are consistent within our implementation
1183      * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
1184      * 2-byte and 4-byte headers.
1185      */
1186     switch (page) {
1187     case MODE_PAGE_HD_GEOMETRY:
1188         length = 0x16;
1189         if (page_control == 1) { /* Changeable Values */
1190             break;
1191         }
1192         /* if a geometry hint is available, use it */
1193         p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
1194         p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
1195         p[2] = s->qdev.conf.cyls & 0xff;
1196         p[3] = s->qdev.conf.heads & 0xff;
1197         /* Write precomp start cylinder, disabled */
1198         p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
1199         p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
1200         p[6] = s->qdev.conf.cyls & 0xff;
1201         /* Reduced current start cylinder, disabled */
1202         p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
1203         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1204         p[9] = s->qdev.conf.cyls & 0xff;
1205         /* Device step rate [ns], 200ns */
1206         p[10] = 0;
1207         p[11] = 200;
1208         /* Landing zone cylinder */
1209         p[12] = 0xff;
1210         p[13] =  0xff;
1211         p[14] = 0xff;
1212         /* Medium rotation rate [rpm], 5400 rpm */
1213         p[18] = (5400 >> 8) & 0xff;
1214         p[19] = 5400 & 0xff;
1215         break;
1216 
1217     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
1218         length = 0x1e;
1219         if (page_control == 1) { /* Changeable Values */
1220             break;
1221         }
1222         /* Transfer rate [kbit/s], 5Mbit/s */
1223         p[0] = 5000 >> 8;
1224         p[1] = 5000 & 0xff;
1225         /* if a geometry hint is available, use it */
1226         p[2] = s->qdev.conf.heads & 0xff;
1227         p[3] = s->qdev.conf.secs & 0xff;
1228         p[4] = s->qdev.blocksize >> 8;
1229         p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
1230         p[7] = s->qdev.conf.cyls & 0xff;
1231         /* Write precomp start cylinder, disabled */
1232         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1233         p[9] = s->qdev.conf.cyls & 0xff;
1234         /* Reduced current start cylinder, disabled */
1235         p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
1236         p[11] = s->qdev.conf.cyls & 0xff;
1237         /* Device step rate [100us], 100us */
1238         p[12] = 0;
1239         p[13] = 1;
1240         /* Device step pulse width [us], 1us */
1241         p[14] = 1;
1242         /* Device head settle delay [100us], 100us */
1243         p[15] = 0;
1244         p[16] = 1;
1245         /* Motor on delay [0.1s], 0.1s */
1246         p[17] = 1;
1247         /* Motor off delay [0.1s], 0.1s */
1248         p[18] = 1;
1249         /* Medium rotation rate [rpm], 5400 rpm */
1250         p[26] = (5400 >> 8) & 0xff;
1251         p[27] = 5400 & 0xff;
1252         break;
1253 
1254     case MODE_PAGE_CACHING:
1255         length = 0x12;
1256         if (page_control == 1 || /* Changeable Values */
1257             blk_enable_write_cache(s->qdev.conf.blk)) {
1258             p[0] = 4; /* WCE */
1259         }
1260         break;
1261 
1262     case MODE_PAGE_R_W_ERROR:
1263         length = 10;
1264         if (page_control == 1) { /* Changeable Values */
1265             if (s->qdev.type == TYPE_ROM) {
1266                 /* Automatic Write Reallocation Enabled */
1267                 p[0] = 0x80;
1268             }
1269             break;
1270         }
1271         p[0] = 0x80; /* Automatic Write Reallocation Enabled */
1272         if (s->qdev.type == TYPE_ROM) {
1273             p[1] = 0x20; /* Read Retry Count */
1274         }
1275         break;
1276 
1277     case MODE_PAGE_AUDIO_CTL:
1278         length = 14;
1279         break;
1280 
1281     case MODE_PAGE_CAPABILITIES:
1282         length = 0x14;
1283         if (page_control == 1) { /* Changeable Values */
1284             break;
1285         }
1286 
1287         p[0] = 0x3b; /* CD-R & CD-RW read */
1288         p[1] = 0; /* Writing not supported */
1289         p[2] = 0x7f; /* Audio, composite, digital out,
1290                         mode 2 form 1&2, multi session */
1291         p[3] = 0xff; /* CD DA, DA accurate, RW supported,
1292                         RW corrected, C2 errors, ISRC,
1293                         UPC, Bar code */
1294         p[4] = 0x2d | (s->tray_locked ? 2 : 0);
1295         /* Locking supported, jumper present, eject, tray */
1296         p[5] = 0; /* no volume & mute control, no
1297                      changer */
1298         p[6] = (50 * 176) >> 8; /* 50x read speed */
1299         p[7] = (50 * 176) & 0xff;
1300         p[8] = 2 >> 8; /* Two volume levels */
1301         p[9] = 2 & 0xff;
1302         p[10] = 2048 >> 8; /* 2M buffer */
1303         p[11] = 2048 & 0xff;
1304         p[12] = (16 * 176) >> 8; /* 16x read speed current */
1305         p[13] = (16 * 176) & 0xff;
1306         p[16] = (16 * 176) >> 8; /* 16x write speed */
1307         p[17] = (16 * 176) & 0xff;
1308         p[18] = (16 * 176) >> 8; /* 16x write speed current */
1309         p[19] = (16 * 176) & 0xff;
1310         break;
1311 
1312      case MODE_PAGE_APPLE_VENDOR:
1313         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR)) {
1314             length = 0x1e;
1315             if (page_control == 1) { /* Changeable Values */
1316                 break;
1317             }
1318 
1319             memset(p, 0, length);
1320             strcpy((char *)p + 8, "APPLE COMPUTER, INC   ");
1321             break;
1322         } else {
1323             return -1;
1324         }
1325 
1326     case MODE_PAGE_VENDOR_SPECIFIC:
1327         if (s->qdev.type == TYPE_DISK && (s->quirks &
1328             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1329             length = 0x2;
1330             if (page_control == 1) { /* Changeable Values */
1331                 p[0] = 0xff;
1332                 p[1] = 0xff;
1333                 break;
1334             }
1335             p[0] = 0;
1336             p[1] = 0;
1337             break;
1338         } else {
1339             return -1;
1340         }
1341 
1342     default:
1343         return -1;
1344     }
1345 
1346     assert(length < 256);
1347     (*p_outbuf)[0] = page;
1348     (*p_outbuf)[1] = length;
1349     *p_outbuf += length + 2;
1350     return length + 2;
1351 }
1352 
1353 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
1354 {
1355     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1356     uint64_t nb_sectors;
1357     bool dbd;
1358     int page, buflen, ret, page_control;
1359     uint8_t *p;
1360     uint8_t dev_specific_param;
1361 
1362     dbd = (r->req.cmd.buf[1] & 0x8) != 0;
1363     page = r->req.cmd.buf[2] & 0x3f;
1364     page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
1365 
1366     trace_scsi_disk_emulate_mode_sense((r->req.cmd.buf[0] == MODE_SENSE) ? 6 :
1367                                        10, page, r->req.cmd.xfer, page_control);
1368     memset(outbuf, 0, r->req.cmd.xfer);
1369     p = outbuf;
1370 
1371     if (s->qdev.type == TYPE_DISK) {
1372         dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
1373         if (!blk_is_writable(s->qdev.conf.blk)) {
1374             dev_specific_param |= 0x80; /* Readonly.  */
1375         }
1376     } else {
1377         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD)) {
1378             /* Use DBD from the request... */
1379             dev_specific_param = 0x00;
1380 
1381             /*
1382              * ... unless we receive a request for MODE_PAGE_APPLE_VENDOR
1383              * which should never return a block descriptor even though DBD is
1384              * not set, otherwise CDROM detection fails in MacOS
1385              */
1386             if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR) &&
1387                 page == MODE_PAGE_APPLE_VENDOR) {
1388                 dbd = true;
1389             }
1390         } else {
1391             /*
1392              * MMC prescribes that CD/DVD drives have no block descriptors,
1393              * and defines no device-specific parameter.
1394              */
1395             dev_specific_param = 0x00;
1396             dbd = true;
1397         }
1398     }
1399 
1400     if (r->req.cmd.buf[0] == MODE_SENSE) {
1401         p[1] = 0; /* Default media type.  */
1402         p[2] = dev_specific_param;
1403         p[3] = 0; /* Block descriptor length.  */
1404         p += 4;
1405     } else { /* MODE_SENSE_10 */
1406         p[2] = 0; /* Default media type.  */
1407         p[3] = dev_specific_param;
1408         p[6] = p[7] = 0; /* Block descriptor length.  */
1409         p += 8;
1410     }
1411 
1412     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1413     if (!dbd && nb_sectors) {
1414         if (r->req.cmd.buf[0] == MODE_SENSE) {
1415             outbuf[3] = 8; /* Block descriptor length  */
1416         } else { /* MODE_SENSE_10 */
1417             outbuf[7] = 8; /* Block descriptor length  */
1418         }
1419         nb_sectors /= (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1420         if (nb_sectors > 0xffffff) {
1421             nb_sectors = 0;
1422         }
1423         p[0] = 0; /* media density code */
1424         p[1] = (nb_sectors >> 16) & 0xff;
1425         p[2] = (nb_sectors >> 8) & 0xff;
1426         p[3] = nb_sectors & 0xff;
1427         p[4] = 0; /* reserved */
1428         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
1429         p[6] = s->qdev.blocksize >> 8;
1430         p[7] = 0;
1431         p += 8;
1432     }
1433 
1434     if (page_control == 3) {
1435         /* Saved Values */
1436         scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
1437         return -1;
1438     }
1439 
1440     if (page == 0x3f) {
1441         for (page = 0; page <= 0x3e; page++) {
1442             mode_sense_page(s, page, &p, page_control);
1443         }
1444     } else {
1445         ret = mode_sense_page(s, page, &p, page_control);
1446         if (ret == -1) {
1447             return -1;
1448         }
1449     }
1450 
1451     buflen = p - outbuf;
1452     /*
1453      * The mode data length field specifies the length in bytes of the
1454      * following data that is available to be transferred. The mode data
1455      * length does not include itself.
1456      */
1457     if (r->req.cmd.buf[0] == MODE_SENSE) {
1458         outbuf[0] = buflen - 1;
1459     } else { /* MODE_SENSE_10 */
1460         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
1461         outbuf[1] = (buflen - 2) & 0xff;
1462     }
1463     return buflen;
1464 }
1465 
1466 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1467 {
1468     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1469     int start_track, format, msf, toclen;
1470     uint64_t nb_sectors;
1471 
1472     msf = req->cmd.buf[1] & 2;
1473     format = req->cmd.buf[2] & 0xf;
1474     start_track = req->cmd.buf[6];
1475     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1476     trace_scsi_disk_emulate_read_toc(start_track, format, msf >> 1);
1477     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
1478     switch (format) {
1479     case 0:
1480         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1481         break;
1482     case 1:
1483         /* multi session : only a single session defined */
1484         toclen = 12;
1485         memset(outbuf, 0, 12);
1486         outbuf[1] = 0x0a;
1487         outbuf[2] = 0x01;
1488         outbuf[3] = 0x01;
1489         break;
1490     case 2:
1491         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1492         break;
1493     default:
1494         return -1;
1495     }
1496     return toclen;
1497 }
1498 
1499 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1500 {
1501     SCSIRequest *req = &r->req;
1502     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1503     bool start = req->cmd.buf[4] & 1;
1504     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1505     int pwrcnd = req->cmd.buf[4] & 0xf0;
1506 
1507     if (pwrcnd) {
1508         /* eject/load only happens for power condition == 0 */
1509         return 0;
1510     }
1511 
1512     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1513         if (!start && !s->tray_open && s->tray_locked) {
1514             scsi_check_condition(r,
1515                                  blk_is_inserted(s->qdev.conf.blk)
1516                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1517                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1518             return -1;
1519         }
1520 
1521         if (s->tray_open != !start) {
1522             blk_eject(s->qdev.conf.blk, !start);
1523             s->tray_open = !start;
1524         }
1525     }
1526     return 0;
1527 }
1528 
1529 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1530 {
1531     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1532     int buflen = r->iov.iov_len;
1533 
1534     if (buflen) {
1535         trace_scsi_disk_emulate_read_data(buflen);
1536         r->iov.iov_len = 0;
1537         r->started = true;
1538         scsi_req_data(&r->req, buflen);
1539         return;
1540     }
1541 
1542     /* This also clears the sense buffer for REQUEST SENSE.  */
1543     scsi_req_complete(&r->req, GOOD);
1544 }
1545 
1546 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1547                                        uint8_t *inbuf, int inlen)
1548 {
1549     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1550     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1551     uint8_t *p;
1552     int len, expected_len, changeable_len, i;
1553 
1554     /* The input buffer does not include the page header, so it is
1555      * off by 2 bytes.
1556      */
1557     expected_len = inlen + 2;
1558     if (expected_len > SCSI_MAX_MODE_LEN) {
1559         return -1;
1560     }
1561 
1562     /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
1563     if (page == MODE_PAGE_ALLS) {
1564         return -1;
1565     }
1566 
1567     p = mode_current;
1568     memset(mode_current, 0, inlen + 2);
1569     len = mode_sense_page(s, page, &p, 0);
1570     if (len < 0 || len != expected_len) {
1571         return -1;
1572     }
1573 
1574     p = mode_changeable;
1575     memset(mode_changeable, 0, inlen + 2);
1576     changeable_len = mode_sense_page(s, page, &p, 1);
1577     assert(changeable_len == len);
1578 
1579     /* Check that unchangeable bits are the same as what MODE SENSE
1580      * would return.
1581      */
1582     for (i = 2; i < len; i++) {
1583         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1584             return -1;
1585         }
1586     }
1587     return 0;
1588 }
1589 
1590 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1591 {
1592     switch (page) {
1593     case MODE_PAGE_CACHING:
1594         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1595         break;
1596 
1597     default:
1598         break;
1599     }
1600 }
1601 
1602 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1603 {
1604     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1605 
1606     while (len > 0) {
1607         int page, subpage, page_len;
1608 
1609         /* Parse both possible formats for the mode page headers.  */
1610         page = p[0] & 0x3f;
1611         if (p[0] & 0x40) {
1612             if (len < 4) {
1613                 goto invalid_param_len;
1614             }
1615             subpage = p[1];
1616             page_len = lduw_be_p(&p[2]);
1617             p += 4;
1618             len -= 4;
1619         } else {
1620             if (len < 2) {
1621                 goto invalid_param_len;
1622             }
1623             subpage = 0;
1624             page_len = p[1];
1625             p += 2;
1626             len -= 2;
1627         }
1628 
1629         if (subpage) {
1630             goto invalid_param;
1631         }
1632         if (page_len > len) {
1633             if (!(s->quirks & SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED)) {
1634                 goto invalid_param_len;
1635             }
1636             trace_scsi_disk_mode_select_page_truncated(page, page_len, len);
1637         }
1638 
1639         if (!change) {
1640             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1641                 goto invalid_param;
1642             }
1643         } else {
1644             scsi_disk_apply_mode_select(s, page, p);
1645         }
1646 
1647         p += page_len;
1648         len -= page_len;
1649     }
1650     return 0;
1651 
1652 invalid_param:
1653     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1654     return -1;
1655 
1656 invalid_param_len:
1657     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1658     return -1;
1659 }
1660 
1661 static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
1662 {
1663     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1664     uint8_t *p = inbuf;
1665     int cmd = r->req.cmd.buf[0];
1666     int len = r->req.cmd.xfer;
1667     int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
1668     int bd_len, bs;
1669     int pass;
1670 
1671     if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
1672         if (!(s->quirks &
1673             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1674             /* We only support PF=1, SP=0.  */
1675             goto invalid_field;
1676         }
1677     }
1678 
1679     if (len < hdr_len) {
1680         goto invalid_param_len;
1681     }
1682 
1683     bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
1684     len -= hdr_len;
1685     p += hdr_len;
1686     if (len < bd_len) {
1687         goto invalid_param_len;
1688     }
1689     if (bd_len != 0 && bd_len != 8) {
1690         goto invalid_param;
1691     }
1692 
1693     /* Allow changing the block size */
1694     if (bd_len) {
1695         bs = p[5] << 16 | p[6] << 8 | p[7];
1696 
1697         /*
1698          * Since the existing code only checks/updates bits 8-15 of the block
1699          * size, restrict ourselves to the same requirement for now to ensure
1700          * that a block size set by a block descriptor and then read back by
1701          * a subsequent SCSI command will be the same. Also disallow a block
1702          * size of 256 since we cannot handle anything below BDRV_SECTOR_SIZE.
1703          */
1704         if (bs && !(bs & ~0xfe00) && bs != s->qdev.blocksize) {
1705             s->qdev.blocksize = bs;
1706             trace_scsi_disk_mode_select_set_blocksize(s->qdev.blocksize);
1707         }
1708     }
1709 
1710     len -= bd_len;
1711     p += bd_len;
1712 
1713     /* Ensure no change is made if there is an error!  */
1714     for (pass = 0; pass < 2; pass++) {
1715         if (mode_select_pages(r, p, len, pass == 1) < 0) {
1716             assert(pass == 0);
1717             return;
1718         }
1719     }
1720     if (!blk_enable_write_cache(s->qdev.conf.blk)) {
1721         /* The request is used as the AIO opaque value, so add a ref.  */
1722         scsi_req_ref(&r->req);
1723         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
1724                          BLOCK_ACCT_FLUSH);
1725         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
1726         return;
1727     }
1728 
1729     scsi_req_complete(&r->req, GOOD);
1730     return;
1731 
1732 invalid_param:
1733     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1734     return;
1735 
1736 invalid_param_len:
1737     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1738     return;
1739 
1740 invalid_field:
1741     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1742 }
1743 
1744 /* sector_num and nb_sectors expected to be in qdev blocksize */
1745 static inline bool check_lba_range(SCSIDiskState *s,
1746                                    uint64_t sector_num, uint32_t nb_sectors)
1747 {
1748     /*
1749      * The first line tests that no overflow happens when computing the last
1750      * sector.  The second line tests that the last accessed sector is in
1751      * range.
1752      *
1753      * Careful, the computations should not underflow for nb_sectors == 0,
1754      * and a 0-block read to the first LBA beyond the end of device is
1755      * valid.
1756      */
1757     return (sector_num <= sector_num + nb_sectors &&
1758             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1759 }
1760 
/*
 * State carried across the chain of discard requests issued for a single
 * UNMAP command; one block descriptor is processed per step.
 */
typedef struct UnmapCBData {
    SCSIDiskReq *r;     /* the UNMAP request being serviced */
    uint8_t *inbuf;     /* next 16-byte block descriptor to process */
    int count;          /* number of block descriptors still to process */
} UnmapCBData;

/* AIO completion callback for one discard; defined after the noio helper. */
static void scsi_unmap_complete(void *opaque, int ret);
1768 
1769 static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
1770 {
1771     SCSIDiskReq *r = data->r;
1772     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1773 
1774     assert(r->req.aiocb == NULL);
1775 
1776     if (data->count > 0) {
1777         uint64_t sector_num = ldq_be_p(&data->inbuf[0]);
1778         uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
1779         r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1780         r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1781 
1782         if (!check_lba_range(s, sector_num, nb_sectors)) {
1783             block_acct_invalid(blk_get_stats(s->qdev.conf.blk),
1784                                BLOCK_ACCT_UNMAP);
1785             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1786             goto done;
1787         }
1788 
1789         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1790                          r->sector_count * BDRV_SECTOR_SIZE,
1791                          BLOCK_ACCT_UNMAP);
1792 
1793         r->req.aiocb = blk_aio_pdiscard(s->qdev.conf.blk,
1794                                         r->sector * BDRV_SECTOR_SIZE,
1795                                         r->sector_count * BDRV_SECTOR_SIZE,
1796                                         scsi_unmap_complete, data);
1797         data->count--;
1798         data->inbuf += 16;
1799         return;
1800     }
1801 
1802     scsi_req_complete(&r->req, GOOD);
1803 
1804 done:
1805     scsi_req_unref(&r->req);
1806     g_free(data);
1807 }
1808 
1809 static void scsi_unmap_complete(void *opaque, int ret)
1810 {
1811     UnmapCBData *data = opaque;
1812     SCSIDiskReq *r = data->r;
1813     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1814 
1815     assert(r->req.aiocb != NULL);
1816     r->req.aiocb = NULL;
1817 
1818     if (scsi_disk_req_check_error(r, ret, true)) {
1819         scsi_req_unref(&r->req);
1820         g_free(data);
1821     } else {
1822         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1823         scsi_unmap_complete_noio(data, ret);
1824     }
1825 }
1826 
1827 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
1828 {
1829     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1830     uint8_t *p = inbuf;
1831     int len = r->req.cmd.xfer;
1832     UnmapCBData *data;
1833 
1834     /* Reject ANCHOR=1.  */
1835     if (r->req.cmd.buf[1] & 0x1) {
1836         goto invalid_field;
1837     }
1838 
1839     if (len < 8) {
1840         goto invalid_param_len;
1841     }
1842     if (len < lduw_be_p(&p[0]) + 2) {
1843         goto invalid_param_len;
1844     }
1845     if (len < lduw_be_p(&p[2]) + 8) {
1846         goto invalid_param_len;
1847     }
1848     if (lduw_be_p(&p[2]) & 15) {
1849         goto invalid_param_len;
1850     }
1851 
1852     if (!blk_is_writable(s->qdev.conf.blk)) {
1853         block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1854         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1855         return;
1856     }
1857 
1858     data = g_new0(UnmapCBData, 1);
1859     data->r = r;
1860     data->inbuf = &p[8];
1861     data->count = lduw_be_p(&p[2]) >> 4;
1862 
1863     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
1864     scsi_req_ref(&r->req);
1865     scsi_unmap_complete_noio(data, 0);
1866     return;
1867 
1868 invalid_param_len:
1869     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1870     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1871     return;
1872 
1873 invalid_field:
1874     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1875     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1876 }
1877 
/*
 * State carried across the chunked writes that emulate a WRITE SAME with a
 * non-zero pattern.  Allocated by scsi_disk_emulate_write_same() and freed
 * by scsi_write_same_complete().
 */
typedef struct WriteSameCBData {
    SCSIDiskReq *r;         /* request being served */
    int64_t sector;         /* next BDRV_SECTOR_SIZE sector to write */
    int nb_sectors;         /* sectors left, including the chunk in flight */
    QEMUIOVector qiov;      /* I/O vector wrapping iov for blk_aio_pwritev() */
    struct iovec iov;       /* buffer filled with copies of the pattern */
} WriteSameCBData;
1885 
1886 static void scsi_write_same_complete(void *opaque, int ret)
1887 {
1888     WriteSameCBData *data = opaque;
1889     SCSIDiskReq *r = data->r;
1890     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1891 
1892     assert(r->req.aiocb != NULL);
1893     r->req.aiocb = NULL;
1894 
1895     if (scsi_disk_req_check_error(r, ret, true)) {
1896         goto done;
1897     }
1898 
1899     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1900 
1901     data->nb_sectors -= data->iov.iov_len / BDRV_SECTOR_SIZE;
1902     data->sector += data->iov.iov_len / BDRV_SECTOR_SIZE;
1903     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1904                             data->iov.iov_len);
1905     if (data->iov.iov_len) {
1906         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1907                          data->iov.iov_len, BLOCK_ACCT_WRITE);
1908         /* Reinitialize qiov, to handle unaligned WRITE SAME request
1909          * where final qiov may need smaller size */
1910         qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1911         r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1912                                        data->sector << BDRV_SECTOR_BITS,
1913                                        &data->qiov, 0,
1914                                        scsi_write_same_complete, data);
1915         return;
1916     }
1917 
1918     scsi_req_complete(&r->req, GOOD);
1919 
1920 done:
1921     scsi_req_unref(&r->req);
1922     qemu_vfree(data->iov.iov_base);
1923     g_free(data);
1924 }
1925 
1926 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1927 {
1928     SCSIRequest *req = &r->req;
1929     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1930     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1931     WriteSameCBData *data;
1932     uint8_t *buf;
1933     int i, l;
1934 
1935     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1936     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1937         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1938         return;
1939     }
1940 
1941     if (!blk_is_writable(s->qdev.conf.blk)) {
1942         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1943         return;
1944     }
1945     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1946         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1947         return;
1948     }
1949 
1950     if ((req->cmd.buf[1] & 0x1) || buffer_is_zero(inbuf, s->qdev.blocksize)) {
1951         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1952 
1953         /* The request is used as the AIO opaque value, so add a ref.  */
1954         scsi_req_ref(&r->req);
1955         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1956                          nb_sectors * s->qdev.blocksize,
1957                         BLOCK_ACCT_WRITE);
1958         r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
1959                                 r->req.cmd.lba * s->qdev.blocksize,
1960                                 nb_sectors * s->qdev.blocksize,
1961                                 flags, scsi_aio_complete, r);
1962         return;
1963     }
1964 
1965     data = g_new0(WriteSameCBData, 1);
1966     data->r = r;
1967     data->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1968     data->nb_sectors = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1969     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1970                             SCSI_WRITE_SAME_MAX);
1971     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1972                                               data->iov.iov_len);
1973     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1974 
1975     for (i = 0; i < data->iov.iov_len; i += l) {
1976         l = MIN(s->qdev.blocksize, data->iov.iov_len - i);
1977         memcpy(&buf[i], inbuf, l);
1978     }
1979 
1980     scsi_req_ref(&r->req);
1981     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1982                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1983     r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1984                                    data->sector << BDRV_SECTOR_BITS,
1985                                    &data->qiov, 0,
1986                                    scsi_write_same_complete, data);
1987 }
1988 
1989 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1990 {
1991     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1992 
1993     if (r->iov.iov_len) {
1994         int buflen = r->iov.iov_len;
1995         trace_scsi_disk_emulate_write_data(buflen);
1996         r->iov.iov_len = 0;
1997         scsi_req_data(&r->req, buflen);
1998         return;
1999     }
2000 
2001     switch (req->cmd.buf[0]) {
2002     case MODE_SELECT:
2003     case MODE_SELECT_10:
2004         /* This also clears the sense buffer for REQUEST SENSE.  */
2005         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
2006         break;
2007 
2008     case UNMAP:
2009         scsi_disk_emulate_unmap(r, r->iov.iov_base);
2010         break;
2011 
2012     case VERIFY_10:
2013     case VERIFY_12:
2014     case VERIFY_16:
2015         if (r->req.status == -1) {
2016             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2017         }
2018         break;
2019 
2020     case WRITE_SAME_10:
2021     case WRITE_SAME_16:
2022         scsi_disk_emulate_write_same(r, r->iov.iov_base);
2023         break;
2024 
2025     case FORMAT_UNIT:
2026         scsi_req_complete(&r->req, GOOD);
2027         break;
2028 
2029     default:
2030         abort();
2031     }
2032 }
2033 
2034 static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
2035 {
2036     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2037     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2038     uint64_t nb_sectors;
2039     uint8_t *outbuf;
2040     int buflen;
2041 
2042     switch (req->cmd.buf[0]) {
2043     case INQUIRY:
2044     case MODE_SENSE:
2045     case MODE_SENSE_10:
2046     case RESERVE:
2047     case RESERVE_10:
2048     case RELEASE:
2049     case RELEASE_10:
2050     case START_STOP:
2051     case ALLOW_MEDIUM_REMOVAL:
2052     case GET_CONFIGURATION:
2053     case GET_EVENT_STATUS_NOTIFICATION:
2054     case MECHANISM_STATUS:
2055     case REQUEST_SENSE:
2056         break;
2057 
2058     default:
2059         if (!blk_is_available(s->qdev.conf.blk)) {
2060             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2061             return 0;
2062         }
2063         break;
2064     }
2065 
2066     /*
2067      * FIXME: we shouldn't return anything bigger than 4k, but the code
2068      * requires the buffer to be as big as req->cmd.xfer in several
2069      * places.  So, do not allow CDBs with a very large ALLOCATION
2070      * LENGTH.  The real fix would be to modify scsi_read_data and
2071      * dma_buf_read, so that they return data beyond the buflen
2072      * as all zeros.
2073      */
2074     if (req->cmd.xfer > 65536) {
2075         goto illegal_request;
2076     }
2077     r->buflen = MAX(4096, req->cmd.xfer);
2078 
2079     if (!r->iov.iov_base) {
2080         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
2081     }
2082 
2083     outbuf = r->iov.iov_base;
2084     memset(outbuf, 0, r->buflen);
2085     switch (req->cmd.buf[0]) {
2086     case TEST_UNIT_READY:
2087         assert(blk_is_available(s->qdev.conf.blk));
2088         break;
2089     case INQUIRY:
2090         buflen = scsi_disk_emulate_inquiry(req, outbuf);
2091         if (buflen < 0) {
2092             goto illegal_request;
2093         }
2094         break;
2095     case MODE_SENSE:
2096     case MODE_SENSE_10:
2097         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
2098         if (buflen < 0) {
2099             goto illegal_request;
2100         }
2101         break;
2102     case READ_TOC:
2103         buflen = scsi_disk_emulate_read_toc(req, outbuf);
2104         if (buflen < 0) {
2105             goto illegal_request;
2106         }
2107         break;
2108     case RESERVE:
2109         if (req->cmd.buf[1] & 1) {
2110             goto illegal_request;
2111         }
2112         break;
2113     case RESERVE_10:
2114         if (req->cmd.buf[1] & 3) {
2115             goto illegal_request;
2116         }
2117         break;
2118     case RELEASE:
2119         if (req->cmd.buf[1] & 1) {
2120             goto illegal_request;
2121         }
2122         break;
2123     case RELEASE_10:
2124         if (req->cmd.buf[1] & 3) {
2125             goto illegal_request;
2126         }
2127         break;
2128     case START_STOP:
2129         if (scsi_disk_emulate_start_stop(r) < 0) {
2130             return 0;
2131         }
2132         break;
2133     case ALLOW_MEDIUM_REMOVAL:
2134         s->tray_locked = req->cmd.buf[4] & 1;
2135         blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
2136         break;
2137     case READ_CAPACITY_10:
2138         /* The normal LEN field for this command is zero.  */
2139         memset(outbuf, 0, 8);
2140         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2141         if (!nb_sectors) {
2142             scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2143             return 0;
2144         }
2145         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
2146             goto illegal_request;
2147         }
2148         nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2149         /* Returned value is the address of the last sector.  */
2150         nb_sectors--;
2151         /* Remember the new size for read/write sanity checking. */
2152         s->qdev.max_lba = nb_sectors;
2153         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
2154         if (nb_sectors > UINT32_MAX) {
2155             nb_sectors = UINT32_MAX;
2156         }
2157         outbuf[0] = (nb_sectors >> 24) & 0xff;
2158         outbuf[1] = (nb_sectors >> 16) & 0xff;
2159         outbuf[2] = (nb_sectors >> 8) & 0xff;
2160         outbuf[3] = nb_sectors & 0xff;
2161         outbuf[4] = 0;
2162         outbuf[5] = 0;
2163         outbuf[6] = s->qdev.blocksize >> 8;
2164         outbuf[7] = 0;
2165         break;
2166     case REQUEST_SENSE:
2167         /* Just return "NO SENSE".  */
2168         buflen = scsi_convert_sense(NULL, 0, outbuf, r->buflen,
2169                                     (req->cmd.buf[1] & 1) == 0);
2170         if (buflen < 0) {
2171             goto illegal_request;
2172         }
2173         break;
2174     case MECHANISM_STATUS:
2175         buflen = scsi_emulate_mechanism_status(s, outbuf);
2176         if (buflen < 0) {
2177             goto illegal_request;
2178         }
2179         break;
2180     case GET_CONFIGURATION:
2181         buflen = scsi_get_configuration(s, outbuf);
2182         if (buflen < 0) {
2183             goto illegal_request;
2184         }
2185         break;
2186     case GET_EVENT_STATUS_NOTIFICATION:
2187         buflen = scsi_get_event_status_notification(s, r, outbuf);
2188         if (buflen < 0) {
2189             goto illegal_request;
2190         }
2191         break;
2192     case READ_DISC_INFORMATION:
2193         buflen = scsi_read_disc_information(s, r, outbuf);
2194         if (buflen < 0) {
2195             goto illegal_request;
2196         }
2197         break;
2198     case READ_DVD_STRUCTURE:
2199         buflen = scsi_read_dvd_structure(s, r, outbuf);
2200         if (buflen < 0) {
2201             goto illegal_request;
2202         }
2203         break;
2204     case SERVICE_ACTION_IN_16:
2205         /* Service Action In subcommands. */
2206         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
2207             trace_scsi_disk_emulate_command_SAI_16();
2208             memset(outbuf, 0, req->cmd.xfer);
2209             blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2210             if (!nb_sectors) {
2211                 scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2212                 return 0;
2213             }
2214             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
2215                 goto illegal_request;
2216             }
2217             nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2218             /* Returned value is the address of the last sector.  */
2219             nb_sectors--;
2220             /* Remember the new size for read/write sanity checking. */
2221             s->qdev.max_lba = nb_sectors;
2222             outbuf[0] = (nb_sectors >> 56) & 0xff;
2223             outbuf[1] = (nb_sectors >> 48) & 0xff;
2224             outbuf[2] = (nb_sectors >> 40) & 0xff;
2225             outbuf[3] = (nb_sectors >> 32) & 0xff;
2226             outbuf[4] = (nb_sectors >> 24) & 0xff;
2227             outbuf[5] = (nb_sectors >> 16) & 0xff;
2228             outbuf[6] = (nb_sectors >> 8) & 0xff;
2229             outbuf[7] = nb_sectors & 0xff;
2230             outbuf[8] = 0;
2231             outbuf[9] = 0;
2232             outbuf[10] = s->qdev.blocksize >> 8;
2233             outbuf[11] = 0;
2234             outbuf[12] = 0;
2235             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
2236 
2237             /* set TPE bit if the format supports discard */
2238             if (s->qdev.conf.discard_granularity) {
2239                 outbuf[14] = 0x80;
2240             }
2241 
2242             /* Protection, exponent and lowest lba field left blank. */
2243             break;
2244         }
2245         trace_scsi_disk_emulate_command_SAI_unsupported();
2246         goto illegal_request;
2247     case SYNCHRONIZE_CACHE:
2248         /* The request is used as the AIO opaque value, so add a ref.  */
2249         scsi_req_ref(&r->req);
2250         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
2251                          BLOCK_ACCT_FLUSH);
2252         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
2253         return 0;
2254     case SEEK_10:
2255         trace_scsi_disk_emulate_command_SEEK_10(r->req.cmd.lba);
2256         if (r->req.cmd.lba > s->qdev.max_lba) {
2257             goto illegal_lba;
2258         }
2259         break;
2260     case MODE_SELECT:
2261         trace_scsi_disk_emulate_command_MODE_SELECT(r->req.cmd.xfer);
2262         break;
2263     case MODE_SELECT_10:
2264         trace_scsi_disk_emulate_command_MODE_SELECT_10(r->req.cmd.xfer);
2265         break;
2266     case UNMAP:
2267         trace_scsi_disk_emulate_command_UNMAP(r->req.cmd.xfer);
2268         break;
2269     case VERIFY_10:
2270     case VERIFY_12:
2271     case VERIFY_16:
2272         trace_scsi_disk_emulate_command_VERIFY((req->cmd.buf[1] >> 1) & 3);
2273         if (req->cmd.buf[1] & 6) {
2274             goto illegal_request;
2275         }
2276         break;
2277     case WRITE_SAME_10:
2278     case WRITE_SAME_16:
2279         trace_scsi_disk_emulate_command_WRITE_SAME(
2280                 req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, r->req.cmd.xfer);
2281         break;
2282     case FORMAT_UNIT:
2283         trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
2284         break;
2285     default:
2286         trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
2287                                                 scsi_command_name(buf[0]));
2288         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
2289         return 0;
2290     }
2291     assert(!r->req.aiocb);
2292     r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
2293     if (r->iov.iov_len == 0) {
2294         scsi_req_complete(&r->req, GOOD);
2295     }
2296     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2297         assert(r->iov.iov_len == req->cmd.xfer);
2298         return -r->iov.iov_len;
2299     } else {
2300         return r->iov.iov_len;
2301     }
2302 
2303 illegal_request:
2304     if (r->req.status == -1) {
2305         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2306     }
2307     return 0;
2308 
2309 illegal_lba:
2310     scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2311     return 0;
2312 }
2313 
2314 /* Execute a scsi command.  Returns the length of the data expected by the
2315    command.  This will be Positive for data transfers from the device
2316    (eg. disk reads), negative for transfers to the device (eg. disk writes),
2317    and zero if the command does not transfer any data.  */
2318 
2319 static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
2320 {
2321     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2322     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2323     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
2324     uint32_t len;
2325     uint8_t command;
2326 
2327     command = buf[0];
2328 
2329     if (!blk_is_available(s->qdev.conf.blk)) {
2330         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2331         return 0;
2332     }
2333 
2334     len = scsi_data_cdb_xfer(r->req.cmd.buf);
2335     switch (command) {
2336     case READ_6:
2337     case READ_10:
2338     case READ_12:
2339     case READ_16:
2340         trace_scsi_disk_dma_command_READ(r->req.cmd.lba, len);
2341         /* Protection information is not supported.  For SCSI versions 2 and
2342          * older (as determined by snooping the guest's INQUIRY commands),
2343          * there is no RD/WR/VRPROTECT, so skip this check in these versions.
2344          */
2345         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2346             goto illegal_request;
2347         }
2348         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2349             goto illegal_lba;
2350         }
2351         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2352         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2353         break;
2354     case WRITE_6:
2355     case WRITE_10:
2356     case WRITE_12:
2357     case WRITE_16:
2358     case WRITE_VERIFY_10:
2359     case WRITE_VERIFY_12:
2360     case WRITE_VERIFY_16:
2361         if (!blk_is_writable(s->qdev.conf.blk)) {
2362             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
2363             return 0;
2364         }
2365         trace_scsi_disk_dma_command_WRITE(
2366                 (command & 0xe) == 0xe ? "And Verify " : "",
2367                 r->req.cmd.lba, len);
2368         /* fall through */
2369     case VERIFY_10:
2370     case VERIFY_12:
2371     case VERIFY_16:
2372         /* We get here only for BYTCHK == 0x01 and only for scsi-block.
2373          * As far as DMA is concerned, we can treat it the same as a write;
2374          * scsi_block_do_sgio will send VERIFY commands.
2375          */
2376         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2377             goto illegal_request;
2378         }
2379         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2380             goto illegal_lba;
2381         }
2382         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2383         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2384         break;
2385     default:
2386         abort();
2387     illegal_request:
2388         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2389         return 0;
2390     illegal_lba:
2391         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2392         return 0;
2393     }
2394     r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
2395     if (r->sector_count == 0) {
2396         scsi_req_complete(&r->req, GOOD);
2397     }
2398     assert(r->iov.iov_len == 0);
2399     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2400         return -r->sector_count * BDRV_SECTOR_SIZE;
2401     } else {
2402         return r->sector_count * BDRV_SECTOR_SIZE;
2403     }
2404 }
2405 
2406 static void scsi_disk_reset(DeviceState *dev)
2407 {
2408     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2409     uint64_t nb_sectors;
2410 
2411     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2412 
2413     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2414 
2415     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2416     if (nb_sectors) {
2417         nb_sectors--;
2418     }
2419     s->qdev.max_lba = nb_sectors;
2420     /* reset tray statuses */
2421     s->tray_locked = 0;
2422     s->tray_open = 0;
2423 
2424     s->qdev.scsi_version = s->qdev.default_scsi_version;
2425 }
2426 
2427 static void scsi_disk_drained_begin(void *opaque)
2428 {
2429     SCSIDiskState *s = opaque;
2430 
2431     scsi_device_drained_begin(&s->qdev);
2432 }
2433 
2434 static void scsi_disk_drained_end(void *opaque)
2435 {
2436     SCSIDiskState *s = opaque;
2437 
2438     scsi_device_drained_end(&s->qdev);
2439 }
2440 
2441 static void scsi_disk_resize_cb(void *opaque)
2442 {
2443     SCSIDiskState *s = opaque;
2444 
2445     /* SPC lists this sense code as available only for
2446      * direct-access devices.
2447      */
2448     if (s->qdev.type == TYPE_DISK) {
2449         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2450     }
2451 }
2452 
2453 static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
2454 {
2455     SCSIDiskState *s = opaque;
2456 
2457     /*
2458      * When a CD gets changed, we have to report an ejected state and
2459      * then a loaded state to guests so that they detect tray
2460      * open/close and media change events.  Guests that do not use
2461      * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
2462      * states rely on this behavior.
2463      *
2464      * media_changed governs the state machine used for unit attention
2465      * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
2466      */
2467     s->media_changed = load;
2468     s->tray_open = !load;
2469     scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
2470     s->media_event = true;
2471     s->eject_request = false;
2472 }
2473 
2474 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2475 {
2476     SCSIDiskState *s = opaque;
2477 
2478     s->eject_request = true;
2479     if (force) {
2480         s->tray_locked = false;
2481     }
2482 }
2483 
2484 static bool scsi_cd_is_tray_open(void *opaque)
2485 {
2486     return ((SCSIDiskState *)opaque)->tray_open;
2487 }
2488 
2489 static bool scsi_cd_is_medium_locked(void *opaque)
2490 {
2491     return ((SCSIDiskState *)opaque)->tray_locked;
2492 }
2493 
2494 static const BlockDevOps scsi_disk_removable_block_ops = {
2495     .change_media_cb  = scsi_cd_change_media_cb,
2496     .drained_begin    = scsi_disk_drained_begin,
2497     .drained_end      = scsi_disk_drained_end,
2498     .eject_request_cb = scsi_cd_eject_request_cb,
2499     .is_medium_locked = scsi_cd_is_medium_locked,
2500     .is_tray_open     = scsi_cd_is_tray_open,
2501     .resize_cb        = scsi_disk_resize_cb,
2502 };
2503 
2504 static const BlockDevOps scsi_disk_block_ops = {
2505     .drained_begin = scsi_disk_drained_begin,
2506     .drained_end   = scsi_disk_drained_end,
2507     .resize_cb     = scsi_disk_resize_cb,
2508 };
2509 
2510 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2511 {
2512     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2513     if (s->media_changed) {
2514         s->media_changed = false;
2515         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2516     }
2517 }
2518 
2519 static void scsi_realize(SCSIDevice *dev, Error **errp)
2520 {
2521     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2522     bool read_only;
2523 
2524     if (!s->qdev.conf.blk) {
2525         error_setg(errp, "drive property not set");
2526         return;
2527     }
2528 
2529     if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2530         !blk_is_inserted(s->qdev.conf.blk)) {
2531         error_setg(errp, "Device needs media, but drive is empty");
2532         return;
2533     }
2534 
2535     if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2536         return;
2537     }
2538 
2539     if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
2540         !s->qdev.hba_supports_iothread)
2541     {
2542         error_setg(errp, "HBA does not support iothreads");
2543         return;
2544     }
2545 
2546     if (dev->type == TYPE_DISK) {
2547         if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
2548             return;
2549         }
2550     }
2551 
2552     read_only = !blk_supports_write_perm(s->qdev.conf.blk);
2553     if (dev->type == TYPE_ROM) {
2554         read_only = true;
2555     }
2556 
2557     if (!blkconf_apply_backend_options(&dev->conf, read_only,
2558                                        dev->type == TYPE_DISK, errp)) {
2559         return;
2560     }
2561 
2562     if (s->qdev.conf.discard_granularity == -1) {
2563         s->qdev.conf.discard_granularity =
2564             MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
2565     }
2566 
2567     if (!s->version) {
2568         s->version = g_strdup(qemu_hw_version());
2569     }
2570     if (!s->vendor) {
2571         s->vendor = g_strdup("QEMU");
2572     }
2573     if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
2574         error_setg(errp, "The serial number can't be longer than %d characters",
2575                    MAX_SERIAL_LEN);
2576         return;
2577     }
2578     if (!s->device_id) {
2579         if (s->serial) {
2580             if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
2581                 error_setg(errp, "The serial number can't be longer than %d "
2582                            "characters when it is also used as the default for "
2583                            "device_id", MAX_SERIAL_LEN_FOR_DEVID);
2584                 return;
2585             }
2586             s->device_id = g_strdup(s->serial);
2587         } else {
2588             const char *str = blk_name(s->qdev.conf.blk);
2589             if (str && *str) {
2590                 s->device_id = g_strdup(str);
2591             }
2592         }
2593     }
2594 
2595     if (blk_is_sg(s->qdev.conf.blk)) {
2596         error_setg(errp, "unwanted /dev/sg*");
2597         return;
2598     }
2599 
2600     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2601             !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
2602         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
2603     } else {
2604         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
2605     }
2606 
2607     blk_iostatus_enable(s->qdev.conf.blk);
2608 
2609     add_boot_device_lchs(&dev->qdev, NULL,
2610                          dev->conf.lcyls,
2611                          dev->conf.lheads,
2612                          dev->conf.lsecs);
2613 }
2614 
/*
 * Unrealize: drop the boot-device geometry entry that scsi_realize()
 * registered with add_boot_device_lchs().
 */
static void scsi_unrealize(SCSIDevice *dev)
{
    del_boot_device_lchs(&dev->qdev, NULL);
}
2619 
2620 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2621 {
2622     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2623 
2624     /* can happen for devices without drive. The error message for missing
2625      * backend will be issued in scsi_realize
2626      */
2627     if (s->qdev.conf.blk) {
2628         if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2629             return;
2630         }
2631     }
2632     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2633     s->qdev.type = TYPE_DISK;
2634     if (!s->product) {
2635         s->product = g_strdup("QEMU HARDDISK");
2636     }
2637     scsi_realize(&s->qdev, errp);
2638 }
2639 
2640 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2641 {
2642     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2643     int ret;
2644     uint32_t blocksize = 2048;
2645 
2646     if (!dev->conf.blk) {
2647         /* Anonymous BlockBackend for an empty drive. As we put it into
2648          * dev->conf, qdev takes care of detaching on unplug. */
2649         dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
2650         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
2651         assert(ret == 0);
2652     }
2653 
2654     if (dev->conf.physical_block_size != 0) {
2655         blocksize = dev->conf.physical_block_size;
2656     }
2657 
2658     s->qdev.blocksize = blocksize;
2659     s->qdev.type = TYPE_ROM;
2660     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2661     if (!s->product) {
2662         s->product = g_strdup("QEMU CD-ROM");
2663     }
2664     scsi_realize(&s->qdev, errp);
2665 }
2666 
2667 
2668 static const SCSIReqOps scsi_disk_emulate_reqops = {
2669     .size         = sizeof(SCSIDiskReq),
2670     .free_req     = scsi_free_request,
2671     .send_command = scsi_disk_emulate_command,
2672     .read_data    = scsi_disk_emulate_read_data,
2673     .write_data   = scsi_disk_emulate_write_data,
2674     .get_buf      = scsi_get_buf,
2675     .load_request = scsi_disk_emulate_load_request,
2676     .save_request = scsi_disk_emulate_save_request,
2677 };
2678 
2679 static const SCSIReqOps scsi_disk_dma_reqops = {
2680     .size         = sizeof(SCSIDiskReq),
2681     .free_req     = scsi_free_request,
2682     .send_command = scsi_disk_dma_command,
2683     .read_data    = scsi_read_data,
2684     .write_data   = scsi_write_data,
2685     .get_buf      = scsi_get_buf,
2686     .load_request = scsi_disk_load_request,
2687     .save_request = scsi_disk_save_request,
2688 };
2689 
2690 static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
2691     [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
2692     [INQUIRY]                         = &scsi_disk_emulate_reqops,
2693     [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
2694     [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
2695     [START_STOP]                      = &scsi_disk_emulate_reqops,
2696     [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
2697     [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
2698     [READ_TOC]                        = &scsi_disk_emulate_reqops,
2699     [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
2700     [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
2701     [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
2702     [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
2703     [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
2704     [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
2705     [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
2706     [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
2707     [SEEK_10]                         = &scsi_disk_emulate_reqops,
2708     [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
2709     [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
2710     [UNMAP]                           = &scsi_disk_emulate_reqops,
2711     [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
2712     [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
2713     [VERIFY_10]                       = &scsi_disk_emulate_reqops,
2714     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
2715     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
2716     [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
2717 
2718     [READ_6]                          = &scsi_disk_dma_reqops,
2719     [READ_10]                         = &scsi_disk_dma_reqops,
2720     [READ_12]                         = &scsi_disk_dma_reqops,
2721     [READ_16]                         = &scsi_disk_dma_reqops,
2722     [WRITE_6]                         = &scsi_disk_dma_reqops,
2723     [WRITE_10]                        = &scsi_disk_dma_reqops,
2724     [WRITE_12]                        = &scsi_disk_dma_reqops,
2725     [WRITE_16]                        = &scsi_disk_dma_reqops,
2726     [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
2727     [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
2728     [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
2729 };
2730 
2731 static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf)
2732 {
2733     int len = scsi_cdb_length(buf);
2734     g_autoptr(GString) str = NULL;
2735 
2736     assert(len > 0 && len <= 16);
2737     str = qemu_hexdump_line(NULL, buf, len, 1, 0);
2738     trace_scsi_disk_new_request(lun, tag, str->str);
2739 }
2740 
2741 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2742                                      uint8_t *buf, void *hba_private)
2743 {
2744     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2745     SCSIRequest *req;
2746     const SCSIReqOps *ops;
2747     uint8_t command;
2748 
2749     command = buf[0];
2750     ops = scsi_disk_reqops_dispatch[command];
2751     if (!ops) {
2752         ops = &scsi_disk_emulate_reqops;
2753     }
2754     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2755 
2756     if (trace_event_get_state_backends(TRACE_SCSI_DISK_NEW_REQUEST)) {
2757         scsi_disk_new_request_dump(lun, tag, buf);
2758     }
2759 
2760     return req;
2761 }
2762 
2763 #ifdef __linux__
2764 static int get_device_type(SCSIDiskState *s)
2765 {
2766     uint8_t cmd[16];
2767     uint8_t buf[36];
2768     int ret;
2769 
2770     memset(cmd, 0, sizeof(cmd));
2771     memset(buf, 0, sizeof(buf));
2772     cmd[0] = INQUIRY;
2773     cmd[4] = sizeof(buf);
2774 
2775     ret = scsi_SG_IO_FROM_DEV(s->qdev.conf.blk, cmd, sizeof(cmd),
2776                               buf, sizeof(buf), s->qdev.io_timeout);
2777     if (ret < 0) {
2778         return -1;
2779     }
2780     s->qdev.type = buf[0];
2781     if (buf[1] & 0x80) {
2782         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2783     }
2784     return 0;
2785 }
2786 
2787 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2788 {
2789     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2790     int sg_version;
2791     int rc;
2792 
2793     if (!s->qdev.conf.blk) {
2794         error_setg(errp, "drive property not set");
2795         return;
2796     }
2797 
2798     if (s->rotation_rate) {
2799         error_report_once("rotation_rate is specified for scsi-block but is "
2800                           "not implemented. This option is deprecated and will "
2801                           "be removed in a future version");
2802     }
2803 
2804     /* check we are using a driver managing SG_IO (version 3 and after) */
2805     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2806     if (rc < 0) {
2807         error_setg_errno(errp, -rc, "cannot get SG_IO version number");
2808         if (rc != -EPERM) {
2809             error_append_hint(errp, "Is this a SCSI device?\n");
2810         }
2811         return;
2812     }
2813     if (sg_version < 30000) {
2814         error_setg(errp, "scsi generic interface too old");
2815         return;
2816     }
2817 
2818     /* get device type from INQUIRY data */
2819     rc = get_device_type(s);
2820     if (rc < 0) {
2821         error_setg(errp, "INQUIRY failed");
2822         return;
2823     }
2824 
2825     /* Make a guess for the block size, we'll fix it when the guest sends.
2826      * READ CAPACITY.  If they don't, they likely would assume these sizes
2827      * anyway. (TODO: check in /sys).
2828      */
2829     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2830         s->qdev.blocksize = 2048;
2831     } else {
2832         s->qdev.blocksize = 512;
2833     }
2834 
2835     /* Makes the scsi-block device not removable by using HMP and QMP eject
2836      * command.
2837      */
2838     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2839 
2840     scsi_realize(&s->qdev, errp);
2841     scsi_generic_read_device_inquiry(&s->qdev);
2842 }
2843 
2844 typedef struct SCSIBlockReq {
2845     SCSIDiskReq req;
2846     sg_io_hdr_t io_header;
2847 
2848     /* Selected bytes of the original CDB, copied into our own CDB.  */
2849     uint8_t cmd, cdb1, group_number;
2850 
2851     /* CDB passed to SG_IO.  */
2852     uint8_t cdb[16];
2853     BlockCompletionFunc *cb;
2854     void *cb_opaque;
2855 } SCSIBlockReq;
2856 
2857 static void scsi_block_sgio_complete(void *opaque, int ret)
2858 {
2859     SCSIBlockReq *req = (SCSIBlockReq *)opaque;
2860     SCSIDiskReq *r = &req->req;
2861     sg_io_hdr_t *io_hdr = &req->io_header;
2862 
2863     if (ret == 0) {
2864         if (io_hdr->host_status != SCSI_HOST_OK) {
2865             r->req.host_status = io_hdr->host_status;
2866             ret = -ENODEV;
2867         } else if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
2868             ret = BUSY;
2869         } else {
2870             ret = io_hdr->status;
2871         }
2872     }
2873 
2874     req->cb(req->cb_opaque, ret);
2875 }
2876 
2877 static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
2878                                       int64_t offset, QEMUIOVector *iov,
2879                                       int direction,
2880                                       BlockCompletionFunc *cb, void *opaque)
2881 {
2882     sg_io_hdr_t *io_header = &req->io_header;
2883     SCSIDiskReq *r = &req->req;
2884     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
2885     int nb_logical_blocks;
2886     uint64_t lba;
2887     BlockAIOCB *aiocb;
2888 
2889     /* This is not supported yet.  It can only happen if the guest does
2890      * reads and writes that are not aligned to one logical sectors
2891      * _and_ cover multiple MemoryRegions.
2892      */
2893     assert(offset % s->qdev.blocksize == 0);
2894     assert(iov->size % s->qdev.blocksize == 0);
2895 
2896     io_header->interface_id = 'S';
2897 
2898     /* The data transfer comes from the QEMUIOVector.  */
2899     io_header->dxfer_direction = direction;
2900     io_header->dxfer_len = iov->size;
2901     io_header->dxferp = (void *)iov->iov;
2902     io_header->iovec_count = iov->niov;
2903     assert(io_header->iovec_count == iov->niov); /* no overflow! */
2904 
2905     /* Build a new CDB with the LBA and length patched in, in case
2906      * DMA helpers split the transfer in multiple segments.  Do not
2907      * build a CDB smaller than what the guest wanted, and only build
2908      * a larger one if strictly necessary.
2909      */
2910     io_header->cmdp = req->cdb;
2911     lba = offset / s->qdev.blocksize;
2912     nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;
2913 
2914     if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
2915         /* 6-byte CDB */
2916         stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
2917         req->cdb[4] = nb_logical_blocks;
2918         req->cdb[5] = 0;
2919         io_header->cmd_len = 6;
2920     } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
2921         /* 10-byte CDB */
2922         req->cdb[0] = (req->cmd & 0x1f) | 0x20;
2923         req->cdb[1] = req->cdb1;
2924         stl_be_p(&req->cdb[2], lba);
2925         req->cdb[6] = req->group_number;
2926         stw_be_p(&req->cdb[7], nb_logical_blocks);
2927         req->cdb[9] = 0;
2928         io_header->cmd_len = 10;
2929     } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
2930         /* 12-byte CDB */
2931         req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
2932         req->cdb[1] = req->cdb1;
2933         stl_be_p(&req->cdb[2], lba);
2934         stl_be_p(&req->cdb[6], nb_logical_blocks);
2935         req->cdb[10] = req->group_number;
2936         req->cdb[11] = 0;
2937         io_header->cmd_len = 12;
2938     } else {
2939         /* 16-byte CDB */
2940         req->cdb[0] = (req->cmd & 0x1f) | 0x80;
2941         req->cdb[1] = req->cdb1;
2942         stq_be_p(&req->cdb[2], lba);
2943         stl_be_p(&req->cdb[10], nb_logical_blocks);
2944         req->cdb[14] = req->group_number;
2945         req->cdb[15] = 0;
2946         io_header->cmd_len = 16;
2947     }
2948 
2949     /* The rest is as in scsi-generic.c.  */
2950     io_header->mx_sb_len = sizeof(r->req.sense);
2951     io_header->sbp = r->req.sense;
2952     io_header->timeout = s->qdev.io_timeout * 1000;
2953     io_header->usr_ptr = r;
2954     io_header->flags |= SG_FLAG_DIRECT_IO;
2955     req->cb = cb;
2956     req->cb_opaque = opaque;
2957     trace_scsi_disk_aio_sgio_command(r->req.tag, req->cdb[0], lba,
2958                                      nb_logical_blocks, io_header->timeout);
2959     aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, scsi_block_sgio_complete, req);
2960     assert(aiocb != NULL);
2961     return aiocb;
2962 }
2963 
2964 static bool scsi_block_no_fua(SCSICommand *cmd)
2965 {
2966     return false;
2967 }
2968 
2969 static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
2970                                         QEMUIOVector *iov,
2971                                         BlockCompletionFunc *cb, void *cb_opaque,
2972                                         void *opaque)
2973 {
2974     SCSIBlockReq *r = opaque;
2975     return scsi_block_do_sgio(r, offset, iov,
2976                               SG_DXFER_FROM_DEV, cb, cb_opaque);
2977 }
2978 
2979 static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
2980                                          QEMUIOVector *iov,
2981                                          BlockCompletionFunc *cb, void *cb_opaque,
2982                                          void *opaque)
2983 {
2984     SCSIBlockReq *r = opaque;
2985     return scsi_block_do_sgio(r, offset, iov,
2986                               SG_DXFER_TO_DEV, cb, cb_opaque);
2987 }
2988 
2989 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2990 {
2991     switch (buf[0]) {
2992     case VERIFY_10:
2993     case VERIFY_12:
2994     case VERIFY_16:
2995         /* Check if BYTCHK == 0x01 (data-out buffer contains data
2996          * for the number of logical blocks specified in the length
2997          * field).  For other modes, do not use scatter/gather operation.
2998          */
2999         if ((buf[1] & 6) == 2) {
3000             return false;
3001         }
3002         break;
3003 
3004     case READ_6:
3005     case READ_10:
3006     case READ_12:
3007     case READ_16:
3008     case WRITE_6:
3009     case WRITE_10:
3010     case WRITE_12:
3011     case WRITE_16:
3012     case WRITE_VERIFY_10:
3013     case WRITE_VERIFY_12:
3014     case WRITE_VERIFY_16:
3015         /* MMC writing cannot be done via DMA helpers, because it sometimes
3016          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
3017          * We might use scsi_block_dma_reqops as long as no writing commands are
3018          * seen, but performance usually isn't paramount on optical media.  So,
3019          * just make scsi-block operate the same as scsi-generic for them.
3020          */
3021         if (s->qdev.type != TYPE_ROM) {
3022             return false;
3023         }
3024         break;
3025 
3026     default:
3027         break;
3028     }
3029 
3030     return true;
3031 }
3032 
3033 
3034 static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
3035 {
3036     SCSIBlockReq *r = (SCSIBlockReq *)req;
3037     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
3038 
3039     r->cmd = req->cmd.buf[0];
3040     switch (r->cmd >> 5) {
3041     case 0:
3042         /* 6-byte CDB.  */
3043         r->cdb1 = r->group_number = 0;
3044         break;
3045     case 1:
3046         /* 10-byte CDB.  */
3047         r->cdb1 = req->cmd.buf[1];
3048         r->group_number = req->cmd.buf[6];
3049         break;
3050     case 4:
3051         /* 12-byte CDB.  */
3052         r->cdb1 = req->cmd.buf[1];
3053         r->group_number = req->cmd.buf[10];
3054         break;
3055     case 5:
3056         /* 16-byte CDB.  */
3057         r->cdb1 = req->cmd.buf[1];
3058         r->group_number = req->cmd.buf[14];
3059         break;
3060     default:
3061         abort();
3062     }
3063 
3064     /* Protection information is not supported.  For SCSI versions 2 and
3065      * older (as determined by snooping the guest's INQUIRY commands),
3066      * there is no RD/WR/VRPROTECT, so skip this check in these versions.
3067      */
3068     if (s->qdev.scsi_version > 2 && (req->cmd.buf[1] & 0xe0)) {
3069         scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
3070         return 0;
3071     }
3072 
3073     return scsi_disk_dma_command(req, buf);
3074 }
3075 
3076 static const SCSIReqOps scsi_block_dma_reqops = {
3077     .size         = sizeof(SCSIBlockReq),
3078     .free_req     = scsi_free_request,
3079     .send_command = scsi_block_dma_command,
3080     .read_data    = scsi_read_data,
3081     .write_data   = scsi_write_data,
3082     .get_buf      = scsi_get_buf,
3083     .load_request = scsi_disk_load_request,
3084     .save_request = scsi_disk_save_request,
3085 };
3086 
3087 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
3088                                            uint32_t lun, uint8_t *buf,
3089                                            void *hba_private)
3090 {
3091     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3092 
3093     if (scsi_block_is_passthrough(s, buf)) {
3094         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
3095                               hba_private);
3096     } else {
3097         return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
3098                               hba_private);
3099     }
3100 }
3101 
3102 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
3103                                   uint8_t *buf, size_t buf_len,
3104                                   void *hba_private)
3105 {
3106     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3107 
3108     if (scsi_block_is_passthrough(s, buf)) {
3109         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, buf_len, hba_private);
3110     } else {
3111         return scsi_req_parse_cdb(&s->qdev, cmd, buf, buf_len);
3112     }
3113 }
3114 
3115 static void scsi_block_update_sense(SCSIRequest *req)
3116 {
3117     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
3118     SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r);
3119     r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense));
3120 }
3121 #endif
3122 
3123 static
3124 BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
3125                            BlockCompletionFunc *cb, void *cb_opaque,
3126                            void *opaque)
3127 {
3128     SCSIDiskReq *r = opaque;
3129     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3130     return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3131 }
3132 
3133 static
3134 BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
3135                             BlockCompletionFunc *cb, void *cb_opaque,
3136                             void *opaque)
3137 {
3138     SCSIDiskReq *r = opaque;
3139     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3140     return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3141 }
3142 
3143 static char *scsi_property_get_loadparm(Object *obj, Error **errp)
3144 {
3145     return g_strdup(SCSI_DISK_BASE(obj)->loadparm);
3146 }
3147 
3148 static void scsi_property_set_loadparm(Object *obj, const char *value,
3149                                        Error **errp)
3150 {
3151     void *lp_str;
3152 
3153     if (object_property_get_int(obj, "bootindex", NULL) < 0) {
3154         error_setg(errp, "'loadparm' is only valid for boot devices");
3155         return;
3156     }
3157 
3158     lp_str = g_malloc0(strlen(value) + 1);
3159     if (!qdev_prop_sanitize_s390x_loadparm(lp_str, value, errp)) {
3160         g_free(lp_str);
3161         return;
3162     }
3163     SCSI_DISK_BASE(obj)->loadparm = lp_str;
3164 }
3165 
3166 static void scsi_property_add_specifics(DeviceClass *dc)
3167 {
3168     ObjectClass *oc = OBJECT_CLASS(dc);
3169 
3170     /* The loadparm property is only supported on s390x */
3171     if (qemu_arch_available(QEMU_ARCH_S390X)) {
3172         object_class_property_add_str(oc, "loadparm",
3173                                       scsi_property_get_loadparm,
3174                                       scsi_property_set_loadparm);
3175         object_class_property_set_description(oc, "loadparm",
3176                                               "load parameter (s390x only)");
3177     }
3178 }
3179 
3180 static void scsi_disk_base_class_initfn(ObjectClass *klass, const void *data)
3181 {
3182     DeviceClass *dc = DEVICE_CLASS(klass);
3183     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3184 
3185     dc->fw_name = "disk";
3186     device_class_set_legacy_reset(dc, scsi_disk_reset);
3187     sdc->dma_readv = scsi_dma_readv;
3188     sdc->dma_writev = scsi_dma_writev;
3189     sdc->need_fua_emulation = scsi_is_cmd_fua;
3190 }
3191 
3192 static const TypeInfo scsi_disk_base_info = {
3193     .name          = TYPE_SCSI_DISK_BASE,
3194     .parent        = TYPE_SCSI_DEVICE,
3195     .class_init    = scsi_disk_base_class_initfn,
3196     .instance_size = sizeof(SCSIDiskState),
3197     .class_size    = sizeof(SCSIDiskClass),
3198     .abstract      = true,
3199 };
3200 
/*
 * Properties shared by scsi-hd and scsi-cd; expanded as the first entries
 * of scsi_hd_properties[] and scsi_cd_properties[].  Covers the backing
 * drive, the common block and error-policy properties, the ver, serial,
 * vendor, product and device_id strings, and the
 * "migrate-emulated-scsi-request" switch (default: true).
 */
#define DEFINE_SCSI_DISK_PROPERTIES()                                   \
    DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
    DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
    DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
    DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
    DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id),          \
    DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState, migrate_emulated_scsi_request, true)
3211 
3212 
/*
 * qdev properties of the scsi-hd device: the common disk properties plus
 * the hard-disk specific ones below.
 */
static const Property scsi_hd_properties[] = {
    DEFINE_SCSI_DISK_PROPERTIES(),
    /* Feature bits, both off by default */
    DEFINE_PROP_BIT("removable", SCSIDiskState, features,
                    SCSI_DISK_F_REMOVABLE, false),
    DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                    SCSI_DISK_F_DPOFUA, false),
    DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
    /* Limits; defaults are DEFAULT_MAX_UNMAP_SIZE and DEFAULT_MAX_IO_SIZE */
    DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
                       DEFAULT_MAX_UNMAP_SIZE),
    DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                       DEFAULT_MAX_IO_SIZE),
    DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
    /* Default SCSI version for the device is 5 */
    DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
                      5),
    /* Quirk bit, off by default */
    DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
                    quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
                    0),
    DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
};
3234 
/*
 * Migration description used as dc->vmsd by scsi-hd, scsi-cd and
 * scsi-block.  It carries the generic SCSI device state followed by the
 * media and tray flags of SCSIDiskState.
 * NOTE(review): the field list presumably defines the migration stream
 * layout, so entries should not be reordered -- confirm before changing.
 */
static const VMStateDescription vmstate_scsi_disk_state = {
    .name = "scsi-disk",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
        VMSTATE_BOOL(media_changed, SCSIDiskState),
        VMSTATE_BOOL(media_event, SCSIDiskState),
        VMSTATE_BOOL(eject_request, SCSIDiskState),
        VMSTATE_BOOL(tray_open, SCSIDiskState),
        VMSTATE_BOOL(tray_locked, SCSIDiskState),
        VMSTATE_END_OF_LIST()
    }
};
3249 
3250 static void scsi_hd_class_initfn(ObjectClass *klass, const void *data)
3251 {
3252     DeviceClass *dc = DEVICE_CLASS(klass);
3253     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3254 
3255     sc->realize      = scsi_hd_realize;
3256     sc->unrealize    = scsi_unrealize;
3257     sc->alloc_req    = scsi_new_request;
3258     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3259     dc->desc = "virtual SCSI disk";
3260     device_class_set_props(dc, scsi_hd_properties);
3261     dc->vmsd  = &vmstate_scsi_disk_state;
3262 
3263     scsi_property_add_specifics(dc);
3264 }
3265 
3266 static const TypeInfo scsi_hd_info = {
3267     .name          = "scsi-hd",
3268     .parent        = TYPE_SCSI_DISK_BASE,
3269     .class_init    = scsi_hd_class_initfn,
3270 };
3271 
/*
 * qdev properties of the scsi-cd device: the common disk properties plus
 * the CD-ROM specific ones below.
 */
static const Property scsi_cd_properties[] = {
    DEFINE_SCSI_DISK_PROPERTIES(),
    DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
    /* Default is DEFAULT_MAX_IO_SIZE */
    DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                       DEFAULT_MAX_IO_SIZE),
    /* Default SCSI version for the device is 5 */
    DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
                      5),
    /* Quirk bits stored in SCSIDiskState.quirks, all off by default */
    DEFINE_PROP_BIT("quirk_mode_page_apple_vendor", SCSIDiskState, quirks,
                    SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR, 0),
    DEFINE_PROP_BIT("quirk_mode_sense_rom_use_dbd", SCSIDiskState, quirks,
                    SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD, 0),
    DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
                    quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
                    0),
    DEFINE_PROP_BIT("quirk_mode_page_truncated", SCSIDiskState, quirks,
                    SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED, 0),
};
3291 
3292 static void scsi_cd_class_initfn(ObjectClass *klass, const void *data)
3293 {
3294     DeviceClass *dc = DEVICE_CLASS(klass);
3295     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3296 
3297     sc->realize      = scsi_cd_realize;
3298     sc->alloc_req    = scsi_new_request;
3299     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3300     dc->desc = "virtual SCSI CD-ROM";
3301     device_class_set_props(dc, scsi_cd_properties);
3302     dc->vmsd  = &vmstate_scsi_disk_state;
3303 
3304     scsi_property_add_specifics(dc);
3305 }
3306 
3307 static const TypeInfo scsi_cd_info = {
3308     .name          = "scsi-cd",
3309     .parent        = TYPE_SCSI_DISK_BASE,
3310     .class_init    = scsi_cd_class_initfn,
3311 };
3312 
3313 #ifdef __linux__
/*
 * qdev properties of the scsi-block passthrough device.  Unlike scsi-hd
 * and scsi-cd this list does not expand DEFINE_SCSI_DISK_PROPERTIES().
 */
static const Property scsi_block_properties[] = {
    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),
    DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
    DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false),
    /* Accepted, but scsi_block_realize() only reports it as unimplemented */
    DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
    DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
                       DEFAULT_MAX_UNMAP_SIZE),
    DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                       DEFAULT_MAX_IO_SIZE),
    /* Defaults to -1 here, whereas scsi-hd and scsi-cd default to 5 */
    DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
                      -1),
    /* scsi_block_do_sgio() multiplies this by 1000 for the SG_IO timeout */
    DEFINE_PROP_UINT32("io_timeout", SCSIDiskState, qdev.io_timeout,
                       DEFAULT_IO_TIMEOUT),
};
3328 
3329 static void scsi_block_class_initfn(ObjectClass *klass, const void *data)
3330 {
3331     DeviceClass *dc = DEVICE_CLASS(klass);
3332     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3333     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3334 
3335     sc->realize      = scsi_block_realize;
3336     sc->alloc_req    = scsi_block_new_request;
3337     sc->parse_cdb    = scsi_block_parse_cdb;
3338     sdc->dma_readv   = scsi_block_dma_readv;
3339     sdc->dma_writev  = scsi_block_dma_writev;
3340     sdc->update_sense = scsi_block_update_sense;
3341     sdc->need_fua_emulation = scsi_block_no_fua;
3342     dc->desc = "SCSI block device passthrough";
3343     device_class_set_props(dc, scsi_block_properties);
3344     dc->vmsd  = &vmstate_scsi_disk_state;
3345 }
3346 
3347 static const TypeInfo scsi_block_info = {
3348     .name          = "scsi-block",
3349     .parent        = TYPE_SCSI_DISK_BASE,
3350     .class_init    = scsi_block_class_initfn,
3351 };
3352 #endif
3353 
3354 static void scsi_disk_register_types(void)
3355 {
3356     type_register_static(&scsi_disk_base_info);
3357     type_register_static(&scsi_hd_info);
3358     type_register_static(&scsi_cd_info);
3359 #ifdef __linux__
3360     type_register_static(&scsi_block_info);
3361 #endif
3362 }
3363 
3364 type_init(scsi_disk_register_types)
3365