xref: /qemu/hw/scsi/scsi-disk.c (revision 897c68fb795cf03b89b6688a6f945d68a765c3e4)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
9  *  2009-Dec-12 Artyom Tarasenko : implemented standard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
22 #include "qemu/osdep.h"
23 #include "qemu/units.h"
24 #include "qapi/error.h"
25 #include "qemu/error-report.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/module.h"
28 #include "qemu/hw-version.h"
29 #include "qemu/memalign.h"
30 #include "hw/scsi/scsi.h"
31 #include "migration/qemu-file-types.h"
32 #include "migration/vmstate.h"
33 #include "hw/scsi/emulation.h"
34 #include "scsi/constants.h"
35 #include "system/arch_init.h"
36 #include "system/block-backend.h"
37 #include "system/blockdev.h"
38 #include "hw/block/block.h"
39 #include "hw/qdev-properties.h"
40 #include "hw/qdev-properties-system.h"
41 #include "system/dma.h"
42 #include "system/system.h"
43 #include "qemu/cutils.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 #ifdef __linux__
48 #include <scsi/sg.h>
49 #endif
50 
51 #define SCSI_WRITE_SAME_MAX         (512 * KiB)
52 #define SCSI_DMA_BUF_SIZE           (128 * KiB)
53 #define SCSI_MAX_INQUIRY_LEN        256
54 #define SCSI_MAX_MODE_LEN           256
55 
56 #define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
57 #define DEFAULT_MAX_UNMAP_SIZE      (1 * GiB)
58 #define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
59 
60 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
61 
62 #define MAX_SERIAL_LEN              36
63 #define MAX_SERIAL_LEN_FOR_DEVID    20
64 
65 OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
66 
67 struct SCSIDiskClass {
68     SCSIDeviceClass parent_class;
69     /*
70      * Callbacks receive ret == 0 for success. Errors are represented either as
71      * negative errno values, or as positive SAM status codes.
72      *
73      * Beware: For errors returned in host_status, the function may directly
74      * complete the request and never call the callback.
75      */
76     DMAIOFunc       *dma_readv;
77     DMAIOFunc       *dma_writev;
78     bool            (*need_fua_emulation)(SCSICommand *cmd);
79     void            (*update_sense)(SCSIRequest *r);
80 };
81 
82 typedef struct SCSIDiskReq {
83     SCSIRequest req;
84     /* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
85     uint64_t sector;
86     uint32_t sector_count;
87     uint32_t buflen;
88     bool started;
89     bool need_fua_emulation;
90     struct iovec iov;
91     QEMUIOVector qiov;
92     BlockAcctCookie acct;
93 } SCSIDiskReq;
94 
95 #define SCSI_DISK_F_REMOVABLE             0
96 #define SCSI_DISK_F_DPOFUA                1
97 #define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2
98 
99 struct SCSIDiskState {
100     SCSIDevice qdev;
101     uint32_t features;
102     bool media_changed;
103     bool media_event;
104     bool eject_request;
105     uint16_t port_index;
106     uint64_t max_unmap_size;
107     uint64_t max_io_size;
108     uint32_t quirks;
109     char *version;
110     char *serial;
111     char *vendor;
112     char *product;
113     char *device_id;
114     char *loadparm;     /* only for s390x */
115     bool tray_open;
116     bool tray_locked;
117     /*
118      * 0x0000        - rotation rate not reported
119      * 0x0001        - non-rotating medium (SSD)
120      * 0x0002-0x0400 - reserved
121      * 0x0401-0xfffe - rotations per minute
122      * 0xffff        - reserved
123      */
124     uint16_t rotation_rate;
125     bool migrate_emulated_scsi_request;
126 };
127 
128 static void scsi_free_request(SCSIRequest *req)
129 {
130     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
131 
132     qemu_vfree(r->iov.iov_base);
133 }
134 
135 /* Helper function for command completion with sense.  */
136 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
137 {
138     trace_scsi_disk_check_condition(r->req.tag, sense.key, sense.asc,
139                                     sense.ascq);
140     scsi_req_build_sense(&r->req, sense);
141     scsi_req_complete(&r->req, CHECK_CONDITION);
142 }
143 
144 static void scsi_init_iovec(SCSIDiskReq *r, size_t size)
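/*
 * Lazily allocate the bounce buffer (at most @size bytes on first use) and
 * size the iovec/qiov for the data that remains to be transferred.
 */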
145 {
146     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
147 
148     if (!r->iov.iov_base) {
149         r->buflen = size;
150         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
151     }
152     r->iov.iov_len = MIN(r->sector_count * BDRV_SECTOR_SIZE, r->buflen);
153     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
154 }
155 
156 static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
157 {
158     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
159 
160     qemu_put_be64s(f, &r->sector);
161     qemu_put_be32s(f, &r->sector_count);
162     qemu_put_be32s(f, &r->buflen);
163     if (r->buflen) {
164         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
165             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
166         } else if (!req->retry) {
167             uint32_t len = r->iov.iov_len;
168             qemu_put_be32s(f, &len);
169             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
170         }
171     }
172 }
173 
174 static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
175 {
176     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
177 
178     if (s->migrate_emulated_scsi_request) {
179         scsi_disk_save_request(f, req);
180     }
181 }
182 
183 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
184 {
185     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
186 
187     qemu_get_be64s(f, &r->sector);
188     qemu_get_be32s(f, &r->sector_count);
189     qemu_get_be32s(f, &r->buflen);
190     if (r->buflen) {
191         scsi_init_iovec(r, r->buflen);
192         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
193             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
194         } else if (!r->req.retry) {
195             uint32_t len;
196             qemu_get_be32s(f, &len);
197             r->iov.iov_len = len;
198             assert(r->iov.iov_len <= r->buflen);
199             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
200         }
201     }
202 
203     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
204 }
205 
206 static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
207 {
208     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
209 
210     if (s->migrate_emulated_scsi_request) {
211         scsi_disk_load_request(f, req);
212     }
213 }
214 
215 /*
216  * scsi_handle_rw_error has two possible return values.  False means that the error
217  * must be ignored, true means that the error has been processed and the
218  * caller should not do anything else for this request.  Note that
219  * scsi_handle_rw_error always manages its reference counts, independent
220  * of the return value.
221  */
222 static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
223 {
224     bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
225     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
226     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
227     SCSISense sense = SENSE_CODE(NO_SENSE);
228     int error;
229     bool req_has_sense = false;
230     BlockErrorAction action;
231     int status;
232 
233     if (ret < 0) {
234         status = scsi_sense_from_errno(-ret, &sense);
235         error = -ret;
236     } else {
237         /* A passthrough command has completed with nonzero status.  */
238         status = ret;
239         switch (status) {
240         case CHECK_CONDITION:
241             req_has_sense = true;
242             error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
243             break;
244         case RESERVATION_CONFLICT:
245             /*
246              * Don't apply the error policy, always report to the guest.
247              *
248              * This is a passthrough code path, so it's not a backend error, but
249              * a response to an invalid guest request.
250              *
251              * Windows Failover Cluster validation intentionally sends invalid
252              * requests to verify that reservations work as intended. It is
253              * crucial that it sees the resulting errors.
254              *
255              * Treating a reservation conflict as a guest-side error is obvious
256              * when a pr-manager is in use. Without one, the situation is less
257              * clear, but there might be nothing that can be fixed on the host
258              * (like in the above example), and we don't want to be stuck in a
259              * loop where resuming the VM and retrying the request immediately
260              * stops it again. So always reporting is still the safer option in
261              * this case, too.
262              */
263             error = 0;
264             break;
265         default:
266             error = EINVAL;
267             break;
268         }
269     }
270 
271     /*
272      * Check whether the error has to be handled by the guest or should
273      * rather follow the rerror=/werror= settings.  Guest-handled errors
274      * are usually retried immediately, so do not post them to QMP and
275      * do not account them as failed I/O.
276      */
277     if (!error || (req_has_sense &&
278                    scsi_sense_buf_is_guest_recoverable(r->req.sense,
279                                                        sizeof(r->req.sense)))) {
280         action = BLOCK_ERROR_ACTION_REPORT;
281         acct_failed = false;
282     } else {
283         action = blk_get_error_action(s->qdev.conf.blk, is_read, error);
284         blk_error_action(s->qdev.conf.blk, action, is_read, error);
285     }
286 
287     switch (action) {
288     case BLOCK_ERROR_ACTION_REPORT:
289         if (acct_failed) {
290             block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
291         }
292         if (req_has_sense) {
293             sdc->update_sense(&r->req);
294         } else if (status == CHECK_CONDITION) {
295             scsi_req_build_sense(&r->req, sense);
296         }
297         scsi_req_complete(&r->req, status);
298         return true;
299 
300     case BLOCK_ERROR_ACTION_IGNORE:
301         return false;
302 
303     case BLOCK_ERROR_ACTION_STOP:
304         scsi_req_retry(&r->req);
305         return true;
306 
307     default:
308         g_assert_not_reached();
309     }
310 }
311 
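/*
 * Common completion check: handles a cancelled request and hands any nonzero
 * errno/status to scsi_handle_rw_error().  Returns true if the request needs
 * no further processing by the caller.
 */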
312 static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
313 {
314     if (r->req.io_canceled) {
315         scsi_req_cancel_complete(&r->req);
316         return true;
317     }
318 
319     if (ret != 0) {
320         return scsi_handle_rw_error(r, ret, acct_failed);
321     }
322 
323     return false;
324 }
325 
326 static void scsi_aio_complete(void *opaque, int ret)
327 {
328     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
329     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
330 
331     /* The request must run in its AioContext */
332     assert(r->req.ctx == qemu_get_current_aio_context());
333 
334     assert(r->req.aiocb != NULL);
335     r->req.aiocb = NULL;
336 
337     if (scsi_disk_req_check_error(r, ret, true)) {
338         goto done;
339     }
340 
341     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
342     scsi_req_complete(&r->req, GOOD);
343 
344 done:
345     scsi_req_unref(&r->req);
346 }
347 
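/*
 * Whether the CDB asks for Force Unit Access semantics: the FUA bit for
 * READ/WRITE (10/12/16), implied for VERIFY and WRITE AND VERIFY.
 */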
348 static bool scsi_is_cmd_fua(SCSICommand *cmd)
349 {
350     switch (cmd->buf[0]) {
351     case READ_10:
352     case READ_12:
353     case READ_16:
354     case WRITE_10:
355     case WRITE_12:
356     case WRITE_16:
357         return (cmd->buf[1] & 8) != 0;
358 
359     case VERIFY_10:
360     case VERIFY_12:
361     case VERIFY_16:
362     case WRITE_VERIFY_10:
363     case WRITE_VERIFY_12:
364     case WRITE_VERIFY_16:
365         return true;
366 
367     case READ_6:
368     case WRITE_6:
369     default:
370         return false;
371     }
372 }
373 
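/*
 * Finish a write request: if FUA has to be emulated, issue a flush and let
 * scsi_aio_complete() complete the request, otherwise complete it here.
 */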
374 static void scsi_write_do_fua(SCSIDiskReq *r)
375 {
376     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
377 
378     assert(r->req.aiocb == NULL);
379     assert(!r->req.io_canceled);
380 
381     if (r->need_fua_emulation) {
382         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
383                          BLOCK_ACCT_FLUSH);
384         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
385         return;
386     }
387 
388     scsi_req_complete(&r->req, GOOD);
389     scsi_req_unref(&r->req);
390 }
391 
392 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
393 {
394     assert(r->req.aiocb == NULL);
395     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
396         goto done;
397     }
398 
399     r->sector += r->sector_count;
400     r->sector_count = 0;
401     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
402         scsi_write_do_fua(r);
403         return;
404     } else {
405         scsi_req_complete(&r->req, GOOD);
406     }
407 
408 done:
409     scsi_req_unref(&r->req);
410 }
411 
412 /* May not be called in all error cases; don't rely on cleanup here */
413 static void scsi_dma_complete(void *opaque, int ret)
414 {
415     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
416     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
417 
418     assert(r->req.aiocb != NULL);
419     r->req.aiocb = NULL;
420 
421     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
422     if (ret < 0) {
423         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
424     } else if (ret == 0) {
425         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
426     }
427     scsi_dma_complete_noio(r, ret);
428 }
429 
430 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
431 {
432     uint32_t n;
433 
434     /* The request must run in its AioContext */
435     assert(r->req.ctx == qemu_get_current_aio_context());
436 
437     assert(r->req.aiocb == NULL);
438     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
439         goto done;
440     }
441 
442     n = r->qiov.size / BDRV_SECTOR_SIZE;
443     r->sector += n;
444     r->sector_count -= n;
445     scsi_req_data(&r->req, r->qiov.size);
446 
447 done:
448     scsi_req_unref(&r->req);
449 }
450 
451 /* May not be called in all error cases; don't rely on cleanup here */
452 static void scsi_read_complete(void *opaque, int ret)
453 {
454     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
455     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
456 
457     assert(r->req.aiocb != NULL);
458     r->req.aiocb = NULL;
459 
460     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
461     if (ret < 0) {
462         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
463     } else if (ret == 0) {
464         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
465         trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
466     }
467     scsi_read_complete_noio(r, ret);
468 }
469 
470 /* Actually issue a read to the block device.  */
471 static void scsi_do_read(SCSIDiskReq *r, int ret)
472 {
473     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
474     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
475 
476     assert(r->req.aiocb == NULL);
477     if (scsi_disk_req_check_error(r, ret, false)) {
478         goto done;
479     }
480 
481     /* The request is used as the AIO opaque value, so add a ref.  */
482     scsi_req_ref(&r->req);
483 
484     if (r->req.sg) {
485         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
486         r->req.residual -= r->req.sg->size;
487         r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
488                                   BDRV_SECTOR_SIZE,
489                                   sdc->dma_readv, r, scsi_dma_complete, r,
490                                   DMA_DIRECTION_FROM_DEVICE);
491     } else {
492         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
493         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
494                          r->qiov.size, BLOCK_ACCT_READ);
495         r->req.aiocb = sdc->dma_readv(r->sector << BDRV_SECTOR_BITS, &r->qiov,
496                                       scsi_read_complete, r, r);
497     }
498 
499 done:
500     scsi_req_unref(&r->req);
501 }
502 
503 static void scsi_do_read_cb(void *opaque, int ret)
504 {
505     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
506     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
507 
508     assert(r->req.aiocb != NULL);
509     r->req.aiocb = NULL;
510 
511     if (ret < 0) {
512         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
513     } else {
514         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
515     }
516     scsi_do_read(opaque, ret);
517 }
518 
519 /* Read more data from scsi device into buffer.  */
520 static void scsi_read_data(SCSIRequest *req)
521 {
522     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
523     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
524     bool first;
525 
526     trace_scsi_disk_read_data_count(r->sector_count);
527     if (r->sector_count == 0) {
528         /* This also clears the sense buffer for REQUEST SENSE.  */
529         scsi_req_complete(&r->req, GOOD);
530         return;
531     }
532 
533     /* No data transfer may already be in progress */
534     assert(r->req.aiocb == NULL);
535 
536     /* The request is used as the AIO opaque value, so add a ref.  */
537     scsi_req_ref(&r->req);
538     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
539         trace_scsi_disk_read_data_invalid();
540         scsi_read_complete_noio(r, -EINVAL);
541         return;
542     }
543 
544     if (!blk_is_available(req->dev->conf.blk)) {
545         scsi_read_complete_noio(r, -ENOMEDIUM);
546         return;
547     }
548 
549     first = !r->started;
550     r->started = true;
551     if (first && r->need_fua_emulation) {
552         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
553                          BLOCK_ACCT_FLUSH);
554         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
555     } else {
556         scsi_do_read(r, 0);
557     }
558 }
559 
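/*
 * Advance the sector bookkeeping after a chunk has been written, then either
 * finish the command (including FUA handling) or ask the driver for the next
 * chunk of data.
 */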
560 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
561 {
562     uint32_t n;
563 
564     /* The request must run in its AioContext */
565     assert(r->req.ctx == qemu_get_current_aio_context());
566 
567     assert(r->req.aiocb == NULL);
568     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
569         goto done;
570     }
571 
572     n = r->qiov.size / BDRV_SECTOR_SIZE;
573     r->sector += n;
574     r->sector_count -= n;
575     if (r->sector_count == 0) {
576         scsi_write_do_fua(r);
577         return;
578     } else {
579         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
580         trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
581         scsi_req_data(&r->req, r->qiov.size);
582     }
583 
584 done:
585     scsi_req_unref(&r->req);
586 }
587 
588 /* May not be called in all error cases; don't rely on cleanup here */
589 static void scsi_write_complete(void *opaque, int ret)
590 {
591     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
592     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
593 
594     assert(r->req.aiocb != NULL);
595     r->req.aiocb = NULL;
596 
597     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
598     if (ret < 0) {
599         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
600     } else if (ret == 0) {
601         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
602     }
603     scsi_write_complete_noio(r, ret);
604 }
605 
606 static void scsi_write_data(SCSIRequest *req)
607 {
608     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
609     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
610     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
611 
612     /* No data transfer may already be in progress */
613     assert(r->req.aiocb == NULL);
614 
615     /* The request is used as the AIO opaque value, so add a ref.  */
616     scsi_req_ref(&r->req);
617     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
618         trace_scsi_disk_write_data_invalid();
619         scsi_write_complete_noio(r, -EINVAL);
620         return;
621     }
622 
623     if (!r->req.sg && !r->qiov.size) {
624         /* Called for the first time.  Ask the driver to send us more data.  */
625         r->started = true;
626         scsi_write_complete_noio(r, 0);
627         return;
628     }
629     if (!blk_is_available(req->dev->conf.blk)) {
630         scsi_write_complete_noio(r, -ENOMEDIUM);
631         return;
632     }
633 
634     if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
635         r->req.cmd.buf[0] == VERIFY_16) {
636         if (r->req.sg) {
637             scsi_dma_complete_noio(r, 0);
638         } else {
639             scsi_write_complete_noio(r, 0);
640         }
641         return;
642     }
643 
644     if (r->req.sg) {
645         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
646         r->req.residual -= r->req.sg->size;
647         r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
648                                   BDRV_SECTOR_SIZE,
649                                   sdc->dma_writev, r, scsi_dma_complete, r,
650                                   DMA_DIRECTION_TO_DEVICE);
651     } else {
652         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
653                          r->qiov.size, BLOCK_ACCT_WRITE);
654         r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
655                                        scsi_write_complete, r, r);
656     }
657 }
658 
659 /* Return a pointer to the data buffer.  */
660 static uint8_t *scsi_get_buf(SCSIRequest *req)
661 {
662     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
663 
664     return (uint8_t *)r->iov.iov_base;
665 }
666 
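/*
 * Build the requested INQUIRY Vital Product Data page in @outbuf.
 * Returns the number of bytes filled in, or -1 for an unsupported page.
 */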
667 static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
668 {
669     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
670     uint8_t page_code = req->cmd.buf[2];
671     int start, buflen = 0;
672 
673     outbuf[buflen++] = s->qdev.type & 0x1f;
674     outbuf[buflen++] = page_code;
675     outbuf[buflen++] = 0x00;
676     outbuf[buflen++] = 0x00;
677     start = buflen;
678 
679     switch (page_code) {
680     case 0x00: /* Supported page codes, mandatory */
681     {
682         trace_scsi_disk_emulate_vpd_page_00(req->cmd.xfer);
683         outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
684         if (s->serial) {
685             outbuf[buflen++] = 0x80; /* unit serial number */
686         }
687         outbuf[buflen++] = 0x83; /* device identification */
688         if (s->qdev.type == TYPE_DISK) {
689             outbuf[buflen++] = 0xb0; /* block limits */
690             outbuf[buflen++] = 0xb1; /* block device characteristics */
691             outbuf[buflen++] = 0xb2; /* thin provisioning */
692         }
693         break;
694     }
695     case 0x80: /* Device serial number, optional */
696     {
697         int l;
698 
699         if (!s->serial) {
700             trace_scsi_disk_emulate_vpd_page_80_not_supported();
701             return -1;
702         }
703 
704         l = strlen(s->serial);
705         if (l > MAX_SERIAL_LEN) {
706             l = MAX_SERIAL_LEN;
707         }
708 
709         trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
710         memcpy(outbuf + buflen, s->serial, l);
711         buflen += l;
712         break;
713     }
714 
715     case 0x83: /* Device identification page, mandatory */
716     {
717         int id_len = s->device_id ? MIN(strlen(s->device_id), 255 - 8) : 0;
718 
719         trace_scsi_disk_emulate_vpd_page_83(req->cmd.xfer);
720 
721         if (id_len) {
722             outbuf[buflen++] = 0x2; /* ASCII */
723             outbuf[buflen++] = 0;   /* not officially assigned */
724             outbuf[buflen++] = 0;   /* reserved */
725             outbuf[buflen++] = id_len; /* length of data following */
726             memcpy(outbuf + buflen, s->device_id, id_len);
727             buflen += id_len;
728         }
729 
730         if (s->qdev.wwn) {
731             outbuf[buflen++] = 0x1; /* Binary */
732             outbuf[buflen++] = 0x3; /* NAA */
733             outbuf[buflen++] = 0;   /* reserved */
734             outbuf[buflen++] = 8;
735             stq_be_p(&outbuf[buflen], s->qdev.wwn);
736             buflen += 8;
737         }
738 
739         if (s->qdev.port_wwn) {
740             outbuf[buflen++] = 0x61; /* SAS / Binary */
741             outbuf[buflen++] = 0x93; /* PIV / Target port / NAA */
742             outbuf[buflen++] = 0;    /* reserved */
743             outbuf[buflen++] = 8;
744             stq_be_p(&outbuf[buflen], s->qdev.port_wwn);
745             buflen += 8;
746         }
747 
748         if (s->port_index) {
749             outbuf[buflen++] = 0x61; /* SAS / Binary */
750 
751             /* PIV/Target port/relative target port */
752             outbuf[buflen++] = 0x94;
753 
754             outbuf[buflen++] = 0;    /* reserved */
755             outbuf[buflen++] = 4;
756             stw_be_p(&outbuf[buflen + 2], s->port_index);
757             buflen += 4;
758         }
759         break;
760     }
761     case 0xb0: /* block limits */
762     {
763         SCSIBlockLimits bl = {};
764 
765         if (s->qdev.type == TYPE_ROM) {
766             trace_scsi_disk_emulate_vpd_page_b0_not_supported();
767             return -1;
768         }
769         bl.wsnz = 1;
770         bl.unmap_sectors =
771             s->qdev.conf.discard_granularity / s->qdev.blocksize;
772         bl.min_io_size =
773             s->qdev.conf.min_io_size / s->qdev.blocksize;
774         bl.opt_io_size =
775             s->qdev.conf.opt_io_size / s->qdev.blocksize;
776         bl.max_unmap_sectors =
777             s->max_unmap_size / s->qdev.blocksize;
778         bl.max_io_sectors =
779             s->max_io_size / s->qdev.blocksize;
780         /* 255 descriptors fit in 4 KiB with an 8-byte header */
781         bl.max_unmap_descr = 255;
782 
783         if (s->qdev.type == TYPE_DISK) {
784             int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
785             int max_io_sectors_blk =
786                 max_transfer_blk / s->qdev.blocksize;
787 
788             bl.max_io_sectors =
789                 MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
790         }
791         buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
792         break;
793     }
794     case 0xb1: /* block device characteristics */
795     {
796         buflen = 0x40;
797         outbuf[4] = (s->rotation_rate >> 8) & 0xff;
798         outbuf[5] = s->rotation_rate & 0xff;
799         outbuf[6] = 0; /* PRODUCT TYPE */
800         outbuf[7] = 0; /* WABEREQ | WACEREQ | NOMINAL FORM FACTOR */
801         outbuf[8] = 0; /* VBULS */
802         break;
803     }
804     case 0xb2: /* thin provisioning */
805     {
806         buflen = 8;
807         outbuf[4] = 0;
808         outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
809         outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
810         outbuf[7] = 0;
811         break;
812     }
813     default:
814         return -1;
815     }
816     /* done with EVPD */
817     assert(buflen - start <= 255);
818     outbuf[start - 1] = buflen - start;
819     return buflen;
820 }
821 
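/*
 * Build the INQUIRY response: either standard INQUIRY data or, if EVPD is
 * set, the VPD page selected by the PAGE CODE field.  Returns the response
 * length or -1 on error.
 */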
822 static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
823 {
824     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
825     int buflen = 0;
826 
827     if (req->cmd.buf[1] & 0x1) {
828         /* Vital product data */
829         return scsi_disk_emulate_vpd_page(req, outbuf);
830     }
831 
832     /* Standard INQUIRY data */
833     if (req->cmd.buf[2] != 0) {
834         return -1;
835     }
836 
837     /* PAGE CODE == 0 */
838     buflen = req->cmd.xfer;
839     if (buflen > SCSI_MAX_INQUIRY_LEN) {
840         buflen = SCSI_MAX_INQUIRY_LEN;
841     }
842 
843     outbuf[0] = s->qdev.type & 0x1f;
844     outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;
845 
846     strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
847     strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');
848 
849     memset(&outbuf[32], 0, 4);
850     memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
851     /*
852      * We claim conformance to SPC-3, which is required for guests
853      * to ask for modern features like READ CAPACITY(16) or the
854      * block characteristics VPD page by default.  Not all of SPC-3
855      * is actually implemented, but we're good enough.
856      */
857     outbuf[2] = s->qdev.default_scsi_version;
858     outbuf[3] = 2 | 0x10; /* Format 2, HiSup */
859 
860     if (buflen > 36) {
861         outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
862     } else {
863         /* If the allocation length of the CDB is too small,
864            the additional length is not adjusted */
865         outbuf[4] = 36 - 5;
866     }
867 
868     /* Sync data transfer and TCQ.  */
869     outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
870     return buflen;
871 }
872 
873 static inline bool media_is_dvd(SCSIDiskState *s)
874 {
875     uint64_t nb_sectors;
876     if (s->qdev.type != TYPE_ROM) {
877         return false;
878     }
879     if (!blk_is_available(s->qdev.conf.blk)) {
880         return false;
881     }
882     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
883     return nb_sectors > CD_MAX_SECTORS;
884 }
885 
886 static inline bool media_is_cd(SCSIDiskState *s)
887 {
888     uint64_t nb_sectors;
889     if (s->qdev.type != TYPE_ROM) {
890         return false;
891     }
892     if (!blk_is_available(s->qdev.conf.blk)) {
893         return false;
894     }
895     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
896     return nb_sectors <= CD_MAX_SECTORS;
897 }
898 
899 static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
900                                       uint8_t *outbuf)
901 {
902     uint8_t type = r->req.cmd.buf[1] & 7;
903 
904     if (s->qdev.type != TYPE_ROM) {
905         return -1;
906     }
907 
908     /* Types 1/2 are only defined for Blu-Ray.  */
909     if (type != 0) {
910         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
911         return -1;
912     }
913 
914     memset(outbuf, 0, 34);
915     outbuf[1] = 32;
916     outbuf[2] = 0xe; /* last session complete, disc finalized */
917     outbuf[3] = 1;   /* first track on disc */
918     outbuf[4] = 1;   /* # of sessions */
919     outbuf[5] = 1;   /* first track of last session */
920     outbuf[6] = 1;   /* last track of last session */
921     outbuf[7] = 0x20; /* unrestricted use */
922     outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
923     /* 9-10-11: most significant bytes corresponding to bytes 4-5-6 */
924     /* 12-23: not meaningful for CD-ROM or DVD-ROM */
925     /* 24-31: disc bar code */
926     /* 32: disc application code */
927     /* 33: number of OPC tables */
928 
929     return 34;
930 }
931 
932 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
933                                    uint8_t *outbuf)
934 {
935     static const int rds_caps_size[5] = {
936         [0] = 2048 + 4,
937         [1] = 4 + 4,
938         [3] = 188 + 4,
939         [4] = 2048 + 4,
940     };
941 
942     uint8_t media = r->req.cmd.buf[1];
943     uint8_t layer = r->req.cmd.buf[6];
944     uint8_t format = r->req.cmd.buf[7];
945     int size = -1;
946 
947     if (s->qdev.type != TYPE_ROM) {
948         return -1;
949     }
950     if (media != 0) {
951         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
952         return -1;
953     }
954 
955     if (format != 0xff) {
956         if (!blk_is_available(s->qdev.conf.blk)) {
957             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
958             return -1;
959         }
960         if (media_is_cd(s)) {
961             scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
962             return -1;
963         }
964         if (format >= ARRAY_SIZE(rds_caps_size)) {
965             return -1;
966         }
967         size = rds_caps_size[format];
968         memset(outbuf, 0, size);
969     }
970 
971     switch (format) {
972     case 0x00: {
973         /* Physical format information */
974         uint64_t nb_sectors;
975         if (layer != 0) {
976             goto fail;
977         }
978         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
979 
980         outbuf[4] = 1;   /* DVD-ROM, part version 1 */
981         outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
982         outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
983         outbuf[7] = 0;   /* default densities */
984 
985         stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
986         stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
987         break;
988     }
989 
990     case 0x01: /* DVD copyright information, all zeros */
991         break;
992 
993     case 0x03: /* BCA information - invalid field for no BCA info */
994         return -1;
995 
996     case 0x04: /* DVD disc manufacturing information, all zeros */
997         break;
998 
999     case 0xff: { /* List capabilities */
1000         int i;
1001         size = 4;
1002         for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
1003             if (!rds_caps_size[i]) {
1004                 continue;
1005             }
1006             outbuf[size] = i;
1007             outbuf[size + 1] = 0x40; /* Not writable, readable */
1008             stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
1009             size += 4;
1010         }
1011         break;
1012     }
1013 
1014     default:
1015         return -1;
1016     }
1017 
1018     /* Size of buffer, not including 2 byte size field */
1019     stw_be_p(outbuf, size - 2);
1020     return size;
1021 
1022 fail:
1023     return -1;
1024 }
1025 
1026 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
1027 {
1028     uint8_t event_code, media_status;
1029 
1030     media_status = 0;
1031     if (s->tray_open) {
1032         media_status = MS_TRAY_OPEN;
1033     } else if (blk_is_inserted(s->qdev.conf.blk)) {
1034         media_status = MS_MEDIA_PRESENT;
1035     }
1036 
1037     /* Event notification descriptor */
1038     event_code = MEC_NO_CHANGE;
1039     if (media_status != MS_TRAY_OPEN) {
1040         if (s->media_event) {
1041             event_code = MEC_NEW_MEDIA;
1042             s->media_event = false;
1043         } else if (s->eject_request) {
1044             event_code = MEC_EJECT_REQUESTED;
1045             s->eject_request = false;
1046         }
1047     }
1048 
1049     outbuf[0] = event_code;
1050     outbuf[1] = media_status;
1051 
1052     /* These fields are reserved, just clear them. */
1053     outbuf[2] = 0;
1054     outbuf[3] = 0;
1055     return 4;
1056 }
1057 
1058 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
1059                                               uint8_t *outbuf)
1060 {
1061     int size;
1062     uint8_t *buf = r->req.cmd.buf;
1063     uint8_t notification_class_request = buf[4];
1064     if (s->qdev.type != TYPE_ROM) {
1065         return -1;
1066     }
1067     if ((buf[1] & 1) == 0) {
1068         /* asynchronous */
1069         return -1;
1070     }
1071 
1072     size = 4;
1073     outbuf[0] = outbuf[1] = 0;
1074     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
1075     if (notification_class_request & (1 << GESN_MEDIA)) {
1076         outbuf[2] = GESN_MEDIA;
1077         size += scsi_event_status_media(s, &outbuf[size]);
1078     } else {
1079         outbuf[2] = 0x80;
1080     }
1081     stw_be_p(outbuf, size - 4);
1082     return size;
1083 }
1084 
1085 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
1086 {
1087     int current;
1088 
1089     if (s->qdev.type != TYPE_ROM) {
1090         return -1;
1091     }
1092 
1093     if (media_is_dvd(s)) {
1094         current = MMC_PROFILE_DVD_ROM;
1095     } else if (media_is_cd(s)) {
1096         current = MMC_PROFILE_CD_ROM;
1097     } else {
1098         current = MMC_PROFILE_NONE;
1099     }
1100 
1101     memset(outbuf, 0, 40);
1102     stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
1103     stw_be_p(&outbuf[6], current);
1104     /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
1105     outbuf[10] = 0x03; /* persistent, current */
1106     outbuf[11] = 8; /* two profiles */
1107     stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
1108     outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
1109     stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
1110     outbuf[18] = (current == MMC_PROFILE_CD_ROM);
1111     /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
1112     stw_be_p(&outbuf[20], 1);
1113     outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
1114     outbuf[23] = 8;
1115     stl_be_p(&outbuf[24], 1); /* SCSI */
1116     outbuf[28] = 1; /* DBE = 1, mandatory */
1117     /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
1118     stw_be_p(&outbuf[32], 3);
1119     outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
1120     outbuf[35] = 4;
1121     outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
1122     /* TODO: Random readable, CD read, DVD read, drive serial number,
1123        power management */
1124     return 40;
1125 }
1126 
1127 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1128 {
1129     if (s->qdev.type != TYPE_ROM) {
1130         return -1;
1131     }
1132     memset(outbuf, 0, 8);
1133     outbuf[5] = 1; /* CD-ROM */
1134     return 8;
1135 }
1136 
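/*
 * Emit a single mode page at *p_outbuf, honouring the page control field
 * (current vs. changeable values).  Returns the number of bytes written
 * (page length plus the 2-byte page header), or -1 if the page does not
 * apply to this device type.
 */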
1137 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
1138                            int page_control)
1139 {
1140     static const int mode_sense_valid[0x3f] = {
1141         [MODE_PAGE_VENDOR_SPECIFIC]        = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1142         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
1143         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
1144         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1145         [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1146         [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
1147         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
1148         [MODE_PAGE_APPLE_VENDOR]           = (1 << TYPE_ROM),
1149     };
1150 
1151     uint8_t *p = *p_outbuf + 2;
1152     int length;
1153 
1154     assert(page < ARRAY_SIZE(mode_sense_valid));
1155     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
1156         return -1;
1157     }
1158 
1159     /*
1160      * If Changeable Values are requested, a mask denoting those mode parameters
1161      * that are changeable shall be returned. As we currently don't support
1162      * parameter changes via MODE_SELECT, all bits are returned set to zero.
1163      * The buffer was already memset to zero by the caller of this function.
1164      *
1165      * The offsets here are off by two compared to the descriptions in the
1166      * SCSI specs, because those include a 2-byte header.  This is unfortunate,
1167      * but it is done so that offsets are consistent within our implementation
1168      * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
1169      * 2-byte and 4-byte headers.
1170      */
1171     switch (page) {
1172     case MODE_PAGE_HD_GEOMETRY:
1173         length = 0x16;
1174         if (page_control == 1) { /* Changeable Values */
1175             break;
1176         }
1177         /* if a geometry hint is available, use it */
1178         p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
1179         p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
1180         p[2] = s->qdev.conf.cyls & 0xff;
1181         p[3] = s->qdev.conf.heads & 0xff;
1182         /* Write precomp start cylinder, disabled */
1183         p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
1184         p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
1185         p[6] = s->qdev.conf.cyls & 0xff;
1186         /* Reduced current start cylinder, disabled */
1187         p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
1188         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1189         p[9] = s->qdev.conf.cyls & 0xff;
1190         /* Device step rate [ns], 200ns */
1191         p[10] = 0;
1192         p[11] = 200;
1193         /* Landing zone cylinder */
1194         p[12] = 0xff;
1195         p[13] = 0xff;
1196         p[14] = 0xff;
1197         /* Medium rotation rate [rpm], 5400 rpm */
1198         p[18] = (5400 >> 8) & 0xff;
1199         p[19] = 5400 & 0xff;
1200         break;
1201 
1202     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
1203         length = 0x1e;
1204         if (page_control == 1) { /* Changeable Values */
1205             break;
1206         }
1207         /* Transfer rate [kbit/s], 5Mbit/s */
1208         p[0] = 5000 >> 8;
1209         p[1] = 5000 & 0xff;
1210         /* if a geometry hint is available, use it */
1211         p[2] = s->qdev.conf.heads & 0xff;
1212         p[3] = s->qdev.conf.secs & 0xff;
1213         p[4] = s->qdev.blocksize >> 8;
1214         p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
1215         p[7] = s->qdev.conf.cyls & 0xff;
1216         /* Write precomp start cylinder, disabled */
1217         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1218         p[9] = s->qdev.conf.cyls & 0xff;
1219         /* Reduced current start cylinder, disabled */
1220         p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
1221         p[11] = s->qdev.conf.cyls & 0xff;
1222         /* Device step rate [100us], 100us */
1223         p[12] = 0;
1224         p[13] = 1;
1225         /* Device step pulse width [us], 1us */
1226         p[14] = 1;
1227         /* Device head settle delay [100us], 100us */
1228         p[15] = 0;
1229         p[16] = 1;
1230         /* Motor on delay [0.1s], 0.1s */
1231         p[17] = 1;
1232         /* Motor off delay [0.1s], 0.1s */
1233         p[18] = 1;
1234         /* Medium rotation rate [rpm], 5400 rpm */
1235         p[26] = (5400 >> 8) & 0xff;
1236         p[27] = 5400 & 0xff;
1237         break;
1238 
1239     case MODE_PAGE_CACHING:
1240         length = 0x12;
1241         if (page_control == 1 || /* Changeable Values */
1242             blk_enable_write_cache(s->qdev.conf.blk)) {
1243             p[0] = 4; /* WCE */
1244         }
1245         break;
1246 
1247     case MODE_PAGE_R_W_ERROR:
1248         length = 10;
1249         if (page_control == 1) { /* Changeable Values */
1250             if (s->qdev.type == TYPE_ROM) {
1251                 /* Automatic Write Reallocation Enabled */
1252                 p[0] = 0x80;
1253             }
1254             break;
1255         }
1256         p[0] = 0x80; /* Automatic Write Reallocation Enabled */
1257         if (s->qdev.type == TYPE_ROM) {
1258             p[1] = 0x20; /* Read Retry Count */
1259         }
1260         break;
1261 
1262     case MODE_PAGE_AUDIO_CTL:
1263         length = 14;
1264         break;
1265 
1266     case MODE_PAGE_CAPABILITIES:
1267         length = 0x14;
1268         if (page_control == 1) { /* Changeable Values */
1269             break;
1270         }
1271 
1272         p[0] = 0x3b; /* CD-R & CD-RW read */
1273         p[1] = 0; /* Writing not supported */
1274         p[2] = 0x7f; /* Audio, composite, digital out,
1275                         mode 2 form 1&2, multi session */
1276         p[3] = 0xff; /* CD DA, DA accurate, RW supported,
1277                         RW corrected, C2 errors, ISRC,
1278                         UPC, Bar code */
1279         p[4] = 0x2d | (s->tray_locked ? 2 : 0);
1280         /* Locking supported, jumper present, eject, tray */
1281         p[5] = 0; /* no volume & mute control, no
1282                      changer */
1283         p[6] = (50 * 176) >> 8; /* 50x read speed */
1284         p[7] = (50 * 176) & 0xff;
1285         p[8] = 2 >> 8; /* Two volume levels */
1286         p[9] = 2 & 0xff;
1287         p[10] = 2048 >> 8; /* 2M buffer */
1288         p[11] = 2048 & 0xff;
1289         p[12] = (16 * 176) >> 8; /* 16x read speed current */
1290         p[13] = (16 * 176) & 0xff;
1291         p[16] = (16 * 176) >> 8; /* 16x write speed */
1292         p[17] = (16 * 176) & 0xff;
1293         p[18] = (16 * 176) >> 8; /* 16x write speed current */
1294         p[19] = (16 * 176) & 0xff;
1295         break;
1296 
1297     case MODE_PAGE_APPLE_VENDOR:
1298         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR)) {
1299             length = 0x1e;
1300             if (page_control == 1) { /* Changeable Values */
1301                 break;
1302             }
1303 
1304             memset(p, 0, length);
1305             strcpy((char *)p + 8, "APPLE COMPUTER, INC   ");
1306             break;
1307         } else {
1308             return -1;
1309         }
1310 
1311     case MODE_PAGE_VENDOR_SPECIFIC:
1312         if (s->qdev.type == TYPE_DISK && (s->quirks &
1313             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1314             length = 0x2;
1315             if (page_control == 1) { /* Changeable Values */
1316                 p[0] = 0xff;
1317                 p[1] = 0xff;
1318                 break;
1319             }
1320             p[0] = 0;
1321             p[1] = 0;
1322             break;
1323         } else {
1324             return -1;
1325         }
1326 
1327     default:
1328         return -1;
1329     }
1330 
1331     assert(length < 256);
1332     (*p_outbuf)[0] = page;
1333     (*p_outbuf)[1] = length;
1334     *p_outbuf += length + 2;
1335     return length + 2;
1336 }
1337 
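/*
 * Build a MODE SENSE(6/10) response: mode parameter header, an optional
 * short block descriptor, and the requested mode page(s).
 */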
1338 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
1339 {
1340     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1341     uint64_t nb_sectors;
1342     bool dbd;
1343     int page, buflen, ret, page_control;
1344     uint8_t *p;
1345     uint8_t dev_specific_param;
1346 
1347     dbd = (r->req.cmd.buf[1] & 0x8) != 0;
1348     page = r->req.cmd.buf[2] & 0x3f;
1349     page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
1350 
1351     trace_scsi_disk_emulate_mode_sense((r->req.cmd.buf[0] == MODE_SENSE) ? 6 :
1352                                        10, page, r->req.cmd.xfer, page_control);
1353     memset(outbuf, 0, r->req.cmd.xfer);
1354     p = outbuf;
1355 
1356     if (s->qdev.type == TYPE_DISK) {
1357         dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
1358         if (!blk_is_writable(s->qdev.conf.blk)) {
1359             dev_specific_param |= 0x80; /* Readonly.  */
1360         }
1361     } else {
1362         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD)) {
1363             /* Use DBD from the request... */
1364             dev_specific_param = 0x00;
1365 
1366             /*
1367              * ... unless we receive a request for MODE_PAGE_APPLE_VENDOR
1368              * which should never return a block descriptor even though DBD is
1369              * not set, otherwise CDROM detection fails in MacOS
1370              */
1371             if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR) &&
1372                 page == MODE_PAGE_APPLE_VENDOR) {
1373                 dbd = true;
1374             }
1375         } else {
1376             /*
1377              * MMC prescribes that CD/DVD drives have no block descriptors,
1378              * and defines no device-specific parameter.
1379              */
1380             dev_specific_param = 0x00;
1381             dbd = true;
1382         }
1383     }
1384 
1385     if (r->req.cmd.buf[0] == MODE_SENSE) {
1386         p[1] = 0; /* Default media type.  */
1387         p[2] = dev_specific_param;
1388         p[3] = 0; /* Block descriptor length.  */
1389         p += 4;
1390     } else { /* MODE_SENSE_10 */
1391         p[2] = 0; /* Default media type.  */
1392         p[3] = dev_specific_param;
1393         p[6] = p[7] = 0; /* Block descriptor length.  */
1394         p += 8;
1395     }
1396 
1397     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1398     if (!dbd && nb_sectors) {
1399         if (r->req.cmd.buf[0] == MODE_SENSE) {
1400             outbuf[3] = 8; /* Block descriptor length  */
1401         } else { /* MODE_SENSE_10 */
1402             outbuf[7] = 8; /* Block descriptor length  */
1403         }
1404         nb_sectors /= (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1405         if (nb_sectors > 0xffffff) {
1406             nb_sectors = 0;
1407         }
1408         p[0] = 0; /* media density code */
1409         p[1] = (nb_sectors >> 16) & 0xff;
1410         p[2] = (nb_sectors >> 8) & 0xff;
1411         p[3] = nb_sectors & 0xff;
1412         p[4] = 0; /* reserved */
1413         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
1414         p[6] = s->qdev.blocksize >> 8;
1415         p[7] = 0;
1416         p += 8;
1417     }
1418 
1419     if (page_control == 3) {
1420         /* Saved Values */
1421         scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
1422         return -1;
1423     }
1424 
1425     if (page == 0x3f) {
1426         for (page = 0; page <= 0x3e; page++) {
1427             mode_sense_page(s, page, &p, page_control);
1428         }
1429     } else {
1430         ret = mode_sense_page(s, page, &p, page_control);
1431         if (ret == -1) {
1432             return -1;
1433         }
1434     }
1435 
1436     buflen = p - outbuf;
1437     /*
1438      * The mode data length field specifies the length in bytes of the
1439      * following data that is available to be transferred. The mode data
1440      * length does not include itself.
1441      */
1442     if (r->req.cmd.buf[0] == MODE_SENSE) {
1443         outbuf[0] = buflen - 1;
1444     } else { /* MODE_SENSE_10 */
1445         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
1446         outbuf[1] = (buflen - 2) & 0xff;
1447     }
1448     return buflen;
1449 }
1450 
1451 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1452 {
1453     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1454     int start_track, format, msf, toclen;
1455     uint64_t nb_sectors;
1456 
1457     msf = req->cmd.buf[1] & 2;
1458     format = req->cmd.buf[2] & 0xf;
1459     start_track = req->cmd.buf[6];
1460     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1461     trace_scsi_disk_emulate_read_toc(start_track, format, msf >> 1);
1462     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
1463     switch (format) {
1464     case 0:
1465         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1466         break;
1467     case 1:
1468         /* multi-session: only a single session defined */
1469         toclen = 12;
1470         memset(outbuf, 0, 12);
1471         outbuf[1] = 0x0a;
1472         outbuf[2] = 0x01;
1473         outbuf[3] = 0x01;
1474         break;
1475     case 2:
1476         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1477         break;
1478     default:
1479         return -1;
1480     }
1481     return toclen;
1482 }
1483 
1484 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1485 {
1486     SCSIRequest *req = &r->req;
1487     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1488     bool start = req->cmd.buf[4] & 1;
1489     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1490     int pwrcnd = req->cmd.buf[4] & 0xf0;
1491 
1492     if (pwrcnd) {
1493         /* eject/load only happens for power condition == 0 */
1494         return 0;
1495     }
1496 
1497     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1498         if (!start && !s->tray_open && s->tray_locked) {
1499             scsi_check_condition(r,
1500                                  blk_is_inserted(s->qdev.conf.blk)
1501                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1502                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1503             return -1;
1504         }
1505 
1506         if (s->tray_open != !start) {
1507             blk_eject(s->qdev.conf.blk, !start);
1508             s->tray_open = !start;
1509         }
1510     }
1511     return 0;
1512 }
1513 
1514 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1515 {
1516     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1517     int buflen = r->iov.iov_len;
1518 
1519     if (buflen) {
1520         trace_scsi_disk_emulate_read_data(buflen);
1521         r->iov.iov_len = 0;
1522         r->started = true;
1523         scsi_req_data(&r->req, buflen);
1524         return;
1525     }
1526 
1527     /* This also clears the sense buffer for REQUEST SENSE.  */
1528     scsi_req_complete(&r->req, GOOD);
1529 }
1530 
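/*
 * Validate one MODE SELECT page from the guest by comparing it with the
 * current values and the changeable-bits mask, so that only bits we allow
 * to change may differ.
 */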
1531 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1532                                        uint8_t *inbuf, int inlen)
1533 {
1534     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1535     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1536     uint8_t *p;
1537     int len, expected_len, changeable_len, i;
1538 
1539     /* The input buffer does not include the page header, so it is
1540      * off by 2 bytes.
1541      */
1542     expected_len = inlen + 2;
1543     if (expected_len > SCSI_MAX_MODE_LEN) {
1544         return -1;
1545     }
1546 
1547     /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
1548     if (page == MODE_PAGE_ALLS) {
1549         return -1;
1550     }
1551 
1552     p = mode_current;
1553     memset(mode_current, 0, inlen + 2);
1554     len = mode_sense_page(s, page, &p, 0);
1555     if (len < 0 || len != expected_len) {
1556         return -1;
1557     }
1558 
1559     p = mode_changeable;
1560     memset(mode_changeable, 0, inlen + 2);
1561     changeable_len = mode_sense_page(s, page, &p, 1);
1562     assert(changeable_len == len);
1563 
1564     /* Check that unchangeable bits are the same as what MODE SENSE
1565      * would return.
1566      */
1567     for (i = 2; i < len; i++) {
1568         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1569             return -1;
1570         }
1571     }
1572     return 0;
1573 }
1574 
1575 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1576 {
1577     switch (page) {
1578     case MODE_PAGE_CACHING:
1579         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1580         break;
1581 
1582     default:
1583         break;
1584     }
1585 }
1586 
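/*
 * Walk the mode page list of a MODE SELECT parameter list.  With
 * change == false the pages are only validated, with change == true they are
 * applied, so the caller's second pass cannot fail halfway through.
 */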
1587 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1588 {
1589     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1590 
1591     while (len > 0) {
1592         int page, subpage, page_len;
1593 
1594         /* Parse both possible formats for the mode page headers.  */
1595         page = p[0] & 0x3f;
1596         if (p[0] & 0x40) {
1597             if (len < 4) {
1598                 goto invalid_param_len;
1599             }
1600             subpage = p[1];
1601             page_len = lduw_be_p(&p[2]);
1602             p += 4;
1603             len -= 4;
1604         } else {
1605             if (len < 2) {
1606                 goto invalid_param_len;
1607             }
1608             subpage = 0;
1609             page_len = p[1];
1610             p += 2;
1611             len -= 2;
1612         }
1613 
1614         if (subpage) {
1615             goto invalid_param;
1616         }
1617         if (page_len > len) {
1618             if (!(s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED))) {
1619                 goto invalid_param_len;
1620             }
1621             trace_scsi_disk_mode_select_page_truncated(page, page_len, len);
1622         }
1623 
1624         if (!change) {
1625             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1626                 goto invalid_param;
1627             }
1628         } else {
1629             scsi_disk_apply_mode_select(s, page, p);
1630         }
1631 
1632         p += page_len;
1633         len -= page_len;
1634     }
1635     return 0;
1636 
1637 invalid_param:
1638     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1639     return -1;
1640 
1641 invalid_param_len:
1642     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1643     return -1;
1644 }
1645 
1646 static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
1647 {
1648     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1649     uint8_t *p = inbuf;
1650     int cmd = r->req.cmd.buf[0];
1651     int len = r->req.cmd.xfer;
1652     int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
1653     int bd_len, bs;
1654     int pass;
1655 
1656     if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
1657         if (!(s->quirks &
1658             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1659             /* We only support PF=1, SP=0.  */
1660             goto invalid_field;
1661         }
1662     }
1663 
1664     if (len < hdr_len) {
1665         goto invalid_param_len;
1666     }
1667 
1668     bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
1669     len -= hdr_len;
1670     p += hdr_len;
1671     if (len < bd_len) {
1672         goto invalid_param_len;
1673     }
1674     if (bd_len != 0 && bd_len != 8) {
1675         goto invalid_param;
1676     }
1677 
1678     /* Allow changing the block size */
1679     if (bd_len) {
1680         bs = p[5] << 16 | p[6] << 8 | p[7];
1681 
1682         /*
1683          * Since the existing code only checks/updates bits 8-15 of the block
1684          * size, restrict ourselves to the same requirement for now to ensure
1685          * that a block size set by a block descriptor and then read back by
1686          * a subsequent SCSI command will be the same. Also disallow a block
1687          * size of 256 since we cannot handle anything below BDRV_SECTOR_SIZE.
1688          */
1689         if (bs && !(bs & ~0xfe00) && bs != s->qdev.blocksize) {
1690             s->qdev.blocksize = bs;
1691             trace_scsi_disk_mode_select_set_blocksize(s->qdev.blocksize);
1692         }
1693     }
1694 
1695     len -= bd_len;
1696     p += bd_len;
1697 
1698     /* Ensure no change is made if there is an error!  */
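         /* The first pass only validates the pages; the second pass applies
          * them. */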
1699     for (pass = 0; pass < 2; pass++) {
1700         if (mode_select_pages(r, p, len, pass == 1) < 0) {
1701             assert(pass == 0);
1702             return;
1703         }
1704     }
1705     if (!blk_enable_write_cache(s->qdev.conf.blk)) {
1706         /* The request is used as the AIO opaque value, so add a ref.  */
1707         scsi_req_ref(&r->req);
1708         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
1709                          BLOCK_ACCT_FLUSH);
1710         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
1711         return;
1712     }
1713 
1714     scsi_req_complete(&r->req, GOOD);
1715     return;
1716 
1717 invalid_param:
1718     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1719     return;
1720 
1721 invalid_param_len:
1722     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1723     return;
1724 
1725 invalid_field:
1726     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1727 }
1728 
1729 /* sector_num and nb_sectors expected to be in qdev blocksize */
1730 static inline bool check_lba_range(SCSIDiskState *s,
1731                                    uint64_t sector_num, uint32_t nb_sectors)
1732 {
1733     /*
1734      * The first line tests that no overflow happens when computing the last
1735      * sector.  The second line tests that the last accessed sector is in
1736      * range.
1737      *
1738      * Careful, the computations should not underflow for nb_sectors == 0,
1739      * and a 0-block read to the first LBA beyond the end of the device is
1740      * valid.
1741      */
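         /*
          * Illustrative example: with max_lba == 99 (a 100-block device), LBA
          * 100 with nb_sectors == 0 is accepted (100 <= 100), while LBA 100
          * with nb_sectors == 1 is rejected (101 > 100).
          */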
1742     return (sector_num <= sector_num + nb_sectors &&
1743             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1744 }
1745 
1746 typedef struct UnmapCBData {
1747     SCSIDiskReq *r;
1748     uint8_t *inbuf;
1749     int count;
1750 } UnmapCBData;
1751 
1752 static void scsi_unmap_complete(void *opaque, int ret);
1753 
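     /*
      * Consume one UNMAP block descriptor per invocation: issue the discard
      * and let scsi_unmap_complete() re-enter this function for the next
      * descriptor until data->count reaches zero.
      */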
1754 static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
1755 {
1756     SCSIDiskReq *r = data->r;
1757     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1758 
1759     assert(r->req.aiocb == NULL);
1760 
1761     if (data->count > 0) {
1762         uint64_t sector_num = ldq_be_p(&data->inbuf[0]);
1763         uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
1764         r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1765         r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1766 
1767         if (!check_lba_range(s, sector_num, nb_sectors)) {
1768             block_acct_invalid(blk_get_stats(s->qdev.conf.blk),
1769                                BLOCK_ACCT_UNMAP);
1770             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1771             goto done;
1772         }
1773 
1774         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1775                          r->sector_count * BDRV_SECTOR_SIZE,
1776                          BLOCK_ACCT_UNMAP);
1777 
1778         r->req.aiocb = blk_aio_pdiscard(s->qdev.conf.blk,
1779                                         r->sector * BDRV_SECTOR_SIZE,
1780                                         r->sector_count * BDRV_SECTOR_SIZE,
1781                                         scsi_unmap_complete, data);
1782         data->count--;
1783         data->inbuf += 16;
1784         return;
1785     }
1786 
1787     scsi_req_complete(&r->req, GOOD);
1788 
1789 done:
1790     scsi_req_unref(&r->req);
1791     g_free(data);
1792 }
1793 
1794 static void scsi_unmap_complete(void *opaque, int ret)
1795 {
1796     UnmapCBData *data = opaque;
1797     SCSIDiskReq *r = data->r;
1798     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1799 
1800     assert(r->req.aiocb != NULL);
1801     r->req.aiocb = NULL;
1802 
1803     if (scsi_disk_req_check_error(r, ret, true)) {
1804         scsi_req_unref(&r->req);
1805         g_free(data);
1806     } else {
1807         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1808         scsi_unmap_complete_noio(data, ret);
1809     }
1810 }
1811 
1812 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
1813 {
1814     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1815     uint8_t *p = inbuf;
1816     int len = r->req.cmd.xfer;
1817     UnmapCBData *data;
1818 
1819     /* Reject ANCHOR=1.  */
1820     if (r->req.cmd.buf[1] & 0x1) {
1821         goto invalid_field;
1822     }
1823 
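         /*
          * UNMAP parameter list layout: bytes 0-1 hold the UNMAP data length
          * (excluding those two bytes), bytes 2-3 the block descriptor data
          * length, bytes 4-7 are reserved, and 16-byte descriptors (8-byte
          * LBA, 4-byte block count, 4 reserved bytes) start at offset 8.
          */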
1824     if (len < 8) {
1825         goto invalid_param_len;
1826     }
1827     if (len < lduw_be_p(&p[0]) + 2) {
1828         goto invalid_param_len;
1829     }
1830     if (len < lduw_be_p(&p[2]) + 8) {
1831         goto invalid_param_len;
1832     }
1833     if (lduw_be_p(&p[2]) & 15) {
1834         goto invalid_param_len;
1835     }
1836 
1837     if (!blk_is_writable(s->qdev.conf.blk)) {
1838         block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1839         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1840         return;
1841     }
1842 
1843     data = g_new0(UnmapCBData, 1);
1844     data->r = r;
1845     data->inbuf = &p[8];
1846     data->count = lduw_be_p(&p[2]) >> 4;
1847 
1848     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
1849     scsi_req_ref(&r->req);
1850     scsi_unmap_complete_noio(data, 0);
1851     return;
1852 
1853 invalid_param_len:
1854     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1855     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1856     return;
1857 
1858 invalid_field:
1859     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1860     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1861 }
1862 
1863 typedef struct WriteSameCBData {
1864     SCSIDiskReq *r;
1865     int64_t sector;
1866     int nb_sectors;
1867     QEMUIOVector qiov;
1868     struct iovec iov;
1869 } WriteSameCBData;
1870 
1871 static void scsi_write_same_complete(void *opaque, int ret)
1872 {
1873     WriteSameCBData *data = opaque;
1874     SCSIDiskReq *r = data->r;
1875     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1876 
1877     assert(r->req.aiocb != NULL);
1878     r->req.aiocb = NULL;
1879 
1880     if (scsi_disk_req_check_error(r, ret, true)) {
1881         goto done;
1882     }
1883 
1884     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1885 
1886     data->nb_sectors -= data->iov.iov_len / BDRV_SECTOR_SIZE;
1887     data->sector += data->iov.iov_len / BDRV_SECTOR_SIZE;
1888     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1889                             data->iov.iov_len);
1890     if (data->iov.iov_len) {
1891         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1892                          data->iov.iov_len, BLOCK_ACCT_WRITE);
1893         /* Reinitialize qiov to handle an unaligned WRITE SAME request
1894          * where the final qiov may need a smaller size */
1895         qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1896         r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1897                                        data->sector << BDRV_SECTOR_BITS,
1898                                        &data->qiov, 0,
1899                                        scsi_write_same_complete, data);
1900         return;
1901     }
1902 
1903     scsi_req_complete(&r->req, GOOD);
1904 
1905 done:
1906     scsi_req_unref(&r->req);
1907     qemu_vfree(data->iov.iov_base);
1908     g_free(data);
1909 }
1910 
1911 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1912 {
1913     SCSIRequest *req = &r->req;
1914     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1915     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1916     WriteSameCBData *data;
1917     uint8_t *buf;
1918     int i, l;
1919 
1920     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1921     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1922         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1923         return;
1924     }
1925 
1926     if (!blk_is_writable(s->qdev.conf.blk)) {
1927         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1928         return;
1929     }
1930     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1931         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1932         return;
1933     }
1934 
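         /* Requests with bit 0 of CDB byte 1 set, or whose data-out block is
          * all zeros, take the write-zeroes fast path (adding
          * BDRV_REQ_MAY_UNMAP when the UNMAP bit is set); other patterns use
          * the bounce buffer path below. */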
1935     if ((req->cmd.buf[1] & 0x1) || buffer_is_zero(inbuf, s->qdev.blocksize)) {
1936         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1937 
1938         /* The request is used as the AIO opaque value, so add a ref.  */
1939         scsi_req_ref(&r->req);
1940         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1941                          nb_sectors * s->qdev.blocksize,
1942                          BLOCK_ACCT_WRITE);
1943         r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
1944                                 r->req.cmd.lba * s->qdev.blocksize,
1945                                 nb_sectors * s->qdev.blocksize,
1946                                 flags, scsi_aio_complete, r);
1947         return;
1948     }
1949 
1950     data = g_new0(WriteSameCBData, 1);
1951     data->r = r;
1952     data->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1953     data->nb_sectors = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1954     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1955                             SCSI_WRITE_SAME_MAX);
1956     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1957                                               data->iov.iov_len);
1958     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1959 
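         /* Fill the bounce buffer with repeated copies of the one-block
          * pattern; scsi_write_same_complete() reuses it for each subsequent
          * chunk. */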
1960     for (i = 0; i < data->iov.iov_len; i += l) {
1961         l = MIN(s->qdev.blocksize, data->iov.iov_len - i);
1962         memcpy(&buf[i], inbuf, l);
1963     }
1964 
1965     scsi_req_ref(&r->req);
1966     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1967                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1968     r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1969                                    data->sector << BDRV_SECTOR_BITS,
1970                                    &data->qiov, 0,
1971                                    scsi_write_same_complete, data);
1972 }
1973 
1974 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1975 {
1976     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1977 
1978     if (r->iov.iov_len) {
1979         int buflen = r->iov.iov_len;
1980         trace_scsi_disk_emulate_write_data(buflen);
1981         r->iov.iov_len = 0;
1982         scsi_req_data(&r->req, buflen);
1983         return;
1984     }
1985 
1986     switch (req->cmd.buf[0]) {
1987     case MODE_SELECT:
1988     case MODE_SELECT_10:
1989         /* This also clears the sense buffer for REQUEST SENSE.  */
1990         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
1991         break;
1992 
1993     case UNMAP:
1994         scsi_disk_emulate_unmap(r, r->iov.iov_base);
1995         break;
1996 
1997     case VERIFY_10:
1998     case VERIFY_12:
1999     case VERIFY_16:
2000         if (r->req.status == -1) {
2001             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2002         }
2003         break;
2004 
2005     case WRITE_SAME_10:
2006     case WRITE_SAME_16:
2007         scsi_disk_emulate_write_same(r, r->iov.iov_base);
2008         break;
2009 
2010     case FORMAT_UNIT:
2011         scsi_req_complete(&r->req, GOOD);
2012         break;
2013 
2014     default:
2015         abort();
2016     }
2017 }
2018 
2019 static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
2020 {
2021     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2022     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2023     uint64_t nb_sectors;
2024     uint8_t *outbuf;
2025     int buflen;
2026 
2027     switch (req->cmd.buf[0]) {
2028     case INQUIRY:
2029     case MODE_SENSE:
2030     case MODE_SENSE_10:
2031     case RESERVE:
2032     case RESERVE_10:
2033     case RELEASE:
2034     case RELEASE_10:
2035     case START_STOP:
2036     case ALLOW_MEDIUM_REMOVAL:
2037     case GET_CONFIGURATION:
2038     case GET_EVENT_STATUS_NOTIFICATION:
2039     case MECHANISM_STATUS:
2040     case REQUEST_SENSE:
2041         break;
2042 
2043     default:
2044         if (!blk_is_available(s->qdev.conf.blk)) {
2045             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2046             return 0;
2047         }
2048         break;
2049     }
2050 
2051     /*
2052      * FIXME: we shouldn't return anything bigger than 4k, but the code
2053      * requires the buffer to be as big as req->cmd.xfer in several
2054      * places.  So, do not allow CDBs with a very large ALLOCATION
2055      * LENGTH.  The real fix would be to modify scsi_read_data and
2056      * dma_buf_read, so that they return data beyond the buflen
2057      * as all zeros.
2058      */
2059     if (req->cmd.xfer > 65536) {
2060         goto illegal_request;
2061     }
2062     r->buflen = MAX(4096, req->cmd.xfer);
2063 
2064     if (!r->iov.iov_base) {
2065         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
2066     }
2067 
2068     outbuf = r->iov.iov_base;
2069     memset(outbuf, 0, r->buflen);
2070     switch (req->cmd.buf[0]) {
2071     case TEST_UNIT_READY:
2072         assert(blk_is_available(s->qdev.conf.blk));
2073         break;
2074     case INQUIRY:
2075         buflen = scsi_disk_emulate_inquiry(req, outbuf);
2076         if (buflen < 0) {
2077             goto illegal_request;
2078         }
2079         break;
2080     case MODE_SENSE:
2081     case MODE_SENSE_10:
2082         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
2083         if (buflen < 0) {
2084             goto illegal_request;
2085         }
2086         break;
2087     case READ_TOC:
2088         buflen = scsi_disk_emulate_read_toc(req, outbuf);
2089         if (buflen < 0) {
2090             goto illegal_request;
2091         }
2092         break;
2093     case RESERVE:
2094         if (req->cmd.buf[1] & 1) {
2095             goto illegal_request;
2096         }
2097         break;
2098     case RESERVE_10:
2099         if (req->cmd.buf[1] & 3) {
2100             goto illegal_request;
2101         }
2102         break;
2103     case RELEASE:
2104         if (req->cmd.buf[1] & 1) {
2105             goto illegal_request;
2106         }
2107         break;
2108     case RELEASE_10:
2109         if (req->cmd.buf[1] & 3) {
2110             goto illegal_request;
2111         }
2112         break;
2113     case START_STOP:
2114         if (scsi_disk_emulate_start_stop(r) < 0) {
2115             return 0;
2116         }
2117         break;
2118     case ALLOW_MEDIUM_REMOVAL:
2119         s->tray_locked = req->cmd.buf[4] & 1;
2120         blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
2121         break;
2122     case READ_CAPACITY_10:
2123         /* The normal LEN field for this command is zero.  */
2124         memset(outbuf, 0, 8);
2125         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2126         if (!nb_sectors) {
2127             scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2128             return 0;
2129         }
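             /* Byte 8 bit 0 of the CDB is the PMI bit; when it is clear, the
              * LOGICAL BLOCK ADDRESS field must be zero. */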
2130         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
2131             goto illegal_request;
2132         }
2133         nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2134         /* Returned value is the address of the last sector.  */
2135         nb_sectors--;
2136         /* Remember the new size for read/write sanity checking. */
2137         s->qdev.max_lba = nb_sectors;
2138         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
2139         if (nb_sectors > UINT32_MAX) {
2140             nb_sectors = UINT32_MAX;
2141         }
2142         outbuf[0] = (nb_sectors >> 24) & 0xff;
2143         outbuf[1] = (nb_sectors >> 16) & 0xff;
2144         outbuf[2] = (nb_sectors >> 8) & 0xff;
2145         outbuf[3] = nb_sectors & 0xff;
2146         outbuf[4] = 0;
2147         outbuf[5] = 0;
2148         outbuf[6] = s->qdev.blocksize >> 8;
2149         outbuf[7] = 0;
2150         break;
2151     case REQUEST_SENSE:
2152         /* Just return "NO SENSE".  */
2153         buflen = scsi_convert_sense(NULL, 0, outbuf, r->buflen,
2154                                     (req->cmd.buf[1] & 1) == 0);
2155         if (buflen < 0) {
2156             goto illegal_request;
2157         }
2158         break;
2159     case MECHANISM_STATUS:
2160         buflen = scsi_emulate_mechanism_status(s, outbuf);
2161         if (buflen < 0) {
2162             goto illegal_request;
2163         }
2164         break;
2165     case GET_CONFIGURATION:
2166         buflen = scsi_get_configuration(s, outbuf);
2167         if (buflen < 0) {
2168             goto illegal_request;
2169         }
2170         break;
2171     case GET_EVENT_STATUS_NOTIFICATION:
2172         buflen = scsi_get_event_status_notification(s, r, outbuf);
2173         if (buflen < 0) {
2174             goto illegal_request;
2175         }
2176         break;
2177     case READ_DISC_INFORMATION:
2178         buflen = scsi_read_disc_information(s, r, outbuf);
2179         if (buflen < 0) {
2180             goto illegal_request;
2181         }
2182         break;
2183     case READ_DVD_STRUCTURE:
2184         buflen = scsi_read_dvd_structure(s, r, outbuf);
2185         if (buflen < 0) {
2186             goto illegal_request;
2187         }
2188         break;
2189     case SERVICE_ACTION_IN_16:
2190         /* Service Action In subcommands. */
2191         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
2192             trace_scsi_disk_emulate_command_SAI_16();
2193             memset(outbuf, 0, req->cmd.xfer);
2194             blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2195             if (!nb_sectors) {
2196                 scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2197                 return 0;
2198             }
2199             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
2200                 goto illegal_request;
2201             }
2202             nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2203             /* Returned value is the address of the last sector.  */
2204             nb_sectors--;
2205             /* Remember the new size for read/write sanity checking. */
2206             s->qdev.max_lba = nb_sectors;
2207             outbuf[0] = (nb_sectors >> 56) & 0xff;
2208             outbuf[1] = (nb_sectors >> 48) & 0xff;
2209             outbuf[2] = (nb_sectors >> 40) & 0xff;
2210             outbuf[3] = (nb_sectors >> 32) & 0xff;
2211             outbuf[4] = (nb_sectors >> 24) & 0xff;
2212             outbuf[5] = (nb_sectors >> 16) & 0xff;
2213             outbuf[6] = (nb_sectors >> 8) & 0xff;
2214             outbuf[7] = nb_sectors & 0xff;
2215             outbuf[8] = 0;
2216             outbuf[9] = 0;
2217             outbuf[10] = s->qdev.blocksize >> 8;
2218             outbuf[11] = 0;
2219             outbuf[12] = 0;
2220             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
2221 
2222             /* set TPE bit if the format supports discard */
2223             if (s->qdev.conf.discard_granularity) {
2224                 outbuf[14] = 0x80;
2225             }
2226 
2227             /* Protection, exponent and lowest lba field left blank. */
2228             break;
2229         }
2230         trace_scsi_disk_emulate_command_SAI_unsupported();
2231         goto illegal_request;
2232     case SYNCHRONIZE_CACHE:
2233         /* The request is used as the AIO opaque value, so add a ref.  */
2234         scsi_req_ref(&r->req);
2235         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
2236                          BLOCK_ACCT_FLUSH);
2237         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
2238         return 0;
2239     case SEEK_10:
2240         trace_scsi_disk_emulate_command_SEEK_10(r->req.cmd.lba);
2241         if (r->req.cmd.lba > s->qdev.max_lba) {
2242             goto illegal_lba;
2243         }
2244         break;
2245     case MODE_SELECT:
2246         trace_scsi_disk_emulate_command_MODE_SELECT(r->req.cmd.xfer);
2247         break;
2248     case MODE_SELECT_10:
2249         trace_scsi_disk_emulate_command_MODE_SELECT_10(r->req.cmd.xfer);
2250         break;
2251     case UNMAP:
2252         trace_scsi_disk_emulate_command_UNMAP(r->req.cmd.xfer);
2253         break;
2254     case VERIFY_10:
2255     case VERIFY_12:
2256     case VERIFY_16:
2257         trace_scsi_disk_emulate_command_VERIFY((req->cmd.buf[1] >> 1) & 3);
2258         if (req->cmd.buf[1] & 6) {
2259             goto illegal_request;
2260         }
2261         break;
2262     case WRITE_SAME_10:
2263     case WRITE_SAME_16:
2264         trace_scsi_disk_emulate_command_WRITE_SAME(
2265                 req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, r->req.cmd.xfer);
2266         break;
2267     case FORMAT_UNIT:
2268         trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
2269         break;
2270     default:
2271         trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
2272                                                 scsi_command_name(buf[0]));
2273         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
2274         return 0;
2275     }
2276     assert(!r->req.aiocb);
2277     r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
2278     if (r->iov.iov_len == 0) {
2279         scsi_req_complete(&r->req, GOOD);
2280     }
2281     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2282         assert(r->iov.iov_len == req->cmd.xfer);
2283         return -r->iov.iov_len;
2284     } else {
2285         return r->iov.iov_len;
2286     }
2287 
2288 illegal_request:
2289     if (r->req.status == -1) {
2290         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2291     }
2292     return 0;
2293 
2294 illegal_lba:
2295     scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2296     return 0;
2297 }
2298 
2299 /* Execute a SCSI command.  Returns the length of the data expected by the
2300    command.  This will be positive for data transfers from the device
2301    (e.g. disk reads), negative for transfers to the device (e.g. disk writes),
2302    and zero if the command does not transfer any data.  */
2303 
2304 static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
2305 {
2306     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2307     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2308     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
2309     uint32_t len;
2310     uint8_t command;
2311 
2312     command = buf[0];
2313 
2314     if (!blk_is_available(s->qdev.conf.blk)) {
2315         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2316         return 0;
2317     }
2318 
2319     len = scsi_data_cdb_xfer(r->req.cmd.buf);
2320     switch (command) {
2321     case READ_6:
2322     case READ_10:
2323     case READ_12:
2324     case READ_16:
2325         trace_scsi_disk_dma_command_READ(r->req.cmd.lba, len);
2326         /* Protection information is not supported.  For SCSI versions 2 and
2327          * older (as determined by snooping the guest's INQUIRY commands),
2328          * there is no RD/WR/VRPROTECT, so skip this check in these versions.
2329          */
2330         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2331             goto illegal_request;
2332         }
2333         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2334             goto illegal_lba;
2335         }
2336         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2337         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2338         break;
2339     case WRITE_6:
2340     case WRITE_10:
2341     case WRITE_12:
2342     case WRITE_16:
2343     case WRITE_VERIFY_10:
2344     case WRITE_VERIFY_12:
2345     case WRITE_VERIFY_16:
2346         if (!blk_is_writable(s->qdev.conf.blk)) {
2347             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
2348             return 0;
2349         }
2350         trace_scsi_disk_dma_command_WRITE(
2351                 (command & 0xe) == 0xe ? "And Verify " : "",
2352                 r->req.cmd.lba, len);
2353         /* fall through */
2354     case VERIFY_10:
2355     case VERIFY_12:
2356     case VERIFY_16:
2357         /* We get here only for BYTCHK == 0x01 and only for scsi-block.
2358          * As far as DMA is concerned, we can treat it the same as a write;
2359          * scsi_block_do_sgio will send VERIFY commands.
2360          */
2361         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2362             goto illegal_request;
2363         }
2364         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2365             goto illegal_lba;
2366         }
2367         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2368         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2369         break;
2370     default:
2371         abort();
2372     illegal_request:
2373         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2374         return 0;
2375     illegal_lba:
2376         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2377         return 0;
2378     }
2379     r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
2380     if (r->sector_count == 0) {
2381         scsi_req_complete(&r->req, GOOD);
2382     }
2383     assert(r->iov.iov_len == 0);
2384     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2385         return -r->sector_count * BDRV_SECTOR_SIZE;
2386     } else {
2387         return r->sector_count * BDRV_SECTOR_SIZE;
2388     }
2389 }
2390 
2391 static void scsi_disk_reset(DeviceState *dev)
2392 {
2393     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2394     uint64_t nb_sectors;
2395 
2396     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2397 
2398     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2399 
2400     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2401     if (nb_sectors) {
2402         nb_sectors--;
2403     }
2404     s->qdev.max_lba = nb_sectors;
2405     /* reset tray statuses */
2406     s->tray_locked = 0;
2407     s->tray_open = 0;
2408 
2409     s->qdev.scsi_version = s->qdev.default_scsi_version;
2410 }
2411 
2412 static void scsi_disk_drained_begin(void *opaque)
2413 {
2414     SCSIDiskState *s = opaque;
2415 
2416     scsi_device_drained_begin(&s->qdev);
2417 }
2418 
2419 static void scsi_disk_drained_end(void *opaque)
2420 {
2421     SCSIDiskState *s = opaque;
2422 
2423     scsi_device_drained_end(&s->qdev);
2424 }
2425 
2426 static void scsi_disk_resize_cb(void *opaque)
2427 {
2428     SCSIDiskState *s = opaque;
2429 
2430     /* SPC lists this sense code as available only for
2431      * direct-access devices.
2432      */
2433     if (s->qdev.type == TYPE_DISK) {
2434         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2435     }
2436 }
2437 
2438 static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
2439 {
2440     SCSIDiskState *s = opaque;
2441 
2442     /*
2443      * When a CD gets changed, we have to report an ejected state and
2444      * then a loaded state to guests so that they detect tray
2445      * open/close and media change events.  Guests that do not use
2446      * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
2447      * states rely on this behavior.
2448      *
2449      * media_changed governs the state machine used for unit attention
2450      * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
2451      */
2452     s->media_changed = load;
2453     s->tray_open = !load;
2454     scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
2455     s->media_event = true;
2456     s->eject_request = false;
2457 }
2458 
2459 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2460 {
2461     SCSIDiskState *s = opaque;
2462 
2463     s->eject_request = true;
2464     if (force) {
2465         s->tray_locked = false;
2466     }
2467 }
2468 
2469 static bool scsi_cd_is_tray_open(void *opaque)
2470 {
2471     return ((SCSIDiskState *)opaque)->tray_open;
2472 }
2473 
2474 static bool scsi_cd_is_medium_locked(void *opaque)
2475 {
2476     return ((SCSIDiskState *)opaque)->tray_locked;
2477 }
2478 
2479 static const BlockDevOps scsi_disk_removable_block_ops = {
2480     .change_media_cb  = scsi_cd_change_media_cb,
2481     .drained_begin    = scsi_disk_drained_begin,
2482     .drained_end      = scsi_disk_drained_end,
2483     .eject_request_cb = scsi_cd_eject_request_cb,
2484     .is_medium_locked = scsi_cd_is_medium_locked,
2485     .is_tray_open     = scsi_cd_is_tray_open,
2486     .resize_cb        = scsi_disk_resize_cb,
2487 };
2488 
2489 static const BlockDevOps scsi_disk_block_ops = {
2490     .drained_begin = scsi_disk_drained_begin,
2491     .drained_end   = scsi_disk_drained_end,
2492     .resize_cb     = scsi_disk_resize_cb,
2493 };
2494 
2495 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2496 {
2497     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2498     if (s->media_changed) {
2499         s->media_changed = false;
2500         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2501     }
2502 }
2503 
2504 static void scsi_realize(SCSIDevice *dev, Error **errp)
2505 {
2506     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2507     bool read_only;
2508 
2509     if (!s->qdev.conf.blk) {
2510         error_setg(errp, "drive property not set");
2511         return;
2512     }
2513 
2514     if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2515         !blk_is_inserted(s->qdev.conf.blk)) {
2516         error_setg(errp, "Device needs media, but drive is empty");
2517         return;
2518     }
2519 
2520     if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2521         return;
2522     }
2523 
2524     if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
2525         !s->qdev.hba_supports_iothread)
2526     {
2527         error_setg(errp, "HBA does not support iothreads");
2528         return;
2529     }
2530 
2531     if (dev->type == TYPE_DISK) {
2532         if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
2533             return;
2534         }
2535     }
2536 
2537     read_only = !blk_supports_write_perm(s->qdev.conf.blk);
2538     if (dev->type == TYPE_ROM) {
2539         read_only = true;
2540     }
2541 
2542     if (!blkconf_apply_backend_options(&dev->conf, read_only,
2543                                        dev->type == TYPE_DISK, errp)) {
2544         return;
2545     }
2546 
2547     if (s->qdev.conf.discard_granularity == -1) {
2548         s->qdev.conf.discard_granularity =
2549             MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
2550     }
2551 
2552     if (!s->version) {
2553         s->version = g_strdup(qemu_hw_version());
2554     }
2555     if (!s->vendor) {
2556         s->vendor = g_strdup("QEMU");
2557     }
2558     if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
2559         error_setg(errp, "The serial number can't be longer than %d characters",
2560                    MAX_SERIAL_LEN);
2561         return;
2562     }
2563     if (!s->device_id) {
2564         if (s->serial) {
2565             if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
2566                 error_setg(errp, "The serial number can't be longer than %d "
2567                            "characters when it is also used as the default for "
2568                            "device_id", MAX_SERIAL_LEN_FOR_DEVID);
2569                 return;
2570             }
2571             s->device_id = g_strdup(s->serial);
2572         } else {
2573             const char *str = blk_name(s->qdev.conf.blk);
2574             if (str && *str) {
2575                 s->device_id = g_strdup(str);
2576             }
2577         }
2578     }
2579 
2580     if (blk_is_sg(s->qdev.conf.blk)) {
2581         error_setg(errp, "unwanted /dev/sg*");
2582         return;
2583     }
2584 
2585     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2586             !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
2587         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
2588     } else {
2589         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
2590     }
2591 
2592     blk_iostatus_enable(s->qdev.conf.blk);
2593 
2594     add_boot_device_lchs(&dev->qdev, NULL,
2595                          dev->conf.lcyls,
2596                          dev->conf.lheads,
2597                          dev->conf.lsecs);
2598 }
2599 
2600 static void scsi_unrealize(SCSIDevice *dev)
2601 {
2602     del_boot_device_lchs(&dev->qdev, NULL);
2603 }
2604 
2605 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2606 {
2607     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2608 
2609     /* This can happen for devices without a drive.  The error message for a
2610      * missing backend will be issued in scsi_realize.
2611      */
2612     if (s->qdev.conf.blk) {
2613         if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2614             return;
2615         }
2616     }
2617     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2618     s->qdev.type = TYPE_DISK;
2619     if (!s->product) {
2620         s->product = g_strdup("QEMU HARDDISK");
2621     }
2622     scsi_realize(&s->qdev, errp);
2623 }
2624 
2625 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2626 {
2627     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2628     int ret;
2629     uint32_t blocksize = 2048;
2630 
2631     if (!dev->conf.blk) {
2632         /* Anonymous BlockBackend for an empty drive. As we put it into
2633          * dev->conf, qdev takes care of detaching on unplug. */
2634         dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
2635         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
2636         assert(ret == 0);
2637     }
2638 
2639     if (dev->conf.physical_block_size != 0) {
2640         blocksize = dev->conf.physical_block_size;
2641     }
2642 
2643     s->qdev.blocksize = blocksize;
2644     s->qdev.type = TYPE_ROM;
2645     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2646     if (!s->product) {
2647         s->product = g_strdup("QEMU CD-ROM");
2648     }
2649     scsi_realize(&s->qdev, errp);
2650 }
2651 
2652 
2653 static const SCSIReqOps scsi_disk_emulate_reqops = {
2654     .size         = sizeof(SCSIDiskReq),
2655     .free_req     = scsi_free_request,
2656     .send_command = scsi_disk_emulate_command,
2657     .read_data    = scsi_disk_emulate_read_data,
2658     .write_data   = scsi_disk_emulate_write_data,
2659     .get_buf      = scsi_get_buf,
2660     .load_request = scsi_disk_emulate_load_request,
2661     .save_request = scsi_disk_emulate_save_request,
2662 };
2663 
2664 static const SCSIReqOps scsi_disk_dma_reqops = {
2665     .size         = sizeof(SCSIDiskReq),
2666     .free_req     = scsi_free_request,
2667     .send_command = scsi_disk_dma_command,
2668     .read_data    = scsi_read_data,
2669     .write_data   = scsi_write_data,
2670     .get_buf      = scsi_get_buf,
2671     .load_request = scsi_disk_load_request,
2672     .save_request = scsi_disk_save_request,
2673 };
2674 
2675 static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
2676     [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
2677     [INQUIRY]                         = &scsi_disk_emulate_reqops,
2678     [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
2679     [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
2680     [START_STOP]                      = &scsi_disk_emulate_reqops,
2681     [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
2682     [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
2683     [READ_TOC]                        = &scsi_disk_emulate_reqops,
2684     [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
2685     [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
2686     [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
2687     [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
2688     [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
2689     [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
2690     [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
2691     [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
2692     [SEEK_10]                         = &scsi_disk_emulate_reqops,
2693     [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
2694     [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
2695     [UNMAP]                           = &scsi_disk_emulate_reqops,
2696     [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
2697     [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
2698     [VERIFY_10]                       = &scsi_disk_emulate_reqops,
2699     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
2700     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
2701     [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
2702 
2703     [READ_6]                          = &scsi_disk_dma_reqops,
2704     [READ_10]                         = &scsi_disk_dma_reqops,
2705     [READ_12]                         = &scsi_disk_dma_reqops,
2706     [READ_16]                         = &scsi_disk_dma_reqops,
2707     [WRITE_6]                         = &scsi_disk_dma_reqops,
2708     [WRITE_10]                        = &scsi_disk_dma_reqops,
2709     [WRITE_12]                        = &scsi_disk_dma_reqops,
2710     [WRITE_16]                        = &scsi_disk_dma_reqops,
2711     [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
2712     [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
2713     [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
2714 };
2715 
2716 static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf)
2717 {
2718     int len = scsi_cdb_length(buf);
2719     g_autoptr(GString) str = NULL;
2720 
2721     assert(len > 0 && len <= 16);
2722     str = qemu_hexdump_line(NULL, buf, len, 1, 0);
2723     trace_scsi_disk_new_request(lun, tag, str->str);
2724 }
2725 
2726 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2727                                      uint8_t *buf, void *hba_private)
2728 {
2729     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2730     SCSIRequest *req;
2731     const SCSIReqOps *ops;
2732     uint8_t command;
2733 
2734     command = buf[0];
2735     ops = scsi_disk_reqops_dispatch[command];
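         /* Opcodes without a dedicated entry fall back to the emulation path,
          * which reports INVALID OPCODE for anything it does not implement. */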
2736     if (!ops) {
2737         ops = &scsi_disk_emulate_reqops;
2738     }
2739     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2740 
2741     if (trace_event_get_state_backends(TRACE_SCSI_DISK_NEW_REQUEST)) {
2742         scsi_disk_new_request_dump(lun, tag, buf);
2743     }
2744 
2745     return req;
2746 }
2747 
2748 #ifdef __linux__
2749 static int get_device_type(SCSIDiskState *s)
2750 {
2751     uint8_t cmd[16];
2752     uint8_t buf[36];
2753     int ret;
2754 
2755     memset(cmd, 0, sizeof(cmd));
2756     memset(buf, 0, sizeof(buf));
2757     cmd[0] = INQUIRY;
2758     cmd[4] = sizeof(buf);
2759 
2760     ret = scsi_SG_IO_FROM_DEV(s->qdev.conf.blk, cmd, sizeof(cmd),
2761                               buf, sizeof(buf), s->qdev.io_timeout);
2762     if (ret < 0) {
2763         return -1;
2764     }
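         /* Byte 0 of the standard INQUIRY data carries the peripheral device
          * type; bit 7 of byte 1 is the RMB (removable medium) bit. */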
2765     s->qdev.type = buf[0];
2766     if (buf[1] & 0x80) {
2767         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2768     }
2769     return 0;
2770 }
2771 
2772 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2773 {
2774     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2775     int sg_version;
2776     int rc;
2777 
2778     if (!s->qdev.conf.blk) {
2779         error_setg(errp, "drive property not set");
2780         return;
2781     }
2782 
2783     if (s->rotation_rate) {
2784         error_report_once("rotation_rate is specified for scsi-block but is "
2785                           "not implemented. This option is deprecated and will "
2786                           "be removed in a future version");
2787     }
2788 
2789     /* Check that the driver handles SG_IO (version 3 and later) */
2790     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2791     if (rc < 0) {
2792         error_setg_errno(errp, -rc, "cannot get SG_IO version number");
2793         if (rc != -EPERM) {
2794             error_append_hint(errp, "Is this a SCSI device?\n");
2795         }
2796         return;
2797     }
2798     if (sg_version < 30000) {
2799         error_setg(errp, "scsi generic interface too old");
2800         return;
2801     }
2802 
2803     /* get device type from INQUIRY data */
2804     rc = get_device_type(s);
2805     if (rc < 0) {
2806         error_setg(errp, "INQUIRY failed");
2807         return;
2808     }
2809 
2810     /* Make a guess for the block size; we'll fix it when the guest sends
2811      * READ CAPACITY.  If it doesn't, it would likely assume these sizes
2812      * anyway.  (TODO: check in /sys.)
2813      */
2814     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2815         s->qdev.blocksize = 2048;
2816     } else {
2817         s->qdev.blocksize = 512;
2818     }
2819 
2820     /* Make the scsi-block device non-removable via the HMP and QMP eject
2821      * commands.
2822      */
2823     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2824 
2825     scsi_realize(&s->qdev, errp);
2826     scsi_generic_read_device_inquiry(&s->qdev);
2827 }
2828 
2829 typedef struct SCSIBlockReq {
2830     SCSIDiskReq req;
2831     sg_io_hdr_t io_header;
2832 
2833     /* Selected bytes of the original CDB, copied into our own CDB.  */
2834     uint8_t cmd, cdb1, group_number;
2835 
2836     /* CDB passed to SG_IO.  */
2837     uint8_t cdb[16];
2838     BlockCompletionFunc *cb;
2839     void *cb_opaque;
2840 } SCSIBlockReq;
2841 
2842 static void scsi_block_sgio_complete(void *opaque, int ret)
2843 {
2844     SCSIBlockReq *req = (SCSIBlockReq *)opaque;
2845     SCSIDiskReq *r = &req->req;
2846     sg_io_hdr_t *io_hdr = &req->io_header;
2847 
2848     if (ret == 0) {
2849         /* FIXME This skips calling req->cb() and any cleanup in it */
2850         if (io_hdr->host_status != SCSI_HOST_OK) {
2851             scsi_req_complete_failed(&r->req, io_hdr->host_status);
2852             scsi_req_unref(&r->req);
2853             return;
2854         }
2855 
2856         if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
2857             ret = BUSY;
2858         } else {
2859             ret = io_hdr->status;
2860         }
2861     }
2862 
2863     req->cb(req->cb_opaque, ret);
2864 }
2865 
2866 static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
2867                                       int64_t offset, QEMUIOVector *iov,
2868                                       int direction,
2869                                       BlockCompletionFunc *cb, void *opaque)
2870 {
2871     sg_io_hdr_t *io_header = &req->io_header;
2872     SCSIDiskReq *r = &req->req;
2873     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
2874     int nb_logical_blocks;
2875     uint64_t lba;
2876     BlockAIOCB *aiocb;
2877 
2878     /* This is not supported yet.  It can only happen if the guest does
2879      * reads and writes that are not aligned to the logical sector size
2880      * _and_ cover multiple MemoryRegions.
2881      */
2882     assert(offset % s->qdev.blocksize == 0);
2883     assert(iov->size % s->qdev.blocksize == 0);
2884 
2885     io_header->interface_id = 'S';
2886 
2887     /* The data transfer comes from the QEMUIOVector.  */
2888     io_header->dxfer_direction = direction;
2889     io_header->dxfer_len = iov->size;
2890     io_header->dxferp = (void *)iov->iov;
2891     io_header->iovec_count = iov->niov;
2892     assert(io_header->iovec_count == iov->niov); /* no overflow! */
2893 
2894     /* Build a new CDB with the LBA and length patched in, in case
2895      * DMA helpers split the transfer into multiple segments.  Do not
2896      * build a CDB smaller than what the guest wanted, and only build
2897      * a larger one if strictly necessary.
2898      */
2899     io_header->cmdp = req->cdb;
2900     lba = offset / s->qdev.blocksize;
2901     nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;
2902 
2903     if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
2904         /* 6-byte CDB */
2905         stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
2906         req->cdb[4] = nb_logical_blocks;
2907         req->cdb[5] = 0;
2908         io_header->cmd_len = 6;
2909     } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
2910         /* 10-byte CDB */
2911         req->cdb[0] = (req->cmd & 0x1f) | 0x20;
2912         req->cdb[1] = req->cdb1;
2913         stl_be_p(&req->cdb[2], lba);
2914         req->cdb[6] = req->group_number;
2915         stw_be_p(&req->cdb[7], nb_logical_blocks);
2916         req->cdb[9] = 0;
2917         io_header->cmd_len = 10;
2918     } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
2919         /* 12-byte CDB */
2920         req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
2921         req->cdb[1] = req->cdb1;
2922         stl_be_p(&req->cdb[2], lba);
2923         stl_be_p(&req->cdb[6], nb_logical_blocks);
2924         req->cdb[10] = req->group_number;
2925         req->cdb[11] = 0;
2926         io_header->cmd_len = 12;
2927     } else {
2928         /* 16-byte CDB */
2929         req->cdb[0] = (req->cmd & 0x1f) | 0x80;
2930         req->cdb[1] = req->cdb1;
2931         stq_be_p(&req->cdb[2], lba);
2932         stl_be_p(&req->cdb[10], nb_logical_blocks);
2933         req->cdb[14] = req->group_number;
2934         req->cdb[15] = 0;
2935         io_header->cmd_len = 16;
2936     }
2937 
2938     /* The rest is as in scsi-generic.c.  */
2939     io_header->mx_sb_len = sizeof(r->req.sense);
2940     io_header->sbp = r->req.sense;
2941     io_header->timeout = s->qdev.io_timeout * 1000;
2942     io_header->usr_ptr = r;
2943     io_header->flags |= SG_FLAG_DIRECT_IO;
2944     req->cb = cb;
2945     req->cb_opaque = opaque;
2946     trace_scsi_disk_aio_sgio_command(r->req.tag, req->cdb[0], lba,
2947                                      nb_logical_blocks, io_header->timeout);
2948     aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, scsi_block_sgio_complete, req);
2949     assert(aiocb != NULL);
2950     return aiocb;
2951 }
2952 
2953 static bool scsi_block_no_fua(SCSICommand *cmd)
2954 {
2955     return false;
2956 }
2957 
2958 static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
2959                                         QEMUIOVector *iov,
2960                                         BlockCompletionFunc *cb, void *cb_opaque,
2961                                         void *opaque)
2962 {
2963     SCSIBlockReq *r = opaque;
2964     return scsi_block_do_sgio(r, offset, iov,
2965                               SG_DXFER_FROM_DEV, cb, cb_opaque);
2966 }
2967 
2968 static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
2969                                          QEMUIOVector *iov,
2970                                          BlockCompletionFunc *cb, void *cb_opaque,
2971                                          void *opaque)
2972 {
2973     SCSIBlockReq *r = opaque;
2974     return scsi_block_do_sgio(r, offset, iov,
2975                               SG_DXFER_TO_DEV, cb, cb_opaque);
2976 }
2977 
2978 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2979 {
2980     switch (buf[0]) {
2981     case VERIFY_10:
2982     case VERIFY_12:
2983     case VERIFY_16:
2984         /* Check if BYTCHK == 0x01 (data-out buffer contains data
2985          * for the number of logical blocks specified in the length
2986          * field).  For other modes, do not use scatter/gather operation.
2987          */
2988         if ((buf[1] & 6) == 2) {
2989             return false;
2990         }
2991         break;
2992 
2993     case READ_6:
2994     case READ_10:
2995     case READ_12:
2996     case READ_16:
2997     case WRITE_6:
2998     case WRITE_10:
2999     case WRITE_12:
3000     case WRITE_16:
3001     case WRITE_VERIFY_10:
3002     case WRITE_VERIFY_12:
3003     case WRITE_VERIFY_16:
3004         /* MMC writing cannot be done via DMA helpers, because it sometimes
3005          * involves writing beyond the maximum LBA or to a negative LBA (lead-in).
3006          * We might use scsi_block_dma_reqops as long as no writing commands are
3007          * seen, but performance usually isn't paramount on optical media.  So,
3008          * just make scsi-block operate the same as scsi-generic for them.
3009          */
3010         if (s->qdev.type != TYPE_ROM) {
3011             return false;
3012         }
3013         break;
3014 
3015     default:
3016         break;
3017     }
3018 
3019     return true;
3020 }
3021 
3022 
3023 static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
3024 {
3025     SCSIBlockReq *r = (SCSIBlockReq *)req;
3026     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
3027 
3028     r->cmd = req->cmd.buf[0];
3029     switch (r->cmd >> 5) {
3030     case 0:
3031         /* 6-byte CDB.  */
3032         r->cdb1 = r->group_number = 0;
3033         break;
3034     case 1:
3035         /* 10-byte CDB.  */
3036         r->cdb1 = req->cmd.buf[1];
3037         r->group_number = req->cmd.buf[6];
3038         break;
3039     case 4:
3040         /* 12-byte CDB.  */
3041         r->cdb1 = req->cmd.buf[1];
3042         r->group_number = req->cmd.buf[10];
3043         break;
3044     case 5:
3045         /* 16-byte CDB.  */
3046         r->cdb1 = req->cmd.buf[1];
3047         r->group_number = req->cmd.buf[14];
3048         break;
3049     default:
3050         abort();
3051     }
3052 
3053     /* Protection information is not supported.  For SCSI versions 2 and
3054      * older (as determined by snooping the guest's INQUIRY commands),
3055      * there is no RD/WR/VRPROTECT, so skip this check in these versions.
3056      */
3057     if (s->qdev.scsi_version > 2 && (req->cmd.buf[1] & 0xe0)) {
3058         scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
3059         return 0;
3060     }
3061 
3062     return scsi_disk_dma_command(req, buf);
3063 }
3064 
3065 static const SCSIReqOps scsi_block_dma_reqops = {
3066     .size         = sizeof(SCSIBlockReq),
3067     .free_req     = scsi_free_request,
3068     .send_command = scsi_block_dma_command,
3069     .read_data    = scsi_read_data,
3070     .write_data   = scsi_write_data,
3071     .get_buf      = scsi_get_buf,
3072     .load_request = scsi_disk_load_request,
3073     .save_request = scsi_disk_save_request,
3074 };
3075 
3076 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
3077                                            uint32_t lun, uint8_t *buf,
3078                                            void *hba_private)
3079 {
3080     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3081 
3082     if (scsi_block_is_passthrough(s, buf)) {
3083         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
3084                               hba_private);
3085     } else {
3086         return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
3087                               hba_private);
3088     }
3089 }
3090 
3091 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
3092                                   uint8_t *buf, size_t buf_len,
3093                                   void *hba_private)
3094 {
3095     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3096 
3097     if (scsi_block_is_passthrough(s, buf)) {
3098         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, buf_len, hba_private);
3099     } else {
3100         return scsi_req_parse_cdb(&s->qdev, cmd, buf, buf_len);
3101     }
3102 }
3103 
3104 static void scsi_block_update_sense(SCSIRequest *req)
3105 {
3106     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
3107     SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r);
3108     r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense));
3109 }
3110 #endif
3111 
3112 static
3113 BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
3114                            BlockCompletionFunc *cb, void *cb_opaque,
3115                            void *opaque)
3116 {
3117     SCSIDiskReq *r = opaque;
3118     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3119     return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3120 }
3121 
3122 static
3123 BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
3124                             BlockCompletionFunc *cb, void *cb_opaque,
3125                             void *opaque)
3126 {
3127     SCSIDiskReq *r = opaque;
3128     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3129     return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3130 }
3131 
3132 static char *scsi_property_get_loadparm(Object *obj, Error **errp)
3133 {
3134     return g_strdup(SCSI_DISK_BASE(obj)->loadparm);
3135 }
3136 
3137 static void scsi_property_set_loadparm(Object *obj, const char *value,
3138                                        Error **errp)
3139 {
3140     void *lp_str;
3141 
3142     if (object_property_get_int(obj, "bootindex", NULL) < 0) {
3143         error_setg(errp, "'loadparm' is only valid for boot devices");
3144         return;
3145     }
3146 
3147     lp_str = g_malloc0(strlen(value) + 1);
3148     if (!qdev_prop_sanitize_s390x_loadparm(lp_str, value, errp)) {
3149         g_free(lp_str);
3150         return;
3151     }
3152     SCSI_DISK_BASE(obj)->loadparm = lp_str;
3153 }
3154 
3155 static void scsi_property_add_specifics(DeviceClass *dc)
3156 {
3157     ObjectClass *oc = OBJECT_CLASS(dc);
3158 
3159     /* The loadparm property is only supported on s390x */
3160     if (qemu_arch_available(QEMU_ARCH_S390X)) {
3161         object_class_property_add_str(oc, "loadparm",
3162                                       scsi_property_get_loadparm,
3163                                       scsi_property_set_loadparm);
3164         object_class_property_set_description(oc, "loadparm",
3165                                               "load parameter (s390x only)");
3166     }
3167 }
3168 
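/*
 * Common class setup for all scsi-disk variants: the default I/O hooks are
 * the plain BlockBackend helpers above, and need_fua_emulation is decided
 * per command by scsi_is_cmd_fua().  Subclasses override only the hooks
 * they need; as an illustrative sketch (not an upstream device):
 *
 *     static void my_disk_class_init(ObjectClass *klass, void *data)
 *     {
 *         SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
 *
 *         sdc->dma_readv  = my_dma_readv;   // hypothetical override
 *         sdc->dma_writev = my_dma_writev;  // hypothetical override
 *     }
 */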
3169 static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
3170 {
3171     DeviceClass *dc = DEVICE_CLASS(klass);
3172     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3173 
3174     dc->fw_name = "disk";
3175     device_class_set_legacy_reset(dc, scsi_disk_reset);
3176     sdc->dma_readv = scsi_dma_readv;
3177     sdc->dma_writev = scsi_dma_writev;
3178     sdc->need_fua_emulation = scsi_is_cmd_fua;
3179 }
3180 
3181 static const TypeInfo scsi_disk_base_info = {
3182     .name          = TYPE_SCSI_DISK_BASE,
3183     .parent        = TYPE_SCSI_DEVICE,
3184     .class_init    = scsi_disk_base_class_initfn,
3185     .instance_size = sizeof(SCSIDiskState),
3186     .class_size    = sizeof(SCSIDiskClass),
3187     .abstract      = true,
3188 };
3189 
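/*
 * Property list shared by the emulated variants (scsi-hd and scsi-cd);
 * scsi-block below defines its own, smaller set because identification data
 * (vendor, product, serial, ...) comes from the real device in the
 * passthrough case.
 */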
3190 #define DEFINE_SCSI_DISK_PROPERTIES()                                   \
3191     DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
3192     DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
3193     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
3194     DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
3195     DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
3196     DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
3197     DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
3198     DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id),          \
3199     DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState, migrate_emulated_scsi_request, true)
3200 
3201 
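/* scsi-hd: emulated direct-access disk.  scsi_version defaults to 5 (SPC-3). */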
3202 static const Property scsi_hd_properties[] = {
3203     DEFINE_SCSI_DISK_PROPERTIES(),
3204     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
3205                     SCSI_DISK_F_REMOVABLE, false),
3206     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
3207                     SCSI_DISK_F_DPOFUA, false),
3208     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
3209     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
3210     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
3211     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
3212                        DEFAULT_MAX_UNMAP_SIZE),
3213     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3214                        DEFAULT_MAX_IO_SIZE),
3215     DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
3216     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3217                       5),
3218     DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
3219                     quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
3220                     0),
3221     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
3222 };
3223 
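/*
 * Migration state shared by every variant: the generic SCSI device state
 * plus the removable-medium bookkeeping (media change/event, eject request,
 * tray position and lock).
 */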
3224 static const VMStateDescription vmstate_scsi_disk_state = {
3225     .name = "scsi-disk",
3226     .version_id = 1,
3227     .minimum_version_id = 1,
3228     .fields = (const VMStateField[]) {
3229         VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
3230         VMSTATE_BOOL(media_changed, SCSIDiskState),
3231         VMSTATE_BOOL(media_event, SCSIDiskState),
3232         VMSTATE_BOOL(eject_request, SCSIDiskState),
3233         VMSTATE_BOOL(tray_open, SCSIDiskState),
3234         VMSTATE_BOOL(tray_locked, SCSIDiskState),
3235         VMSTATE_END_OF_LIST()
3236     }
3237 };
3238 
3239 static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
3240 {
3241     DeviceClass *dc = DEVICE_CLASS(klass);
3242     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3243 
3244     sc->realize      = scsi_hd_realize;
3245     sc->unrealize    = scsi_unrealize;
3246     sc->alloc_req    = scsi_new_request;
3247     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3248     dc->desc = "virtual SCSI disk";
3249     device_class_set_props(dc, scsi_hd_properties);
3250     dc->vmsd  = &vmstate_scsi_disk_state;
3251 
3252     scsi_property_add_specifics(dc);
3253 }
3254 
3255 static const TypeInfo scsi_hd_info = {
3256     .name          = "scsi-hd",
3257     .parent        = TYPE_SCSI_DISK_BASE,
3258     .class_init    = scsi_hd_class_initfn,
3259 };
3260 
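/*
 * scsi-cd: removable, read-only medium, so the dpofua and unmap-related
 * properties of scsi-hd are omitted; the quirk bits accommodate guests that
 * expect non-standard MODE SENSE behaviour (e.g. Apple-specific mode pages,
 * as the quirk names suggest).
 */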
3261 static const Property scsi_cd_properties[] = {
3262     DEFINE_SCSI_DISK_PROPERTIES(),
3263     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
3264     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
3265     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
3266     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3267                        DEFAULT_MAX_IO_SIZE),
3268     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3269                       5),
3270     DEFINE_PROP_BIT("quirk_mode_page_apple_vendor", SCSIDiskState, quirks,
3271                     SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR, 0),
3272     DEFINE_PROP_BIT("quirk_mode_sense_rom_use_dbd", SCSIDiskState, quirks,
3273                     SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD, 0),
3274     DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
3275                     quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
3276                     0),
3277     DEFINE_PROP_BIT("quirk_mode_page_truncated", SCSIDiskState, quirks,
3278                     SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED, 0),
3279 };
3280 
3281 static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
3282 {
3283     DeviceClass *dc = DEVICE_CLASS(klass);
3284     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3285 
3286     sc->realize      = scsi_cd_realize;
3287     sc->alloc_req    = scsi_new_request;
3288     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3289     dc->desc = "virtual SCSI CD-ROM";
3290     device_class_set_props(dc, scsi_cd_properties);
3291     dc->vmsd  = &vmstate_scsi_disk_state;
3292 
3293     scsi_property_add_specifics(dc);
3294 }
3295 
3296 static const TypeInfo scsi_cd_info = {
3297     .name          = "scsi-cd",
3298     .parent        = TYPE_SCSI_DISK_BASE,
3299     .class_init    = scsi_cd_class_initfn,
3300 };
3301 
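/*
 * scsi-block (passthrough to a host block device) is Linux-only because it
 * depends on the SG_IO ioctl.
 */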
3302 #ifdef __linux__
3303 static const Property scsi_block_properties[] = {
3304     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),
3305     DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
3306     DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false),
3307     DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
3308     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
3309                        DEFAULT_MAX_UNMAP_SIZE),
3310     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3311                        DEFAULT_MAX_IO_SIZE),
3312     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3313                       -1),
3314     DEFINE_PROP_UINT32("io_timeout", SCSIDiskState, qdev.io_timeout,
3315                        DEFAULT_IO_TIMEOUT),
3316 };
3317 
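/*
 * scsi-block replaces the default hooks with the SG_IO-based ones and turns
 * off FUA emulation (scsi_block_no_fua), since the FUA bit travels to the
 * real device with the CDB.
 */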
3318 static void scsi_block_class_initfn(ObjectClass *klass, void *data)
3319 {
3320     DeviceClass *dc = DEVICE_CLASS(klass);
3321     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3322     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3323 
3324     sc->realize      = scsi_block_realize;
3325     sc->alloc_req    = scsi_block_new_request;
3326     sc->parse_cdb    = scsi_block_parse_cdb;
3327     sdc->dma_readv   = scsi_block_dma_readv;
3328     sdc->dma_writev  = scsi_block_dma_writev;
3329     sdc->update_sense = scsi_block_update_sense;
3330     sdc->need_fua_emulation = scsi_block_no_fua;
3331     dc->desc = "SCSI block device passthrough";
3332     device_class_set_props(dc, scsi_block_properties);
3333     dc->vmsd  = &vmstate_scsi_disk_state;
3334 }
3335 
3336 static const TypeInfo scsi_block_info = {
3337     .name          = "scsi-block",
3338     .parent        = TYPE_SCSI_DISK_BASE,
3339     .class_init    = scsi_block_class_initfn,
3340 };
3341 #endif
3342 
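/* Register all device types; scsi-block exists only on Linux hosts. */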
3343 static void scsi_disk_register_types(void)
3344 {
3345     type_register_static(&scsi_disk_base_info);
3346     type_register_static(&scsi_hd_info);
3347     type_register_static(&scsi_cd_info);
3348 #ifdef __linux__
3349     type_register_static(&scsi_block_info);
3350 #endif
3351 }
3352 
3353 type_init(scsi_disk_register_types)
3354