xref: /cloud-hypervisor/virtio-devices/src/vhost_user/blk.rs (revision 686e6d50824fcc7403a51b91545899a6301d6216)
1 // Copyright 2019 Intel Corporation. All Rights Reserved.
2 // SPDX-License-Identifier: Apache-2.0
3 
4 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType};
5 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle};
6 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES};
7 use crate::seccomp_filters::Thread;
8 use crate::thread_helper::spawn_virtio_thread;
9 use crate::vhost_user::VhostUserCommon;
10 use crate::{GuestMemoryMmap, GuestRegionMmap};
11 use crate::{VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM};
12 use block_util::VirtioBlockConfig;
13 use seccompiler::SeccompAction;
14 use std::mem;
15 use std::result;
16 use std::sync::{Arc, Barrier, Mutex};
17 use std::thread;
18 use std::vec::Vec;
19 use versionize::{VersionMap, Versionize, VersionizeResult};
20 use versionize_derive::Versionize;
21 use vhost::vhost_user::message::{
22     VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
23     VHOST_USER_CONFIG_OFFSET,
24 };
25 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler};
26 use virtio_bindings::bindings::virtio_blk::{
27     VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH,
28     VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX,
29     VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES,
30 };
31 use virtio_queue::Queue;
32 use vm_memory::{ByteValued, GuestMemoryAtomic};
33 use vm_migration::{
34     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable,
35     Transportable, VersionMapped,
36 };
37 use vmm_sys_util::eventfd::EventFd;
38 
/// Queue count assumed for the backend when the MQ protocol feature was not
/// acknowledged; also used as the device's `min_queues` value.
const DEFAULT_QUEUE_NUMBER: usize = 1;
40 
/// Versioned device state: produced by `Blk::state()` when snapshotting and
/// applied by `Blk::set_state()` when restoring.
#[derive(Versionize)]
pub struct State {
    /// Copy of the `avail_features` held in the device's `VirtioCommon`.
    pub avail_features: u64,
    /// Copy of the `acked_features` held in the device's `VirtioCommon`.
    pub acked_features: u64,
    /// Copy of the virtio-blk configuration space held by the device.
    pub config: VirtioBlockConfig,
    /// Copy of the `acked_protocol_features` held in `VhostUserCommon`.
    pub acked_protocol_features: u64,
    /// Copy of the `vu_num_queues` held in `VhostUserCommon`.
    pub vu_num_queues: usize,
}

// Empty impl: every `VersionMapped` method keeps its default implementation.
impl VersionMapped for State {}
51 
/// Placeholder type used to name the slave-request handler type in
/// `activate()`, where the handler value itself is always `None`.
struct SlaveReqHandler {}
// Empty impl: every trait method keeps its default implementation.
impl VhostUserMasterReqHandler for SlaveReqHandler {}
54 
/// vhost-user block device front-end.
pub struct Blk {
    /// Generic virtio device state (device type, queue sizes, feature bits,
    /// kill/pause events, interrupt callback).
    common: VirtioCommon,
    /// vhost-user specific state (backend handle, acked protocol features,
    /// socket path, number of queues).
    vu_common: VhostUserCommon,
    /// Device identifier; used when spawning the worker thread, emitting
    /// events and creating/restoring snapshots.
    id: String,
    /// virtio-blk configuration space served by `read_config()` and updated
    /// by `write_config()`.
    config: VirtioBlockConfig,
    /// Guest memory handle; `None` until `activate()` stores it.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    /// Join handle of the worker thread spawned by `activate()`.
    epoll_thread: Option<thread::JoinHandle<()>>,
    /// Seccomp action handed to the worker thread spawner.
    seccomp_action: SeccompAction,
    /// Exit event handed to the worker thread spawner.
    exit_evt: EventFd,
    /// When true, `features()` additionally advertises
    /// `VIRTIO_F_IOMMU_PLATFORM`.
    iommu: bool,
}
66 
67 impl Blk {
68     /// Create a new vhost-user-blk device
69     pub fn new(
70         id: String,
71         vu_cfg: VhostUserConfig,
72         restoring: bool,
73         seccomp_action: SeccompAction,
74         exit_evt: EventFd,
75         iommu: bool,
76     ) -> Result<Blk> {
77         let num_queues = vu_cfg.num_queues;
78 
79         if restoring {
80             // We need 'queue_sizes' to report a number of queues that will be
81             // enough to handle all the potential queues. VirtioPciDevice::new()
82             // will create the actual queues based on this information.
83             return Ok(Blk {
84                 common: VirtioCommon {
85                     device_type: VirtioDeviceType::Block as u32,
86                     queue_sizes: vec![vu_cfg.queue_size; num_queues],
87                     paused_sync: Some(Arc::new(Barrier::new(2))),
88                     min_queues: DEFAULT_QUEUE_NUMBER as u16,
89                     ..Default::default()
90                 },
91                 vu_common: VhostUserCommon {
92                     socket_path: vu_cfg.socket,
93                     vu_num_queues: num_queues,
94                     ..Default::default()
95                 },
96                 id,
97                 config: VirtioBlockConfig::default(),
98                 guest_memory: None,
99                 epoll_thread: None,
100                 seccomp_action,
101                 exit_evt,
102                 iommu,
103             });
104         }
105 
106         let mut vu =
107             VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?;
108 
109         // Filling device and vring features VMM supports.
110         let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX
111             | 1 << VIRTIO_BLK_F_SEG_MAX
112             | 1 << VIRTIO_BLK_F_GEOMETRY
113             | 1 << VIRTIO_BLK_F_RO
114             | 1 << VIRTIO_BLK_F_BLK_SIZE
115             | 1 << VIRTIO_BLK_F_FLUSH
116             | 1 << VIRTIO_BLK_F_TOPOLOGY
117             | 1 << VIRTIO_BLK_F_CONFIG_WCE
118             | 1 << VIRTIO_BLK_F_DISCARD
119             | 1 << VIRTIO_BLK_F_WRITE_ZEROES
120             | DEFAULT_VIRTIO_FEATURES;
121 
122         if num_queues > 1 {
123             avail_features |= 1 << VIRTIO_BLK_F_MQ;
124         }
125 
126         let avail_protocol_features = VhostUserProtocolFeatures::CONFIG
127             | VhostUserProtocolFeatures::MQ
128             | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS
129             | VhostUserProtocolFeatures::REPLY_ACK
130             | VhostUserProtocolFeatures::INFLIGHT_SHMFD
131             | VhostUserProtocolFeatures::LOG_SHMFD;
132 
133         let (acked_features, acked_protocol_features) =
134             vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
135 
136         let backend_num_queues =
137             if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 {
138                 vu.socket_handle()
139                     .get_queue_num()
140                     .map_err(Error::VhostUserGetQueueMaxNum)? as usize
141             } else {
142                 DEFAULT_QUEUE_NUMBER
143             };
144 
145         if num_queues > backend_num_queues {
146             error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n",
147                 num_queues, backend_num_queues);
148             return Err(Error::BadQueueNum);
149         }
150 
151         let config_len = mem::size_of::<VirtioBlockConfig>();
152         let config_space: Vec<u8> = vec![0u8; config_len as usize];
153         let (_, config_space) = vu
154             .socket_handle()
155             .get_config(
156                 VHOST_USER_CONFIG_OFFSET,
157                 config_len as u32,
158                 VhostUserConfigFlags::WRITABLE,
159                 config_space.as_slice(),
160             )
161             .map_err(Error::VhostUserGetConfig)?;
162         let mut config = VirtioBlockConfig::default();
163         if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) {
164             config = *backend_config;
165             config.num_queues = num_queues as u16;
166         }
167 
168         Ok(Blk {
169             common: VirtioCommon {
170                 device_type: VirtioDeviceType::Block as u32,
171                 queue_sizes: vec![vu_cfg.queue_size; num_queues],
172                 avail_features: acked_features,
173                 // If part of the available features that have been acked, the
174                 // PROTOCOL_FEATURES bit must be already set through the VIRTIO
175                 // acked features as we know the guest would never ack it, thus
176                 // the feature would be lost.
177                 acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
178                 paused_sync: Some(Arc::new(Barrier::new(2))),
179                 min_queues: DEFAULT_QUEUE_NUMBER as u16,
180                 ..Default::default()
181             },
182             vu_common: VhostUserCommon {
183                 vu: Some(Arc::new(Mutex::new(vu))),
184                 acked_protocol_features,
185                 socket_path: vu_cfg.socket,
186                 vu_num_queues: num_queues,
187                 ..Default::default()
188             },
189             id,
190             config,
191             guest_memory: None,
192             epoll_thread: None,
193             seccomp_action,
194             exit_evt,
195             iommu,
196         })
197     }
198 
199     fn state(&self) -> State {
200         State {
201             avail_features: self.common.avail_features,
202             acked_features: self.common.acked_features,
203             config: self.config,
204             acked_protocol_features: self.vu_common.acked_protocol_features,
205             vu_num_queues: self.vu_common.vu_num_queues,
206         }
207     }
208 
209     fn set_state(&mut self, state: &State) {
210         self.common.avail_features = state.avail_features;
211         self.common.acked_features = state.acked_features;
212         self.config = state.config;
213         self.vu_common.acked_protocol_features = state.acked_protocol_features;
214         self.vu_common.vu_num_queues = state.vu_num_queues;
215 
216         if let Err(e) = self
217             .vu_common
218             .restore_backend_connection(self.common.acked_features)
219         {
220             error!(
221                 "Failed restoring connection with vhost-user backend: {:?}",
222                 e
223             );
224         }
225     }
226 }
227 
228 impl Drop for Blk {
229     fn drop(&mut self) {
230         if let Some(kill_evt) = self.common.kill_evt.take() {
231             if let Err(e) = kill_evt.write(1) {
232                 error!("failed to kill vhost-user-blk: {:?}", e);
233             }
234         }
235     }
236 }
237 
238 impl VirtioDevice for Blk {
239     fn device_type(&self) -> u32 {
240         self.common.device_type
241     }
242 
243     fn queue_max_sizes(&self) -> &[u16] {
244         &self.common.queue_sizes
245     }
246 
247     fn features(&self) -> u64 {
248         let mut features = self.common.avail_features;
249         if self.iommu {
250             features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
251         }
252         features
253     }
254 
255     fn ack_features(&mut self, value: u64) {
256         self.common.ack_features(value)
257     }
258 
259     fn read_config(&self, offset: u64, data: &mut [u8]) {
260         self.read_config_from_slice(self.config.as_slice(), offset, data);
261     }
262 
263     fn write_config(&mut self, offset: u64, data: &[u8]) {
264         // The "writeback" field is the only mutable field
265         let writeback_offset =
266             (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64);
267         if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback)
268         {
269             error!(
270                 "Attempt to write to read-only field: offset {:x} length {}",
271                 offset,
272                 data.len()
273             );
274             return;
275         }
276 
277         self.config.writeback = data[0];
278         if let Some(vu) = &self.vu_common.vu {
279             if let Err(e) = vu
280                 .lock()
281                 .unwrap()
282                 .socket_handle()
283                 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data)
284                 .map_err(Error::VhostUserSetConfig)
285             {
286                 error!("Failed setting vhost-user-blk configuration: {:?}", e);
287             }
288         }
289     }
290 
291     fn activate(
292         &mut self,
293         mem: GuestMemoryAtomic<GuestMemoryMmap>,
294         interrupt_cb: Arc<dyn VirtioInterrupt>,
295         queues: Vec<(usize, Queue<GuestMemoryAtomic<GuestMemoryMmap>>, EventFd)>,
296     ) -> ActivateResult {
297         self.common.activate(&queues, &interrupt_cb)?;
298         self.guest_memory = Some(mem.clone());
299 
300         let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None;
301 
302         // Run a dedicated thread for handling potential reconnections with
303         // the backend.
304         let (kill_evt, pause_evt) = self.common.dup_eventfds();
305 
306         let mut handler = self.vu_common.activate(
307             mem,
308             queues,
309             interrupt_cb,
310             self.common.acked_features,
311             slave_req_handler,
312             kill_evt,
313             pause_evt,
314         )?;
315 
316         let paused = self.common.paused.clone();
317         let paused_sync = self.common.paused_sync.clone();
318 
319         let mut epoll_threads = Vec::new();
320 
321         spawn_virtio_thread(
322             &self.id,
323             &self.seccomp_action,
324             Thread::VirtioVhostBlock,
325             &mut epoll_threads,
326             &self.exit_evt,
327             move || {
328                 if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
329                     error!("Error running worker: {:?}", e);
330                 }
331             },
332         )?;
333         self.epoll_thread = Some(epoll_threads.remove(0));
334 
335         Ok(())
336     }
337 
338     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
339         // We first must resume the virtio thread if it was paused.
340         if self.common.pause_evt.take().is_some() {
341             self.common.resume().ok()?;
342         }
343 
344         if let Some(vu) = &self.vu_common.vu {
345             if let Err(e) = vu.lock().unwrap().reset_vhost_user() {
346                 error!("Failed to reset vhost-user daemon: {:?}", e);
347                 return None;
348             }
349         }
350 
351         if let Some(kill_evt) = self.common.kill_evt.take() {
352             // Ignore the result because there is nothing we can do about it.
353             let _ = kill_evt.write(1);
354         }
355 
356         event!("virtio-device", "reset", "id", &self.id);
357 
358         // Return the interrupt
359         Some(self.common.interrupt_cb.take().unwrap())
360     }
361 
362     fn shutdown(&mut self) {
363         self.vu_common.shutdown()
364     }
365 
366     fn add_memory_region(
367         &mut self,
368         region: &Arc<GuestRegionMmap>,
369     ) -> std::result::Result<(), crate::Error> {
370         self.vu_common.add_memory_region(&self.guest_memory, region)
371     }
372 }
373 
374 impl Pausable for Blk {
375     fn pause(&mut self) -> result::Result<(), MigratableError> {
376         self.vu_common.pause()?;
377         self.common.pause()
378     }
379 
380     fn resume(&mut self) -> result::Result<(), MigratableError> {
381         self.common.resume()?;
382 
383         if let Some(epoll_thread) = &self.epoll_thread {
384             epoll_thread.thread().unpark();
385         }
386 
387         self.vu_common.resume()
388     }
389 }
390 
391 impl Snapshottable for Blk {
392     fn id(&self) -> String {
393         self.id.clone()
394     }
395 
396     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
397         self.vu_common.snapshot(&self.id(), &self.state())
398     }
399 
400     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
401         self.set_state(&snapshot.to_versioned_state(&self.id)?);
402         Ok(())
403     }
404 }
405 impl Transportable for Blk {}
406 
407 impl Migratable for Blk {
408     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
409         self.vu_common.start_dirty_log(&self.guest_memory)
410     }
411 
412     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
413         self.vu_common.stop_dirty_log()
414     }
415 
416     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
417         self.vu_common.dirty_log(&self.guest_memory)
418     }
419 
420     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
421         self.vu_common.start_migration()
422     }
423 
424     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
425         self.vu_common
426             .complete_migration(self.common.kill_evt.take())
427     }
428 }
429