1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; 5 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 6 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 7 use crate::seccomp_filters::Thread; 8 use crate::thread_helper::spawn_virtio_thread; 9 use crate::vhost_user::VhostUserCommon; 10 use crate::{GuestMemoryMmap, GuestRegionMmap}; 11 use crate::{VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; 12 use block_util::VirtioBlockConfig; 13 use seccompiler::SeccompAction; 14 use std::mem; 15 use std::result; 16 use std::sync::{Arc, Barrier, Mutex}; 17 use std::thread; 18 use std::vec::Vec; 19 use versionize::{VersionMap, Versionize, VersionizeResult}; 20 use versionize_derive::Versionize; 21 use vhost::vhost_user::message::{ 22 VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, 23 VHOST_USER_CONFIG_OFFSET, 24 }; 25 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler}; 26 use virtio_bindings::bindings::virtio_blk::{ 27 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 28 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 29 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 30 }; 31 use virtio_queue::Queue; 32 use vm_memory::{ByteValued, GuestMemoryAtomic}; 33 use vm_migration::{ 34 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 35 Transportable, VersionMapped, 36 }; 37 use vmm_sys_util::eventfd::EventFd; 38 39 const DEFAULT_QUEUE_NUMBER: usize = 1; 40 41 #[derive(Versionize)] 42 pub struct State { 43 pub avail_features: u64, 44 pub acked_features: u64, 45 pub config: VirtioBlockConfig, 46 pub acked_protocol_features: u64, 47 pub vu_num_queues: usize, 48 } 49 50 impl VersionMapped for State {} 51 52 struct SlaveReqHandler {} 53 impl VhostUserMasterReqHandler for SlaveReqHandler {} 54 55 pub struct Blk { 56 common: VirtioCommon, 57 vu_common: VhostUserCommon, 58 id: String, 59 config: VirtioBlockConfig, 60 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 61 epoll_thread: Option<thread::JoinHandle<()>>, 62 seccomp_action: SeccompAction, 63 exit_evt: EventFd, 64 iommu: bool, 65 } 66 67 impl Blk { 68 /// Create a new vhost-user-blk device 69 pub fn new( 70 id: String, 71 vu_cfg: VhostUserConfig, 72 restoring: bool, 73 seccomp_action: SeccompAction, 74 exit_evt: EventFd, 75 iommu: bool, 76 ) -> Result<Blk> { 77 let num_queues = vu_cfg.num_queues; 78 79 if restoring { 80 // We need 'queue_sizes' to report a number of queues that will be 81 // enough to handle all the potential queues. VirtioPciDevice::new() 82 // will create the actual queues based on this information. 83 return Ok(Blk { 84 common: VirtioCommon { 85 device_type: VirtioDeviceType::Block as u32, 86 queue_sizes: vec![vu_cfg.queue_size; num_queues], 87 paused_sync: Some(Arc::new(Barrier::new(2))), 88 min_queues: DEFAULT_QUEUE_NUMBER as u16, 89 ..Default::default() 90 }, 91 vu_common: VhostUserCommon { 92 socket_path: vu_cfg.socket, 93 vu_num_queues: num_queues, 94 ..Default::default() 95 }, 96 id, 97 config: VirtioBlockConfig::default(), 98 guest_memory: None, 99 epoll_thread: None, 100 seccomp_action, 101 exit_evt, 102 iommu, 103 }); 104 } 105 106 let mut vu = 107 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 108 109 // Filling device and vring features VMM supports. 110 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 111 | 1 << VIRTIO_BLK_F_SEG_MAX 112 | 1 << VIRTIO_BLK_F_GEOMETRY 113 | 1 << VIRTIO_BLK_F_RO 114 | 1 << VIRTIO_BLK_F_BLK_SIZE 115 | 1 << VIRTIO_BLK_F_FLUSH 116 | 1 << VIRTIO_BLK_F_TOPOLOGY 117 | 1 << VIRTIO_BLK_F_CONFIG_WCE 118 | 1 << VIRTIO_BLK_F_DISCARD 119 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 120 | DEFAULT_VIRTIO_FEATURES; 121 122 if num_queues > 1 { 123 avail_features |= 1 << VIRTIO_BLK_F_MQ; 124 } 125 126 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 127 | VhostUserProtocolFeatures::MQ 128 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 129 | VhostUserProtocolFeatures::REPLY_ACK 130 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 131 | VhostUserProtocolFeatures::LOG_SHMFD; 132 133 let (acked_features, acked_protocol_features) = 134 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 135 136 let backend_num_queues = 137 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 138 vu.socket_handle() 139 .get_queue_num() 140 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 141 } else { 142 DEFAULT_QUEUE_NUMBER 143 }; 144 145 if num_queues > backend_num_queues { 146 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 147 num_queues, backend_num_queues); 148 return Err(Error::BadQueueNum); 149 } 150 151 let config_len = mem::size_of::<VirtioBlockConfig>(); 152 let config_space: Vec<u8> = vec![0u8; config_len as usize]; 153 let (_, config_space) = vu 154 .socket_handle() 155 .get_config( 156 VHOST_USER_CONFIG_OFFSET, 157 config_len as u32, 158 VhostUserConfigFlags::WRITABLE, 159 config_space.as_slice(), 160 ) 161 .map_err(Error::VhostUserGetConfig)?; 162 let mut config = VirtioBlockConfig::default(); 163 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 164 config = *backend_config; 165 config.num_queues = num_queues as u16; 166 } 167 168 Ok(Blk { 169 common: VirtioCommon { 170 device_type: VirtioDeviceType::Block as u32, 171 queue_sizes: vec![vu_cfg.queue_size; num_queues], 172 avail_features: acked_features, 173 // If part of the available features that have been acked, the 174 // PROTOCOL_FEATURES bit must be already set through the VIRTIO 175 // acked features as we know the guest would never ack it, thus 176 // the feature would be lost. 177 acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 178 paused_sync: Some(Arc::new(Barrier::new(2))), 179 min_queues: DEFAULT_QUEUE_NUMBER as u16, 180 ..Default::default() 181 }, 182 vu_common: VhostUserCommon { 183 vu: Some(Arc::new(Mutex::new(vu))), 184 acked_protocol_features, 185 socket_path: vu_cfg.socket, 186 vu_num_queues: num_queues, 187 ..Default::default() 188 }, 189 id, 190 config, 191 guest_memory: None, 192 epoll_thread: None, 193 seccomp_action, 194 exit_evt, 195 iommu, 196 }) 197 } 198 199 fn state(&self) -> State { 200 State { 201 avail_features: self.common.avail_features, 202 acked_features: self.common.acked_features, 203 config: self.config, 204 acked_protocol_features: self.vu_common.acked_protocol_features, 205 vu_num_queues: self.vu_common.vu_num_queues, 206 } 207 } 208 209 fn set_state(&mut self, state: &State) { 210 self.common.avail_features = state.avail_features; 211 self.common.acked_features = state.acked_features; 212 self.config = state.config; 213 self.vu_common.acked_protocol_features = state.acked_protocol_features; 214 self.vu_common.vu_num_queues = state.vu_num_queues; 215 216 if let Err(e) = self 217 .vu_common 218 .restore_backend_connection(self.common.acked_features) 219 { 220 error!( 221 "Failed restoring connection with vhost-user backend: {:?}", 222 e 223 ); 224 } 225 } 226 } 227 228 impl Drop for Blk { 229 fn drop(&mut self) { 230 if let Some(kill_evt) = self.common.kill_evt.take() { 231 if let Err(e) = kill_evt.write(1) { 232 error!("failed to kill vhost-user-blk: {:?}", e); 233 } 234 } 235 } 236 } 237 238 impl VirtioDevice for Blk { 239 fn device_type(&self) -> u32 { 240 self.common.device_type 241 } 242 243 fn queue_max_sizes(&self) -> &[u16] { 244 &self.common.queue_sizes 245 } 246 247 fn features(&self) -> u64 { 248 let mut features = self.common.avail_features; 249 if self.iommu { 250 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 251 } 252 features 253 } 254 255 fn ack_features(&mut self, value: u64) { 256 self.common.ack_features(value) 257 } 258 259 fn read_config(&self, offset: u64, data: &mut [u8]) { 260 self.read_config_from_slice(self.config.as_slice(), offset, data); 261 } 262 263 fn write_config(&mut self, offset: u64, data: &[u8]) { 264 // The "writeback" field is the only mutable field 265 let writeback_offset = 266 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 267 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 268 { 269 error!( 270 "Attempt to write to read-only field: offset {:x} length {}", 271 offset, 272 data.len() 273 ); 274 return; 275 } 276 277 self.config.writeback = data[0]; 278 if let Some(vu) = &self.vu_common.vu { 279 if let Err(e) = vu 280 .lock() 281 .unwrap() 282 .socket_handle() 283 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 284 .map_err(Error::VhostUserSetConfig) 285 { 286 error!("Failed setting vhost-user-blk configuration: {:?}", e); 287 } 288 } 289 } 290 291 fn activate( 292 &mut self, 293 mem: GuestMemoryAtomic<GuestMemoryMmap>, 294 interrupt_cb: Arc<dyn VirtioInterrupt>, 295 queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>, 296 queue_evts: Vec<EventFd>, 297 ) -> ActivateResult { 298 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 299 self.guest_memory = Some(mem.clone()); 300 301 let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None; 302 303 // Run a dedicated thread for handling potential reconnections with 304 // the backend. 305 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 306 307 let mut handler = self.vu_common.activate( 308 mem, 309 queues, 310 queue_evts, 311 interrupt_cb, 312 self.common.acked_features, 313 slave_req_handler, 314 kill_evt, 315 pause_evt, 316 )?; 317 318 let paused = self.common.paused.clone(); 319 let paused_sync = self.common.paused_sync.clone(); 320 321 let mut epoll_threads = Vec::new(); 322 323 spawn_virtio_thread( 324 &self.id, 325 &self.seccomp_action, 326 Thread::VirtioVhostBlock, 327 &mut epoll_threads, 328 &self.exit_evt, 329 move || { 330 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 331 error!("Error running worker: {:?}", e); 332 } 333 }, 334 )?; 335 self.epoll_thread = Some(epoll_threads.remove(0)); 336 337 Ok(()) 338 } 339 340 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 341 // We first must resume the virtio thread if it was paused. 342 if self.common.pause_evt.take().is_some() { 343 self.common.resume().ok()?; 344 } 345 346 if let Some(vu) = &self.vu_common.vu { 347 if let Err(e) = vu 348 .lock() 349 .unwrap() 350 .reset_vhost_user(self.common.queue_sizes.len()) 351 { 352 error!("Failed to reset vhost-user daemon: {:?}", e); 353 return None; 354 } 355 } 356 357 if let Some(kill_evt) = self.common.kill_evt.take() { 358 // Ignore the result because there is nothing we can do about it. 359 let _ = kill_evt.write(1); 360 } 361 362 event!("virtio-device", "reset", "id", &self.id); 363 364 // Return the interrupt 365 Some(self.common.interrupt_cb.take().unwrap()) 366 } 367 368 fn shutdown(&mut self) { 369 self.vu_common.shutdown() 370 } 371 372 fn add_memory_region( 373 &mut self, 374 region: &Arc<GuestRegionMmap>, 375 ) -> std::result::Result<(), crate::Error> { 376 self.vu_common.add_memory_region(&self.guest_memory, region) 377 } 378 } 379 380 impl Pausable for Blk { 381 fn pause(&mut self) -> result::Result<(), MigratableError> { 382 self.vu_common.pause()?; 383 self.common.pause() 384 } 385 386 fn resume(&mut self) -> result::Result<(), MigratableError> { 387 self.common.resume()?; 388 389 if let Some(epoll_thread) = &self.epoll_thread { 390 epoll_thread.thread().unpark(); 391 } 392 393 self.vu_common.resume() 394 } 395 } 396 397 impl Snapshottable for Blk { 398 fn id(&self) -> String { 399 self.id.clone() 400 } 401 402 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 403 self.vu_common.snapshot(&self.id(), &self.state()) 404 } 405 406 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 407 self.set_state(&snapshot.to_versioned_state(&self.id)?); 408 Ok(()) 409 } 410 } 411 impl Transportable for Blk {} 412 413 impl Migratable for Blk { 414 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 415 self.vu_common.start_dirty_log(&self.guest_memory) 416 } 417 418 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 419 self.vu_common.stop_dirty_log() 420 } 421 422 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 423 self.vu_common.dirty_log(&self.guest_memory) 424 } 425 426 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 427 self.vu_common.start_migration() 428 } 429 430 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 431 self.vu_common 432 .complete_migration(self.common.kill_evt.take()) 433 } 434 } 435