1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; 5 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 6 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 7 use crate::seccomp_filters::Thread; 8 use crate::thread_helper::spawn_virtio_thread; 9 use crate::vhost_user::VhostUserCommon; 10 use crate::{GuestMemoryMmap, GuestRegionMmap}; 11 use crate::{VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; 12 use block::VirtioBlockConfig; 13 use seccompiler::SeccompAction; 14 use std::mem; 15 use std::result; 16 use std::sync::atomic::AtomicBool; 17 use std::sync::{Arc, Barrier, Mutex}; 18 use std::thread; 19 use versionize::{VersionMap, Versionize, VersionizeResult}; 20 use versionize_derive::Versionize; 21 use vhost::vhost_user::message::{ 22 VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, 23 VHOST_USER_CONFIG_OFFSET, 24 }; 25 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 26 use virtio_bindings::virtio_blk::{ 27 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 28 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 29 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 30 }; 31 use virtio_queue::Queue; 32 use vm_memory::{ByteValued, GuestMemoryAtomic}; 33 use vm_migration::{ 34 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 35 Transportable, VersionMapped, 36 }; 37 use vmm_sys_util::eventfd::EventFd; 38 39 const DEFAULT_QUEUE_NUMBER: usize = 1; 40 41 #[derive(Versionize)] 42 pub struct State { 43 pub avail_features: u64, 44 pub acked_features: u64, 45 pub config: VirtioBlockConfig, 46 pub acked_protocol_features: u64, 47 pub vu_num_queues: usize, 48 } 49 50 impl VersionMapped for State {} 51 52 struct BackendReqHandler {} 53 impl VhostUserFrontendReqHandler for BackendReqHandler {} 54 55 pub struct Blk { 56 common: VirtioCommon, 57 vu_common: VhostUserCommon, 58 id: String, 59 config: VirtioBlockConfig, 60 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 61 epoll_thread: Option<thread::JoinHandle<()>>, 62 seccomp_action: SeccompAction, 63 exit_evt: EventFd, 64 iommu: bool, 65 } 66 67 impl Blk { 68 /// Create a new vhost-user-blk device 69 pub fn new( 70 id: String, 71 vu_cfg: VhostUserConfig, 72 seccomp_action: SeccompAction, 73 exit_evt: EventFd, 74 iommu: bool, 75 state: Option<State>, 76 ) -> Result<Blk> { 77 let num_queues = vu_cfg.num_queues; 78 79 let mut vu = 80 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 81 82 let ( 83 avail_features, 84 acked_features, 85 acked_protocol_features, 86 vu_num_queues, 87 config, 88 paused, 89 ) = if let Some(state) = state { 90 info!("Restoring vhost-user-block {}", id); 91 92 vu.set_protocol_features_vhost_user( 93 state.acked_features, 94 state.acked_protocol_features, 95 )?; 96 97 ( 98 state.avail_features, 99 state.acked_features, 100 state.acked_protocol_features, 101 state.vu_num_queues, 102 state.config, 103 true, 104 ) 105 } else { 106 // Filling device and vring features VMM supports. 107 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 108 | 1 << VIRTIO_BLK_F_SEG_MAX 109 | 1 << VIRTIO_BLK_F_GEOMETRY 110 | 1 << VIRTIO_BLK_F_RO 111 | 1 << VIRTIO_BLK_F_BLK_SIZE 112 | 1 << VIRTIO_BLK_F_FLUSH 113 | 1 << VIRTIO_BLK_F_TOPOLOGY 114 | 1 << VIRTIO_BLK_F_CONFIG_WCE 115 | 1 << VIRTIO_BLK_F_DISCARD 116 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 117 | DEFAULT_VIRTIO_FEATURES; 118 119 if num_queues > 1 { 120 avail_features |= 1 << VIRTIO_BLK_F_MQ; 121 } 122 123 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 124 | VhostUserProtocolFeatures::MQ 125 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 126 | VhostUserProtocolFeatures::REPLY_ACK 127 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 128 | VhostUserProtocolFeatures::LOG_SHMFD; 129 130 let (acked_features, acked_protocol_features) = 131 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 132 133 let backend_num_queues = 134 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 135 vu.socket_handle() 136 .get_queue_num() 137 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 138 } else { 139 DEFAULT_QUEUE_NUMBER 140 }; 141 142 if num_queues > backend_num_queues { 143 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 144 num_queues, backend_num_queues); 145 return Err(Error::BadQueueNum); 146 } 147 148 let config_len = mem::size_of::<VirtioBlockConfig>(); 149 let config_space: Vec<u8> = vec![0u8; config_len]; 150 let (_, config_space) = vu 151 .socket_handle() 152 .get_config( 153 VHOST_USER_CONFIG_OFFSET, 154 config_len as u32, 155 VhostUserConfigFlags::WRITABLE, 156 config_space.as_slice(), 157 ) 158 .map_err(Error::VhostUserGetConfig)?; 159 let mut config = VirtioBlockConfig::default(); 160 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 161 config = *backend_config; 162 config.num_queues = num_queues as u16; 163 } 164 165 ( 166 acked_features, 167 // If part of the available features that have been acked, 168 // the PROTOCOL_FEATURES bit must be already set through 169 // the VIRTIO acked features as we know the guest would 170 // never ack it, thus the feature would be lost. 171 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 172 acked_protocol_features, 173 num_queues, 174 config, 175 false, 176 ) 177 }; 178 179 Ok(Blk { 180 common: VirtioCommon { 181 device_type: VirtioDeviceType::Block as u32, 182 queue_sizes: vec![vu_cfg.queue_size; num_queues], 183 avail_features, 184 acked_features, 185 paused_sync: Some(Arc::new(Barrier::new(2))), 186 min_queues: DEFAULT_QUEUE_NUMBER as u16, 187 paused: Arc::new(AtomicBool::new(paused)), 188 ..Default::default() 189 }, 190 vu_common: VhostUserCommon { 191 vu: Some(Arc::new(Mutex::new(vu))), 192 acked_protocol_features, 193 socket_path: vu_cfg.socket, 194 vu_num_queues, 195 ..Default::default() 196 }, 197 id, 198 config, 199 guest_memory: None, 200 epoll_thread: None, 201 seccomp_action, 202 exit_evt, 203 iommu, 204 }) 205 } 206 207 fn state(&self) -> State { 208 State { 209 avail_features: self.common.avail_features, 210 acked_features: self.common.acked_features, 211 config: self.config, 212 acked_protocol_features: self.vu_common.acked_protocol_features, 213 vu_num_queues: self.vu_common.vu_num_queues, 214 } 215 } 216 } 217 218 impl Drop for Blk { 219 fn drop(&mut self) { 220 if let Some(kill_evt) = self.common.kill_evt.take() { 221 if let Err(e) = kill_evt.write(1) { 222 error!("failed to kill vhost-user-blk: {:?}", e); 223 } 224 } 225 self.common.wait_for_epoll_threads(); 226 if let Some(thread) = self.epoll_thread.take() { 227 if let Err(e) = thread.join() { 228 error!("Error joining thread: {:?}", e); 229 } 230 } 231 } 232 } 233 234 impl VirtioDevice for Blk { 235 fn device_type(&self) -> u32 { 236 self.common.device_type 237 } 238 239 fn queue_max_sizes(&self) -> &[u16] { 240 &self.common.queue_sizes 241 } 242 243 fn features(&self) -> u64 { 244 let mut features = self.common.avail_features; 245 if self.iommu { 246 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 247 } 248 features 249 } 250 251 fn ack_features(&mut self, value: u64) { 252 self.common.ack_features(value) 253 } 254 255 fn read_config(&self, offset: u64, data: &mut [u8]) { 256 self.read_config_from_slice(self.config.as_slice(), offset, data); 257 } 258 259 fn write_config(&mut self, offset: u64, data: &[u8]) { 260 // The "writeback" field is the only mutable field 261 let writeback_offset = 262 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 263 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 264 { 265 error!( 266 "Attempt to write to read-only field: offset {:x} length {}", 267 offset, 268 data.len() 269 ); 270 return; 271 } 272 273 self.config.writeback = data[0]; 274 if let Some(vu) = &self.vu_common.vu { 275 if let Err(e) = vu 276 .lock() 277 .unwrap() 278 .socket_handle() 279 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 280 .map_err(Error::VhostUserSetConfig) 281 { 282 error!("Failed setting vhost-user-blk configuration: {:?}", e); 283 } 284 } 285 } 286 287 fn activate( 288 &mut self, 289 mem: GuestMemoryAtomic<GuestMemoryMmap>, 290 interrupt_cb: Arc<dyn VirtioInterrupt>, 291 queues: Vec<(usize, Queue, EventFd)>, 292 ) -> ActivateResult { 293 self.common.activate(&queues, &interrupt_cb)?; 294 self.guest_memory = Some(mem.clone()); 295 296 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 297 298 // Run a dedicated thread for handling potential reconnections with 299 // the backend. 300 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 301 302 let mut handler = self.vu_common.activate( 303 mem, 304 queues, 305 interrupt_cb, 306 self.common.acked_features, 307 backend_req_handler, 308 kill_evt, 309 pause_evt, 310 )?; 311 312 let paused = self.common.paused.clone(); 313 let paused_sync = self.common.paused_sync.clone(); 314 315 let mut epoll_threads = Vec::new(); 316 317 spawn_virtio_thread( 318 &self.id, 319 &self.seccomp_action, 320 Thread::VirtioVhostBlock, 321 &mut epoll_threads, 322 &self.exit_evt, 323 move || handler.run(paused, paused_sync.unwrap()), 324 )?; 325 self.epoll_thread = Some(epoll_threads.remove(0)); 326 327 Ok(()) 328 } 329 330 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 331 // We first must resume the virtio thread if it was paused. 332 if self.common.pause_evt.take().is_some() { 333 self.common.resume().ok()?; 334 } 335 336 if let Some(vu) = &self.vu_common.vu { 337 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 338 error!("Failed to reset vhost-user daemon: {:?}", e); 339 return None; 340 } 341 } 342 343 if let Some(kill_evt) = self.common.kill_evt.take() { 344 // Ignore the result because there is nothing we can do about it. 345 let _ = kill_evt.write(1); 346 } 347 348 event!("virtio-device", "reset", "id", &self.id); 349 350 // Return the interrupt 351 Some(self.common.interrupt_cb.take().unwrap()) 352 } 353 354 fn shutdown(&mut self) { 355 self.vu_common.shutdown() 356 } 357 358 fn add_memory_region( 359 &mut self, 360 region: &Arc<GuestRegionMmap>, 361 ) -> std::result::Result<(), crate::Error> { 362 self.vu_common.add_memory_region(&self.guest_memory, region) 363 } 364 } 365 366 impl Pausable for Blk { 367 fn pause(&mut self) -> result::Result<(), MigratableError> { 368 self.vu_common.pause()?; 369 self.common.pause() 370 } 371 372 fn resume(&mut self) -> result::Result<(), MigratableError> { 373 self.common.resume()?; 374 375 if let Some(epoll_thread) = &self.epoll_thread { 376 epoll_thread.thread().unpark(); 377 } 378 379 self.vu_common.resume() 380 } 381 } 382 383 impl Snapshottable for Blk { 384 fn id(&self) -> String { 385 self.id.clone() 386 } 387 388 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 389 self.vu_common.snapshot(&self.state()) 390 } 391 } 392 impl Transportable for Blk {} 393 394 impl Migratable for Blk { 395 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 396 self.vu_common.start_dirty_log(&self.guest_memory) 397 } 398 399 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 400 self.vu_common.stop_dirty_log() 401 } 402 403 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 404 self.vu_common.dirty_log(&self.guest_memory) 405 } 406 407 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 408 self.vu_common.start_migration() 409 } 410 411 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 412 self.vu_common 413 .complete_migration(self.common.kill_evt.take()) 414 } 415 } 416