1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; 5 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 6 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 7 use crate::seccomp_filters::Thread; 8 use crate::thread_helper::spawn_virtio_thread; 9 use crate::vhost_user::VhostUserCommon; 10 use crate::{GuestMemoryMmap, GuestRegionMmap}; 11 use crate::{VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; 12 use block::VirtioBlockConfig; 13 use seccompiler::SeccompAction; 14 use std::mem; 15 use std::result; 16 use std::sync::atomic::AtomicBool; 17 use std::sync::{Arc, Barrier, Mutex}; 18 use std::thread; 19 use std::vec::Vec; 20 use versionize::{VersionMap, Versionize, VersionizeResult}; 21 use versionize_derive::Versionize; 22 use vhost::vhost_user::message::{ 23 VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, 24 VHOST_USER_CONFIG_OFFSET, 25 }; 26 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 27 use virtio_bindings::virtio_blk::{ 28 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 29 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 30 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 31 }; 32 use virtio_queue::Queue; 33 use vm_memory::{ByteValued, GuestMemoryAtomic}; 34 use vm_migration::{ 35 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 36 Transportable, VersionMapped, 37 }; 38 use vmm_sys_util::eventfd::EventFd; 39 40 const DEFAULT_QUEUE_NUMBER: usize = 1; 41 42 #[derive(Versionize)] 43 pub struct State { 44 pub avail_features: u64, 45 pub acked_features: u64, 46 pub config: VirtioBlockConfig, 47 pub acked_protocol_features: u64, 48 pub vu_num_queues: usize, 49 } 50 51 impl VersionMapped for State {} 52 53 struct BackendReqHandler {} 54 impl VhostUserFrontendReqHandler for BackendReqHandler {} 55 56 pub struct Blk { 57 common: VirtioCommon, 58 vu_common: VhostUserCommon, 59 id: String, 60 config: VirtioBlockConfig, 61 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 62 epoll_thread: Option<thread::JoinHandle<()>>, 63 seccomp_action: SeccompAction, 64 exit_evt: EventFd, 65 iommu: bool, 66 } 67 68 impl Blk { 69 /// Create a new vhost-user-blk device 70 pub fn new( 71 id: String, 72 vu_cfg: VhostUserConfig, 73 seccomp_action: SeccompAction, 74 exit_evt: EventFd, 75 iommu: bool, 76 state: Option<State>, 77 ) -> Result<Blk> { 78 let num_queues = vu_cfg.num_queues; 79 80 let mut vu = 81 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 82 83 let ( 84 avail_features, 85 acked_features, 86 acked_protocol_features, 87 vu_num_queues, 88 config, 89 paused, 90 ) = if let Some(state) = state { 91 info!("Restoring vhost-user-block {}", id); 92 93 vu.set_protocol_features_vhost_user( 94 state.acked_features, 95 state.acked_protocol_features, 96 )?; 97 98 ( 99 state.avail_features, 100 state.acked_features, 101 state.acked_protocol_features, 102 state.vu_num_queues, 103 state.config, 104 true, 105 ) 106 } else { 107 // Filling device and vring features VMM supports. 108 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 109 | 1 << VIRTIO_BLK_F_SEG_MAX 110 | 1 << VIRTIO_BLK_F_GEOMETRY 111 | 1 << VIRTIO_BLK_F_RO 112 | 1 << VIRTIO_BLK_F_BLK_SIZE 113 | 1 << VIRTIO_BLK_F_FLUSH 114 | 1 << VIRTIO_BLK_F_TOPOLOGY 115 | 1 << VIRTIO_BLK_F_CONFIG_WCE 116 | 1 << VIRTIO_BLK_F_DISCARD 117 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 118 | DEFAULT_VIRTIO_FEATURES; 119 120 if num_queues > 1 { 121 avail_features |= 1 << VIRTIO_BLK_F_MQ; 122 } 123 124 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 125 | VhostUserProtocolFeatures::MQ 126 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 127 | VhostUserProtocolFeatures::REPLY_ACK 128 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 129 | VhostUserProtocolFeatures::LOG_SHMFD; 130 131 let (acked_features, acked_protocol_features) = 132 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 133 134 let backend_num_queues = 135 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 136 vu.socket_handle() 137 .get_queue_num() 138 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 139 } else { 140 DEFAULT_QUEUE_NUMBER 141 }; 142 143 if num_queues > backend_num_queues { 144 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 145 num_queues, backend_num_queues); 146 return Err(Error::BadQueueNum); 147 } 148 149 let config_len = mem::size_of::<VirtioBlockConfig>(); 150 let config_space: Vec<u8> = vec![0u8; config_len]; 151 let (_, config_space) = vu 152 .socket_handle() 153 .get_config( 154 VHOST_USER_CONFIG_OFFSET, 155 config_len as u32, 156 VhostUserConfigFlags::WRITABLE, 157 config_space.as_slice(), 158 ) 159 .map_err(Error::VhostUserGetConfig)?; 160 let mut config = VirtioBlockConfig::default(); 161 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 162 config = *backend_config; 163 config.num_queues = num_queues as u16; 164 } 165 166 ( 167 acked_features, 168 // If part of the available features that have been acked, 169 // the PROTOCOL_FEATURES bit must be already set through 170 // the VIRTIO acked features as we know the guest would 171 // never ack it, thus the feature would be lost. 172 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 173 acked_protocol_features, 174 num_queues, 175 config, 176 false, 177 ) 178 }; 179 180 Ok(Blk { 181 common: VirtioCommon { 182 device_type: VirtioDeviceType::Block as u32, 183 queue_sizes: vec![vu_cfg.queue_size; num_queues], 184 avail_features, 185 acked_features, 186 paused_sync: Some(Arc::new(Barrier::new(2))), 187 min_queues: DEFAULT_QUEUE_NUMBER as u16, 188 paused: Arc::new(AtomicBool::new(paused)), 189 ..Default::default() 190 }, 191 vu_common: VhostUserCommon { 192 vu: Some(Arc::new(Mutex::new(vu))), 193 acked_protocol_features, 194 socket_path: vu_cfg.socket, 195 vu_num_queues, 196 ..Default::default() 197 }, 198 id, 199 config, 200 guest_memory: None, 201 epoll_thread: None, 202 seccomp_action, 203 exit_evt, 204 iommu, 205 }) 206 } 207 208 fn state(&self) -> State { 209 State { 210 avail_features: self.common.avail_features, 211 acked_features: self.common.acked_features, 212 config: self.config, 213 acked_protocol_features: self.vu_common.acked_protocol_features, 214 vu_num_queues: self.vu_common.vu_num_queues, 215 } 216 } 217 } 218 219 impl Drop for Blk { 220 fn drop(&mut self) { 221 if let Some(kill_evt) = self.common.kill_evt.take() { 222 if let Err(e) = kill_evt.write(1) { 223 error!("failed to kill vhost-user-blk: {:?}", e); 224 } 225 } 226 self.common.wait_for_epoll_threads(); 227 if let Some(thread) = self.epoll_thread.take() { 228 if let Err(e) = thread.join() { 229 error!("Error joining thread: {:?}", e); 230 } 231 } 232 } 233 } 234 235 impl VirtioDevice for Blk { 236 fn device_type(&self) -> u32 { 237 self.common.device_type 238 } 239 240 fn queue_max_sizes(&self) -> &[u16] { 241 &self.common.queue_sizes 242 } 243 244 fn features(&self) -> u64 { 245 let mut features = self.common.avail_features; 246 if self.iommu { 247 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 248 } 249 features 250 } 251 252 fn ack_features(&mut self, value: u64) { 253 self.common.ack_features(value) 254 } 255 256 fn read_config(&self, offset: u64, data: &mut [u8]) { 257 self.read_config_from_slice(self.config.as_slice(), offset, data); 258 } 259 260 fn write_config(&mut self, offset: u64, data: &[u8]) { 261 // The "writeback" field is the only mutable field 262 let writeback_offset = 263 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 264 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 265 { 266 error!( 267 "Attempt to write to read-only field: offset {:x} length {}", 268 offset, 269 data.len() 270 ); 271 return; 272 } 273 274 self.config.writeback = data[0]; 275 if let Some(vu) = &self.vu_common.vu { 276 if let Err(e) = vu 277 .lock() 278 .unwrap() 279 .socket_handle() 280 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 281 .map_err(Error::VhostUserSetConfig) 282 { 283 error!("Failed setting vhost-user-blk configuration: {:?}", e); 284 } 285 } 286 } 287 288 fn activate( 289 &mut self, 290 mem: GuestMemoryAtomic<GuestMemoryMmap>, 291 interrupt_cb: Arc<dyn VirtioInterrupt>, 292 queues: Vec<(usize, Queue, EventFd)>, 293 ) -> ActivateResult { 294 self.common.activate(&queues, &interrupt_cb)?; 295 self.guest_memory = Some(mem.clone()); 296 297 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 298 299 // Run a dedicated thread for handling potential reconnections with 300 // the backend. 301 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 302 303 let mut handler = self.vu_common.activate( 304 mem, 305 queues, 306 interrupt_cb, 307 self.common.acked_features, 308 backend_req_handler, 309 kill_evt, 310 pause_evt, 311 )?; 312 313 let paused = self.common.paused.clone(); 314 let paused_sync = self.common.paused_sync.clone(); 315 316 let mut epoll_threads = Vec::new(); 317 318 spawn_virtio_thread( 319 &self.id, 320 &self.seccomp_action, 321 Thread::VirtioVhostBlock, 322 &mut epoll_threads, 323 &self.exit_evt, 324 move || handler.run(paused, paused_sync.unwrap()), 325 )?; 326 self.epoll_thread = Some(epoll_threads.remove(0)); 327 328 Ok(()) 329 } 330 331 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 332 // We first must resume the virtio thread if it was paused. 333 if self.common.pause_evt.take().is_some() { 334 self.common.resume().ok()?; 335 } 336 337 if let Some(vu) = &self.vu_common.vu { 338 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 339 error!("Failed to reset vhost-user daemon: {:?}", e); 340 return None; 341 } 342 } 343 344 if let Some(kill_evt) = self.common.kill_evt.take() { 345 // Ignore the result because there is nothing we can do about it. 346 let _ = kill_evt.write(1); 347 } 348 349 event!("virtio-device", "reset", "id", &self.id); 350 351 // Return the interrupt 352 Some(self.common.interrupt_cb.take().unwrap()) 353 } 354 355 fn shutdown(&mut self) { 356 self.vu_common.shutdown() 357 } 358 359 fn add_memory_region( 360 &mut self, 361 region: &Arc<GuestRegionMmap>, 362 ) -> std::result::Result<(), crate::Error> { 363 self.vu_common.add_memory_region(&self.guest_memory, region) 364 } 365 } 366 367 impl Pausable for Blk { 368 fn pause(&mut self) -> result::Result<(), MigratableError> { 369 self.vu_common.pause()?; 370 self.common.pause() 371 } 372 373 fn resume(&mut self) -> result::Result<(), MigratableError> { 374 self.common.resume()?; 375 376 if let Some(epoll_thread) = &self.epoll_thread { 377 epoll_thread.thread().unpark(); 378 } 379 380 self.vu_common.resume() 381 } 382 } 383 384 impl Snapshottable for Blk { 385 fn id(&self) -> String { 386 self.id.clone() 387 } 388 389 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 390 self.vu_common.snapshot(&self.state()) 391 } 392 } 393 impl Transportable for Blk {} 394 395 impl Migratable for Blk { 396 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 397 self.vu_common.start_dirty_log(&self.guest_memory) 398 } 399 400 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 401 self.vu_common.stop_dirty_log() 402 } 403 404 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 405 self.vu_common.dirty_log(&self.guest_memory) 406 } 407 408 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 409 self.vu_common.start_migration() 410 } 411 412 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 413 self.vu_common 414 .complete_migration(self.common.kill_evt.take()) 415 } 416 } 417