1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use std::sync::atomic::AtomicBool; 5 use std::sync::{Arc, Barrier, Mutex}; 6 use std::{mem, result, thread}; 7 8 use block::VirtioBlockConfig; 9 use seccompiler::SeccompAction; 10 use serde::{Deserialize, Serialize}; 11 use vhost::vhost_user::message::{ 12 VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, 13 VHOST_USER_CONFIG_OFFSET, 14 }; 15 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 16 use virtio_bindings::virtio_blk::{ 17 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 18 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 19 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 20 }; 21 use virtio_queue::Queue; 22 use vm_memory::{ByteValued, GuestMemoryAtomic}; 23 use vm_migration::protocol::MemoryRangeTable; 24 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 25 use vmm_sys_util::eventfd::EventFd; 26 27 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; 28 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 29 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 30 use crate::seccomp_filters::Thread; 31 use crate::thread_helper::spawn_virtio_thread; 32 use crate::vhost_user::VhostUserCommon; 33 use crate::{GuestMemoryMmap, GuestRegionMmap, VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; 34 35 const DEFAULT_QUEUE_NUMBER: usize = 1; 36 37 #[derive(Serialize, Deserialize)] 38 pub struct State { 39 pub avail_features: u64, 40 pub acked_features: u64, 41 pub config: VirtioBlockConfig, 42 pub acked_protocol_features: u64, 43 pub vu_num_queues: usize, 44 } 45 46 struct BackendReqHandler {} 47 impl VhostUserFrontendReqHandler for BackendReqHandler {} 48 49 pub struct Blk { 50 common: VirtioCommon, 51 vu_common: VhostUserCommon, 52 id: String, 53 config: VirtioBlockConfig, 54 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 55 epoll_thread: Option<thread::JoinHandle<()>>, 56 seccomp_action: SeccompAction, 57 exit_evt: EventFd, 58 iommu: bool, 59 } 60 61 impl Blk { 62 /// Create a new vhost-user-blk device 63 pub fn new( 64 id: String, 65 vu_cfg: VhostUserConfig, 66 seccomp_action: SeccompAction, 67 exit_evt: EventFd, 68 iommu: bool, 69 state: Option<State>, 70 ) -> Result<Blk> { 71 let num_queues = vu_cfg.num_queues; 72 73 let mut vu = 74 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 75 76 let ( 77 avail_features, 78 acked_features, 79 acked_protocol_features, 80 vu_num_queues, 81 config, 82 paused, 83 ) = if let Some(state) = state { 84 info!("Restoring vhost-user-block {}", id); 85 86 vu.set_protocol_features_vhost_user( 87 state.acked_features, 88 state.acked_protocol_features, 89 )?; 90 91 ( 92 state.avail_features, 93 state.acked_features, 94 state.acked_protocol_features, 95 state.vu_num_queues, 96 state.config, 97 true, 98 ) 99 } else { 100 // Filling device and vring features VMM supports. 101 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 102 | 1 << VIRTIO_BLK_F_SEG_MAX 103 | 1 << VIRTIO_BLK_F_GEOMETRY 104 | 1 << VIRTIO_BLK_F_RO 105 | 1 << VIRTIO_BLK_F_BLK_SIZE 106 | 1 << VIRTIO_BLK_F_FLUSH 107 | 1 << VIRTIO_BLK_F_TOPOLOGY 108 | 1 << VIRTIO_BLK_F_CONFIG_WCE 109 | 1 << VIRTIO_BLK_F_DISCARD 110 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 111 | DEFAULT_VIRTIO_FEATURES; 112 113 if num_queues > 1 { 114 avail_features |= 1 << VIRTIO_BLK_F_MQ; 115 } 116 117 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 118 | VhostUserProtocolFeatures::MQ 119 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 120 | VhostUserProtocolFeatures::REPLY_ACK 121 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 122 | VhostUserProtocolFeatures::LOG_SHMFD; 123 124 let (acked_features, acked_protocol_features) = 125 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 126 127 let backend_num_queues = 128 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 129 vu.socket_handle() 130 .get_queue_num() 131 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 132 } else { 133 DEFAULT_QUEUE_NUMBER 134 }; 135 136 if num_queues > backend_num_queues { 137 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 138 num_queues, backend_num_queues); 139 return Err(Error::BadQueueNum); 140 } 141 142 let config_len = mem::size_of::<VirtioBlockConfig>(); 143 let config_space: Vec<u8> = vec![0u8; config_len]; 144 let (_, config_space) = vu 145 .socket_handle() 146 .get_config( 147 VHOST_USER_CONFIG_OFFSET, 148 config_len as u32, 149 VhostUserConfigFlags::WRITABLE, 150 config_space.as_slice(), 151 ) 152 .map_err(Error::VhostUserGetConfig)?; 153 let mut config = VirtioBlockConfig::default(); 154 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 155 config = *backend_config; 156 config.num_queues = num_queues as u16; 157 } 158 159 ( 160 acked_features, 161 // If part of the available features that have been acked, 162 // the PROTOCOL_FEATURES bit must be already set through 163 // the VIRTIO acked features as we know the guest would 164 // never ack it, thus the feature would be lost. 165 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 166 acked_protocol_features, 167 num_queues, 168 config, 169 false, 170 ) 171 }; 172 173 Ok(Blk { 174 common: VirtioCommon { 175 device_type: VirtioDeviceType::Block as u32, 176 queue_sizes: vec![vu_cfg.queue_size; num_queues], 177 avail_features, 178 acked_features, 179 paused_sync: Some(Arc::new(Barrier::new(2))), 180 min_queues: DEFAULT_QUEUE_NUMBER as u16, 181 paused: Arc::new(AtomicBool::new(paused)), 182 ..Default::default() 183 }, 184 vu_common: VhostUserCommon { 185 vu: Some(Arc::new(Mutex::new(vu))), 186 acked_protocol_features, 187 socket_path: vu_cfg.socket, 188 vu_num_queues, 189 ..Default::default() 190 }, 191 id, 192 config, 193 guest_memory: None, 194 epoll_thread: None, 195 seccomp_action, 196 exit_evt, 197 iommu, 198 }) 199 } 200 201 fn state(&self) -> State { 202 State { 203 avail_features: self.common.avail_features, 204 acked_features: self.common.acked_features, 205 config: self.config, 206 acked_protocol_features: self.vu_common.acked_protocol_features, 207 vu_num_queues: self.vu_common.vu_num_queues, 208 } 209 } 210 } 211 212 impl Drop for Blk { 213 fn drop(&mut self) { 214 if let Some(kill_evt) = self.common.kill_evt.take() { 215 if let Err(e) = kill_evt.write(1) { 216 error!("failed to kill vhost-user-blk: {:?}", e); 217 } 218 } 219 self.common.wait_for_epoll_threads(); 220 if let Some(thread) = self.epoll_thread.take() { 221 if let Err(e) = thread.join() { 222 error!("Error joining thread: {:?}", e); 223 } 224 } 225 } 226 } 227 228 impl VirtioDevice for Blk { 229 fn device_type(&self) -> u32 { 230 self.common.device_type 231 } 232 233 fn queue_max_sizes(&self) -> &[u16] { 234 &self.common.queue_sizes 235 } 236 237 fn features(&self) -> u64 { 238 let mut features = self.common.avail_features; 239 if self.iommu { 240 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 241 } 242 features 243 } 244 245 fn ack_features(&mut self, value: u64) { 246 self.common.ack_features(value) 247 } 248 249 fn read_config(&self, offset: u64, data: &mut [u8]) { 250 self.read_config_from_slice(self.config.as_slice(), offset, data); 251 } 252 253 fn write_config(&mut self, offset: u64, data: &[u8]) { 254 // The "writeback" field is the only mutable field 255 let writeback_offset = 256 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 257 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 258 { 259 error!( 260 "Attempt to write to read-only field: offset {:x} length {}", 261 offset, 262 data.len() 263 ); 264 return; 265 } 266 267 self.config.writeback = data[0]; 268 if let Some(vu) = &self.vu_common.vu { 269 if let Err(e) = vu 270 .lock() 271 .unwrap() 272 .socket_handle() 273 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 274 .map_err(Error::VhostUserSetConfig) 275 { 276 error!("Failed setting vhost-user-blk configuration: {:?}", e); 277 } 278 } 279 } 280 281 fn activate( 282 &mut self, 283 mem: GuestMemoryAtomic<GuestMemoryMmap>, 284 interrupt_cb: Arc<dyn VirtioInterrupt>, 285 queues: Vec<(usize, Queue, EventFd)>, 286 ) -> ActivateResult { 287 self.common.activate(&queues, &interrupt_cb)?; 288 self.guest_memory = Some(mem.clone()); 289 290 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 291 292 // Run a dedicated thread for handling potential reconnections with 293 // the backend. 294 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 295 296 let mut handler = self.vu_common.activate( 297 mem, 298 queues, 299 interrupt_cb, 300 self.common.acked_features, 301 backend_req_handler, 302 kill_evt, 303 pause_evt, 304 )?; 305 306 let paused = self.common.paused.clone(); 307 let paused_sync = self.common.paused_sync.clone(); 308 309 let mut epoll_threads = Vec::new(); 310 311 spawn_virtio_thread( 312 &self.id, 313 &self.seccomp_action, 314 Thread::VirtioVhostBlock, 315 &mut epoll_threads, 316 &self.exit_evt, 317 move || handler.run(paused, paused_sync.unwrap()), 318 )?; 319 self.epoll_thread = Some(epoll_threads.remove(0)); 320 321 Ok(()) 322 } 323 324 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 325 // We first must resume the virtio thread if it was paused. 326 if self.common.pause_evt.take().is_some() { 327 self.common.resume().ok()?; 328 } 329 330 if let Some(vu) = &self.vu_common.vu { 331 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 332 error!("Failed to reset vhost-user daemon: {:?}", e); 333 return None; 334 } 335 } 336 337 if let Some(kill_evt) = self.common.kill_evt.take() { 338 // Ignore the result because there is nothing we can do about it. 339 let _ = kill_evt.write(1); 340 } 341 342 event!("virtio-device", "reset", "id", &self.id); 343 344 // Return the interrupt 345 Some(self.common.interrupt_cb.take().unwrap()) 346 } 347 348 fn shutdown(&mut self) { 349 self.vu_common.shutdown() 350 } 351 352 fn add_memory_region( 353 &mut self, 354 region: &Arc<GuestRegionMmap>, 355 ) -> std::result::Result<(), crate::Error> { 356 self.vu_common.add_memory_region(&self.guest_memory, region) 357 } 358 } 359 360 impl Pausable for Blk { 361 fn pause(&mut self) -> result::Result<(), MigratableError> { 362 self.vu_common.pause()?; 363 self.common.pause() 364 } 365 366 fn resume(&mut self) -> result::Result<(), MigratableError> { 367 self.common.resume()?; 368 369 if let Some(epoll_thread) = &self.epoll_thread { 370 epoll_thread.thread().unpark(); 371 } 372 373 self.vu_common.resume() 374 } 375 } 376 377 impl Snapshottable for Blk { 378 fn id(&self) -> String { 379 self.id.clone() 380 } 381 382 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 383 self.vu_common.snapshot(&self.state()) 384 } 385 } 386 impl Transportable for Blk {} 387 388 impl Migratable for Blk { 389 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 390 self.vu_common.start_dirty_log(&self.guest_memory) 391 } 392 393 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 394 self.vu_common.stop_dirty_log() 395 } 396 397 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 398 self.vu_common.dirty_log(&self.guest_memory) 399 } 400 401 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 402 self.vu_common.start_migration() 403 } 404 405 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 406 self.vu_common 407 .complete_migration(self.common.kill_evt.take()) 408 } 409 } 410