1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; 5 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 6 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 7 use crate::seccomp_filters::Thread; 8 use crate::thread_helper::spawn_virtio_thread; 9 use crate::vhost_user::VhostUserCommon; 10 use crate::{GuestMemoryMmap, GuestRegionMmap}; 11 use crate::{VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; 12 use block::VirtioBlockConfig; 13 use seccompiler::SeccompAction; 14 use serde::{Deserialize, Serialize}; 15 use std::mem; 16 use std::result; 17 use std::sync::atomic::AtomicBool; 18 use std::sync::{Arc, Barrier, Mutex}; 19 use std::thread; 20 21 use vhost::vhost_user::message::{ 22 VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, 23 VHOST_USER_CONFIG_OFFSET, 24 }; 25 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 26 use virtio_bindings::virtio_blk::{ 27 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 28 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 29 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 30 }; 31 use virtio_queue::Queue; 32 use vm_memory::{ByteValued, GuestMemoryAtomic}; 33 use vm_migration::{ 34 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 35 Transportable, 36 }; 37 use vmm_sys_util::eventfd::EventFd; 38 39 const DEFAULT_QUEUE_NUMBER: usize = 1; 40 41 #[derive(Serialize, Deserialize)] 42 pub struct State { 43 pub avail_features: u64, 44 pub acked_features: u64, 45 pub config: VirtioBlockConfig, 46 pub acked_protocol_features: u64, 47 pub vu_num_queues: usize, 48 } 49 50 struct BackendReqHandler {} 51 impl VhostUserFrontendReqHandler for BackendReqHandler {} 52 53 pub struct Blk { 54 common: VirtioCommon, 55 vu_common: VhostUserCommon, 56 id: String, 57 config: VirtioBlockConfig, 58 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 59 epoll_thread: Option<thread::JoinHandle<()>>, 60 seccomp_action: SeccompAction, 61 exit_evt: EventFd, 62 iommu: bool, 63 } 64 65 impl Blk { 66 /// Create a new vhost-user-blk device 67 pub fn new( 68 id: String, 69 vu_cfg: VhostUserConfig, 70 seccomp_action: SeccompAction, 71 exit_evt: EventFd, 72 iommu: bool, 73 state: Option<State>, 74 ) -> Result<Blk> { 75 let num_queues = vu_cfg.num_queues; 76 77 let mut vu = 78 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 79 80 let ( 81 avail_features, 82 acked_features, 83 acked_protocol_features, 84 vu_num_queues, 85 config, 86 paused, 87 ) = if let Some(state) = state { 88 info!("Restoring vhost-user-block {}", id); 89 90 vu.set_protocol_features_vhost_user( 91 state.acked_features, 92 state.acked_protocol_features, 93 )?; 94 95 ( 96 state.avail_features, 97 state.acked_features, 98 state.acked_protocol_features, 99 state.vu_num_queues, 100 state.config, 101 true, 102 ) 103 } else { 104 // Filling device and vring features VMM supports. 105 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 106 | 1 << VIRTIO_BLK_F_SEG_MAX 107 | 1 << VIRTIO_BLK_F_GEOMETRY 108 | 1 << VIRTIO_BLK_F_RO 109 | 1 << VIRTIO_BLK_F_BLK_SIZE 110 | 1 << VIRTIO_BLK_F_FLUSH 111 | 1 << VIRTIO_BLK_F_TOPOLOGY 112 | 1 << VIRTIO_BLK_F_CONFIG_WCE 113 | 1 << VIRTIO_BLK_F_DISCARD 114 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 115 | DEFAULT_VIRTIO_FEATURES; 116 117 if num_queues > 1 { 118 avail_features |= 1 << VIRTIO_BLK_F_MQ; 119 } 120 121 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 122 | VhostUserProtocolFeatures::MQ 123 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 124 | VhostUserProtocolFeatures::REPLY_ACK 125 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 126 | VhostUserProtocolFeatures::LOG_SHMFD; 127 128 let (acked_features, acked_protocol_features) = 129 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 130 131 let backend_num_queues = 132 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 133 vu.socket_handle() 134 .get_queue_num() 135 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 136 } else { 137 DEFAULT_QUEUE_NUMBER 138 }; 139 140 if num_queues > backend_num_queues { 141 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 142 num_queues, backend_num_queues); 143 return Err(Error::BadQueueNum); 144 } 145 146 let config_len = mem::size_of::<VirtioBlockConfig>(); 147 let config_space: Vec<u8> = vec![0u8; config_len]; 148 let (_, config_space) = vu 149 .socket_handle() 150 .get_config( 151 VHOST_USER_CONFIG_OFFSET, 152 config_len as u32, 153 VhostUserConfigFlags::WRITABLE, 154 config_space.as_slice(), 155 ) 156 .map_err(Error::VhostUserGetConfig)?; 157 let mut config = VirtioBlockConfig::default(); 158 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 159 config = *backend_config; 160 config.num_queues = num_queues as u16; 161 } 162 163 ( 164 acked_features, 165 // If part of the available features that have been acked, 166 // the PROTOCOL_FEATURES bit must be already set through 167 // the VIRTIO acked features as we know the guest would 168 // never ack it, thus the feature would be lost. 169 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 170 acked_protocol_features, 171 num_queues, 172 config, 173 false, 174 ) 175 }; 176 177 Ok(Blk { 178 common: VirtioCommon { 179 device_type: VirtioDeviceType::Block as u32, 180 queue_sizes: vec![vu_cfg.queue_size; num_queues], 181 avail_features, 182 acked_features, 183 paused_sync: Some(Arc::new(Barrier::new(2))), 184 min_queues: DEFAULT_QUEUE_NUMBER as u16, 185 paused: Arc::new(AtomicBool::new(paused)), 186 ..Default::default() 187 }, 188 vu_common: VhostUserCommon { 189 vu: Some(Arc::new(Mutex::new(vu))), 190 acked_protocol_features, 191 socket_path: vu_cfg.socket, 192 vu_num_queues, 193 ..Default::default() 194 }, 195 id, 196 config, 197 guest_memory: None, 198 epoll_thread: None, 199 seccomp_action, 200 exit_evt, 201 iommu, 202 }) 203 } 204 205 fn state(&self) -> State { 206 State { 207 avail_features: self.common.avail_features, 208 acked_features: self.common.acked_features, 209 config: self.config, 210 acked_protocol_features: self.vu_common.acked_protocol_features, 211 vu_num_queues: self.vu_common.vu_num_queues, 212 } 213 } 214 } 215 216 impl Drop for Blk { 217 fn drop(&mut self) { 218 if let Some(kill_evt) = self.common.kill_evt.take() { 219 if let Err(e) = kill_evt.write(1) { 220 error!("failed to kill vhost-user-blk: {:?}", e); 221 } 222 } 223 self.common.wait_for_epoll_threads(); 224 if let Some(thread) = self.epoll_thread.take() { 225 if let Err(e) = thread.join() { 226 error!("Error joining thread: {:?}", e); 227 } 228 } 229 } 230 } 231 232 impl VirtioDevice for Blk { 233 fn device_type(&self) -> u32 { 234 self.common.device_type 235 } 236 237 fn queue_max_sizes(&self) -> &[u16] { 238 &self.common.queue_sizes 239 } 240 241 fn features(&self) -> u64 { 242 let mut features = self.common.avail_features; 243 if self.iommu { 244 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 245 } 246 features 247 } 248 249 fn ack_features(&mut self, value: u64) { 250 self.common.ack_features(value) 251 } 252 253 fn read_config(&self, offset: u64, data: &mut [u8]) { 254 self.read_config_from_slice(self.config.as_slice(), offset, data); 255 } 256 257 fn write_config(&mut self, offset: u64, data: &[u8]) { 258 // The "writeback" field is the only mutable field 259 let writeback_offset = 260 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 261 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 262 { 263 error!( 264 "Attempt to write to read-only field: offset {:x} length {}", 265 offset, 266 data.len() 267 ); 268 return; 269 } 270 271 self.config.writeback = data[0]; 272 if let Some(vu) = &self.vu_common.vu { 273 if let Err(e) = vu 274 .lock() 275 .unwrap() 276 .socket_handle() 277 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 278 .map_err(Error::VhostUserSetConfig) 279 { 280 error!("Failed setting vhost-user-blk configuration: {:?}", e); 281 } 282 } 283 } 284 285 fn activate( 286 &mut self, 287 mem: GuestMemoryAtomic<GuestMemoryMmap>, 288 interrupt_cb: Arc<dyn VirtioInterrupt>, 289 queues: Vec<(usize, Queue, EventFd)>, 290 ) -> ActivateResult { 291 self.common.activate(&queues, &interrupt_cb)?; 292 self.guest_memory = Some(mem.clone()); 293 294 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 295 296 // Run a dedicated thread for handling potential reconnections with 297 // the backend. 298 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 299 300 let mut handler = self.vu_common.activate( 301 mem, 302 queues, 303 interrupt_cb, 304 self.common.acked_features, 305 backend_req_handler, 306 kill_evt, 307 pause_evt, 308 )?; 309 310 let paused = self.common.paused.clone(); 311 let paused_sync = self.common.paused_sync.clone(); 312 313 let mut epoll_threads = Vec::new(); 314 315 spawn_virtio_thread( 316 &self.id, 317 &self.seccomp_action, 318 Thread::VirtioVhostBlock, 319 &mut epoll_threads, 320 &self.exit_evt, 321 move || handler.run(paused, paused_sync.unwrap()), 322 )?; 323 self.epoll_thread = Some(epoll_threads.remove(0)); 324 325 Ok(()) 326 } 327 328 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 329 // We first must resume the virtio thread if it was paused. 330 if self.common.pause_evt.take().is_some() { 331 self.common.resume().ok()?; 332 } 333 334 if let Some(vu) = &self.vu_common.vu { 335 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 336 error!("Failed to reset vhost-user daemon: {:?}", e); 337 return None; 338 } 339 } 340 341 if let Some(kill_evt) = self.common.kill_evt.take() { 342 // Ignore the result because there is nothing we can do about it. 343 let _ = kill_evt.write(1); 344 } 345 346 event!("virtio-device", "reset", "id", &self.id); 347 348 // Return the interrupt 349 Some(self.common.interrupt_cb.take().unwrap()) 350 } 351 352 fn shutdown(&mut self) { 353 self.vu_common.shutdown() 354 } 355 356 fn add_memory_region( 357 &mut self, 358 region: &Arc<GuestRegionMmap>, 359 ) -> std::result::Result<(), crate::Error> { 360 self.vu_common.add_memory_region(&self.guest_memory, region) 361 } 362 } 363 364 impl Pausable for Blk { 365 fn pause(&mut self) -> result::Result<(), MigratableError> { 366 self.vu_common.pause()?; 367 self.common.pause() 368 } 369 370 fn resume(&mut self) -> result::Result<(), MigratableError> { 371 self.common.resume()?; 372 373 if let Some(epoll_thread) = &self.epoll_thread { 374 epoll_thread.thread().unpark(); 375 } 376 377 self.vu_common.resume() 378 } 379 } 380 381 impl Snapshottable for Blk { 382 fn id(&self) -> String { 383 self.id.clone() 384 } 385 386 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 387 self.vu_common.snapshot(&self.state()) 388 } 389 } 390 impl Transportable for Blk {} 391 392 impl Migratable for Blk { 393 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 394 self.vu_common.start_dirty_log(&self.guest_memory) 395 } 396 397 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 398 self.vu_common.stop_dirty_log() 399 } 400 401 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 402 self.vu_common.dirty_log(&self.guest_memory) 403 } 404 405 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 406 self.vu_common.start_migration() 407 } 408 409 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 410 self.vu_common 411 .complete_migration(self.common.kill_evt.take()) 412 } 413 } 414