1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use std::mem; 5 use std::result; 6 use std::sync::atomic::AtomicBool; 7 use std::sync::{Arc, Barrier, Mutex}; 8 use std::thread; 9 10 use block::VirtioBlockConfig; 11 use seccompiler::SeccompAction; 12 use serde::{Deserialize, Serialize}; 13 use vhost::vhost_user::message::{ 14 VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures, 15 VHOST_USER_CONFIG_OFFSET, 16 }; 17 use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; 18 use virtio_bindings::virtio_blk::{ 19 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 20 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 21 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 22 }; 23 use virtio_queue::Queue; 24 use vm_memory::{ByteValued, GuestMemoryAtomic}; 25 use vm_migration::{ 26 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 27 Transportable, 28 }; 29 use vmm_sys_util::eventfd::EventFd; 30 31 use super::super::{ActivateResult, VirtioCommon, VirtioDevice, VirtioDeviceType}; 32 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 33 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 34 use crate::seccomp_filters::Thread; 35 use crate::thread_helper::spawn_virtio_thread; 36 use crate::vhost_user::VhostUserCommon; 37 use crate::{GuestMemoryMmap, GuestRegionMmap}; 38 use crate::{VirtioInterrupt, VIRTIO_F_IOMMU_PLATFORM}; 39 40 const DEFAULT_QUEUE_NUMBER: usize = 1; 41 42 #[derive(Serialize, Deserialize)] 43 pub struct State { 44 pub avail_features: u64, 45 pub acked_features: u64, 46 pub config: VirtioBlockConfig, 47 pub acked_protocol_features: u64, 48 pub vu_num_queues: usize, 49 } 50 51 struct BackendReqHandler {} 52 impl VhostUserFrontendReqHandler for BackendReqHandler {} 53 54 pub struct Blk { 55 common: VirtioCommon, 56 vu_common: VhostUserCommon, 57 id: String, 58 config: VirtioBlockConfig, 59 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 60 epoll_thread: Option<thread::JoinHandle<()>>, 61 seccomp_action: SeccompAction, 62 exit_evt: EventFd, 63 iommu: bool, 64 } 65 66 impl Blk { 67 /// Create a new vhost-user-blk device 68 pub fn new( 69 id: String, 70 vu_cfg: VhostUserConfig, 71 seccomp_action: SeccompAction, 72 exit_evt: EventFd, 73 iommu: bool, 74 state: Option<State>, 75 ) -> Result<Blk> { 76 let num_queues = vu_cfg.num_queues; 77 78 let mut vu = 79 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 80 81 let ( 82 avail_features, 83 acked_features, 84 acked_protocol_features, 85 vu_num_queues, 86 config, 87 paused, 88 ) = if let Some(state) = state { 89 info!("Restoring vhost-user-block {}", id); 90 91 vu.set_protocol_features_vhost_user( 92 state.acked_features, 93 state.acked_protocol_features, 94 )?; 95 96 ( 97 state.avail_features, 98 state.acked_features, 99 state.acked_protocol_features, 100 state.vu_num_queues, 101 state.config, 102 true, 103 ) 104 } else { 105 // Filling device and vring features VMM supports. 106 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 107 | 1 << VIRTIO_BLK_F_SEG_MAX 108 | 1 << VIRTIO_BLK_F_GEOMETRY 109 | 1 << VIRTIO_BLK_F_RO 110 | 1 << VIRTIO_BLK_F_BLK_SIZE 111 | 1 << VIRTIO_BLK_F_FLUSH 112 | 1 << VIRTIO_BLK_F_TOPOLOGY 113 | 1 << VIRTIO_BLK_F_CONFIG_WCE 114 | 1 << VIRTIO_BLK_F_DISCARD 115 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 116 | DEFAULT_VIRTIO_FEATURES; 117 118 if num_queues > 1 { 119 avail_features |= 1 << VIRTIO_BLK_F_MQ; 120 } 121 122 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 123 | VhostUserProtocolFeatures::MQ 124 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 125 | VhostUserProtocolFeatures::REPLY_ACK 126 | VhostUserProtocolFeatures::INFLIGHT_SHMFD 127 | VhostUserProtocolFeatures::LOG_SHMFD; 128 129 let (acked_features, acked_protocol_features) = 130 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 131 132 let backend_num_queues = 133 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 134 vu.socket_handle() 135 .get_queue_num() 136 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 137 } else { 138 DEFAULT_QUEUE_NUMBER 139 }; 140 141 if num_queues > backend_num_queues { 142 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 143 num_queues, backend_num_queues); 144 return Err(Error::BadQueueNum); 145 } 146 147 let config_len = mem::size_of::<VirtioBlockConfig>(); 148 let config_space: Vec<u8> = vec![0u8; config_len]; 149 let (_, config_space) = vu 150 .socket_handle() 151 .get_config( 152 VHOST_USER_CONFIG_OFFSET, 153 config_len as u32, 154 VhostUserConfigFlags::WRITABLE, 155 config_space.as_slice(), 156 ) 157 .map_err(Error::VhostUserGetConfig)?; 158 let mut config = VirtioBlockConfig::default(); 159 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 160 config = *backend_config; 161 config.num_queues = num_queues as u16; 162 } 163 164 ( 165 acked_features, 166 // If part of the available features that have been acked, 167 // the PROTOCOL_FEATURES bit must be already set through 168 // the VIRTIO acked features as we know the guest would 169 // never ack it, thus the feature would be lost. 170 acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), 171 acked_protocol_features, 172 num_queues, 173 config, 174 false, 175 ) 176 }; 177 178 Ok(Blk { 179 common: VirtioCommon { 180 device_type: VirtioDeviceType::Block as u32, 181 queue_sizes: vec![vu_cfg.queue_size; num_queues], 182 avail_features, 183 acked_features, 184 paused_sync: Some(Arc::new(Barrier::new(2))), 185 min_queues: DEFAULT_QUEUE_NUMBER as u16, 186 paused: Arc::new(AtomicBool::new(paused)), 187 ..Default::default() 188 }, 189 vu_common: VhostUserCommon { 190 vu: Some(Arc::new(Mutex::new(vu))), 191 acked_protocol_features, 192 socket_path: vu_cfg.socket, 193 vu_num_queues, 194 ..Default::default() 195 }, 196 id, 197 config, 198 guest_memory: None, 199 epoll_thread: None, 200 seccomp_action, 201 exit_evt, 202 iommu, 203 }) 204 } 205 206 fn state(&self) -> State { 207 State { 208 avail_features: self.common.avail_features, 209 acked_features: self.common.acked_features, 210 config: self.config, 211 acked_protocol_features: self.vu_common.acked_protocol_features, 212 vu_num_queues: self.vu_common.vu_num_queues, 213 } 214 } 215 } 216 217 impl Drop for Blk { 218 fn drop(&mut self) { 219 if let Some(kill_evt) = self.common.kill_evt.take() { 220 if let Err(e) = kill_evt.write(1) { 221 error!("failed to kill vhost-user-blk: {:?}", e); 222 } 223 } 224 self.common.wait_for_epoll_threads(); 225 if let Some(thread) = self.epoll_thread.take() { 226 if let Err(e) = thread.join() { 227 error!("Error joining thread: {:?}", e); 228 } 229 } 230 } 231 } 232 233 impl VirtioDevice for Blk { 234 fn device_type(&self) -> u32 { 235 self.common.device_type 236 } 237 238 fn queue_max_sizes(&self) -> &[u16] { 239 &self.common.queue_sizes 240 } 241 242 fn features(&self) -> u64 { 243 let mut features = self.common.avail_features; 244 if self.iommu { 245 features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 246 } 247 features 248 } 249 250 fn ack_features(&mut self, value: u64) { 251 self.common.ack_features(value) 252 } 253 254 fn read_config(&self, offset: u64, data: &mut [u8]) { 255 self.read_config_from_slice(self.config.as_slice(), offset, data); 256 } 257 258 fn write_config(&mut self, offset: u64, data: &[u8]) { 259 // The "writeback" field is the only mutable field 260 let writeback_offset = 261 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 262 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 263 { 264 error!( 265 "Attempt to write to read-only field: offset {:x} length {}", 266 offset, 267 data.len() 268 ); 269 return; 270 } 271 272 self.config.writeback = data[0]; 273 if let Some(vu) = &self.vu_common.vu { 274 if let Err(e) = vu 275 .lock() 276 .unwrap() 277 .socket_handle() 278 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 279 .map_err(Error::VhostUserSetConfig) 280 { 281 error!("Failed setting vhost-user-blk configuration: {:?}", e); 282 } 283 } 284 } 285 286 fn activate( 287 &mut self, 288 mem: GuestMemoryAtomic<GuestMemoryMmap>, 289 interrupt_cb: Arc<dyn VirtioInterrupt>, 290 queues: Vec<(usize, Queue, EventFd)>, 291 ) -> ActivateResult { 292 self.common.activate(&queues, &interrupt_cb)?; 293 self.guest_memory = Some(mem.clone()); 294 295 let backend_req_handler: Option<FrontendReqHandler<BackendReqHandler>> = None; 296 297 // Run a dedicated thread for handling potential reconnections with 298 // the backend. 299 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 300 301 let mut handler = self.vu_common.activate( 302 mem, 303 queues, 304 interrupt_cb, 305 self.common.acked_features, 306 backend_req_handler, 307 kill_evt, 308 pause_evt, 309 )?; 310 311 let paused = self.common.paused.clone(); 312 let paused_sync = self.common.paused_sync.clone(); 313 314 let mut epoll_threads = Vec::new(); 315 316 spawn_virtio_thread( 317 &self.id, 318 &self.seccomp_action, 319 Thread::VirtioVhostBlock, 320 &mut epoll_threads, 321 &self.exit_evt, 322 move || handler.run(paused, paused_sync.unwrap()), 323 )?; 324 self.epoll_thread = Some(epoll_threads.remove(0)); 325 326 Ok(()) 327 } 328 329 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 330 // We first must resume the virtio thread if it was paused. 331 if self.common.pause_evt.take().is_some() { 332 self.common.resume().ok()?; 333 } 334 335 if let Some(vu) = &self.vu_common.vu { 336 if let Err(e) = vu.lock().unwrap().reset_vhost_user() { 337 error!("Failed to reset vhost-user daemon: {:?}", e); 338 return None; 339 } 340 } 341 342 if let Some(kill_evt) = self.common.kill_evt.take() { 343 // Ignore the result because there is nothing we can do about it. 344 let _ = kill_evt.write(1); 345 } 346 347 event!("virtio-device", "reset", "id", &self.id); 348 349 // Return the interrupt 350 Some(self.common.interrupt_cb.take().unwrap()) 351 } 352 353 fn shutdown(&mut self) { 354 self.vu_common.shutdown() 355 } 356 357 fn add_memory_region( 358 &mut self, 359 region: &Arc<GuestRegionMmap>, 360 ) -> std::result::Result<(), crate::Error> { 361 self.vu_common.add_memory_region(&self.guest_memory, region) 362 } 363 } 364 365 impl Pausable for Blk { 366 fn pause(&mut self) -> result::Result<(), MigratableError> { 367 self.vu_common.pause()?; 368 self.common.pause() 369 } 370 371 fn resume(&mut self) -> result::Result<(), MigratableError> { 372 self.common.resume()?; 373 374 if let Some(epoll_thread) = &self.epoll_thread { 375 epoll_thread.thread().unpark(); 376 } 377 378 self.vu_common.resume() 379 } 380 } 381 382 impl Snapshottable for Blk { 383 fn id(&self) -> String { 384 self.id.clone() 385 } 386 387 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 388 self.vu_common.snapshot(&self.state()) 389 } 390 } 391 impl Transportable for Blk {} 392 393 impl Migratable for Blk { 394 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 395 self.vu_common.start_dirty_log(&self.guest_memory) 396 } 397 398 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 399 self.vu_common.stop_dirty_log() 400 } 401 402 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 403 self.vu_common.dirty_log(&self.guest_memory) 404 } 405 406 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 407 self.vu_common.start_migration() 408 } 409 410 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 411 self.vu_common 412 .complete_migration(self.common.kill_evt.take()) 413 } 414 } 415