1 // Copyright 2019 Intel Corporation. All Rights Reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 use super::super::{ 5 ActivateError, ActivateResult, Queue, VirtioCommon, VirtioDevice, VirtioDeviceType, 6 }; 7 use super::vu_common_ctrl::{VhostUserConfig, VhostUserHandle}; 8 use super::{Error, Result, DEFAULT_VIRTIO_FEATURES}; 9 use crate::vhost_user::{Inflight, VhostUserEpollHandler}; 10 use crate::VirtioInterrupt; 11 use crate::{GuestMemoryMmap, GuestRegionMmap}; 12 use anyhow::anyhow; 13 use block_util::VirtioBlockConfig; 14 use std::mem; 15 use std::ops::Deref; 16 use std::os::unix::io::AsRawFd; 17 use std::result; 18 use std::sync::{Arc, Barrier, Mutex}; 19 use std::thread; 20 use std::vec::Vec; 21 use versionize::{VersionMap, Versionize, VersionizeResult}; 22 use versionize_derive::Versionize; 23 use vhost::vhost_user::message::VhostUserConfigFlags; 24 use vhost::vhost_user::message::VHOST_USER_CONFIG_OFFSET; 25 use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; 26 use vhost::vhost_user::{MasterReqHandler, VhostUserMaster, VhostUserMasterReqHandler}; 27 use virtio_bindings::bindings::virtio_blk::{ 28 VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, 29 VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, 30 VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, 31 }; 32 use vm_memory::{Address, ByteValued, GuestAddressSpace, GuestMemory, GuestMemoryAtomic}; 33 use vm_migration::{ 34 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, Snapshottable, 35 Transportable, VersionMapped, 36 }; 37 use vmm_sys_util::eventfd::EventFd; 38 39 const DEFAULT_QUEUE_NUMBER: usize = 1; 40 41 #[derive(Versionize)] 42 pub struct State { 43 pub avail_features: u64, 44 pub acked_features: u64, 45 pub config: VirtioBlockConfig, 46 } 47 48 impl VersionMapped for State {} 49 50 struct SlaveReqHandler {} 51 impl VhostUserMasterReqHandler for SlaveReqHandler {} 52 53 pub struct Blk { 54 common: VirtioCommon, 55 id: String, 56 vu: Arc<Mutex<VhostUserHandle>>, 57 config: VirtioBlockConfig, 58 guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 59 acked_protocol_features: u64, 60 socket_path: String, 61 epoll_thread: Option<thread::JoinHandle<()>>, 62 vu_num_queues: usize, 63 } 64 65 impl Blk { 66 /// Create a new vhost-user-blk device 67 pub fn new(id: String, vu_cfg: VhostUserConfig) -> Result<Blk> { 68 let num_queues = vu_cfg.num_queues; 69 70 let mut vu = 71 VhostUserHandle::connect_vhost_user(false, &vu_cfg.socket, num_queues as u64, false)?; 72 73 // Filling device and vring features VMM supports. 74 let mut avail_features = 1 << VIRTIO_BLK_F_SIZE_MAX 75 | 1 << VIRTIO_BLK_F_SEG_MAX 76 | 1 << VIRTIO_BLK_F_GEOMETRY 77 | 1 << VIRTIO_BLK_F_RO 78 | 1 << VIRTIO_BLK_F_BLK_SIZE 79 | 1 << VIRTIO_BLK_F_FLUSH 80 | 1 << VIRTIO_BLK_F_TOPOLOGY 81 | 1 << VIRTIO_BLK_F_CONFIG_WCE 82 | 1 << VIRTIO_BLK_F_DISCARD 83 | 1 << VIRTIO_BLK_F_WRITE_ZEROES 84 | DEFAULT_VIRTIO_FEATURES; 85 86 if num_queues > 1 { 87 avail_features |= 1 << VIRTIO_BLK_F_MQ; 88 } 89 90 let avail_protocol_features = VhostUserProtocolFeatures::CONFIG 91 | VhostUserProtocolFeatures::MQ 92 | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS 93 | VhostUserProtocolFeatures::REPLY_ACK 94 | VhostUserProtocolFeatures::INFLIGHT_SHMFD; 95 96 let (acked_features, acked_protocol_features) = 97 vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; 98 99 let backend_num_queues = 100 if acked_protocol_features & VhostUserProtocolFeatures::MQ.bits() != 0 { 101 vu.socket_handle() 102 .get_queue_num() 103 .map_err(Error::VhostUserGetQueueMaxNum)? as usize 104 } else { 105 DEFAULT_QUEUE_NUMBER 106 }; 107 108 if num_queues > backend_num_queues { 109 error!("vhost-user-blk requested too many queues ({}) since the backend only supports {}\n", 110 num_queues, backend_num_queues); 111 return Err(Error::BadQueueNum); 112 } 113 114 let config_len = mem::size_of::<VirtioBlockConfig>(); 115 let config_space: Vec<u8> = vec![0u8; config_len as usize]; 116 let (_, config_space) = vu 117 .socket_handle() 118 .get_config( 119 VHOST_USER_CONFIG_OFFSET, 120 config_len as u32, 121 VhostUserConfigFlags::WRITABLE, 122 config_space.as_slice(), 123 ) 124 .map_err(Error::VhostUserGetConfig)?; 125 let mut config = VirtioBlockConfig::default(); 126 if let Some(backend_config) = VirtioBlockConfig::from_slice(config_space.as_slice()) { 127 config = *backend_config; 128 config.num_queues = num_queues as u16; 129 } 130 131 Ok(Blk { 132 common: VirtioCommon { 133 device_type: VirtioDeviceType::Block as u32, 134 queue_sizes: vec![vu_cfg.queue_size; num_queues], 135 avail_features: acked_features, 136 acked_features: 0, 137 paused_sync: Some(Arc::new(Barrier::new(2))), 138 min_queues: DEFAULT_QUEUE_NUMBER as u16, 139 ..Default::default() 140 }, 141 id, 142 vu: Arc::new(Mutex::new(vu)), 143 config, 144 guest_memory: None, 145 acked_protocol_features, 146 socket_path: vu_cfg.socket, 147 epoll_thread: None, 148 vu_num_queues: num_queues, 149 }) 150 } 151 152 fn state(&self) -> State { 153 State { 154 avail_features: self.common.avail_features, 155 acked_features: self.common.acked_features, 156 config: self.config, 157 } 158 } 159 160 fn set_state(&mut self, state: &State) { 161 self.common.avail_features = state.avail_features; 162 self.common.acked_features = state.acked_features; 163 self.config = state.config; 164 } 165 } 166 167 impl Drop for Blk { 168 fn drop(&mut self) { 169 if let Some(kill_evt) = self.common.kill_evt.take() { 170 if let Err(e) = kill_evt.write(1) { 171 error!("failed to kill vhost-user-blk: {:?}", e); 172 } 173 } 174 } 175 } 176 177 impl VirtioDevice for Blk { 178 fn device_type(&self) -> u32 { 179 self.common.device_type 180 } 181 182 fn queue_max_sizes(&self) -> &[u16] { 183 &self.common.queue_sizes 184 } 185 186 fn features(&self) -> u64 { 187 self.common.avail_features 188 } 189 190 fn ack_features(&mut self, value: u64) { 191 self.common.ack_features(value) 192 } 193 194 fn read_config(&self, offset: u64, data: &mut [u8]) { 195 self.read_config_from_slice(self.config.as_slice(), offset, data); 196 } 197 198 fn write_config(&mut self, offset: u64, data: &[u8]) { 199 // The "writeback" field is the only mutable field 200 let writeback_offset = 201 (&self.config.writeback as *const _ as u64) - (&self.config as *const _ as u64); 202 if offset != writeback_offset || data.len() != std::mem::size_of_val(&self.config.writeback) 203 { 204 error!( 205 "Attempt to write to read-only field: offset {:x} length {}", 206 offset, 207 data.len() 208 ); 209 return; 210 } 211 212 self.config.writeback = data[0]; 213 if let Err(e) = self 214 .vu 215 .lock() 216 .unwrap() 217 .socket_handle() 218 .set_config(offset as u32, VhostUserConfigFlags::WRITABLE, data) 219 .map_err(Error::VhostUserSetConfig) 220 { 221 error!("Failed setting vhost-user-blk configuration: {:?}", e); 222 } 223 } 224 225 fn activate( 226 &mut self, 227 mem: GuestMemoryAtomic<GuestMemoryMmap>, 228 interrupt_cb: Arc<dyn VirtioInterrupt>, 229 queues: Vec<Queue>, 230 queue_evts: Vec<EventFd>, 231 ) -> ActivateResult { 232 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 233 self.guest_memory = Some(mem.clone()); 234 235 let slave_req_handler: Option<MasterReqHandler<SlaveReqHandler>> = None; 236 237 // The backend acknowledged features must contain the protocol feature 238 // bit in case it was initially set but lost through the features 239 // negotiation with the guest. 240 let backend_acked_features = self.common.acked_features 241 | (self.common.avail_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()); 242 243 let mut inflight: Option<Inflight> = 244 if self.acked_protocol_features & VhostUserProtocolFeatures::INFLIGHT_SHMFD.bits() != 0 245 { 246 Some(Inflight::default()) 247 } else { 248 None 249 }; 250 251 self.vu 252 .lock() 253 .unwrap() 254 .setup_vhost_user( 255 &mem.memory(), 256 queues.clone(), 257 queue_evts.iter().map(|q| q.try_clone().unwrap()).collect(), 258 &interrupt_cb, 259 backend_acked_features, 260 &slave_req_handler, 261 inflight.as_mut(), 262 ) 263 .map_err(ActivateError::VhostUserBlkSetup)?; 264 265 // Run a dedicated thread for handling potential reconnections with 266 // the backend. 267 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 268 269 let mut handler: VhostUserEpollHandler<SlaveReqHandler> = VhostUserEpollHandler { 270 vu: self.vu.clone(), 271 mem, 272 kill_evt, 273 pause_evt, 274 queues, 275 queue_evts, 276 virtio_interrupt: interrupt_cb, 277 acked_features: backend_acked_features, 278 acked_protocol_features: self.acked_protocol_features, 279 socket_path: self.socket_path.clone(), 280 server: false, 281 slave_req_handler: None, 282 inflight, 283 }; 284 285 let paused = self.common.paused.clone(); 286 let paused_sync = self.common.paused_sync.clone(); 287 288 thread::Builder::new() 289 .name(self.id.to_string()) 290 .spawn(move || { 291 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 292 error!("Error running vhost-user-blk worker: {:?}", e); 293 } 294 }) 295 .map(|thread| self.epoll_thread = Some(thread)) 296 .map_err(|e| { 297 error!("failed to clone queue EventFd: {}", e); 298 ActivateError::BadActivate 299 })?; 300 301 Ok(()) 302 } 303 304 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 305 // We first must resume the virtio thread if it was paused. 306 if self.common.pause_evt.take().is_some() { 307 self.common.resume().ok()?; 308 } 309 310 if let Err(e) = self 311 .vu 312 .lock() 313 .unwrap() 314 .reset_vhost_user(self.common.queue_sizes.len()) 315 { 316 error!("Failed to reset vhost-user daemon: {:?}", e); 317 return None; 318 } 319 320 if let Some(kill_evt) = self.common.kill_evt.take() { 321 // Ignore the result because there is nothing we can do about it. 322 let _ = kill_evt.write(1); 323 } 324 325 event!("virtio-device", "reset", "id", &self.id); 326 327 // Return the interrupt 328 Some(self.common.interrupt_cb.take().unwrap()) 329 } 330 331 fn shutdown(&mut self) { 332 let _ = unsafe { libc::close(self.vu.lock().unwrap().socket_handle().as_raw_fd()) }; 333 } 334 335 fn add_memory_region( 336 &mut self, 337 region: &Arc<GuestRegionMmap>, 338 ) -> std::result::Result<(), crate::Error> { 339 if self.acked_protocol_features & VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS.bits() != 0 340 { 341 self.vu 342 .lock() 343 .unwrap() 344 .add_memory_region(region) 345 .map_err(crate::Error::VhostUserAddMemoryRegion) 346 } else if let Some(guest_memory) = &self.guest_memory { 347 self.vu 348 .lock() 349 .unwrap() 350 .update_mem_table(guest_memory.memory().deref()) 351 .map_err(crate::Error::VhostUserUpdateMemory) 352 } else { 353 Ok(()) 354 } 355 } 356 } 357 358 impl Pausable for Blk { 359 fn pause(&mut self) -> result::Result<(), MigratableError> { 360 self.vu 361 .lock() 362 .unwrap() 363 .pause_vhost_user(self.vu_num_queues) 364 .map_err(|e| { 365 MigratableError::Pause(anyhow!("Error pausing vhost-user-blk backend: {:?}", e)) 366 })?; 367 368 self.common.pause() 369 } 370 371 fn resume(&mut self) -> result::Result<(), MigratableError> { 372 self.common.resume()?; 373 374 if let Some(epoll_thread) = &self.epoll_thread { 375 epoll_thread.thread().unpark(); 376 } 377 378 self.vu 379 .lock() 380 .unwrap() 381 .resume_vhost_user(self.vu_num_queues) 382 .map_err(|e| { 383 MigratableError::Resume(anyhow!("Error resuming vhost-user-blk backend: {:?}", e)) 384 }) 385 } 386 } 387 388 impl Snapshottable for Blk { 389 fn id(&self) -> String { 390 self.id.clone() 391 } 392 393 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 394 Snapshot::new_from_versioned_state(&self.id(), &self.state()) 395 } 396 397 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 398 self.set_state(&snapshot.to_versioned_state(&self.id)?); 399 Ok(()) 400 } 401 } 402 impl Transportable for Blk {} 403 404 impl Migratable for Blk { 405 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 406 if let Some(guest_memory) = &self.guest_memory { 407 let last_ram_addr = guest_memory.memory().last_addr().raw_value(); 408 self.vu 409 .lock() 410 .unwrap() 411 .start_dirty_log(last_ram_addr) 412 .map_err(|e| { 413 MigratableError::MigrateStart(anyhow!( 414 "Error starting migration for vhost-user-blk backend: {:?}", 415 e 416 )) 417 }) 418 } else { 419 Err(MigratableError::MigrateStart(anyhow!( 420 "Missing guest memory" 421 ))) 422 } 423 } 424 425 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 426 self.vu.lock().unwrap().stop_dirty_log().map_err(|e| { 427 MigratableError::MigrateStop(anyhow!( 428 "Error stopping migration for vhost-user-blk backend: {:?}", 429 e 430 )) 431 }) 432 } 433 434 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 435 if let Some(guest_memory) = &self.guest_memory { 436 let last_ram_addr = guest_memory.memory().last_addr().raw_value(); 437 self.vu 438 .lock() 439 .unwrap() 440 .dirty_log(last_ram_addr) 441 .map_err(|e| { 442 MigratableError::MigrateDirtyRanges(anyhow!( 443 "Error retrieving dirty ranges from vhost-user-blk backend: {:?}", 444 e 445 )) 446 }) 447 } else { 448 Err(MigratableError::MigrateDirtyRanges(anyhow!( 449 "Missing guest memory" 450 ))) 451 } 452 } 453 } 454