1 // Copyright 2019 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Copyright © 2019 Intel Corporation 6 // 7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 8 9 use super::Error as DeviceError; 10 use super::{ 11 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, 12 UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, 13 VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, 14 }; 15 use crate::seccomp_filters::Thread; 16 use crate::thread_helper::spawn_virtio_thread; 17 use crate::{GuestMemoryMmap, MmapRegion}; 18 use crate::{VirtioInterrupt, VirtioInterruptType}; 19 use anyhow::anyhow; 20 use seccompiler::SeccompAction; 21 use std::fs::File; 22 use std::io; 23 use std::mem::size_of; 24 use std::os::unix::io::AsRawFd; 25 use std::result; 26 use std::sync::atomic::AtomicBool; 27 use std::sync::{Arc, Barrier}; 28 use thiserror::Error; 29 use versionize::{VersionMap, Versionize, VersionizeResult}; 30 use versionize_derive::Versionize; 31 use virtio_queue::{DescriptorChain, Queue, QueueT}; 32 use vm_memory::{ 33 Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, 34 GuestMemoryError, GuestMemoryLoadGuard, 35 }; 36 use vm_migration::VersionMapped; 37 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 38 use vm_virtio::{AccessPlatform, Translatable}; 39 use vmm_sys_util::eventfd::EventFd; 40 41 const QUEUE_SIZE: u16 = 256; 42 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 43 44 const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0; 45 const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0; 46 const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1; 47 48 // New descriptors are pending on the virtio queue. 49 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 50 51 #[derive(Copy, Clone, Debug, Default, Versionize)] 52 #[repr(C)] 53 struct VirtioPmemConfig { 54 start: u64, 55 size: u64, 56 } 57 58 // SAFETY: it only has data and has no implicit padding. 59 unsafe impl ByteValued for VirtioPmemConfig {} 60 61 #[derive(Copy, Clone, Debug, Default)] 62 #[repr(C)] 63 struct VirtioPmemReq { 64 type_: u32, 65 } 66 67 // SAFETY: it only has data and has no implicit padding. 68 unsafe impl ByteValued for VirtioPmemReq {} 69 70 #[derive(Copy, Clone, Debug, Default)] 71 #[repr(C)] 72 struct VirtioPmemResp { 73 ret: u32, 74 } 75 76 // SAFETY: it only has data and has no implicit padding. 77 unsafe impl ByteValued for VirtioPmemResp {} 78 79 #[derive(Error, Debug)] 80 enum Error { 81 #[error("Bad guest memory addresses: {0}")] 82 GuestMemory(GuestMemoryError), 83 #[error("Unexpected write-only descriptor")] 84 UnexpectedWriteOnlyDescriptor, 85 #[error("Unexpected read-only descriptor")] 86 UnexpectedReadOnlyDescriptor, 87 #[error("Descriptor chain too short")] 88 DescriptorChainTooShort, 89 #[error("Buffer length too small")] 90 BufferLengthTooSmall, 91 #[error("Invalid request")] 92 InvalidRequest, 93 #[error("Failed adding used index: {0}")] 94 QueueAddUsed(virtio_queue::Error), 95 } 96 97 #[derive(Debug, PartialEq, Eq)] 98 enum RequestType { 99 Flush, 100 } 101 102 struct Request { 103 type_: RequestType, 104 status_addr: GuestAddress, 105 } 106 107 impl Request { 108 fn parse( 109 desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 110 access_platform: Option<&Arc<dyn AccessPlatform>>, 111 ) -> result::Result<Request, Error> { 112 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 113 // The descriptor contains the request type which MUST be readable. 114 if desc.is_write_only() { 115 return Err(Error::UnexpectedWriteOnlyDescriptor); 116 } 117 118 if desc.len() as usize != size_of::<VirtioPmemReq>() { 119 return Err(Error::InvalidRequest); 120 } 121 122 let request: VirtioPmemReq = desc_chain 123 .memory() 124 .read_obj( 125 desc.addr() 126 .translate_gva(access_platform, desc.len() as usize), 127 ) 128 .map_err(Error::GuestMemory)?; 129 130 let request_type = match request.type_ { 131 VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush, 132 _ => return Err(Error::InvalidRequest), 133 }; 134 135 let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 136 137 // The status MUST always be writable 138 if !status_desc.is_write_only() { 139 return Err(Error::UnexpectedReadOnlyDescriptor); 140 } 141 142 if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() { 143 return Err(Error::BufferLengthTooSmall); 144 } 145 146 Ok(Request { 147 type_: request_type, 148 status_addr: status_desc 149 .addr() 150 .translate_gva(access_platform, status_desc.len() as usize), 151 }) 152 } 153 } 154 155 struct PmemEpollHandler { 156 mem: GuestMemoryAtomic<GuestMemoryMmap>, 157 queue: Queue, 158 disk: File, 159 interrupt_cb: Arc<dyn VirtioInterrupt>, 160 queue_evt: EventFd, 161 kill_evt: EventFd, 162 pause_evt: EventFd, 163 access_platform: Option<Arc<dyn AccessPlatform>>, 164 } 165 166 impl PmemEpollHandler { 167 fn process_queue(&mut self) -> result::Result<bool, Error> { 168 let mut used_descs = false; 169 while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) { 170 let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) { 171 Ok(ref req) if (req.type_ == RequestType::Flush) => { 172 let status_code = match self.disk.sync_all() { 173 Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK, 174 Err(e) => { 175 error!("failed flushing disk image: {}", e); 176 VIRTIO_PMEM_RESP_TYPE_EIO 177 } 178 }; 179 180 let resp = VirtioPmemResp { ret: status_code }; 181 match desc_chain.memory().write_obj(resp, req.status_addr) { 182 Ok(_) => size_of::<VirtioPmemResp>() as u32, 183 Err(e) => { 184 error!("bad guest memory address: {}", e); 185 0 186 } 187 } 188 } 189 Ok(ref req) => { 190 // Currently, there is only one virtio-pmem request, FLUSH. 191 error!("Invalid virtio request type {:?}", req.type_); 192 0 193 } 194 Err(e) => { 195 error!("Failed to parse available descriptor chain: {:?}", e); 196 0 197 } 198 }; 199 200 self.queue 201 .add_used(desc_chain.memory(), desc_chain.head_index(), len) 202 .map_err(Error::QueueAddUsed)?; 203 used_descs = true; 204 } 205 206 Ok(used_descs) 207 } 208 209 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 210 self.interrupt_cb 211 .trigger(VirtioInterruptType::Queue(0)) 212 .map_err(|e| { 213 error!("Failed to signal used queue: {:?}", e); 214 DeviceError::FailedSignalingUsedQueue(e) 215 }) 216 } 217 218 fn run( 219 &mut self, 220 paused: Arc<AtomicBool>, 221 paused_sync: Arc<Barrier>, 222 ) -> result::Result<(), EpollHelperError> { 223 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 224 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 225 helper.run(paused, paused_sync, self)?; 226 227 Ok(()) 228 } 229 } 230 231 impl EpollHelperHandler for PmemEpollHandler { 232 fn handle_event( 233 &mut self, 234 _helper: &mut EpollHelper, 235 event: &epoll::Event, 236 ) -> result::Result<(), EpollHelperError> { 237 let ev_type = event.data as u16; 238 match ev_type { 239 QUEUE_AVAIL_EVENT => { 240 self.queue_evt.read().map_err(|e| { 241 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 242 })?; 243 244 let needs_notification = self.process_queue().map_err(|e| { 245 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 246 })?; 247 248 if needs_notification { 249 self.signal_used_queue().map_err(|e| { 250 EpollHelperError::HandleEvent(anyhow!( 251 "Failed to signal used queue: {:?}", 252 e 253 )) 254 })?; 255 } 256 } 257 _ => { 258 return Err(EpollHelperError::HandleEvent(anyhow!( 259 "Unexpected event: {}", 260 ev_type 261 ))); 262 } 263 } 264 Ok(()) 265 } 266 } 267 268 pub struct Pmem { 269 common: VirtioCommon, 270 id: String, 271 disk: Option<File>, 272 config: VirtioPmemConfig, 273 mapping: UserspaceMapping, 274 seccomp_action: SeccompAction, 275 exit_evt: EventFd, 276 277 // Hold ownership of the memory that is allocated for the device 278 // which will be automatically dropped when the device is dropped 279 _region: MmapRegion, 280 } 281 282 #[derive(Versionize)] 283 pub struct PmemState { 284 avail_features: u64, 285 acked_features: u64, 286 config: VirtioPmemConfig, 287 } 288 289 impl VersionMapped for PmemState {} 290 291 impl Pmem { 292 #[allow(clippy::too_many_arguments)] 293 pub fn new( 294 id: String, 295 disk: File, 296 addr: GuestAddress, 297 mapping: UserspaceMapping, 298 _region: MmapRegion, 299 iommu: bool, 300 seccomp_action: SeccompAction, 301 exit_evt: EventFd, 302 state: Option<PmemState>, 303 ) -> io::Result<Pmem> { 304 let (avail_features, acked_features, config, paused) = if let Some(state) = state { 305 info!("Restoring virtio-pmem {}", id); 306 ( 307 state.avail_features, 308 state.acked_features, 309 state.config, 310 true, 311 ) 312 } else { 313 let config = VirtioPmemConfig { 314 start: addr.raw_value().to_le(), 315 size: (_region.size() as u64).to_le(), 316 }; 317 318 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 319 320 if iommu { 321 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 322 } 323 (avail_features, 0, config, false) 324 }; 325 326 Ok(Pmem { 327 common: VirtioCommon { 328 device_type: VirtioDeviceType::Pmem as u32, 329 queue_sizes: QUEUE_SIZES.to_vec(), 330 paused_sync: Some(Arc::new(Barrier::new(2))), 331 avail_features, 332 acked_features, 333 min_queues: 1, 334 paused: Arc::new(AtomicBool::new(paused)), 335 ..Default::default() 336 }, 337 id, 338 disk: Some(disk), 339 config, 340 mapping, 341 seccomp_action, 342 _region, 343 exit_evt, 344 }) 345 } 346 347 fn state(&self) -> PmemState { 348 PmemState { 349 avail_features: self.common.avail_features, 350 acked_features: self.common.acked_features, 351 config: self.config, 352 } 353 } 354 355 #[cfg(fuzzing)] 356 pub fn wait_for_epoll_threads(&mut self) { 357 self.common.wait_for_epoll_threads(); 358 } 359 } 360 361 impl Drop for Pmem { 362 fn drop(&mut self) { 363 if let Some(kill_evt) = self.common.kill_evt.take() { 364 // Ignore the result because there is nothing we can do about it. 365 let _ = kill_evt.write(1); 366 } 367 } 368 } 369 370 impl VirtioDevice for Pmem { 371 fn device_type(&self) -> u32 { 372 self.common.device_type 373 } 374 375 fn queue_max_sizes(&self) -> &[u16] { 376 &self.common.queue_sizes 377 } 378 379 fn features(&self) -> u64 { 380 self.common.avail_features 381 } 382 383 fn ack_features(&mut self, value: u64) { 384 self.common.ack_features(value) 385 } 386 387 fn read_config(&self, offset: u64, data: &mut [u8]) { 388 self.read_config_from_slice(self.config.as_slice(), offset, data); 389 } 390 391 fn activate( 392 &mut self, 393 mem: GuestMemoryAtomic<GuestMemoryMmap>, 394 interrupt_cb: Arc<dyn VirtioInterrupt>, 395 mut queues: Vec<(usize, Queue, EventFd)>, 396 ) -> ActivateResult { 397 self.common.activate(&queues, &interrupt_cb)?; 398 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 399 if let Some(disk) = self.disk.as_ref() { 400 let disk = disk.try_clone().map_err(|e| { 401 error!("failed cloning pmem disk: {}", e); 402 ActivateError::BadActivate 403 })?; 404 405 let (_, queue, queue_evt) = queues.remove(0); 406 407 let mut handler = PmemEpollHandler { 408 mem, 409 queue, 410 disk, 411 interrupt_cb, 412 queue_evt, 413 kill_evt, 414 pause_evt, 415 access_platform: self.common.access_platform.clone(), 416 }; 417 418 let paused = self.common.paused.clone(); 419 let paused_sync = self.common.paused_sync.clone(); 420 let mut epoll_threads = Vec::new(); 421 422 spawn_virtio_thread( 423 &self.id, 424 &self.seccomp_action, 425 Thread::VirtioPmem, 426 &mut epoll_threads, 427 &self.exit_evt, 428 move || handler.run(paused, paused_sync.unwrap()), 429 )?; 430 431 self.common.epoll_threads = Some(epoll_threads); 432 433 event!("virtio-device", "activated", "id", &self.id); 434 return Ok(()); 435 } 436 Err(ActivateError::BadActivate) 437 } 438 439 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 440 let result = self.common.reset(); 441 event!("virtio-device", "reset", "id", &self.id); 442 result 443 } 444 445 fn userspace_mappings(&self) -> Vec<UserspaceMapping> { 446 vec![self.mapping.clone()] 447 } 448 449 fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) { 450 self.common.set_access_platform(access_platform) 451 } 452 } 453 454 impl Pausable for Pmem { 455 fn pause(&mut self) -> result::Result<(), MigratableError> { 456 self.common.pause() 457 } 458 459 fn resume(&mut self) -> result::Result<(), MigratableError> { 460 self.common.resume() 461 } 462 } 463 464 impl Snapshottable for Pmem { 465 fn id(&self) -> String { 466 self.id.clone() 467 } 468 469 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 470 Snapshot::new_from_versioned_state(&self.state()) 471 } 472 } 473 474 impl Transportable for Pmem {} 475 impl Migratable for Pmem {} 476