1 // Copyright 2019 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Copyright © 2019 Intel Corporation 6 // 7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 8 9 use super::Error as DeviceError; 10 use super::{ 11 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, 12 UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, 13 VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, 14 }; 15 use crate::seccomp_filters::Thread; 16 use crate::thread_helper::spawn_virtio_thread; 17 use crate::{GuestMemoryMmap, MmapRegion}; 18 use crate::{VirtioInterrupt, VirtioInterruptType}; 19 use anyhow::anyhow; 20 use seccompiler::SeccompAction; 21 use std::fs::File; 22 use std::io; 23 use std::mem::size_of; 24 use std::os::unix::io::AsRawFd; 25 use std::result; 26 use std::sync::atomic::AtomicBool; 27 use std::sync::{Arc, Barrier}; 28 use thiserror::Error; 29 use versionize::{VersionMap, Versionize, VersionizeResult}; 30 use versionize_derive::Versionize; 31 use virtio_queue::{DescriptorChain, Queue, QueueT}; 32 use vm_memory::{ 33 Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, 34 GuestMemoryError, GuestMemoryLoadGuard, 35 }; 36 use vm_migration::VersionMapped; 37 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 38 use vm_virtio::{AccessPlatform, Translatable}; 39 use vmm_sys_util::eventfd::EventFd; 40 41 const QUEUE_SIZE: u16 = 256; 42 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 43 44 const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0; 45 const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0; 46 const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1; 47 48 // New descriptors are pending on the virtio queue. 49 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 50 51 #[derive(Copy, Clone, Debug, Default, Versionize)] 52 #[repr(C)] 53 struct VirtioPmemConfig { 54 start: u64, 55 size: u64, 56 } 57 58 // SAFETY: it only has data and has no implicit padding. 59 unsafe impl ByteValued for VirtioPmemConfig {} 60 61 #[derive(Copy, Clone, Debug, Default)] 62 #[repr(C)] 63 struct VirtioPmemReq { 64 type_: u32, 65 } 66 67 // SAFETY: it only has data and has no implicit padding. 68 unsafe impl ByteValued for VirtioPmemReq {} 69 70 #[derive(Copy, Clone, Debug, Default)] 71 #[repr(C)] 72 struct VirtioPmemResp { 73 ret: u32, 74 } 75 76 // SAFETY: it only has data and has no implicit padding. 77 unsafe impl ByteValued for VirtioPmemResp {} 78 79 #[derive(Error, Debug)] 80 enum Error { 81 #[error("Bad guest memory addresses: {0}")] 82 GuestMemory(GuestMemoryError), 83 #[error("Unexpected write-only descriptor")] 84 UnexpectedWriteOnlyDescriptor, 85 #[error("Unexpected read-only descriptor")] 86 UnexpectedReadOnlyDescriptor, 87 #[error("Descriptor chain too short")] 88 DescriptorChainTooShort, 89 #[error("Buffer length too small")] 90 BufferLengthTooSmall, 91 #[error("Invalid request")] 92 InvalidRequest, 93 #[error("Failed adding used index: {0}")] 94 QueueAddUsed(virtio_queue::Error), 95 } 96 97 #[derive(Debug, PartialEq, Eq)] 98 enum RequestType { 99 Flush, 100 } 101 102 struct Request { 103 type_: RequestType, 104 status_addr: GuestAddress, 105 } 106 107 impl Request { 108 fn parse( 109 desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 110 access_platform: Option<&Arc<dyn AccessPlatform>>, 111 ) -> result::Result<Request, Error> { 112 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 113 // The descriptor contains the request type which MUST be readable. 114 if desc.is_write_only() { 115 return Err(Error::UnexpectedWriteOnlyDescriptor); 116 } 117 118 if desc.len() as usize != size_of::<VirtioPmemReq>() { 119 return Err(Error::InvalidRequest); 120 } 121 122 let request: VirtioPmemReq = desc_chain 123 .memory() 124 .read_obj( 125 desc.addr() 126 .translate_gva(access_platform, desc.len() as usize), 127 ) 128 .map_err(Error::GuestMemory)?; 129 130 let request_type = match request.type_ { 131 VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush, 132 _ => return Err(Error::InvalidRequest), 133 }; 134 135 let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 136 137 // The status MUST always be writable 138 if !status_desc.is_write_only() { 139 return Err(Error::UnexpectedReadOnlyDescriptor); 140 } 141 142 if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() { 143 return Err(Error::BufferLengthTooSmall); 144 } 145 146 Ok(Request { 147 type_: request_type, 148 status_addr: status_desc 149 .addr() 150 .translate_gva(access_platform, status_desc.len() as usize), 151 }) 152 } 153 } 154 155 struct PmemEpollHandler { 156 mem: GuestMemoryAtomic<GuestMemoryMmap>, 157 queue: Queue, 158 disk: File, 159 interrupt_cb: Arc<dyn VirtioInterrupt>, 160 queue_evt: EventFd, 161 kill_evt: EventFd, 162 pause_evt: EventFd, 163 access_platform: Option<Arc<dyn AccessPlatform>>, 164 } 165 166 impl PmemEpollHandler { 167 fn process_queue(&mut self) -> result::Result<bool, Error> { 168 let mut used_descs = false; 169 while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) { 170 let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) { 171 Ok(ref req) if (req.type_ == RequestType::Flush) => { 172 let status_code = match self.disk.sync_all() { 173 Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK, 174 Err(e) => { 175 error!("failed flushing disk image: {}", e); 176 VIRTIO_PMEM_RESP_TYPE_EIO 177 } 178 }; 179 180 let resp = VirtioPmemResp { ret: status_code }; 181 match desc_chain.memory().write_obj(resp, req.status_addr) { 182 Ok(_) => size_of::<VirtioPmemResp>() as u32, 183 Err(e) => { 184 error!("bad guest memory address: {}", e); 185 0 186 } 187 } 188 } 189 Ok(ref req) => { 190 // Currently, there is only one virtio-pmem request, FLUSH. 191 error!("Invalid virtio request type {:?}", req.type_); 192 0 193 } 194 Err(e) => { 195 error!("Failed to parse available descriptor chain: {:?}", e); 196 0 197 } 198 }; 199 200 self.queue 201 .add_used(desc_chain.memory(), desc_chain.head_index(), len) 202 .map_err(Error::QueueAddUsed)?; 203 used_descs = true; 204 } 205 206 Ok(used_descs) 207 } 208 209 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 210 self.interrupt_cb 211 .trigger(VirtioInterruptType::Queue(0)) 212 .map_err(|e| { 213 error!("Failed to signal used queue: {:?}", e); 214 DeviceError::FailedSignalingUsedQueue(e) 215 }) 216 } 217 218 fn run( 219 &mut self, 220 paused: Arc<AtomicBool>, 221 paused_sync: Arc<Barrier>, 222 ) -> result::Result<(), EpollHelperError> { 223 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 224 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 225 helper.run(paused, paused_sync, self)?; 226 227 Ok(()) 228 } 229 } 230 231 impl EpollHelperHandler for PmemEpollHandler { 232 fn handle_event( 233 &mut self, 234 _helper: &mut EpollHelper, 235 event: &epoll::Event, 236 ) -> result::Result<(), EpollHelperError> { 237 let ev_type = event.data as u16; 238 match ev_type { 239 QUEUE_AVAIL_EVENT => { 240 self.queue_evt.read().map_err(|e| { 241 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 242 })?; 243 244 let needs_notification = self.process_queue().map_err(|e| { 245 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 246 })?; 247 248 if needs_notification { 249 self.signal_used_queue().map_err(|e| { 250 EpollHelperError::HandleEvent(anyhow!( 251 "Failed to signal used queue: {:?}", 252 e 253 )) 254 })?; 255 } 256 } 257 _ => { 258 return Err(EpollHelperError::HandleEvent(anyhow!( 259 "Unexpected event: {}", 260 ev_type 261 ))); 262 } 263 } 264 Ok(()) 265 } 266 } 267 268 pub struct Pmem { 269 common: VirtioCommon, 270 id: String, 271 disk: Option<File>, 272 config: VirtioPmemConfig, 273 mapping: UserspaceMapping, 274 seccomp_action: SeccompAction, 275 exit_evt: EventFd, 276 277 // Hold ownership of the memory that is allocated for the device 278 // which will be automatically dropped when the device is dropped 279 _region: MmapRegion, 280 } 281 282 #[derive(Versionize)] 283 pub struct PmemState { 284 avail_features: u64, 285 acked_features: u64, 286 config: VirtioPmemConfig, 287 } 288 289 impl VersionMapped for PmemState {} 290 291 impl Pmem { 292 #[allow(clippy::too_many_arguments)] 293 pub fn new( 294 id: String, 295 disk: File, 296 addr: GuestAddress, 297 mapping: UserspaceMapping, 298 _region: MmapRegion, 299 iommu: bool, 300 seccomp_action: SeccompAction, 301 exit_evt: EventFd, 302 state: Option<PmemState>, 303 ) -> io::Result<Pmem> { 304 let (avail_features, acked_features, config) = if let Some(state) = state { 305 info!("Restoring virtio-pmem {}", id); 306 (state.avail_features, state.acked_features, state.config) 307 } else { 308 let config = VirtioPmemConfig { 309 start: addr.raw_value().to_le(), 310 size: (_region.size() as u64).to_le(), 311 }; 312 313 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 314 315 if iommu { 316 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 317 } 318 (avail_features, 0, config) 319 }; 320 321 Ok(Pmem { 322 common: VirtioCommon { 323 device_type: VirtioDeviceType::Pmem as u32, 324 queue_sizes: QUEUE_SIZES.to_vec(), 325 paused_sync: Some(Arc::new(Barrier::new(2))), 326 avail_features, 327 acked_features, 328 min_queues: 1, 329 ..Default::default() 330 }, 331 id, 332 disk: Some(disk), 333 config, 334 mapping, 335 seccomp_action, 336 _region, 337 exit_evt, 338 }) 339 } 340 341 fn state(&self) -> PmemState { 342 PmemState { 343 avail_features: self.common.avail_features, 344 acked_features: self.common.acked_features, 345 config: self.config, 346 } 347 } 348 349 #[cfg(fuzzing)] 350 pub fn wait_for_epoll_threads(&mut self) { 351 self.common.wait_for_epoll_threads(); 352 } 353 } 354 355 impl Drop for Pmem { 356 fn drop(&mut self) { 357 if let Some(kill_evt) = self.common.kill_evt.take() { 358 // Ignore the result because there is nothing we can do about it. 359 let _ = kill_evt.write(1); 360 } 361 } 362 } 363 364 impl VirtioDevice for Pmem { 365 fn device_type(&self) -> u32 { 366 self.common.device_type 367 } 368 369 fn queue_max_sizes(&self) -> &[u16] { 370 &self.common.queue_sizes 371 } 372 373 fn features(&self) -> u64 { 374 self.common.avail_features 375 } 376 377 fn ack_features(&mut self, value: u64) { 378 self.common.ack_features(value) 379 } 380 381 fn read_config(&self, offset: u64, data: &mut [u8]) { 382 self.read_config_from_slice(self.config.as_slice(), offset, data); 383 } 384 385 fn activate( 386 &mut self, 387 mem: GuestMemoryAtomic<GuestMemoryMmap>, 388 interrupt_cb: Arc<dyn VirtioInterrupt>, 389 mut queues: Vec<(usize, Queue, EventFd)>, 390 ) -> ActivateResult { 391 self.common.activate(&queues, &interrupt_cb)?; 392 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 393 if let Some(disk) = self.disk.as_ref() { 394 let disk = disk.try_clone().map_err(|e| { 395 error!("failed cloning pmem disk: {}", e); 396 ActivateError::BadActivate 397 })?; 398 399 let (_, queue, queue_evt) = queues.remove(0); 400 401 let mut handler = PmemEpollHandler { 402 mem, 403 queue, 404 disk, 405 interrupt_cb, 406 queue_evt, 407 kill_evt, 408 pause_evt, 409 access_platform: self.common.access_platform.clone(), 410 }; 411 412 let paused = self.common.paused.clone(); 413 let paused_sync = self.common.paused_sync.clone(); 414 let mut epoll_threads = Vec::new(); 415 416 spawn_virtio_thread( 417 &self.id, 418 &self.seccomp_action, 419 Thread::VirtioPmem, 420 &mut epoll_threads, 421 &self.exit_evt, 422 move || handler.run(paused, paused_sync.unwrap()), 423 )?; 424 425 self.common.epoll_threads = Some(epoll_threads); 426 427 event!("virtio-device", "activated", "id", &self.id); 428 return Ok(()); 429 } 430 Err(ActivateError::BadActivate) 431 } 432 433 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 434 let result = self.common.reset(); 435 event!("virtio-device", "reset", "id", &self.id); 436 result 437 } 438 439 fn userspace_mappings(&self) -> Vec<UserspaceMapping> { 440 vec![self.mapping.clone()] 441 } 442 443 fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) { 444 self.common.set_access_platform(access_platform) 445 } 446 } 447 448 impl Pausable for Pmem { 449 fn pause(&mut self) -> result::Result<(), MigratableError> { 450 self.common.pause() 451 } 452 453 fn resume(&mut self) -> result::Result<(), MigratableError> { 454 self.common.resume() 455 } 456 } 457 458 impl Snapshottable for Pmem { 459 fn id(&self) -> String { 460 self.id.clone() 461 } 462 463 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 464 Snapshot::new_from_versioned_state(&self.id, &self.state()) 465 } 466 } 467 468 impl Transportable for Pmem {} 469 impl Migratable for Pmem {} 470