// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use std::fs::File;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier};

use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
    UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
    VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, MmapRegion};
use crate::{VirtioInterrupt, VirtioInterruptType};

const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
#[repr(C)]
struct VirtioPmemConfig {
    start: u64,
    size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    type_: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    ret: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}
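
// Not part of the original file: a minimal layout-sanity sketch. The SAFETY
// comments above rely on these #[repr(C)] structs having only plain data and
// no implicit padding; the compile-time asserts below simply record the sizes
// that assumption implies (4-byte request/response, 16-byte config), under the
// assumption that the crate's toolchain supports `assert!` in const context.
const _: () = assert!(size_of::<VirtioPmemReq>() == 4);
const _: () = assert!(size_of::<VirtioPmemResp>() == 4);
const _: () = assert!(size_of::<VirtioPmemConfig>() == 16);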

#[derive(Error, Debug)]
enum Error {
    #[error("Bad guest memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Unexpected write-only descriptor")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Unexpected read-only descriptor")]
    UnexpectedReadOnlyDescriptor,
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    #[error("Buffer length too small")]
    BufferLengthTooSmall,
    #[error("Invalid request")]
    InvalidRequest,
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[derive(Debug, PartialEq, Eq)]
enum RequestType {
    Flush,
}

struct Request {
    type_: RequestType,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
        access_platform: Option<&Arc<dyn AccessPlatform>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }

        if desc.len() as usize != size_of::<VirtioPmemReq>() {
            return Err(Error::InvalidRequest);
        }

        let request: VirtioPmemReq = desc_chain
            .memory()
            .read_obj(
                desc.addr()
                    .translate_gva(access_platform, desc.len() as usize),
            )
            .map_err(Error::GuestMemory)?;

        let request_type = match request.type_ {
            VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
            _ => return Err(Error::InvalidRequest),
        };

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            type_: request_type,
            status_addr: status_desc
                .addr()
                .translate_gva(access_platform, status_desc.len() as usize),
        })
    }
}

struct PmemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    queue: Queue,
    disk: File,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    access_platform: Option<Arc<dyn AccessPlatform>>,
}

impl PmemEpollHandler {
    fn process_queue(&mut self) -> result::Result<bool, Error> {
        let mut used_descs = false;
        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) {
                Ok(ref req) if (req.type_ == RequestType::Flush) => {
                    let status_code = match self.disk.sync_all() {
                        Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
                        Err(e) => {
                            error!("failed flushing disk image: {}", e);
                            VIRTIO_PMEM_RESP_TYPE_EIO
                        }
                    };

                    let resp = VirtioPmemResp { ret: status_code };
                    match desc_chain.memory().write_obj(resp, req.status_addr) {
                        Ok(_) => size_of::<VirtioPmemResp>() as u32,
                        Err(e) => {
                            error!("bad guest memory address: {}", e);
                            0
                        }
                    }
                }
                Ok(ref req) => {
                    // Currently, there is only one virtio-pmem request, FLUSH.
                    error!("Invalid virtio request type {:?}", req.type_);
                    0
                }
                Err(e) => {
                    error!("Failed to parse available descriptor chain: {:?}", e);
                    0
                }
            };

            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(VirtioInterruptType::Queue(0))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for PmemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;

                if needs_notification {
                    self.signal_used_queue().map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

pub struct Pmem {
    common: VirtioCommon,
    id: String,
    disk: Option<File>,
    config: VirtioPmemConfig,
    mapping: UserspaceMapping,
    seccomp_action: SeccompAction,
    exit_evt: EventFd,

    // Hold ownership of the memory that is allocated for the device
    // which will be automatically dropped when the device is dropped
    _region: MmapRegion,
}

#[derive(Serialize, Deserialize)]
pub struct PmemState {
    avail_features: u64,
    acked_features: u64,
    config: VirtioPmemConfig,
}

impl Pmem {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        disk: File,
        addr: GuestAddress,
        mapping: UserspaceMapping,
        _region: MmapRegion,
        iommu: bool,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
        state: Option<PmemState>,
    ) -> io::Result<Pmem> {
        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-pmem {}", id);
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let config = VirtioPmemConfig {
                start: addr.raw_value().to_le(),
                size: (_region.size() as u64).to_le(),
            };

            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            if iommu {
                avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
            }
            (avail_features, 0, config, false)
        };

        Ok(Pmem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Pmem as u32,
                queue_sizes: QUEUE_SIZES.to_vec(),
                paused_sync: Some(Arc::new(Barrier::new(2))),
                avail_features,
                acked_features,
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            disk: Some(disk),
            config,
            mapping,
            seccomp_action,
            _region,
            exit_evt,
        })
    }

    fn state(&self) -> PmemState {
        PmemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Pmem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Pmem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        if let Some(disk) = self.disk.as_ref() {
            let disk = disk.try_clone().map_err(|e| {
                error!("failed cloning pmem disk: {}", e);
                ActivateError::BadActivate
            })?;

            let (_, queue, queue_evt) = queues.remove(0);

            let mut handler = PmemEpollHandler {
                mem,
                queue,
                disk,
                interrupt_cb,
                queue_evt,
                kill_evt,
                pause_evt,
                access_platform: self.common.access_platform.clone(),
            };

            let paused = self.common.paused.clone();
            let paused_sync = self.common.paused_sync.clone();
            let mut epoll_threads = Vec::new();

            spawn_virtio_thread(
                &self.id,
                &self.seccomp_action,
                Thread::VirtioPmem,
                &mut epoll_threads,
                &self.exit_evt,
                move || handler.run(paused, paused_sync.unwrap()),
            )?;

            self.common.epoll_threads = Some(epoll_threads);

            event!("virtio-device", "activated", "id", &self.id);
            return Ok(());
        }
        Err(ActivateError::BadActivate)
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }

    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
        vec![self.mapping.clone()]
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

impl Pausable for Pmem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Pmem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}

impl Transportable for Pmem {}
impl Migratable for Pmem {}
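
// Not part of the original file: a small test sketch exercising the ByteValued
// view of the config and response structs, which is how `read_config` exposes
// VirtioPmemConfig to the guest. It assumes vm_memory's
// ByteValued::{as_slice, from_slice} helpers; the values used are arbitrary.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_virtio_pmem_config_bytes_roundtrip() {
        let config = VirtioPmemConfig {
            start: 0x1_0000_0000u64.to_le(),
            size: 0x4000_0000u64.to_le(),
        };

        // The raw byte view must cover exactly the two u64 fields.
        let bytes = config.as_slice();
        assert_eq!(bytes.len(), size_of::<VirtioPmemConfig>());

        // Reinterpreting the same bytes must yield the original values.
        let restored = VirtioPmemConfig::from_slice(bytes).unwrap();
        assert_eq!(restored.start, config.start);
        assert_eq!(restored.size, config.size);
    }

    #[test]
    fn test_virtio_pmem_resp_ok_bytes() {
        // VIRTIO_PMEM_RESP_TYPE_OK is 0, so the response serializes to zeroes.
        let resp = VirtioPmemResp {
            ret: VIRTIO_PMEM_RESP_TYPE_OK,
        };
        assert_eq!(resp.as_slice(), &[0u8; 4]);
    }
}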