// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
    UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
    VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, MmapRegion};
use crate::{VirtioInterrupt, VirtioInterruptType};
use anyhow::anyhow;
use seccompiler::SeccompAction;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier};
use thiserror::Error;
use virtio_queue::{DescriptorChain, Queue, QueueT};
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError, GuestMemoryLoadGuard,
};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
#[repr(C)]
struct VirtioPmemConfig {
    start: u64,
    size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    type_: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    ret: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}

#[derive(Error, Debug)]
enum Error {
    #[error("Bad guest memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    #[error("Unexpected write-only descriptor")]
    UnexpectedWriteOnlyDescriptor,
    #[error("Unexpected read-only descriptor")]
    UnexpectedReadOnlyDescriptor,
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    #[error("Buffer length too small")]
    BufferLengthTooSmall,
    #[error("Invalid request")]
    InvalidRequest,
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}

#[derive(Debug, PartialEq, Eq)]
enum RequestType {
    Flush,
}

struct Request {
    type_: RequestType,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
        access_platform: Option<&Arc<dyn AccessPlatform>>,
    ) -> result::Result<Request, Error> {
        let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
        // The descriptor contains the request type which MUST be readable.
        if desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }

        if desc.len() as usize != size_of::<VirtioPmemReq>() {
            return Err(Error::InvalidRequest);
        }

        let request: VirtioPmemReq = desc_chain
            .memory()
            .read_obj(
                desc.addr()
                    .translate_gva(access_platform, desc.len() as usize),
            )
            .map_err(Error::GuestMemory)?;

        let request_type = match request.type_ {
            VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
            _ => return Err(Error::InvalidRequest),
        };

        let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            type_: request_type,
            status_addr: status_desc
                .addr()
                .translate_gva(access_platform, status_desc.len() as usize),
        })
    }
}

struct PmemEpollHandler {
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    queue: Queue,
    disk: File,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
    access_platform: Option<Arc<dyn AccessPlatform>>,
}

impl PmemEpollHandler {
    fn process_queue(&mut self) -> result::Result<bool, Error> {
        let mut used_descs = false;
        while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
            let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) {
                Ok(ref req) if (req.type_ == RequestType::Flush) => {
                    let status_code = match self.disk.sync_all() {
                        Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
                        Err(e) => {
                            error!("failed flushing disk image: {}", e);
                            VIRTIO_PMEM_RESP_TYPE_EIO
                        }
                    };

                    let resp = VirtioPmemResp { ret: status_code };
                    match desc_chain.memory().write_obj(resp, req.status_addr) {
                        Ok(_) => size_of::<VirtioPmemResp>() as u32,
                        Err(e) => {
                            error!("bad guest memory address: {}", e);
                            0
                        }
                    }
                }
                Ok(ref req) => {
                    // Currently, there is only one virtio-pmem request, FLUSH.
                    error!("Invalid virtio request type {:?}", req.type_);
                    0
                }
                Err(e) => {
                    error!("Failed to parse available descriptor chain: {:?}", e);
                    0
                }
            };

            self.queue
                .add_used(desc_chain.memory(), desc_chain.head_index(), len)
                .map_err(Error::QueueAddUsed)?;
            used_descs = true;
        }

        Ok(used_descs)
    }

    fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(VirtioInterruptType::Queue(0))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for PmemEpollHandler {
    fn handle_event(
        &mut self,
        _helper: &mut EpollHelper,
        event: &epoll::Event,
    ) -> result::Result<(), EpollHelperError> {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                self.queue_evt.read().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
                })?;

                let needs_notification = self.process_queue().map_err(|e| {
                    EpollHelperError::HandleEvent(anyhow!("Failed to process queue: {:?}", e))
                })?;

                if needs_notification {
                    self.signal_used_queue().map_err(|e| {
                        EpollHelperError::HandleEvent(anyhow!(
                            "Failed to signal used queue: {:?}",
                            e
                        ))
                    })?;
                }
            }
            _ => {
                return Err(EpollHelperError::HandleEvent(anyhow!(
                    "Unexpected event: {}",
                    ev_type
                )));
            }
        }
        Ok(())
    }
}

pub struct Pmem {
    common: VirtioCommon,
    id: String,
    disk: Option<File>,
    config: VirtioPmemConfig,
    mapping: UserspaceMapping,
    seccomp_action: SeccompAction,
    exit_evt: EventFd,

    // Hold ownership of the memory that is allocated for the device
    // which will be automatically dropped when the device is dropped
    _region: MmapRegion,
}

#[derive(Serialize, Deserialize)]
pub struct PmemState {
    avail_features: u64,
    acked_features: u64,
    config: VirtioPmemConfig,
}

impl Pmem {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        disk: File,
        addr: GuestAddress,
        mapping: UserspaceMapping,
        _region: MmapRegion,
        iommu: bool,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
        state: Option<PmemState>,
    ) -> io::Result<Pmem> {
        let (avail_features, acked_features, config, paused) = if let Some(state) = state {
            info!("Restoring virtio-pmem {}", id);
            (
                state.avail_features,
                state.acked_features,
                state.config,
                true,
            )
        } else {
            let config = VirtioPmemConfig {
                start: addr.raw_value().to_le(),
                size: (_region.size() as u64).to_le(),
            };

            let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

            if iommu {
                avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
            }
            (avail_features, 0, config, false)
        };

        Ok(Pmem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Pmem as u32,
                queue_sizes: QUEUE_SIZES.to_vec(),
                paused_sync: Some(Arc::new(Barrier::new(2))),
                avail_features,
                acked_features,
                min_queues: 1,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            disk: Some(disk),
            config,
            mapping,
            seccomp_action,
            _region,
            exit_evt,
        })
    }

    fn state(&self) -> PmemState {
        PmemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
        }
    }

    #[cfg(fuzzing)]
    pub fn wait_for_epoll_threads(&mut self) {
        self.common.wait_for_epoll_threads();
    }
}

impl Drop for Pmem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
        self.common.wait_for_epoll_threads();
    }
}

impl VirtioDevice for Pmem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        if let Some(disk) = self.disk.as_ref() {
            let disk = disk.try_clone().map_err(|e| {
                error!("failed cloning pmem disk: {}", e);
                ActivateError::BadActivate
            })?;

            let (_, queue, queue_evt) = queues.remove(0);

            let mut handler = PmemEpollHandler {
                mem,
                queue,
                disk,
                interrupt_cb,
                queue_evt,
                kill_evt,
                pause_evt,
                access_platform: self.common.access_platform.clone(),
            };

            let paused = self.common.paused.clone();
            let paused_sync = self.common.paused_sync.clone();
            let mut epoll_threads = Vec::new();

            spawn_virtio_thread(
                &self.id,
                &self.seccomp_action,
                Thread::VirtioPmem,
                &mut epoll_threads,
                &self.exit_evt,
                move || handler.run(paused, paused_sync.unwrap()),
            )?;

            self.common.epoll_threads = Some(epoll_threads);

            event!("virtio-device", "activated", "id", &self.id);
            return Ok(());
        }
        Err(ActivateError::BadActivate)
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }

    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
        vec![self.mapping.clone()]
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

impl Pausable for Pmem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Pmem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}

impl Transportable for Pmem {}
impl Migratable for Pmem {}
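
// The test below is an illustrative addition, not part of the original module: a
// minimal sketch checking the guest-visible config space layout. VirtioPmemConfig
// is the structure exposed through read_config() above, so its ByteValued view
// should be exactly two little-endian u64 fields (start and size), i.e. 16 bytes.
// The test name and the example values are assumptions chosen for the sketch.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_virtio_pmem_config_layout() {
        // Hypothetical start address and size of a pmem region, in little-endian
        // form as Pmem::new() stores them.
        let config = VirtioPmemConfig {
            start: 0x1_0000_0000u64.to_le(),
            size: 0x2000_0000u64.to_le(),
        };

        // Two u64 fields with #[repr(C)] and no padding: 16 bytes of config space.
        assert_eq!(size_of::<VirtioPmemConfig>(), 16);
        assert_eq!(config.as_slice().len(), 16);
    }
}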