1 // Copyright 2019 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Copyright © 2019 Intel Corporation 6 // 7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 8 9 use std::fs::File; 10 use std::mem::size_of; 11 use std::os::unix::io::AsRawFd; 12 use std::sync::atomic::AtomicBool; 13 use std::sync::{Arc, Barrier}; 14 use std::{io, result}; 15 16 use anyhow::anyhow; 17 use seccompiler::SeccompAction; 18 use serde::{Deserialize, Serialize}; 19 use thiserror::Error; 20 use virtio_queue::{DescriptorChain, Queue, QueueT}; 21 use vm_memory::{ 22 Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, 23 GuestMemoryError, GuestMemoryLoadGuard, 24 }; 25 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 26 use vm_virtio::{AccessPlatform, Translatable}; 27 use vmm_sys_util::eventfd::EventFd; 28 29 use super::{ 30 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, 31 Error as DeviceError, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, 32 EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, 33 }; 34 use crate::seccomp_filters::Thread; 35 use crate::thread_helper::spawn_virtio_thread; 36 use crate::{GuestMemoryMmap, MmapRegion, VirtioInterrupt, VirtioInterruptType}; 37 38 const QUEUE_SIZE: u16 = 256; 39 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 40 41 const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0; 42 const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0; 43 const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1; 44 45 // New descriptors are pending on the virtio queue. 46 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 47 48 #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)] 49 #[repr(C)] 50 struct VirtioPmemConfig { 51 start: u64, 52 size: u64, 53 } 54 55 // SAFETY: it only has data and has no implicit padding. 56 unsafe impl ByteValued for VirtioPmemConfig {} 57 58 #[derive(Copy, Clone, Debug, Default)] 59 #[repr(C)] 60 struct VirtioPmemReq { 61 type_: u32, 62 } 63 64 // SAFETY: it only has data and has no implicit padding. 65 unsafe impl ByteValued for VirtioPmemReq {} 66 67 #[derive(Copy, Clone, Debug, Default)] 68 #[repr(C)] 69 struct VirtioPmemResp { 70 ret: u32, 71 } 72 73 // SAFETY: it only has data and has no implicit padding. 74 unsafe impl ByteValued for VirtioPmemResp {} 75 76 #[derive(Error, Debug)] 77 enum Error { 78 #[error("Bad guest memory addresses: {0}")] 79 GuestMemory(GuestMemoryError), 80 #[error("Unexpected write-only descriptor")] 81 UnexpectedWriteOnlyDescriptor, 82 #[error("Unexpected read-only descriptor")] 83 UnexpectedReadOnlyDescriptor, 84 #[error("Descriptor chain too short")] 85 DescriptorChainTooShort, 86 #[error("Buffer length too small")] 87 BufferLengthTooSmall, 88 #[error("Invalid request")] 89 InvalidRequest, 90 #[error("Failed adding used index: {0}")] 91 QueueAddUsed(virtio_queue::Error), 92 } 93 94 #[derive(Debug, PartialEq, Eq)] 95 enum RequestType { 96 Flush, 97 } 98 99 struct Request { 100 type_: RequestType, 101 status_addr: GuestAddress, 102 } 103 104 impl Request { 105 fn parse( 106 desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 107 access_platform: Option<&Arc<dyn AccessPlatform>>, 108 ) -> result::Result<Request, Error> { 109 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 110 // The descriptor contains the request type which MUST be readable. 111 if desc.is_write_only() { 112 return Err(Error::UnexpectedWriteOnlyDescriptor); 113 } 114 115 if desc.len() as usize != size_of::<VirtioPmemReq>() { 116 return Err(Error::InvalidRequest); 117 } 118 119 let request: VirtioPmemReq = desc_chain 120 .memory() 121 .read_obj( 122 desc.addr() 123 .translate_gva(access_platform, desc.len() as usize), 124 ) 125 .map_err(Error::GuestMemory)?; 126 127 let request_type = match request.type_ { 128 VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush, 129 _ => return Err(Error::InvalidRequest), 130 }; 131 132 let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 133 134 // The status MUST always be writable 135 if !status_desc.is_write_only() { 136 return Err(Error::UnexpectedReadOnlyDescriptor); 137 } 138 139 if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() { 140 return Err(Error::BufferLengthTooSmall); 141 } 142 143 Ok(Request { 144 type_: request_type, 145 status_addr: status_desc 146 .addr() 147 .translate_gva(access_platform, status_desc.len() as usize), 148 }) 149 } 150 } 151 152 struct PmemEpollHandler { 153 mem: GuestMemoryAtomic<GuestMemoryMmap>, 154 queue: Queue, 155 disk: File, 156 interrupt_cb: Arc<dyn VirtioInterrupt>, 157 queue_evt: EventFd, 158 kill_evt: EventFd, 159 pause_evt: EventFd, 160 access_platform: Option<Arc<dyn AccessPlatform>>, 161 } 162 163 impl PmemEpollHandler { 164 fn process_queue(&mut self) -> result::Result<bool, Error> { 165 let mut used_descs = false; 166 while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) { 167 let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) { 168 Ok(ref req) if (req.type_ == RequestType::Flush) => { 169 let status_code = match self.disk.sync_all() { 170 Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK, 171 Err(e) => { 172 error!("failed flushing disk image: {}", e); 173 VIRTIO_PMEM_RESP_TYPE_EIO 174 } 175 }; 176 177 let resp = VirtioPmemResp { ret: status_code }; 178 match desc_chain.memory().write_obj(resp, req.status_addr) { 179 Ok(_) => size_of::<VirtioPmemResp>() as u32, 180 Err(e) => { 181 error!("bad guest memory address: {}", e); 182 0 183 } 184 } 185 } 186 Ok(ref req) => { 187 // Currently, there is only one virtio-pmem request, FLUSH. 188 error!("Invalid virtio request type {:?}", req.type_); 189 0 190 } 191 Err(e) => { 192 error!("Failed to parse available descriptor chain: {:?}", e); 193 0 194 } 195 }; 196 197 self.queue 198 .add_used(desc_chain.memory(), desc_chain.head_index(), len) 199 .map_err(Error::QueueAddUsed)?; 200 used_descs = true; 201 } 202 203 Ok(used_descs) 204 } 205 206 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 207 self.interrupt_cb 208 .trigger(VirtioInterruptType::Queue(0)) 209 .map_err(|e| { 210 error!("Failed to signal used queue: {:?}", e); 211 DeviceError::FailedSignalingUsedQueue(e) 212 }) 213 } 214 215 fn run( 216 &mut self, 217 paused: Arc<AtomicBool>, 218 paused_sync: Arc<Barrier>, 219 ) -> result::Result<(), EpollHelperError> { 220 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 221 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 222 helper.run(paused, paused_sync, self)?; 223 224 Ok(()) 225 } 226 } 227 228 impl EpollHelperHandler for PmemEpollHandler { 229 fn handle_event( 230 &mut self, 231 _helper: &mut EpollHelper, 232 event: &epoll::Event, 233 ) -> result::Result<(), EpollHelperError> { 234 let ev_type = event.data as u16; 235 match ev_type { 236 QUEUE_AVAIL_EVENT => { 237 self.queue_evt.read().map_err(|e| { 238 EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e)) 239 })?; 240 241 let needs_notification = self.process_queue().map_err(|e| { 242 EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e)) 243 })?; 244 245 if needs_notification { 246 self.signal_used_queue().map_err(|e| { 247 EpollHelperError::HandleEvent(anyhow!( 248 "Failed to signal used queue: {:?}", 249 e 250 )) 251 })?; 252 } 253 } 254 _ => { 255 return Err(EpollHelperError::HandleEvent(anyhow!( 256 "Unexpected event: {}", 257 ev_type 258 ))); 259 } 260 } 261 Ok(()) 262 } 263 } 264 265 pub struct Pmem { 266 common: VirtioCommon, 267 id: String, 268 disk: Option<File>, 269 config: VirtioPmemConfig, 270 mapping: UserspaceMapping, 271 seccomp_action: SeccompAction, 272 exit_evt: EventFd, 273 274 // Hold ownership of the memory that is allocated for the device 275 // which will be automatically dropped when the device is dropped 276 _region: MmapRegion, 277 } 278 279 #[derive(Serialize, Deserialize)] 280 pub struct PmemState { 281 avail_features: u64, 282 acked_features: u64, 283 config: VirtioPmemConfig, 284 } 285 286 impl Pmem { 287 #[allow(clippy::too_many_arguments)] 288 pub fn new( 289 id: String, 290 disk: File, 291 addr: GuestAddress, 292 mapping: UserspaceMapping, 293 _region: MmapRegion, 294 iommu: bool, 295 seccomp_action: SeccompAction, 296 exit_evt: EventFd, 297 state: Option<PmemState>, 298 ) -> io::Result<Pmem> { 299 let (avail_features, acked_features, config, paused) = if let Some(state) = state { 300 info!("Restoring virtio-pmem {}", id); 301 ( 302 state.avail_features, 303 state.acked_features, 304 state.config, 305 true, 306 ) 307 } else { 308 let config = VirtioPmemConfig { 309 start: addr.raw_value().to_le(), 310 size: (_region.size() as u64).to_le(), 311 }; 312 313 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 314 315 if iommu { 316 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 317 } 318 (avail_features, 0, config, false) 319 }; 320 321 Ok(Pmem { 322 common: VirtioCommon { 323 device_type: VirtioDeviceType::Pmem as u32, 324 queue_sizes: QUEUE_SIZES.to_vec(), 325 paused_sync: Some(Arc::new(Barrier::new(2))), 326 avail_features, 327 acked_features, 328 min_queues: 1, 329 paused: Arc::new(AtomicBool::new(paused)), 330 ..Default::default() 331 }, 332 id, 333 disk: Some(disk), 334 config, 335 mapping, 336 seccomp_action, 337 _region, 338 exit_evt, 339 }) 340 } 341 342 fn state(&self) -> PmemState { 343 PmemState { 344 avail_features: self.common.avail_features, 345 acked_features: self.common.acked_features, 346 config: self.config, 347 } 348 } 349 350 #[cfg(fuzzing)] 351 pub fn wait_for_epoll_threads(&mut self) { 352 self.common.wait_for_epoll_threads(); 353 } 354 } 355 356 impl Drop for Pmem { 357 fn drop(&mut self) { 358 if let Some(kill_evt) = self.common.kill_evt.take() { 359 // Ignore the result because there is nothing we can do about it. 360 let _ = kill_evt.write(1); 361 } 362 self.common.wait_for_epoll_threads(); 363 } 364 } 365 366 impl VirtioDevice for Pmem { 367 fn device_type(&self) -> u32 { 368 self.common.device_type 369 } 370 371 fn queue_max_sizes(&self) -> &[u16] { 372 &self.common.queue_sizes 373 } 374 375 fn features(&self) -> u64 { 376 self.common.avail_features 377 } 378 379 fn ack_features(&mut self, value: u64) { 380 self.common.ack_features(value) 381 } 382 383 fn read_config(&self, offset: u64, data: &mut [u8]) { 384 self.read_config_from_slice(self.config.as_slice(), offset, data); 385 } 386 387 fn activate( 388 &mut self, 389 mem: GuestMemoryAtomic<GuestMemoryMmap>, 390 interrupt_cb: Arc<dyn VirtioInterrupt>, 391 mut queues: Vec<(usize, Queue, EventFd)>, 392 ) -> ActivateResult { 393 self.common.activate(&queues, &interrupt_cb)?; 394 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 395 if let Some(disk) = self.disk.as_ref() { 396 let disk = disk.try_clone().map_err(|e| { 397 error!("failed cloning pmem disk: {}", e); 398 ActivateError::BadActivate 399 })?; 400 401 let (_, queue, queue_evt) = queues.remove(0); 402 403 let mut handler = PmemEpollHandler { 404 mem, 405 queue, 406 disk, 407 interrupt_cb, 408 queue_evt, 409 kill_evt, 410 pause_evt, 411 access_platform: self.common.access_platform.clone(), 412 }; 413 414 let paused = self.common.paused.clone(); 415 let paused_sync = self.common.paused_sync.clone(); 416 let mut epoll_threads = Vec::new(); 417 418 spawn_virtio_thread( 419 &self.id, 420 &self.seccomp_action, 421 Thread::VirtioPmem, 422 &mut epoll_threads, 423 &self.exit_evt, 424 move || handler.run(paused, paused_sync.unwrap()), 425 )?; 426 427 self.common.epoll_threads = Some(epoll_threads); 428 429 event!("virtio-device", "activated", "id", &self.id); 430 return Ok(()); 431 } 432 Err(ActivateError::BadActivate) 433 } 434 435 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 436 let result = self.common.reset(); 437 event!("virtio-device", "reset", "id", &self.id); 438 result 439 } 440 441 fn userspace_mappings(&self) -> Vec<UserspaceMapping> { 442 vec![self.mapping.clone()] 443 } 444 445 fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) { 446 self.common.set_access_platform(access_platform) 447 } 448 } 449 450 impl Pausable for Pmem { 451 fn pause(&mut self) -> result::Result<(), MigratableError> { 452 self.common.pause() 453 } 454 455 fn resume(&mut self) -> result::Result<(), MigratableError> { 456 self.common.resume() 457 } 458 } 459 460 impl Snapshottable for Pmem { 461 fn id(&self) -> String { 462 self.id.clone() 463 } 464 465 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 466 Snapshot::new_from_state(&self.state()) 467 } 468 } 469 470 impl Transportable for Pmem {} 471 impl Migratable for Pmem {} 472