1 // Copyright 2019 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Copyright © 2019 Intel Corporation 6 // 7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 8 9 use super::Error as DeviceError; 10 use super::{ 11 ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, 12 UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, 13 VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1, 14 }; 15 use crate::seccomp_filters::Thread; 16 use crate::thread_helper::spawn_virtio_thread; 17 use crate::{GuestMemoryMmap, MmapRegion}; 18 use crate::{VirtioInterrupt, VirtioInterruptType}; 19 use seccompiler::SeccompAction; 20 use std::fmt::{self, Display}; 21 use std::fs::File; 22 use std::io; 23 use std::mem::size_of; 24 use std::os::unix::io::AsRawFd; 25 use std::result; 26 use std::sync::atomic::AtomicBool; 27 use std::sync::{Arc, Barrier}; 28 use versionize::{VersionMap, Versionize, VersionizeResult}; 29 use versionize_derive::Versionize; 30 use virtio_queue::{DescriptorChain, Queue}; 31 use vm_memory::{ 32 Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError, 33 GuestMemoryLoadGuard, 34 }; 35 use vm_migration::VersionMapped; 36 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 37 use vm_virtio::{AccessPlatform, Translatable}; 38 use vmm_sys_util::eventfd::EventFd; 39 40 const QUEUE_SIZE: u16 = 256; 41 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; 42 43 const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0; 44 const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0; 45 const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1; 46 47 // New descriptors are pending on the virtio queue. 48 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; 49 50 #[derive(Copy, Clone, Debug, Default, Versionize)] 51 #[repr(C)] 52 struct VirtioPmemConfig { 53 start: u64, 54 size: u64, 55 } 56 57 // SAFETY: it only has data and has no implicit padding. 58 unsafe impl ByteValued for VirtioPmemConfig {} 59 60 #[derive(Copy, Clone, Debug, Default)] 61 #[repr(C)] 62 struct VirtioPmemReq { 63 type_: u32, 64 } 65 66 // SAFETY: it only has data and has no implicit padding. 67 unsafe impl ByteValued for VirtioPmemReq {} 68 69 #[derive(Copy, Clone, Debug, Default)] 70 #[repr(C)] 71 struct VirtioPmemResp { 72 ret: u32, 73 } 74 75 // SAFETY: it only has data and has no implicit padding. 76 unsafe impl ByteValued for VirtioPmemResp {} 77 78 #[derive(Debug)] 79 enum Error { 80 /// Guest gave us bad memory addresses. 81 GuestMemory(GuestMemoryError), 82 /// Guest gave us a write only descriptor that protocol says to read from. 83 UnexpectedWriteOnlyDescriptor, 84 /// Guest gave us a read only descriptor that protocol says to write to. 85 UnexpectedReadOnlyDescriptor, 86 /// Guest gave us too few descriptors in a descriptor chain. 87 DescriptorChainTooShort, 88 /// Guest gave us a buffer that was too short to use. 89 BufferLengthTooSmall, 90 /// Guest sent us invalid request. 91 InvalidRequest, 92 } 93 94 impl Display for Error { 95 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 96 use self::Error::*; 97 98 match self { 99 BufferLengthTooSmall => write!(f, "buffer length too small"), 100 DescriptorChainTooShort => write!(f, "descriptor chain too short"), 101 GuestMemory(e) => write!(f, "bad guest memory address: {}", e), 102 InvalidRequest => write!(f, "invalid request"), 103 UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"), 104 UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"), 105 } 106 } 107 } 108 109 #[derive(Debug, PartialEq)] 110 enum RequestType { 111 Flush, 112 } 113 114 struct Request { 115 type_: RequestType, 116 status_addr: GuestAddress, 117 } 118 119 impl Request { 120 fn parse( 121 desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>, 122 access_platform: Option<&Arc<dyn AccessPlatform>>, 123 ) -> result::Result<Request, Error> { 124 let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 125 // The descriptor contains the request type which MUST be readable. 126 if desc.is_write_only() { 127 return Err(Error::UnexpectedWriteOnlyDescriptor); 128 } 129 130 if desc.len() as usize != size_of::<VirtioPmemReq>() { 131 return Err(Error::InvalidRequest); 132 } 133 134 let request: VirtioPmemReq = desc_chain 135 .memory() 136 .read_obj( 137 desc.addr() 138 .translate_gva(access_platform, desc.len() as usize), 139 ) 140 .map_err(Error::GuestMemory)?; 141 142 let request_type = match request.type_ { 143 VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush, 144 _ => return Err(Error::InvalidRequest), 145 }; 146 147 let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; 148 149 // The status MUST always be writable 150 if !status_desc.is_write_only() { 151 return Err(Error::UnexpectedReadOnlyDescriptor); 152 } 153 154 if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() { 155 return Err(Error::BufferLengthTooSmall); 156 } 157 158 Ok(Request { 159 type_: request_type, 160 status_addr: status_desc 161 .addr() 162 .translate_gva(access_platform, status_desc.len() as usize), 163 }) 164 } 165 } 166 167 struct PmemEpollHandler { 168 queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>, 169 disk: File, 170 interrupt_cb: Arc<dyn VirtioInterrupt>, 171 queue_evt: EventFd, 172 kill_evt: EventFd, 173 pause_evt: EventFd, 174 access_platform: Option<Arc<dyn AccessPlatform>>, 175 } 176 177 impl PmemEpollHandler { 178 fn process_queue(&mut self) -> bool { 179 let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize]; 180 let mut used_count = 0; 181 for mut desc_chain in self.queue.iter().unwrap() { 182 let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) { 183 Ok(ref req) if (req.type_ == RequestType::Flush) => { 184 let status_code = match self.disk.sync_all() { 185 Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK, 186 Err(e) => { 187 error!("failed flushing disk image: {}", e); 188 VIRTIO_PMEM_RESP_TYPE_EIO 189 } 190 }; 191 192 let resp = VirtioPmemResp { ret: status_code }; 193 match desc_chain.memory().write_obj(resp, req.status_addr) { 194 Ok(_) => size_of::<VirtioPmemResp>() as u32, 195 Err(e) => { 196 error!("bad guest memory address: {}", e); 197 0 198 } 199 } 200 } 201 Ok(ref req) => { 202 // Currently, there is only one virtio-pmem request, FLUSH. 203 error!("Invalid virtio request type {:?}", req.type_); 204 0 205 } 206 Err(e) => { 207 error!("Failed to parse available descriptor chain: {:?}", e); 208 0 209 } 210 }; 211 212 used_desc_heads[used_count] = (desc_chain.head_index(), len); 213 used_count += 1; 214 } 215 216 for &(desc_index, len) in &used_desc_heads[..used_count] { 217 self.queue.add_used(desc_index, len).unwrap(); 218 } 219 used_count > 0 220 } 221 222 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 223 self.interrupt_cb 224 .trigger(VirtioInterruptType::Queue(0)) 225 .map_err(|e| { 226 error!("Failed to signal used queue: {:?}", e); 227 DeviceError::FailedSignalingUsedQueue(e) 228 }) 229 } 230 231 fn run( 232 &mut self, 233 paused: Arc<AtomicBool>, 234 paused_sync: Arc<Barrier>, 235 ) -> result::Result<(), EpollHelperError> { 236 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 237 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 238 helper.run(paused, paused_sync, self)?; 239 240 Ok(()) 241 } 242 } 243 244 impl EpollHelperHandler for PmemEpollHandler { 245 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 246 let ev_type = event.data as u16; 247 match ev_type { 248 QUEUE_AVAIL_EVENT => { 249 if let Err(e) = self.queue_evt.read() { 250 error!("Failed to get queue event: {:?}", e); 251 return true; 252 } else if self.process_queue() { 253 if let Err(e) = self.signal_used_queue() { 254 error!("Failed to signal used queue: {:?}", e); 255 return true; 256 } 257 } 258 } 259 _ => { 260 error!("Unexpected event: {}", ev_type); 261 return true; 262 } 263 } 264 false 265 } 266 } 267 268 pub struct Pmem { 269 common: VirtioCommon, 270 id: String, 271 disk: Option<File>, 272 config: VirtioPmemConfig, 273 mapping: UserspaceMapping, 274 seccomp_action: SeccompAction, 275 exit_evt: EventFd, 276 277 // Hold ownership of the memory that is allocated for the device 278 // which will be automatically dropped when the device is dropped 279 _region: MmapRegion, 280 } 281 282 #[derive(Versionize)] 283 pub struct PmemState { 284 avail_features: u64, 285 acked_features: u64, 286 config: VirtioPmemConfig, 287 } 288 289 impl VersionMapped for PmemState {} 290 291 impl Pmem { 292 #[allow(clippy::too_many_arguments)] 293 pub fn new( 294 id: String, 295 disk: File, 296 addr: GuestAddress, 297 mapping: UserspaceMapping, 298 _region: MmapRegion, 299 iommu: bool, 300 seccomp_action: SeccompAction, 301 exit_evt: EventFd, 302 ) -> io::Result<Pmem> { 303 let config = VirtioPmemConfig { 304 start: addr.raw_value().to_le(), 305 size: (_region.size() as u64).to_le(), 306 }; 307 308 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 309 310 if iommu { 311 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 312 } 313 314 Ok(Pmem { 315 common: VirtioCommon { 316 device_type: VirtioDeviceType::Pmem as u32, 317 queue_sizes: QUEUE_SIZES.to_vec(), 318 paused_sync: Some(Arc::new(Barrier::new(2))), 319 avail_features, 320 min_queues: 1, 321 ..Default::default() 322 }, 323 id, 324 disk: Some(disk), 325 config, 326 mapping, 327 seccomp_action, 328 _region, 329 exit_evt, 330 }) 331 } 332 333 fn state(&self) -> PmemState { 334 PmemState { 335 avail_features: self.common.avail_features, 336 acked_features: self.common.acked_features, 337 config: self.config, 338 } 339 } 340 341 fn set_state(&mut self, state: &PmemState) { 342 self.common.avail_features = state.avail_features; 343 self.common.acked_features = state.acked_features; 344 self.config = state.config; 345 } 346 } 347 348 impl Drop for Pmem { 349 fn drop(&mut self) { 350 if let Some(kill_evt) = self.common.kill_evt.take() { 351 // Ignore the result because there is nothing we can do about it. 352 let _ = kill_evt.write(1); 353 } 354 } 355 } 356 357 impl VirtioDevice for Pmem { 358 fn device_type(&self) -> u32 { 359 self.common.device_type 360 } 361 362 fn queue_max_sizes(&self) -> &[u16] { 363 &self.common.queue_sizes 364 } 365 366 fn features(&self) -> u64 { 367 self.common.avail_features 368 } 369 370 fn ack_features(&mut self, value: u64) { 371 self.common.ack_features(value) 372 } 373 374 fn read_config(&self, offset: u64, data: &mut [u8]) { 375 self.read_config_from_slice(self.config.as_slice(), offset, data); 376 } 377 378 fn activate( 379 &mut self, 380 _mem: GuestMemoryAtomic<GuestMemoryMmap>, 381 interrupt_cb: Arc<dyn VirtioInterrupt>, 382 mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>, 383 mut queue_evts: Vec<EventFd>, 384 ) -> ActivateResult { 385 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 386 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 387 if let Some(disk) = self.disk.as_ref() { 388 let disk = disk.try_clone().map_err(|e| { 389 error!("failed cloning pmem disk: {}", e); 390 ActivateError::BadActivate 391 })?; 392 let mut handler = PmemEpollHandler { 393 queue: queues.remove(0), 394 disk, 395 interrupt_cb, 396 queue_evt: queue_evts.remove(0), 397 kill_evt, 398 pause_evt, 399 access_platform: self.common.access_platform.clone(), 400 }; 401 402 let paused = self.common.paused.clone(); 403 let paused_sync = self.common.paused_sync.clone(); 404 let mut epoll_threads = Vec::new(); 405 406 spawn_virtio_thread( 407 &self.id, 408 &self.seccomp_action, 409 Thread::VirtioPmem, 410 &mut epoll_threads, 411 &self.exit_evt, 412 move || { 413 if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 414 error!("Error running worker: {:?}", e); 415 } 416 }, 417 )?; 418 419 self.common.epoll_threads = Some(epoll_threads); 420 421 event!("virtio-device", "activated", "id", &self.id); 422 return Ok(()); 423 } 424 Err(ActivateError::BadActivate) 425 } 426 427 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 428 let result = self.common.reset(); 429 event!("virtio-device", "reset", "id", &self.id); 430 result 431 } 432 433 fn userspace_mappings(&self) -> Vec<UserspaceMapping> { 434 vec![self.mapping.clone()] 435 } 436 437 fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) { 438 self.common.set_access_platform(access_platform) 439 } 440 } 441 442 impl Pausable for Pmem { 443 fn pause(&mut self) -> result::Result<(), MigratableError> { 444 self.common.pause() 445 } 446 447 fn resume(&mut self) -> result::Result<(), MigratableError> { 448 self.common.resume() 449 } 450 } 451 452 impl Snapshottable for Pmem { 453 fn id(&self) -> String { 454 self.id.clone() 455 } 456 457 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 458 Snapshot::new_from_versioned_state(&self.id, &self.state()) 459 } 460 461 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 462 self.set_state(&snapshot.to_versioned_state(&self.id)?); 463 Ok(()) 464 } 465 } 466 467 impl Transportable for Pmem {} 468 impl Migratable for Pmem {} 469