// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, DescriptorChain, EpollHelper, EpollHelperError,
    EpollHelperHandler, Queue, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType,
    EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::{GuestMemoryMmap, MmapRegion};
use crate::{VirtioInterrupt, VirtioInterruptType};
use seccompiler::SeccompAction;
use std::fmt::{self, Display};
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError,
};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Layout of the virtio-pmem config space exposed to the guest through
// `read_config`: guest physical address and size (in bytes) of the mapped
// region, both stored little-endian (see `Pmem::new`).
#[derive(Copy, Clone, Debug, Default, Versionize)]
#[repr(C)]
struct VirtioPmemConfig {
    start: u64,
    size: u64,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    type_: u32,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    ret: u32,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}

#[derive(Debug)]
enum Error {
    /// Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    /// Guest gave us a write-only descriptor that the protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    /// Guest gave us a read-only descriptor that the protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    /// Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    /// Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    /// Guest sent us an invalid request.
    InvalidRequest,
}

impl Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;

        match self {
            BufferLengthTooSmall => write!(f, "buffer length too small"),
            DescriptorChainTooShort => write!(f, "descriptor chain too short"),
            GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
            InvalidRequest => write!(f, "invalid request"),
            UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
            UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
        }
    }
}

#[derive(Debug, PartialEq)]
enum RequestType {
    Flush,
}

struct Request {
    type_: RequestType,
    status_addr: GuestAddress,
}

impl Request {
    fn parse(
        avail_desc: &DescriptorChain,
        mem: &GuestMemoryMmap,
    ) -> result::Result<Request, Error> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }

        if avail_desc.len as usize != size_of::<VirtioPmemReq>() {
            return Err(Error::InvalidRequest);
        }

        let request: VirtioPmemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;

        let request_type = match request.type_ {
            VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
            _ => return Err(Error::InvalidRequest),
        };

        let status_desc = avail_desc
            .next_descriptor()
            .ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len as usize) < size_of::<VirtioPmemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            type_: request_type,
            status_addr: status_desc.addr,
        })
    }
}

struct PmemEpollHandler {
    queue: Queue,
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    disk: File,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
}

impl PmemEpollHandler {
    // Drain the available ring: for every FLUSH request, fsync the backing
    // file and write the status back to the guest buffer. Returns true if at
    // least one descriptor was consumed and the used ring needs signaling.
    fn process_queue(&mut self) -> bool {
        let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
        let mut used_count = 0;
        let mem = self.mem.memory();
        for avail_desc in self.queue.iter(&mem) {
            let len = match Request::parse(&avail_desc, &mem) {
                Ok(ref req) if (req.type_ == RequestType::Flush) => {
                    let status_code = match self.disk.sync_all() {
                        Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
                        Err(e) => {
                            error!("failed flushing disk image: {}", e);
                            VIRTIO_PMEM_RESP_TYPE_EIO
                        }
                    };

                    let resp = VirtioPmemResp { ret: status_code };
                    match mem.write_obj(resp, req.status_addr) {
                        Ok(_) => size_of::<VirtioPmemResp>() as u32,
                        Err(e) => {
                            error!("bad guest memory address: {}", e);
                            0
                        }
                    }
                }
                Ok(ref req) => {
                    // Currently, there is only one virtio-pmem request, FLUSH.
                    error!("Invalid virtio request type {:?}", req.type_);
                    0
                }
                Err(e) => {
                    error!("Failed to parse available descriptor chain: {:?}", e);
                    0
                }
            };

            used_desc_heads[used_count] = (avail_desc.index, len);
            used_count += 1;
        }

        for &(desc_index, len) in &used_desc_heads[..used_count] {
            self.queue.add_used(&mem, desc_index, len);
        }
        used_count > 0
    }

    fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(&VirtioInterruptType::Queue, Some(&self.queue))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for PmemEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                if let Err(e) = self.queue_evt.read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.process_queue() {
                    if let Err(e) = self.signal_used_queue() {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}

pub struct Pmem {
    common: VirtioCommon,
    id: String,
    disk: Option<File>,
    config: VirtioPmemConfig,
    mapping: UserspaceMapping,
    seccomp_action: SeccompAction,
    exit_evt: EventFd,

    // Hold ownership of the memory that is allocated for the device
    // which will be automatically dropped when the device is dropped
    _region: MmapRegion,
}

#[derive(Versionize)]
pub struct PmemState {
    avail_features: u64,
    acked_features: u64,
    config: VirtioPmemConfig,
}

impl VersionMapped for PmemState {}

impl Pmem {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        disk: File,
        addr: GuestAddress,
        mapping: UserspaceMapping,
        _region: MmapRegion,
        iommu: bool,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
    ) -> io::Result<Pmem> {
        let config = VirtioPmemConfig {
            start: addr.raw_value().to_le(),
            size: (_region.size() as u64).to_le(),
        };

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        if iommu {
            avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
        }

        Ok(Pmem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Pmem as u32,
                queue_sizes: QUEUE_SIZES.to_vec(),
                paused_sync: Some(Arc::new(Barrier::new(2))),
                avail_features,
                min_queues: 1,
                ..Default::default()
            },
            id,
            disk: Some(disk),
            config,
            mapping,
            seccomp_action,
            _region,
            exit_evt,
        })
    }

    fn state(&self) -> PmemState {
        PmemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
        }
    }

    fn set_state(&mut self, state: &PmemState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        self.config = state.config;
    }
}

impl Drop for Pmem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Pmem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        if let Some(disk) = self.disk.as_ref() {
            let disk = disk.try_clone().map_err(|e| {
                error!("failed cloning pmem disk: {}", e);
                ActivateError::BadActivate
            })?;
            let mut handler = PmemEpollHandler {
                queue: queues.remove(0),
                mem,
                disk,
                interrupt_cb,
                queue_evt: queue_evts.remove(0),
                kill_evt,
                pause_evt,
            };

            let paused = self.common.paused.clone();
            let paused_sync = self.common.paused_sync.clone();
            let mut epoll_threads = Vec::new();

            spawn_virtio_thread(
                &self.id,
                &self.seccomp_action,
                Thread::VirtioPmem,
                &mut epoll_threads,
                &self.exit_evt,
                move || {
                    if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                        error!("Error running worker: {:?}", e);
                    }
                },
            )?;

            self.common.epoll_threads = Some(epoll_threads);

            event!("virtio-device", "activated", "id", &self.id);
            return Ok(());
        }
        Err(ActivateError::BadActivate)
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }

    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
        vec![self.mapping.clone()]
    }
}

impl Pausable for Pmem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Pmem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id, &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}

impl Transportable for Pmem {}
impl Migratable for Pmem {}
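
// The module below is not part of the upstream file; it is a minimal,
// illustrative unit-test sketch (assuming the crate's usual `cargo test`
// setup) checking that `VirtioPmemConfig` keeps the layout the guest driver
// reads through `read_config`: two little-endian u64 fields (start, then
// size), 16 bytes total, with no padding.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_virtio_pmem_config_layout() {
        // `VirtioPmemConfig` is #[repr(C)] with two u64 fields, so it must be
        // exactly 16 bytes for the ByteValued impl to be sound.
        assert_eq!(size_of::<VirtioPmemConfig>(), 16);

        // Mirror what `Pmem::new` does: store the values little-endian.
        let config = VirtioPmemConfig {
            start: 0x1_0000_0000u64.to_le(),
            size: 0x4000_0000u64.to_le(),
        };

        // The guest sees these raw bytes via `read_config` -> `as_slice`.
        let bytes = config.as_slice();
        assert_eq!(bytes.len(), 16);
        assert_eq!(&bytes[0..8], &0x1_0000_0000u64.to_le_bytes());
        assert_eq!(&bytes[8..16], &0x4000_0000u64.to_le_bytes());
    }
}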