// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, DescriptorChain, EpollHelper, EpollHelperError,
    EpollHelperHandler, Queue, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType,
    EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::{GuestMemoryMmap, MmapRegion};
use crate::{VirtioInterrupt, VirtioInterruptType};
use seccomp::{SeccompAction, SeccompFilter};
use std::fmt::{self, Display};
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier};
use std::thread;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError,
};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

// The device exposes a single virtio queue of 256 descriptors.
const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Request type defined by the virtio-pmem protocol; FLUSH is the only one
// this device handles (see `RequestType`).
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
// Status codes written back to the guest in the response descriptor.
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

// Device configuration space exposed to the guest: guest physical address
// and size of the pmem region. Both fields are converted with `to_le()`
// when filled in by `Pmem::new`.
#[derive(Copy, Clone, Debug, Default, Versionize)]
#[repr(C)]
struct VirtioPmemConfig {
    start: u64,
    size: u64,
}

// Safe because it only has data and has no implicit padding.
56 unsafe impl ByteValued for VirtioPmemConfig {} 57 58 #[derive(Copy, Clone, Debug, Default)] 59 #[repr(C)] 60 struct VirtioPmemReq { 61 type_: u32, 62 } 63 64 // Safe because it only has data and has no implicit padding. 65 unsafe impl ByteValued for VirtioPmemReq {} 66 67 #[derive(Copy, Clone, Debug, Default)] 68 #[repr(C)] 69 struct VirtioPmemResp { 70 ret: u32, 71 } 72 73 // Safe because it only has data and has no implicit padding. 74 unsafe impl ByteValued for VirtioPmemResp {} 75 76 #[derive(Debug)] 77 enum Error { 78 /// Guest gave us bad memory addresses. 79 GuestMemory(GuestMemoryError), 80 /// Guest gave us a write only descriptor that protocol says to read from. 81 UnexpectedWriteOnlyDescriptor, 82 /// Guest gave us a read only descriptor that protocol says to write to. 83 UnexpectedReadOnlyDescriptor, 84 /// Guest gave us too few descriptors in a descriptor chain. 85 DescriptorChainTooShort, 86 /// Guest gave us a buffer that was too short to use. 87 BufferLengthTooSmall, 88 /// Guest sent us invalid request. 
89 InvalidRequest, 90 } 91 92 impl Display for Error { 93 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 94 use self::Error::*; 95 96 match self { 97 BufferLengthTooSmall => write!(f, "buffer length too small"), 98 DescriptorChainTooShort => write!(f, "descriptor chain too short"), 99 GuestMemory(e) => write!(f, "bad guest memory address: {}", e), 100 InvalidRequest => write!(f, "invalid request"), 101 UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"), 102 UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"), 103 } 104 } 105 } 106 107 #[derive(Debug, PartialEq)] 108 enum RequestType { 109 Flush, 110 } 111 112 struct Request { 113 type_: RequestType, 114 status_addr: GuestAddress, 115 } 116 117 impl Request { 118 fn parse( 119 avail_desc: &DescriptorChain, 120 mem: &GuestMemoryMmap, 121 ) -> result::Result<Request, Error> { 122 // The head contains the request type which MUST be readable. 123 if avail_desc.is_write_only() { 124 return Err(Error::UnexpectedWriteOnlyDescriptor); 125 } 126 127 if avail_desc.len as usize != size_of::<VirtioPmemReq>() { 128 return Err(Error::InvalidRequest); 129 } 130 131 let request: VirtioPmemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?; 132 133 let request_type = match request.type_ { 134 VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush, 135 _ => return Err(Error::InvalidRequest), 136 }; 137 138 let status_desc = avail_desc 139 .next_descriptor() 140 .ok_or(Error::DescriptorChainTooShort)?; 141 142 // The status MUST always be writable 143 if !status_desc.is_write_only() { 144 return Err(Error::UnexpectedReadOnlyDescriptor); 145 } 146 147 if (status_desc.len as usize) < size_of::<VirtioPmemResp>() { 148 return Err(Error::BufferLengthTooSmall); 149 } 150 151 Ok(Request { 152 type_: request_type, 153 status_addr: status_desc.addr, 154 }) 155 } 156 } 157 158 struct PmemEpollHandler { 159 queue: Queue, 160 mem: GuestMemoryAtomic<GuestMemoryMmap>, 161 disk: 
File, 162 interrupt_cb: Arc<dyn VirtioInterrupt>, 163 queue_evt: EventFd, 164 kill_evt: EventFd, 165 pause_evt: EventFd, 166 } 167 168 impl PmemEpollHandler { 169 fn process_queue(&mut self) -> bool { 170 let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize]; 171 let mut used_count = 0; 172 let mem = self.mem.memory(); 173 for avail_desc in self.queue.iter(&mem) { 174 let len = match Request::parse(&avail_desc, &mem) { 175 Ok(ref req) if (req.type_ == RequestType::Flush) => { 176 let status_code = match self.disk.sync_all() { 177 Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK, 178 Err(e) => { 179 error!("failed flushing disk image: {}", e); 180 VIRTIO_PMEM_RESP_TYPE_EIO 181 } 182 }; 183 184 let resp = VirtioPmemResp { ret: status_code }; 185 match mem.write_obj(resp, req.status_addr) { 186 Ok(_) => size_of::<VirtioPmemResp>() as u32, 187 Err(e) => { 188 error!("bad guest memory address: {}", e); 189 0 190 } 191 } 192 } 193 Ok(ref req) => { 194 // Currently, there is only one virtio-pmem request, FLUSH. 
195 error!("Invalid virtio request type {:?}", req.type_); 196 0 197 } 198 Err(e) => { 199 error!("Failed to parse available descriptor chain: {:?}", e); 200 0 201 } 202 }; 203 204 used_desc_heads[used_count] = (avail_desc.index, len); 205 used_count += 1; 206 } 207 208 for &(desc_index, len) in &used_desc_heads[..used_count] { 209 self.queue.add_used(&mem, desc_index, len); 210 } 211 used_count > 0 212 } 213 214 fn signal_used_queue(&self) -> result::Result<(), DeviceError> { 215 self.interrupt_cb 216 .trigger(&VirtioInterruptType::Queue, Some(&self.queue)) 217 .map_err(|e| { 218 error!("Failed to signal used queue: {:?}", e); 219 DeviceError::FailedSignalingUsedQueue(e) 220 }) 221 } 222 223 fn run( 224 &mut self, 225 paused: Arc<AtomicBool>, 226 paused_sync: Arc<Barrier>, 227 ) -> result::Result<(), EpollHelperError> { 228 let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; 229 helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?; 230 helper.run(paused, paused_sync, self)?; 231 232 Ok(()) 233 } 234 } 235 236 impl EpollHelperHandler for PmemEpollHandler { 237 fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { 238 let ev_type = event.data as u16; 239 match ev_type { 240 QUEUE_AVAIL_EVENT => { 241 if let Err(e) = self.queue_evt.read() { 242 error!("Failed to get queue event: {:?}", e); 243 return true; 244 } else if self.process_queue() { 245 if let Err(e) = self.signal_used_queue() { 246 error!("Failed to signal used queue: {:?}", e); 247 return true; 248 } 249 } 250 } 251 _ => { 252 error!("Unexpected event: {}", ev_type); 253 return true; 254 } 255 } 256 false 257 } 258 } 259 260 pub struct Pmem { 261 common: VirtioCommon, 262 id: String, 263 disk: Option<File>, 264 config: VirtioPmemConfig, 265 mapping: UserspaceMapping, 266 seccomp_action: SeccompAction, 267 268 // Hold ownership of the memory that is allocated for the device 269 // which will be automatically dropped when the device is dropped 270 
_region: MmapRegion, 271 } 272 273 #[derive(Versionize)] 274 pub struct PmemState { 275 avail_features: u64, 276 acked_features: u64, 277 config: VirtioPmemConfig, 278 } 279 280 impl VersionMapped for PmemState {} 281 282 impl Pmem { 283 pub fn new( 284 id: String, 285 disk: File, 286 addr: GuestAddress, 287 mapping: UserspaceMapping, 288 _region: MmapRegion, 289 iommu: bool, 290 seccomp_action: SeccompAction, 291 ) -> io::Result<Pmem> { 292 let config = VirtioPmemConfig { 293 start: addr.raw_value().to_le(), 294 size: (_region.size() as u64).to_le(), 295 }; 296 297 let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; 298 299 if iommu { 300 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; 301 } 302 303 Ok(Pmem { 304 common: VirtioCommon { 305 device_type: VirtioDeviceType::Pmem as u32, 306 queue_sizes: QUEUE_SIZES.to_vec(), 307 paused_sync: Some(Arc::new(Barrier::new(2))), 308 avail_features, 309 min_queues: 1, 310 ..Default::default() 311 }, 312 id, 313 disk: Some(disk), 314 config, 315 mapping, 316 seccomp_action, 317 _region, 318 }) 319 } 320 321 fn state(&self) -> PmemState { 322 PmemState { 323 avail_features: self.common.avail_features, 324 acked_features: self.common.acked_features, 325 config: self.config, 326 } 327 } 328 329 fn set_state(&mut self, state: &PmemState) { 330 self.common.avail_features = state.avail_features; 331 self.common.acked_features = state.acked_features; 332 self.config = state.config; 333 } 334 } 335 336 impl Drop for Pmem { 337 fn drop(&mut self) { 338 if let Some(kill_evt) = self.common.kill_evt.take() { 339 // Ignore the result because there is nothing we can do about it. 
340 let _ = kill_evt.write(1); 341 } 342 } 343 } 344 345 impl VirtioDevice for Pmem { 346 fn device_type(&self) -> u32 { 347 self.common.device_type 348 } 349 350 fn queue_max_sizes(&self) -> &[u16] { 351 &self.common.queue_sizes 352 } 353 354 fn features(&self) -> u64 { 355 self.common.avail_features 356 } 357 358 fn ack_features(&mut self, value: u64) { 359 self.common.ack_features(value) 360 } 361 362 fn read_config(&self, offset: u64, data: &mut [u8]) { 363 self.read_config_from_slice(self.config.as_slice(), offset, data); 364 } 365 366 fn activate( 367 &mut self, 368 mem: GuestMemoryAtomic<GuestMemoryMmap>, 369 interrupt_cb: Arc<dyn VirtioInterrupt>, 370 mut queues: Vec<Queue>, 371 mut queue_evts: Vec<EventFd>, 372 ) -> ActivateResult { 373 self.common.activate(&queues, &queue_evts, &interrupt_cb)?; 374 let (kill_evt, pause_evt) = self.common.dup_eventfds(); 375 if let Some(disk) = self.disk.as_ref() { 376 let disk = disk.try_clone().map_err(|e| { 377 error!("failed cloning pmem disk: {}", e); 378 ActivateError::BadActivate 379 })?; 380 let mut handler = PmemEpollHandler { 381 queue: queues.remove(0), 382 mem, 383 disk, 384 interrupt_cb, 385 queue_evt: queue_evts.remove(0), 386 kill_evt, 387 pause_evt, 388 }; 389 390 let paused = self.common.paused.clone(); 391 let paused_sync = self.common.paused_sync.clone(); 392 let mut epoll_threads = Vec::new(); 393 // Retrieve seccomp filter for virtio_pmem thread 394 let virtio_pmem_seccomp_filter = 395 get_seccomp_filter(&self.seccomp_action, Thread::VirtioPmem) 396 .map_err(ActivateError::CreateSeccompFilter)?; 397 thread::Builder::new() 398 .name(self.id.clone()) 399 .spawn(move || { 400 if let Err(e) = SeccompFilter::apply(virtio_pmem_seccomp_filter) { 401 error!("Error applying seccomp filter: {:?}", e); 402 } else if let Err(e) = handler.run(paused, paused_sync.unwrap()) { 403 error!("Error running worker: {:?}", e); 404 } 405 }) 406 .map(|thread| epoll_threads.push(thread)) 407 .map_err(|e| { 408 error!("failed 
to clone virtio-pmem epoll thread: {}", e); 409 ActivateError::BadActivate 410 })?; 411 412 self.common.epoll_threads = Some(epoll_threads); 413 414 event!("virtio-device", "activated", "id", &self.id); 415 return Ok(()); 416 } 417 Err(ActivateError::BadActivate) 418 } 419 420 fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> { 421 let result = self.common.reset(); 422 event!("virtio-device", "reset", "id", &self.id); 423 result 424 } 425 426 fn userspace_mappings(&self) -> Vec<UserspaceMapping> { 427 vec![self.mapping.clone()] 428 } 429 } 430 431 impl Pausable for Pmem { 432 fn pause(&mut self) -> result::Result<(), MigratableError> { 433 self.common.pause() 434 } 435 436 fn resume(&mut self) -> result::Result<(), MigratableError> { 437 self.common.resume() 438 } 439 } 440 441 impl Snapshottable for Pmem { 442 fn id(&self) -> String { 443 self.id.clone() 444 } 445 446 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 447 Snapshot::new_from_versioned_state(&self.id, &self.state()) 448 } 449 450 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 451 self.set_state(&snapshot.to_versioned_state(&self.id)?); 452 Ok(()) 453 } 454 } 455 456 impl Transportable for Pmem {} 457 impl Migratable for Pmem {} 458