xref: /cloud-hypervisor/virtio-devices/src/pmem.rs (revision b440cb7d2330770cd415b63544a371d4caa2db3a)
1 // Copyright 2019 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Copyright © 2019 Intel Corporation
6 //
7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
8 
9 use super::Error as DeviceError;
10 use super::{
11     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
12     UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
13     VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
14 };
15 use crate::seccomp_filters::Thread;
16 use crate::thread_helper::spawn_virtio_thread;
17 use crate::{GuestMemoryMmap, MmapRegion};
18 use crate::{VirtioInterrupt, VirtioInterruptType};
19 use seccompiler::SeccompAction;
20 use std::fmt::{self, Display};
21 use std::fs::File;
22 use std::io;
23 use std::mem::size_of;
24 use std::os::unix::io::AsRawFd;
25 use std::result;
26 use std::sync::atomic::AtomicBool;
27 use std::sync::{Arc, Barrier};
28 use versionize::{VersionMap, Versionize, VersionizeResult};
29 use versionize_derive::Versionize;
30 use virtio_queue::{DescriptorChain, Queue};
31 use vm_memory::{
32     Address, ByteValued, Bytes, GuestAddress, GuestMemoryAtomic, GuestMemoryError,
33     GuestMemoryLoadGuard,
34 };
35 use vm_migration::VersionMapped;
36 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
37 use vm_virtio::{AccessPlatform, Translatable};
38 use vmm_sys_util::eventfd::EventFd;
39 
// Maximum size of the single virtqueue exposed by the device.
const QUEUE_SIZE: u16 = 256;
// Per-queue maximum sizes handed to `VirtioCommon`; there is exactly one queue.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Request type value that `Request::parse` accepts as a flush request.
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
// Response code written back to the guest when flushing the backing file succeeded.
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
// Response code written back to the guest when flushing the backing file failed.
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
49 
/// Device configuration space contents served by `read_config`.
///
/// `Pmem::new` stores both fields after converting them with `to_le()`.
#[derive(Copy, Clone, Debug, Default, Versionize)]
#[repr(C)]
struct VirtioPmemConfig {
    /// Raw value of the guest address the device was created with.
    start: u64,
    /// Size reported by the `MmapRegion` backing the device.
    size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}
59 
/// Request header read from the first descriptor of a chain.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    /// Request type; `Request::parse` only accepts `VIRTIO_PMEM_REQ_TYPE_FLUSH`.
    type_: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}
68 
/// Response written into the status descriptor of a chain.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    /// Result code: `VIRTIO_PMEM_RESP_TYPE_OK` or `VIRTIO_PMEM_RESP_TYPE_EIO`.
    ret: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}
77 
/// Reasons why `Request::parse` can reject a descriptor chain.
#[derive(Debug)]
enum Error {
    /// Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    /// Guest gave us a write only descriptor that protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    /// Guest gave us a read only descriptor that protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    /// Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    /// Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    /// Guest sent us an invalid request.
    InvalidRequest,
}
93 
94 impl Display for Error {
95     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
96         use self::Error::*;
97 
98         match self {
99             BufferLengthTooSmall => write!(f, "buffer length too small"),
100             DescriptorChainTooShort => write!(f, "descriptor chain too short"),
101             GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
102             InvalidRequest => write!(f, "invalid request"),
103             UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
104             UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
105         }
106     }
107 }
108 
/// Kind of request decoded from a `VirtioPmemReq` header.
#[derive(Debug, PartialEq, Eq)]
enum RequestType {
    /// Flush the backing file (decoded from `VIRTIO_PMEM_REQ_TYPE_FLUSH`).
    Flush,
}
113 
/// A request decoded from a descriptor chain by `Request::parse`.
struct Request {
    /// What the guest asked for.
    type_: RequestType,
    /// Translated address of the status descriptor, where the response is written.
    status_addr: GuestAddress,
}
118 
119 impl Request {
120     fn parse(
121         desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
122         access_platform: Option<&Arc<dyn AccessPlatform>>,
123     ) -> result::Result<Request, Error> {
124         let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
125         // The descriptor contains the request type which MUST be readable.
126         if desc.is_write_only() {
127             return Err(Error::UnexpectedWriteOnlyDescriptor);
128         }
129 
130         if desc.len() as usize != size_of::<VirtioPmemReq>() {
131             return Err(Error::InvalidRequest);
132         }
133 
134         let request: VirtioPmemReq = desc_chain
135             .memory()
136             .read_obj(
137                 desc.addr()
138                     .translate_gva(access_platform, desc.len() as usize),
139             )
140             .map_err(Error::GuestMemory)?;
141 
142         let request_type = match request.type_ {
143             VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
144             _ => return Err(Error::InvalidRequest),
145         };
146 
147         let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
148 
149         // The status MUST always be writable
150         if !status_desc.is_write_only() {
151             return Err(Error::UnexpectedReadOnlyDescriptor);
152         }
153 
154         if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
155             return Err(Error::BufferLengthTooSmall);
156         }
157 
158         Ok(Request {
159             type_: request_type,
160             status_addr: status_desc
161                 .addr()
162                 .translate_gva(access_platform, status_desc.len() as usize),
163         })
164     }
165 }
166 
/// State owned by the worker thread that serves the virtio-pmem queue.
struct PmemEpollHandler {
    // The single request queue of the device.
    queue: Queue<GuestMemoryAtomic<GuestMemoryMmap>>,
    // Backing file; `sync_all()` is called on it for every flush request.
    disk: File,
    // Used to raise the queue interrupt once descriptors have been used.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Registered with the epoll helper as `QUEUE_AVAIL_EVENT`.
    queue_evt: EventFd,
    // Passed to `EpollHelper::new` together with `pause_evt`.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Optional address translation applied to descriptor addresses when parsing.
    access_platform: Option<Arc<dyn AccessPlatform>>,
}
176 
177 impl PmemEpollHandler {
178     fn process_queue(&mut self) -> bool {
179         let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
180         let mut used_count = 0;
181         for mut desc_chain in self.queue.iter().unwrap() {
182             let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) {
183                 Ok(ref req) if (req.type_ == RequestType::Flush) => {
184                     let status_code = match self.disk.sync_all() {
185                         Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
186                         Err(e) => {
187                             error!("failed flushing disk image: {}", e);
188                             VIRTIO_PMEM_RESP_TYPE_EIO
189                         }
190                     };
191 
192                     let resp = VirtioPmemResp { ret: status_code };
193                     match desc_chain.memory().write_obj(resp, req.status_addr) {
194                         Ok(_) => size_of::<VirtioPmemResp>() as u32,
195                         Err(e) => {
196                             error!("bad guest memory address: {}", e);
197                             0
198                         }
199                     }
200                 }
201                 Ok(ref req) => {
202                     // Currently, there is only one virtio-pmem request, FLUSH.
203                     error!("Invalid virtio request type {:?}", req.type_);
204                     0
205                 }
206                 Err(e) => {
207                     error!("Failed to parse available descriptor chain: {:?}", e);
208                     0
209                 }
210             };
211 
212             used_desc_heads[used_count] = (desc_chain.head_index(), len);
213             used_count += 1;
214         }
215 
216         for &(desc_index, len) in &used_desc_heads[..used_count] {
217             self.queue.add_used(desc_index, len).unwrap();
218         }
219         used_count > 0
220     }
221 
222     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
223         self.interrupt_cb
224             .trigger(VirtioInterruptType::Queue(0))
225             .map_err(|e| {
226                 error!("Failed to signal used queue: {:?}", e);
227                 DeviceError::FailedSignalingUsedQueue(e)
228             })
229     }
230 
231     fn run(
232         &mut self,
233         paused: Arc<AtomicBool>,
234         paused_sync: Arc<Barrier>,
235     ) -> result::Result<(), EpollHelperError> {
236         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
237         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
238         helper.run(paused, paused_sync, self)?;
239 
240         Ok(())
241     }
242 }
243 
244 impl EpollHelperHandler for PmemEpollHandler {
245     fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
246         let ev_type = event.data as u16;
247         match ev_type {
248             QUEUE_AVAIL_EVENT => {
249                 if let Err(e) = self.queue_evt.read() {
250                     error!("Failed to get queue event: {:?}", e);
251                     return true;
252                 } else if self.process_queue() {
253                     if let Err(e) = self.signal_used_queue() {
254                         error!("Failed to signal used queue: {:?}", e);
255                         return true;
256                     }
257                 }
258             }
259             _ => {
260                 error!("Unexpected event: {}", ev_type);
261                 return true;
262             }
263         }
264         false
265     }
266 }
267 
/// Virtio persistent-memory device backed by a file.
pub struct Pmem {
    // Shared virtio device state (features, queue sizes, pause/kill plumbing).
    common: VirtioCommon,
    // Identifier used for the worker thread, events and snapshots.
    id: String,
    // Backing file; a clone is handed to the worker thread on activation.
    disk: Option<File>,
    // Contents of the device configuration space.
    config: VirtioPmemConfig,
    // Mapping returned by `userspace_mappings`.
    mapping: UserspaceMapping,
    // Passed to `spawn_virtio_thread` together with `exit_evt`.
    seccomp_action: SeccompAction,
    exit_evt: EventFd,

    // Hold ownership of the memory that is allocated for the device
    // which will be automatically dropped when the device is dropped
    _region: MmapRegion,
}
281 
/// Device state captured by `Pmem::state` and applied by `Pmem::set_state`
/// for snapshot and restore.
#[derive(Versionize)]
pub struct PmemState {
    // Feature bits offered by the device.
    avail_features: u64,
    // Feature bits acknowledged by the driver.
    acked_features: u64,
    // Device configuration space contents.
    config: VirtioPmemConfig,
}

impl VersionMapped for PmemState {}
290 
291 impl Pmem {
292     #[allow(clippy::too_many_arguments)]
293     pub fn new(
294         id: String,
295         disk: File,
296         addr: GuestAddress,
297         mapping: UserspaceMapping,
298         _region: MmapRegion,
299         iommu: bool,
300         seccomp_action: SeccompAction,
301         exit_evt: EventFd,
302     ) -> io::Result<Pmem> {
303         let config = VirtioPmemConfig {
304             start: addr.raw_value().to_le(),
305             size: (_region.size() as u64).to_le(),
306         };
307 
308         let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
309 
310         if iommu {
311             avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
312         }
313 
314         Ok(Pmem {
315             common: VirtioCommon {
316                 device_type: VirtioDeviceType::Pmem as u32,
317                 queue_sizes: QUEUE_SIZES.to_vec(),
318                 paused_sync: Some(Arc::new(Barrier::new(2))),
319                 avail_features,
320                 min_queues: 1,
321                 ..Default::default()
322             },
323             id,
324             disk: Some(disk),
325             config,
326             mapping,
327             seccomp_action,
328             _region,
329             exit_evt,
330         })
331     }
332 
333     fn state(&self) -> PmemState {
334         PmemState {
335             avail_features: self.common.avail_features,
336             acked_features: self.common.acked_features,
337             config: self.config,
338         }
339     }
340 
341     fn set_state(&mut self, state: &PmemState) {
342         self.common.avail_features = state.avail_features;
343         self.common.acked_features = state.acked_features;
344         self.config = state.config;
345     }
346 }
347 
348 impl Drop for Pmem {
349     fn drop(&mut self) {
350         if let Some(kill_evt) = self.common.kill_evt.take() {
351             // Ignore the result because there is nothing we can do about it.
352             let _ = kill_evt.write(1);
353         }
354     }
355 }
356 
357 impl VirtioDevice for Pmem {
358     fn device_type(&self) -> u32 {
359         self.common.device_type
360     }
361 
362     fn queue_max_sizes(&self) -> &[u16] {
363         &self.common.queue_sizes
364     }
365 
366     fn features(&self) -> u64 {
367         self.common.avail_features
368     }
369 
370     fn ack_features(&mut self, value: u64) {
371         self.common.ack_features(value)
372     }
373 
374     fn read_config(&self, offset: u64, data: &mut [u8]) {
375         self.read_config_from_slice(self.config.as_slice(), offset, data);
376     }
377 
378     fn activate(
379         &mut self,
380         _mem: GuestMemoryAtomic<GuestMemoryMmap>,
381         interrupt_cb: Arc<dyn VirtioInterrupt>,
382         mut queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
383         mut queue_evts: Vec<EventFd>,
384     ) -> ActivateResult {
385         self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
386         let (kill_evt, pause_evt) = self.common.dup_eventfds();
387         if let Some(disk) = self.disk.as_ref() {
388             let disk = disk.try_clone().map_err(|e| {
389                 error!("failed cloning pmem disk: {}", e);
390                 ActivateError::BadActivate
391             })?;
392             let mut handler = PmemEpollHandler {
393                 queue: queues.remove(0),
394                 disk,
395                 interrupt_cb,
396                 queue_evt: queue_evts.remove(0),
397                 kill_evt,
398                 pause_evt,
399                 access_platform: self.common.access_platform.clone(),
400             };
401 
402             let paused = self.common.paused.clone();
403             let paused_sync = self.common.paused_sync.clone();
404             let mut epoll_threads = Vec::new();
405 
406             spawn_virtio_thread(
407                 &self.id,
408                 &self.seccomp_action,
409                 Thread::VirtioPmem,
410                 &mut epoll_threads,
411                 &self.exit_evt,
412                 move || {
413                     if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
414                         error!("Error running worker: {:?}", e);
415                     }
416                 },
417             )?;
418 
419             self.common.epoll_threads = Some(epoll_threads);
420 
421             event!("virtio-device", "activated", "id", &self.id);
422             return Ok(());
423         }
424         Err(ActivateError::BadActivate)
425     }
426 
427     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
428         let result = self.common.reset();
429         event!("virtio-device", "reset", "id", &self.id);
430         result
431     }
432 
433     fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
434         vec![self.mapping.clone()]
435     }
436 
437     fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
438         self.common.set_access_platform(access_platform)
439     }
440 }
441 
// Pausing and resuming are delegated entirely to the common virtio state.
impl Pausable for Pmem {
    /// Pauses the device through `VirtioCommon::pause`.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    /// Resumes the device through `VirtioCommon::resume`.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}
451 
452 impl Snapshottable for Pmem {
453     fn id(&self) -> String {
454         self.id.clone()
455     }
456 
457     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
458         Snapshot::new_from_versioned_state(&self.id, &self.state())
459     }
460 
461     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
462         self.set_state(&snapshot.to_versioned_state(&self.id)?);
463         Ok(())
464     }
465 }
466 
// No methods are overridden here; both impls are empty.
impl Transportable for Pmem {}
impl Migratable for Pmem {}
469