xref: /cloud-hypervisor/virtio-devices/src/pmem.rs (revision 5e52729453cb62edbe4fb3a4aa24f8cca31e667e)
1 // Copyright 2019 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Copyright © 2019 Intel Corporation
6 //
7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
8 
9 use super::Error as DeviceError;
10 use super::{
11     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
12     UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
13     VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
14 };
15 use crate::seccomp_filters::Thread;
16 use crate::thread_helper::spawn_virtio_thread;
17 use crate::{GuestMemoryMmap, MmapRegion};
18 use crate::{VirtioInterrupt, VirtioInterruptType};
19 use anyhow::anyhow;
20 use seccompiler::SeccompAction;
21 use std::fs::File;
22 use std::io;
23 use std::mem::size_of;
24 use std::os::unix::io::AsRawFd;
25 use std::result;
26 use std::sync::atomic::AtomicBool;
27 use std::sync::{Arc, Barrier};
28 use thiserror::Error;
29 use versionize::{VersionMap, Versionize, VersionizeResult};
30 use versionize_derive::Versionize;
31 use virtio_queue::{DescriptorChain, Queue, QueueT};
32 use vm_memory::{
33     Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
34     GuestMemoryError, GuestMemoryLoadGuard,
35 };
36 use vm_migration::VersionMapped;
37 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
38 use vm_virtio::{AccessPlatform, Translatable};
39 use vmm_sys_util::eventfd::EventFd;
40 
// Maximum size advertised for the device's virtqueue.
const QUEUE_SIZE: u16 = 256;
// Per-queue maximum sizes; the single entry means the device has one queue.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Request type value that `Request::parse` accepts as a flush request.
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
// Response value written to the guest when flushing the backing file succeeded.
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
// Response value written to the guest when flushing the backing file failed.
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
50 
/// Device configuration space exposed to the guest through `read_config`.
///
/// Both fields are stored already converted with `to_le()` by `Pmem::new`.
#[derive(Copy, Clone, Debug, Default, Versionize)]
#[repr(C)]
struct VirtioPmemConfig {
    // Raw value of the guest address passed to `Pmem::new`.
    start: u64,
    // Size in bytes of the `MmapRegion` backing the device.
    size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}
60 
/// Request header read from the first descriptor of a chain.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    // Request type; only `VIRTIO_PMEM_REQ_TYPE_FLUSH` is accepted by the parser.
    type_: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}
69 
/// Response written to the status descriptor once a request has been served.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    // Either `VIRTIO_PMEM_RESP_TYPE_OK` or `VIRTIO_PMEM_RESP_TYPE_EIO`.
    ret: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}
78 
/// Errors raised while parsing requests and processing the virtqueue.
#[derive(Error, Debug)]
enum Error {
    // Reading the request header from guest memory failed.
    #[error("Bad guest memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    // The request descriptor was marked write-only.
    #[error("Unexpected write-only descriptor")]
    UnexpectedWriteOnlyDescriptor,
    // The status descriptor was not marked write-only.
    #[error("Unexpected read-only descriptor")]
    UnexpectedReadOnlyDescriptor,
    // The chain ended before both the request and status descriptors were seen.
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    // The status descriptor cannot hold a `VirtioPmemResp`.
    #[error("Buffer length too small")]
    BufferLengthTooSmall,
    // The request descriptor has the wrong length or an unknown request type.
    #[error("Invalid request")]
    InvalidRequest,
    // Adding the processed chain to the used ring failed.
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}
96 
/// Decoded request kinds understood by the device.
#[derive(Debug, PartialEq, Eq)]
enum RequestType {
    // Produced when the request header carries `VIRTIO_PMEM_REQ_TYPE_FLUSH`.
    Flush,
}
101 
/// A request decoded from a descriptor chain by `Request::parse`.
struct Request {
    // Kind of operation requested by the guest.
    type_: RequestType,
    // Address (after `translate_gva`) where the `VirtioPmemResp` must be written.
    status_addr: GuestAddress,
}
106 
107 impl Request {
108     fn parse(
109         desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
110         access_platform: Option<&Arc<dyn AccessPlatform>>,
111     ) -> result::Result<Request, Error> {
112         let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
113         // The descriptor contains the request type which MUST be readable.
114         if desc.is_write_only() {
115             return Err(Error::UnexpectedWriteOnlyDescriptor);
116         }
117 
118         if desc.len() as usize != size_of::<VirtioPmemReq>() {
119             return Err(Error::InvalidRequest);
120         }
121 
122         let request: VirtioPmemReq = desc_chain
123             .memory()
124             .read_obj(
125                 desc.addr()
126                     .translate_gva(access_platform, desc.len() as usize),
127             )
128             .map_err(Error::GuestMemory)?;
129 
130         let request_type = match request.type_ {
131             VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
132             _ => return Err(Error::InvalidRequest),
133         };
134 
135         let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
136 
137         // The status MUST always be writable
138         if !status_desc.is_write_only() {
139             return Err(Error::UnexpectedReadOnlyDescriptor);
140         }
141 
142         if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
143             return Err(Error::BufferLengthTooSmall);
144         }
145 
146         Ok(Request {
147             type_: request_type,
148             status_addr: status_desc
149                 .addr()
150                 .translate_gva(access_platform, status_desc.len() as usize),
151         })
152     }
153 }
154 
/// State owned by the worker thread that services the device's virtqueue.
struct PmemEpollHandler {
    // Guest memory handle used to pop descriptor chains.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // The device's virtqueue.
    queue: Queue,
    // Clone of the backing file; synced on every flush request.
    disk: File,
    // Callback used to notify the guest that used descriptors are available.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Signalled when new descriptors are available on the queue.
    queue_evt: EventFd,
    // Handed to `EpollHelper::new` together with `pause_evt`.
    kill_evt: EventFd,
    // Handed to `EpollHelper::new` together with `kill_evt`.
    pause_evt: EventFd,
    // Optional address translation applied to descriptor addresses.
    access_platform: Option<Arc<dyn AccessPlatform>>,
}
165 
166 impl PmemEpollHandler {
167     fn process_queue(&mut self) -> result::Result<bool, Error> {
168         let mut used_descs = false;
169         while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
170             let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) {
171                 Ok(ref req) if (req.type_ == RequestType::Flush) => {
172                     let status_code = match self.disk.sync_all() {
173                         Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
174                         Err(e) => {
175                             error!("failed flushing disk image: {}", e);
176                             VIRTIO_PMEM_RESP_TYPE_EIO
177                         }
178                     };
179 
180                     let resp = VirtioPmemResp { ret: status_code };
181                     match desc_chain.memory().write_obj(resp, req.status_addr) {
182                         Ok(_) => size_of::<VirtioPmemResp>() as u32,
183                         Err(e) => {
184                             error!("bad guest memory address: {}", e);
185                             0
186                         }
187                     }
188                 }
189                 Ok(ref req) => {
190                     // Currently, there is only one virtio-pmem request, FLUSH.
191                     error!("Invalid virtio request type {:?}", req.type_);
192                     0
193                 }
194                 Err(e) => {
195                     error!("Failed to parse available descriptor chain: {:?}", e);
196                     0
197                 }
198             };
199 
200             self.queue
201                 .add_used(desc_chain.memory(), desc_chain.head_index(), len)
202                 .map_err(Error::QueueAddUsed)?;
203             used_descs = true;
204         }
205 
206         Ok(used_descs)
207     }
208 
209     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
210         self.interrupt_cb
211             .trigger(VirtioInterruptType::Queue(0))
212             .map_err(|e| {
213                 error!("Failed to signal used queue: {:?}", e);
214                 DeviceError::FailedSignalingUsedQueue(e)
215             })
216     }
217 
218     fn run(
219         &mut self,
220         paused: Arc<AtomicBool>,
221         paused_sync: Arc<Barrier>,
222     ) -> result::Result<(), EpollHelperError> {
223         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
224         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
225         helper.run(paused, paused_sync, self)?;
226 
227         Ok(())
228     }
229 }
230 
231 impl EpollHelperHandler for PmemEpollHandler {
232     fn handle_event(
233         &mut self,
234         _helper: &mut EpollHelper,
235         event: &epoll::Event,
236     ) -> result::Result<(), EpollHelperError> {
237         let ev_type = event.data as u16;
238         match ev_type {
239             QUEUE_AVAIL_EVENT => {
240                 self.queue_evt.read().map_err(|e| {
241                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
242                 })?;
243 
244                 let needs_notification = self.process_queue().map_err(|e| {
245                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
246                 })?;
247 
248                 if needs_notification {
249                     self.signal_used_queue().map_err(|e| {
250                         EpollHelperError::HandleEvent(anyhow!(
251                             "Failed to signal used queue: {:?}",
252                             e
253                         ))
254                     })?;
255                 }
256             }
257             _ => {
258                 return Err(EpollHelperError::HandleEvent(anyhow!(
259                     "Unexpected event: {}",
260                     ev_type
261                 )));
262             }
263         }
264         Ok(())
265     }
266 }
267 
/// The virtio-pmem device.
pub struct Pmem {
    // State and helpers shared by all virtio devices.
    common: VirtioCommon,
    // Device identifier, used for logging, events and as the snapshot id.
    id: String,
    // Backing file; cloned into the worker thread on activation.
    disk: Option<File>,
    // Configuration space served by `read_config`.
    config: VirtioPmemConfig,
    // Mapping reported through `userspace_mappings`.
    mapping: UserspaceMapping,
    // Seccomp action passed to `spawn_virtio_thread` on activation.
    seccomp_action: SeccompAction,
    // Eventfd passed to `spawn_virtio_thread` on activation.
    exit_evt: EventFd,

    // Hold ownership of the memory that is allocated for the device
    // which will be automatically dropped when the device is dropped
    _region: MmapRegion,
}
281 
/// Serializable device state: produced by `Pmem::state` for snapshots and
/// accepted by `Pmem::new` to restore a device.
#[derive(Versionize)]
pub struct PmemState {
    // Feature bits offered by the device.
    avail_features: u64,
    // Feature bits acknowledged by the driver.
    acked_features: u64,
    // Configuration space contents.
    config: VirtioPmemConfig,
}

// Uses the trait's default implementation.
impl VersionMapped for PmemState {}
290 
291 impl Pmem {
292     #[allow(clippy::too_many_arguments)]
293     pub fn new(
294         id: String,
295         disk: File,
296         addr: GuestAddress,
297         mapping: UserspaceMapping,
298         _region: MmapRegion,
299         iommu: bool,
300         seccomp_action: SeccompAction,
301         exit_evt: EventFd,
302         state: Option<PmemState>,
303     ) -> io::Result<Pmem> {
304         let (avail_features, acked_features, config, paused) = if let Some(state) = state {
305             info!("Restoring virtio-pmem {}", id);
306             (
307                 state.avail_features,
308                 state.acked_features,
309                 state.config,
310                 true,
311             )
312         } else {
313             let config = VirtioPmemConfig {
314                 start: addr.raw_value().to_le(),
315                 size: (_region.size() as u64).to_le(),
316             };
317 
318             let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
319 
320             if iommu {
321                 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
322             }
323             (avail_features, 0, config, false)
324         };
325 
326         Ok(Pmem {
327             common: VirtioCommon {
328                 device_type: VirtioDeviceType::Pmem as u32,
329                 queue_sizes: QUEUE_SIZES.to_vec(),
330                 paused_sync: Some(Arc::new(Barrier::new(2))),
331                 avail_features,
332                 acked_features,
333                 min_queues: 1,
334                 paused: Arc::new(AtomicBool::new(paused)),
335                 ..Default::default()
336             },
337             id,
338             disk: Some(disk),
339             config,
340             mapping,
341             seccomp_action,
342             _region,
343             exit_evt,
344         })
345     }
346 
347     fn state(&self) -> PmemState {
348         PmemState {
349             avail_features: self.common.avail_features,
350             acked_features: self.common.acked_features,
351             config: self.config,
352         }
353     }
354 
355     #[cfg(fuzzing)]
356     pub fn wait_for_epoll_threads(&mut self) {
357         self.common.wait_for_epoll_threads();
358     }
359 }
360 
361 impl Drop for Pmem {
362     fn drop(&mut self) {
363         if let Some(kill_evt) = self.common.kill_evt.take() {
364             // Ignore the result because there is nothing we can do about it.
365             let _ = kill_evt.write(1);
366         }
367     }
368 }
369 
370 impl VirtioDevice for Pmem {
371     fn device_type(&self) -> u32 {
372         self.common.device_type
373     }
374 
375     fn queue_max_sizes(&self) -> &[u16] {
376         &self.common.queue_sizes
377     }
378 
379     fn features(&self) -> u64 {
380         self.common.avail_features
381     }
382 
383     fn ack_features(&mut self, value: u64) {
384         self.common.ack_features(value)
385     }
386 
387     fn read_config(&self, offset: u64, data: &mut [u8]) {
388         self.read_config_from_slice(self.config.as_slice(), offset, data);
389     }
390 
391     fn activate(
392         &mut self,
393         mem: GuestMemoryAtomic<GuestMemoryMmap>,
394         interrupt_cb: Arc<dyn VirtioInterrupt>,
395         mut queues: Vec<(usize, Queue, EventFd)>,
396     ) -> ActivateResult {
397         self.common.activate(&queues, &interrupt_cb)?;
398         let (kill_evt, pause_evt) = self.common.dup_eventfds();
399         if let Some(disk) = self.disk.as_ref() {
400             let disk = disk.try_clone().map_err(|e| {
401                 error!("failed cloning pmem disk: {}", e);
402                 ActivateError::BadActivate
403             })?;
404 
405             let (_, queue, queue_evt) = queues.remove(0);
406 
407             let mut handler = PmemEpollHandler {
408                 mem,
409                 queue,
410                 disk,
411                 interrupt_cb,
412                 queue_evt,
413                 kill_evt,
414                 pause_evt,
415                 access_platform: self.common.access_platform.clone(),
416             };
417 
418             let paused = self.common.paused.clone();
419             let paused_sync = self.common.paused_sync.clone();
420             let mut epoll_threads = Vec::new();
421 
422             spawn_virtio_thread(
423                 &self.id,
424                 &self.seccomp_action,
425                 Thread::VirtioPmem,
426                 &mut epoll_threads,
427                 &self.exit_evt,
428                 move || handler.run(paused, paused_sync.unwrap()),
429             )?;
430 
431             self.common.epoll_threads = Some(epoll_threads);
432 
433             event!("virtio-device", "activated", "id", &self.id);
434             return Ok(());
435         }
436         Err(ActivateError::BadActivate)
437     }
438 
439     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
440         let result = self.common.reset();
441         event!("virtio-device", "reset", "id", &self.id);
442         result
443     }
444 
445     fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
446         vec![self.mapping.clone()]
447     }
448 
449     fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
450         self.common.set_access_platform(access_platform)
451     }
452 }
453 
// Pausing and resuming are delegated entirely to the common virtio state.
impl Pausable for Pmem {
    /// Pauses the device by calling `VirtioCommon::pause`.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    /// Resumes the device by calling `VirtioCommon::resume`.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}
463 
impl Snapshottable for Pmem {
    /// Returns a copy of the device identifier.
    fn id(&self) -> String {
        self.id.clone()
    }

    /// Builds a snapshot from the versioned `PmemState` of the device.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.state())
    }
}
473 
// Both traits are implemented with their default methods only.
impl Transportable for Pmem {}
impl Migratable for Pmem {}
476