xref: /cloud-hypervisor/virtio-devices/src/pmem.rs (revision d10f20eb718023742143fa847a37f3d6114ead52)
1 // Copyright 2019 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Copyright © 2019 Intel Corporation
6 //
7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
8 
9 use super::Error as DeviceError;
10 use super::{
11     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
12     UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST,
13     VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
14 };
15 use crate::seccomp_filters::Thread;
16 use crate::thread_helper::spawn_virtio_thread;
17 use crate::{GuestMemoryMmap, MmapRegion};
18 use crate::{VirtioInterrupt, VirtioInterruptType};
19 use anyhow::anyhow;
20 use seccompiler::SeccompAction;
21 use serde::{Deserialize, Serialize};
22 use std::fs::File;
23 use std::io;
24 use std::mem::size_of;
25 use std::os::unix::io::AsRawFd;
26 use std::result;
27 use std::sync::atomic::AtomicBool;
28 use std::sync::{Arc, Barrier};
29 use thiserror::Error;
30 use virtio_queue::{DescriptorChain, Queue, QueueT};
31 use vm_memory::{
32     Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
33     GuestMemoryError, GuestMemoryLoadGuard,
34 };
35 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
36 use vm_virtio::{AccessPlatform, Translatable};
37 use vmm_sys_util::eventfd::EventFd;
38 
39 const QUEUE_SIZE: u16 = 256;
40 const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];
41 
42 const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
43 const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
44 const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
45 
46 // New descriptors are pending on the virtio queue.
47 const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
48 
49 #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
50 #[repr(C)]
51 struct VirtioPmemConfig {
52     start: u64,
53     size: u64,
54 }
55 
56 // SAFETY: it only has data and has no implicit padding.
57 unsafe impl ByteValued for VirtioPmemConfig {}
58 
59 #[derive(Copy, Clone, Debug, Default)]
60 #[repr(C)]
61 struct VirtioPmemReq {
62     type_: u32,
63 }
64 
65 // SAFETY: it only has data and has no implicit padding.
66 unsafe impl ByteValued for VirtioPmemReq {}
67 
68 #[derive(Copy, Clone, Debug, Default)]
69 #[repr(C)]
70 struct VirtioPmemResp {
71     ret: u32,
72 }
73 
74 // SAFETY: it only has data and has no implicit padding.
75 unsafe impl ByteValued for VirtioPmemResp {}
76 
77 #[derive(Error, Debug)]
78 enum Error {
79     #[error("Bad guest memory addresses: {0}")]
80     GuestMemory(GuestMemoryError),
81     #[error("Unexpected write-only descriptor")]
82     UnexpectedWriteOnlyDescriptor,
83     #[error("Unexpected read-only descriptor")]
84     UnexpectedReadOnlyDescriptor,
85     #[error("Descriptor chain too short")]
86     DescriptorChainTooShort,
87     #[error("Buffer length too small")]
88     BufferLengthTooSmall,
89     #[error("Invalid request")]
90     InvalidRequest,
91     #[error("Failed adding used index: {0}")]
92     QueueAddUsed(virtio_queue::Error),
93 }
94 
/// Operations a guest may request from the device.
#[derive(Debug, PartialEq, Eq)]
enum RequestType {
    /// Flush the backing file to stable storage.
    Flush,
}
99 
100 struct Request {
101     type_: RequestType,
102     status_addr: GuestAddress,
103 }
104 
105 impl Request {
106     fn parse(
107         desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
108         access_platform: Option<&Arc<dyn AccessPlatform>>,
109     ) -> result::Result<Request, Error> {
110         let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
111         // The descriptor contains the request type which MUST be readable.
112         if desc.is_write_only() {
113             return Err(Error::UnexpectedWriteOnlyDescriptor);
114         }
115 
116         if desc.len() as usize != size_of::<VirtioPmemReq>() {
117             return Err(Error::InvalidRequest);
118         }
119 
120         let request: VirtioPmemReq = desc_chain
121             .memory()
122             .read_obj(
123                 desc.addr()
124                     .translate_gva(access_platform, desc.len() as usize),
125             )
126             .map_err(Error::GuestMemory)?;
127 
128         let request_type = match request.type_ {
129             VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
130             _ => return Err(Error::InvalidRequest),
131         };
132 
133         let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
134 
135         // The status MUST always be writable
136         if !status_desc.is_write_only() {
137             return Err(Error::UnexpectedReadOnlyDescriptor);
138         }
139 
140         if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
141             return Err(Error::BufferLengthTooSmall);
142         }
143 
144         Ok(Request {
145             type_: request_type,
146             status_addr: status_desc
147                 .addr()
148                 .translate_gva(access_platform, status_desc.len() as usize),
149         })
150     }
151 }
152 
153 struct PmemEpollHandler {
154     mem: GuestMemoryAtomic<GuestMemoryMmap>,
155     queue: Queue,
156     disk: File,
157     interrupt_cb: Arc<dyn VirtioInterrupt>,
158     queue_evt: EventFd,
159     kill_evt: EventFd,
160     pause_evt: EventFd,
161     access_platform: Option<Arc<dyn AccessPlatform>>,
162 }
163 
164 impl PmemEpollHandler {
165     fn process_queue(&mut self) -> result::Result<bool, Error> {
166         let mut used_descs = false;
167         while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
168             let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) {
169                 Ok(ref req) if (req.type_ == RequestType::Flush) => {
170                     let status_code = match self.disk.sync_all() {
171                         Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
172                         Err(e) => {
173                             error!("failed flushing disk image: {}", e);
174                             VIRTIO_PMEM_RESP_TYPE_EIO
175                         }
176                     };
177 
178                     let resp = VirtioPmemResp { ret: status_code };
179                     match desc_chain.memory().write_obj(resp, req.status_addr) {
180                         Ok(_) => size_of::<VirtioPmemResp>() as u32,
181                         Err(e) => {
182                             error!("bad guest memory address: {}", e);
183                             0
184                         }
185                     }
186                 }
187                 Ok(ref req) => {
188                     // Currently, there is only one virtio-pmem request, FLUSH.
189                     error!("Invalid virtio request type {:?}", req.type_);
190                     0
191                 }
192                 Err(e) => {
193                     error!("Failed to parse available descriptor chain: {:?}", e);
194                     0
195                 }
196             };
197 
198             self.queue
199                 .add_used(desc_chain.memory(), desc_chain.head_index(), len)
200                 .map_err(Error::QueueAddUsed)?;
201             used_descs = true;
202         }
203 
204         Ok(used_descs)
205     }
206 
207     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
208         self.interrupt_cb
209             .trigger(VirtioInterruptType::Queue(0))
210             .map_err(|e| {
211                 error!("Failed to signal used queue: {:?}", e);
212                 DeviceError::FailedSignalingUsedQueue(e)
213             })
214     }
215 
216     fn run(
217         &mut self,
218         paused: Arc<AtomicBool>,
219         paused_sync: Arc<Barrier>,
220     ) -> result::Result<(), EpollHelperError> {
221         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
222         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
223         helper.run(paused, paused_sync, self)?;
224 
225         Ok(())
226     }
227 }
228 
229 impl EpollHelperHandler for PmemEpollHandler {
230     fn handle_event(
231         &mut self,
232         _helper: &mut EpollHelper,
233         event: &epoll::Event,
234     ) -> result::Result<(), EpollHelperError> {
235         let ev_type = event.data as u16;
236         match ev_type {
237             QUEUE_AVAIL_EVENT => {
238                 self.queue_evt.read().map_err(|e| {
239                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
240                 })?;
241 
242                 let needs_notification = self.process_queue().map_err(|e| {
243                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
244                 })?;
245 
246                 if needs_notification {
247                     self.signal_used_queue().map_err(|e| {
248                         EpollHelperError::HandleEvent(anyhow!(
249                             "Failed to signal used queue: {:?}",
250                             e
251                         ))
252                     })?;
253                 }
254             }
255             _ => {
256                 return Err(EpollHelperError::HandleEvent(anyhow!(
257                     "Unexpected event: {}",
258                     ev_type
259                 )));
260             }
261         }
262         Ok(())
263     }
264 }
265 
266 pub struct Pmem {
267     common: VirtioCommon,
268     id: String,
269     disk: Option<File>,
270     config: VirtioPmemConfig,
271     mapping: UserspaceMapping,
272     seccomp_action: SeccompAction,
273     exit_evt: EventFd,
274 
275     // Hold ownership of the memory that is allocated for the device
276     // which will be automatically dropped when the device is dropped
277     _region: MmapRegion,
278 }
279 
280 #[derive(Serialize, Deserialize)]
281 pub struct PmemState {
282     avail_features: u64,
283     acked_features: u64,
284     config: VirtioPmemConfig,
285 }
286 
287 impl Pmem {
288     #[allow(clippy::too_many_arguments)]
289     pub fn new(
290         id: String,
291         disk: File,
292         addr: GuestAddress,
293         mapping: UserspaceMapping,
294         _region: MmapRegion,
295         iommu: bool,
296         seccomp_action: SeccompAction,
297         exit_evt: EventFd,
298         state: Option<PmemState>,
299     ) -> io::Result<Pmem> {
300         let (avail_features, acked_features, config, paused) = if let Some(state) = state {
301             info!("Restoring virtio-pmem {}", id);
302             (
303                 state.avail_features,
304                 state.acked_features,
305                 state.config,
306                 true,
307             )
308         } else {
309             let config = VirtioPmemConfig {
310                 start: addr.raw_value().to_le(),
311                 size: (_region.size() as u64).to_le(),
312             };
313 
314             let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
315 
316             if iommu {
317                 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
318             }
319             (avail_features, 0, config, false)
320         };
321 
322         Ok(Pmem {
323             common: VirtioCommon {
324                 device_type: VirtioDeviceType::Pmem as u32,
325                 queue_sizes: QUEUE_SIZES.to_vec(),
326                 paused_sync: Some(Arc::new(Barrier::new(2))),
327                 avail_features,
328                 acked_features,
329                 min_queues: 1,
330                 paused: Arc::new(AtomicBool::new(paused)),
331                 ..Default::default()
332             },
333             id,
334             disk: Some(disk),
335             config,
336             mapping,
337             seccomp_action,
338             _region,
339             exit_evt,
340         })
341     }
342 
343     fn state(&self) -> PmemState {
344         PmemState {
345             avail_features: self.common.avail_features,
346             acked_features: self.common.acked_features,
347             config: self.config,
348         }
349     }
350 
351     #[cfg(fuzzing)]
352     pub fn wait_for_epoll_threads(&mut self) {
353         self.common.wait_for_epoll_threads();
354     }
355 }
356 
357 impl Drop for Pmem {
358     fn drop(&mut self) {
359         if let Some(kill_evt) = self.common.kill_evt.take() {
360             // Ignore the result because there is nothing we can do about it.
361             let _ = kill_evt.write(1);
362         }
363         self.common.wait_for_epoll_threads();
364     }
365 }
366 
367 impl VirtioDevice for Pmem {
368     fn device_type(&self) -> u32 {
369         self.common.device_type
370     }
371 
372     fn queue_max_sizes(&self) -> &[u16] {
373         &self.common.queue_sizes
374     }
375 
376     fn features(&self) -> u64 {
377         self.common.avail_features
378     }
379 
380     fn ack_features(&mut self, value: u64) {
381         self.common.ack_features(value)
382     }
383 
384     fn read_config(&self, offset: u64, data: &mut [u8]) {
385         self.read_config_from_slice(self.config.as_slice(), offset, data);
386     }
387 
388     fn activate(
389         &mut self,
390         mem: GuestMemoryAtomic<GuestMemoryMmap>,
391         interrupt_cb: Arc<dyn VirtioInterrupt>,
392         mut queues: Vec<(usize, Queue, EventFd)>,
393     ) -> ActivateResult {
394         self.common.activate(&queues, &interrupt_cb)?;
395         let (kill_evt, pause_evt) = self.common.dup_eventfds();
396         if let Some(disk) = self.disk.as_ref() {
397             let disk = disk.try_clone().map_err(|e| {
398                 error!("failed cloning pmem disk: {}", e);
399                 ActivateError::BadActivate
400             })?;
401 
402             let (_, queue, queue_evt) = queues.remove(0);
403 
404             let mut handler = PmemEpollHandler {
405                 mem,
406                 queue,
407                 disk,
408                 interrupt_cb,
409                 queue_evt,
410                 kill_evt,
411                 pause_evt,
412                 access_platform: self.common.access_platform.clone(),
413             };
414 
415             let paused = self.common.paused.clone();
416             let paused_sync = self.common.paused_sync.clone();
417             let mut epoll_threads = Vec::new();
418 
419             spawn_virtio_thread(
420                 &self.id,
421                 &self.seccomp_action,
422                 Thread::VirtioPmem,
423                 &mut epoll_threads,
424                 &self.exit_evt,
425                 move || handler.run(paused, paused_sync.unwrap()),
426             )?;
427 
428             self.common.epoll_threads = Some(epoll_threads);
429 
430             event!("virtio-device", "activated", "id", &self.id);
431             return Ok(());
432         }
433         Err(ActivateError::BadActivate)
434     }
435 
436     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
437         let result = self.common.reset();
438         event!("virtio-device", "reset", "id", &self.id);
439         result
440     }
441 
442     fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
443         vec![self.mapping.clone()]
444     }
445 
446     fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
447         self.common.set_access_platform(access_platform)
448     }
449 }
450 
451 impl Pausable for Pmem {
452     fn pause(&mut self) -> result::Result<(), MigratableError> {
453         self.common.pause()
454     }
455 
456     fn resume(&mut self) -> result::Result<(), MigratableError> {
457         self.common.resume()
458     }
459 }
460 
461 impl Snapshottable for Pmem {
462     fn id(&self) -> String {
463         self.id.clone()
464     }
465 
466     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
467         Snapshot::new_from_state(&self.state())
468     }
469 }
470 
471 impl Transportable for Pmem {}
472 impl Migratable for Pmem {}
473