xref: /cloud-hypervisor/virtio-devices/src/pmem.rs (revision 61e57e1cb149de03ae1e0b799b9e5ba9a4a63ace)
1 // Copyright 2019 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Copyright © 2019 Intel Corporation
6 //
7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
8 
9 use std::fs::File;
10 use std::mem::size_of;
11 use std::os::unix::io::AsRawFd;
12 use std::sync::atomic::AtomicBool;
13 use std::sync::{Arc, Barrier};
14 use std::{io, result};
15 
16 use anyhow::anyhow;
17 use seccompiler::SeccompAction;
18 use serde::{Deserialize, Serialize};
19 use thiserror::Error;
20 use virtio_queue::{DescriptorChain, Queue, QueueT};
21 use vm_memory::{
22     Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
23     GuestMemoryError, GuestMemoryLoadGuard,
24 };
25 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
26 use vm_virtio::{AccessPlatform, Translatable};
27 use vmm_sys_util::eventfd::EventFd;
28 
29 use super::{
30     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler,
31     Error as DeviceError, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType,
32     EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
33 };
34 use crate::seccomp_filters::Thread;
35 use crate::thread_helper::spawn_virtio_thread;
36 use crate::{GuestMemoryMmap, MmapRegion, VirtioInterrupt, VirtioInterruptType};
37 
// Maximum size advertised for the device's virtqueue (see `queue_max_sizes`).
const QUEUE_SIZE: u16 = 256;
// One entry per queue; copied into `VirtioCommon::queue_sizes` by `Pmem::new`.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

// Request type value that `Request::parse` decodes as a flush request.
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
// Status written to the guest when flushing the backing file succeeded.
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
// Status written to the guest when flushing the backing file failed.
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
47 
// Device configuration space served to the guest through `read_config`.
// `Pmem::new` either builds it from the guest address and the size of the
// backing region (both passed through `to_le()`) or takes it from a restored
// `PmemState`.
#[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)]
#[repr(C)]
struct VirtioPmemConfig {
    // Guest address at which the region starts.
    start: u64,
    // Size of the region, as reported by `MmapRegion::size()`.
    size: u64,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}
57 
// Request header read from the first descriptor of a chain by `Request::parse`.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    // Request type; `VIRTIO_PMEM_REQ_TYPE_FLUSH` is the only value that
    // `Request::parse` accepts.
    type_: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}
66 
// Response written to the status descriptor of a chain once a request has
// been handled.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    // One of the `VIRTIO_PMEM_RESP_TYPE_*` status codes.
    ret: u32,
}

// SAFETY: it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}
75 
// Errors raised while parsing a request or returning it to the guest.
#[derive(Error, Debug)]
enum Error {
    // Reading the request header from guest memory failed.
    #[error("Bad guest memory addresses: {0}")]
    GuestMemory(GuestMemoryError),
    // The request descriptor is write-only, so the device cannot read it.
    #[error("Unexpected write-only descriptor")]
    UnexpectedWriteOnlyDescriptor,
    // The status descriptor is not write-only, so the device cannot fill it.
    #[error("Unexpected read-only descriptor")]
    UnexpectedReadOnlyDescriptor,
    // The chain ended before both the request and the status descriptor
    // were found.
    #[error("Descriptor chain too short")]
    DescriptorChainTooShort,
    // The status descriptor is smaller than `VirtioPmemResp`.
    #[error("Buffer length too small")]
    BufferLengthTooSmall,
    // The request descriptor does not have the size of `VirtioPmemReq`, or
    // it carries an unknown request type.
    #[error("Invalid request")]
    InvalidRequest,
    // Placing the processed chain on the used ring failed.
    #[error("Failed adding used index: {0}")]
    QueueAddUsed(virtio_queue::Error),
}
93 
// Kind of operation requested by the guest, as decoded by `Request::parse`.
#[derive(Debug, PartialEq, Eq)]
enum RequestType {
    // Decoded from `VIRTIO_PMEM_REQ_TYPE_FLUSH`.
    Flush,
}
98 
// A request decoded from a descriptor chain.
struct Request {
    // Operation requested by the guest.
    type_: RequestType,
    // Address, already passed through `translate_gva`, of the status
    // descriptor's buffer where the `VirtioPmemResp` is written.
    status_addr: GuestAddress,
}
103 
104 impl Request {
105     fn parse(
106         desc_chain: &mut DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>,
107         access_platform: Option<&Arc<dyn AccessPlatform>>,
108     ) -> result::Result<Request, Error> {
109         let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
110         // The descriptor contains the request type which MUST be readable.
111         if desc.is_write_only() {
112             return Err(Error::UnexpectedWriteOnlyDescriptor);
113         }
114 
115         if desc.len() as usize != size_of::<VirtioPmemReq>() {
116             return Err(Error::InvalidRequest);
117         }
118 
119         let request: VirtioPmemReq = desc_chain
120             .memory()
121             .read_obj(
122                 desc.addr()
123                     .translate_gva(access_platform, desc.len() as usize),
124             )
125             .map_err(Error::GuestMemory)?;
126 
127         let request_type = match request.type_ {
128             VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
129             _ => return Err(Error::InvalidRequest),
130         };
131 
132         let status_desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?;
133 
134         // The status MUST always be writable
135         if !status_desc.is_write_only() {
136             return Err(Error::UnexpectedReadOnlyDescriptor);
137         }
138 
139         if (status_desc.len() as usize) < size_of::<VirtioPmemResp>() {
140             return Err(Error::BufferLengthTooSmall);
141         }
142 
143         Ok(Request {
144             type_: request_type,
145             status_addr: status_desc
146                 .addr()
147                 .translate_gva(access_platform, status_desc.len() as usize),
148         })
149     }
150 }
151 
// State owned by the virtio-pmem worker thread spawned in `Pmem::activate`.
struct PmemEpollHandler {
    // Guest memory from which descriptor chains are popped.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // The device's request queue.
    queue: Queue,
    // Clone of the backing file handle; `sync_all` is called on it for every
    // flush request.
    disk: File,
    // Used to notify the guest (queue 0) after descriptors have been used.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // Registered with the epoll helper as `QUEUE_AVAIL_EVENT`.
    queue_evt: EventFd,
    // Kill and pause eventfds handed to `EpollHelper::new`.
    kill_evt: EventFd,
    pause_evt: EventFd,
    // Optional translation applied to descriptor addresses while parsing.
    access_platform: Option<Arc<dyn AccessPlatform>>,
}
162 
163 impl PmemEpollHandler {
164     fn process_queue(&mut self) -> result::Result<bool, Error> {
165         let mut used_descs = false;
166         while let Some(mut desc_chain) = self.queue.pop_descriptor_chain(self.mem.memory()) {
167             let len = match Request::parse(&mut desc_chain, self.access_platform.as_ref()) {
168                 Ok(ref req) if (req.type_ == RequestType::Flush) => {
169                     let status_code = match self.disk.sync_all() {
170                         Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
171                         Err(e) => {
172                             error!("failed flushing disk image: {}", e);
173                             VIRTIO_PMEM_RESP_TYPE_EIO
174                         }
175                     };
176 
177                     let resp = VirtioPmemResp { ret: status_code };
178                     match desc_chain.memory().write_obj(resp, req.status_addr) {
179                         Ok(_) => size_of::<VirtioPmemResp>() as u32,
180                         Err(e) => {
181                             error!("bad guest memory address: {}", e);
182                             0
183                         }
184                     }
185                 }
186                 Ok(ref req) => {
187                     // Currently, there is only one virtio-pmem request, FLUSH.
188                     error!("Invalid virtio request type {:?}", req.type_);
189                     0
190                 }
191                 Err(e) => {
192                     error!("Failed to parse available descriptor chain: {:?}", e);
193                     0
194                 }
195             };
196 
197             self.queue
198                 .add_used(desc_chain.memory(), desc_chain.head_index(), len)
199                 .map_err(Error::QueueAddUsed)?;
200             used_descs = true;
201         }
202 
203         Ok(used_descs)
204     }
205 
206     fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
207         self.interrupt_cb
208             .trigger(VirtioInterruptType::Queue(0))
209             .map_err(|e| {
210                 error!("Failed to signal used queue: {:?}", e);
211                 DeviceError::FailedSignalingUsedQueue(e)
212             })
213     }
214 
215     fn run(
216         &mut self,
217         paused: Arc<AtomicBool>,
218         paused_sync: Arc<Barrier>,
219     ) -> result::Result<(), EpollHelperError> {
220         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
221         helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
222         helper.run(paused, paused_sync, self)?;
223 
224         Ok(())
225     }
226 }
227 
228 impl EpollHelperHandler for PmemEpollHandler {
229     fn handle_event(
230         &mut self,
231         _helper: &mut EpollHelper,
232         event: &epoll::Event,
233     ) -> result::Result<(), EpollHelperError> {
234         let ev_type = event.data as u16;
235         match ev_type {
236             QUEUE_AVAIL_EVENT => {
237                 self.queue_evt.read().map_err(|e| {
238                     EpollHelperError::HandleEvent(anyhow!("Failed to get queue event: {:?}", e))
239                 })?;
240 
241                 let needs_notification = self.process_queue().map_err(|e| {
242                     EpollHelperError::HandleEvent(anyhow!("Failed to process queue : {:?}", e))
243                 })?;
244 
245                 if needs_notification {
246                     self.signal_used_queue().map_err(|e| {
247                         EpollHelperError::HandleEvent(anyhow!(
248                             "Failed to signal used queue: {:?}",
249                             e
250                         ))
251                     })?;
252                 }
253             }
254             _ => {
255                 return Err(EpollHelperError::HandleEvent(anyhow!(
256                     "Unexpected event: {}",
257                     ev_type
258                 )));
259             }
260         }
261         Ok(())
262     }
263 }
264 
// The virtio-pmem device.
pub struct Pmem {
    // State shared by virtio device implementations (features, queue sizes,
    // pause state, worker threads, ...).
    common: VirtioCommon,
    // Device identifier, used in logs and events and returned by
    // `Snapshottable::id`.
    id: String,
    // Backing file; a clone of the handle is given to the worker thread on
    // activation.
    disk: Option<File>,
    // Configuration space contents returned by `read_config`.
    config: VirtioPmemConfig,
    // Mapping reported by `userspace_mappings`.
    mapping: UserspaceMapping,
    // Passed to `spawn_virtio_thread` when the worker thread is created.
    seccomp_action: SeccompAction,
    // Passed to `spawn_virtio_thread` when the worker thread is created.
    exit_evt: EventFd,

    // Hold ownership of the memory that is allocated for the device
    // which will be automatically dropped when the device is dropped
    _region: MmapRegion,
}
278 
// Serializable device state: produced by `Pmem::state` for snapshots and
// accepted by `Pmem::new` to restore a device.
#[derive(Serialize, Deserialize)]
pub struct PmemState {
    // Copy of `VirtioCommon::avail_features`.
    avail_features: u64,
    // Copy of `VirtioCommon::acked_features`.
    acked_features: u64,
    // Copy of the device configuration space.
    config: VirtioPmemConfig,
}
285 
286 impl Pmem {
287     #[allow(clippy::too_many_arguments)]
288     pub fn new(
289         id: String,
290         disk: File,
291         addr: GuestAddress,
292         mapping: UserspaceMapping,
293         _region: MmapRegion,
294         iommu: bool,
295         seccomp_action: SeccompAction,
296         exit_evt: EventFd,
297         state: Option<PmemState>,
298     ) -> io::Result<Pmem> {
299         let (avail_features, acked_features, config, paused) = if let Some(state) = state {
300             info!("Restoring virtio-pmem {}", id);
301             (
302                 state.avail_features,
303                 state.acked_features,
304                 state.config,
305                 true,
306             )
307         } else {
308             let config = VirtioPmemConfig {
309                 start: addr.raw_value().to_le(),
310                 size: (_region.size() as u64).to_le(),
311             };
312 
313             let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
314 
315             if iommu {
316                 avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
317             }
318             (avail_features, 0, config, false)
319         };
320 
321         Ok(Pmem {
322             common: VirtioCommon {
323                 device_type: VirtioDeviceType::Pmem as u32,
324                 queue_sizes: QUEUE_SIZES.to_vec(),
325                 paused_sync: Some(Arc::new(Barrier::new(2))),
326                 avail_features,
327                 acked_features,
328                 min_queues: 1,
329                 paused: Arc::new(AtomicBool::new(paused)),
330                 ..Default::default()
331             },
332             id,
333             disk: Some(disk),
334             config,
335             mapping,
336             seccomp_action,
337             _region,
338             exit_evt,
339         })
340     }
341 
342     fn state(&self) -> PmemState {
343         PmemState {
344             avail_features: self.common.avail_features,
345             acked_features: self.common.acked_features,
346             config: self.config,
347         }
348     }
349 
350     #[cfg(fuzzing)]
351     pub fn wait_for_epoll_threads(&mut self) {
352         self.common.wait_for_epoll_threads();
353     }
354 }
355 
356 impl Drop for Pmem {
357     fn drop(&mut self) {
358         if let Some(kill_evt) = self.common.kill_evt.take() {
359             // Ignore the result because there is nothing we can do about it.
360             let _ = kill_evt.write(1);
361         }
362         self.common.wait_for_epoll_threads();
363     }
364 }
365 
366 impl VirtioDevice for Pmem {
367     fn device_type(&self) -> u32 {
368         self.common.device_type
369     }
370 
371     fn queue_max_sizes(&self) -> &[u16] {
372         &self.common.queue_sizes
373     }
374 
375     fn features(&self) -> u64 {
376         self.common.avail_features
377     }
378 
379     fn ack_features(&mut self, value: u64) {
380         self.common.ack_features(value)
381     }
382 
383     fn read_config(&self, offset: u64, data: &mut [u8]) {
384         self.read_config_from_slice(self.config.as_slice(), offset, data);
385     }
386 
387     fn activate(
388         &mut self,
389         mem: GuestMemoryAtomic<GuestMemoryMmap>,
390         interrupt_cb: Arc<dyn VirtioInterrupt>,
391         mut queues: Vec<(usize, Queue, EventFd)>,
392     ) -> ActivateResult {
393         self.common.activate(&queues, &interrupt_cb)?;
394         let (kill_evt, pause_evt) = self.common.dup_eventfds();
395         if let Some(disk) = self.disk.as_ref() {
396             let disk = disk.try_clone().map_err(|e| {
397                 error!("failed cloning pmem disk: {}", e);
398                 ActivateError::BadActivate
399             })?;
400 
401             let (_, queue, queue_evt) = queues.remove(0);
402 
403             let mut handler = PmemEpollHandler {
404                 mem,
405                 queue,
406                 disk,
407                 interrupt_cb,
408                 queue_evt,
409                 kill_evt,
410                 pause_evt,
411                 access_platform: self.common.access_platform.clone(),
412             };
413 
414             let paused = self.common.paused.clone();
415             let paused_sync = self.common.paused_sync.clone();
416             let mut epoll_threads = Vec::new();
417 
418             spawn_virtio_thread(
419                 &self.id,
420                 &self.seccomp_action,
421                 Thread::VirtioPmem,
422                 &mut epoll_threads,
423                 &self.exit_evt,
424                 move || handler.run(paused, paused_sync.unwrap()),
425             )?;
426 
427             self.common.epoll_threads = Some(epoll_threads);
428 
429             event!("virtio-device", "activated", "id", &self.id);
430             return Ok(());
431         }
432         Err(ActivateError::BadActivate)
433     }
434 
435     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
436         let result = self.common.reset();
437         event!("virtio-device", "reset", "id", &self.id);
438         result
439     }
440 
441     fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
442         vec![self.mapping.clone()]
443     }
444 
445     fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
446         self.common.set_access_platform(access_platform)
447     }
448 }
449 
// Pause and resume are handled entirely by `VirtioCommon`.
impl Pausable for Pmem {
    // Delegates to `VirtioCommon::pause`.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    // Delegates to `VirtioCommon::resume`.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}
459 
impl Snapshottable for Pmem {
    // Returns a copy of the device identifier.
    fn id(&self) -> String {
        self.id.clone()
    }

    // Builds a snapshot from the `PmemState` returned by `Pmem::state`
    // (feature bits and configuration space).
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}
469 
// Empty impls: nothing is overridden here, so the device relies on whatever
// default behaviour these traits provide.
impl Transportable for Pmem {}
impl Migratable for Pmem {}
472