xref: /cloud-hypervisor/virtio-devices/src/pmem.rs (revision 9af2968a7dc47b89bf07ea9dc5e735084efcfa3a)
// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::Error as DeviceError;
use super::{
    ActivateError, ActivateResult, DescriptorChain, EpollHelper, EpollHelperError,
    EpollHelperHandler, Queue, UserspaceMapping, VirtioCommon, VirtioDevice, VirtioDeviceType,
    EPOLL_HELPER_EVENT_LAST, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::{GuestMemoryMmap, MmapRegion};
use crate::{VirtioInterrupt, VirtioInterruptType};
use seccomp::{SeccompAction, SeccompFilter};
use std::fmt::{self, Display};
use std::fs::File;
use std::io;
use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier};
use std::thread;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError,
};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

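// Request and response codes for the virtio-pmem FLUSH protocol: the guest
// submits a FLUSH request and the device replies with OK or EIO after syncing
// the backing file.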
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;

// New descriptors are pending on the virtio queue.
const QUEUE_AVAIL_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;

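// Device configuration space exposed to the guest: the guest physical address
// where the memory region is mapped and its size. Both fields are stored
// little-endian (see `Pmem::new`).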
#[derive(Copy, Clone, Debug, Default, Versionize)]
#[repr(C)]
struct VirtioPmemConfig {
    start: u64,
    size: u64,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemConfig {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemReq {
    type_: u32,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemReq {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct VirtioPmemResp {
    ret: u32,
}

// Safe because it only has data and has no implicit padding.
unsafe impl ByteValued for VirtioPmemResp {}

#[derive(Debug)]
enum Error {
    /// Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    /// Guest gave us a write-only descriptor that the protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    /// Guest gave us a read-only descriptor that the protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    /// Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    /// Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    /// Guest sent us an invalid request.
    InvalidRequest,
}

impl Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;

        match self {
            BufferLengthTooSmall => write!(f, "buffer length too small"),
            DescriptorChainTooShort => write!(f, "descriptor chain too short"),
            GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
            InvalidRequest => write!(f, "invalid request"),
            UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
            UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
        }
    }
}

#[derive(Debug, PartialEq)]
enum RequestType {
    Flush,
}

struct Request {
    type_: RequestType,
    status_addr: GuestAddress,
}

impl Request {
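    // Parses a request from a descriptor chain. The expected layout is a
    // device-readable descriptor holding a VirtioPmemReq, followed by a
    // device-writable descriptor of at least size_of::<VirtioPmemResp>()
    // bytes where the status is written back.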
    fn parse(
        avail_desc: &DescriptorChain,
        mem: &GuestMemoryMmap,
    ) -> result::Result<Request, Error> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }

        if avail_desc.len as usize != size_of::<VirtioPmemReq>() {
            return Err(Error::InvalidRequest);
        }

        let request: VirtioPmemReq = mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;

        let request_type = match request.type_ {
            VIRTIO_PMEM_REQ_TYPE_FLUSH => RequestType::Flush,
            _ => return Err(Error::InvalidRequest),
        };

        let status_desc = avail_desc
            .next_descriptor()
            .ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable.
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if (status_desc.len as usize) < size_of::<VirtioPmemResp>() {
            return Err(Error::BufferLengthTooSmall);
        }

        Ok(Request {
            type_: request_type,
            status_addr: status_desc.addr,
        })
    }
}

struct PmemEpollHandler {
    queue: Queue,
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    disk: File,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evt: EventFd,
    kill_evt: EventFd,
    pause_evt: EventFd,
}

impl PmemEpollHandler {
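    // Drains the available ring: for each FLUSH request, syncs the backing
    // file and writes the status back to the guest-provided buffer, then
    // returns the descriptors to the used ring. Returns true if at least one
    // descriptor was processed, i.e. the guest needs to be notified.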
    fn process_queue(&mut self) -> bool {
        let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
        let mut used_count = 0;
        let mem = self.mem.memory();
        for avail_desc in self.queue.iter(&mem) {
            let len = match Request::parse(&avail_desc, &mem) {
                Ok(ref req) if (req.type_ == RequestType::Flush) => {
                    let status_code = match self.disk.sync_all() {
                        Ok(()) => VIRTIO_PMEM_RESP_TYPE_OK,
                        Err(e) => {
                            error!("failed flushing disk image: {}", e);
                            VIRTIO_PMEM_RESP_TYPE_EIO
                        }
                    };

                    let resp = VirtioPmemResp { ret: status_code };
                    match mem.write_obj(resp, req.status_addr) {
                        Ok(_) => size_of::<VirtioPmemResp>() as u32,
                        Err(e) => {
                            error!("bad guest memory address: {}", e);
                            0
                        }
                    }
                }
                Ok(ref req) => {
                    // Currently, there is only one virtio-pmem request, FLUSH.
                    error!("Invalid virtio request type {:?}", req.type_);
                    0
                }
                Err(e) => {
                    error!("Failed to parse available descriptor chain: {:?}", e);
                    0
                }
            };

            used_desc_heads[used_count] = (avail_desc.index, len);
            used_count += 1;
        }

        for &(desc_index, len) in &used_desc_heads[..used_count] {
            self.queue.add_used(&mem, desc_index, len);
        }
        used_count > 0
    }

    fn signal_used_queue(&self) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(&VirtioInterruptType::Queue, Some(&self.queue))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evt.as_raw_fd(), QUEUE_AVAIL_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

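// The epoll handler reacts to QUEUE_AVAIL_EVENT (the guest kicked the queue)
// by processing the queue and signalling the guest when descriptors were
// consumed. Returning true from handle_event tells the EpollHelper to stop
// the worker loop.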
impl EpollHelperHandler for PmemEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            QUEUE_AVAIL_EVENT => {
                if let Err(e) = self.queue_evt.read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.process_queue() {
                    if let Err(e) = self.signal_used_queue() {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}

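// The virtio-pmem device as seen by the VMM: it owns the backing file, the
// config space contents and the userspace mapping, and spawns the epoll
// worker thread when the guest activates the device.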
pub struct Pmem {
    common: VirtioCommon,
    id: String,
    disk: Option<File>,
    config: VirtioPmemConfig,
    mapping: UserspaceMapping,
    seccomp_action: SeccompAction,

    // Hold ownership of the memory that is allocated for the device,
    // which will be automatically dropped when the device is dropped.
    _region: MmapRegion,
}

#[derive(Versionize)]
pub struct PmemState {
    avail_features: u64,
    acked_features: u64,
    config: VirtioPmemConfig,
}

impl VersionMapped for PmemState {}

impl Pmem {
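    // Creates the device: builds the little-endian config from the mapped
    // region's guest address and size, and advertises VIRTIO_F_VERSION_1
    // (plus VIRTIO_F_IOMMU_PLATFORM when the device sits behind a vIOMMU).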
    pub fn new(
        id: String,
        disk: File,
        addr: GuestAddress,
        mapping: UserspaceMapping,
        _region: MmapRegion,
        iommu: bool,
        seccomp_action: SeccompAction,
    ) -> io::Result<Pmem> {
        let config = VirtioPmemConfig {
            start: addr.raw_value().to_le(),
            size: (_region.size() as u64).to_le(),
        };

        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        if iommu {
            avail_features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
        }

        Ok(Pmem {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Pmem as u32,
                queue_sizes: QUEUE_SIZES.to_vec(),
                paused_sync: Some(Arc::new(Barrier::new(2))),
                avail_features,
                min_queues: 1,
                ..Default::default()
            },
            id,
            disk: Some(disk),
            config,
            mapping,
            seccomp_action,
            _region,
        })
    }

    fn state(&self) -> PmemState {
        PmemState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            config: self.config,
        }
    }

    fn set_state(&mut self, state: &PmemState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        self.config = state.config;
    }
}

impl Drop for Pmem {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Pmem {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

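    // Activation path: duplicate the kill/pause eventfds, clone the backing
    // file descriptor, hand the queue and its eventfd to a PmemEpollHandler,
    // and run it on a dedicated thread with the virtio-pmem seccomp filter
    // applied.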
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        mut queues: Vec<Queue>,
        mut queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        if let Some(disk) = self.disk.as_ref() {
            let disk = disk.try_clone().map_err(|e| {
                error!("failed cloning pmem disk: {}", e);
                ActivateError::BadActivate
            })?;
            let mut handler = PmemEpollHandler {
                queue: queues.remove(0),
                mem,
                disk,
                interrupt_cb,
                queue_evt: queue_evts.remove(0),
                kill_evt,
                pause_evt,
            };

            let paused = self.common.paused.clone();
            let paused_sync = self.common.paused_sync.clone();
            let mut epoll_threads = Vec::new();
            // Retrieve seccomp filter for virtio_pmem thread
            let virtio_pmem_seccomp_filter =
                get_seccomp_filter(&self.seccomp_action, Thread::VirtioPmem)
                    .map_err(ActivateError::CreateSeccompFilter)?;
            thread::Builder::new()
                .name(self.id.clone())
                .spawn(move || {
                    if let Err(e) = SeccompFilter::apply(virtio_pmem_seccomp_filter) {
                        error!("Error applying seccomp filter: {:?}", e);
                    } else if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                        error!("Error running worker: {:?}", e);
                    }
                })
                .map(|thread| epoll_threads.push(thread))
                .map_err(|e| {
                    error!("failed to spawn virtio-pmem epoll thread: {}", e);
                    ActivateError::BadActivate
                })?;

            self.common.epoll_threads = Some(epoll_threads);

            event!("virtio-device", "activated", "id", &self.id);
            return Ok(());
        }
        Err(ActivateError::BadActivate)
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }

    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
        vec![self.mapping.clone()]
    }
}

impl Pausable for Pmem {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

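// Snapshot/restore only covers the negotiated features and the config space
// (see PmemState); the mapped region and backing file are not part of the
// state and are set up again when the device is re-created.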
impl Snapshottable for Pmem {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id, &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}

impl Transportable for Pmem {}
impl Migratable for Pmem {}
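
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sanity-check sketch of the wire-format assumption behind the
    // `unsafe impl ByteValued` blocks above: these `repr(C)` structs are
    // plain data with no implicit padding.
    #[test]
    fn test_virtio_pmem_struct_sizes() {
        assert_eq!(std::mem::size_of::<VirtioPmemConfig>(), 16);
        assert_eq!(std::mem::size_of::<VirtioPmemReq>(), 4);
        assert_eq!(std::mem::size_of::<VirtioPmemResp>(), 4);
    }
}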