// Copyright © 2022 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use std::collections::BTreeMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::{io, result};

use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use vhost::vdpa::{VhostVdpa, VhostVdpaIovaRange};
use vhost::vhost_kern::vdpa::VhostKernVdpa;
use vhost::vhost_kern::vhost_binding::VHOST_BACKEND_F_SUSPEND;
use vhost::vhost_kern::VhostKernFeatures;
use vhost::{VhostBackend, VringConfigData};
use virtio_queue::{Descriptor, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

use crate::{
    ActivateError, ActivateResult, GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioInterrupt,
    VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK,
    VIRTIO_F_IOMMU_PLATFORM,
};
#[derive(Error, Debug)]
pub enum Error {
    #[error("Failed to create vhost-vdpa: {0}")]
    CreateVhostVdpa(vhost::Error),
    #[error("Failed to map DMA range: {0}")]
    DmaMap(vhost::Error),
    #[error("Failed to unmap DMA range: {0}")]
    DmaUnmap(vhost::Error),
    #[error("Failed to get address range")]
    GetAddressRange,
    #[error("Failed to get the available index from the virtio queue: {0}")]
    GetAvailableIndex(virtio_queue::Error),
    #[error("Failed to get virtio configuration size: {0}")]
    GetConfigSize(vhost::Error),
    #[error("Failed to get virtio device identifier: {0}")]
    GetDeviceId(vhost::Error),
    #[error("Failed to get backend specific features: {0}")]
    GetBackendFeatures(vhost::Error),
    #[error("Failed to get virtio features: {0}")]
    GetFeatures(vhost::Error),
    #[error("Failed to get the IOVA range: {0}")]
    GetIovaRange(vhost::Error),
    #[error("Failed to get queue size: {0}")]
    GetVringNum(vhost::Error),
    #[error("Invalid IOVA range: {0}-{1}")]
    InvalidIovaRange(u64, u64),
    #[error("Missing VIRTIO_F_ACCESS_PLATFORM feature")]
    MissingAccessPlatformVirtioFeature,
    #[error("Failed to reset owner: {0}")]
    ResetOwner(vhost::Error),
    #[error("Failed to set backend specific features: {0}")]
    SetBackendFeatures(vhost::Error),
    #[error("Failed to set backend configuration: {0}")]
    SetConfig(vhost::Error),
    #[error("Failed to set eventfd notifying about a configuration change: {0}")]
    SetConfigCall(vhost::Error),
    #[error("Failed to set virtio features: {0}")]
    SetFeatures(vhost::Error),
    #[error("Failed to set memory table: {0}")]
    SetMemTable(vhost::Error),
    #[error("Failed to set owner: {0}")]
    SetOwner(vhost::Error),
    #[error("Failed to set virtio status: {0}")]
    SetStatus(vhost::Error),
    #[error("Failed to set vring address: {0}")]
    SetVringAddr(vhost::Error),
    #[error("Failed to set vring base: {0}")]
    SetVringBase(vhost::Error),
    #[error("Failed to set vring eventfd when buffers are used: {0}")]
    SetVringCall(vhost::Error),
    #[error("Failed to enable/disable vring: {0}")]
    SetVringEnable(vhost::Error),
    #[error("Failed to set vring eventfd when new descriptors are available: {0}")]
    SetVringKick(vhost::Error),
    #[error("Failed to set vring size: {0}")]
    SetVringNum(vhost::Error),
}

pub type Result<T> = std::result::Result<T, Error>;

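/// Serializable state of a vDPA device, captured at snapshot time and used
/// to restore the device on the destination during live migration.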
#[derive(Serialize, Deserialize)]
pub struct VdpaState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub device_type: u32,
    pub iova_range_first: u64,
    pub iova_range_last: u64,
    pub config: Vec<u8>,
    pub queue_sizes: Vec<u16>,
    pub backend_features: u64,
}

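/// A virtio device whose data path is offloaded to a vDPA backend through
/// the vhost-vdpa kernel interface.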
pub struct Vdpa {
    common: VirtioCommon,
    id: String,
    vhost: Option<VhostKernVdpa<GuestMemoryAtomic<GuestMemoryMmap>>>,
    iova_range: VhostVdpaIovaRange,
    enabled_queues: BTreeMap<usize, bool>,
    backend_features: u64,
    migrating: bool,
}

impl Vdpa {
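    /// Opens the vhost-vdpa device at `device_path` and queries (or, when
    /// restoring from `state`, replays) its features, queue sizes and IOVA
    /// range. A minimal usage sketch, assuming `mem` is an already-built
    /// `GuestMemoryAtomic<GuestMemoryMmap>` and the device node path is
    /// illustrative:
    ///
    /// ```ignore
    /// let vdpa = Vdpa::new("vdpa0".to_string(), "/dev/vhost-vdpa-0", mem, 1, None)?;
    /// ```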
    pub fn new(
        id: String,
        device_path: &str,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        num_queues: u16,
        state: Option<VdpaState>,
    ) -> Result<Self> {
        let mut vhost = VhostKernVdpa::new(device_path, mem).map_err(Error::CreateVhostVdpa)?;
        vhost.set_owner().map_err(Error::SetOwner)?;

        let (
            device_type,
            avail_features,
            acked_features,
            queue_sizes,
            iova_range,
            backend_features,
            paused,
        ) = if let Some(state) = state {
            info!("Restoring vDPA {}", id);

            vhost.set_backend_features_acked(state.backend_features);
            vhost
                .set_config(0, state.config.as_slice())
                .map_err(Error::SetConfig)?;

            (
                state.device_type,
                state.avail_features,
                state.acked_features,
                state.queue_sizes,
                VhostVdpaIovaRange {
                    first: state.iova_range_first,
                    last: state.iova_range_last,
                },
                state.backend_features,
                false,
            )
        } else {
            let device_type = vhost.get_device_id().map_err(Error::GetDeviceId)?;
            let queue_size = vhost.get_vring_num().map_err(Error::GetVringNum)?;
            let avail_features = vhost.get_features().map_err(Error::GetFeatures)?;
            let backend_features = vhost
                .get_backend_features()
                .map_err(Error::GetBackendFeatures)?;
            vhost.set_backend_features_acked(backend_features);

            let iova_range = vhost.get_iova_range().map_err(Error::GetIovaRange)?;

            if avail_features & (1u64 << VIRTIO_F_IOMMU_PLATFORM) == 0 {
                return Err(Error::MissingAccessPlatformVirtioFeature);
            }

            (
                device_type,
                avail_features,
                0,
                vec![queue_size; num_queues as usize],
                iova_range,
                backend_features,
                false,
            )
        };

        Ok(Vdpa {
            common: VirtioCommon {
                device_type,
                queue_sizes,
                avail_features,
                acked_features,
                min_queues: num_queues,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            vhost: Some(vhost),
            iova_range,
            enabled_queues: BTreeMap::new(),
            backend_features,
            migrating: false,
        })
    }

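    // Toggles VHOST_VDPA_SET_VRING_ENABLE for every known queue, skipping
    // the ones already in the requested state.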
    fn enable_vrings(&mut self, enable: bool) -> Result<()> {
        assert!(self.vhost.is_some());

        for (queue_index, enabled) in self.enabled_queues.iter_mut() {
            if *enabled != enable {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_enable(*queue_index, enable)
                    .map_err(Error::SetVringEnable)?;
                *enabled = enable;
            }
        }

        Ok(())
    }

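    // Programs the backend with the negotiated features, configures each
    // vring (size, addresses, base index, call/kick eventfds), enables the
    // rings and finally moves the device status to DRIVER_OK.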
    fn activate_vdpa(
        &mut self,
        mem: &GuestMemoryMmap,
        virtio_interrupt: &Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> Result<()> {
        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_features(self.common.acked_features)
            .map_err(Error::SetFeatures)?;
        self.vhost
            .as_mut()
            .unwrap()
            .set_backend_features(self.backend_features)
            .map_err(Error::SetBackendFeatures)?;

        for (queue_index, queue, queue_evt) in queues.iter() {
            let queue_max_size = queue.max_size();
            let queue_size = queue.size();
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_num(*queue_index, queue_size)
                .map_err(Error::SetVringNum)?;

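            // Translate the ring addresses through the optional access
            // platform (i.e. when the device sits behind a virtual IOMMU).
            // The lengths are the split-ring byte sizes from the virtio
            // spec: 16 bytes per descriptor, a 4-byte header plus 2 bytes
            // per entry for the available ring, and a 4-byte header plus
            // 8 bytes per entry for the used ring.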
            let config_data = VringConfigData {
                queue_max_size,
                queue_size,
                flags: 0u32,
                desc_table_addr: queue.desc_table().translate_gpa(
                    self.common.access_platform.as_ref(),
                    queue_size as usize * std::mem::size_of::<Descriptor>(),
                ),
                used_ring_addr: queue.used_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 8,
                ),
                avail_ring_addr: queue.avail_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 2,
                ),
                log_addr: None,
            };

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_addr(*queue_index, &config_data)
                .map_err(Error::SetVringAddr)?;
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_base(
                    *queue_index,
                    queue
                        .avail_idx(mem, Ordering::Acquire)
                        .map_err(Error::GetAvailableIndex)?
                        .0,
                )
                .map_err(Error::SetVringBase)?;

            if let Some(eventfd) =
                virtio_interrupt.notifier(VirtioInterruptType::Queue(*queue_index as u16))
            {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_call(*queue_index, &eventfd)
                    .map_err(Error::SetVringCall)?;
            }

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_kick(*queue_index, queue_evt)
                .map_err(Error::SetVringKick)?;

            self.enabled_queues.insert(*queue_index, false);
        }

        // Set up the config eventfd if there is one
        if let Some(eventfd) = virtio_interrupt.notifier(VirtioInterruptType::Config) {
            self.vhost
                .as_ref()
                .unwrap()
                .set_config_call(&eventfd)
                .map_err(Error::SetConfigCall)?;
        }

        self.enable_vrings(true)?;

        self.vhost
            .as_ref()
            .unwrap()
            .set_status(
                (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8,
            )
            .map_err(Error::SetStatus)
    }

    fn reset_vdpa(&mut self) -> Result<()> {
        self.enable_vrings(false)?;

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_status(0)
            .map_err(Error::SetStatus)
    }

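    // Validates that the requested mapping fits within the IOVA range
    // advertised by the backend before programming it.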
    fn dma_map(
        &mut self,
        iova: u64,
        size: u64,
        host_vaddr: *const u8,
        readonly: bool,
    ) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_map(iova, size, host_vaddr, readonly)
            .map_err(Error::DmaMap)
    }

    fn dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(Error::DmaUnmap)
    }

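    // Collects the current device state, including the live virtio
    // configuration space, into a serializable `VdpaState`.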
    fn state(&self) -> Result<VdpaState> {
        assert!(self.vhost.is_some());
        let config_size = self
            .vhost
            .as_ref()
            .unwrap()
            .get_config_size()
            .map_err(Error::GetConfigSize)?;
        let mut config = vec![0; config_size as usize];
        self.read_config(0, config.as_mut_slice());

        Ok(VdpaState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            device_type: self.common.device_type,
            queue_sizes: self.common.queue_sizes.clone(),
            iova_range_first: self.iova_range.first,
            iova_range_last: self.iova_range.last,
            config,
            backend_features: self.backend_features,
        })
    }
}

impl VirtioDevice for Vdpa {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().get_config(offset as u32, data) {
            error!("Failed reading virtio config: {}", e);
        }
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().set_config(offset as u32, data) {
            error!("Failed writing virtio config: {}", e);
        }
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        virtio_interrupt: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.activate_vdpa(&mem.memory(), &virtio_interrupt, queues)
            .map_err(ActivateError::ActivateVdpa)?;

        // Store the virtio interrupt handler as we need to return it on reset
        self.common.interrupt_cb = Some(virtio_interrupt);

        event!("vdpa", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if let Err(e) = self.reset_vdpa() {
            error!("Failed to reset vhost-vdpa: {:?}", e);
            return None;
        }

        event!("vdpa", "reset", "id", &self.id);

        // Return the virtio interrupt handler
        self.common.interrupt_cb.take()
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

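// Since the device state lives in the kernel backend, pausing and resuming
// are only meaningful as part of the live migration flow, where the device
// gets suspended by start_migration().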
impl Pausable for Vdpa {
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.migrating {
            Err(MigratableError::Pause(anyhow!(
                "Can't pause a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }

    fn resume(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.common.paused.load(Ordering::SeqCst) {
            return Ok(());
        }

        if !self.migrating {
            Err(MigratableError::Resume(anyhow!(
                "Can't resume a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }
}

impl Snapshottable for Vdpa {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        if !self.migrating {
            return Err(MigratableError::Snapshot(anyhow!(
                "Can't snapshot a vDPA device outside live migration"
            )));
        }

        let snapshot = Snapshot::new_from_state(&self.state().map_err(|e| {
            MigratableError::Snapshot(anyhow!("Error snapshotting vDPA device: {:?}", e))
        })?)?;

        // Force the vhost handler to be dropped in order to close the vDPA
        // file. This will ensure the device can be accessed if the VM is
        // migrated on the same host machine.
        self.vhost.take();

        Ok(snapshot)
    }
}

impl Transportable for Vdpa {}

impl Migratable for Vdpa {
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = true;
        // Given there's no way to track dirty pages, we must suspend the
        // device as soon as the migration process starts.
        if self.backend_features & (1 << VHOST_BACKEND_F_SUSPEND) != 0 {
            assert!(self.vhost.is_some());
            self.vhost.as_ref().unwrap().suspend().map_err(|e| {
                MigratableError::StartMigration(anyhow!("Error suspending vDPA device: {:?}", e))
            })
        } else {
            Err(MigratableError::StartMigration(anyhow!(
                "vDPA device can't be suspended"
            )))
        }
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = false;
        Ok(())
    }
}

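/// Adapter that exposes a `Vdpa` device as an `ExternalDmaMapping`, so that
/// a virtual IOMMU can program IOVA-to-host mappings into the vDPA backend.
/// A minimal wiring sketch, assuming `vdpa` and `guest_memory` were created
/// elsewhere:
///
/// ```ignore
/// let mapping: Arc<dyn ExternalDmaMapping> =
///     Arc::new(VdpaDmaMapping::new(Arc::new(Mutex::new(vdpa)), guest_memory));
/// ```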
pub struct VdpaDmaMapping<M: GuestAddressSpace> {
    device: Arc<Mutex<Vdpa>>,
    memory: Arc<M>,
}

impl<M: GuestAddressSpace> VdpaDmaMapping<M> {
    pub fn new(device: Arc<Mutex<Vdpa>>, memory: Arc<M>) -> Self {
        Self { device, memory }
    }
}

impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VdpaDmaMapping<M> {
    fn map(&self, iova: u64, gpa: u64, size: u64) -> result::Result<(), io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let user_addr = if mem.check_range(guest_addr, size as usize) {
            mem.get_host_address(guest_addr).unwrap() as *const u8
        } else {
            return Err(io::Error::other(format!(
                "failed to convert guest address 0x{gpa:x} into \
                     host user virtual address"
            )));
        };

        debug!(
            "DMA map iova 0x{:x}, gpa 0x{:x}, size 0x{:x}, host_addr 0x{:x}",
            iova, gpa, size, user_addr as u64
        );
        self.device
            .lock()
            .unwrap()
            .dma_map(iova, size, user_addr, false)
            .map_err(|e| {
                io::Error::other(format!(
                    "failed to map memory for vDPA device, \
                         iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                ))
            })
    }

    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        debug!("DMA unmap iova 0x{:x} size 0x{:x}", iova, size);
        self.device
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                io::Error::other(format!(
                    "failed to unmap memory for vDPA device, \
                     iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                ))
            })
    }
}