// xref: /cloud-hypervisor/virtio-devices/src/vdpa.rs (revision 190d90196fff389b60b93b57acf958957b71b249)
// Copyright © 2022 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use std::collections::BTreeMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::{io, result};

use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use vhost::vdpa::{VhostVdpa, VhostVdpaIovaRange};
use vhost::vhost_kern::vdpa::VhostKernVdpa;
use vhost::vhost_kern::vhost_binding::VHOST_BACKEND_F_SUSPEND;
use vhost::vhost_kern::VhostKernFeatures;
use vhost::{VhostBackend, VringConfigData};
use virtio_queue::desc::RawDescriptor;
use virtio_queue::{Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

use crate::{
    ActivateError, ActivateResult, GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioInterrupt,
    VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK,
    VIRTIO_F_IOMMU_PLATFORM,
};

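/// Errors that can be reported by the vhost-vdpa backend.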
#[derive(Error, Debug)]
pub enum Error {
    #[error("Failed to create vhost-vdpa")]
    CreateVhostVdpa(#[source] vhost::Error),
    #[error("Failed to map DMA range")]
    DmaMap(#[source] vhost::Error),
    #[error("Failed to unmap DMA range")]
    DmaUnmap(#[source] vhost::Error),
    #[error("Failed to get address range")]
    GetAddressRange,
    #[error("Failed to get the available index from the virtio queue")]
    GetAvailableIndex(#[source] virtio_queue::Error),
    #[error("Failed to get virtio configuration size")]
    GetConfigSize(#[source] vhost::Error),
    #[error("Failed to get virtio device identifier")]
    GetDeviceId(#[source] vhost::Error),
    #[error("Failed to get backend specific features")]
    GetBackendFeatures(#[source] vhost::Error),
    #[error("Failed to get virtio features")]
    GetFeatures(#[source] vhost::Error),
    #[error("Failed to get the IOVA range")]
    GetIovaRange(#[source] vhost::Error),
    #[error("Failed to get queue size")]
    GetVringNum(#[source] vhost::Error),
    #[error("Invalid IOVA range: {0}-{1}")]
    InvalidIovaRange(u64, u64),
    #[error("Missing VIRTIO_F_ACCESS_PLATFORM feature")]
    MissingAccessPlatformVirtioFeature,
    #[error("Failed to reset owner")]
    ResetOwner(#[source] vhost::Error),
    #[error("Failed to set backend specific features")]
    SetBackendFeatures(#[source] vhost::Error),
    #[error("Failed to set backend configuration")]
    SetConfig(#[source] vhost::Error),
    #[error("Failed to set eventfd notifying about a configuration change")]
    SetConfigCall(#[source] vhost::Error),
    #[error("Failed to set virtio features")]
    SetFeatures(#[source] vhost::Error),
    #[error("Failed to set memory table")]
    SetMemTable(#[source] vhost::Error),
    #[error("Failed to set owner")]
    SetOwner(#[source] vhost::Error),
    #[error("Failed to set virtio status")]
    SetStatus(#[source] vhost::Error),
    #[error("Failed to set vring address")]
    SetVringAddr(#[source] vhost::Error),
    #[error("Failed to set vring base")]
    SetVringBase(#[source] vhost::Error),
    #[error("Failed to set vring eventfd when buffers are used")]
    SetVringCall(#[source] vhost::Error),
    #[error("Failed to enable/disable vring")]
    SetVringEnable(#[source] vhost::Error),
    #[error("Failed to set vring eventfd when new descriptors are available")]
    SetVringKick(#[source] vhost::Error),
    #[error("Failed to set vring size")]
    SetVringNum(#[source] vhost::Error),
}

pub type Result<T> = std::result::Result<T, Error>;

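/// Device state captured when snapshotting, so that an equivalent vDPA
/// device can be recreated on the destination after a live migration.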
#[derive(Serialize, Deserialize)]
pub struct VdpaState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub device_type: u32,
    pub iova_range_first: u64,
    pub iova_range_last: u64,
    pub config: Vec<u8>,
    pub queue_sizes: Vec<u16>,
    pub backend_features: u64,
}

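/// A virtio device backed by a vhost-vdpa kernel device (typically exposed
/// as /dev/vhost-vdpa-<N>). The datapath is driven by the backend itself;
/// this type only programs the device and tracks the state needed for
/// reset and migration.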
pub struct Vdpa {
    common: VirtioCommon,
    id: String,
    vhost: Option<VhostKernVdpa<GuestMemoryAtomic<GuestMemoryMmap>>>,
    iova_range: VhostVdpaIovaRange,
    enabled_queues: BTreeMap<usize, bool>,
    backend_features: u64,
    migrating: bool,
}

impl Vdpa {
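    /// Open the vhost-vdpa device at `device_path` and take ownership of it.
    ///
    /// If `state` is provided, the device is being restored after a live
    /// migration: features, IOVA range and configuration come from the
    /// snapshot and are written back to the backend. Otherwise they are
    /// queried from the backend, which must offer VIRTIO_F_ACCESS_PLATFORM
    /// since all device accesses go through the vDPA DMA mapping.
    ///
    /// A minimal construction sketch; `guest_memory` and the device node
    /// path are assumptions supplied by the caller:
    ///
    /// ```ignore
    /// let vdpa = Vdpa::new(
    ///     "_vdpa0".to_string(),
    ///     "/dev/vhost-vdpa-0", // hypothetical device node
    ///     guest_memory.clone(),
    ///     1,    // number of queues
    ///     None, // fresh device, nothing to restore
    /// )?;
    /// ```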
    pub fn new(
        id: String,
        device_path: &str,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        num_queues: u16,
        state: Option<VdpaState>,
    ) -> Result<Self> {
        let mut vhost = VhostKernVdpa::new(device_path, mem).map_err(Error::CreateVhostVdpa)?;
        vhost.set_owner().map_err(Error::SetOwner)?;

        let (
            device_type,
            avail_features,
            acked_features,
            queue_sizes,
            iova_range,
            backend_features,
            paused,
        ) = if let Some(state) = state {
            info!("Restoring vDPA {}", id);

            vhost.set_backend_features_acked(state.backend_features);
            vhost
                .set_config(0, state.config.as_slice())
                .map_err(Error::SetConfig)?;

            (
                state.device_type,
                state.avail_features,
                state.acked_features,
                state.queue_sizes,
                VhostVdpaIovaRange {
                    first: state.iova_range_first,
                    last: state.iova_range_last,
                },
                state.backend_features,
                false,
            )
        } else {
            let device_type = vhost.get_device_id().map_err(Error::GetDeviceId)?;
            let queue_size = vhost.get_vring_num().map_err(Error::GetVringNum)?;
            let avail_features = vhost.get_features().map_err(Error::GetFeatures)?;
            let backend_features = vhost
                .get_backend_features()
                .map_err(Error::GetBackendFeatures)?;
            vhost.set_backend_features_acked(backend_features);

            let iova_range = vhost.get_iova_range().map_err(Error::GetIovaRange)?;

            if avail_features & (1u64 << VIRTIO_F_IOMMU_PLATFORM) == 0 {
                return Err(Error::MissingAccessPlatformVirtioFeature);
            }

            (
                device_type,
                avail_features,
                0,
                vec![queue_size; num_queues as usize],
                iova_range,
                backend_features,
                false,
            )
        };

        Ok(Vdpa {
            common: VirtioCommon {
                device_type,
                queue_sizes,
                avail_features,
                acked_features,
                min_queues: num_queues,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            vhost: Some(vhost),
            iova_range,
            enabled_queues: BTreeMap::new(),
            backend_features,
            migrating: false,
        })
    }

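    /// Flip every tracked queue to the requested state, skipping queues that
    /// are already there so the backend only sees actual transitions.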
    fn enable_vrings(&mut self, enable: bool) -> Result<()> {
        assert!(self.vhost.is_some());

        for (queue_index, enabled) in self.enabled_queues.iter_mut() {
            if *enabled != enable {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_enable(*queue_index, enable)
                    .map_err(Error::SetVringEnable)?;
                *enabled = enable;
            }
        }

        Ok(())
    }

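    /// Program the backend for the guest-provided queues: negotiate features,
    /// then for each queue set its size, ring addresses (translated through
    /// the access platform when one is set), available index, and call/kick
    /// eventfds. Finally enable the vrings and move the device to DRIVER_OK.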
    fn activate_vdpa(
        &mut self,
        mem: &GuestMemoryMmap,
        virtio_interrupt: &Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> Result<()> {
        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_features(self.common.acked_features)
            .map_err(Error::SetFeatures)?;
        self.vhost
            .as_mut()
            .unwrap()
            .set_backend_features(self.backend_features)
            .map_err(Error::SetBackendFeatures)?;

        for (queue_index, queue, queue_evt) in queues.iter() {
            let queue_max_size = queue.max_size();
            let queue_size = queue.size();
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_num(*queue_index, queue_size)
                .map_err(Error::SetVringNum)?;

            let config_data = VringConfigData {
                queue_max_size,
                queue_size,
                flags: 0u32,
                desc_table_addr: queue.desc_table().translate_gpa(
                    self.common.access_platform.as_ref(),
                    queue_size as usize * std::mem::size_of::<RawDescriptor>(),
                ),
                used_ring_addr: queue.used_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 8,
                ),
                avail_ring_addr: queue.avail_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 2,
                ),
                log_addr: None,
            };

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_addr(*queue_index, &config_data)
                .map_err(Error::SetVringAddr)?;
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_base(
                    *queue_index,
                    queue
                        .avail_idx(mem, Ordering::Acquire)
                        .map_err(Error::GetAvailableIndex)?
                        .0,
                )
                .map_err(Error::SetVringBase)?;

            if let Some(eventfd) =
                virtio_interrupt.notifier(VirtioInterruptType::Queue(*queue_index as u16))
            {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_call(*queue_index, &eventfd)
                    .map_err(Error::SetVringCall)?;
            }

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_kick(*queue_index, queue_evt)
                .map_err(Error::SetVringKick)?;

            self.enabled_queues.insert(*queue_index, false);
        }

        // Set up the config eventfd if there is one
        if let Some(eventfd) = virtio_interrupt.notifier(VirtioInterruptType::Config) {
            self.vhost
                .as_ref()
                .unwrap()
                .set_config_call(&eventfd)
                .map_err(Error::SetConfigCall)?;
        }

        self.enable_vrings(true)?;

        self.vhost
            .as_ref()
            .unwrap()
            .set_status(
                (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8,
            )
            .map_err(Error::SetStatus)
    }

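    /// Quiesce the device: disable all vrings, then clear the status byte so
    /// the backend performs a full virtio reset.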
    fn reset_vdpa(&mut self) -> Result<()> {
        self.enable_vrings(false)?;

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_status(0)
            .map_err(Error::SetStatus)
    }

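    /// Map `size` bytes of host memory at `host_vaddr` to the given IOVA,
    /// after checking that the requested range fits within the IOVA range
    /// advertised by the backend.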
    fn dma_map(
        &mut self,
        iova: u64,
        size: u64,
        host_vaddr: *const u8,
        readonly: bool,
    ) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_map(iova, size, host_vaddr, readonly)
            .map_err(Error::DmaMap)
    }

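    /// Remove a previously established mapping, with the same IOVA range
    /// validation as dma_map().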
    fn dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(Error::DmaUnmap)
    }

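    /// Collect the current device state, including a fresh read of the live
    /// virtio configuration space, into a serializable snapshot.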
    fn state(&self) -> Result<VdpaState> {
        assert!(self.vhost.is_some());
        let config_size = self
            .vhost
            .as_ref()
            .unwrap()
            .get_config_size()
            .map_err(Error::GetConfigSize)?;
        let mut config = vec![0; config_size as usize];
        self.read_config(0, config.as_mut_slice());

        Ok(VdpaState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            device_type: self.common.device_type,
            queue_sizes: self.common.queue_sizes.clone(),
            iova_range_first: self.iova_range.first,
            iova_range_last: self.iova_range.last,
            config,
            backend_features: self.backend_features,
        })
    }
}

impl VirtioDevice for Vdpa {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().get_config(offset as u32, data) {
            error!("Failed reading virtio config: {}", e);
        }
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().set_config(offset as u32, data) {
            error!("Failed writing virtio config: {}", e);
        }
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        virtio_interrupt: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.activate_vdpa(&mem.memory(), &virtio_interrupt, queues)
            .map_err(ActivateError::ActivateVdpa)?;

        // Store the virtio interrupt handler as we need to return it on reset
        self.common.interrupt_cb = Some(virtio_interrupt);

        event!("vdpa", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if let Err(e) = self.reset_vdpa() {
            error!("Failed to reset vhost-vdpa: {:?}", e);
            return None;
        }

        event!("vdpa", "reset", "id", &self.id);

        // Return the virtio interrupt handler
        self.common.interrupt_cb.take()
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

impl Pausable for Vdpa {
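    // Pausing and resuming are only meaningful as part of live migration:
    // the device itself is suspended from start_migration(), so these hooks
    // simply reject out-of-migration requests.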
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.migrating {
            Err(MigratableError::Pause(anyhow!(
                "Can't pause a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }

    fn resume(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.common.paused.load(Ordering::SeqCst) {
            return Ok(());
        }

        if !self.migrating {
            Err(MigratableError::Resume(anyhow!(
                "Can't resume a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }
}

impl Snapshottable for Vdpa {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        if !self.migrating {
            return Err(MigratableError::Snapshot(anyhow!(
                "Can't snapshot a vDPA device outside live migration"
            )));
        }

        let snapshot = Snapshot::new_from_state(&self.state().map_err(|e| {
            MigratableError::Snapshot(anyhow!("Error snapshotting vDPA device: {:?}", e))
        })?)?;

        // Force the vhost handler to be dropped in order to close the vDPA
        // file. This will ensure the device can be accessed if the VM is
        // migrated on the same host machine.
        self.vhost.take();

        Ok(snapshot)
    }
}

impl Transportable for Vdpa {}

impl Migratable for Vdpa {
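    // Suspending requires the backend to advertise VHOST_BACKEND_F_SUSPEND;
    // without it there is no safe way to migrate, so we fail early.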
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = true;
        // Given there's no way to track dirty pages, we must suspend the
        // device as soon as the migration process starts.
        if self.backend_features & (1 << VHOST_BACKEND_F_SUSPEND) != 0 {
            assert!(self.vhost.is_some());
            self.vhost.as_ref().unwrap().suspend().map_err(|e| {
                MigratableError::StartMigration(anyhow!("Error suspending vDPA device: {:?}", e))
            })
        } else {
            Err(MigratableError::StartMigration(anyhow!(
                "vDPA device can't be suspended"
            )))
        }
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = false;
        Ok(())
    }
}

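/// ExternalDmaMapping implementation that forwards map/unmap requests coming
/// from the guest's IOMMU to the underlying vDPA device.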
pub struct VdpaDmaMapping<M: GuestAddressSpace> {
    device: Arc<Mutex<Vdpa>>,
    memory: Arc<M>,
}

impl<M: GuestAddressSpace> VdpaDmaMapping<M> {
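    /// Build a mapping handler for `device`, resolving guest physical
    /// addresses through `memory`.
    ///
    /// A usage sketch; `vdpa` and `guest_memory` are assumed to be built
    /// elsewhere by the caller:
    ///
    /// ```ignore
    /// let mapping: Arc<dyn ExternalDmaMapping> = Arc::new(VdpaDmaMapping::new(
    ///     Arc::clone(&vdpa),      // Arc<Mutex<Vdpa>>
    ///     Arc::new(guest_memory), // GuestMemoryAtomic<GuestMemoryMmap>
    /// ));
    /// ```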
    pub fn new(device: Arc<Mutex<Vdpa>>, memory: Arc<M>) -> Self {
        Self { device, memory }
    }
}

impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VdpaDmaMapping<M> {
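    // Translate the guest physical address to a host virtual address, then
    // ask the device to map that host range at the requested IOVA.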
    fn map(&self, iova: u64, gpa: u64, size: u64) -> result::Result<(), io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let user_addr = if mem.check_range(guest_addr, size as usize) {
            mem.get_host_address(guest_addr).unwrap() as *const u8
        } else {
            return Err(io::Error::other(format!(
                "failed to convert guest address 0x{gpa:x} into \
                 host user virtual address"
            )));
        };

        debug!(
            "DMA map iova 0x{:x}, gpa 0x{:x}, size 0x{:x}, host_addr 0x{:x}",
            iova, gpa, size, user_addr as u64
        );
        self.device
            .lock()
            .unwrap()
            .dma_map(iova, size, user_addr, false)
            .map_err(|e| {
                io::Error::other(format!(
                    "failed to map memory for vDPA device, \
                     iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                ))
            })
    }

    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
        debug!("DMA unmap iova 0x{:x} size 0x{:x}", iova, size);
        self.device
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                io::Error::other(format!(
                    "failed to unmap memory for vDPA device, \
                     iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                ))
            })
    }
}