xref: /cloud-hypervisor/virtio-devices/src/vdpa.rs (revision 8803e4a2e7f8e9596b72f81d3c916390e5b10fbd)
// Copyright © 2022 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

use crate::{
    ActivateError, ActivateResult, GuestMemoryMmap, VirtioCommon, VirtioDevice, VirtioInterrupt,
    VirtioInterruptType, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FEATURES_OK,
    VIRTIO_F_IOMMU_PLATFORM,
};
use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use std::{
    collections::BTreeMap,
    io, result,
    sync::{
        atomic::{AtomicBool, Ordering},
        Arc, Mutex,
    },
};
use thiserror::Error;
use vhost::{
    vdpa::{VhostVdpa, VhostVdpaIovaRange},
    vhost_kern::{vdpa::VhostKernVdpa, vhost_binding::VHOST_BACKEND_F_SUSPEND, VhostKernFeatures},
    VhostBackend, VringConfigData,
};
use virtio_queue::{Descriptor, Queue, QueueT};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic};
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vm_virtio::{AccessPlatform, Translatable};
use vmm_sys_util::eventfd::EventFd;

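/// Errors reported by the vhost-vdpa backend while configuring or driving
/// the device.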
#[derive(Error, Debug)]
pub enum Error {
    #[error("Failed to create vhost-vdpa: {0}")]
    CreateVhostVdpa(vhost::Error),
    #[error("Failed to map DMA range: {0}")]
    DmaMap(vhost::Error),
    #[error("Failed to unmap DMA range: {0}")]
    DmaUnmap(vhost::Error),
    #[error("Failed to get address range")]
    GetAddressRange,
    #[error("Failed to get the available index from the virtio queue: {0}")]
    GetAvailableIndex(virtio_queue::Error),
    #[error("Failed to get backend specific features: {0}")]
    GetBackendFeatures(vhost::Error),
    #[error("Failed to get the virtio configuration size: {0}")]
    GetConfigSize(vhost::Error),
    #[error("Failed to get the virtio device identifier: {0}")]
    GetDeviceId(vhost::Error),
    #[error("Failed to get virtio features: {0}")]
    GetFeatures(vhost::Error),
    #[error("Failed to get the IOVA range: {0}")]
    GetIovaRange(vhost::Error),
    #[error("Failed to get queue size: {0}")]
    GetVringNum(vhost::Error),
    #[error("Invalid IOVA range: {0}-{1}")]
    InvalidIovaRange(u64, u64),
    #[error("Missing VIRTIO_F_ACCESS_PLATFORM feature")]
    MissingAccessPlatformVirtioFeature,
    #[error("Failed to reset owner: {0}")]
    ResetOwner(vhost::Error),
    #[error("Failed to set backend specific features: {0}")]
    SetBackendFeatures(vhost::Error),
    #[error("Failed to set backend configuration: {0}")]
    SetConfig(vhost::Error),
    #[error("Failed to set the eventfd notifying about a configuration change: {0}")]
    SetConfigCall(vhost::Error),
    #[error("Failed to set virtio features: {0}")]
    SetFeatures(vhost::Error),
    #[error("Failed to set memory table: {0}")]
    SetMemTable(vhost::Error),
    #[error("Failed to set owner: {0}")]
    SetOwner(vhost::Error),
    #[error("Failed to set virtio status: {0}")]
    SetStatus(vhost::Error),
    #[error("Failed to set vring address: {0}")]
    SetVringAddr(vhost::Error),
    #[error("Failed to set vring base: {0}")]
    SetVringBase(vhost::Error),
    #[error("Failed to set the vring eventfd signaled when buffers are used: {0}")]
    SetVringCall(vhost::Error),
    #[error("Failed to enable/disable vring: {0}")]
    SetVringEnable(vhost::Error),
    #[error("Failed to set the vring eventfd signaled when new descriptors are available: {0}")]
    SetVringKick(vhost::Error),
    #[error("Failed to set vring size: {0}")]
    SetVringNum(vhost::Error),
}

pub type Result<T> = std::result::Result<T, Error>;

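/// Serializable state of a vDPA device, captured when snapshotting the
/// device for live migration and replayed into the backend on restore.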
#[derive(Serialize, Deserialize)]
pub struct VdpaState {
    pub avail_features: u64,
    pub acked_features: u64,
    pub device_type: u32,
    pub iova_range_first: u64,
    pub iova_range_last: u64,
    pub config: Vec<u8>,
    pub queue_sizes: Vec<u16>,
    pub backend_features: u64,
}

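/// A virtio device backed by a vhost-vdpa kernel device: the `vhost` handle
/// drives the control path while the vDPA backend implements the datapath.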
pub struct Vdpa {
    common: VirtioCommon,
    id: String,
    vhost: Option<VhostKernVdpa<GuestMemoryAtomic<GuestMemoryMmap>>>,
    iova_range: VhostVdpaIovaRange,
    enabled_queues: BTreeMap<usize, bool>,
    backend_features: u64,
    migrating: bool,
}

impl Vdpa {
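    /// Create a new vDPA device from the vhost-vdpa character device found
    /// at `device_path`. When `state` is provided (restore path), the saved
    /// features, queue sizes, IOVA range and configuration are replayed into
    /// the backend instead of being queried from it.
    ///
    /// Illustrative sketch only; the `"/dev/vhost-vdpa-0"` path and the `mem`
    /// handle are assumptions, not taken from this file:
    ///
    /// ```ignore
    /// let vdpa = Vdpa::new("_vdpa0".to_string(), "/dev/vhost-vdpa-0", mem, 2, None)?;
    /// ```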
    pub fn new(
        id: String,
        device_path: &str,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        num_queues: u16,
        state: Option<VdpaState>,
    ) -> Result<Self> {
        let mut vhost = VhostKernVdpa::new(device_path, mem).map_err(Error::CreateVhostVdpa)?;
        vhost.set_owner().map_err(Error::SetOwner)?;

        let (
            device_type,
            avail_features,
            acked_features,
            queue_sizes,
            iova_range,
            backend_features,
            paused,
        ) = if let Some(state) = state {
            info!("Restoring vDPA {}", id);

            vhost.set_backend_features_acked(state.backend_features);
            vhost
                .set_config(0, state.config.as_slice())
                .map_err(Error::SetConfig)?;

            (
                state.device_type,
                state.avail_features,
                state.acked_features,
                state.queue_sizes,
                VhostVdpaIovaRange {
                    first: state.iova_range_first,
                    last: state.iova_range_last,
                },
                state.backend_features,
                false,
            )
        } else {
            let device_type = vhost.get_device_id().map_err(Error::GetDeviceId)?;
            let queue_size = vhost.get_vring_num().map_err(Error::GetVringNum)?;
            let avail_features = vhost.get_features().map_err(Error::GetFeatures)?;
            let backend_features = vhost
                .get_backend_features()
                .map_err(Error::GetBackendFeatures)?;
            vhost.set_backend_features_acked(backend_features);

            let iova_range = vhost.get_iova_range().map_err(Error::GetIovaRange)?;

            if avail_features & (1u64 << VIRTIO_F_IOMMU_PLATFORM) == 0 {
                return Err(Error::MissingAccessPlatformVirtioFeature);
            }

            (
                device_type,
                avail_features,
                0,
                vec![queue_size; num_queues as usize],
                iova_range,
                backend_features,
                false,
            )
        };

        Ok(Vdpa {
            common: VirtioCommon {
                device_type,
                queue_sizes,
                avail_features,
                acked_features,
                min_queues: num_queues,
                paused: Arc::new(AtomicBool::new(paused)),
                ..Default::default()
            },
            id,
            vhost: Some(vhost),
            iova_range,
            enabled_queues: BTreeMap::new(),
            backend_features,
            migrating: false,
        })
    }

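    // Flip every tracked vring to the requested state, skipping queues that
    // are already in that state to avoid redundant backend calls.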
    fn enable_vrings(&mut self, enable: bool) -> Result<()> {
        assert!(self.vhost.is_some());

        for (queue_index, enabled) in self.enabled_queues.iter_mut() {
            if *enabled != enable {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_enable(*queue_index, enable)
                    .map_err(Error::SetVringEnable)?;
                *enabled = enable;
            }
        }

        Ok(())
    }

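    // Program the backend for an active device: negotiated features first,
    // then per-queue size, ring addresses, base index and kick/call eventfds,
    // and finally the DRIVER_OK status that starts the datapath.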
    fn activate_vdpa(
        &mut self,
        mem: &GuestMemoryMmap,
        virtio_interrupt: &Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> Result<()> {
        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_features(self.common.acked_features)
            .map_err(Error::SetFeatures)?;
        self.vhost
            .as_mut()
            .unwrap()
            .set_backend_features(self.backend_features)
            .map_err(Error::SetBackendFeatures)?;

        for (queue_index, queue, queue_evt) in queues.iter() {
            let queue_max_size = queue.max_size();
            let queue_size = queue.size();
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_num(*queue_index, queue_size)
                .map_err(Error::SetVringNum)?;

            // Ring addresses may need translation when an access platform
            // (vIOMMU) is in place, hence the translate_gpa() calls carrying
            // the size of each ring area: 16 bytes per descriptor, a 4-byte
            // flags/idx header plus 8 bytes per used ring element, and the
            // same header plus 2 bytes per available ring index.
            let config_data = VringConfigData {
                queue_max_size,
                queue_size,
                flags: 0u32,
                desc_table_addr: queue.desc_table().translate_gpa(
                    self.common.access_platform.as_ref(),
                    queue_size as usize * std::mem::size_of::<Descriptor>(),
                ),
                used_ring_addr: queue.used_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 8,
                ),
                avail_ring_addr: queue.avail_ring().translate_gpa(
                    self.common.access_platform.as_ref(),
                    4 + queue_size as usize * 2,
                ),
                log_addr: None,
            };

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_addr(*queue_index, &config_data)
                .map_err(Error::SetVringAddr)?;
            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_base(
                    *queue_index,
                    queue
                        .avail_idx(mem, Ordering::Acquire)
                        .map_err(Error::GetAvailableIndex)?
                        .0,
                )
                .map_err(Error::SetVringBase)?;

            if let Some(eventfd) =
                virtio_interrupt.notifier(VirtioInterruptType::Queue(*queue_index as u16))
            {
                self.vhost
                    .as_ref()
                    .unwrap()
                    .set_vring_call(*queue_index, &eventfd)
                    .map_err(Error::SetVringCall)?;
            }

            self.vhost
                .as_ref()
                .unwrap()
                .set_vring_kick(*queue_index, queue_evt)
                .map_err(Error::SetVringKick)?;

            self.enabled_queues.insert(*queue_index, false);
        }

        // Set up the config eventfd, if there is one.
        if let Some(eventfd) = virtio_interrupt.notifier(VirtioInterruptType::Config) {
            self.vhost
                .as_ref()
                .unwrap()
                .set_config_call(&eventfd)
                .map_err(Error::SetConfigCall)?;
        }

        self.enable_vrings(true)?;

        self.vhost
            .as_ref()
            .unwrap()
            .set_status(
                (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8,
            )
            .map_err(Error::SetStatus)
    }

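    // Quiesce the device: disable every vring, then clear the virtio status
    // so the backend returns to its initial state.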
    fn reset_vdpa(&mut self) -> Result<()> {
        self.enable_vrings(false)?;

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .set_status(0)
            .map_err(Error::SetStatus)
    }

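    // Map a guest IOVA range to the host virtual address backing it,
    // rejecting any range falling outside what the backend advertised.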
    fn dma_map(
        &mut self,
        iova: u64,
        size: u64,
        host_vaddr: *const u8,
        readonly: bool,
    ) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_map(iova, size, host_vaddr, readonly)
            .map_err(Error::DmaMap)
    }

    fn dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let iova_last = iova + size - 1;
        if iova < self.iova_range.first || iova_last > self.iova_range.last {
            return Err(Error::InvalidIovaRange(iova, iova_last));
        }

        assert!(self.vhost.is_some());
        self.vhost
            .as_ref()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(Error::DmaUnmap)
    }

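    // Collect everything needed to rebuild the device on the restore path,
    // including a fresh read of the device configuration space.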
    fn state(&self) -> Result<VdpaState> {
        assert!(self.vhost.is_some());
        let config_size = self
            .vhost
            .as_ref()
            .unwrap()
            .get_config_size()
            .map_err(Error::GetConfigSize)?;
        let mut config = vec![0; config_size as usize];
        self.read_config(0, config.as_mut_slice());

        Ok(VdpaState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            device_type: self.common.device_type,
            queue_sizes: self.common.queue_sizes.clone(),
            iova_range_first: self.iova_range.first,
            iova_range_last: self.iova_range.last,
            config,
            backend_features: self.backend_features,
        })
    }
}

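// The VirtioDevice implementation mostly delegates to the vhost-vdpa
// backend: configuration reads/writes go straight to the device, while
// activate() and reset() drive the backend datapath.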
impl VirtioDevice for Vdpa {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().get_config(offset as u32, data) {
            error!("Failed reading virtio config: {}", e);
        }
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        assert!(self.vhost.is_some());
        if let Err(e) = self.vhost.as_ref().unwrap().set_config(offset as u32, data) {
            error!("Failed writing virtio config: {}", e);
        }
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        virtio_interrupt: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.activate_vdpa(&mem.memory(), &virtio_interrupt, queues)
            .map_err(ActivateError::ActivateVdpa)?;

        // Store the virtio interrupt handler as we need to return it on reset.
        self.common.interrupt_cb = Some(virtio_interrupt);

        event!("vdpa", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if let Err(e) = self.reset_vdpa() {
            error!("Failed to reset vhost-vdpa: {:?}", e);
            return None;
        }

        event!("vdpa", "reset", "id", &self.id);

        // Return the virtio interrupt handler.
        self.common.interrupt_cb.take()
    }

    fn set_access_platform(&mut self, access_platform: Arc<dyn AccessPlatform>) {
        self.common.set_access_platform(access_platform)
    }
}

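// Since the datapath is offloaded to the vDPA hardware, the device cannot
// be paused on its own: pause() and resume() are only honored as part of a
// live migration sequence.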
impl Pausable for Vdpa {
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.migrating {
            Err(MigratableError::Pause(anyhow!(
                "Can't pause a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }

    fn resume(&mut self) -> std::result::Result<(), MigratableError> {
        if !self.common.paused.load(Ordering::SeqCst) {
            return Ok(());
        }

        if !self.migrating {
            Err(MigratableError::Resume(anyhow!(
                "Can't resume a vDPA device outside live migration"
            )))
        } else {
            Ok(())
        }
    }
}

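// Snapshotting is likewise restricted to live migration, and it consumes
// the vhost handle so the underlying vDPA file gets closed.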
impl Snapshottable for Vdpa {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        if !self.migrating {
            return Err(MigratableError::Snapshot(anyhow!(
                "Can't snapshot a vDPA device outside live migration"
            )));
        }

        let snapshot = Snapshot::new_from_state(&self.state().map_err(|e| {
            MigratableError::Snapshot(anyhow!("Error snapshotting vDPA device: {:?}", e))
        })?)?;

        // Force the vhost handler to be dropped in order to close the vDPA
        // file. This will ensure the device can be accessed if the VM is
        // migrated on the same host machine.
        self.vhost.take();

        Ok(snapshot)
    }
}

impl Transportable for Vdpa {}

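// Migration support relies on VHOST_BACKEND_F_SUSPEND: without it there is
// no way to quiesce the device, so starting a migration must fail.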
impl Migratable for Vdpa {
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = true;
        // Given there's no way to track dirty pages, we must suspend the
        // device as soon as the migration process starts.
        if self.backend_features & (1 << VHOST_BACKEND_F_SUSPEND) != 0 {
            assert!(self.vhost.is_some());
            self.vhost.as_ref().unwrap().suspend().map_err(|e| {
                MigratableError::StartMigration(anyhow!("Error suspending vDPA device: {:?}", e))
            })
        } else {
            Err(MigratableError::StartMigration(anyhow!(
                "vDPA device can't be suspended"
            )))
        }
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        self.migrating = false;
        Ok(())
    }
}

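/// Adapter implementing ExternalDmaMapping on top of a shared Vdpa device,
/// translating guest-physical addresses into host virtual addresses before
/// forwarding the mappings to the backend.
///
/// Illustrative sketch only; `vdpa` and `guest_memory` are assumed to exist
/// and are not defined in this file:
///
/// ```ignore
/// let mapping = VdpaDmaMapping::new(Arc::new(Mutex::new(vdpa)), guest_memory.clone());
/// mapping.map(iova, gpa, size)?;
/// ```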
pub struct VdpaDmaMapping<M: GuestAddressSpace> {
    device: Arc<Mutex<Vdpa>>,
    memory: Arc<M>,
}

impl<M: GuestAddressSpace> VdpaDmaMapping<M> {
    pub fn new(device: Arc<Mutex<Vdpa>>, memory: Arc<M>) -> Self {
        Self { device, memory }
    }
}

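// map() first checks that the guest range is valid and resolves it to a host
// virtual address; both map() and unmap() surface backend failures as
// io::Error so callers stay vhost-agnostic.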
impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VdpaDmaMapping<M> {
    fn map(&self, iova: u64, gpa: u64, size: u64) -> result::Result<(), io::Error> {
        let mem = self.memory.memory();
        let guest_addr = GuestAddress(gpa);
        let user_addr = if mem.check_range(guest_addr, size as usize) {
            mem.get_host_address(guest_addr).unwrap() as *const u8
        } else {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "failed to convert guest address 0x{gpa:x} into \
                     host user virtual address"
                ),
            ));
        };

        debug!(
            "DMA map iova 0x{:x}, gpa 0x{:x}, size 0x{:x}, host_addr 0x{:x}",
            iova, gpa, size, user_addr as u64
        );
        self.device
            .lock()
            .unwrap()
            .dma_map(iova, size, user_addr, false)
            .map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "failed to map memory for vDPA device, \
                         iova 0x{iova:x}, gpa 0x{gpa:x}, size 0x{size:x}: {e:?}"
                    ),
                )
            })
    }

    fn unmap(&self, iova: u64, size: u64) -> result::Result<(), io::Error> {
        debug!("DMA unmap iova 0x{:x} size 0x{:x}", iova, size);
        self.device
            .lock()
            .unwrap()
            .dma_unmap(iova, size)
            .map_err(|e| {
                io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "failed to unmap memory for vDPA device, \
                         iova 0x{iova:x}, size 0x{size:x}: {e:?}"
                    ),
                )
            })
    }
}