xref: /cloud-hypervisor/pci/src/vfio_user.rs (revision eea9bcea38e0c5649f444c829f3a4f9c22aa486c)
1 // Copyright © 2021 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 use crate::vfio::{Interrupt, UserMemoryRegion, Vfio, VfioCommon, VfioError};
7 use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError};
8 use crate::{
9     PciBdf, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass,
10 };
11 use anyhow::anyhow;
12 use hypervisor::HypervisorVmError;
13 use std::any::Any;
14 use std::collections::HashMap;
15 use std::os::unix::prelude::AsRawFd;
16 use std::ptr::null_mut;
17 use std::sync::{Arc, Barrier, Mutex};
18 use std::u32;
19 use thiserror::Error;
20 use vfio_bindings::bindings::vfio::*;
21 use vfio_ioctls::VfioIrq;
22 use vfio_user::{Client, Error as VfioUserError};
23 use vm_allocator::{AddressAllocator, SystemAllocator};
24 use vm_device::dma_mapping::ExternalDmaMapping;
25 use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
26 use vm_device::{BusDevice, Resource};
27 use vm_memory::bitmap::AtomicBitmap;
28 use vm_memory::{
29     Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
30 };
31 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
32 use vmm_sys_util::eventfd::EventFd;
33 
/// PCI device whose backend is reached through a vfio-user [`Client`].
pub struct VfioUserPciDevice {
    /// Identifier reported by `PciDevice::id()` and `Snapshottable::id()`.
    id: String,
    /// Hypervisor VM handle used to create and remove guest user memory regions.
    vm: Arc<dyn hypervisor::Vm>,
    /// Shared vfio-user client; every access goes through the mutex.
    client: Arc<Mutex<Client>>,
    /// VFIO state (MMIO regions, PCI configuration, interrupts) that most
    /// `PciDevice` methods delegate to.
    common: VfioCommon,
    /// Callback invoked once per mapping to obtain the memory slot number.
    memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
}
41 
/// Errors returned by [`VfioUserPciDevice`] operations.
#[derive(Error, Debug)]
pub enum VfioUserPciDeviceError {
    /// The vfio-user client reported an error (used for the device reset in `new()`).
    #[error("Client error: {0}")]
    Client(#[source] VfioUserError),
    /// The hypervisor refused to register a mapped region as guest memory.
    #[error("Failed to map VFIO PCI region into guest: {0}")]
    MapRegionGuest(#[source] HypervisorVmError),
    /// The client failed to establish a DMA mapping.
    #[error("Failed to DMA map: {0}")]
    DmaMap(#[source] VfioUserError),
    /// The client failed to remove a DMA mapping.
    #[error("Failed to DMA unmap: {0}")]
    DmaUnmap(#[source] VfioUserError),
    /// Legacy interrupt initialization failed while constructing the device.
    #[error("Failed to initialize legacy interrupts: {0}")]
    InitializeLegacyInterrupts(#[source] VfioPciError),
}
55 
/// PCI subclass used for the placeholder configuration built in
/// `VfioUserPciDevice::new()`.
#[derive(Copy, Clone)]
enum PciVfioUserSubclass {
    /// The only subclass value used by this device: 0xff.
    VfioUserSubclass = 0xff,
}
60 
61 impl PciSubclass for PciVfioUserSubclass {
62     fn get_register_value(&self) -> u8 {
63         *self as u8
64     }
65 }
66 
67 impl VfioUserPciDevice {
68     #[allow(clippy::too_many_arguments)]
69     pub fn new(
70         id: String,
71         vm: &Arc<dyn hypervisor::Vm>,
72         client: Arc<Mutex<Client>>,
73         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
74         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
75         bdf: PciBdf,
76         restoring: bool,
77         memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
78     ) -> Result<Self, VfioUserPciDeviceError> {
79         // This is used for the BAR and capabilities only
80         let configuration = PciConfiguration::new(
81             0,
82             0,
83             0,
84             PciClassCode::Other,
85             &PciVfioUserSubclass::VfioUserSubclass,
86             None,
87             PciHeaderType::Device,
88             0,
89             0,
90             None,
91         );
92         let resettable = client.lock().unwrap().resettable();
93         if resettable {
94             client
95                 .lock()
96                 .unwrap()
97                 .reset()
98                 .map_err(VfioUserPciDeviceError::Client)?;
99         }
100 
101         let vfio_wrapper = VfioUserClientWrapper {
102             client: client.clone(),
103         };
104 
105         let mut common = VfioCommon {
106             mmio_regions: Vec::new(),
107             configuration,
108             interrupt: Interrupt {
109                 intx: None,
110                 msi: None,
111                 msix: None,
112             },
113             msi_interrupt_manager,
114             legacy_interrupt_group,
115             vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
116             patches: HashMap::new(),
117         };
118 
119         // No need to parse capabilities from the device if on the restore path.
120         // The initialization will be performed later when restore() will be
121         // called.
122         if !restoring {
123             common.parse_capabilities(bdf);
124             common
125                 .initialize_legacy_interrupt()
126                 .map_err(VfioUserPciDeviceError::InitializeLegacyInterrupts)?;
127         }
128 
129         Ok(Self {
130             id,
131             vm: vm.clone(),
132             client,
133             common,
134             memory_slot,
135         })
136     }
137 
138     pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
139         for mmio_region in &mut self.common.mmio_regions {
140             let region_flags = self
141                 .client
142                 .lock()
143                 .unwrap()
144                 .region(mmio_region.index)
145                 .unwrap()
146                 .flags;
147             let file_offset = self
148                 .client
149                 .lock()
150                 .unwrap()
151                 .region(mmio_region.index)
152                 .unwrap()
153                 .file_offset
154                 .clone();
155 
156             let sparse_areas = self
157                 .client
158                 .lock()
159                 .unwrap()
160                 .region(mmio_region.index)
161                 .unwrap()
162                 .sparse_areas
163                 .clone();
164 
165             if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
166                 let mut prot = 0;
167                 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
168                     prot |= libc::PROT_READ;
169                 }
170                 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
171                     prot |= libc::PROT_WRITE;
172                 }
173 
174                 let mmaps = if sparse_areas.is_empty() {
175                     vec![vfio_region_sparse_mmap_area {
176                         offset: 0,
177                         size: mmio_region.length,
178                     }]
179                 } else {
180                     sparse_areas
181                 };
182 
183                 for s in mmaps.iter() {
184                     let host_addr = unsafe {
185                         libc::mmap(
186                             null_mut(),
187                             s.size as usize,
188                             prot,
189                             libc::MAP_SHARED,
190                             file_offset.as_ref().unwrap().file().as_raw_fd(),
191                             file_offset.as_ref().unwrap().start() as libc::off_t
192                                 + s.offset as libc::off_t,
193                         )
194                     };
195 
196                     if host_addr == libc::MAP_FAILED {
197                         error!(
198                             "Could not mmap regions, error:{}",
199                             std::io::Error::last_os_error()
200                         );
201                         continue;
202                     }
203 
204                     let user_memory_region = UserMemoryRegion {
205                         slot: (self.memory_slot)(),
206                         start: mmio_region.start.0 + s.offset,
207                         size: s.size,
208                         host_addr: host_addr as u64,
209                     };
210 
211                     mmio_region.user_memory_regions.push(user_memory_region);
212 
213                     let mem_region = self.vm.make_user_memory_region(
214                         user_memory_region.slot,
215                         user_memory_region.start,
216                         user_memory_region.size,
217                         user_memory_region.host_addr,
218                         false,
219                         false,
220                     );
221 
222                     self.vm
223                         .create_user_memory_region(mem_region)
224                         .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
225                 }
226             }
227         }
228 
229         Ok(())
230     }
231 
232     pub fn unmap_mmio_regions(&mut self) {
233         for mmio_region in self.common.mmio_regions.iter() {
234             for user_memory_region in mmio_region.user_memory_regions.iter() {
235                 // Remove region
236                 let r = self.vm.make_user_memory_region(
237                     user_memory_region.slot,
238                     user_memory_region.start,
239                     user_memory_region.size,
240                     user_memory_region.host_addr,
241                     false,
242                     false,
243                 );
244 
245                 if let Err(e) = self.vm.remove_user_memory_region(r) {
246                     error!("Could not remove the userspace memory region: {}", e);
247                 }
248 
249                 // Remove mmaps
250                 let ret = unsafe {
251                     libc::munmap(
252                         user_memory_region.host_addr as *mut libc::c_void,
253                         user_memory_region.size as usize,
254                     )
255                 };
256                 if ret != 0 {
257                     error!(
258                         "Could not unmap region {}, error:{}",
259                         mmio_region.index,
260                         std::io::Error::last_os_error()
261                     );
262                 }
263             }
264         }
265     }
266 
267     pub fn dma_map(
268         &mut self,
269         region: &GuestRegionMmap<AtomicBitmap>,
270     ) -> Result<(), VfioUserPciDeviceError> {
271         let (fd, offset) = match region.file_offset() {
272             Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
273             None => return Ok(()),
274         };
275 
276         self.client
277             .lock()
278             .unwrap()
279             .dma_map(
280                 offset,
281                 region.start_addr().raw_value(),
282                 region.len() as u64,
283                 fd,
284             )
285             .map_err(VfioUserPciDeviceError::DmaMap)
286     }
287 
288     pub fn dma_unmap(
289         &mut self,
290         region: &GuestRegionMmap<AtomicBitmap>,
291     ) -> Result<(), VfioUserPciDeviceError> {
292         self.client
293             .lock()
294             .unwrap()
295             .dma_unmap(region.start_addr().raw_value(), region.len() as u64)
296             .map_err(VfioUserPciDeviceError::DmaUnmap)
297     }
298 }
299 
300 impl BusDevice for VfioUserPciDevice {
301     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
302         self.read_bar(base, offset, data)
303     }
304 
305     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
306         self.write_bar(base, offset, data)
307     }
308 }
309 
/// Region identifiers, numbered 0 (`Bar0`) through 9 (`Migration`) by
/// declaration order and stored as `u32`.
// NOTE(review): presumably these mirror the VFIO PCI region indexes - confirm.
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
// The enum is not referenced elsewhere in the visible part of this file.
#[allow(dead_code)]
enum Regions {
    Bar0,
    Bar1,
    Bar2,
    Bar3,
    Bar4,
    Bar5,
    Rom,
    Config,
    Vga,
    Migration,
}
325 
/// Adapter exposing a shared vfio-user [`Client`] through the `Vfio` trait.
struct VfioUserClientWrapper {
    /// Client shared with the owning `VfioUserPciDevice`.
    client: Arc<Mutex<Client>>,
}
329 
330 impl Vfio for VfioUserClientWrapper {
331     fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
332         self.client
333             .lock()
334             .unwrap()
335             .region_read(index, offset, data)
336             .ok();
337     }
338 
339     fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
340         self.client
341             .lock()
342             .unwrap()
343             .region_write(index, offset, data)
344             .ok();
345     }
346 
347     fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
348         self.client
349             .lock()
350             .unwrap()
351             .get_irq_info(irq_index)
352             .ok()
353             .map(|i| VfioIrq {
354                 index: i.index,
355                 flags: i.flags,
356                 count: i.count,
357             })
358     }
359 
360     fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
361         info!(
362             "Enabling IRQ {:x} number of fds = {:?}",
363             irq_index,
364             event_fds.len()
365         );
366         let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();
367 
368         // Batch into blocks of 16 fds as sendmsg() has a size limit
369         let mut sent_fds = 0;
370         let num_fds = event_fds.len() as u32;
371         while sent_fds < num_fds {
372             let remaining_fds = num_fds - sent_fds;
373             let count = if remaining_fds > 16 {
374                 16
375             } else {
376                 remaining_fds
377             };
378 
379             self.client
380                 .lock()
381                 .unwrap()
382                 .set_irqs(
383                     irq_index,
384                     VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
385                     sent_fds,
386                     count,
387                     &fds[sent_fds as usize..(sent_fds + count) as usize],
388                 )
389                 .map_err(VfioError::VfioUser)?;
390 
391             sent_fds += count;
392         }
393 
394         Ok(())
395     }
396 
397     fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
398         info!("Disabling IRQ {:x}", irq_index);
399         self.client
400             .lock()
401             .unwrap()
402             .set_irqs(
403                 irq_index,
404                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
405                 0,
406                 0,
407                 &[],
408             )
409             .map_err(VfioError::VfioUser)
410     }
411 
412     fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
413         info!("Unmasking IRQ {:x}", irq_index);
414         self.client
415             .lock()
416             .unwrap()
417             .set_irqs(
418                 irq_index,
419                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
420                 0,
421                 1,
422                 &[],
423             )
424             .map_err(VfioError::VfioUser)
425     }
426 }
427 
428 impl PciDevice for VfioUserPciDevice {
429     fn allocate_bars(
430         &mut self,
431         allocator: &Arc<Mutex<SystemAllocator>>,
432         mmio_allocator: &mut AddressAllocator,
433         resources: Option<Vec<Resource>>,
434     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
435         self.common
436             .allocate_bars(allocator, mmio_allocator, resources)
437     }
438 
439     fn free_bars(
440         &mut self,
441         allocator: &mut SystemAllocator,
442         mmio_allocator: &mut AddressAllocator,
443     ) -> Result<(), PciDeviceError> {
444         self.common.free_bars(allocator, mmio_allocator)
445     }
446 
447     fn as_any(&mut self) -> &mut dyn Any {
448         self
449     }
450 
451     fn detect_bar_reprogramming(
452         &mut self,
453         reg_idx: usize,
454         data: &[u8],
455     ) -> Option<BarReprogrammingParams> {
456         self.common
457             .configuration
458             .detect_bar_reprogramming(reg_idx, data)
459     }
460 
461     fn write_config_register(
462         &mut self,
463         reg_idx: usize,
464         offset: u64,
465         data: &[u8],
466     ) -> Option<Arc<Barrier>> {
467         self.common.write_config_register(reg_idx, offset, data)
468     }
469 
470     fn read_config_register(&mut self, reg_idx: usize) -> u32 {
471         self.common.read_config_register(reg_idx)
472     }
473 
474     fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
475         self.common.read_bar(base, offset, data)
476     }
477 
478     fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
479         self.common.write_bar(base, offset, data)
480     }
481 
482     fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
483         info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
484         for mmio_region in self.common.mmio_regions.iter_mut() {
485             if mmio_region.start.raw_value() == old_base {
486                 mmio_region.start = GuestAddress(new_base);
487 
488                 for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
489                     // Remove old region
490                     let old_region = self.vm.make_user_memory_region(
491                         user_memory_region.slot,
492                         user_memory_region.start,
493                         user_memory_region.size,
494                         user_memory_region.host_addr,
495                         false,
496                         false,
497                     );
498 
499                     self.vm
500                         .remove_user_memory_region(old_region)
501                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
502 
503                     // Update the user memory region with the correct start address.
504                     if new_base > old_base {
505                         user_memory_region.start += new_base - old_base;
506                     } else {
507                         user_memory_region.start -= old_base - new_base;
508                     }
509 
510                     // Insert new region
511                     let new_region = self.vm.make_user_memory_region(
512                         user_memory_region.slot,
513                         user_memory_region.start,
514                         user_memory_region.size,
515                         user_memory_region.host_addr,
516                         false,
517                         false,
518                     );
519 
520                     self.vm
521                         .create_user_memory_region(new_region)
522                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
523                 }
524                 info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
525             }
526         }
527 
528         Ok(())
529     }
530 
531     fn id(&self) -> Option<String> {
532         Some(self.id.clone())
533     }
534 }
535 
536 impl Drop for VfioUserPciDevice {
537     fn drop(&mut self) {
538         self.unmap_mmio_regions();
539 
540         if let Some(msix) = &self.common.interrupt.msix {
541             if msix.bar.enabled() {
542                 self.common.disable_msix();
543             }
544         }
545 
546         if let Some(msi) = &self.common.interrupt.msi {
547             if msi.cfg.enabled() {
548                 self.common.disable_msi()
549             }
550         }
551 
552         if self.common.interrupt.intx_in_use() {
553             self.common.disable_intx();
554         }
555 
556         if let Err(e) = self.client.lock().unwrap().shutdown() {
557             error!("Failed shutting down vfio-user client: {}", e);
558         }
559     }
560 }
561 
// Empty impl: only the trait's default methods are used.
impl Pausable for VfioUserPciDevice {}
563 
564 impl Snapshottable for VfioUserPciDevice {
565     fn id(&self) -> String {
566         self.id.clone()
567     }
568 
569     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
570         let mut vfio_pci_dev_snapshot = Snapshot::new(&self.id);
571 
572         // Snapshot VfioCommon
573         vfio_pci_dev_snapshot.add_snapshot(self.common.snapshot()?);
574 
575         Ok(vfio_pci_dev_snapshot)
576     }
577 
578     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
579         // Restore VfioCommon
580         if let Some(vfio_common_snapshot) = snapshot.snapshots.get(&self.common.id()) {
581             self.common.restore(*vfio_common_snapshot.clone())?;
582             self.map_mmio_regions().map_err(|e| {
583                 MigratableError::Restore(anyhow!(
584                     "Could not map MMIO regions for VfioUserPciDevice on restore {:?}",
585                     e
586                 ))
587             })?;
588         }
589 
590         Ok(())
591     }
592 }
// Empty impls: only the traits' default methods are used.
impl Transportable for VfioUserPciDevice {}
impl Migratable for VfioUserPciDevice {}
595 
/// `ExternalDmaMapping` implementation that forwards DMA map and unmap
/// requests to a vfio-user [`Client`].
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
    /// Client the DMA requests are sent through.
    client: Arc<Mutex<Client>>,
    /// Guest address space used to find the region behind a guest physical address.
    memory: Arc<M>,
}
600 
601 impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
602     pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
603         Self { client, memory }
604     }
605 }
606 
607 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
608     fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
609         let mem = self.memory.memory();
610         let guest_addr = GuestAddress(gpa);
611         let region = mem.find_region(guest_addr);
612 
613         if let Some(region) = region {
614             let file_offset = region.file_offset().unwrap();
615             let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
616                 + file_offset.start();
617 
618             self.client
619                 .lock()
620                 .unwrap()
621                 .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
622                 .map_err(|e| {
623                     std::io::Error::new(
624                         std::io::ErrorKind::Other,
625                         format!("Error mapping region: {}", e),
626                     )
627                 })
628         } else {
629             Err(std::io::Error::new(
630                 std::io::ErrorKind::Other,
631                 format!("Region not found for 0x{:x}", gpa),
632             ))
633         }
634     }
635 
636     fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
637         self.client
638             .lock()
639             .unwrap()
640             .dma_unmap(iova, size)
641             .map_err(|e| {
642                 std::io::Error::new(
643                     std::io::ErrorKind::Other,
644                     format!("Error unmapping region: {}", e),
645                 )
646             })
647     }
648 }
649