xref: /cloud-hypervisor/pci/src/vfio_user.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
1 // Copyright © 2021 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 use std::any::Any;
7 use std::os::unix::prelude::AsRawFd;
8 use std::ptr::null_mut;
9 use std::sync::{Arc, Barrier, Mutex};
10 
11 use hypervisor::HypervisorVmError;
12 use thiserror::Error;
13 use vfio_bindings::bindings::vfio::*;
14 use vfio_ioctls::VfioIrq;
15 use vfio_user::{Client, Error as VfioUserError};
16 use vm_allocator::{AddressAllocator, SystemAllocator};
17 use vm_device::dma_mapping::ExternalDmaMapping;
18 use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
19 use vm_device::{BusDevice, Resource};
20 use vm_memory::bitmap::AtomicBitmap;
21 use vm_memory::{
22     Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
23 };
24 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
25 use vmm_sys_util::eventfd::EventFd;
26 
27 use crate::vfio::{UserMemoryRegion, Vfio, VfioCommon, VfioError, VFIO_COMMON_ID};
28 use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError};
29 use crate::{PciBdf, PciDevice, PciDeviceError, PciSubclass};
30 
/// PCI device whose backend is reached through a vfio-user [`Client`].
pub struct VfioUserPciDevice {
    /// Identifier returned by `PciDevice::id` and `Snapshottable::id`.
    id: String,
    /// Hypervisor VM handle used to create and remove user memory regions.
    vm: Arc<dyn hypervisor::Vm>,
    /// Shared, mutex-protected vfio-user client.
    client: Arc<Mutex<Client>>,
    /// Common VFIO PCI state; most `PciDevice` operations are forwarded to it.
    common: VfioCommon,
    /// Callback invoked to obtain the slot number of each new user memory region.
    memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
}
38 
/// Errors returned by [`VfioUserPciDevice`] operations.
#[derive(Error, Debug)]
pub enum VfioUserPciDeviceError {
    /// The vfio-user client reported an error (used when resetting the device in `new`).
    #[error("Client error: {0}")]
    Client(#[source] VfioUserError),
    /// The hypervisor refused to create a user memory region for an MMIO mapping.
    #[error("Failed to map VFIO PCI region into guest: {0}")]
    MapRegionGuest(#[source] HypervisorVmError),
    /// The client failed to set up a DMA mapping.
    #[error("Failed to DMA map: {0}")]
    DmaMap(#[source] VfioUserError),
    /// The client failed to tear down a DMA mapping.
    #[error("Failed to DMA unmap: {0}")]
    DmaUnmap(#[source] VfioUserError),
    /// Legacy interrupt initialization failed.
    // NOTE(review): this variant is not constructed anywhere in this file.
    #[error("Failed to initialize legacy interrupts: {0}")]
    InitializeLegacyInterrupts(#[source] VfioPciError),
    /// `VfioCommon::new` failed.
    #[error("Failed to create VfioCommon: {0}")]
    CreateVfioCommon(#[source] VfioPciError),
}
54 
/// PCI subclass passed to `VfioCommon::new` for vfio-user devices.
#[derive(Copy, Clone)]
enum PciVfioUserSubclass {
    /// The only subclass used by this module; its register value is `0xff`.
    VfioUserSubclass = 0xff,
}
59 
60 impl PciSubclass for PciVfioUserSubclass {
61     fn get_register_value(&self) -> u8 {
62         *self as u8
63     }
64 }
65 
66 impl VfioUserPciDevice {
67     #[allow(clippy::too_many_arguments)]
68     pub fn new(
69         id: String,
70         vm: &Arc<dyn hypervisor::Vm>,
71         client: Arc<Mutex<Client>>,
72         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
73         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
74         bdf: PciBdf,
75         memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
76         snapshot: Option<Snapshot>,
77     ) -> Result<Self, VfioUserPciDeviceError> {
78         let resettable = client.lock().unwrap().resettable();
79         if resettable {
80             client
81                 .lock()
82                 .unwrap()
83                 .reset()
84                 .map_err(VfioUserPciDeviceError::Client)?;
85         }
86 
87         let vfio_wrapper = VfioUserClientWrapper {
88             client: client.clone(),
89         };
90 
91         let common = VfioCommon::new(
92             msi_interrupt_manager,
93             legacy_interrupt_group,
94             Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
95             &PciVfioUserSubclass::VfioUserSubclass,
96             bdf,
97             vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
98             None,
99         )
100         .map_err(VfioUserPciDeviceError::CreateVfioCommon)?;
101 
102         Ok(Self {
103             id,
104             vm: vm.clone(),
105             client,
106             common,
107             memory_slot,
108         })
109     }
110 
111     pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
112         for mmio_region in &mut self.common.mmio_regions {
113             let region_flags = self
114                 .client
115                 .lock()
116                 .unwrap()
117                 .region(mmio_region.index)
118                 .unwrap()
119                 .flags;
120             let file_offset = self
121                 .client
122                 .lock()
123                 .unwrap()
124                 .region(mmio_region.index)
125                 .unwrap()
126                 .file_offset
127                 .clone();
128 
129             let sparse_areas = self
130                 .client
131                 .lock()
132                 .unwrap()
133                 .region(mmio_region.index)
134                 .unwrap()
135                 .sparse_areas
136                 .clone();
137 
138             if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
139                 let mut prot = 0;
140                 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
141                     prot |= libc::PROT_READ;
142                 }
143                 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
144                     prot |= libc::PROT_WRITE;
145                 }
146 
147                 let mmaps = if sparse_areas.is_empty() {
148                     vec![vfio_region_sparse_mmap_area {
149                         offset: 0,
150                         size: mmio_region.length,
151                     }]
152                 } else {
153                     sparse_areas
154                 };
155 
156                 for s in mmaps.iter() {
157                     // SAFETY: FFI call with correct arguments
158                     let host_addr = unsafe {
159                         libc::mmap(
160                             null_mut(),
161                             s.size as usize,
162                             prot,
163                             libc::MAP_SHARED,
164                             file_offset.as_ref().unwrap().file().as_raw_fd(),
165                             file_offset.as_ref().unwrap().start() as libc::off_t
166                                 + s.offset as libc::off_t,
167                         )
168                     };
169 
170                     if host_addr == libc::MAP_FAILED {
171                         error!(
172                             "Could not mmap regions, error:{}",
173                             std::io::Error::last_os_error()
174                         );
175                         continue;
176                     }
177 
178                     let user_memory_region = UserMemoryRegion {
179                         slot: (self.memory_slot)(),
180                         start: mmio_region.start.0 + s.offset,
181                         size: s.size,
182                         host_addr: host_addr as u64,
183                     };
184 
185                     mmio_region.user_memory_regions.push(user_memory_region);
186 
187                     let mem_region = self.vm.make_user_memory_region(
188                         user_memory_region.slot,
189                         user_memory_region.start,
190                         user_memory_region.size,
191                         user_memory_region.host_addr,
192                         false,
193                         false,
194                     );
195 
196                     self.vm
197                         .create_user_memory_region(mem_region)
198                         .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
199                 }
200             }
201         }
202 
203         Ok(())
204     }
205 
206     pub fn unmap_mmio_regions(&mut self) {
207         for mmio_region in self.common.mmio_regions.iter() {
208             for user_memory_region in mmio_region.user_memory_regions.iter() {
209                 // Remove region
210                 let r = self.vm.make_user_memory_region(
211                     user_memory_region.slot,
212                     user_memory_region.start,
213                     user_memory_region.size,
214                     user_memory_region.host_addr,
215                     false,
216                     false,
217                 );
218 
219                 if let Err(e) = self.vm.remove_user_memory_region(r) {
220                     error!("Could not remove the userspace memory region: {}", e);
221                 }
222 
223                 // Remove mmaps
224                 // SAFETY: FFI call with correct arguments
225                 let ret = unsafe {
226                     libc::munmap(
227                         user_memory_region.host_addr as *mut libc::c_void,
228                         user_memory_region.size as usize,
229                     )
230                 };
231                 if ret != 0 {
232                     error!(
233                         "Could not unmap region {}, error:{}",
234                         mmio_region.index,
235                         std::io::Error::last_os_error()
236                     );
237                 }
238             }
239         }
240     }
241 
242     pub fn dma_map(
243         &mut self,
244         region: &GuestRegionMmap<AtomicBitmap>,
245     ) -> Result<(), VfioUserPciDeviceError> {
246         let (fd, offset) = match region.file_offset() {
247             Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
248             None => return Ok(()),
249         };
250 
251         self.client
252             .lock()
253             .unwrap()
254             .dma_map(offset, region.start_addr().raw_value(), region.len(), fd)
255             .map_err(VfioUserPciDeviceError::DmaMap)
256     }
257 
258     pub fn dma_unmap(
259         &mut self,
260         region: &GuestRegionMmap<AtomicBitmap>,
261     ) -> Result<(), VfioUserPciDeviceError> {
262         self.client
263             .lock()
264             .unwrap()
265             .dma_unmap(region.start_addr().raw_value(), region.len())
266             .map_err(VfioUserPciDeviceError::DmaUnmap)
267     }
268 }
269 
270 impl BusDevice for VfioUserPciDevice {
271     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
272         self.read_bar(base, offset, data)
273     }
274 
275     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
276         self.write_bar(base, offset, data)
277     }
278 }
279 
/// Named device region indices, numbered 0 (`Bar0`) through 9 (`Migration`).
///
/// Nothing in this file refers to the enum, hence `#[allow(dead_code)]`.
// NOTE(review): presumably mirrors the region index ordering of the vfio-user
// protocol — confirm against the specification before relying on the values.
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(dead_code)]
enum Regions {
    Bar0,
    Bar1,
    Bar2,
    Bar3,
    Bar4,
    Bar5,
    Rom,
    Config,
    Vga,
    Migration,
}
295 
/// Adapter implementing the `Vfio` trait on top of a shared vfio-user [`Client`].
struct VfioUserClientWrapper {
    /// Client every `Vfio` operation is forwarded to, locked per call.
    client: Arc<Mutex<Client>>,
}
299 
300 impl Vfio for VfioUserClientWrapper {
301     fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
302         self.client
303             .lock()
304             .unwrap()
305             .region_read(index, offset, data)
306             .ok();
307     }
308 
309     fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
310         self.client
311             .lock()
312             .unwrap()
313             .region_write(index, offset, data)
314             .ok();
315     }
316 
317     fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
318         self.client
319             .lock()
320             .unwrap()
321             .get_irq_info(irq_index)
322             .ok()
323             .map(|i| VfioIrq {
324                 index: i.index,
325                 flags: i.flags,
326                 count: i.count,
327             })
328     }
329 
330     fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
331         info!(
332             "Enabling IRQ {:x} number of fds = {:?}",
333             irq_index,
334             event_fds.len()
335         );
336         let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();
337 
338         // Batch into blocks of 16 fds as sendmsg() has a size limit
339         let mut sent_fds = 0;
340         let num_fds = event_fds.len() as u32;
341         while sent_fds < num_fds {
342             let remaining_fds = num_fds - sent_fds;
343             let count = if remaining_fds > 16 {
344                 16
345             } else {
346                 remaining_fds
347             };
348 
349             self.client
350                 .lock()
351                 .unwrap()
352                 .set_irqs(
353                     irq_index,
354                     VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
355                     sent_fds,
356                     count,
357                     &fds[sent_fds as usize..(sent_fds + count) as usize],
358                 )
359                 .map_err(VfioError::VfioUser)?;
360 
361             sent_fds += count;
362         }
363 
364         Ok(())
365     }
366 
367     fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
368         info!("Disabling IRQ {:x}", irq_index);
369         self.client
370             .lock()
371             .unwrap()
372             .set_irqs(
373                 irq_index,
374                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
375                 0,
376                 0,
377                 &[],
378             )
379             .map_err(VfioError::VfioUser)
380     }
381 
382     fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
383         info!("Unmasking IRQ {:x}", irq_index);
384         self.client
385             .lock()
386             .unwrap()
387             .set_irqs(
388                 irq_index,
389                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
390                 0,
391                 1,
392                 &[],
393             )
394             .map_err(VfioError::VfioUser)
395     }
396 }
397 
398 impl PciDevice for VfioUserPciDevice {
399     fn allocate_bars(
400         &mut self,
401         allocator: &Arc<Mutex<SystemAllocator>>,
402         mmio32_allocator: &mut AddressAllocator,
403         mmio64_allocator: &mut AddressAllocator,
404         resources: Option<Vec<Resource>>,
405     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
406         self.common
407             .allocate_bars(allocator, mmio32_allocator, mmio64_allocator, resources)
408     }
409 
410     fn free_bars(
411         &mut self,
412         allocator: &mut SystemAllocator,
413         mmio32_allocator: &mut AddressAllocator,
414         mmio64_allocator: &mut AddressAllocator,
415     ) -> Result<(), PciDeviceError> {
416         self.common
417             .free_bars(allocator, mmio32_allocator, mmio64_allocator)
418     }
419 
420     fn as_any(&mut self) -> &mut dyn Any {
421         self
422     }
423 
424     fn detect_bar_reprogramming(
425         &mut self,
426         reg_idx: usize,
427         data: &[u8],
428     ) -> Option<BarReprogrammingParams> {
429         self.common
430             .configuration
431             .detect_bar_reprogramming(reg_idx, data)
432     }
433 
434     fn write_config_register(
435         &mut self,
436         reg_idx: usize,
437         offset: u64,
438         data: &[u8],
439     ) -> Option<Arc<Barrier>> {
440         self.common.write_config_register(reg_idx, offset, data)
441     }
442 
443     fn read_config_register(&mut self, reg_idx: usize) -> u32 {
444         self.common.read_config_register(reg_idx)
445     }
446 
447     fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
448         self.common.read_bar(base, offset, data)
449     }
450 
451     fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
452         self.common.write_bar(base, offset, data)
453     }
454 
455     fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
456         info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
457         for mmio_region in self.common.mmio_regions.iter_mut() {
458             if mmio_region.start.raw_value() == old_base {
459                 mmio_region.start = GuestAddress(new_base);
460 
461                 for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
462                     // Remove old region
463                     let old_region = self.vm.make_user_memory_region(
464                         user_memory_region.slot,
465                         user_memory_region.start,
466                         user_memory_region.size,
467                         user_memory_region.host_addr,
468                         false,
469                         false,
470                     );
471 
472                     self.vm
473                         .remove_user_memory_region(old_region)
474                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
475 
476                     // Update the user memory region with the correct start address.
477                     if new_base > old_base {
478                         user_memory_region.start += new_base - old_base;
479                     } else {
480                         user_memory_region.start -= old_base - new_base;
481                     }
482 
483                     // Insert new region
484                     let new_region = self.vm.make_user_memory_region(
485                         user_memory_region.slot,
486                         user_memory_region.start,
487                         user_memory_region.size,
488                         user_memory_region.host_addr,
489                         false,
490                         false,
491                     );
492 
493                     self.vm
494                         .create_user_memory_region(new_region)
495                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
496                 }
497                 info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
498             }
499         }
500 
501         Ok(())
502     }
503 
504     fn id(&self) -> Option<String> {
505         Some(self.id.clone())
506     }
507 }
508 
509 impl Drop for VfioUserPciDevice {
510     fn drop(&mut self) {
511         self.unmap_mmio_regions();
512 
513         if let Some(msix) = &self.common.interrupt.msix {
514             if msix.bar.enabled() {
515                 self.common.disable_msix();
516             }
517         }
518 
519         if let Some(msi) = &self.common.interrupt.msi {
520             if msi.cfg.enabled() {
521                 self.common.disable_msi()
522             }
523         }
524 
525         if self.common.interrupt.intx_in_use() {
526             self.common.disable_intx();
527         }
528 
529         if let Err(e) = self.client.lock().unwrap().shutdown() {
530             error!("Failed shutting down vfio-user client: {}", e);
531         }
532     }
533 }
534 
// No `Pausable` method is overridden: the trait's default implementations apply.
impl Pausable for VfioUserPciDevice {}
536 
537 impl Snapshottable for VfioUserPciDevice {
538     fn id(&self) -> String {
539         self.id.clone()
540     }
541 
542     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
543         let mut vfio_pci_dev_snapshot = Snapshot::default();
544 
545         // Snapshot VfioCommon
546         vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?);
547 
548         Ok(vfio_pci_dev_snapshot)
549     }
550 }
// Neither trait has any method overridden here: their default implementations apply.
impl Transportable for VfioUserPciDevice {}
impl Migratable for VfioUserPciDevice {}
553 
/// `ExternalDmaMapping` implementation that forwards DMA map/unmap requests to
/// a vfio-user [`Client`].
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
    /// Shared, mutex-protected vfio-user client receiving the requests.
    client: Arc<Mutex<Client>>,
    /// Guest address space used to find the memory region behind a guest address.
    memory: Arc<M>,
}
558 
559 impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
560     pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
561         Self { client, memory }
562     }
563 }
564 
565 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
566     fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
567         let mem = self.memory.memory();
568         let guest_addr = GuestAddress(gpa);
569         let region = mem.find_region(guest_addr);
570 
571         if let Some(region) = region {
572             let file_offset = region.file_offset().unwrap();
573             let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
574                 + file_offset.start();
575 
576             self.client
577                 .lock()
578                 .unwrap()
579                 .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
580                 .map_err(|e| {
581                     std::io::Error::new(
582                         std::io::ErrorKind::Other,
583                         format!("Error mapping region: {e}"),
584                     )
585                 })
586         } else {
587             Err(std::io::Error::new(
588                 std::io::ErrorKind::Other,
589                 format!("Region not found for 0x{gpa:x}"),
590             ))
591         }
592     }
593 
594     fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
595         self.client
596             .lock()
597             .unwrap()
598             .dma_unmap(iova, size)
599             .map_err(|e| {
600                 std::io::Error::new(
601                     std::io::ErrorKind::Other,
602                     format!("Error unmapping region: {e}"),
603                 )
604             })
605     }
606 }
607