xref: /cloud-hypervisor/pci/src/vfio_user.rs (revision 2fe7f54ece2a8f0461ed29aaeab41614f1f2da75)
1 // Copyright © 2021 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 use std::any::Any;
7 use std::os::unix::prelude::AsRawFd;
8 use std::ptr::null_mut;
9 use std::sync::{Arc, Barrier, Mutex};
10 
11 use hypervisor::HypervisorVmError;
12 use thiserror::Error;
13 use vfio_bindings::bindings::vfio::*;
14 use vfio_ioctls::VfioIrq;
15 use vfio_user::{Client, Error as VfioUserError};
16 use vm_allocator::{AddressAllocator, MemorySlotAllocator, SystemAllocator};
17 use vm_device::dma_mapping::ExternalDmaMapping;
18 use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
19 use vm_device::{BusDevice, Resource};
20 use vm_memory::bitmap::AtomicBitmap;
21 use vm_memory::{
22     Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
23 };
24 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
25 use vmm_sys_util::eventfd::EventFd;
26 
27 use crate::vfio::{UserMemoryRegion, Vfio, VfioCommon, VfioError, VFIO_COMMON_ID};
28 use crate::{
29     BarReprogrammingParams, PciBarConfiguration, PciBdf, PciDevice, PciDeviceError, PciSubclass,
30     VfioPciError,
31 };
32 
/// PCI device whose backend is reached through a vfio-user [`Client`].
pub struct VfioUserPciDevice {
    /// Identifier returned by `id()` and used as the snapshot id.
    id: String,
    /// Hypervisor VM handle used to create and remove user memory regions.
    vm: Arc<dyn hypervisor::Vm>,
    /// Shared, mutex-protected vfio-user client.
    client: Arc<Mutex<Client>>,
    /// Common VFIO PCI state (configuration, interrupts, MMIO regions).
    common: VfioCommon,
    /// Allocator for the memory slots assigned to mapped MMIO regions.
    memory_slot_allocator: MemorySlotAllocator,
}
40 
/// Errors returned by [`VfioUserPciDevice`] operations.
#[derive(Error, Debug)]
pub enum VfioUserPciDeviceError {
    /// A vfio-user client request failed (used for the initial device reset).
    #[error("Client error: {0}")]
    Client(#[source] VfioUserError),
    /// The hypervisor refused to create a user memory region for an MMIO mapping.
    #[error("Failed to map VFIO PCI region into guest: {0}")]
    MapRegionGuest(#[source] HypervisorVmError),
    /// The client failed to perform a DMA map request.
    #[error("Failed to DMA map: {0}")]
    DmaMap(#[source] VfioUserError),
    /// The client failed to perform a DMA unmap request.
    #[error("Failed to DMA unmap: {0}")]
    DmaUnmap(#[source] VfioUserError),
    /// Legacy interrupt initialization failed.
    // NOTE(review): not constructed anywhere in this file — confirm it is still needed.
    #[error("Failed to initialize legacy interrupts: {0}")]
    InitializeLegacyInterrupts(#[source] VfioPciError),
    /// `VfioCommon::new` failed while building the device.
    #[error("Failed to create VfioCommon: {0}")]
    CreateVfioCommon(#[source] VfioPciError),
}
56 
/// PCI subclass passed to `VfioCommon::new` for vfio-user devices.
#[derive(Copy, Clone)]
enum PciVfioUserSubclass {
    /// The only subclass value defined here (`0xff`).
    VfioUserSubclass = 0xff,
}
61 
62 impl PciSubclass for PciVfioUserSubclass {
63     fn get_register_value(&self) -> u8 {
64         *self as u8
65     }
66 }
67 
68 impl VfioUserPciDevice {
69     #[allow(clippy::too_many_arguments)]
70     pub fn new(
71         id: String,
72         vm: &Arc<dyn hypervisor::Vm>,
73         client: Arc<Mutex<Client>>,
74         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
75         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
76         bdf: PciBdf,
77         memory_slot_allocator: MemorySlotAllocator,
78         snapshot: Option<Snapshot>,
79     ) -> Result<Self, VfioUserPciDeviceError> {
80         let resettable = client.lock().unwrap().resettable();
81         if resettable {
82             client
83                 .lock()
84                 .unwrap()
85                 .reset()
86                 .map_err(VfioUserPciDeviceError::Client)?;
87         }
88 
89         let vfio_wrapper = VfioUserClientWrapper {
90             client: client.clone(),
91         };
92 
93         let common = VfioCommon::new(
94             msi_interrupt_manager,
95             legacy_interrupt_group,
96             Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
97             &PciVfioUserSubclass::VfioUserSubclass,
98             bdf,
99             vm_migration::snapshot_from_id(snapshot.as_ref(), VFIO_COMMON_ID),
100             None,
101         )
102         .map_err(VfioUserPciDeviceError::CreateVfioCommon)?;
103 
104         Ok(Self {
105             id,
106             vm: vm.clone(),
107             client,
108             common,
109             memory_slot_allocator,
110         })
111     }
112 
113     pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
114         for mmio_region in &mut self.common.mmio_regions {
115             let region_flags = self
116                 .client
117                 .lock()
118                 .unwrap()
119                 .region(mmio_region.index)
120                 .unwrap()
121                 .flags;
122             let file_offset = self
123                 .client
124                 .lock()
125                 .unwrap()
126                 .region(mmio_region.index)
127                 .unwrap()
128                 .file_offset
129                 .clone();
130 
131             let sparse_areas = self
132                 .client
133                 .lock()
134                 .unwrap()
135                 .region(mmio_region.index)
136                 .unwrap()
137                 .sparse_areas
138                 .clone();
139 
140             if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
141                 let mut prot = 0;
142                 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
143                     prot |= libc::PROT_READ;
144                 }
145                 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
146                     prot |= libc::PROT_WRITE;
147                 }
148 
149                 let mmaps = if sparse_areas.is_empty() {
150                     vec![vfio_region_sparse_mmap_area {
151                         offset: 0,
152                         size: mmio_region.length,
153                     }]
154                 } else {
155                     sparse_areas
156                 };
157 
158                 for s in mmaps.iter() {
159                     // SAFETY: FFI call with correct arguments
160                     let host_addr = unsafe {
161                         libc::mmap(
162                             null_mut(),
163                             s.size as usize,
164                             prot,
165                             libc::MAP_SHARED,
166                             file_offset.as_ref().unwrap().file().as_raw_fd(),
167                             file_offset.as_ref().unwrap().start() as libc::off_t
168                                 + s.offset as libc::off_t,
169                         )
170                     };
171 
172                     if host_addr == libc::MAP_FAILED {
173                         error!(
174                             "Could not mmap regions, error:{}",
175                             std::io::Error::last_os_error()
176                         );
177                         continue;
178                     }
179 
180                     let user_memory_region = UserMemoryRegion {
181                         slot: self.memory_slot_allocator.next_memory_slot(),
182                         start: mmio_region.start.0 + s.offset,
183                         size: s.size,
184                         host_addr: host_addr as u64,
185                     };
186 
187                     mmio_region.user_memory_regions.push(user_memory_region);
188 
189                     let mem_region = self.vm.make_user_memory_region(
190                         user_memory_region.slot,
191                         user_memory_region.start,
192                         user_memory_region.size,
193                         user_memory_region.host_addr,
194                         false,
195                         false,
196                     );
197 
198                     self.vm
199                         .create_user_memory_region(mem_region)
200                         .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
201                 }
202             }
203         }
204 
205         Ok(())
206     }
207 
208     pub fn unmap_mmio_regions(&mut self) {
209         for mmio_region in self.common.mmio_regions.iter() {
210             for user_memory_region in mmio_region.user_memory_regions.iter() {
211                 // Remove region
212                 let r = self.vm.make_user_memory_region(
213                     user_memory_region.slot,
214                     user_memory_region.start,
215                     user_memory_region.size,
216                     user_memory_region.host_addr,
217                     false,
218                     false,
219                 );
220 
221                 if let Err(e) = self.vm.remove_user_memory_region(r) {
222                     error!("Could not remove the userspace memory region: {}", e);
223                 }
224 
225                 self.memory_slot_allocator
226                     .free_memory_slot(user_memory_region.slot);
227 
228                 // Remove mmaps
229                 // SAFETY: FFI call with correct arguments
230                 let ret = unsafe {
231                     libc::munmap(
232                         user_memory_region.host_addr as *mut libc::c_void,
233                         user_memory_region.size as usize,
234                     )
235                 };
236                 if ret != 0 {
237                     error!(
238                         "Could not unmap region {}, error:{}",
239                         mmio_region.index,
240                         std::io::Error::last_os_error()
241                     );
242                 }
243             }
244         }
245     }
246 
247     pub fn dma_map(
248         &mut self,
249         region: &GuestRegionMmap<AtomicBitmap>,
250     ) -> Result<(), VfioUserPciDeviceError> {
251         let (fd, offset) = match region.file_offset() {
252             Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
253             None => return Ok(()),
254         };
255 
256         self.client
257             .lock()
258             .unwrap()
259             .dma_map(offset, region.start_addr().raw_value(), region.len(), fd)
260             .map_err(VfioUserPciDeviceError::DmaMap)
261     }
262 
263     pub fn dma_unmap(
264         &mut self,
265         region: &GuestRegionMmap<AtomicBitmap>,
266     ) -> Result<(), VfioUserPciDeviceError> {
267         self.client
268             .lock()
269             .unwrap()
270             .dma_unmap(region.start_addr().raw_value(), region.len())
271             .map_err(VfioUserPciDeviceError::DmaUnmap)
272     }
273 }
274 
275 impl BusDevice for VfioUserPciDevice {
276     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
277         self.read_bar(base, offset, data)
278     }
279 
280     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
281         self.write_bar(base, offset, data)
282     }
283 }
284 
/// Region identifiers, numbered 0 through 9 in declaration order and stored as `u32`.
// NOTE(review): presumably mirrors the vfio-user/VFIO PCI region index layout — confirm.
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
// No variant is referenced in this file, hence the lint exemption.
#[allow(dead_code)]
enum Regions {
    Bar0,
    Bar1,
    Bar2,
    Bar3,
    Bar4,
    Bar5,
    Rom,
    Config,
    Vga,
    Migration,
}
300 
/// Adapter implementing the `Vfio` trait on top of a shared vfio-user client.
struct VfioUserClientWrapper {
    /// Shared, mutex-protected vfio-user client every request is forwarded to.
    client: Arc<Mutex<Client>>,
}
304 
305 impl Vfio for VfioUserClientWrapper {
306     fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
307         self.client
308             .lock()
309             .unwrap()
310             .region_read(index, offset, data)
311             .ok();
312     }
313 
314     fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
315         self.client
316             .lock()
317             .unwrap()
318             .region_write(index, offset, data)
319             .ok();
320     }
321 
322     fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
323         self.client
324             .lock()
325             .unwrap()
326             .get_irq_info(irq_index)
327             .ok()
328             .map(|i| VfioIrq {
329                 index: i.index,
330                 flags: i.flags,
331                 count: i.count,
332             })
333     }
334 
335     fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
336         info!(
337             "Enabling IRQ {:x} number of fds = {:?}",
338             irq_index,
339             event_fds.len()
340         );
341         let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();
342 
343         // Batch into blocks of 16 fds as sendmsg() has a size limit
344         let mut sent_fds = 0;
345         let num_fds = event_fds.len() as u32;
346         while sent_fds < num_fds {
347             let remaining_fds = num_fds - sent_fds;
348             let count = if remaining_fds > 16 {
349                 16
350             } else {
351                 remaining_fds
352             };
353 
354             self.client
355                 .lock()
356                 .unwrap()
357                 .set_irqs(
358                     irq_index,
359                     VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
360                     sent_fds,
361                     count,
362                     &fds[sent_fds as usize..(sent_fds + count) as usize],
363                 )
364                 .map_err(VfioError::VfioUser)?;
365 
366             sent_fds += count;
367         }
368 
369         Ok(())
370     }
371 
372     fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
373         info!("Disabling IRQ {:x}", irq_index);
374         self.client
375             .lock()
376             .unwrap()
377             .set_irqs(
378                 irq_index,
379                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
380                 0,
381                 0,
382                 &[],
383             )
384             .map_err(VfioError::VfioUser)
385     }
386 
387     fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
388         info!("Unmasking IRQ {:x}", irq_index);
389         self.client
390             .lock()
391             .unwrap()
392             .set_irqs(
393                 irq_index,
394                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
395                 0,
396                 1,
397                 &[],
398             )
399             .map_err(VfioError::VfioUser)
400     }
401 }
402 
403 impl PciDevice for VfioUserPciDevice {
404     fn allocate_bars(
405         &mut self,
406         allocator: &Arc<Mutex<SystemAllocator>>,
407         mmio32_allocator: &mut AddressAllocator,
408         mmio64_allocator: &mut AddressAllocator,
409         resources: Option<Vec<Resource>>,
410     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
411         self.common
412             .allocate_bars(allocator, mmio32_allocator, mmio64_allocator, resources)
413     }
414 
415     fn free_bars(
416         &mut self,
417         allocator: &mut SystemAllocator,
418         mmio32_allocator: &mut AddressAllocator,
419         mmio64_allocator: &mut AddressAllocator,
420     ) -> Result<(), PciDeviceError> {
421         self.common
422             .free_bars(allocator, mmio32_allocator, mmio64_allocator)
423     }
424 
425     fn as_any_mut(&mut self) -> &mut dyn Any {
426         self
427     }
428 
429     fn detect_bar_reprogramming(
430         &mut self,
431         reg_idx: usize,
432         data: &[u8],
433     ) -> Option<BarReprogrammingParams> {
434         self.common
435             .configuration
436             .detect_bar_reprogramming(reg_idx, data)
437     }
438 
439     fn write_config_register(
440         &mut self,
441         reg_idx: usize,
442         offset: u64,
443         data: &[u8],
444     ) -> Option<Arc<Barrier>> {
445         self.common.write_config_register(reg_idx, offset, data)
446     }
447 
448     fn read_config_register(&mut self, reg_idx: usize) -> u32 {
449         self.common.read_config_register(reg_idx)
450     }
451 
452     fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
453         self.common.read_bar(base, offset, data)
454     }
455 
456     fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
457         self.common.write_bar(base, offset, data)
458     }
459 
460     fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
461         info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
462         for mmio_region in self.common.mmio_regions.iter_mut() {
463             if mmio_region.start.raw_value() == old_base {
464                 mmio_region.start = GuestAddress(new_base);
465 
466                 for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
467                     // Remove old region
468                     let old_region = self.vm.make_user_memory_region(
469                         user_memory_region.slot,
470                         user_memory_region.start,
471                         user_memory_region.size,
472                         user_memory_region.host_addr,
473                         false,
474                         false,
475                     );
476 
477                     self.vm
478                         .remove_user_memory_region(old_region)
479                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
480 
481                     // Update the user memory region with the correct start address.
482                     if new_base > old_base {
483                         user_memory_region.start += new_base - old_base;
484                     } else {
485                         user_memory_region.start -= old_base - new_base;
486                     }
487 
488                     // Insert new region
489                     let new_region = self.vm.make_user_memory_region(
490                         user_memory_region.slot,
491                         user_memory_region.start,
492                         user_memory_region.size,
493                         user_memory_region.host_addr,
494                         false,
495                         false,
496                     );
497 
498                     self.vm
499                         .create_user_memory_region(new_region)
500                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
501                 }
502                 info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
503             }
504         }
505 
506         Ok(())
507     }
508 
509     fn id(&self) -> Option<String> {
510         Some(self.id.clone())
511     }
512 }
513 
514 impl Drop for VfioUserPciDevice {
515     fn drop(&mut self) {
516         self.unmap_mmio_regions();
517 
518         if let Some(msix) = &self.common.interrupt.msix {
519             if msix.bar.enabled() {
520                 self.common.disable_msix();
521             }
522         }
523 
524         if let Some(msi) = &self.common.interrupt.msi {
525             if msi.cfg.enabled() {
526                 self.common.disable_msi()
527             }
528         }
529 
530         if self.common.interrupt.intx_in_use() {
531             self.common.disable_intx();
532         }
533 
534         if let Err(e) = self.client.lock().unwrap().shutdown() {
535             error!("Failed shutting down vfio-user client: {}", e);
536         }
537     }
538 }
539 
// Empty impl: no `Pausable` method is overridden for this device.
impl Pausable for VfioUserPciDevice {}
541 
542 impl Snapshottable for VfioUserPciDevice {
543     fn id(&self) -> String {
544         self.id.clone()
545     }
546 
547     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
548         let mut vfio_pci_dev_snapshot = Snapshot::default();
549 
550         // Snapshot VfioCommon
551         vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?);
552 
553         Ok(vfio_pci_dev_snapshot)
554     }
555 }
// Empty impls: no `Transportable` or `Migratable` method is overridden for
// this device.
impl Transportable for VfioUserPciDevice {}
impl Migratable for VfioUserPciDevice {}
558 
/// DMA mapping handler that forwards map/unmap requests to a vfio-user client.
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
    /// Shared, mutex-protected vfio-user client receiving the DMA requests.
    client: Arc<Mutex<Client>>,
    /// Guest address space used to resolve guest physical addresses to their
    /// backing file and offset.
    memory: Arc<M>,
}
563 
564 impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
565     pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
566         Self { client, memory }
567     }
568 }
569 
570 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
571     fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
572         let mem = self.memory.memory();
573         let guest_addr = GuestAddress(gpa);
574         let region = mem.find_region(guest_addr);
575 
576         if let Some(region) = region {
577             let file_offset = region.file_offset().unwrap();
578             let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
579                 + file_offset.start();
580 
581             self.client
582                 .lock()
583                 .unwrap()
584                 .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
585                 .map_err(|e| {
586                     std::io::Error::new(
587                         std::io::ErrorKind::Other,
588                         format!("Error mapping region: {e}"),
589                     )
590                 })
591         } else {
592             Err(std::io::Error::new(
593                 std::io::ErrorKind::Other,
594                 format!("Region not found for 0x{gpa:x}"),
595             ))
596         }
597     }
598 
599     fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
600         self.client
601             .lock()
602             .unwrap()
603             .dma_unmap(iova, size)
604             .map_err(|e| {
605                 std::io::Error::new(
606                     std::io::ErrorKind::Other,
607                     format!("Error unmapping region: {e}"),
608                 )
609             })
610     }
611 }
612