xref: /cloud-hypervisor/pci/src/vfio_user.rs (revision b440cb7d2330770cd415b63544a371d4caa2db3a)
1 // Copyright © 2021 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 use crate::vfio::{Interrupt, UserMemoryRegion, Vfio, VfioCommon, VfioError};
7 use crate::{BarReprogrammingParams, PciBarConfiguration, VfioPciError};
8 use crate::{
9     PciBdf, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciSubclass,
10 };
11 use anyhow::anyhow;
12 use hypervisor::HypervisorVmError;
13 use std::any::Any;
14 use std::os::unix::prelude::AsRawFd;
15 use std::ptr::null_mut;
16 use std::sync::{Arc, Barrier, Mutex};
17 use std::u32;
18 use thiserror::Error;
19 use vfio_bindings::bindings::vfio::*;
20 use vfio_ioctls::VfioIrq;
21 use vfio_user::{Client, Error as VfioUserError};
22 use vm_allocator::{AddressAllocator, SystemAllocator};
23 use vm_device::dma_mapping::ExternalDmaMapping;
24 use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
25 use vm_device::{BusDevice, Resource};
26 use vm_memory::bitmap::AtomicBitmap;
27 use vm_memory::{
28     Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
29 };
30 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
31 use vmm_sys_util::eventfd::EventFd;
32 
33 pub struct VfioUserPciDevice {
34     id: String,
35     vm: Arc<dyn hypervisor::Vm>,
36     client: Arc<Mutex<Client>>,
37     common: VfioCommon,
38     memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
39 }
40 
41 #[derive(Error, Debug)]
42 pub enum VfioUserPciDeviceError {
43     #[error("Client error: {0}")]
44     Client(#[source] VfioUserError),
45     #[error("Failed to map VFIO PCI region into guest: {0}")]
46     MapRegionGuest(#[source] HypervisorVmError),
47     #[error("Failed to DMA map: {0}")]
48     DmaMap(#[source] VfioUserError),
49     #[error("Failed to DMA unmap: {0}")]
50     DmaUnmap(#[source] VfioUserError),
51     #[error("Failed to initialize legacy interrupts: {0}")]
52     InitializeLegacyInterrupts(#[source] VfioPciError),
53 }
54 
/// PCI subclass used for the placeholder configuration of a vfio-user device.
#[derive(Clone, Copy)]
enum PciVfioUserSubclass {
    /// The only subclass value in use (`0xff`).
    VfioUserSubclass = 0xff,
}
59 
60 impl PciSubclass for PciVfioUserSubclass {
61     fn get_register_value(&self) -> u8 {
62         *self as u8
63     }
64 }
65 
66 impl VfioUserPciDevice {
67     #[allow(clippy::too_many_arguments)]
68     pub fn new(
69         id: String,
70         vm: &Arc<dyn hypervisor::Vm>,
71         client: Arc<Mutex<Client>>,
72         msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
73         legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
74         bdf: PciBdf,
75         restoring: bool,
76         memory_slot: Arc<dyn Fn() -> u32 + Send + Sync>,
77     ) -> Result<Self, VfioUserPciDeviceError> {
78         // This is used for the BAR and capabilities only
79         let configuration = PciConfiguration::new(
80             0,
81             0,
82             0,
83             PciClassCode::Other,
84             &PciVfioUserSubclass::VfioUserSubclass,
85             None,
86             PciHeaderType::Device,
87             0,
88             0,
89             None,
90         );
91         let resettable = client.lock().unwrap().resettable();
92         if resettable {
93             client
94                 .lock()
95                 .unwrap()
96                 .reset()
97                 .map_err(VfioUserPciDeviceError::Client)?;
98         }
99 
100         let vfio_wrapper = VfioUserClientWrapper {
101             client: client.clone(),
102         };
103 
104         let mut common = VfioCommon {
105             mmio_regions: Vec::new(),
106             configuration,
107             interrupt: Interrupt {
108                 intx: None,
109                 msi: None,
110                 msix: None,
111             },
112             msi_interrupt_manager,
113             legacy_interrupt_group,
114             vfio_wrapper: Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
115         };
116 
117         // No need to parse capabilities from the device if on the restore path.
118         // The initialization will be performed later when restore() will be
119         // called.
120         if !restoring {
121             common.parse_capabilities(bdf);
122             common
123                 .initialize_legacy_interrupt()
124                 .map_err(VfioUserPciDeviceError::InitializeLegacyInterrupts)?;
125         }
126 
127         Ok(Self {
128             id,
129             vm: vm.clone(),
130             client,
131             common,
132             memory_slot,
133         })
134     }
135 
136     pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
137         for mmio_region in &mut self.common.mmio_regions {
138             let region_flags = self
139                 .client
140                 .lock()
141                 .unwrap()
142                 .region(mmio_region.index)
143                 .unwrap()
144                 .flags;
145             let file_offset = self
146                 .client
147                 .lock()
148                 .unwrap()
149                 .region(mmio_region.index)
150                 .unwrap()
151                 .file_offset
152                 .clone();
153 
154             let sparse_areas = self
155                 .client
156                 .lock()
157                 .unwrap()
158                 .region(mmio_region.index)
159                 .unwrap()
160                 .sparse_areas
161                 .clone();
162 
163             if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
164                 let mut prot = 0;
165                 if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
166                     prot |= libc::PROT_READ;
167                 }
168                 if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
169                     prot |= libc::PROT_WRITE;
170                 }
171 
172                 let mmaps = if sparse_areas.is_empty() {
173                     vec![vfio_region_sparse_mmap_area {
174                         offset: 0,
175                         size: mmio_region.length,
176                     }]
177                 } else {
178                     sparse_areas
179                 };
180 
181                 for s in mmaps.iter() {
182                     let host_addr = unsafe {
183                         libc::mmap(
184                             null_mut(),
185                             s.size as usize,
186                             prot,
187                             libc::MAP_SHARED,
188                             file_offset.as_ref().unwrap().file().as_raw_fd(),
189                             file_offset.as_ref().unwrap().start() as libc::off_t
190                                 + s.offset as libc::off_t,
191                         )
192                     };
193 
194                     if host_addr == libc::MAP_FAILED {
195                         error!(
196                             "Could not mmap regions, error:{}",
197                             std::io::Error::last_os_error()
198                         );
199                         continue;
200                     }
201 
202                     let user_memory_region = UserMemoryRegion {
203                         slot: (self.memory_slot)(),
204                         start: mmio_region.start.0 + s.offset,
205                         size: s.size,
206                         host_addr: host_addr as u64,
207                     };
208 
209                     mmio_region.user_memory_regions.push(user_memory_region);
210 
211                     let mem_region = self.vm.make_user_memory_region(
212                         user_memory_region.slot,
213                         user_memory_region.start,
214                         user_memory_region.size,
215                         user_memory_region.host_addr,
216                         false,
217                         false,
218                     );
219 
220                     self.vm
221                         .create_user_memory_region(mem_region)
222                         .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
223                 }
224             }
225         }
226 
227         Ok(())
228     }
229 
230     pub fn unmap_mmio_regions(&mut self) {
231         for mmio_region in self.common.mmio_regions.iter() {
232             for user_memory_region in mmio_region.user_memory_regions.iter() {
233                 // Remove region
234                 let r = self.vm.make_user_memory_region(
235                     user_memory_region.slot,
236                     user_memory_region.start,
237                     user_memory_region.size,
238                     user_memory_region.host_addr,
239                     false,
240                     false,
241                 );
242 
243                 if let Err(e) = self.vm.remove_user_memory_region(r) {
244                     error!("Could not remove the userspace memory region: {}", e);
245                 }
246 
247                 // Remove mmaps
248                 let ret = unsafe {
249                     libc::munmap(
250                         user_memory_region.host_addr as *mut libc::c_void,
251                         user_memory_region.size as usize,
252                     )
253                 };
254                 if ret != 0 {
255                     error!(
256                         "Could not unmap region {}, error:{}",
257                         mmio_region.index,
258                         std::io::Error::last_os_error()
259                     );
260                 }
261             }
262         }
263     }
264 
265     pub fn dma_map(
266         &mut self,
267         region: &GuestRegionMmap<AtomicBitmap>,
268     ) -> Result<(), VfioUserPciDeviceError> {
269         let (fd, offset) = match region.file_offset() {
270             Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
271             None => return Ok(()),
272         };
273 
274         self.client
275             .lock()
276             .unwrap()
277             .dma_map(
278                 offset,
279                 region.start_addr().raw_value(),
280                 region.len() as u64,
281                 fd,
282             )
283             .map_err(VfioUserPciDeviceError::DmaMap)
284     }
285 
286     pub fn dma_unmap(
287         &mut self,
288         region: &GuestRegionMmap<AtomicBitmap>,
289     ) -> Result<(), VfioUserPciDeviceError> {
290         self.client
291             .lock()
292             .unwrap()
293             .dma_unmap(region.start_addr().raw_value(), region.len() as u64)
294             .map_err(VfioUserPciDeviceError::DmaUnmap)
295     }
296 }
297 
298 impl BusDevice for VfioUserPciDevice {
299     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
300         self.read_bar(base, offset, data)
301     }
302 
303     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
304         self.write_bar(base, offset, data)
305     }
306 }
307 
/// Region indices of a vfio-user PCI device, numbered consecutively from 0.
///
/// NOTE(review): the order looks like the VFIO PCI region indices (BARs,
/// ROM, config space, VGA) followed by a migration region; confirm against
/// the vfio-user specification.
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[allow(dead_code)]
enum Regions {
    Bar0 = 0,
    Bar1 = 1,
    Bar2 = 2,
    Bar3 = 3,
    Bar4 = 4,
    Bar5 = 5,
    Rom = 6,
    Config = 7,
    Vga = 8,
    Migration = 9,
}
323 
324 struct VfioUserClientWrapper {
325     client: Arc<Mutex<Client>>,
326 }
327 
328 impl Vfio for VfioUserClientWrapper {
329     fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
330         self.client
331             .lock()
332             .unwrap()
333             .region_read(index, offset, data)
334             .ok();
335     }
336 
337     fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
338         self.client
339             .lock()
340             .unwrap()
341             .region_write(index, offset, data)
342             .ok();
343     }
344 
345     fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
346         self.client
347             .lock()
348             .unwrap()
349             .get_irq_info(irq_index)
350             .ok()
351             .map(|i| VfioIrq {
352                 index: i.index,
353                 flags: i.flags,
354                 count: i.count,
355             })
356     }
357 
358     fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
359         info!(
360             "Enabling IRQ {:x} number of fds = {:?}",
361             irq_index,
362             event_fds.len()
363         );
364         let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();
365 
366         // Batch into blocks of 16 fds as sendmsg() has a size limit
367         let mut sent_fds = 0;
368         let num_fds = event_fds.len() as u32;
369         while sent_fds < num_fds {
370             let remaining_fds = num_fds - sent_fds;
371             let count = if remaining_fds > 16 {
372                 16
373             } else {
374                 remaining_fds
375             };
376 
377             self.client
378                 .lock()
379                 .unwrap()
380                 .set_irqs(
381                     irq_index,
382                     VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
383                     sent_fds,
384                     count,
385                     &fds[sent_fds as usize..(sent_fds + count) as usize],
386                 )
387                 .map_err(VfioError::VfioUser)?;
388 
389             sent_fds += count;
390         }
391 
392         Ok(())
393     }
394 
395     fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
396         info!("Disabling IRQ {:x}", irq_index);
397         self.client
398             .lock()
399             .unwrap()
400             .set_irqs(
401                 irq_index,
402                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
403                 0,
404                 0,
405                 &[],
406             )
407             .map_err(VfioError::VfioUser)
408     }
409 
410     fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
411         info!("Unmasking IRQ {:x}", irq_index);
412         self.client
413             .lock()
414             .unwrap()
415             .set_irqs(
416                 irq_index,
417                 VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
418                 0,
419                 1,
420                 &[],
421             )
422             .map_err(VfioError::VfioUser)
423     }
424 }
425 
426 impl PciDevice for VfioUserPciDevice {
427     fn allocate_bars(
428         &mut self,
429         allocator: &Arc<Mutex<SystemAllocator>>,
430         mmio_allocator: &mut AddressAllocator,
431         resources: Option<Vec<Resource>>,
432     ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
433         self.common
434             .allocate_bars(allocator, mmio_allocator, resources)
435     }
436 
437     fn free_bars(
438         &mut self,
439         allocator: &mut SystemAllocator,
440         mmio_allocator: &mut AddressAllocator,
441     ) -> Result<(), PciDeviceError> {
442         self.common.free_bars(allocator, mmio_allocator)
443     }
444 
445     fn as_any(&mut self) -> &mut dyn Any {
446         self
447     }
448 
449     fn detect_bar_reprogramming(
450         &mut self,
451         reg_idx: usize,
452         data: &[u8],
453     ) -> Option<BarReprogrammingParams> {
454         self.common
455             .configuration
456             .detect_bar_reprogramming(reg_idx, data)
457     }
458 
459     fn write_config_register(
460         &mut self,
461         reg_idx: usize,
462         offset: u64,
463         data: &[u8],
464     ) -> Option<Arc<Barrier>> {
465         self.common.write_config_register(reg_idx, offset, data)
466     }
467 
468     fn read_config_register(&mut self, reg_idx: usize) -> u32 {
469         self.common.read_config_register(reg_idx)
470     }
471 
472     fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
473         self.common.read_bar(base, offset, data)
474     }
475 
476     fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
477         self.common.write_bar(base, offset, data)
478     }
479 
480     fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
481         info!("Moving BAR 0x{:x} -> 0x{:x}", old_base, new_base);
482         for mmio_region in self.common.mmio_regions.iter_mut() {
483             if mmio_region.start.raw_value() == old_base {
484                 mmio_region.start = GuestAddress(new_base);
485 
486                 for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
487                     // Remove old region
488                     let old_region = self.vm.make_user_memory_region(
489                         user_memory_region.slot,
490                         user_memory_region.start,
491                         user_memory_region.size,
492                         user_memory_region.host_addr,
493                         false,
494                         false,
495                     );
496 
497                     self.vm
498                         .remove_user_memory_region(old_region)
499                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
500 
501                     // Update the user memory region with the correct start address.
502                     if new_base > old_base {
503                         user_memory_region.start += new_base - old_base;
504                     } else {
505                         user_memory_region.start -= old_base - new_base;
506                     }
507 
508                     // Insert new region
509                     let new_region = self.vm.make_user_memory_region(
510                         user_memory_region.slot,
511                         user_memory_region.start,
512                         user_memory_region.size,
513                         user_memory_region.host_addr,
514                         false,
515                         false,
516                     );
517 
518                     self.vm
519                         .create_user_memory_region(new_region)
520                         .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
521                 }
522                 info!("Moved bar 0x{:x} -> 0x{:x}", old_base, new_base);
523             }
524         }
525 
526         Ok(())
527     }
528 
529     fn id(&self) -> Option<String> {
530         Some(self.id.clone())
531     }
532 }
533 
534 impl Drop for VfioUserPciDevice {
535     fn drop(&mut self) {
536         self.unmap_mmio_regions();
537 
538         if let Some(msix) = &self.common.interrupt.msix {
539             if msix.bar.enabled() {
540                 self.common.disable_msix();
541             }
542         }
543 
544         if let Some(msi) = &self.common.interrupt.msi {
545             if msi.cfg.enabled() {
546                 self.common.disable_msi()
547             }
548         }
549 
550         if self.common.interrupt.intx_in_use() {
551             self.common.disable_intx();
552         }
553 
554         if let Err(e) = self.client.lock().unwrap().shutdown() {
555             error!("Failed shutting down vfio-user client: {}", e);
556         }
557     }
558 }
559 
560 impl Pausable for VfioUserPciDevice {}
561 
562 impl Snapshottable for VfioUserPciDevice {
563     fn id(&self) -> String {
564         self.id.clone()
565     }
566 
567     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
568         let mut vfio_pci_dev_snapshot = Snapshot::new(&self.id);
569 
570         // Snapshot VfioCommon
571         vfio_pci_dev_snapshot.add_snapshot(self.common.snapshot()?);
572 
573         Ok(vfio_pci_dev_snapshot)
574     }
575 
576     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
577         // Restore VfioCommon
578         if let Some(vfio_common_snapshot) = snapshot.snapshots.get(&self.common.id()) {
579             self.common.restore(*vfio_common_snapshot.clone())?;
580             self.map_mmio_regions().map_err(|e| {
581                 MigratableError::Restore(anyhow!(
582                     "Could not map MMIO regions for VfioUserPciDevice on restore {:?}",
583                     e
584                 ))
585             })?;
586         }
587 
588         Ok(())
589     }
590 }
591 impl Transportable for VfioUserPciDevice {}
592 impl Migratable for VfioUserPciDevice {}
593 
594 pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
595     client: Arc<Mutex<Client>>,
596     memory: Arc<M>,
597 }
598 
599 impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
600     pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
601         Self { client, memory }
602     }
603 }
604 
605 impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
606     fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
607         let mem = self.memory.memory();
608         let guest_addr = GuestAddress(gpa);
609         let region = mem.find_region(guest_addr);
610 
611         if let Some(region) = region {
612             let file_offset = region.file_offset().unwrap();
613             let offset = (GuestAddress(gpa).checked_offset_from(region.start_addr())).unwrap()
614                 + file_offset.start();
615 
616             self.client
617                 .lock()
618                 .unwrap()
619                 .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
620                 .map_err(|e| {
621                     std::io::Error::new(
622                         std::io::ErrorKind::Other,
623                         format!("Error mapping region: {}", e),
624                     )
625                 })
626         } else {
627             Err(std::io::Error::new(
628                 std::io::ErrorKind::Other,
629                 format!("Region not found for 0x{:x}", gpa),
630             ))
631         }
632     }
633 
634     fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
635         self.client
636             .lock()
637             .unwrap()
638             .dma_unmap(iova, size)
639             .map_err(|e| {
640                 std::io::Error::new(
641                     std::io::ErrorKind::Other,
642                     format!("Error unmapping region: {}", e),
643                 )
644             })
645     }
646 }
647