// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::VirtioPciCommonConfig;
use crate::transport::VirtioTransport;
use crate::GuestMemoryMmap;
use crate::{
    ActivateResult, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioInterruptType,
    DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED, DEVICE_FEATURES_OK,
    DEVICE_INIT,
};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use pci::{
    BarReprogrammingParams, MsixCap, MsixConfig, PciBarConfiguration, PciBarRegionType,
    PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, PciDevice, PciDeviceError,
    PciHeaderType, PciMassStorageSubclass, PciNetworkControllerSubclass, PciSubclass,
};
use std::any::Any;
use std::cmp;
use std::io::Write;
use std::ops::Deref;
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU16, AtomicUsize, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use virtio_queue::{Error as QueueError, Queue, QueueT};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{
    InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
};
use vm_device::{BusDevice, Resource};
use vm_memory::{Address, ByteValued, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, Le32};
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vm_virtio::AccessPlatform;
use vmm_sys_util::{errno::Result, eventfd::EventFd};

/// Vector value used to disable MSI for a queue.
const VIRTQ_MSI_NO_VECTOR: u16 = 0xffff;

#[derive(Debug)]
enum Error {
    /// Failed to retrieve queue ring's index.
    QueueRingIndex(QueueError),
}

#[allow(clippy::enum_variant_names)]
enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    SharedMemoryConfig = 8,
}

// This offset represents the 2 bytes omitted from the VirtioPciCap structure
// as they are already handled through add_capability(). These 2 bytes are the
// fields cap_vndr (1 byte) and cap_next (1 byte) defined in the virtio spec.
const VIRTIO_PCI_CAP_OFFSET: usize = 2;

#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCap {
    cap_len: u8,      // Generic PCI field: capability length
    cfg_type: u8,     // Identifies the structure.
    pci_bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    offset: Le32,     // Offset within bar.
    length: Le32,     // Length of the structure, in bytes.
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap {}

impl PciCapability for VirtioPciCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

const VIRTIO_PCI_CAP_LEN_OFFSET: u8 = 2;

impl VirtioPciCap {
    pub fn new(cfg_type: PciCapabilityType, pci_bar: u8, offset: u32, length: u32) -> Self {
        VirtioPciCap {
            cap_len: (std::mem::size_of::<VirtioPciCap>() as u8) + VIRTIO_PCI_CAP_LEN_OFFSET,
            cfg_type: cfg_type as u8,
            pci_bar,
            id: 0,
            padding: [0; 2],
            offset: Le32::from(offset),
            length: Le32::from(length),
        }
    }
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciNotifyCap {}

impl PciCapability for VirtioPciNotifyCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciNotifyCap {
    pub fn new(
        cfg_type: PciCapabilityType,
        pci_bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                cap_len: (std::mem::size_of::<VirtioPciNotifyCap>() as u8)
                    + VIRTIO_PCI_CAP_LEN_OFFSET,
                cfg_type: cfg_type as u8,
                pci_bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCap64 {
    cap: VirtioPciCap,
    offset_hi: Le32,
    length_hi: Le32,
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap64 {}

impl PciCapability for VirtioPciCap64 {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciCap64 {
    pub fn new(cfg_type: PciCapabilityType, pci_bar: u8, id: u8, offset: u64, length: u64) -> Self {
        VirtioPciCap64 {
            cap: VirtioPciCap {
                cap_len: (std::mem::size_of::<VirtioPciCap64>() as u8) + VIRTIO_PCI_CAP_LEN_OFFSET,
                cfg_type: cfg_type as u8,
                pci_bar,
                id,
                padding: [0; 2],
                offset: Le32::from(offset as u32),
                length: Le32::from(length as u32),
            },
            offset_hi: Le32::from((offset >> 32) as u32),
            length_hi: Le32::from((length >> 32) as u32),
        }
    }
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCfgCap {
    cap: VirtioPciCap,
    pci_cfg_data: [u8; 4],
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCfgCap {}

impl PciCapability for VirtioPciCfgCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciCfgCap {
    fn new() -> Self {
        VirtioPciCfgCap {
            cap: VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0),
            ..Default::default()
        }
    }
}

#[derive(Clone, Copy, Default)]
struct VirtioPciCfgCapInfo {
    offset: usize,
    cap: VirtioPciCfgCap,
}

#[allow(dead_code)]
#[derive(Copy, Clone)]
pub enum PciVirtioSubclass {
    NonTransitionalBase = 0xff,
}

impl PciSubclass for PciVirtioSubclass {
    fn get_register_value(&self) -> u8 {
        *self as u8
    }
}

// Allocate one BAR for the structs pointed to by the capability structures.
// As per the PCI specification, because the same BAR shares MSI-X and non
// MSI-X structures, it is recommended to use 8KiB alignment for all those
// structures.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x2000;
const ISR_CONFIG_SIZE: u64 = 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x4000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const NOTIFICATION_BAR_OFFSET: u64 = 0x6000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x8000;
// The table can hold up to 2048 entries, each entry being 128 bits (4 DWORDs),
// i.e. 32KiB of actual data; the region is sized at 256KiB.
const MSIX_TABLE_SIZE: u64 = 0x40000;
const MSIX_PBA_BAR_OFFSET: u64 = 0x48000;
// The Pending Bit Array has one bit per vector, i.e. 256 bytes for the 2048
// vector maximum; the region is sized at 2KiB.
const MSIX_PBA_SIZE: u64 = 0x800;
// The BAR size must be a power of 2.
const CAPABILITY_BAR_SIZE: u64 = 0x80000;
const VIRTIO_COMMON_BAR_INDEX: usize = 0;
const VIRTIO_SHM_BAR_INDEX: usize = 2;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.
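
// For reference, the capability BAR layout resulting from the constants above:
//
//   0x00000..0x00038  common configuration (56 bytes)
//   0x02000..0x02001  ISR status (1 byte)
//   0x04000..0x05000  device-specific configuration
//   0x06000..0x07000  notifications
//   0x08000..0x48000  MSI-X table
//   0x48000..0x48800  MSI-X PBA
//
// for a total BAR size of 0x80000 (512KiB), the next power of two.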

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.

#[derive(Versionize)]
struct QueueState {
    max_size: u16,
    size: u16,
    ready: bool,
    desc_table: u64,
    avail_ring: u64,
    used_ring: u64,
}

#[derive(Versionize)]
struct VirtioPciDeviceState {
    device_activated: bool,
    queues: Vec<QueueState>,
    interrupt_status: usize,
}

impl VersionMapped for VirtioPciDeviceState {}

pub struct VirtioPciDeviceActivator {
    interrupt: Option<Arc<dyn VirtioInterrupt>>,
    memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    device: Arc<Mutex<dyn VirtioDevice>>,
    device_activated: Arc<AtomicBool>,
    queues: Option<Vec<(usize, Queue, EventFd)>>,
    barrier: Option<Arc<Barrier>>,
    id: String,
}

impl VirtioPciDeviceActivator {
    pub fn activate(&mut self) -> ActivateResult {
        self.device.lock().unwrap().activate(
            self.memory.take().unwrap(),
            self.interrupt.take().unwrap(),
            self.queues.take().unwrap(),
        )?;
        self.device_activated.store(true, Ordering::SeqCst);

        if let Some(barrier) = self.barrier.take() {
            info!("{}: Waiting for barrier", self.id);
            barrier.wait();
            info!("{}: Barrier released", self.id);
        }

        Ok(())
    }
}
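
// Note on deferred activation (explanatory, summarizing the code paths below):
// when write_bar() observes that the driver has set DRIVER_OK, it cannot run a
// potentially blocking activate() on the VM-exit path. Instead it pushes a
// VirtioPciDeviceActivator onto `pending_activations`, signals `activate_evt`,
// and returns a barrier. Whichever VMM thread drains `pending_activations`
// then calls activate(), which performs the actual device activation and
// releases the barrier, unblocking the guest access.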
pub struct VirtioPciDevice {
    id: String,

    // PCI configuration registers.
    configuration: PciConfiguration,

    // virtio PCI common configuration
    common_config: VirtioPciCommonConfig,

    // MSI-X config
    msix_config: Option<Arc<Mutex<MsixConfig>>>,

    // Number of MSI-X vectors
    msix_num: u16,

    // Virtio device reference and status
    device: Arc<Mutex<dyn VirtioDevice>>,
    device_activated: Arc<AtomicBool>,

    // PCI interrupts.
    interrupt_status: Arc<AtomicUsize>,
    virtio_interrupt: Option<Arc<dyn VirtioInterrupt>>,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,

    // virtio queues
    queues: Vec<Queue>,
    queue_evts: Vec<EventFd>,

    // Guest memory
    memory: GuestMemoryAtomic<GuestMemoryMmap>,

    // Settings PCI BAR
    settings_bar: u8,

    // Whether to use a 64-bit or a 32-bit BAR location
    use_64bit_bar: bool,

    // Dedicated structure holding information about the very specific
    // virtio-pci capability VIRTIO_PCI_CAP_PCI_CFG. This is needed to support
    // the legacy/backward compatible mechanism of letting the guest access the
    // other virtio capabilities without mapping the PCI BARs. This can be
    // needed when the guest tries to access the virtio configuration of a
    // device early, before the BARs have been mapped.
    cap_pci_cfg_info: VirtioPciCfgCapInfo,

    // Details of BAR regions to free
    bar_regions: Vec<PciBarConfiguration>,

    // EventFd to signal on to request activation
    activate_evt: EventFd,

    // Optional DMA handler
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
}

impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        memory: GuestMemoryAtomic<GuestMemoryMmap>,
        device: Arc<Mutex<dyn VirtioDevice>>,
        msix_num: u16,
        access_platform: Option<Arc<dyn AccessPlatform>>,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
        pci_device_bdf: u32,
        activate_evt: EventFd,
        use_64bit_bar: bool,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
        pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
    ) -> Result<Self> {
        let device_clone = device.clone();
        let mut locked_device = device_clone.lock().unwrap();
        let mut queue_evts = Vec::new();
        for _ in locked_device.queue_max_sizes().iter() {
            queue_evts.push(EventFd::new(EFD_NONBLOCK)?)
        }
        let num_queues = locked_device.queue_max_sizes().len();

        if let Some(access_platform) = &access_platform {
            locked_device.set_access_platform(access_platform.clone());
        }

        let queues = locked_device
            .queue_max_sizes()
            .iter()
            .map(|&s| Queue::new(s).unwrap())
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + locked_device.device_type() as u16;

        let interrupt_source_group = interrupt_manager.create_group(MsiIrqGroupConfig {
            base: 0,
            count: msix_num as InterruptIndex,
        })?;

        let (msix_config, msix_config_clone) = if msix_num > 0 {
            let msix_config = Arc::new(Mutex::new(MsixConfig::new(
                msix_num,
                interrupt_source_group.clone(),
                pci_device_bdf,
            )));
            let msix_config_clone = msix_config.clone();
            (Some(msix_config), Some(msix_config_clone))
        } else {
            (None, None)
        };

        let (class, subclass) = match VirtioDeviceType::from(locked_device.device_type()) {
            VirtioDeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::EthernetController as &dyn PciSubclass,
            ),
            VirtioDeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::MassStorage as &dyn PciSubclass,
            ),
            _ => (
                PciClassCode::Other,
                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
            ),
        };

        let configuration = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            0x1, // For modern virtio-PCI devices
            class,
            subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            msix_config_clone,
        );

        let mut virtio_pci_device = VirtioPciDevice {
            id,
            configuration,
            common_config: VirtioPciCommonConfig {
                access_platform,
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: Arc::new(AtomicU16::new(VIRTQ_MSI_NO_VECTOR)),
                msix_queues: Arc::new(Mutex::new(vec![VIRTQ_MSI_NO_VECTOR; num_queues])),
            },
            msix_config,
            msix_num,
            device,
            device_activated: Arc::new(AtomicBool::new(false)),
            interrupt_status: Arc::new(AtomicUsize::new(0)),
            virtio_interrupt: None,
            queues,
            queue_evts,
            memory,
            settings_bar: 0,
            use_64bit_bar,
            interrupt_source_group,
            cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
            bar_regions: vec![],
            activate_evt,
            dma_handler,
            pending_activations,
        };

        if let Some(msix_config) = &virtio_pci_device.msix_config {
            virtio_pci_device.virtio_interrupt = Some(Arc::new(VirtioInterruptMsix::new(
                msix_config.clone(),
                virtio_pci_device.common_config.msix_config.clone(),
                virtio_pci_device.common_config.msix_queues.clone(),
                virtio_pci_device.interrupt_source_group.clone(),
            )));
        }

        Ok(virtio_pci_device)
    }

    fn state(&self) -> VirtioPciDeviceState {
        VirtioPciDeviceState {
            device_activated: self.device_activated.load(Ordering::Acquire),
            interrupt_status: self.interrupt_status.load(Ordering::Acquire),
            queues: self
                .queues
                .iter()
                .map(|q| QueueState {
                    max_size: q.max_size(),
                    size: q.size(),
                    ready: q.ready(),
                    desc_table: q.desc_table(),
                    avail_ring: q.avail_ring(),
                    used_ring: q.used_ring(),
                })
                .collect(),
        }
    }

    fn set_state(&mut self, state: &VirtioPciDeviceState) -> std::result::Result<(), Error> {
        self.device_activated
            .store(state.device_activated, Ordering::Release);
        self.interrupt_status
            .store(state.interrupt_status, Ordering::Release);

        // Update virtqueue indexes for both the available and used rings. Both
        // next_avail and next_used are restored from the used ring index, which
        // assumes every available descriptor had been processed when the state
        // was saved.
        for (i, queue) in self.queues.iter_mut().enumerate() {
            queue.set_size(state.queues[i].size);
            queue.set_ready(state.queues[i].ready);
            queue
                .try_set_desc_table_address(GuestAddress(state.queues[i].desc_table))
                .unwrap();
            queue
                .try_set_avail_ring_address(GuestAddress(state.queues[i].avail_ring))
                .unwrap();
            queue
                .try_set_used_ring_address(GuestAddress(state.queues[i].used_ring))
                .unwrap();
            queue.set_next_avail(
                queue
                    .used_idx(self.memory.memory().deref(), Ordering::Acquire)
                    .map_err(Error::QueueRingIndex)?
                    .0,
            );
            queue.set_next_used(
                queue
                    .used_idx(self.memory.memory().deref(), Ordering::Acquire)
                    .map_err(Error::QueueRingIndex)?
                    .0,
            );
        }

        Ok(())
    }

    /// Gets the list of queue events that must be triggered whenever the VM writes to
    /// `virtio::NOTIFY_REG_OFFSET` past the MMIO base. Each event must be triggered when the
    /// value being written equals the index of the event in this list.
    fn queue_evts(&self) -> &[EventFd] {
        self.queue_evts.as_slice()
    }
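
    // Driver status handshake, as defined by the virtio spec: the driver
    // writes ACKNOWLEDGE (0x1), then DRIVER (0x2), negotiates features and
    // sets FEATURES_OK (0x8), and finally DRIVER_OK (0x4). FAILED (0x80) may
    // be set at any point, and writing 0 requests a device reset.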
    fn is_driver_ready(&self) -> bool {
        let ready_bits =
            (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8;
        self.common_config.driver_status == ready_bits
            && self.common_config.driver_status & DEVICE_FAILED as u8 == 0
    }

    /// Determines whether the driver has requested that the device reset
    /// (re-initialize) itself.
    fn is_driver_init(&self) -> bool {
        self.common_config.driver_status == DEVICE_INIT as u8
    }

    pub fn config_bar_addr(&self) -> u64 {
        self.configuration.get_bar_addr(self.settings_bar as usize)
    }

    fn add_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.configuration
            .add_capability(&common_cap)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.configuration
            .add_capability(&isr_cap)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.configuration
            .add_capability(&device_cap)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.configuration
            .add_capability(&notify_cap)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let configuration_cap = VirtioPciCfgCap::new();
        self.cap_pci_cfg_info.offset = self
            .configuration
            .add_capability(&configuration_cap)
            .map_err(PciDeviceError::CapabilitiesSetup)?
            + VIRTIO_PCI_CAP_OFFSET;
        self.cap_pci_cfg_info.cap = configuration_cap;

        if self.msix_config.is_some() {
            let msix_cap = MsixCap::new(
                settings_bar,
                self.msix_num,
                MSIX_TABLE_BAR_OFFSET as u32,
                settings_bar,
                MSIX_PBA_BAR_OFFSET as u32,
            );
            self.configuration
                .add_capability(&msix_cap)
                .map_err(PciDeviceError::CapabilitiesSetup)?;
        }

        self.settings_bar = settings_bar;
        Ok(())
    }

    fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) {
        let cap_slice = self.cap_pci_cfg_info.cap.as_slice();
        let data_len = data.len();
        let cap_len = cap_slice.len();
        if offset + data_len > cap_len {
            error!("Failed to read cap_pci_cfg from config space");
            return;
        }

        if offset < std::mem::size_of::<VirtioPciCap>() {
            if let Some(end) = offset.checked_add(data_len) {
                // This write can't fail, offset and end are checked against config_len.
                data.write_all(&cap_slice[offset..cmp::min(end, cap_len)])
                    .unwrap();
            }
        } else {
            // SAFETY: we know self.cap_pci_cfg_info.cap.cap.offset is 32 bits long.
            let bar_offset: u32 =
                unsafe { std::mem::transmute(self.cap_pci_cfg_info.cap.cap.offset) };
            self.read_bar(0, bar_offset as u64, data)
        }
    }
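
    // Illustrative walk-through of the VIRTIO_PCI_CAP_PCI_CFG window (derived
    // from the virtio spec and the two functions around this comment): to read
    // the 32-bit device_feature register without mapping any BAR, the guest
    // programs the capability through plain PCI config writes with bar=0,
    // offset=4 (device_feature within the common configuration) and length=4,
    // then reads the value back through the pci_cfg_data field.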
    fn write_cap_pci_cfg(&mut self, offset: usize, data: &[u8]) -> Option<Arc<Barrier>> {
        let cap_slice = self.cap_pci_cfg_info.cap.as_mut_slice();
        let data_len = data.len();
        let cap_len = cap_slice.len();
        if offset + data_len > cap_len {
            error!("Failed to write cap_pci_cfg to config space");
            return None;
        }

        if offset < std::mem::size_of::<VirtioPciCap>() {
            let (_, right) = cap_slice.split_at_mut(offset);
            right[..data_len].copy_from_slice(data);
            None
        } else {
            // SAFETY: we know self.cap_pci_cfg_info.cap.cap.offset is 32 bits long.
            let bar_offset: u32 =
                unsafe { std::mem::transmute(self.cap_pci_cfg_info.cap.cap.offset) };
            self.write_bar(0, bar_offset as u64, data)
        }
    }

    pub fn virtio_device(&self) -> Arc<Mutex<dyn VirtioDevice>> {
        self.device.clone()
    }

    fn prepare_activator(&mut self, barrier: Option<Arc<Barrier>>) -> VirtioPciDeviceActivator {
        let mut queues = Vec::new();

        for (queue_index, queue) in self.queues.iter().enumerate() {
            if !queue.ready() {
                continue;
            }

            if !queue.is_valid(self.memory.memory().deref()) {
                error!("Queue {} is not valid", queue_index);
            }

            queues.push((
                queue_index,
                vm_virtio::clone_queue(queue),
                self.queue_evts[queue_index].try_clone().unwrap(),
            ));
        }

        VirtioPciDeviceActivator {
            interrupt: self.virtio_interrupt.take(),
            memory: Some(self.memory.clone()),
            device: self.device.clone(),
            queues: Some(queues),
            device_activated: self.device_activated.clone(),
            barrier,
            id: self.id.clone(),
        }
    }

    fn activate(&mut self) -> ActivateResult {
        self.prepare_activator(None).activate()
    }

    fn needs_activation(&self) -> bool {
        !self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready()
    }

    pub fn dma_handler(&self) -> Option<&Arc<dyn ExternalDmaMapping>> {
        self.dma_handler.as_ref()
    }
}

impl VirtioTransport for VirtioPciDevice {
    fn ioeventfds(&self, base_addr: u64) -> Vec<(&EventFd, u64)> {
        let notify_base = base_addr + NOTIFICATION_BAR_OFFSET;
        self.queue_evts()
            .iter()
            .enumerate()
            .map(|(i, event)| {
                (
                    event,
                    notify_base + i as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                )
            })
            .collect()
    }
}

pub struct VirtioInterruptMsix {
    msix_config: Arc<Mutex<MsixConfig>>,
    config_vector: Arc<AtomicU16>,
    queues_vectors: Arc<Mutex<Vec<u16>>>,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
}

impl VirtioInterruptMsix {
    pub fn new(
        msix_config: Arc<Mutex<MsixConfig>>,
        config_vector: Arc<AtomicU16>,
        queues_vectors: Arc<Mutex<Vec<u16>>>,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    ) -> Self {
        VirtioInterruptMsix {
            msix_config,
            config_vector,
            queues_vectors,
            interrupt_source_group,
        }
    }
}
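
// Note on masking (explanatory): trigger() below checks both the function-wide
// MSI-X mask and the per-vector mask. If either is set, the interrupt is not
// injected; only the corresponding Pending Bit Array bit is set. Injecting the
// pending interrupt once the vector is unmasked is left to MsixConfig in the
// pci crate (an assumption about that crate's behavior, not enforced here).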
impl VirtioInterrupt for VirtioInterruptMsix {
    fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), std::io::Error> {
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue(queue_index) => {
                self.queues_vectors.lock().unwrap()[queue_index as usize]
            }
        };

        if vector == VIRTQ_MSI_NO_VECTOR {
            return Ok(());
        }

        let config = &mut self.msix_config.lock().unwrap();
        let entry = &config.table_entries[vector as usize];
        // In case the vector control register associated with the entry
        // has its first bit set, this means the vector is masked and the
        // device should not inject the interrupt.
        // Instead, the Pending Bit Array table is updated to reflect there
        // is a pending interrupt for this specific vector.
        if config.masked() || entry.masked() {
            config.set_pba_bit(vector, false);
            return Ok(());
        }

        self.interrupt_source_group
            .trigger(vector as InterruptIndex)
    }

    fn notifier(&self, int_type: VirtioInterruptType) -> Option<EventFd> {
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue(queue_index) => {
                self.queues_vectors.lock().unwrap()[queue_index as usize]
            }
        };

        self.interrupt_source_group
            .notifier(vector as InterruptIndex)
    }
}

impl PciDevice for VirtioPciDevice {
    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Arc<Barrier>> {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base + offset as usize >= self.cap_pci_cfg_info.offset
            && base + offset as usize + data.len()
                <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            let offset = base + offset as usize - self.cap_pci_cfg_info.offset;
            self.write_cap_pci_cfg(offset, data)
        } else {
            self.configuration
                .write_config_register(reg_idx, offset, data);
            None
        }
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base >= self.cap_pci_cfg_info.offset
            && base + 4 <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            let offset = base - self.cap_pci_cfg_info.offset;
            let mut data = [0u8; 4];
            self.read_cap_pci_cfg(offset, &mut data);
            u32::from_le_bytes(data)
        } else {
            self.configuration.read_reg(reg_idx)
        }
    }

    fn detect_bar_reprogramming(
        &mut self,
        reg_idx: usize,
        data: &[u8],
    ) -> Option<BarReprogrammingParams> {
        self.configuration.detect_bar_reprogramming(reg_idx, data)
    }

    fn allocate_bars(
        &mut self,
        allocator: &Arc<Mutex<SystemAllocator>>,
        mmio_allocator: &mut AddressAllocator,
        resources: Option<Vec<Resource>>,
    ) -> std::result::Result<Vec<PciBarConfiguration>, PciDeviceError> {
        let mut bars = Vec::new();
        let device_clone = self.device.clone();
        let device = device_clone.lock().unwrap();

        let mut settings_bar_addr = None;
        if let Some(resources) = &resources {
            for resource in resources {
                if let Resource::PciBar { index, base, .. } = resource {
                    if *index == VIRTIO_COMMON_BAR_INDEX {
                        settings_bar_addr = Some(GuestAddress(*base));
                        break;
                    }
                }
            }
            // Error out if no resource matched the BAR id.
            if settings_bar_addr.is_none() {
                return Err(PciDeviceError::MissingResource);
            }
        }

        // Allocate the virtio-pci capability BAR.
        // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004
        let (virtio_pci_bar_addr, region_type) = if self.use_64bit_bar {
            let region_type = PciBarRegionType::Memory64BitRegion;
            let addr = mmio_allocator
                .allocate(
                    settings_bar_addr,
                    CAPABILITY_BAR_SIZE,
                    Some(CAPABILITY_BAR_SIZE),
                )
                .ok_or(PciDeviceError::IoAllocationFailed(CAPABILITY_BAR_SIZE))?;
            (addr, region_type)
        } else {
            let region_type = PciBarRegionType::Memory32BitRegion;
            let addr = allocator
                .lock()
                .unwrap()
                .allocate_mmio_hole_addresses(
                    settings_bar_addr,
                    CAPABILITY_BAR_SIZE,
                    Some(CAPABILITY_BAR_SIZE),
                )
                .ok_or(PciDeviceError::IoAllocationFailed(CAPABILITY_BAR_SIZE))?;
            (addr, region_type)
        };

        let bar = PciBarConfiguration::default()
            .set_index(VIRTIO_COMMON_BAR_INDEX)
            .set_address(virtio_pci_bar_addr.raw_value())
            .set_size(CAPABILITY_BAR_SIZE)
            .set_region_type(region_type);
        self.configuration.add_pci_bar(&bar).map_err(|e| {
            PciDeviceError::IoRegistrationFailed(virtio_pci_bar_addr.raw_value(), e)
        })?;

        bars.push(bar);

        // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
        self.add_pci_capabilities(VIRTIO_COMMON_BAR_INDEX as u8)?;

        // Allocate a dedicated BAR if there are some shared memory regions.
        if let Some(shm_list) = device.get_shm_regions() {
            let bar = PciBarConfiguration::default()
                .set_index(VIRTIO_SHM_BAR_INDEX)
                .set_address(shm_list.addr.raw_value())
                .set_size(shm_list.len);
            self.configuration
                .add_pci_bar(&bar)
                .map_err(|e| PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e))?;

            bars.push(bar);

            for (idx, shm) in shm_list.region_list.iter().enumerate() {
                let shm_cap = VirtioPciCap64::new(
                    PciCapabilityType::SharedMemoryConfig,
                    VIRTIO_SHM_BAR_INDEX as u8,
                    idx as u8,
                    shm.offset,
                    shm.len,
                );
                self.configuration
                    .add_capability(&shm_cap)
                    .map_err(PciDeviceError::CapabilitiesSetup)?;
            }
        }

        self.bar_regions = bars.clone();

        Ok(bars)
    }

    fn free_bars(
        &mut self,
        allocator: &mut SystemAllocator,
        mmio_allocator: &mut AddressAllocator,
    ) -> std::result::Result<(), PciDeviceError> {
        for bar in self.bar_regions.drain(..) {
            match bar.region_type() {
                PciBarRegionType::Memory32BitRegion => {
                    allocator.free_mmio_hole_addresses(GuestAddress(bar.addr()), bar.size());
                }
                PciBarRegionType::Memory64BitRegion => {
                    mmio_allocator.free(GuestAddress(bar.addr()), bar.size());
                }
                _ => error!("Unexpected PCI bar type"),
            }
        }
        Ok(())
    }

    fn move_bar(&mut self, old_base: u64, new_base: u64) -> result::Result<(), std::io::Error> {
        // We only update our idea of the bar in order to support free_bars() above.
        // The majority of the reallocation is done inside DeviceManager.
        for bar in self.bar_regions.iter_mut() {
            if bar.addr() == old_base {
                *bar = bar.set_address(new_base);
            }
        }

        Ok(())
    }

    fn read_bar(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => self.common_config.read(
                o - COMMON_CONFIG_BAR_OFFSET,
                data,
                &mut self.queues,
                self.device.clone(),
            ),
            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
                if let Some(v) = data.get_mut(0) {
                    // Reading this register resets it to 0.
                    *v = self.interrupt_status.swap(0, Ordering::AcqRel) as u8;
                }
            }
            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
                .contains(&o) =>
            {
                let device = self.device.lock().unwrap();
                device.read_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
                .contains(&o) =>
            {
                // Handled with ioeventfds.
            }
            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .read_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .read_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            _ => (),
        }
    }

    fn write_bar(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => self.common_config.write(
                o - COMMON_CONFIG_BAR_OFFSET,
                data,
                &mut self.queues,
                self.device.clone(),
            ),
            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
                if let Some(v) = data.first() {
                    self.interrupt_status
                        .fetch_and(!(*v as usize), Ordering::AcqRel);
                }
            }
            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
                .contains(&o) =>
            {
                let mut device = self.device.lock().unwrap();
                device.write_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
                .contains(&o) =>
            {
                // Handled with ioeventfds.
                error!("Unexpected write to notification BAR: offset = 0x{:x}", o);
            }
            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .write_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .write_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            _ => (),
        };

        // Try to activate the device if the driver status has changed.
        if self.needs_activation() {
            let barrier = Arc::new(Barrier::new(2));
            let activator = self.prepare_activator(Some(barrier.clone()));
            self.pending_activations.lock().unwrap().push(activator);
            info!(
                "{}: Needs activation; writing to activate event fd",
                self.id
            );
            self.activate_evt.write(1).ok();
            info!("{}: Needs activation; returning barrier", self.id);
            return Some(barrier);
        }

        // Device has been reset by the driver.
        if self.device_activated.load(Ordering::SeqCst) && self.is_driver_init() {
            let mut device = self.device.lock().unwrap();
            if let Some(virtio_interrupt) = device.reset() {
                // Upon reset the device returns its interrupt EventFd.
                self.virtio_interrupt = Some(virtio_interrupt);
                self.device_activated.store(false, Ordering::SeqCst);

                // Reset queue readiness (changes queue_enable), queue sizes
                // and selected_queue as per the spec for reset.
                self.queues.iter_mut().for_each(Queue::reset);
                self.common_config.queue_select = 0;
            } else {
                error!("Attempt to reset device when not implemented in underlying device");
                self.common_config.driver_status = crate::DEVICE_FAILED as u8;
            }
        }

        None
    }

    fn as_any(&mut self) -> &mut dyn Any {
        self
    }

    fn id(&self) -> Option<String> {
        Some(self.id.clone())
    }
}

impl BusDevice for VirtioPciDevice {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.read_bar(base, offset, data)
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.write_bar(base, offset, data)
    }
}

impl Pausable for VirtioPciDevice {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        Ok(())
    }
}

impl Snapshottable for VirtioPciDevice {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut virtio_pci_dev_snapshot =
            Snapshot::new_from_versioned_state(&self.id, &self.state())?;

        // Snapshot PciConfiguration
        virtio_pci_dev_snapshot.add_snapshot(self.configuration.snapshot()?);

        // Snapshot VirtioPciCommonConfig
        virtio_pci_dev_snapshot.add_snapshot(self.common_config.snapshot()?);

        // Snapshot MSI-X
        if let Some(msix_config) = &self.msix_config {
            virtio_pci_dev_snapshot.add_snapshot(msix_config.lock().unwrap().snapshot()?);
        }

        Ok(virtio_pci_dev_snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        if let Some(virtio_pci_dev_section) =
            snapshot.snapshot_data.get(&format!("{}-section", self.id))
        {
            // Restore MSI-X
            if let Some(msix_config) = &self.msix_config {
                let id = msix_config.lock().unwrap().id();
                if let Some(msix_snapshot) = snapshot.snapshots.get(&id) {
                    msix_config
                        .lock()
                        .unwrap()
                        .restore(*msix_snapshot.clone())?;
                }
            }

            // Restore VirtioPciCommonConfig
            if let Some(virtio_config_snapshot) = snapshot.snapshots.get(&self.common_config.id()) {
                self.common_config
                    .restore(*virtio_config_snapshot.clone())?;
            }

            // Restore PciConfiguration
            if let Some(pci_config_snapshot) = snapshot.snapshots.get(&self.configuration.id()) {
                self.configuration.restore(*pci_config_snapshot.clone())?;
            }

            // First restore the status of the virtqueues.
            self.set_state(&virtio_pci_dev_section.to_versioned_state()?)
                .map_err(|e| {
                    MigratableError::Restore(anyhow!(
                        "Could not restore VIRTIO_PCI_DEVICE state {:?}",
                        e
                    ))
                })?;

            // Then we can activate the device, as we know at this point that
            // the virtqueues are in the right state and the device is ready
            // to be activated, which will spawn each virtio worker thread.
            if self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready() {
                self.activate().map_err(|e| {
                    MigratableError::Restore(anyhow!("Failed activating the device: {:?}", e))
                })?;
            }

            return Ok(());
        }

        Err(MigratableError::Restore(anyhow!(
            "Could not find VIRTIO_PCI_DEVICE snapshot section"
        )))
    }
}

impl Transportable for VirtioPciDevice {}
impl Migratable for VirtioPciDevice {}
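
// A minimal sanity-check sketch (an illustrative addition, not upstream
// tests): it exercises the BAR layout constants and the capability length
// arithmetic defined above, assuming the standard `cargo test` harness.
#[cfg(test)]
mod tests {
    use super::*;

    // Every capability region must fit in the BAR, regions must not overlap,
    // and the overall BAR size must be a power of two.
    #[test]
    fn capability_bar_layout_is_consistent() {
        assert!(COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE <= ISR_CONFIG_BAR_OFFSET);
        assert!(ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE <= DEVICE_CONFIG_BAR_OFFSET);
        assert!(DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE <= NOTIFICATION_BAR_OFFSET);
        assert!(NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE <= MSIX_TABLE_BAR_OFFSET);
        assert!(MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE <= MSIX_PBA_BAR_OFFSET);
        assert!(MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE <= CAPABILITY_BAR_SIZE);
        assert!(CAPABILITY_BAR_SIZE.is_power_of_two());
    }

    // cap_len must account for the two header bytes (cap_vndr/cap_next) that
    // add_capability() fills in on top of the VirtioPciCap body.
    #[test]
    fn virtio_pci_cap_len_includes_header() {
        let cap = VirtioPciCap::new(PciCapabilityType::CommonConfig, 0, 0, 0);
        assert_eq!(
            cap.cap_len as usize,
            std::mem::size_of::<VirtioPciCap>() + VIRTIO_PCI_CAP_LEN_OFFSET as usize
        );
    }

    // The notification region must be large enough for each queue to get its
    // own doorbell, spaced NOTIFY_OFF_MULTIPLIER bytes apart (the arithmetic
    // used by ioeventfds()); 256 queues is a generous upper bound here.
    #[test]
    fn notification_region_fits_queue_doorbells() {
        assert!(256 * u64::from(NOTIFY_OFF_MULTIPLIER) <= NOTIFICATION_SIZE);
    }
}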