1 // Copyright 2018 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE-BSD-3-Clause file. 4 // 5 // Copyright © 2019 Intel Corporation 6 // 7 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 8 9 use super::VirtioPciCommonConfig; 10 use crate::transport::VirtioTransport; 11 use crate::GuestMemoryMmap; 12 use crate::{ 13 ActivateResult, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioInterruptType, 14 DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED, DEVICE_FEATURES_OK, 15 DEVICE_INIT, 16 }; 17 use anyhow::anyhow; 18 use libc::EFD_NONBLOCK; 19 use pci::{ 20 BarReprogrammingParams, MsixCap, MsixConfig, PciBarConfiguration, PciBarRegionType, 21 PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, 22 PciHeaderType, PciMassStorageSubclass, PciNetworkControllerSubclass, PciSubclass, 23 }; 24 use std::any::Any; 25 use std::cmp; 26 use std::io::Write; 27 use std::result; 28 use std::sync::atomic::{AtomicBool, AtomicU16, AtomicUsize, Ordering}; 29 use std::sync::{Arc, Barrier, Mutex}; 30 use versionize::{VersionMap, Versionize, VersionizeResult}; 31 use versionize_derive::Versionize; 32 use virtio_queue::{Error as QueueError, Queue}; 33 use vm_allocator::{AddressAllocator, SystemAllocator}; 34 use vm_device::dma_mapping::ExternalDmaMapping; 35 use vm_device::interrupt::{ 36 InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig, 37 }; 38 use vm_device::BusDevice; 39 use vm_memory::{Address, ByteValued, GuestAddress, GuestMemoryAtomic, GuestUsize, Le32}; 40 use vm_migration::{ 41 Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped, 42 }; 43 use vm_virtio::AccessPlatform; 44 use vmm_sys_util::{errno::Result, eventfd::EventFd}; 45 46 /// Vector value used to disable MSI for a queue. 
const VIRTQ_MSI_NO_VECTOR: u16 = 0xffff;

/// Errors local to this module.
#[derive(Debug)]
enum Error {
    /// Failed to retrieve queue ring's index.
    QueueRingIndex(QueueError),
}

// Values of the cfg_type field of a virtio PCI capability, identifying which
// configuration structure the capability points at (virtio spec 4.1.4).
#[allow(clippy::enum_variant_names)]
enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    SharedMemoryConfig = 8,
}

// This offset represents the 2 bytes omitted from the VirtioPciCap structure
// as they are already handled through add_capability(). These 2 bytes are the
// fields cap_vndr (1 byte) and cap_next (1 byte) defined in the virtio spec.
const VIRTIO_PCI_CAP_OFFSET: usize = 2;

// Guest-visible layout of a virtio PCI capability (minus the two generic
// bytes handled by add_capability(), see VIRTIO_PCI_CAP_OFFSET).
// repr(packed) keeps the exact byte layout the guest expects.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCap {
    cap_len: u8,      // Generic PCI field: capability length
    cfg_type: u8,     // Identifies the structure.
    pci_bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    offset: Le32,     // Offset within bar.
    length: Le32,     // Length of the structure, in bytes.
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap {}

impl PciCapability for VirtioPciCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

// Accounts for the cap_vndr/cap_next bytes in the advertised cap_len, which
// must describe the full on-the-wire capability length.
const VIRTIO_PCI_CAP_LEN_OFFSET: u8 = 2;

impl VirtioPciCap {
    /// Builds a capability pointing at `length` bytes located at `offset`
    /// within BAR `pci_bar`, for the structure kind `cfg_type`.
    pub fn new(cfg_type: PciCapabilityType, pci_bar: u8, offset: u32, length: u32) -> Self {
        VirtioPciCap {
            cap_len: (std::mem::size_of::<VirtioPciCap>() as u8) + VIRTIO_PCI_CAP_LEN_OFFSET,
            cfg_type: cfg_type as u8,
            pci_bar,
            id: 0,
            padding: [0; 2],
            offset: Le32::from(offset),
            length: Le32::from(length),
        }
    }
}

// Notification capability: extends VirtioPciCap with the per-queue
// notify_off_multiplier (virtio spec 4.1.4.4).
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciNotifyCap {}

impl PciCapability for VirtioPciNotifyCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciNotifyCap {
    /// Like VirtioPciCap::new but also carries the notification offset
    /// multiplier advertised to the guest.
    pub fn new(
        cfg_type: PciCapabilityType,
        pci_bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                cap_len: (std::mem::size_of::<VirtioPciNotifyCap>() as u8)
                    + VIRTIO_PCI_CAP_LEN_OFFSET,
                cfg_type: cfg_type as u8,
                pci_bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}

// 64-bit variant of VirtioPciCap, used for structures (e.g. shared memory
// regions) whose offset/length may not fit in 32 bits.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCap64 {
    cap: VirtioPciCap,
    offset_hi: Le32,
    length_hi: Le32,
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap64 {}

impl PciCapability for VirtioPciCap64 {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciCap64 {
    /// Splits the 64-bit `offset` and `length` into the low 32 bits stored in
    /// the embedded VirtioPciCap and the high 32 bits stored here.
    pub fn new(cfg_type: PciCapabilityType, pci_bar: u8, id: u8, offset: u64, length: u64) -> Self {
        VirtioPciCap64 {
            cap: VirtioPciCap {
                cap_len: (std::mem::size_of::<VirtioPciCap64>() as u8) + VIRTIO_PCI_CAP_LEN_OFFSET,
                cfg_type: cfg_type as u8,
                pci_bar,
                id,
                padding: [0; 2],
                offset: Le32::from(offset as u32),
                length: Le32::from(length as u32),
            },
            offset_hi: Le32::from((offset >> 32) as u32),
            length_hi: Le32::from((length >> 32) as u32),
        }
    }
}

// VIRTIO_PCI_CAP_PCI_CFG capability: lets the guest access the other virtio
// structures through the PCI configuration space window (pci_cfg_data).
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCfgCap {
    cap: VirtioPciCap,
    pci_cfg_data: [u8; 4],
}
// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCfgCap {}

impl PciCapability for VirtioPciCfgCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciCfgCap {
    fn new() -> Self {
        VirtioPciCfgCap {
            cap: VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0),
            ..Default::default()
        }
    }
}

// Tracks where the VIRTIO_PCI_CAP_PCI_CFG capability lives in config space
// (offset) along with its current contents (cap).
#[derive(Clone, Copy, Default)]
struct VirtioPciCfgCapInfo {
    offset: usize,
    cap: VirtioPciCfgCap,
}

// PCI subclass used for virtio devices that don't map to a standard class.
#[allow(dead_code)]
#[derive(Copy, Clone)]
pub enum PciVirtioSubclass {
    NonTransitionalBase = 0xff,
}

impl PciSubclass for PciVirtioSubclass {
    fn get_register_value(&self) -> u8 {
        *self as u8
    }
}

// Allocate one bar for the structs pointed to by the capability structures.
// As per the PCI specification, because the same BAR shares MSI-X and non
// MSI-X structures, it is recommended to use 8KiB alignment for all those
// structures.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x2000;
const ISR_CONFIG_SIZE: u64 = 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x4000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const NOTIFICATION_BAR_OFFSET: u64 = 0x6000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x8000;
// The size is 256KiB because the table can hold up to 2048 entries, with each
// entry being 128 bits (4 DWORDS).
const MSIX_TABLE_SIZE: u64 = 0x40000;
const MSIX_PBA_BAR_OFFSET: u64 = 0x48000;
// The size is 2KiB because the Pending Bit Array has one bit per vector and it
// can support up to 2048 vectors.
const MSIX_PBA_SIZE: u64 = 0x800;
// The BAR size must be a power of 2.
const CAPABILITY_BAR_SIZE: u64 = 0x80000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.

// Serializable snapshot of one virtqueue, captured by state() and replayed by
// set_state() across migration/restore.
#[derive(Versionize)]
struct QueueState {
    max_size: u16,
    size: u16,
    ready: bool,
    desc_table: u64,
    avail_ring: u64,
    used_ring: u64,
}

// Serializable snapshot of the whole transport: activation flag, per-queue
// state and the pending ISR status bits.
#[derive(Versionize)]
struct VirtioPciDeviceState {
    device_activated: bool,
    queues: Vec<QueueState>,
    interrupt_status: usize,
}

impl VersionMapped for VirtioPciDeviceState {}

/// PCI transport for a virtio device: exposes the virtio configuration
/// structures through a PCI BAR and capabilities, and routes guest accesses
/// to the wrapped `VirtioDevice`.
pub struct VirtioPciDevice {
    id: String,

    // PCI configuration registers.
    configuration: PciConfiguration,

    // virtio PCI common configuration
    common_config: VirtioPciCommonConfig,

    // MSI-X config
    msix_config: Option<Arc<Mutex<MsixConfig>>>,

    // Number of MSI-X vectors
    msix_num: u16,

    // Virtio device reference and status
    device: Arc<Mutex<dyn VirtioDevice>>,
    device_activated: Arc<AtomicBool>,

    // PCI interrupts.
    interrupt_status: Arc<AtomicUsize>,
    virtio_interrupt: Option<Arc<dyn VirtioInterrupt>>,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,

    // virtio queues
    queues: Vec<Queue<GuestMemoryAtomic<GuestMemoryMmap>>>,
    queue_evts: Vec<EventFd>,

    // Guest memory
    memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,

    // Settings PCI BAR
    settings_bar: u8,
    settings_bar_addr: Option<GuestAddress>,

    // Whether to use 64-bit bar location or 32-bit
    use_64bit_bar: bool,

    // Add a dedicated structure to hold information about the very specific
    // virtio-pci capability VIRTIO_PCI_CAP_PCI_CFG. This is needed to support
    // the legacy/backward compatible mechanism of letting the guest access the
    // other virtio capabilities without mapping the PCI BARs. This can be
    // needed when the guest tries to early access the virtio configuration of
    // a device.
    cap_pci_cfg_info: VirtioPciCfgCapInfo,

    // Details of bar regions to free
    bar_regions: Vec<(GuestAddress, GuestUsize, PciBarRegionType)>,

    // EventFd to signal on to request activation
    activate_evt: EventFd,

    // Barrier that is used to wait on for activation
    activate_barrier: Arc<Barrier>,

    // Optional DMA handler
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        memory: GuestMemoryAtomic<GuestMemoryMmap>,
        device: Arc<Mutex<dyn VirtioDevice>>,
        msix_num: u16,
        access_platform: Option<Arc<dyn AccessPlatform>>,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
        pci_device_bdf: u32,
        activate_evt: EventFd,
        use_64bit_bar: bool,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> Result<Self> {
        // Clone before locking so the original Arc can be moved into the
        // struct below while the device stays locked for the whole setup.
        let device_clone = device.clone();
        let mut locked_device = device_clone.lock().unwrap();
        // One eventfd per queue; these become the ioeventfd doorbells.
        let mut queue_evts = Vec::new();
        for _ in locked_device.queue_max_sizes().iter() {
            queue_evts.push(EventFd::new(EFD_NONBLOCK)?)
        }
        let num_queues = locked_device.queue_max_sizes().len();

        if let Some(access_platform) = &access_platform {
            locked_device.set_access_platform(access_platform.clone());
        }

        // Build one queue per advertised max size, all backed by the same
        // guest memory.
        let queues = locked_device
            .queue_max_sizes()
            .iter()
            .map(|&s| {
                Queue::<GuestMemoryAtomic<GuestMemoryMmap>, virtio_queue::QueueState>::new(
                    memory.clone(),
                    s,
                )
            })
            .collect();

        // Modern virtio-pci device IDs are 0x1040 + device type.
        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + locked_device.device_type() as u16;

        let interrupt_source_group = interrupt_manager.create_group(MsiIrqGroupConfig {
            base: 0,
            count: msix_num as InterruptIndex,
        })?;

        // MSI-X is only instantiated when vectors were requested; the clone
        // is handed to PciConfiguration below.
        let (msix_config, msix_config_clone) = if msix_num > 0 {
            let msix_config = Arc::new(Mutex::new(MsixConfig::new(
                msix_num,
                interrupt_source_group.clone(),
                pci_device_bdf,
            )));
            let msix_config_clone = msix_config.clone();
            (Some(msix_config), Some(msix_config_clone))
        } else {
            (None, None)
        };

        // Map the virtio device type onto a standard PCI class/subclass where
        // one exists; everything else is reported as "Other".
        let (class, subclass) = match VirtioDeviceType::from(locked_device.device_type()) {
            VirtioDeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::EthernetController as &dyn PciSubclass,
            ),
            VirtioDeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::MassStorage as &dyn PciSubclass,
            ),
            _ => (
                PciClassCode::Other,
                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
            ),
        };

        let configuration = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            0x1, // For modern virtio-PCI devices
            class,
            subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            msix_config_clone,
        );

        let mut virtio_pci_device = VirtioPciDevice {
            id,
            configuration,
            common_config: VirtioPciCommonConfig {
                access_platform,
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: Arc::new(AtomicU16::new(VIRTQ_MSI_NO_VECTOR)),
                msix_queues: Arc::new(Mutex::new(vec![VIRTQ_MSI_NO_VECTOR; num_queues])),
            },
            msix_config,
            msix_num,
            device,
            device_activated: Arc::new(AtomicBool::new(false)),
            interrupt_status: Arc::new(AtomicUsize::new(0)),
            virtio_interrupt: None,
            queues,
            queue_evts,
            memory: Some(memory),
            settings_bar: 0,
            settings_bar_addr: None,
            use_64bit_bar,
            interrupt_source_group,
            cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
            bar_regions: vec![],
            activate_evt,
            // Two parties: the vCPU thread writing the config space and the
            // thread performing the activation.
            activate_barrier: Arc::new(Barrier::new(2)),
            dma_handler,
        };

        // Wire the MSI-X based interrupt delivery path once the shared
        // msix_config/msix_queues handles exist.
        if let Some(msix_config) = &virtio_pci_device.msix_config {
            virtio_pci_device.virtio_interrupt = Some(Arc::new(VirtioInterruptMsix::new(
                msix_config.clone(),
                virtio_pci_device.common_config.msix_config.clone(),
                virtio_pci_device.common_config.msix_queues.clone(),
                virtio_pci_device.interrupt_source_group.clone(),
            )));
        }

        Ok(virtio_pci_device)
    }

    // Captures a serializable snapshot of the transport state (activation
    // flag, ISR bits and per-queue ring configuration).
    fn state(&self) -> VirtioPciDeviceState {
        VirtioPciDeviceState {
            device_activated: self.device_activated.load(Ordering::Acquire),
            interrupt_status: self.interrupt_status.load(Ordering::Acquire),
            queues: self
                .queues
                .iter()
                .map(|q| QueueState {
                    max_size: q.max_size(),
                    size: q.state.size,
                    ready: q.state.ready,
                    desc_table: q.state.desc_table.0,
                    avail_ring: q.state.avail_ring.0,
                    used_ring: q.state.used_ring.0,
                })
                .collect(),
        }
    }

    // Restores the state captured by state(), then re-derives the queue
    // indexes from the used ring living in guest memory.
    fn set_state(&mut self, state: &VirtioPciDeviceState) -> std::result::Result<(), Error> {
        self.device_activated
            .store(state.device_activated, Ordering::Release);
        self.interrupt_status
            .store(state.interrupt_status, Ordering::Release);

        // Update virtqueues indexes for both available and used rings.
        for (i, queue) in self.queues.iter_mut().enumerate() {
            queue.state.size = state.queues[i].size;
            queue.state.ready = state.queues[i].ready;
            queue.state.desc_table = GuestAddress(state.queues[i].desc_table);
            queue.state.avail_ring = GuestAddress(state.queues[i].avail_ring);
            queue.state.used_ring = GuestAddress(state.queues[i].used_ring);
            // NOTE(review): next_avail is restored from the *used* ring index,
            // i.e. this assumes every descriptor made available before the
            // snapshot had been consumed — confirm this invariant holds at
            // snapshot time.
            queue.set_next_avail(
                queue
                    .used_idx(Ordering::Acquire)
                    .map_err(Error::QueueRingIndex)?
                    .0,
            );
            queue.set_next_used(
                queue
                    .used_idx(Ordering::Acquire)
                    .map_err(Error::QueueRingIndex)?
                    .0,
            );
        }

        Ok(())
    }

    /// Gets the list of queue events that must be triggered whenever the VM writes to
    /// `virtio::NOTIFY_REG_OFFSET` past the MMIO base. Each event must be triggered when the
    /// value being written equals the index of the event in this list.
529 fn queue_evts(&self) -> &[EventFd] { 530 self.queue_evts.as_slice() 531 } 532 533 fn is_driver_ready(&self) -> bool { 534 let ready_bits = 535 (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8; 536 self.common_config.driver_status == ready_bits 537 && self.common_config.driver_status & DEVICE_FAILED as u8 == 0 538 } 539 540 /// Determines if the driver has requested the device (re)init / reset itself 541 fn is_driver_init(&self) -> bool { 542 self.common_config.driver_status == DEVICE_INIT as u8 543 } 544 545 // This function is used by the caller to provide the expected base address 546 // for the virtio-pci configuration BAR. 547 pub fn set_config_bar_addr(&mut self, bar_addr: u64) { 548 self.settings_bar_addr = Some(GuestAddress(bar_addr)); 549 } 550 551 pub fn config_bar_addr(&self) -> u64 { 552 self.configuration.get_bar_addr(self.settings_bar as usize) 553 } 554 555 fn add_pci_capabilities( 556 &mut self, 557 settings_bar: u8, 558 ) -> std::result::Result<(), PciDeviceError> { 559 // Add pointers to the different configuration structures from the PCI capabilities. 560 let common_cap = VirtioPciCap::new( 561 PciCapabilityType::CommonConfig, 562 settings_bar, 563 COMMON_CONFIG_BAR_OFFSET as u32, 564 COMMON_CONFIG_SIZE as u32, 565 ); 566 self.configuration 567 .add_capability(&common_cap) 568 .map_err(PciDeviceError::CapabilitiesSetup)?; 569 570 let isr_cap = VirtioPciCap::new( 571 PciCapabilityType::IsrConfig, 572 settings_bar, 573 ISR_CONFIG_BAR_OFFSET as u32, 574 ISR_CONFIG_SIZE as u32, 575 ); 576 self.configuration 577 .add_capability(&isr_cap) 578 .map_err(PciDeviceError::CapabilitiesSetup)?; 579 580 // TODO(dgreid) - set based on device's configuration size? 
581 let device_cap = VirtioPciCap::new( 582 PciCapabilityType::DeviceConfig, 583 settings_bar, 584 DEVICE_CONFIG_BAR_OFFSET as u32, 585 DEVICE_CONFIG_SIZE as u32, 586 ); 587 self.configuration 588 .add_capability(&device_cap) 589 .map_err(PciDeviceError::CapabilitiesSetup)?; 590 591 let notify_cap = VirtioPciNotifyCap::new( 592 PciCapabilityType::NotifyConfig, 593 settings_bar, 594 NOTIFICATION_BAR_OFFSET as u32, 595 NOTIFICATION_SIZE as u32, 596 Le32::from(NOTIFY_OFF_MULTIPLIER), 597 ); 598 self.configuration 599 .add_capability(¬ify_cap) 600 .map_err(PciDeviceError::CapabilitiesSetup)?; 601 602 let configuration_cap = VirtioPciCfgCap::new(); 603 self.cap_pci_cfg_info.offset = self 604 .configuration 605 .add_capability(&configuration_cap) 606 .map_err(PciDeviceError::CapabilitiesSetup)? 607 + VIRTIO_PCI_CAP_OFFSET; 608 self.cap_pci_cfg_info.cap = configuration_cap; 609 610 if self.msix_config.is_some() { 611 let msix_cap = MsixCap::new( 612 settings_bar, 613 self.msix_num, 614 MSIX_TABLE_BAR_OFFSET as u32, 615 settings_bar, 616 MSIX_PBA_BAR_OFFSET as u32, 617 ); 618 self.configuration 619 .add_capability(&msix_cap) 620 .map_err(PciDeviceError::CapabilitiesSetup)?; 621 } 622 623 self.settings_bar = settings_bar; 624 Ok(()) 625 } 626 627 fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) { 628 let cap_slice = self.cap_pci_cfg_info.cap.as_slice(); 629 let data_len = data.len(); 630 let cap_len = cap_slice.len(); 631 if offset + data_len > cap_len { 632 error!("Failed to read cap_pci_cfg from config space"); 633 return; 634 } 635 636 if offset < std::mem::size_of::<VirtioPciCap>() { 637 if let Some(end) = offset.checked_add(data_len) { 638 // This write can't fail, offset and end are checked against config_len. 639 data.write_all(&cap_slice[offset..cmp::min(end, cap_len)]) 640 .unwrap(); 641 } 642 } else { 643 // Safe since we know self.cap_pci_cfg_info.cap.cap.offset is 32bits long. 
644 let bar_offset: u32 = 645 unsafe { std::mem::transmute(self.cap_pci_cfg_info.cap.cap.offset) }; 646 self.read_bar(0, bar_offset as u64, data) 647 } 648 } 649 650 fn write_cap_pci_cfg(&mut self, offset: usize, data: &[u8]) -> Option<Arc<Barrier>> { 651 let cap_slice = self.cap_pci_cfg_info.cap.as_mut_slice(); 652 let data_len = data.len(); 653 let cap_len = cap_slice.len(); 654 if offset + data_len > cap_len { 655 error!("Failed to write cap_pci_cfg to config space"); 656 return None; 657 } 658 659 if offset < std::mem::size_of::<VirtioPciCap>() { 660 let (_, right) = cap_slice.split_at_mut(offset); 661 right[..data_len].copy_from_slice(data); 662 None 663 } else { 664 // Safe since we know self.cap_pci_cfg_info.cap.cap.offset is 32bits long. 665 let bar_offset: u32 = 666 unsafe { std::mem::transmute(self.cap_pci_cfg_info.cap.cap.offset) }; 667 self.write_bar(0, bar_offset as u64, data) 668 } 669 } 670 671 pub fn virtio_device(&self) -> Arc<Mutex<dyn VirtioDevice>> { 672 self.device.clone() 673 } 674 675 fn activate(&mut self) -> ActivateResult { 676 if let Some(virtio_interrupt) = self.virtio_interrupt.take() { 677 if self.memory.is_some() { 678 let mem = self.memory.as_ref().unwrap().clone(); 679 let mut device = self.device.lock().unwrap(); 680 let mut queue_evts = Vec::new(); 681 let mut queues = self.queues.clone(); 682 queues.retain(|q| q.state.ready); 683 for (i, queue) in queues.iter().enumerate() { 684 queue_evts.push(self.queue_evts[i].try_clone().unwrap()); 685 if !queue.is_valid() { 686 error!("Queue {} is not valid", i); 687 } 688 } 689 return device.activate(mem, virtio_interrupt, queues, queue_evts); 690 } 691 } 692 Ok(()) 693 } 694 695 pub fn maybe_activate(&mut self) { 696 if self.needs_activation() { 697 self.activate().expect("Failed to activate device"); 698 self.device_activated.store(true, Ordering::SeqCst); 699 info!("{}: Waiting for barrier", self.id); 700 self.activate_barrier.wait(); 701 info!("{}: Barrier released", self.id); 702 } 
else {
            info!("{}: Device does not need activation", self.id)
        }
    }

    // Activation is needed once the driver is fully ready and the device has
    // not been activated yet.
    fn needs_activation(&self) -> bool {
        !self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready()
    }

    pub fn dma_handler(&self) -> Option<&Arc<dyn ExternalDmaMapping>> {
        self.dma_handler.as_ref()
    }
}

impl VirtioTransport for VirtioPciDevice {
    // Maps each queue doorbell eventfd to its notification address:
    // base + NOTIFICATION_BAR_OFFSET + queue_index * NOTIFY_OFF_MULTIPLIER.
    fn ioeventfds(&self, base_addr: u64) -> Vec<(&EventFd, u64)> {
        let notify_base = base_addr + NOTIFICATION_BAR_OFFSET;
        self.queue_evts()
            .iter()
            .enumerate()
            .map(|(i, event)| {
                (
                    event,
                    notify_base + i as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                )
            })
            .collect()
    }
}

/// MSI-X based implementation of `VirtioInterrupt`: resolves a virtio
/// interrupt (config change or queue) to the MSI-X vector programmed by the
/// guest and triggers it, honoring masking via the Pending Bit Array.
pub struct VirtioInterruptMsix {
    msix_config: Arc<Mutex<MsixConfig>>,
    config_vector: Arc<AtomicU16>,
    queues_vectors: Arc<Mutex<Vec<u16>>>,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
}

impl VirtioInterruptMsix {
    pub fn new(
        msix_config: Arc<Mutex<MsixConfig>>,
        config_vector: Arc<AtomicU16>,
        queues_vectors: Arc<Mutex<Vec<u16>>>,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    ) -> Self {
        VirtioInterruptMsix {
            msix_config,
            config_vector,
            queues_vectors,
            interrupt_source_group,
        }
    }
}

impl VirtioInterrupt for VirtioInterruptMsix {
    fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), std::io::Error> {
        // Pick the vector the guest assigned for this interrupt source.
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue(queue_index) => {
                self.queues_vectors.lock().unwrap()[queue_index as usize]
            }
        };

        // Guest disabled MSI for this source: nothing to deliver.
        if vector == VIRTQ_MSI_NO_VECTOR {
            return Ok(());
        }

        let config = &mut self.msix_config.lock().unwrap();
        let entry = &config.table_entries[vector as usize];
        // In case the vector control register associated with the entry
        // has its first bit set, this means the vector is masked and the
        // device should not inject the interrupt.
        // Instead, the Pending Bit Array table is updated to reflect there
        // is a pending interrupt for this specific vector.
        if config.masked() || entry.masked() {
            config.set_pba_bit(vector, false);
            return Ok(());
        }

        self.interrupt_source_group
            .trigger(vector as InterruptIndex)
    }

    // Returns the eventfd backing the resolved vector, if any.
    // NOTE(review): unlike trigger(), this does not special-case
    // VIRTQ_MSI_NO_VECTOR before indexing the group — confirm the
    // interrupt source group handles an out-of-range index gracefully.
    fn notifier(&self, int_type: VirtioInterruptType) -> Option<EventFd> {
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue(queue_index) => {
                self.queues_vectors.lock().unwrap()[queue_index as usize]
            }
        };

        self.interrupt_source_group
            .notifier(vector as InterruptIndex)
    }
}

impl PciDevice for VirtioPciDevice {
    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Arc<Barrier>> {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base + offset as usize >= self.cap_pci_cfg_info.offset
            && base + offset as usize + data.len()
                <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            let offset = base + offset as usize - self.cap_pci_cfg_info.offset;
            self.write_cap_pci_cfg(offset, data)
        } else {
            self.configuration
                .write_config_register(reg_idx, offset, data);
            None
        }
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
825 let base = reg_idx * 4; 826 if base >= self.cap_pci_cfg_info.offset 827 && base + 4 <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len() 828 { 829 let offset = base - self.cap_pci_cfg_info.offset; 830 let mut data = [0u8; 4]; 831 self.read_cap_pci_cfg(offset, &mut data); 832 u32::from_le_bytes(data) 833 } else { 834 self.configuration.read_reg(reg_idx) 835 } 836 } 837 838 fn detect_bar_reprogramming( 839 &mut self, 840 reg_idx: usize, 841 data: &[u8], 842 ) -> Option<BarReprogrammingParams> { 843 self.configuration.detect_bar_reprogramming(reg_idx, data) 844 } 845 846 fn allocate_bars( 847 &mut self, 848 allocator: &Arc<Mutex<SystemAllocator>>, 849 mmio_allocator: &mut AddressAllocator, 850 ) -> std::result::Result<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>, PciDeviceError> 851 { 852 let mut ranges = Vec::new(); 853 let device_clone = self.device.clone(); 854 let device = device_clone.lock().unwrap(); 855 856 // Allocate the virtio-pci capability BAR. 857 // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004 858 let (virtio_pci_bar_addr, region_type) = if self.use_64bit_bar { 859 let region_type = PciBarRegionType::Memory64BitRegion; 860 let addr = mmio_allocator 861 .allocate( 862 self.settings_bar_addr, 863 CAPABILITY_BAR_SIZE, 864 Some(CAPABILITY_BAR_SIZE), 865 ) 866 .ok_or(PciDeviceError::IoAllocationFailed(CAPABILITY_BAR_SIZE))?; 867 ranges.push((addr, CAPABILITY_BAR_SIZE, region_type)); 868 (addr, region_type) 869 } else { 870 let region_type = PciBarRegionType::Memory32BitRegion; 871 let addr = allocator 872 .lock() 873 .unwrap() 874 .allocate_mmio_hole_addresses( 875 self.settings_bar_addr, 876 CAPABILITY_BAR_SIZE, 877 Some(CAPABILITY_BAR_SIZE), 878 ) 879 .ok_or(PciDeviceError::IoAllocationFailed(CAPABILITY_BAR_SIZE))?; 880 ranges.push((addr, CAPABILITY_BAR_SIZE, region_type)); 881 (addr, region_type) 882 }; 883 self.bar_regions 884 .push((virtio_pci_bar_addr, CAPABILITY_BAR_SIZE, 
region_type)); 885 886 let config = PciBarConfiguration::default() 887 .set_register_index(0) 888 .set_address(virtio_pci_bar_addr.raw_value()) 889 .set_size(CAPABILITY_BAR_SIZE) 890 .set_region_type(region_type); 891 let virtio_pci_bar = 892 self.configuration.add_pci_bar(&config).map_err(|e| { 893 PciDeviceError::IoRegistrationFailed(virtio_pci_bar_addr.raw_value(), e) 894 })? as u8; 895 896 // Once the BARs are allocated, the capabilities can be added to the PCI configuration. 897 self.add_pci_capabilities(virtio_pci_bar)?; 898 899 // Allocate a dedicated BAR if there are some shared memory regions. 900 if let Some(shm_list) = device.get_shm_regions() { 901 let config = PciBarConfiguration::default() 902 .set_register_index(2) 903 .set_address(shm_list.addr.raw_value()) 904 .set_size(shm_list.len); 905 let virtio_pci_shm_bar = 906 self.configuration.add_pci_bar(&config).map_err(|e| { 907 PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e) 908 })? as u8; 909 910 let region_type = PciBarRegionType::Memory64BitRegion; 911 ranges.push((shm_list.addr, shm_list.len, region_type)); 912 self.bar_regions 913 .push((shm_list.addr, shm_list.len, region_type)); 914 915 for (idx, shm) in shm_list.region_list.iter().enumerate() { 916 let shm_cap = VirtioPciCap64::new( 917 PciCapabilityType::SharedMemoryConfig, 918 virtio_pci_shm_bar, 919 idx as u8, 920 shm.offset, 921 shm.len, 922 ); 923 self.configuration 924 .add_capability(&shm_cap) 925 .map_err(PciDeviceError::CapabilitiesSetup)?; 926 } 927 } 928 929 Ok(ranges) 930 } 931 932 fn free_bars( 933 &mut self, 934 allocator: &mut SystemAllocator, 935 mmio_allocator: &mut AddressAllocator, 936 ) -> std::result::Result<(), PciDeviceError> { 937 for (addr, length, type_) in self.bar_regions.drain(..) 
{
            // Hand each BAR region back to the allocator it was carved from:
            // 32-bit regions come from the MMIO hole allocator, 64-bit
            // regions from the 64-bit MMIO address allocator.
            match type_ {
                PciBarRegionType::Memory32BitRegion => {
                    allocator.free_mmio_hole_addresses(addr, length);
                }
                PciBarRegionType::Memory64BitRegion => {
                    mmio_allocator.free(addr, length);
                }
                _ => error!("Unexpected PCI bar type"),
            }
        }
        Ok(())
    }

    // Track a BAR reprogramming: only the cached base address is updated so
    // that free_bars() later releases the correct ranges.
    fn move_bar(&mut self, old_base: u64, new_base: u64) -> result::Result<(), std::io::Error> {
        // We only update our idea of the bar in order to support free_bars() above.
        // The majority of the reallocation is done inside DeviceManager.
        for (addr, _, _) in self.bar_regions.iter_mut() {
            if (*addr).0 == old_base {
                *addr = GuestAddress(new_base);
            }
        }

        Ok(())
    }

    // Route a guest read of the virtio-pci BAR to the sub-region selected by
    // `offset`: common config, ISR status, device-specific config,
    // notification area, MSI-X table or MSI-X PBA. Reads that fall outside
    // every known region are silently ignored.
    fn read_bar(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            // Virtio common configuration structure.
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => self.common_config.read(
                o - COMMON_CONFIG_BAR_OFFSET,
                data,
                &mut self.queues,
                self.device.clone(),
            ),
            // ISR status byte: the read returns the currently pending bits
            // and atomically clears them in the same operation.
            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
                if let Some(v) = data.get_mut(0) {
                    // Reading this register resets it to 0.
                    *v = self.interrupt_status.swap(0, Ordering::AcqRel) as u8;
                }
            }
            // Device-specific configuration, forwarded to the backing
            // virtio device implementation.
            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
                .contains(&o) =>
            {
                let device = self.device.lock().unwrap();
                device.read_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
                .contains(&o) =>
            {
                // Handled with ioeventfds.
            }
            // MSI-X table, delegated to MsixConfig when MSI-X is configured.
            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .read_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            // MSI-X pending bit array, delegated to MsixConfig as well.
            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .read_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            _ => (),
        }
    }

    // Route a guest write of the virtio-pci BAR to the sub-region selected
    // by `offset`, then react to driver status changes: either kick the
    // activation path (returning a barrier the caller must wait on) or
    // perform a driver-initiated device reset.
    fn write_bar(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            // Virtio common configuration structure.
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => self.common_config.write(
                o - COMMON_CONFIG_BAR_OFFSET,
                data,
                &mut self.queues,
                self.device.clone(),
            ),
            // ISR status byte: the written value acts as a mask of status
            // bits to clear.
            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
                if let Some(v) = data.get(0) {
                    self.interrupt_status
                        .fetch_and(!(*v as usize), Ordering::AcqRel);
                }
            }
            // Device-specific configuration, forwarded to the backing
            // virtio device implementation.
            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
                .contains(&o) =>
            {
                let mut device = self.device.lock().unwrap();
                device.write_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
                .contains(&o) =>
            {
                // Handled with ioeventfds.
                // A write reaching this path bypassed the ioeventfd
                // registration, hence the error log below.
                error!("Unexpected write to notification BAR: offset = 0x{:x}", o);
            }
            // MSI-X table, delegated to MsixConfig when MSI-X is configured.
            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .write_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            // MSI-X pending bit array, delegated to MsixConfig as well.
            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .write_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            _ => (),
        };

        // Try and activate the device if the driver status has changed
        if self.needs_activation() {
            info!(
                "{}: Needs activation; writing to activate event fd",
                self.id
            );
            // Best-effort signal: failure to write the eventfd is ignored.
            self.activate_evt.write(1).ok();
            info!("{}: Needs activation; returning barrier", self.id);
            // The caller waits on this barrier until activation completes.
            return Some(self.activate_barrier.clone());
        }

        // Device has been reset by the driver
        if self.device_activated.load(Ordering::SeqCst) && self.is_driver_init() {
            let mut device = self.device.lock().unwrap();
            if let Some(virtio_interrupt) = device.reset() {
                // Upon reset the device returns its interrupt EventFD
                self.virtio_interrupt = Some(virtio_interrupt);
                self.device_activated.store(false, Ordering::SeqCst);

                // Reset queue readiness (changes queue_enable), queue sizes
                // and selected_queue as per spec for reset
                self.queues.iter_mut().for_each(Queue::reset);
                self.common_config.queue_select = 0;
            } else {
                // The backing device cannot reset: mark the device FAILED so
                // the driver can observe the broken state.
                error!("Attempt to reset device when not implemented in underlying device");
                self.common_config.driver_status = crate::DEVICE_FAILED as u8;
            }
        }

        None
    }

    fn as_any(&mut self) -> &mut dyn Any {
        self
    }
}

// Expose the device on the bus by forwarding accesses to the BAR handlers.
impl BusDevice for VirtioPciDevice {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.read_bar(base,
 offset, data)
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.write_bar(base, offset, data)
    }
}

// Pause/resume are no-ops: nothing is saved or quiesced at the transport
// level here.
impl Pausable for VirtioPciDevice {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        Ok(())
    }
}

impl Snapshottable for VirtioPciDevice {
    fn id(&self) -> String {
        self.id.clone()
    }

    // Capture the transport's versioned state plus the sub-snapshots of the
    // PCI configuration space, the virtio common configuration and, when
    // present, the MSI-X configuration.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut virtio_pci_dev_snapshot =
            Snapshot::new_from_versioned_state(&self.id, &self.state())?;

        // Snapshot PciConfiguration
        virtio_pci_dev_snapshot.add_snapshot(self.configuration.snapshot()?);

        // Snapshot VirtioPciCommonConfig
        virtio_pci_dev_snapshot.add_snapshot(self.common_config.snapshot()?);

        // Snapshot MSI-X
        if let Some(msix_config) = &self.msix_config {
            virtio_pci_dev_snapshot.add_snapshot(msix_config.lock().unwrap().snapshot()?);
        }

        Ok(virtio_pci_dev_snapshot)
    }

    // Restore the transport from `snapshot`: sub-components first (MSI-X,
    // common config, PCI config), then the versioned transport state, and
    // finally re-activate the device if it was active when snapshotted.
    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // The section key follows the "<id>-section" naming used by
        // Snapshot::new_from_versioned_state() above.
        if let Some(virtio_pci_dev_section) =
            snapshot.snapshot_data.get(&format!("{}-section", self.id))
        {
            // Restore MSI-X
            if let Some(msix_config) = &self.msix_config {
                let id = msix_config.lock().unwrap().id();
                if let Some(msix_snapshot) = snapshot.snapshots.get(&id) {
                    msix_config
                        .lock()
                        .unwrap()
                        .restore(*msix_snapshot.clone())?;
                }
            }

            // Restore VirtioPciCommonConfig
            if let Some(virtio_config_snapshot) = snapshot.snapshots.get(&self.common_config.id()) {
                self.common_config
                    .restore(*virtio_config_snapshot.clone())?;
            }

            // Restore PciConfiguration
            if let Some(pci_config_snapshot) = snapshot.snapshots.get(&self.configuration.id()) {
                self.configuration.restore(*pci_config_snapshot.clone())?;
            }

            // First restore the status of the virtqueues.
            self.set_state(&virtio_pci_dev_section.to_versioned_state()?)
                .map_err(|e| {
                    MigratableError::Restore(anyhow!(
                        "Could not restore VIRTIO_PCI_DEVICE state {:?}",
                        e
                    ))
                })?;

            // Then we can activate the device, as we know at this point that
            // the virtqueues are in the right state and the device is ready
            // to be activated, which will spawn each virtio worker thread.
            if self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready() {
                self.activate().map_err(|e| {
                    MigratableError::Restore(anyhow!("Failed activating the device: {:?}", e))
                })?;
            }

            return Ok(());
        }

        // The expected snapshot section is missing: fail the restore.
        Err(MigratableError::Restore(anyhow!(
            "Could not find VIRTIO_PCI_DEVICE snapshot section"
        )))
    }
}

// The default trait methods are sufficient for transport and migration.
impl Transportable for VirtioPciDevice {}
impl Migratable for VirtioPciDevice {}