// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::VirtioPciCommonConfig;
use crate::transport::VirtioTransport;
use crate::GuestMemoryMmap;
use crate::{
    ActivateResult, Queue, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioInterruptType,
    DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, DEVICE_FAILED, DEVICE_FEATURES_OK,
    DEVICE_INIT,
};
use anyhow::anyhow;
use libc::EFD_NONBLOCK;
use pci::{
    BarReprogrammingParams, MsixCap, MsixConfig, PciBarConfiguration, PciBarRegionType,
    PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, PciDevice, PciDeviceError,
    PciHeaderType, PciMassStorageSubclass, PciNetworkControllerSubclass, PciSubclass,
};
use std::any::Any;
use std::cmp;
use std::io::Write;
use std::num::Wrapping;
use std::result;
use std::sync::atomic::{AtomicBool, AtomicU16, AtomicUsize, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_allocator::SystemAllocator;
use vm_device::interrupt::{
    InterruptIndex, InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig,
};
use vm_device::BusDevice;
use vm_memory::{
    Address, ByteValued, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestUsize, Le32,
};
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, VersionMapped,
};
use vm_virtio::{queue, VirtioIommuRemapping, VIRTIO_MSI_NO_VECTOR};
use vmm_sys_util::{errno::Result, eventfd::EventFd};

/// Errors local to this transport, currently only produced while restoring
/// virtqueue state from a snapshot.
#[derive(Debug)]
enum Error {
    /// Failed to retrieve queue ring's index.
    QueueRingIndex(queue::Error),
}

// Capability structure types for the virtio-pci vendor-specific PCI
// capabilities. The discriminant values are the VIRTIO_PCI_CAP_* constants
// defined by the virtio specification.
#[allow(clippy::enum_variant_names)]
enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    SharedMemoryConfig = 8,
}

// This offset represents the 2 bytes omitted from the VirtioPciCap structure
// as they are already handled through add_capability(). These 2 bytes are the
// fields cap_vndr (1 byte) and cap_next (1 byte) defined in the virtio spec.
const VIRTIO_PCI_CAP_OFFSET: usize = 2;

// On-the-wire layout of the generic virtio-pci capability. `#[repr(packed)]`
// matches the exact byte layout the guest reads from config space.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCap {
    cap_len: u8,      // Generic PCI field: capability length
    cfg_type: u8,     // Identifies the structure.
    pci_bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    offset: Le32,     // Offset within bar.
    length: Le32,     // Length of the structure, in bytes.
}
// It is safe to implement ByteValued. All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap {}

impl PciCapability for VirtioPciCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

// cap_len must also count the cap_vndr/cap_next bytes that add_capability()
// writes on our behalf (see VIRTIO_PCI_CAP_OFFSET above).
const VIRTIO_PCI_CAP_LEN_OFFSET: u8 = 2;

impl VirtioPciCap {
    /// Builds a capability pointing at `length` bytes located at `offset`
    /// inside BAR `pci_bar`.
    pub fn new(cfg_type: PciCapabilityType, pci_bar: u8, offset: u32, length: u32) -> Self {
        VirtioPciCap {
            cap_len: (std::mem::size_of::<VirtioPciCap>() as u8) + VIRTIO_PCI_CAP_LEN_OFFSET,
            cfg_type: cfg_type as u8,
            pci_bar,
            id: 0,
            padding: [0; 2],
            offset: Le32::from(offset),
            length: Le32::from(length),
        }
    }
}

// Notify capability: generic header plus the per-queue notify multiplier.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}
// It is safe to implement ByteValued. All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciNotifyCap {}

impl PciCapability for VirtioPciNotifyCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciNotifyCap {
    /// Builds a notify capability; `multiplier` is the per-queue offset
    /// multiplier the guest applies to queue_notify_off.
    pub fn new(
        cfg_type: PciCapabilityType,
        pci_bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                // cap_len covers this whole structure, including the
                // cap_vndr/cap_next bytes added by add_capability().
                cap_len: (std::mem::size_of::<VirtioPciNotifyCap>() as u8)
                    + VIRTIO_PCI_CAP_LEN_OFFSET,
                cfg_type: cfg_type as u8,
                pci_bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}

// 64-bit variant of the capability: the generic header carries the low 32
// bits of offset/length, the extra fields carry the high 32 bits.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCap64 {
    cap: VirtioPciCap,
    offset_hi: Le32,
    length_hi: Le32,
}
// It is safe to implement ByteValued. All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCap64 {}

impl PciCapability for VirtioPciCap64 {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciCap64 {
    /// Builds a 64-bit capability, splitting `offset` and `length` into
    /// low/high halves.
    pub fn new(cfg_type: PciCapabilityType, pci_bar: u8, id: u8, offset: u64, length: u64) -> Self {
        VirtioPciCap64 {
            cap: VirtioPciCap {
                cap_len: (std::mem::size_of::<VirtioPciCap64>() as u8) + VIRTIO_PCI_CAP_LEN_OFFSET,
                cfg_type: cfg_type as u8,
                pci_bar,
                id,
                padding: [0; 2],
                offset: Le32::from(offset as u32),
                length: Le32::from(length as u32),
            },
            offset_hi: Le32::from((offset >> 32) as u32),
            length_hi: Le32::from((length >> 32) as u32),
        }
    }
}

// VIRTIO_PCI_CAP_PCI_CFG capability: lets the guest window into the other
// capability regions through config space, via the pci_cfg_data field.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
struct VirtioPciCfgCap {
    cap: VirtioPciCap,
    pci_cfg_data: [u8; 4],
}
// It is safe to implement ByteValued. All members are simple numbers and any value is valid.
unsafe impl ByteValued for VirtioPciCfgCap {}

impl PciCapability for VirtioPciCfgCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::VendorSpecific
    }
}

impl VirtioPciCfgCap {
    fn new() -> Self {
        VirtioPciCfgCap {
            cap: VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0),
            ..Default::default()
        }
    }
}

// Cached copy of the VIRTIO_PCI_CAP_PCI_CFG capability plus its location in
// the device's config space, so accesses can be intercepted later.
#[derive(Clone, Copy, Default)]
struct VirtioPciCfgCapInfo {
    offset: usize,
    cap: VirtioPciCfgCap,
}

#[allow(dead_code)]
#[derive(Copy, Clone)]
pub enum PciVirtioSubclass {
    NonTransitionalBase = 0xff,
}

impl PciSubclass for PciVirtioSubclass {
    fn get_register_value(&self) -> u8 {
        *self as u8
    }
}

// Allocate one bar for the structs pointed to by the capability structures.
// As per the PCI specification, because the same BAR shares MSI-X and non
// MSI-X structures, it is recommended to use 8KiB alignment for all those
// structures.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x2000;
const ISR_CONFIG_SIZE: u64 = 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x4000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const NOTIFICATION_BAR_OFFSET: u64 = 0x6000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x8000;
// The size is 256KiB because the table can hold up to 2048 entries, with each
// entry being 128 bits (4 DWORDS).
const MSIX_TABLE_SIZE: u64 = 0x40000;
const MSIX_PBA_BAR_OFFSET: u64 = 0x48000;
// The size is 2KiB because the Pending Bit Array has one bit per vector and it
// can support up to 2048 vectors.
const MSIX_PBA_SIZE: u64 = 0x800;
// The BAR size must be a power of 2.
const CAPABILITY_BAR_SIZE: u64 = 0x80000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.

// Serialized form of one virtqueue, captured for snapshot/restore.
#[derive(Versionize)]
struct QueueState {
    max_size: u16,
    size: u16,
    ready: bool,
    vector: u16,
    desc_table: u64,
    avail_ring: u64,
    used_ring: u64,
}

// Serialized form of the whole transport state for snapshot/restore.
#[derive(Versionize)]
struct VirtioPciDeviceState {
    device_activated: bool,
    queues: Vec<QueueState>,
    interrupt_status: usize,
}

impl VersionMapped for VirtioPciDeviceState {}

/// PCI transport wrapping a virtio device: exposes the virtio configuration
/// structures through a PCI BAR and capabilities, and routes interrupts
/// through MSI-X.
pub struct VirtioPciDevice {
    id: String,

    // PCI configuration registers.
    configuration: PciConfiguration,

    // virtio PCI common configuration
    common_config: VirtioPciCommonConfig,

    // MSI-X config
    msix_config: Option<Arc<Mutex<MsixConfig>>>,

    // Number of MSI-X vectors
    msix_num: u16,

    // Virtio device reference and status
    device: Arc<Mutex<dyn VirtioDevice>>,
    device_activated: Arc<AtomicBool>,

    // PCI interrupts.
    interrupt_status: Arc<AtomicUsize>,
    virtio_interrupt: Option<Arc<dyn VirtioInterrupt>>,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,

    // virtio queues
    queues: Vec<Queue>,
    queue_evts: Vec<EventFd>,

    // Guest memory
    memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,

    // Settings PCI BAR
    settings_bar: u8,
    settings_bar_addr: Option<GuestAddress>,

    // Whether to use 64-bit bar location or 32-bit
    use_64bit_bar: bool,

    // Add a dedicated structure to hold information about the very specific
    // virtio-pci capability VIRTIO_PCI_CAP_PCI_CFG. This is needed to support
    // the legacy/backward compatible mechanism of letting the guest access the
    // other virtio capabilities without mapping the PCI BARs. This can be
    // needed when the guest tries to early access the virtio configuration of
    // a device.
    cap_pci_cfg_info: VirtioPciCfgCapInfo,

    // Details of bar regions to free
    bar_regions: Vec<(GuestAddress, GuestUsize, PciBarRegionType)>,

    // EventFd to signal on to request activation
    activate_evt: EventFd,

    // Barrier that is used to wait on for activation
    activate_barrier: Arc<Barrier>,
}

impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        id: String,
        memory: GuestMemoryAtomic<GuestMemoryMmap>,
        device: Arc<Mutex<dyn VirtioDevice>>,
        msix_num: u16,
        iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>>,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
        pci_device_bdf: u32,
        activate_evt: EventFd,
    ) -> Result<Self> {
        let device_clone = device.clone();
        let locked_device = device_clone.lock().unwrap();
        // One non-blocking eventfd per virtqueue; the guest kicks these to
        // notify the device.
        let mut queue_evts = Vec::new();
        for _ in locked_device.queue_max_sizes().iter() {
            queue_evts.push(EventFd::new(EFD_NONBLOCK)?)
        }
        // Create the queues with the device-advertised max sizes, each wired
        // to the optional IOMMU remapping callback.
        let queues = locked_device
            .queue_max_sizes()
            .iter()
            .map(|&s| {
                let mut queue = Queue::new(s);
                queue.iommu_mapping_cb = iommu_mapping_cb.clone();
                queue
            })
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + locked_device.device_type() as u16;

        let interrupt_source_group = interrupt_manager.create_group(MsiIrqGroupConfig {
            base: 0,
            count: msix_num as InterruptIndex,
        })?;

        // Only set up MSI-X when at least one vector was requested.
        let (msix_config, msix_config_clone) = if msix_num > 0 {
            let msix_config = Arc::new(Mutex::new(MsixConfig::new(
                msix_num,
                interrupt_source_group.clone(),
                pci_device_bdf,
            )));
            let msix_config_clone = msix_config.clone();
            (Some(msix_config), Some(msix_config_clone))
        } else {
            (None, None)
        };

        // All device types *except* virtio block devices should be allocated a 64-bit bar
        // The block devices should be given a 32-bit BAR so that they are easily accessible
        // to firmware without requiring excessive identity mapping.
        let mut use_64bit_bar = true;
        let (class, subclass) = match VirtioDeviceType::from(locked_device.device_type()) {
            VirtioDeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::EthernetController as &dyn PciSubclass,
            ),
            VirtioDeviceType::Block => {
                use_64bit_bar = false;
                (
                    PciClassCode::MassStorage,
                    &PciMassStorageSubclass::MassStorage as &dyn PciSubclass,
                )
            }
            _ => (
                PciClassCode::Other,
                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
            ),
        };

        let configuration = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            0x1, // For modern virtio-PCI devices
            class,
            subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            msix_config_clone,
        );

        let mut virtio_pci_device = VirtioPciDevice {
            id,
            configuration,
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: Arc::new(AtomicU16::new(VIRTIO_MSI_NO_VECTOR)),
            },
            msix_config,
            msix_num,
            device,
            device_activated: Arc::new(AtomicBool::new(false)),
            interrupt_status: Arc::new(AtomicUsize::new(0)),
            virtio_interrupt: None,
            queues,
            queue_evts,
            memory: Some(memory),
            settings_bar: 0,
            settings_bar_addr: None,
            use_64bit_bar,
            interrupt_source_group,
            cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
            bar_regions: vec![],
            activate_evt,
            // Two parties: the thread that activates the device and the vCPU
            // thread that waits for activation to finish.
            activate_barrier: Arc::new(Barrier::new(2)),
        };

        if let Some(msix_config) = &virtio_pci_device.msix_config {
            virtio_pci_device.virtio_interrupt = Some(Arc::new(VirtioInterruptMsix::new(
                msix_config.clone(),
                virtio_pci_device.common_config.msix_config.clone(),
                virtio_pci_device.interrupt_source_group.clone(),
            )));
        }

        Ok(virtio_pci_device)
    }

    // Capture the transport state (activation flag, ISR status and per-queue
    // registers) for snapshotting.
    fn state(&self) -> VirtioPciDeviceState {
        VirtioPciDeviceState {
            device_activated: self.device_activated.load(Ordering::Acquire),
            interrupt_status: self.interrupt_status.load(Ordering::Acquire),
            queues: self
                .queues
                .iter()
                .map(|q| QueueState {
                    max_size: q.max_size,
                    size: q.size,
                    ready: q.ready,
                    vector: q.vector,
                    desc_table: q.desc_table.0,
                    avail_ring: q.avail_ring.0,
                    used_ring: q.used_ring.0,
                })
                .collect(),
        }
    }

    // Restore transport state from a snapshot; re-derives the ring indexes
    // from guest memory.
    fn set_state(&mut self, state: &VirtioPciDeviceState) -> std::result::Result<(), Error> {
        self.device_activated
            .store(state.device_activated, Ordering::Release);
        self.interrupt_status
            .store(state.interrupt_status, Ordering::Release);

        // Update virtqueues indexes for both available and used rings.
        if let Some(mem) = self.memory.as_ref() {
            let mem = mem.memory();
            for (i, queue) in self.queues.iter_mut().enumerate() {
                queue.max_size = state.queues[i].max_size;
                queue.size = state.queues[i].size;
                queue.ready = state.queues[i].ready;
                queue.vector = state.queues[i].vector;
                queue.desc_table = GuestAddress(state.queues[i].desc_table);
                queue.avail_ring = GuestAddress(state.queues[i].avail_ring);
                queue.used_ring = GuestAddress(state.queues[i].used_ring);
                // NOTE(review): next_avail is restored from the *used* ring
                // index, which assumes every descriptor popped from the avail
                // ring was completed before the snapshot — confirm this
                // ordering guarantee holds at snapshot time.
                queue.next_avail = Wrapping(
                    queue
                        .used_index_from_memory(&mem)
                        .map_err(Error::QueueRingIndex)?,
                );
                queue.next_used = Wrapping(
                    queue
                        .used_index_from_memory(&mem)
                        .map_err(Error::QueueRingIndex)?,
                );
            }
        }

        Ok(())
    }

    /// Gets the list of queue events that must be triggered whenever the VM writes to
    /// `virtio::NOTIFY_REG_OFFSET` past the MMIO base. Each event must be triggered when the
    /// value being written equals the index of the event in this list.
523 fn queue_evts(&self) -> &[EventFd] { 524 self.queue_evts.as_slice() 525 } 526 527 fn is_driver_ready(&self) -> bool { 528 let ready_bits = 529 (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8; 530 self.common_config.driver_status == ready_bits 531 && self.common_config.driver_status & DEVICE_FAILED as u8 == 0 532 } 533 534 /// Determines if the driver has requested the device (re)init / reset itself 535 fn is_driver_init(&self) -> bool { 536 self.common_config.driver_status == DEVICE_INIT as u8 537 } 538 539 // This function is used by the caller to provide the expected base address 540 // for the virtio-pci configuration BAR. 541 pub fn set_config_bar_addr(&mut self, bar_addr: u64) { 542 self.settings_bar_addr = Some(GuestAddress(bar_addr)); 543 } 544 545 pub fn config_bar_addr(&self) -> u64 { 546 self.configuration.get_bar_addr(self.settings_bar as usize) 547 } 548 549 fn add_pci_capabilities( 550 &mut self, 551 settings_bar: u8, 552 ) -> std::result::Result<(), PciDeviceError> { 553 // Add pointers to the different configuration structures from the PCI capabilities. 554 let common_cap = VirtioPciCap::new( 555 PciCapabilityType::CommonConfig, 556 settings_bar, 557 COMMON_CONFIG_BAR_OFFSET as u32, 558 COMMON_CONFIG_SIZE as u32, 559 ); 560 self.configuration 561 .add_capability(&common_cap) 562 .map_err(PciDeviceError::CapabilitiesSetup)?; 563 564 let isr_cap = VirtioPciCap::new( 565 PciCapabilityType::IsrConfig, 566 settings_bar, 567 ISR_CONFIG_BAR_OFFSET as u32, 568 ISR_CONFIG_SIZE as u32, 569 ); 570 self.configuration 571 .add_capability(&isr_cap) 572 .map_err(PciDeviceError::CapabilitiesSetup)?; 573 574 // TODO(dgreid) - set based on device's configuration size? 
575 let device_cap = VirtioPciCap::new( 576 PciCapabilityType::DeviceConfig, 577 settings_bar, 578 DEVICE_CONFIG_BAR_OFFSET as u32, 579 DEVICE_CONFIG_SIZE as u32, 580 ); 581 self.configuration 582 .add_capability(&device_cap) 583 .map_err(PciDeviceError::CapabilitiesSetup)?; 584 585 let notify_cap = VirtioPciNotifyCap::new( 586 PciCapabilityType::NotifyConfig, 587 settings_bar, 588 NOTIFICATION_BAR_OFFSET as u32, 589 NOTIFICATION_SIZE as u32, 590 Le32::from(NOTIFY_OFF_MULTIPLIER), 591 ); 592 self.configuration 593 .add_capability(¬ify_cap) 594 .map_err(PciDeviceError::CapabilitiesSetup)?; 595 596 let configuration_cap = VirtioPciCfgCap::new(); 597 self.cap_pci_cfg_info.offset = self 598 .configuration 599 .add_capability(&configuration_cap) 600 .map_err(PciDeviceError::CapabilitiesSetup)? 601 + VIRTIO_PCI_CAP_OFFSET; 602 self.cap_pci_cfg_info.cap = configuration_cap; 603 604 if self.msix_config.is_some() { 605 let msix_cap = MsixCap::new( 606 settings_bar, 607 self.msix_num, 608 MSIX_TABLE_BAR_OFFSET as u32, 609 settings_bar, 610 MSIX_PBA_BAR_OFFSET as u32, 611 ); 612 self.configuration 613 .add_capability(&msix_cap) 614 .map_err(PciDeviceError::CapabilitiesSetup)?; 615 } 616 617 self.settings_bar = settings_bar; 618 Ok(()) 619 } 620 621 fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) { 622 let cap_slice = self.cap_pci_cfg_info.cap.as_slice(); 623 let data_len = data.len(); 624 let cap_len = cap_slice.len(); 625 if offset + data_len > cap_len { 626 error!("Failed to read cap_pci_cfg from config space"); 627 return; 628 } 629 630 if offset < std::mem::size_of::<VirtioPciCap>() { 631 if let Some(end) = offset.checked_add(data_len) { 632 // This write can't fail, offset and end are checked against config_len. 633 data.write_all(&cap_slice[offset..cmp::min(end, cap_len)]) 634 .unwrap(); 635 } 636 } else { 637 // Safe since we know self.cap_pci_cfg_info.cap.cap.offset is 32bits long. 
638 let bar_offset: u32 = 639 unsafe { std::mem::transmute(self.cap_pci_cfg_info.cap.cap.offset) }; 640 self.read_bar(0, bar_offset as u64, data) 641 } 642 } 643 644 fn write_cap_pci_cfg(&mut self, offset: usize, data: &[u8]) -> Option<Arc<Barrier>> { 645 let cap_slice = self.cap_pci_cfg_info.cap.as_mut_slice(); 646 let data_len = data.len(); 647 let cap_len = cap_slice.len(); 648 if offset + data_len > cap_len { 649 error!("Failed to write cap_pci_cfg to config space"); 650 return None; 651 } 652 653 if offset < std::mem::size_of::<VirtioPciCap>() { 654 let (_, right) = cap_slice.split_at_mut(offset); 655 right[..data_len].copy_from_slice(data); 656 None 657 } else { 658 // Safe since we know self.cap_pci_cfg_info.cap.cap.offset is 32bits long. 659 let bar_offset: u32 = 660 unsafe { std::mem::transmute(self.cap_pci_cfg_info.cap.cap.offset) }; 661 self.write_bar(0, bar_offset as u64, data) 662 } 663 } 664 665 pub fn virtio_device(&self) -> Arc<Mutex<dyn VirtioDevice>> { 666 self.device.clone() 667 } 668 669 fn activate(&mut self) -> ActivateResult { 670 if let Some(virtio_interrupt) = self.virtio_interrupt.take() { 671 if self.memory.is_some() { 672 let mem = self.memory.as_ref().unwrap().clone(); 673 let mut device = self.device.lock().unwrap(); 674 let mut queue_evts = Vec::new(); 675 let mut queues = self.queues.clone(); 676 queues.retain(|q| q.ready); 677 for (i, queue) in queues.iter().enumerate() { 678 queue_evts.push(self.queue_evts[i].try_clone().unwrap()); 679 if !queue.is_valid(&mem.memory()) { 680 error!("Queue {} is not valid", i); 681 } 682 } 683 return device.activate(mem, virtio_interrupt, queues, queue_evts); 684 } 685 } 686 Ok(()) 687 } 688 689 pub fn maybe_activate(&mut self) { 690 if self.needs_activation() { 691 self.activate().expect("Failed to activate device"); 692 self.device_activated.store(true, Ordering::SeqCst); 693 info!("{}: Waiting for barrier", self.id); 694 self.activate_barrier.wait(); 695 info!("{}: Barrier released", self.id); 
        } else {
            info!("{}: Device does not need activation", self.id)
        }
    }

    // Activation is needed once the driver is fully ready but the device has
    // not been activated yet.
    fn needs_activation(&self) -> bool {
        !self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready()
    }
}

impl VirtioTransport for VirtioPciDevice {
    // Maps each queue's eventfd to the guest-physical notification address
    // the driver writes to kick that queue.
    fn ioeventfds(&self, base_addr: u64) -> Vec<(&EventFd, u64)> {
        let notify_base = base_addr + NOTIFICATION_BAR_OFFSET;
        self.queue_evts()
            .iter()
            .enumerate()
            .map(|(i, event)| {
                (
                    event,
                    notify_base + i as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                )
            })
            .collect()
    }
}

/// MSI-X backed implementation of VirtioInterrupt: resolves the vector for
/// config or queue interrupts and triggers it through the interrupt source
/// group, honoring MSI-X masking.
pub struct VirtioInterruptMsix {
    msix_config: Arc<Mutex<MsixConfig>>,
    config_vector: Arc<AtomicU16>,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
}

impl VirtioInterruptMsix {
    pub fn new(
        msix_config: Arc<Mutex<MsixConfig>>,
        config_vector: Arc<AtomicU16>,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    ) -> Self {
        VirtioInterruptMsix {
            msix_config,
            config_vector,
            interrupt_source_group,
        }
    }
}

impl VirtioInterrupt for VirtioInterruptMsix {
    fn trigger(
        &self,
        int_type: &VirtioInterruptType,
        queue: Option<&Queue>,
    ) -> std::result::Result<(), std::io::Error> {
        // Config interrupts use the dedicated config vector; queue interrupts
        // use the vector the driver assigned to that queue (0 if absent).
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue => {
                if let Some(q) = queue {
                    q.vector
                } else {
                    0
                }
            }
        };

        // The driver explicitly disabled interrupts for this source.
        if vector == VIRTIO_MSI_NO_VECTOR {
            return Ok(());
        }

        let config = &mut self.msix_config.lock().unwrap();
        let entry = &config.table_entries[vector as usize];
        // In case the vector control register associated with the entry
        // has its first bit set, this means the vector is masked and the
        // device should not inject the interrupt.
        // Instead, the Pending Bit Array table is updated to reflect there
        // is a pending interrupt for this specific vector.
        if config.masked() || entry.masked() {
            config.set_pba_bit(vector, false);
            return Ok(());
        }

        self.interrupt_source_group
            .trigger(vector as InterruptIndex)
    }

    // Returns the eventfd backing the interrupt for the given source, so it
    // can be wired directly (e.g. for irqfd offload).
    fn notifier(&self, int_type: &VirtioInterruptType, queue: Option<&Queue>) -> Option<EventFd> {
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue => {
                if let Some(q) = queue {
                    q.vector
                } else {
                    0
                }
            }
        };

        self.interrupt_source_group
            .notifier(vector as InterruptIndex)
    }
}

impl PciDevice for VirtioPciDevice {
    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Arc<Barrier>> {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base + offset as usize >= self.cap_pci_cfg_info.offset
            && base + offset as usize + data.len()
                <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            let offset = base + offset as usize - self.cap_pci_cfg_info.offset;
            self.write_cap_pci_cfg(offset, data)
        } else {
            self.configuration
                .write_config_register(reg_idx, offset, data);
            None
        }
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base >= self.cap_pci_cfg_info.offset
            && base + 4 <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            let offset = base - self.cap_pci_cfg_info.offset;
            let mut data = [0u8; 4];
            self.read_cap_pci_cfg(offset, &mut data);
            u32::from_le_bytes(data)
        } else {
            self.configuration.read_reg(reg_idx)
        }
    }

    fn detect_bar_reprogramming(
        &mut self,
        reg_idx: usize,
        data: &[u8],
    ) -> Option<BarReprogrammingParams> {
        self.configuration.detect_bar_reprogramming(reg_idx, data)
    }

    // Allocates the capability BAR (64-bit by default, 32-bit for block
    // devices) plus an optional shared-memory BAR, then registers all the
    // virtio capabilities that point into them.
    fn allocate_bars(
        &mut self,
        allocator: &mut SystemAllocator,
    ) -> std::result::Result<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>, PciDeviceError>
    {
        let mut ranges = Vec::new();
        let device_clone = self.device.clone();
        let device = device_clone.lock().unwrap();

        // Allocate the virtio-pci capability BAR.
        // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004
        let (virtio_pci_bar_addr, region_type) = if self.use_64bit_bar {
            let region_type = PciBarRegionType::Memory64BitRegion;
            let addr = allocator
                .allocate_mmio_addresses(
                    self.settings_bar_addr,
                    CAPABILITY_BAR_SIZE,
                    Some(CAPABILITY_BAR_SIZE),
                )
                .ok_or(PciDeviceError::IoAllocationFailed(CAPABILITY_BAR_SIZE))?;
            ranges.push((addr, CAPABILITY_BAR_SIZE, region_type));
            (addr, region_type)
        } else {
            // 32-bit BARs must land inside the MMIO hole below 4GiB.
            let region_type = PciBarRegionType::Memory32BitRegion;
            let addr = allocator
                .allocate_mmio_hole_addresses(
                    self.settings_bar_addr,
                    CAPABILITY_BAR_SIZE,
                    Some(CAPABILITY_BAR_SIZE),
                )
                .ok_or(PciDeviceError::IoAllocationFailed(CAPABILITY_BAR_SIZE))?;
            ranges.push((addr, CAPABILITY_BAR_SIZE, region_type));
            (addr, region_type)
        };
        // Track the region so free_bars()/move_bar() can manage it later.
        self.bar_regions
            .push((virtio_pci_bar_addr, CAPABILITY_BAR_SIZE, region_type));

        let config = PciBarConfiguration::default()
            .set_register_index(0)
            .set_address(virtio_pci_bar_addr.raw_value())
            .set_size(CAPABILITY_BAR_SIZE)
            .set_region_type(region_type);
        let virtio_pci_bar =
            self.configuration.add_pci_bar(&config).map_err(|e| {
                PciDeviceError::IoRegistrationFailed(virtio_pci_bar_addr.raw_value(), e)
            })? as u8;

        // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
        self.add_pci_capabilities(virtio_pci_bar)?;

        // Allocate a dedicated BAR if there are some shared memory regions.
        if let Some(shm_list) = device.get_shm_regions() {
            let config = PciBarConfiguration::default()
                .set_register_index(2)
                .set_address(shm_list.addr.raw_value())
                .set_size(shm_list.len);
            let virtio_pci_shm_bar =
                self.configuration.add_pci_bar(&config).map_err(|e| {
                    PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e)
                })? as u8;

            let region_type = PciBarRegionType::Memory64BitRegion;
            ranges.push((shm_list.addr, shm_list.len, region_type));
            self.bar_regions
                .push((shm_list.addr, shm_list.len, region_type));

            // One SharedMemoryConfig capability per region, identified by its
            // index in the list.
            for (idx, shm) in shm_list.region_list.iter().enumerate() {
                let shm_cap = VirtioPciCap64::new(
                    PciCapabilityType::SharedMemoryConfig,
                    virtio_pci_shm_bar,
                    idx as u8,
                    shm.offset,
                    shm.len,
                );
                self.configuration
                    .add_capability(&shm_cap)
                    .map_err(PciDeviceError::CapabilitiesSetup)?;
            }
        }

        Ok(ranges)
    }

    // Returns every allocated BAR region to the matching allocator pool.
    fn free_bars(
        &mut self,
        allocator: &mut SystemAllocator,
    ) -> std::result::Result<(), PciDeviceError> {
        for (addr, length, type_) in self.bar_regions.drain(..)
        {
            match type_ {
                PciBarRegionType::Memory32BitRegion => {
                    allocator.free_mmio_hole_addresses(addr, length);
                }
                PciBarRegionType::Memory64BitRegion => {
                    allocator.free_mmio_addresses(addr, length);
                }
                _ => error!("Unexpected PCI bar type"),
            }
        }
        Ok(())
    }

    fn move_bar(&mut self, old_base: u64, new_base: u64) -> result::Result<(), std::io::Error> {
        // We only update our idea of the bar in order to support free_bars() above.
        // The majority of the reallocation is done inside DeviceManager.
        for (addr, _, _) in self.bar_regions.iter_mut() {
            if (*addr).0 == old_base {
                *addr = GuestAddress(new_base);
            }
        }

        Ok(())
    }

    // Dispatches a BAR read to the region the offset falls into (common
    // config, ISR, device config, notification, MSI-X table or PBA).
    fn read_bar(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => self.common_config.read(
                o - COMMON_CONFIG_BAR_OFFSET,
                data,
                &mut self.queues,
                self.device.clone(),
            ),
            o if ISR_CONFIG_BAR_OFFSET <= o && o < ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE => {
                if let Some(v) = data.get_mut(0) {
                    // Reading this register resets it to 0.
                    *v = self.interrupt_status.swap(0, Ordering::AcqRel) as u8;
                }
            }
            o if DEVICE_CONFIG_BAR_OFFSET <= o
                && o < DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE =>
            {
                let device = self.device.lock().unwrap();
                device.read_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if NOTIFICATION_BAR_OFFSET <= o
                && o < NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE =>
            {
                // Handled with ioeventfds.
            }
            o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .read_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .read_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            // Reads outside any known region are silently ignored.
            _ => (),
        }
    }

    // Dispatches a BAR write to the matching region, then checks whether the
    // write made the device ready for activation or requested a reset.
    fn write_bar(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => self.common_config.write(
                o - COMMON_CONFIG_BAR_OFFSET,
                data,
                &mut self.queues,
                self.device.clone(),
            ),
            o if ISR_CONFIG_BAR_OFFSET <= o && o < ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE => {
                // Writing a bit clears the corresponding interrupt status bit.
                if let Some(v) = data.get(0) {
                    self.interrupt_status
                        .fetch_and(!(*v as usize), Ordering::AcqRel);
                }
            }
            o if DEVICE_CONFIG_BAR_OFFSET <= o
                && o < DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE =>
            {
                let mut device = self.device.lock().unwrap();
                device.write_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if NOTIFICATION_BAR_OFFSET <= o
                && o < NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE =>
            {
                // Handled with ioeventfds.
            }
            o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .write_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => {
                if let Some(msix_config) = &self.msix_config {
                    msix_config
                        .lock()
                        .unwrap()
                        .write_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            // Writes outside any known region are silently ignored.
            _ => (),
        };

        // Try and activate the device if the driver status has changed
        if self.needs_activation() {
            info!(
                "{}: Needs activation; writing to activate event fd",
                self.id
            );
            // Signal the activation thread via the eventfd and hand the
            // caller the barrier to wait on until activation completes.
            self.activate_evt.write(1).ok();
            info!("{}: Needs activation; returning barrier", self.id);
            return Some(self.activate_barrier.clone());
        }

        // Device has been reset by the driver
        if self.device_activated.load(Ordering::SeqCst) && self.is_driver_init() {
            let mut device = self.device.lock().unwrap();
            if let Some(virtio_interrupt) = device.reset() {
                // Upon reset the device returns its interrupt EventFD
                self.virtio_interrupt = Some(virtio_interrupt);
                self.device_activated.store(false, Ordering::SeqCst);

                // Reset queue readiness (changes queue_enable), queue sizes
                // and selected_queue as per spec for reset
                self.queues.iter_mut().for_each(Queue::reset);
                self.common_config.queue_select = 0;
            } else {
                error!("Attempt to reset device when not implemented in underlying device");
                self.common_config.driver_status = crate::DEVICE_FAILED as u8;
            }
        }

        None
    }

    fn as_any(&mut self) -> &mut dyn Any {
        self
    }
}

impl BusDevice for VirtioPciDevice {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.read_bar(base, offset, data)
    }

    fn write(&mut self, base: u64, offset: u64, data:
&[u8]) -> Option<Arc<Barrier>> { 1090 self.write_bar(base, offset, data) 1091 } 1092 } 1093 1094 impl Pausable for VirtioPciDevice { 1095 fn pause(&mut self) -> result::Result<(), MigratableError> { 1096 Ok(()) 1097 } 1098 1099 fn resume(&mut self) -> result::Result<(), MigratableError> { 1100 Ok(()) 1101 } 1102 } 1103 1104 impl Snapshottable for VirtioPciDevice { 1105 fn id(&self) -> String { 1106 self.id.clone() 1107 } 1108 1109 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 1110 let mut virtio_pci_dev_snapshot = 1111 Snapshot::new_from_versioned_state(&self.id, &self.state())?; 1112 1113 // Snapshot PciConfiguration 1114 virtio_pci_dev_snapshot.add_snapshot(self.configuration.snapshot()?); 1115 1116 // Snapshot VirtioPciCommonConfig 1117 virtio_pci_dev_snapshot.add_snapshot(self.common_config.snapshot()?); 1118 1119 // Snapshot MSI-X 1120 if let Some(msix_config) = &self.msix_config { 1121 virtio_pci_dev_snapshot.add_snapshot(msix_config.lock().unwrap().snapshot()?); 1122 } 1123 1124 Ok(virtio_pci_dev_snapshot) 1125 } 1126 1127 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 1128 if let Some(virtio_pci_dev_section) = 1129 snapshot.snapshot_data.get(&format!("{}-section", self.id)) 1130 { 1131 // Restore MSI-X 1132 if let Some(msix_config) = &self.msix_config { 1133 let id = msix_config.lock().unwrap().id(); 1134 if let Some(msix_snapshot) = snapshot.snapshots.get(&id) { 1135 msix_config 1136 .lock() 1137 .unwrap() 1138 .restore(*msix_snapshot.clone())?; 1139 } 1140 } 1141 1142 // Restore VirtioPciCommonConfig 1143 if let Some(virtio_config_snapshot) = snapshot.snapshots.get(&self.common_config.id()) { 1144 self.common_config 1145 .restore(*virtio_config_snapshot.clone())?; 1146 } 1147 1148 // Restore PciConfiguration 1149 if let Some(pci_config_snapshot) = snapshot.snapshots.get(&self.configuration.id()) { 1150 self.configuration.restore(*pci_config_snapshot.clone())?; 1151 } 1152 1153 // First 
restore the status of the virtqueues. 1154 self.set_state(&virtio_pci_dev_section.to_versioned_state()?) 1155 .map_err(|e| { 1156 MigratableError::Restore(anyhow!( 1157 "Could not restore VIRTIO_PCI_DEVICE state {:?}", 1158 e 1159 )) 1160 })?; 1161 1162 // Then we can activate the device, as we know at this point that 1163 // the virtqueues are in the right state and the device is ready 1164 // to be activated, which will spawn each virtio worker thread. 1165 if self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready() { 1166 self.activate().map_err(|e| { 1167 MigratableError::Restore(anyhow!("Failed activating the device: {:?}", e)) 1168 })?; 1169 } 1170 1171 return Ok(()); 1172 } 1173 1174 Err(MigratableError::Restore(anyhow!( 1175 "Could not find VIRTIO_PCI_DEVICE snapshot section" 1176 ))) 1177 } 1178 } 1179 impl Transportable for VirtioPciDevice {} 1180 impl Migratable for VirtioPciDevice {} 1181