// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//

use crate::{PciCapability, PciCapabilityId};
use anyhow::anyhow;
use byteorder::{ByteOrder, LittleEndian};
use log::{debug, error};
use std::io;
use std::result;
use std::sync::Arc;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_device::interrupt::{
    InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
};
use vm_memory::ByteValued;
use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped};

const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
const BITS_PER_PBA_ENTRY: usize = 64;
const FUNCTION_MASK_BIT: u8 = 14;
const MSIX_ENABLE_BIT: u8 = 15;
const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
pub const MSIX_CONFIG_ID: &str = "msix_config";

#[derive(Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}

#[derive(Debug, Clone, Versionize, Eq, PartialEq)]
pub struct MsixTableEntry {
    pub msg_addr_lo: u32,
    pub msg_addr_hi: u32,
    pub msg_data: u32,
    pub vector_ctl: u32,
}

impl MsixTableEntry {
    pub fn masked(&self) -> bool {
        self.vector_ctl & 0x1 == 0x1
    }
}

impl Default for MsixTableEntry {
    fn default() -> Self {
        MsixTableEntry {
            msg_addr_lo: 0,
            msg_addr_hi: 0,
            msg_data: 0,
            vector_ctl: 0x1,
        }
    }
}
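
// A minimal usage sketch, not part of the original module and unused by the
// crate: it shows how a guest-programmed MSI-X table entry maps onto
// `MsixTableEntry`, with the 64-bit message address split across the low/high
// doublewords and Vector Control bit 0 cleared to unmask the vector. The
// helper name is hypothetical.
#[allow(dead_code)]
fn unmasked_table_entry_sketch(msg_addr: u64, msg_data: u32) -> MsixTableEntry {
    MsixTableEntry {
        msg_addr_lo: (msg_addr & 0xffff_ffff) as u32,
        msg_addr_hi: (msg_addr >> 32) as u32,
        msg_data,
        // Bit 0 is the per-vector mask; entries default to masked (0x1).
        vector_ctl: 0x0,
    }
}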

#[derive(Versionize)]
pub struct MsixConfigState {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    masked: bool,
    enabled: bool,
}

impl VersionMapped for MsixConfigState {}

pub struct MsixConfig {
    pub table_entries: Vec<MsixTableEntry>,
    pub pba_entries: Vec<u64>,
    pub devid: u32,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    masked: bool,
    enabled: bool,
}

impl MsixConfig {
    pub fn new(
        msix_vectors: u16,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
        devid: u32,
        state: Option<MsixConfigState>,
    ) -> result::Result<Self, Error> {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
            if state.enabled && !state.masked {
                for (idx, table_entry) in state.table_entries.iter().enumerate() {
                    if table_entry.masked() {
                        continue;
                    }

                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid,
                    };

                    interrupt_source_group
                        .update(
                            idx as InterruptIndex,
                            InterruptSourceConfig::MsiIrq(config),
                            state.masked,
                        )
                        .map_err(Error::UpdateInterruptRoute)?;

                    interrupt_source_group
                        .enable()
                        .map_err(Error::EnableInterruptRoute)?;
                }
            }

            (
                state.table_entries,
                state.pba_entries,
                state.masked,
                state.enabled,
            )
        } else {
            let mut table_entries: Vec<MsixTableEntry> = Vec::new();
            table_entries.resize_with(msix_vectors as usize, Default::default);
            let mut pba_entries: Vec<u64> = Vec::new();
            let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
            pba_entries.resize_with(num_pba_entries, Default::default);

            (table_entries, pba_entries, true, false)
        };

        Ok(MsixConfig {
            table_entries,
            pba_entries,
            devid,
            interrupt_source_group,
            masked,
            enabled,
        })
    }

    fn state(&self) -> MsixConfigState {
        MsixConfigState {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
        }
    }

    fn set_state(&mut self, state: &MsixConfigState) -> result::Result<(), Error> {
        self.table_entries = state.table_entries.clone();
        self.pba_entries = state.pba_entries.clone();
        self.masked = state.masked;
        self.enabled = state.enabled;

        if self.enabled && !self.masked {
            for (idx, table_entry) in self.table_entries.iter().enumerate() {
                if table_entry.masked() {
                    continue;
                }

                let config = MsiIrqSourceConfig {
                    high_addr: table_entry.msg_addr_hi,
                    low_addr: table_entry.msg_addr_lo,
                    data: table_entry.msg_data,
                    devid: self.devid,
                };

                self.interrupt_source_group
                    .update(
                        idx as InterruptIndex,
                        InterruptSourceConfig::MsiIrq(config),
                        self.masked,
                    )
                    .map_err(Error::UpdateInterruptRoute)?;

                self.interrupt_source_group
                    .enable()
                    .map_err(Error::EnableInterruptRoute)?;
            }
        }

        Ok(())
    }

    pub fn masked(&self) -> bool {
        self.masked
    }

    pub fn enabled(&self) -> bool {
        self.enabled
    }

    pub fn set_msg_ctl(&mut self, reg: u16) {
        let old_masked = self.masked;
        let old_enabled = self.enabled;

        self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
        self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;

        // Update interrupt routing
        if old_masked != self.masked || old_enabled != self.enabled {
            if self.enabled && !self.masked {
                debug!("MSI-X enabled for device 0x{:x}", self.devid);
                for (idx, table_entry) in self.table_entries.iter().enumerate() {
                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid: self.devid,
                    };

                    if let Err(e) = self.interrupt_source_group.update(
                        idx as InterruptIndex,
                        InterruptSourceConfig::MsiIrq(config),
                        table_entry.masked(),
                    ) {
                        error!("Failed updating vector: {:?}", e);
                    }
                }
            } else if old_enabled || !old_masked {
                debug!("MSI-X disabled for device 0x{:x}", self.devid);
                if let Err(e) = self.interrupt_source_group.disable() {
                    error!("Failed disabling irq_fd: {:?}", e);
                }
            }
        }

        // If the Function Mask bit was set, and has just been cleared, it's
        // important to go through the entire PBA to check if there was any
        // pending MSI-X message to inject, given that the vector is not
        // masked.
        if old_masked && !self.masked {
            for (index, entry) in self.table_entries.clone().iter().enumerate() {
                if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
            }
        }
    }
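
    // Each MSI-X table entry occupies MSIX_TABLE_ENTRY_SIZE (16) bytes in the
    // table region and is laid out as four little-endian doublewords:
    //   0x0: Message Address (low 32 bits)
    //   0x4: Message Address (high 32 bits)
    //   0x8: Message Data
    //   0xc: Vector Control (bit 0 is the per-vector mask)
    // read_table() and write_table() below decode 4-byte and 8-byte guest
    // accesses against this layout.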
    pub fn read_table(&self, offset: u64, data: &mut [u8]) {
        assert!(data.len() == 4 || data.len() == 8);

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    pub fn write_table(&mut self, offset: u64, data: &[u8]) {
        assert!(data.len() == 4 || data.len() == 8);

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = LittleEndian::read_u32(data);
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => {
                        self.table_entries[index].vector_ctl = value;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            8 => {
                let value = LittleEndian::read_u64(data);
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            _ => error!("invalid data length"),
        };

        let table_entry = &self.table_entries[index];

        // Optimisation to avoid excessive updates
        if &old_entry == table_entry {
            return;
        }

        // Update interrupt routes
        // Optimisation: only update the route if the entry is unmasked. This
        // is safe because a masked entry (entries start masked, as per the
        // spec) cannot be triggered. (See: #4273)
        if self.enabled && !self.masked && !table_entry.masked() {
            let config = MsiIrqSourceConfig {
                high_addr: table_entry.msg_addr_hi,
                low_addr: table_entry.msg_addr_lo,
                data: table_entry.msg_data,
                devid: self.devid,
            };

            if let Err(e) = self.interrupt_source_group.update(
                index as InterruptIndex,
                InterruptSourceConfig::MsiIrq(config),
                table_entry.masked(),
            ) {
                error!("Failed updating vector: {:?}", e);
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject an MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the
        // whole device.

        // Check if the bit has been flipped
        if !self.masked()
            && self.enabled()
            && old_entry.masked()
            && !table_entry.masked()
            && self.get_pba_bit(index as u16) == 1
        {
            self.inject_msix_and_clear_pba(index);
        }
    }

    pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
        assert!(data.len() == 4 || data.len() == 8);

        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        // Build the mask as a u64 so shifts of 32 or more do not overflow.
        let mut mask: u64 = 1u64 << shift;

        if reset {
            mask = !mask;
            self.pba_entries[index] &= mask;
        } else {
            self.pba_entries[index] |= mask;
        }
    }

    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        // Inject the MSI message
        match self
            .interrupt_source_group
            .trigger(vector as InterruptIndex)
        {
            Ok(_) => debug!("MSI-X injected on vector control flip"),
            Err(e) => error!("failed to inject MSI-X: {}", e),
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, true);
    }
}
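
// A minimal usage sketch, not part of the original module and unused by the
// crate: it shows how a device model could signal MSI-X vector `vector`
// through an `MsixConfig`, either triggering it immediately or latching it in
// the Pending Bit Array when the device or the vector is masked, so that
// set_msg_ctl()/write_table() can inject it later. The function name is
// hypothetical.
#[allow(dead_code)]
fn deliver_vector_sketch(msix: &mut MsixConfig, vector: u16) {
    let deliverable = msix.enabled()
        && !msix.masked()
        && !msix.table_entries[vector as usize].masked();

    if deliverable {
        // The vector can be injected right away.
        if let Err(e) = msix
            .interrupt_source_group
            .trigger(vector as InterruptIndex)
        {
            error!("failed to trigger MSI-X vector {}: {}", vector, e);
        }
    } else {
        // Latch the interrupt in the PBA; it will be injected once the
        // relevant mask bit is cleared.
        msix.set_pba_bit(vector, false);
    }
}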

impl Pausable for MsixConfig {}

impl Snapshottable for MsixConfig {
    fn id(&self) -> String {
        String::from(MSIX_CONFIG_ID)
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id())?)
            .map_err(|e| {
                MigratableError::Restore(anyhow!(
                    "Could not restore state for {}: {:?}",
                    self.id(),
                    e
                ))
            })
    }
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default, Versionize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed to by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed to by the BIR.
    pub pba: u32,
}

// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for MsixCap {}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::MsiX
    }
}

impl MsixCap {
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        let msg_ctl: u16 = 0x8000u16 + table_size - 1;

        MsixCap {
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }

    pub fn set_msg_ctl(&mut self, data: u16) {
        self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
            | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
    }

    pub fn masked(&self) -> bool {
        (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
    }

    pub fn enabled(&self) -> bool {
        (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
    }

    pub fn table_offset(&self) -> u32 {
        self.table & 0xffff_fff8
    }

    pub fn pba_offset(&self) -> u32 {
        self.pba & 0xffff_fff8
    }

    pub fn table_bir(&self) -> u32 {
        self.table & 0x7
    }

    pub fn pba_bir(&self) -> u32 {
        self.pba & 0x7
    }

    pub fn table_size(&self) -> u16 {
        (self.msg_ctl & 0x7ff) + 1
    }

    pub fn table_range(&self) -> (u64, u64) {
        // The table takes 16 bytes per entry.
        let size = self.table_size() as u64 * 16;
        (self.table_offset() as u64, size)
    }

    pub fn pba_range(&self) -> (u64, u64) {
        // The PBA takes 1 bit per table entry, rounded up to 8-byte units.
        let size = ((self.table_size() as u64 / 64) + 1) * 8;
        (self.pba_offset() as u64, size)
    }
}
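
// A minimal test sketch, assuming the standard Rust test harness; these tests
// are not part of the original file. They exercise the self-contained
// `MsixCap` helpers: the encoding produced by new(), the message control
// update path, and the table/PBA BIR, offset and range decoding.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn msix_cap_encoding() {
        // 8 vectors, table in BAR 1 at offset 0x1000, PBA in BAR 1 at 0x2000.
        let cap = MsixCap::new(1, 8, 0x1000, 1, 0x2000);

        // new() sets the Enable bit but not the Function Mask bit.
        assert!(cap.enabled());
        assert!(!cap.masked());

        // The Table Size field is encoded as N - 1; table_size() adds 1 back.
        assert_eq!(cap.table_size(), 8);

        // The BIR lives in the low 3 bits, the offset in bits 31-3.
        assert_eq!(cap.table_bir(), 1);
        assert_eq!(cap.table_offset(), 0x1000);
        assert_eq!(cap.pba_bir(), 1);
        assert_eq!(cap.pba_offset(), 0x2000);

        // 8 entries * 16 bytes per entry, and one 8-byte PBA entry.
        assert_eq!(cap.table_range(), (0x1000, 8 * 16));
        assert_eq!(cap.pba_range(), (0x2000, 8));
    }

    #[test]
    fn msix_cap_msg_ctl_update() {
        let mut cap = MsixCap::new(0, 4, 0x0, 0, 0x800);

        // Only the Function Mask and Enable bits are writable through
        // set_msg_ctl(); the table size must be preserved.
        cap.set_msg_ctl(FUNCTION_MASK_MASK);
        assert!(cap.masked());
        assert!(!cap.enabled());
        assert_eq!(cap.table_size(), 4);
    }

    #[test]
    fn msix_table_entry_default_masked() {
        // Per the PCI spec, vectors start out masked: Vector Control bit 0 set.
        assert!(MsixTableEntry::default().masked());
    }
}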