// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//

use crate::{PciCapability, PciCapabilityId};
use anyhow::anyhow;
use byteorder::{ByteOrder, LittleEndian};
use std::io;
use std::result;
use std::sync::Arc;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_device::interrupt::{
    InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
};
use vm_memory::ByteValued;
use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped};

const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
const BITS_PER_PBA_ENTRY: usize = 64;
const FUNCTION_MASK_BIT: u8 = 14;
const MSIX_ENABLE_BIT: u8 = 15;
const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;

#[derive(Debug)]
enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}

#[derive(Debug, Clone, Versionize)]
pub struct MsixTableEntry {
    pub msg_addr_lo: u32,
    pub msg_addr_hi: u32,
    pub msg_data: u32,
    pub vector_ctl: u32,
}

impl MsixTableEntry {
    pub fn masked(&self) -> bool {
        self.vector_ctl & 0x1 == 0x1
    }
}

impl Default for MsixTableEntry {
    fn default() -> Self {
        MsixTableEntry {
            msg_addr_lo: 0,
            msg_addr_hi: 0,
            msg_data: 0,
            vector_ctl: 0x1,
        }
    }
}

#[derive(Versionize)]
struct MsixConfigState {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    masked: bool,
    enabled: bool,
}

impl VersionMapped for MsixConfigState {}
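
/// Runtime MSI-X state for a PCI device: the guest-programmed table entries,
/// the Pending Bit Array, the function-level mask/enable bits taken from the
/// Message Control register, and the interrupt source group through which
/// the routes are programmed on the host side.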
pub struct MsixConfig {
    pub table_entries: Vec<MsixTableEntry>,
    pub pba_entries: Vec<u64>,
    pub devid: u32,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    masked: bool,
    enabled: bool,
}

impl MsixConfig {
    pub fn new(
        msix_vectors: u16,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
        devid: u32,
    ) -> Self {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let mut table_entries: Vec<MsixTableEntry> = Vec::new();
        table_entries.resize_with(msix_vectors as usize, Default::default);
        let mut pba_entries: Vec<u64> = Vec::new();
        let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
        pba_entries.resize_with(num_pba_entries, Default::default);

        MsixConfig {
            table_entries,
            pba_entries,
            devid,
            interrupt_source_group,
            masked: true,
            enabled: false,
        }
    }

    fn state(&self) -> MsixConfigState {
        MsixConfigState {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
        }
    }

    fn set_state(&mut self, state: &MsixConfigState) -> result::Result<(), Error> {
        self.table_entries = state.table_entries.clone();
        self.pba_entries = state.pba_entries.clone();
        self.masked = state.masked;
        self.enabled = state.enabled;

        if self.enabled && !self.masked {
            for (idx, table_entry) in self.table_entries.iter().enumerate() {
                if table_entry.masked() {
                    continue;
                }

                let config = MsiIrqSourceConfig {
                    high_addr: table_entry.msg_addr_hi,
                    low_addr: table_entry.msg_addr_lo,
                    data: table_entry.msg_data,
                    devid: self.devid,
                };

                self.interrupt_source_group
                    .update(
                        idx as InterruptIndex,
                        InterruptSourceConfig::MsiIrq(config),
                        self.masked,
                    )
                    .map_err(Error::UpdateInterruptRoute)?;

                self.interrupt_source_group
                    .enable()
                    .map_err(Error::EnableInterruptRoute)?;
            }
        }

        Ok(())
    }

    pub fn masked(&self) -> bool {
        self.masked
    }

    pub fn enabled(&self) -> bool {
        self.enabled
    }

    pub fn set_msg_ctl(&mut self, reg: u16) {
        let old_masked = self.masked;
        let old_enabled = self.enabled;

        self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
        self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;

        // Update interrupt routing
        if old_masked != self.masked || old_enabled != self.enabled {
            if self.enabled && !self.masked {
                debug!("MSI-X enabled for device 0x{:x}", self.devid);
                for (idx, table_entry) in self.table_entries.iter().enumerate() {
                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid: self.devid,
                    };

                    if let Err(e) = self.interrupt_source_group.update(
                        idx as InterruptIndex,
                        InterruptSourceConfig::MsiIrq(config),
                        table_entry.masked(),
                    ) {
                        error!("Failed updating vector: {:?}", e);
                    }
                }
            } else if old_enabled || !old_masked {
                debug!("MSI-X disabled for device 0x{:x}", self.devid);
                if let Err(e) = self.interrupt_source_group.disable() {
                    error!("Failed disabling irq_fd: {:?}", e);
                }
            }
        }

        // If the Function Mask bit was set, and has just been cleared, it's
        // important to go through the entire PBA to check if there was any
        // pending MSI-X message to inject, given that the vector is not
        // masked.
        if old_masked && !self.masked {
            for (index, entry) in self.table_entries.clone().iter().enumerate() {
                if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
            }
        }
    }
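
    /// Handles guest reads from the MSI-X table MMIO region. Each entry is
    /// MSIX_TABLE_ENTRY_SIZE (16) bytes long, laid out as four dwords:
    /// Message Address low, Message Address high, Message Data and Vector
    /// Control. For example, a 4-byte read at offset 0x1c decodes to entry 1,
    /// dword offset 0xc, i.e. that entry's Vector Control register.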
    pub fn read_table(&self, offset: u64, data: &mut [u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    pub fn write_table(&mut self, offset: u64, data: &[u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        // Store the value of the entry before modification
        let mut old_entry: Option<MsixTableEntry> = None;

        match data.len() {
            4 => {
                let value = LittleEndian::read_u32(data);
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => {
                        old_entry = Some(self.table_entries[index].clone());
                        self.table_entries[index].vector_ctl = value;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            8 => {
                let value = LittleEndian::read_u64(data);
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        old_entry = Some(self.table_entries[index].clone());
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            _ => error!("invalid data length"),
        };

        // Update interrupt routes
        if self.enabled && !self.masked {
            let table_entry = &self.table_entries[index];

            let config = MsiIrqSourceConfig {
                high_addr: table_entry.msg_addr_hi,
                low_addr: table_entry.msg_addr_lo,
                data: table_entry.msg_data,
                devid: self.devid,
            };

            if let Err(e) = self.interrupt_source_group.update(
                index as InterruptIndex,
                InterruptSourceConfig::MsiIrq(config),
                table_entry.masked(),
            ) {
                error!("Failed updating vector: {:?}", e);
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.
        if let Some(old_entry) = old_entry {
            // Check if bit has been flipped
            if !self.masked()
                && old_entry.masked()
                && !self.table_entries[index].masked()
                && self.get_pba_bit(index as u16) == 1
            {
                self.inject_msix_and_clear_pba(index);
            }
        }
    }

    pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = (1 << shift) as u64;

        if reset {
            mask = !mask;
            self.pba_entries[index] &= mask;
        } else {
            self.pba_entries[index] |= mask;
        }
    }

    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        // Inject the MSI message
        match self
            .interrupt_source_group
            .trigger(vector as InterruptIndex)
        {
            Ok(_) => debug!("MSI-X injected on vector control flip"),
            Err(e) => error!("failed to inject MSI-X: {}", e),
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, true);
    }
}

impl Pausable for MsixConfig {}
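
// Snapshot/restore support: state() captures the table entries, PBA and
// mask/enable bits in MsixConfigState, and set_state() restores them,
// re-programming and re-enabling the interrupt routes when the function is
// enabled and unmasked.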
impl Snapshottable for MsixConfig {
    fn id(&self) -> String {
        String::from("msix_config")
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id(), &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id())?)
            .map_err(|e| {
                MigratableError::Restore(anyhow!(
                    "Could not restore state for {}: {:?}",
                    self.id(),
                    e
                ))
            })
    }
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default, Versionize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}

// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for MsixCap {}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::MsiX
    }
}

impl MsixCap {
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        let msg_ctl: u16 = 0x8000u16 + table_size - 1;

        MsixCap {
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }

    pub fn set_msg_ctl(&mut self, data: u16) {
        self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
            | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
    }

    pub fn masked(&self) -> bool {
        (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
    }

    pub fn enabled(&self) -> bool {
        (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
    }

    pub fn table_offset(&self) -> u32 {
        self.table & 0xffff_fff8
    }

    pub fn pba_offset(&self) -> u32 {
        self.pba & 0xffff_fff8
    }

    pub fn table_bir(&self) -> u32 {
        self.table & 0x7
    }

    pub fn pba_bir(&self) -> u32 {
        self.pba & 0x7
    }

    pub fn table_size(&self) -> u16 {
        (self.msg_ctl & 0x7ff) + 1
    }

    pub fn table_range(&self) -> (u64, u64) {
        // The table takes 16 bytes per entry.
        let size = self.table_size() as u64 * 16;
        (self.table_offset() as u64, size)
    }

    pub fn pba_range(&self) -> (u64, u64) {
        // The PBA takes 1 bit per table entry, rounded up to 8-byte units.
        let size = ((self.table_size() as u64 / 64) + 1) * 8;
        (self.pba_offset() as u64, size)
    }
}
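
// A minimal, illustrative unit-test sketch for the MsixCap bit-field helpers
// and the default table entry defined above. The BAR numbers, offsets and
// vector counts used here are arbitrary example values, not taken from any
// real device configuration.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn msix_cap_layout() {
        // 64 vectors: table in BAR 2 at offset 0x3000, PBA in BAR 2 at 0x4000.
        let cap = MsixCap::new(2, 64, 0x3000, 2, 0x4000);

        // new() sets the Enable bit and encodes the table size as N - 1.
        assert!(cap.enabled());
        assert!(!cap.masked());
        assert_eq!(cap.table_size(), 64);
        assert_eq!(cap.table_bir(), 2);
        assert_eq!(cap.table_offset(), 0x3000);
        assert_eq!(cap.pba_bir(), 2);
        assert_eq!(cap.pba_offset(), 0x4000);
        // 64 entries * 16 bytes for the table; the PBA size formula yields
        // ((64 / 64) + 1) * 8 = 16 bytes.
        assert_eq!(
            cap.table_range(),
            (0x3000, 64 * MSIX_TABLE_ENTRY_SIZE as u64)
        );
        assert_eq!(cap.pba_range(), (0x4000, 16));
    }

    #[test]
    fn msix_cap_msg_ctl_mask() {
        let mut cap = MsixCap::new(0, 8, 0x1000, 0, 0x2000);

        // Only the Function Mask and Enable bits are writable through
        // set_msg_ctl(); the table size field must be preserved.
        cap.set_msg_ctl(FUNCTION_MASK_MASK);
        assert!(cap.masked());
        assert!(!cap.enabled());
        assert_eq!(cap.table_size(), 8);
    }

    #[test]
    fn msix_table_entry_default_masked() {
        // Entries start out masked: Default sets bit 0 of Vector Control.
        assert!(MsixTableEntry::default().masked());
    }
}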