// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//

//! Emulation of the PCI MSI-X capability: the MSI-X table, the Pending Bit
//! Array (PBA), and the capability structure itself. Guest writes to the
//! table/PBA BAR regions and to the Message Control word are translated into
//! interrupt-route updates on an `InterruptSourceGroup`.

use std::io;
use std::result;
use std::sync::Arc;

use byteorder::{ByteOrder, LittleEndian};
use serde::Deserialize;
use serde::Serialize;
use vm_device::interrupt::{
    InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
};
use vm_memory::ByteValued;
use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable};

use crate::{PciCapability, PciCapabilityId};

// Upper bound enforced on the number of vectors a device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Each MSI-X table entry is 16 bytes; used to split a BAR offset into
// (entry index, offset within entry).
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// The PBA is an array of u64 words, i.e. 8 bytes per entry.
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
const BITS_PER_PBA_ENTRY: usize = 64;
// Bit positions of the Function Mask and MSI-X Enable bits in the
// capability's Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
const MSIX_ENABLE_BIT: u8 = 15;
const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
pub const MSIX_CONFIG_ID: &str = "msix_config";

/// Errors that can occur while programming interrupt routes.
#[derive(Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}

/// One 16-byte entry of the MSI-X table: message address (lo/hi), message
/// data, and the Vector Control word (bit 0 = per-vector mask).
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct MsixTableEntry {
    pub msg_addr_lo: u32,
    pub msg_addr_hi: u32,
    pub msg_data: u32,
    pub vector_ctl: u32,
}

impl MsixTableEntry {
    /// Returns true when this vector is masked (Vector Control bit 0 set).
    pub fn masked(&self) -> bool {
        self.vector_ctl & 0x1 == 0x1
    }
}

impl Default for MsixTableEntry {
    fn default() -> Self {
        MsixTableEntry {
            msg_addr_lo: 0,
            msg_addr_hi: 0,
            msg_data: 0,
            // Vectors start masked, as mandated by the PCI spec.
            vector_ctl: 0x1,
        }
    }
}

/// Serializable snapshot of an `MsixConfig`, used for save/restore.
#[derive(Serialize, Deserialize)]
pub struct MsixConfigState {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    masked: bool,
    enabled: bool,
}

/// Runtime state of a device's MSI-X function: the table, the PBA, and the
/// global Function Mask / MSI-X Enable bits, plus the interrupt source group
/// through which messages are actually delivered.
pub struct MsixConfig {
    pub table_entries: Vec<MsixTableEntry>,
    pub pba_entries: Vec<u64>,
    pub devid: u32,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    masked: bool,
    enabled: bool,
}

impl MsixConfig {
    /// Creates a new MSI-X configuration with `msix_vectors` entries.
    ///
    /// When `state` is provided (restore path), the saved table/PBA and
    /// control bits are adopted and, if MSI-X was enabled and not function
    /// masked, the route for every unmasked vector is re-programmed and
    /// enabled. Otherwise a fresh configuration is built with all vectors
    /// masked, the function masked and MSI-X disabled.
    ///
    /// # Errors
    ///
    /// Returns `Error::UpdateInterruptRoute` / `Error::EnableInterruptRoute`
    /// if restoring the interrupt routes fails.
    ///
    /// # Panics
    ///
    /// Panics if `msix_vectors` exceeds `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(
        msix_vectors: u16,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
        devid: u32,
        state: Option<MsixConfigState>,
    ) -> result::Result<Self, Error> {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
            if state.enabled && !state.masked {
                for (idx, table_entry) in state.table_entries.iter().enumerate() {
                    if table_entry.masked() {
                        continue;
                    }

                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid,
                    };

                    interrupt_source_group
                        .update(
                            idx as InterruptIndex,
                            InterruptSourceConfig::MsiIrq(config),
                            state.masked,
                            true,
                        )
                        .map_err(Error::UpdateInterruptRoute)?;

                    // NOTE(review): enable() is called once per unmasked
                    // vector; presumably idempotent on the group — confirm
                    // against the InterruptSourceGroup implementation.
                    interrupt_source_group
                        .enable()
                        .map_err(Error::EnableInterruptRoute)?;
                }
            }

            (
                state.table_entries,
                state.pba_entries,
                state.masked,
                state.enabled,
            )
        } else {
            let mut table_entries: Vec<MsixTableEntry> = Vec::new();
            table_entries.resize_with(msix_vectors as usize, Default::default);
            let mut pba_entries: Vec<u64> = Vec::new();
            // One u64 word per 64 vectors; "+ 1" covers any remainder (and
            // yields one word even for a multiple of 64).
            let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
            pba_entries.resize_with(num_pba_entries, Default::default);

            // Fresh device: function masked, MSI-X disabled.
            (table_entries, pba_entries, true, false)
        };

        Ok(MsixConfig {
            table_entries,
            pba_entries,
            devid,
            interrupt_source_group,
            masked,
            enabled,
        })
    }

    /// Captures the current state for snapshotting.
    fn state(&self) -> MsixConfigState {
        MsixConfigState {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
        }
    }

    /// Returns the Function Mask bit (global mask for all vectors).
    pub fn masked(&self) -> bool {
        self.masked
    }

    /// Returns the MSI-X Enable bit.
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Handles a guest write to the Message Control register: updates the
    /// Function Mask and MSI-X Enable bits and reprograms interrupt routes
    /// to match the new state.
    pub fn set_msg_ctl(&mut self, reg: u16) {
        let old_masked = self.masked;
        let old_enabled = self.enabled;

        self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
        self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;

        // Update interrupt routing
        if old_masked != self.masked || old_enabled != self.enabled {
            if self.enabled && !self.masked {
                // MSI-X became active: program a route for every table
                // entry, carrying each vector's own mask bit.
                debug!("MSI-X enabled for device 0x{:x}", self.devid);
                for (idx, table_entry) in self.table_entries.iter().enumerate() {
                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid: self.devid,
                    };

                    if let Err(e) = self.interrupt_source_group.update(
                        idx as InterruptIndex,
                        InterruptSourceConfig::MsiIrq(config),
                        table_entry.masked(),
                        true,
                    ) {
                        error!("Failed updating vector: {:?}", e);
                    }
                }
            } else if old_enabled || !old_masked {
                // MSI-X was previously deliverable (or at least enabled)
                // and no longer is: tear the routes down.
                debug!("MSI-X disabled for device 0x{:x}", self.devid);
                if let Err(e) = self.interrupt_source_group.disable() {
                    error!("Failed disabling irq_fd: {:?}", e);
                }
            }
        }

        // If the Function Mask bit was set, and has just been cleared, it's
        // important to go through the entire PBA to check if there was any
        // pending MSI-X message to inject, given that the vector is not
        // masked.
        if old_masked && !self.masked {
            // clone() so we can call &mut self methods while iterating.
            for (index, entry) in self.table_entries.clone().iter().enumerate() {
                if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
            }
        }
    }

    /// Handles a guest read from the MSI-X table BAR region.
    ///
    /// Supports 4- and 8-byte aligned accesses; out-of-range indices read
    /// as all-ones (PCI convention for invalid accesses).
    ///
    /// # Panics
    ///
    /// Panics if `data` is not 4 or 8 bytes long.
    pub fn read_table(&self, offset: u64, data: &mut [u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            debug!("Invalid MSI-X table entry index {index}");
            data.copy_from_slice(&[0xff; 8][..data.len()]);
            return;
        }

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value = match modulo_offset {
                    // Low qword: 64-bit message address.
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    // High qword: vector control | message data.
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    /// Handles a guest write to the MSI-X table BAR region.
    ///
    /// Updates the targeted entry, then (if the vector is deliverable)
    /// reprograms its interrupt route, and finally injects any pending
    /// message if the write just unmasked a vector whose PBA bit is set.
    ///
    /// # Panics
    ///
    /// Panics if `data` is not 4 or 8 bytes long.
    pub fn write_table(&mut self, offset: u64, data: &[u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            debug!("Invalid MSI-X table entry index {index}");
            return;
        }

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = LittleEndian::read_u32(data);
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => {
                        self.table_entries[index].vector_ctl = value;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            8 => {
                let value = LittleEndian::read_u64(data);
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            _ => error!("invalid data length"),
        };

        let table_entry = &self.table_entries[index];

        // Optimisation to avoid excessive updates
        if &old_entry == table_entry {
            return;
        }

        // Update interrupt routes
        // Optimisation: only update routes if the entry is not masked;
        // this is safe because if the entry is masked (starts masked as per spec)
        // in the table then it won't be triggered. (See: #4273)
        if self.enabled && !self.masked && !table_entry.masked() {
            let config = MsiIrqSourceConfig {
                high_addr: table_entry.msg_addr_hi,
                low_addr: table_entry.msg_addr_lo,
                data: table_entry.msg_data,
                devid: self.devid,
            };

            if let Err(e) = self.interrupt_source_group.update(
                index as InterruptIndex,
                InterruptSourceConfig::MsiIrq(config),
                table_entry.masked(),
                true,
            ) {
                error!("Failed updating vector: {:?}", e);
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked()
            && self.enabled()
            && old_entry.masked()
            && !table_entry.masked()
            && self.get_pba_bit(index as u16) == 1
        {
            self.inject_msix_and_clear_pba(index);
        }
    }

    /// Handles a guest read from the Pending Bit Array BAR region.
    ///
    /// Out-of-range indices read as all-ones, like `read_table`.
    ///
    /// # Panics
    ///
    /// Panics if `data` is not 4 or 8 bytes long.
    pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        if index >= self.pba_entries.len() {
            debug!("Invalid MSI-X PBA entry index {index}");
            data.copy_from_slice(&[0xff; 8][..data.len()]);
            return;
        }

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    /// Guest writes to the PBA are ignored: the PBA is read-only per spec.
    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    /// Sets (`reset == false`) or clears (`reset == true`) the pending bit
    /// for `vector`.
    ///
    /// # Panics
    ///
    /// Panics if `vector` is out of range.
    pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = (1 << shift) as u64;

        if reset {
            mask = !mask;
            self.pba_entries[index] &= mask;
        } else {
            self.pba_entries[index] |= mask;
        }
    }

    /// Returns the pending bit (0 or 1) for `vector`.
    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    /// Triggers the MSI message for `vector` and clears its pending bit.
    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        // Inject the MSI message
        match self
            .interrupt_source_group
            .trigger(vector as InterruptIndex)
        {
            Ok(_) => debug!("MSI-X injected on vector control flip"),
            Err(e) => error!("failed to inject MSI-X: {}", e),
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, true);
    }
}

// No special pause/resume handling needed; default no-op impl.
impl Pausable for MsixConfig {}

impl Snapshottable for MsixConfig {
    fn id(&self) -> String {
        String::from(MSIX_CONFIG_ID)
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_state(&self.state())
    }
}

/// The MSI-X capability structure as it appears in PCI config space
/// (minus the capability ID / next-pointer header). `repr(packed)` so it
/// can be exposed byte-for-byte via `ByteValued`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default, Serialize, Deserialize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}

// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for MsixCap {}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::MsiX
    }
}

impl MsixCap {
    /// Builds an MSI-X capability advertising `table_size` vectors, with
    /// the table and PBA located at the given BARs/offsets.
    ///
    /// # Panics
    ///
    /// Panics if `table_size >= MAX_MSIX_VECTORS_PER_DEVICE`.
    /// NOTE(review): a `table_size` of 0 would also underflow the
    /// `table_size - 1` below (panic in debug builds) — callers presumably
    /// always pass at least 1; confirm.
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        // Table size field is encoded as N-1; 0x8000 sets the Enable bit.
        let msg_ctl: u16 = 0x8000u16 + table_size - 1;

        MsixCap {
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }

    /// Updates only the writable Message Control bits (Function Mask and
    /// MSI-X Enable); the table-size field is read-only.
    pub fn set_msg_ctl(&mut self, data: u16) {
        self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
            | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
    }

    /// Returns the Function Mask bit.
    pub fn masked(&self) -> bool {
        (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
    }

    /// Returns the MSI-X Enable bit.
    pub fn enabled(&self) -> bool {
        (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
    }

    /// Returns the table offset within its BAR (BIR bits cleared).
    pub fn table_offset(&self) -> u32 {
        self.table & 0xffff_fff8
    }

    /// Returns the PBA offset within its BAR (BIR bits cleared).
    pub fn pba_offset(&self) -> u32 {
        self.pba & 0xffff_fff8
    }

    /// Replaces the table offset, keeping the BIR bits.
    /// NOTE(review): uses `+=`, which is only equivalent to `|=` if `addr`
    /// has its low 3 bits clear and no offset was previously set — confirm
    /// callers pass 8-byte-aligned offsets.
    pub fn table_set_offset(&mut self, addr: u32) {
        self.table &= 0x7;
        self.table += addr;
    }

    /// Replaces the PBA offset, keeping the BIR bits (same caveat as
    /// `table_set_offset`).
    pub fn pba_set_offset(&mut self, addr: u32) {
        self.pba &= 0x7;
        self.pba += addr;
    }

    /// Returns the table BAR indicator (BIR), 0-5.
    pub fn table_bir(&self) -> u32 {
        self.table & 0x7
    }

    /// Returns the PBA BAR indicator (BIR), 0-5.
    pub fn pba_bir(&self) -> u32 {
        self.pba & 0x7
    }

    /// Returns the number of vectors (table-size field is encoded as N-1).
    pub fn table_size(&self) -> u16 {
        (self.msg_ctl & 0x7ff) + 1
    }

    /// Returns (offset, size) of the MSI-X table region in its BAR.
    pub fn table_range(&self) -> (u64, u64) {
        // The table takes 16 bytes per entry.
        let size = self.table_size() as u64 * 16;
        (self.table_offset() as u64, size)
    }

    /// Returns (offset, size) of the PBA region in its BAR.
    pub fn pba_range(&self) -> (u64, u64) {
        // The table takes 1 bit per entry modulo 8 bytes.
        let size = ((self.table_size() as u64 / 64) + 1) * 8;
        (self.pba_offset() as u64, size)
    }
}