1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 6 use std::sync::Arc; 7 use std::{io, result}; 8 9 use byteorder::{ByteOrder, LittleEndian}; 10 use serde::{Deserialize, Serialize}; 11 use thiserror::Error; 12 use vm_device::interrupt::{ 13 InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, 14 }; 15 use vm_memory::ByteValued; 16 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable}; 17 18 use crate::{PciCapability, PciCapabilityId}; 19 20 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; 21 const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; 22 const MSIX_PBA_ENTRIES_MODULO: u64 = 8; 23 const BITS_PER_PBA_ENTRY: usize = 64; 24 const FUNCTION_MASK_BIT: u8 = 14; 25 const MSIX_ENABLE_BIT: u8 = 15; 26 const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16; 27 const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16; 28 pub const MSIX_TABLE_ENTRY_SIZE: usize = 16; 29 pub const MSIX_CONFIG_ID: &str = "msix_config"; 30 31 #[derive(Error, Debug)] 32 pub enum Error { 33 /// Failed enabling the interrupt route. 34 #[error("Failed enabling the interrupt route: {0}")] 35 EnableInterruptRoute(#[source] io::Error), 36 /// Failed updating the interrupt route. 37 #[error("Failed updating the interrupt route: {0}")] 38 UpdateInterruptRoute(#[source] io::Error), 39 } 40 41 #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] 42 pub struct MsixTableEntry { 43 pub msg_addr_lo: u32, 44 pub msg_addr_hi: u32, 45 pub msg_data: u32, 46 pub vector_ctl: u32, 47 } 48 49 impl MsixTableEntry { 50 pub fn masked(&self) -> bool { 51 self.vector_ctl & 0x1 == 0x1 52 } 53 } 54 55 impl Default for MsixTableEntry { 56 fn default() -> Self { 57 MsixTableEntry { 58 msg_addr_lo: 0, 59 msg_addr_hi: 0, 60 msg_data: 0, 61 vector_ctl: 0x1, 62 } 63 } 64 } 65 66 #[derive(Serialize, Deserialize)] 67 pub struct MsixConfigState { 68 table_entries: Vec<MsixTableEntry>, 69 pba_entries: Vec<u64>, 70 masked: bool, 71 enabled: bool, 72 } 73 74 pub struct MsixConfig { 75 pub table_entries: Vec<MsixTableEntry>, 76 pub pba_entries: Vec<u64>, 77 pub devid: u32, 78 interrupt_source_group: Arc<dyn InterruptSourceGroup>, 79 masked: bool, 80 enabled: bool, 81 } 82 83 impl MsixConfig { 84 pub fn new( 85 msix_vectors: u16, 86 interrupt_source_group: Arc<dyn InterruptSourceGroup>, 87 devid: u32, 88 state: Option<MsixConfigState>, 89 ) -> result::Result<Self, Error> { 90 assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); 91 92 let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state { 93 if state.enabled && !state.masked { 94 for (idx, table_entry) in state.table_entries.iter().enumerate() { 95 if table_entry.masked() { 96 continue; 97 } 98 99 let config = MsiIrqSourceConfig { 100 high_addr: table_entry.msg_addr_hi, 101 low_addr: table_entry.msg_addr_lo, 102 data: table_entry.msg_data, 103 devid, 104 }; 105 106 interrupt_source_group 107 .update( 108 idx as InterruptIndex, 109 InterruptSourceConfig::MsiIrq(config), 110 state.masked, 111 true, 112 ) 113 .map_err(Error::UpdateInterruptRoute)?; 114 115 interrupt_source_group 116 .enable() 117 .map_err(Error::EnableInterruptRoute)?; 118 } 119 } 120 121 ( 122 state.table_entries, 123 state.pba_entries, 124 state.masked, 125 state.enabled, 126 ) 127 } else { 128 let mut table_entries: Vec<MsixTableEntry> = Vec::new(); 129 table_entries.resize_with(msix_vectors as usize, Default::default); 130 let mut pba_entries: Vec<u64> = Vec::new(); 131 let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1; 132 pba_entries.resize_with(num_pba_entries, Default::default); 133 134 (table_entries, pba_entries, true, false) 135 }; 136 137 Ok(MsixConfig { 138 table_entries, 139 pba_entries, 140 devid, 141 interrupt_source_group, 142 masked, 143 enabled, 144 }) 145 } 146 147 fn state(&self) -> MsixConfigState { 148 MsixConfigState { 149 table_entries: self.table_entries.clone(), 150 pba_entries: self.pba_entries.clone(), 151 masked: self.masked, 152 enabled: self.enabled, 153 } 154 } 155 156 pub fn masked(&self) -> bool { 157 self.masked 158 } 159 160 pub fn enabled(&self) -> bool { 161 self.enabled 162 } 163 164 pub fn set_msg_ctl(&mut self, reg: u16) { 165 let old_masked = self.masked; 166 let old_enabled = self.enabled; 167 168 self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16; 169 self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16; 170 171 // Update interrupt routing 172 if old_masked != self.masked || old_enabled != self.enabled { 173 if self.enabled && !self.masked { 174 debug!("MSI-X enabled for device 0x{:x}", self.devid); 175 for (idx, table_entry) in self.table_entries.iter().enumerate() { 176 let config = MsiIrqSourceConfig { 177 high_addr: table_entry.msg_addr_hi, 178 low_addr: table_entry.msg_addr_lo, 179 data: table_entry.msg_data, 180 devid: self.devid, 181 }; 182 183 if let Err(e) = self.interrupt_source_group.update( 184 idx as InterruptIndex, 185 InterruptSourceConfig::MsiIrq(config), 186 table_entry.masked(), 187 true, 188 ) { 189 error!("Failed updating vector: {:?}", e); 190 } 191 } 192 } else if old_enabled || !old_masked { 193 debug!("MSI-X disabled for device 0x{:x}", self.devid); 194 if let Err(e) = self.interrupt_source_group.disable() { 195 error!("Failed disabling irq_fd: {:?}", e); 196 } 197 } 198 } 199 200 // If the Function Mask bit was set, and has just been cleared, it's 201 // important to go through the entire PBA to check if there was any 202 // pending MSI-X message to inject, given that the vector is not 203 // masked. 204 if old_masked && !self.masked { 205 for (index, entry) in self.table_entries.clone().iter().enumerate() { 206 if !entry.masked() && self.get_pba_bit(index as u16) == 1 { 207 self.inject_msix_and_clear_pba(index); 208 } 209 } 210 } 211 } 212 213 pub fn read_table(&self, offset: u64, data: &mut [u8]) { 214 assert!((data.len() == 4 || data.len() == 8)); 215 216 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 217 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 218 219 if index >= self.table_entries.len() { 220 debug!("Invalid MSI-X table entry index {index}"); 221 data.copy_from_slice(&[0xff; 8][..data.len()]); 222 return; 223 } 224 225 match data.len() { 226 4 => { 227 let value = match modulo_offset { 228 0x0 => self.table_entries[index].msg_addr_lo, 229 0x4 => self.table_entries[index].msg_addr_hi, 230 0x8 => self.table_entries[index].msg_data, 231 0xc => self.table_entries[index].vector_ctl, 232 _ => { 233 error!("invalid offset"); 234 0 235 } 236 }; 237 238 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); 239 LittleEndian::write_u32(data, value); 240 } 241 8 => { 242 let value = match modulo_offset { 243 0x0 => { 244 (u64::from(self.table_entries[index].msg_addr_hi) << 32) 245 | u64::from(self.table_entries[index].msg_addr_lo) 246 } 247 0x8 => { 248 (u64::from(self.table_entries[index].vector_ctl) << 32) 249 | u64::from(self.table_entries[index].msg_data) 250 } 251 _ => { 252 error!("invalid offset"); 253 0 254 } 255 }; 256 257 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); 258 LittleEndian::write_u64(data, value); 259 } 260 _ => { 261 error!("invalid data length"); 262 } 263 } 264 } 265 266 pub fn write_table(&mut self, offset: u64, data: &[u8]) { 267 assert!((data.len() == 4 || data.len() == 8)); 268 269 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 270 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 271 272 if index >= self.table_entries.len() { 273 debug!("Invalid MSI-X table entry index {index}"); 274 return; 275 } 276 277 // Store the value of the entry before modification 278 let old_entry = self.table_entries[index].clone(); 279 280 match data.len() { 281 4 => { 282 let value = LittleEndian::read_u32(data); 283 match modulo_offset { 284 0x0 => self.table_entries[index].msg_addr_lo = value, 285 0x4 => self.table_entries[index].msg_addr_hi = value, 286 0x8 => self.table_entries[index].msg_data = value, 287 0xc => { 288 self.table_entries[index].vector_ctl = value; 289 } 290 _ => error!("invalid offset"), 291 }; 292 293 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); 294 } 295 8 => { 296 let value = LittleEndian::read_u64(data); 297 match modulo_offset { 298 0x0 => { 299 self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32; 300 self.table_entries[index].msg_addr_hi = (value >> 32) as u32; 301 } 302 0x8 => { 303 self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; 304 self.table_entries[index].vector_ctl = (value >> 32) as u32; 305 } 306 _ => error!("invalid offset"), 307 }; 308 309 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); 310 } 311 _ => error!("invalid data length"), 312 }; 313 314 let table_entry = &self.table_entries[index]; 315 316 // Optimisation to avoid excessive updates 317 if &old_entry == table_entry { 318 return; 319 } 320 321 // Update interrupt routes 322 // Optimisation: only update routes if the entry is not masked; 323 // this is safe because if the entry is masked (starts masked as per spec) 324 // in the table then it won't be triggered. (See: #4273) 325 if self.enabled && !self.masked && !table_entry.masked() { 326 let config = MsiIrqSourceConfig { 327 high_addr: table_entry.msg_addr_hi, 328 low_addr: table_entry.msg_addr_lo, 329 data: table_entry.msg_data, 330 devid: self.devid, 331 }; 332 333 if let Err(e) = self.interrupt_source_group.update( 334 index as InterruptIndex, 335 InterruptSourceConfig::MsiIrq(config), 336 table_entry.masked(), 337 true, 338 ) { 339 error!("Failed updating vector: {:?}", e); 340 } 341 } 342 343 // After the MSI-X table entry has been updated, it is necessary to 344 // check if the vector control masking bit has changed. In case the 345 // bit has been flipped from 1 to 0, we need to inject a MSI message 346 // if the corresponding pending bit from the PBA is set. Once the MSI 347 // has been injected, the pending bit in the PBA needs to be cleared. 348 // All of this is valid only if MSI-X has not been masked for the whole 349 // device. 350 351 // Check if bit has been flipped 352 if !self.masked() 353 && self.enabled() 354 && old_entry.masked() 355 && !table_entry.masked() 356 && self.get_pba_bit(index as u16) == 1 357 { 358 self.inject_msix_and_clear_pba(index); 359 } 360 } 361 362 pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { 363 assert!((data.len() == 4 || data.len() == 8)); 364 365 let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; 366 let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; 367 368 if index >= self.pba_entries.len() { 369 debug!("Invalid MSI-X PBA entry index {index}"); 370 data.copy_from_slice(&[0xff; 8][..data.len()]); 371 return; 372 } 373 374 match data.len() { 375 4 => { 376 let value: u32 = match modulo_offset { 377 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 378 0x4 => (self.pba_entries[index] >> 32) as u32, 379 _ => { 380 error!("invalid offset"); 381 0 382 } 383 }; 384 385 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); 386 LittleEndian::write_u32(data, value); 387 } 388 8 => { 389 let value: u64 = match modulo_offset { 390 0x0 => self.pba_entries[index], 391 _ => { 392 error!("invalid offset"); 393 0 394 } 395 }; 396 397 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); 398 LittleEndian::write_u64(data, value); 399 } 400 _ => { 401 error!("invalid data length"); 402 } 403 } 404 } 405 406 pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) { 407 error!("Pending Bit Array is read only"); 408 } 409 410 pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { 411 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 412 413 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 414 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 415 let mut mask: u64 = (1 << shift) as u64; 416 417 if reset { 418 mask = !mask; 419 self.pba_entries[index] &= mask; 420 } else { 421 self.pba_entries[index] |= mask; 422 } 423 } 424 425 fn get_pba_bit(&self, vector: u16) -> u8 { 426 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 427 428 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 429 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 430 431 ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 432 } 433 434 fn inject_msix_and_clear_pba(&mut self, vector: usize) { 435 // Inject the MSI message 436 match self 437 .interrupt_source_group 438 .trigger(vector as InterruptIndex) 439 { 440 Ok(_) => debug!("MSI-X injected on vector control flip"), 441 Err(e) => error!("failed to inject MSI-X: {}", e), 442 } 443 444 // Clear the bit from PBA 445 self.set_pba_bit(vector as u16, true); 446 } 447 } 448 449 impl Pausable for MsixConfig {} 450 451 impl Snapshottable for MsixConfig { 452 fn id(&self) -> String { 453 String::from(MSIX_CONFIG_ID) 454 } 455 456 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 457 Snapshot::new_from_state(&self.state()) 458 } 459 } 460 461 #[allow(dead_code)] 462 #[repr(C, packed)] 463 #[derive(Clone, Copy, Default, Serialize, Deserialize)] 464 pub struct MsixCap { 465 // Message Control Register 466 // 10-0: MSI-X Table size 467 // 13-11: Reserved 468 // 14: Mask. Mask all MSI-X when set. 469 // 15: Enable. Enable all MSI-X when set. 470 pub msg_ctl: u16, 471 // Table. Contains the offset and the BAR indicator (BIR) 472 // 2-0: Table BAR indicator (BIR). Can be 0 to 5. 473 // 31-3: Table offset in the BAR pointed by the BIR. 474 pub table: u32, 475 // Pending Bit Array. Contains the offset and the BAR indicator (BIR) 476 // 2-0: PBA BAR indicator (BIR). Can be 0 to 5. 477 // 31-3: PBA offset in the BAR pointed by the BIR. 478 pub pba: u32, 479 } 480 481 // SAFETY: All members are simple numbers and any value is valid. 482 unsafe impl ByteValued for MsixCap {} 483 484 impl PciCapability for MsixCap { 485 fn bytes(&self) -> &[u8] { 486 self.as_slice() 487 } 488 489 fn id(&self) -> PciCapabilityId { 490 PciCapabilityId::MsiX 491 } 492 } 493 494 impl MsixCap { 495 pub fn new( 496 table_pci_bar: u8, 497 table_size: u16, 498 table_off: u32, 499 pba_pci_bar: u8, 500 pba_off: u32, 501 ) -> Self { 502 assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE); 503 504 // Set the table size and enable MSI-X. 505 let msg_ctl: u16 = 0x8000u16 + table_size - 1; 506 507 MsixCap { 508 msg_ctl, 509 table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8), 510 pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8), 511 } 512 } 513 514 pub fn set_msg_ctl(&mut self, data: u16) { 515 self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)) 516 | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)); 517 } 518 519 pub fn masked(&self) -> bool { 520 (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1 521 } 522 523 pub fn enabled(&self) -> bool { 524 (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1 525 } 526 527 pub fn table_offset(&self) -> u32 { 528 self.table & 0xffff_fff8 529 } 530 531 pub fn pba_offset(&self) -> u32 { 532 self.pba & 0xffff_fff8 533 } 534 535 pub fn table_set_offset(&mut self, addr: u32) { 536 self.table &= 0x7; 537 self.table += addr; 538 } 539 540 pub fn pba_set_offset(&mut self, addr: u32) { 541 self.pba &= 0x7; 542 self.pba += addr; 543 } 544 545 pub fn table_bir(&self) -> u32 { 546 self.table & 0x7 547 } 548 549 pub fn pba_bir(&self) -> u32 { 550 self.pba & 0x7 551 } 552 553 pub fn table_size(&self) -> u16 { 554 (self.msg_ctl & 0x7ff) + 1 555 } 556 557 pub fn table_range(&self) -> (u64, u64) { 558 // The table takes 16 bytes per entry. 559 let size = self.table_size() as u64 * 16; 560 (self.table_offset() as u64, size) 561 } 562 563 pub fn pba_range(&self) -> (u64, u64) { 564 // The table takes 1 bit per entry modulo 8 bytes. 565 let size = ((self.table_size() as u64 / 64) + 1) * 8; 566 (self.pba_offset() as u64, size) 567 } 568 } 569