1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 6 use std::sync::Arc; 7 use std::{io, result}; 8 9 use byteorder::{ByteOrder, LittleEndian}; 10 use serde::{Deserialize, Serialize}; 11 use vm_device::interrupt::{ 12 InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, 13 }; 14 use vm_memory::ByteValued; 15 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable}; 16 17 use crate::{PciCapability, PciCapabilityId}; 18 19 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; 20 const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; 21 const MSIX_PBA_ENTRIES_MODULO: u64 = 8; 22 const BITS_PER_PBA_ENTRY: usize = 64; 23 const FUNCTION_MASK_BIT: u8 = 14; 24 const MSIX_ENABLE_BIT: u8 = 15; 25 const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16; 26 const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16; 27 pub const MSIX_TABLE_ENTRY_SIZE: usize = 16; 28 pub const MSIX_CONFIG_ID: &str = "msix_config"; 29 30 #[derive(Debug)] 31 pub enum Error { 32 /// Failed enabling the interrupt route. 33 EnableInterruptRoute(io::Error), 34 /// Failed updating the interrupt route. 35 UpdateInterruptRoute(io::Error), 36 } 37 38 #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] 39 pub struct MsixTableEntry { 40 pub msg_addr_lo: u32, 41 pub msg_addr_hi: u32, 42 pub msg_data: u32, 43 pub vector_ctl: u32, 44 } 45 46 impl MsixTableEntry { 47 pub fn masked(&self) -> bool { 48 self.vector_ctl & 0x1 == 0x1 49 } 50 } 51 52 impl Default for MsixTableEntry { 53 fn default() -> Self { 54 MsixTableEntry { 55 msg_addr_lo: 0, 56 msg_addr_hi: 0, 57 msg_data: 0, 58 vector_ctl: 0x1, 59 } 60 } 61 } 62 63 #[derive(Serialize, Deserialize)] 64 pub struct MsixConfigState { 65 table_entries: Vec<MsixTableEntry>, 66 pba_entries: Vec<u64>, 67 masked: bool, 68 enabled: bool, 69 } 70 71 pub struct MsixConfig { 72 pub table_entries: Vec<MsixTableEntry>, 73 pub pba_entries: Vec<u64>, 74 pub devid: u32, 75 interrupt_source_group: Arc<dyn InterruptSourceGroup>, 76 masked: bool, 77 enabled: bool, 78 } 79 80 impl MsixConfig { 81 pub fn new( 82 msix_vectors: u16, 83 interrupt_source_group: Arc<dyn InterruptSourceGroup>, 84 devid: u32, 85 state: Option<MsixConfigState>, 86 ) -> result::Result<Self, Error> { 87 assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); 88 89 let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state { 90 if state.enabled && !state.masked { 91 for (idx, table_entry) in state.table_entries.iter().enumerate() { 92 if table_entry.masked() { 93 continue; 94 } 95 96 let config = MsiIrqSourceConfig { 97 high_addr: table_entry.msg_addr_hi, 98 low_addr: table_entry.msg_addr_lo, 99 data: table_entry.msg_data, 100 devid, 101 }; 102 103 interrupt_source_group 104 .update( 105 idx as InterruptIndex, 106 InterruptSourceConfig::MsiIrq(config), 107 state.masked, 108 true, 109 ) 110 .map_err(Error::UpdateInterruptRoute)?; 111 112 interrupt_source_group 113 .enable() 114 .map_err(Error::EnableInterruptRoute)?; 115 } 116 } 117 118 ( 119 state.table_entries, 120 state.pba_entries, 121 state.masked, 122 state.enabled, 123 ) 124 } else { 125 let mut table_entries: Vec<MsixTableEntry> = Vec::new(); 126 table_entries.resize_with(msix_vectors as usize, Default::default); 127 let mut pba_entries: Vec<u64> = Vec::new(); 128 let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1; 129 pba_entries.resize_with(num_pba_entries, Default::default); 130 131 (table_entries, pba_entries, true, false) 132 }; 133 134 Ok(MsixConfig { 135 table_entries, 136 pba_entries, 137 devid, 138 interrupt_source_group, 139 masked, 140 enabled, 141 }) 142 } 143 144 fn state(&self) -> MsixConfigState { 145 MsixConfigState { 146 table_entries: self.table_entries.clone(), 147 pba_entries: self.pba_entries.clone(), 148 masked: self.masked, 149 enabled: self.enabled, 150 } 151 } 152 153 pub fn masked(&self) -> bool { 154 self.masked 155 } 156 157 pub fn enabled(&self) -> bool { 158 self.enabled 159 } 160 161 pub fn set_msg_ctl(&mut self, reg: u16) { 162 let old_masked = self.masked; 163 let old_enabled = self.enabled; 164 165 self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16; 166 self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16; 167 168 // Update interrupt routing 169 if old_masked != self.masked || old_enabled != self.enabled { 170 if self.enabled && !self.masked { 171 debug!("MSI-X enabled for device 0x{:x}", self.devid); 172 for (idx, table_entry) in self.table_entries.iter().enumerate() { 173 let config = MsiIrqSourceConfig { 174 high_addr: table_entry.msg_addr_hi, 175 low_addr: table_entry.msg_addr_lo, 176 data: table_entry.msg_data, 177 devid: self.devid, 178 }; 179 180 if let Err(e) = self.interrupt_source_group.update( 181 idx as InterruptIndex, 182 InterruptSourceConfig::MsiIrq(config), 183 table_entry.masked(), 184 true, 185 ) { 186 error!("Failed updating vector: {:?}", e); 187 } 188 } 189 } else if old_enabled || !old_masked { 190 debug!("MSI-X disabled for device 0x{:x}", self.devid); 191 if let Err(e) = self.interrupt_source_group.disable() { 192 error!("Failed disabling irq_fd: {:?}", e); 193 } 194 } 195 } 196 197 // If the Function Mask bit was set, and has just been cleared, it's 198 // important to go through the entire PBA to check if there was any 199 // pending MSI-X message to inject, given that the vector is not 200 // masked. 201 if old_masked && !self.masked { 202 for (index, entry) in self.table_entries.clone().iter().enumerate() { 203 if !entry.masked() && self.get_pba_bit(index as u16) == 1 { 204 self.inject_msix_and_clear_pba(index); 205 } 206 } 207 } 208 } 209 210 pub fn read_table(&self, offset: u64, data: &mut [u8]) { 211 assert!((data.len() == 4 || data.len() == 8)); 212 213 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 214 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 215 216 if index >= self.table_entries.len() { 217 debug!("Invalid MSI-X table entry index {index}"); 218 data.copy_from_slice(&[0xff; 8][..data.len()]); 219 return; 220 } 221 222 match data.len() { 223 4 => { 224 let value = match modulo_offset { 225 0x0 => self.table_entries[index].msg_addr_lo, 226 0x4 => self.table_entries[index].msg_addr_hi, 227 0x8 => self.table_entries[index].msg_data, 228 0xc => self.table_entries[index].vector_ctl, 229 _ => { 230 error!("invalid offset"); 231 0 232 } 233 }; 234 235 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); 236 LittleEndian::write_u32(data, value); 237 } 238 8 => { 239 let value = match modulo_offset { 240 0x0 => { 241 (u64::from(self.table_entries[index].msg_addr_hi) << 32) 242 | u64::from(self.table_entries[index].msg_addr_lo) 243 } 244 0x8 => { 245 (u64::from(self.table_entries[index].vector_ctl) << 32) 246 | u64::from(self.table_entries[index].msg_data) 247 } 248 _ => { 249 error!("invalid offset"); 250 0 251 } 252 }; 253 254 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); 255 LittleEndian::write_u64(data, value); 256 } 257 _ => { 258 error!("invalid data length"); 259 } 260 } 261 } 262 263 pub fn write_table(&mut self, offset: u64, data: &[u8]) { 264 assert!((data.len() == 4 || data.len() == 8)); 265 266 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 267 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 268 269 if index >= self.table_entries.len() { 270 debug!("Invalid MSI-X table entry index {index}"); 271 return; 272 } 273 274 // Store the value of the entry before modification 275 let old_entry = self.table_entries[index].clone(); 276 277 match data.len() { 278 4 => { 279 let value = LittleEndian::read_u32(data); 280 match modulo_offset { 281 0x0 => self.table_entries[index].msg_addr_lo = value, 282 0x4 => self.table_entries[index].msg_addr_hi = value, 283 0x8 => self.table_entries[index].msg_data = value, 284 0xc => { 285 self.table_entries[index].vector_ctl = value; 286 } 287 _ => error!("invalid offset"), 288 }; 289 290 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); 291 } 292 8 => { 293 let value = LittleEndian::read_u64(data); 294 match modulo_offset { 295 0x0 => { 296 self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32; 297 self.table_entries[index].msg_addr_hi = (value >> 32) as u32; 298 } 299 0x8 => { 300 self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; 301 self.table_entries[index].vector_ctl = (value >> 32) as u32; 302 } 303 _ => error!("invalid offset"), 304 }; 305 306 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); 307 } 308 _ => error!("invalid data length"), 309 }; 310 311 let table_entry = &self.table_entries[index]; 312 313 // Optimisation to avoid excessive updates 314 if &old_entry == table_entry { 315 return; 316 } 317 318 // Update interrupt routes 319 // Optimisation: only update routes if the entry is not masked; 320 // this is safe because if the entry is masked (starts masked as per spec) 321 // in the table then it won't be triggered. (See: #4273) 322 if self.enabled && !self.masked && !table_entry.masked() { 323 let config = MsiIrqSourceConfig { 324 high_addr: table_entry.msg_addr_hi, 325 low_addr: table_entry.msg_addr_lo, 326 data: table_entry.msg_data, 327 devid: self.devid, 328 }; 329 330 if let Err(e) = self.interrupt_source_group.update( 331 index as InterruptIndex, 332 InterruptSourceConfig::MsiIrq(config), 333 table_entry.masked(), 334 true, 335 ) { 336 error!("Failed updating vector: {:?}", e); 337 } 338 } 339 340 // After the MSI-X table entry has been updated, it is necessary to 341 // check if the vector control masking bit has changed. In case the 342 // bit has been flipped from 1 to 0, we need to inject a MSI message 343 // if the corresponding pending bit from the PBA is set. Once the MSI 344 // has been injected, the pending bit in the PBA needs to be cleared. 345 // All of this is valid only if MSI-X has not been masked for the whole 346 // device. 347 348 // Check if bit has been flipped 349 if !self.masked() 350 && self.enabled() 351 && old_entry.masked() 352 && !table_entry.masked() 353 && self.get_pba_bit(index as u16) == 1 354 { 355 self.inject_msix_and_clear_pba(index); 356 } 357 } 358 359 pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { 360 assert!((data.len() == 4 || data.len() == 8)); 361 362 let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; 363 let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; 364 365 if index >= self.pba_entries.len() { 366 debug!("Invalid MSI-X PBA entry index {index}"); 367 data.copy_from_slice(&[0xff; 8][..data.len()]); 368 return; 369 } 370 371 match data.len() { 372 4 => { 373 let value: u32 = match modulo_offset { 374 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 375 0x4 => (self.pba_entries[index] >> 32) as u32, 376 _ => { 377 error!("invalid offset"); 378 0 379 } 380 }; 381 382 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); 383 LittleEndian::write_u32(data, value); 384 } 385 8 => { 386 let value: u64 = match modulo_offset { 387 0x0 => self.pba_entries[index], 388 _ => { 389 error!("invalid offset"); 390 0 391 } 392 }; 393 394 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); 395 LittleEndian::write_u64(data, value); 396 } 397 _ => { 398 error!("invalid data length"); 399 } 400 } 401 } 402 403 pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) { 404 error!("Pending Bit Array is read only"); 405 } 406 407 pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { 408 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 409 410 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 411 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 412 let mut mask: u64 = (1 << shift) as u64; 413 414 if reset { 415 mask = !mask; 416 self.pba_entries[index] &= mask; 417 } else { 418 self.pba_entries[index] |= mask; 419 } 420 } 421 422 fn get_pba_bit(&self, vector: u16) -> u8 { 423 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 424 425 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 426 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 427 428 ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 429 } 430 431 fn inject_msix_and_clear_pba(&mut self, vector: usize) { 432 // Inject the MSI message 433 match self 434 .interrupt_source_group 435 .trigger(vector as InterruptIndex) 436 { 437 Ok(_) => debug!("MSI-X injected on vector control flip"), 438 Err(e) => error!("failed to inject MSI-X: {}", e), 439 } 440 441 // Clear the bit from PBA 442 self.set_pba_bit(vector as u16, true); 443 } 444 } 445 446 impl Pausable for MsixConfig {} 447 448 impl Snapshottable for MsixConfig { 449 fn id(&self) -> String { 450 String::from(MSIX_CONFIG_ID) 451 } 452 453 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 454 Snapshot::new_from_state(&self.state()) 455 } 456 } 457 458 #[allow(dead_code)] 459 #[repr(C, packed)] 460 #[derive(Clone, Copy, Default, Serialize, Deserialize)] 461 pub struct MsixCap { 462 // Message Control Register 463 // 10-0: MSI-X Table size 464 // 13-11: Reserved 465 // 14: Mask. Mask all MSI-X when set. 466 // 15: Enable. Enable all MSI-X when set. 467 pub msg_ctl: u16, 468 // Table. Contains the offset and the BAR indicator (BIR) 469 // 2-0: Table BAR indicator (BIR). Can be 0 to 5. 470 // 31-3: Table offset in the BAR pointed by the BIR. 471 pub table: u32, 472 // Pending Bit Array. Contains the offset and the BAR indicator (BIR) 473 // 2-0: PBA BAR indicator (BIR). Can be 0 to 5. 474 // 31-3: PBA offset in the BAR pointed by the BIR. 475 pub pba: u32, 476 } 477 478 // SAFETY: All members are simple numbers and any value is valid. 479 unsafe impl ByteValued for MsixCap {} 480 481 impl PciCapability for MsixCap { 482 fn bytes(&self) -> &[u8] { 483 self.as_slice() 484 } 485 486 fn id(&self) -> PciCapabilityId { 487 PciCapabilityId::MsiX 488 } 489 } 490 491 impl MsixCap { 492 pub fn new( 493 table_pci_bar: u8, 494 table_size: u16, 495 table_off: u32, 496 pba_pci_bar: u8, 497 pba_off: u32, 498 ) -> Self { 499 assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE); 500 501 // Set the table size and enable MSI-X. 502 let msg_ctl: u16 = 0x8000u16 + table_size - 1; 503 504 MsixCap { 505 msg_ctl, 506 table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8), 507 pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8), 508 } 509 } 510 511 pub fn set_msg_ctl(&mut self, data: u16) { 512 self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)) 513 | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)); 514 } 515 516 pub fn masked(&self) -> bool { 517 (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1 518 } 519 520 pub fn enabled(&self) -> bool { 521 (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1 522 } 523 524 pub fn table_offset(&self) -> u32 { 525 self.table & 0xffff_fff8 526 } 527 528 pub fn pba_offset(&self) -> u32 { 529 self.pba & 0xffff_fff8 530 } 531 532 pub fn table_set_offset(&mut self, addr: u32) { 533 self.table &= 0x7; 534 self.table += addr; 535 } 536 537 pub fn pba_set_offset(&mut self, addr: u32) { 538 self.pba &= 0x7; 539 self.pba += addr; 540 } 541 542 pub fn table_bir(&self) -> u32 { 543 self.table & 0x7 544 } 545 546 pub fn pba_bir(&self) -> u32 { 547 self.pba & 0x7 548 } 549 550 pub fn table_size(&self) -> u16 { 551 (self.msg_ctl & 0x7ff) + 1 552 } 553 554 pub fn table_range(&self) -> (u64, u64) { 555 // The table takes 16 bytes per entry. 556 let size = self.table_size() as u64 * 16; 557 (self.table_offset() as u64, size) 558 } 559 560 pub fn pba_range(&self) -> (u64, u64) { 561 // The table takes 1 bit per entry modulo 8 bytes. 562 let size = ((self.table_size() as u64 / 64) + 1) * 8; 563 (self.pba_offset() as u64, size) 564 } 565 } 566