1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 6 use crate::{PciCapability, PciCapabilityId}; 7 use anyhow::anyhow; 8 use byteorder::{ByteOrder, LittleEndian}; 9 use std::io; 10 use std::result; 11 use std::sync::Arc; 12 use versionize::{VersionMap, Versionize, VersionizeResult}; 13 use versionize_derive::Versionize; 14 use vm_device::interrupt::{ 15 InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, 16 }; 17 use vm_memory::ByteValued; 18 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped}; 19 20 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; 21 const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; 22 const MSIX_PBA_ENTRIES_MODULO: u64 = 8; 23 const BITS_PER_PBA_ENTRY: usize = 64; 24 const FUNCTION_MASK_BIT: u8 = 14; 25 const MSIX_ENABLE_BIT: u8 = 15; 26 const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16; 27 const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16; 28 pub const MSIX_TABLE_ENTRY_SIZE: usize = 16; 29 30 #[derive(Debug)] 31 enum Error { 32 /// Failed enabling the interrupt route. 33 EnableInterruptRoute(io::Error), 34 /// Failed updating the interrupt route. 35 UpdateInterruptRoute(io::Error), 36 } 37 38 #[derive(Debug, Clone, Versionize)] 39 pub struct MsixTableEntry { 40 pub msg_addr_lo: u32, 41 pub msg_addr_hi: u32, 42 pub msg_data: u32, 43 pub vector_ctl: u32, 44 } 45 46 impl MsixTableEntry { 47 pub fn masked(&self) -> bool { 48 self.vector_ctl & 0x1 == 0x1 49 } 50 } 51 52 impl Default for MsixTableEntry { 53 fn default() -> Self { 54 MsixTableEntry { 55 msg_addr_lo: 0, 56 msg_addr_hi: 0, 57 msg_data: 0, 58 vector_ctl: 0x1, 59 } 60 } 61 } 62 63 #[derive(Versionize)] 64 struct MsixConfigState { 65 table_entries: Vec<MsixTableEntry>, 66 pba_entries: Vec<u64>, 67 masked: bool, 68 enabled: bool, 69 } 70 71 impl VersionMapped for MsixConfigState {} 72 73 pub struct MsixConfig { 74 pub table_entries: Vec<MsixTableEntry>, 75 pub pba_entries: Vec<u64>, 76 pub devid: u32, 77 interrupt_source_group: Arc<dyn InterruptSourceGroup>, 78 masked: bool, 79 enabled: bool, 80 } 81 82 impl MsixConfig { 83 pub fn new( 84 msix_vectors: u16, 85 interrupt_source_group: Arc<dyn InterruptSourceGroup>, 86 devid: u32, 87 ) -> Self { 88 assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); 89 90 let mut table_entries: Vec<MsixTableEntry> = Vec::new(); 91 table_entries.resize_with(msix_vectors as usize, Default::default); 92 let mut pba_entries: Vec<u64> = Vec::new(); 93 let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1; 94 pba_entries.resize_with(num_pba_entries, Default::default); 95 96 MsixConfig { 97 table_entries, 98 pba_entries, 99 devid, 100 interrupt_source_group, 101 masked: true, 102 enabled: false, 103 } 104 } 105 106 fn state(&self) -> MsixConfigState { 107 MsixConfigState { 108 table_entries: self.table_entries.clone(), 109 pba_entries: self.pba_entries.clone(), 110 masked: self.masked, 111 enabled: self.enabled, 112 } 113 } 114 115 fn set_state(&mut self, state: &MsixConfigState) -> result::Result<(), Error> { 116 self.table_entries = state.table_entries.clone(); 117 self.pba_entries = state.pba_entries.clone(); 118 self.masked = state.masked; 119 self.enabled = state.enabled; 120 121 if self.enabled && !self.masked { 122 for (idx, table_entry) in self.table_entries.iter().enumerate() { 123 if table_entry.masked() { 124 continue; 125 } 126 127 let config = MsiIrqSourceConfig { 128 high_addr: table_entry.msg_addr_hi, 129 low_addr: table_entry.msg_addr_lo, 130 data: table_entry.msg_data, 131 devid: self.devid, 132 }; 133 134 self.interrupt_source_group 135 .update(idx as InterruptIndex, InterruptSourceConfig::MsiIrq(config)) 136 .map_err(Error::UpdateInterruptRoute)?; 137 138 self.interrupt_source_group 139 .enable() 140 .map_err(Error::EnableInterruptRoute)?; 141 } 142 } 143 144 Ok(()) 145 } 146 147 pub fn masked(&self) -> bool { 148 self.masked 149 } 150 151 pub fn enabled(&self) -> bool { 152 self.enabled 153 } 154 155 pub fn set_msg_ctl(&mut self, reg: u16) { 156 let old_masked = self.masked; 157 let old_enabled = self.enabled; 158 159 self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16; 160 self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16; 161 162 // Update interrupt routing 163 if old_masked != self.masked || old_enabled != self.enabled { 164 if self.enabled && !self.masked { 165 debug!("MSI-X enabled for device 0x{:x}", self.devid); 166 for (idx, table_entry) in self.table_entries.iter().enumerate() { 167 let config = MsiIrqSourceConfig { 168 high_addr: table_entry.msg_addr_hi, 169 low_addr: table_entry.msg_addr_lo, 170 data: table_entry.msg_data, 171 devid: self.devid, 172 }; 173 174 if let Err(e) = self 175 .interrupt_source_group 176 .update(idx as InterruptIndex, InterruptSourceConfig::MsiIrq(config)) 177 { 178 error!("Failed updating vector: {:?}", e); 179 } 180 181 if table_entry.masked() { 182 if let Err(e) = self.interrupt_source_group.mask(idx as InterruptIndex) { 183 error!("Failed masking vector: {:?}", e); 184 } 185 } else if let Err(e) = self.interrupt_source_group.unmask(idx as InterruptIndex) 186 { 187 error!("Failed unmasking vector: {:?}", e); 188 } 189 } 190 } else if old_enabled || !old_masked { 191 debug!("MSI-X disabled for device 0x{:x}", self.devid); 192 if let Err(e) = self.interrupt_source_group.disable() { 193 error!("Failed disabling irq_fd: {:?}", e); 194 } 195 } 196 } 197 198 // If the Function Mask bit was set, and has just been cleared, it's 199 // important to go through the entire PBA to check if there was any 200 // pending MSI-X message to inject, given that the vector is not 201 // masked. 202 if old_masked && !self.masked { 203 for (index, entry) in self.table_entries.clone().iter().enumerate() { 204 if !entry.masked() && self.get_pba_bit(index as u16) == 1 { 205 self.inject_msix_and_clear_pba(index); 206 } 207 } 208 } 209 } 210 211 pub fn read_table(&self, offset: u64, data: &mut [u8]) { 212 assert!((data.len() == 4 || data.len() == 8)); 213 214 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 215 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 216 217 match data.len() { 218 4 => { 219 let value = match modulo_offset { 220 0x0 => self.table_entries[index].msg_addr_lo, 221 0x4 => self.table_entries[index].msg_addr_hi, 222 0x8 => self.table_entries[index].msg_data, 223 0xc => self.table_entries[index].vector_ctl, 224 _ => { 225 error!("invalid offset"); 226 0 227 } 228 }; 229 230 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); 231 LittleEndian::write_u32(data, value); 232 } 233 8 => { 234 let value = match modulo_offset { 235 0x0 => { 236 (u64::from(self.table_entries[index].msg_addr_hi) << 32) 237 | u64::from(self.table_entries[index].msg_addr_lo) 238 } 239 0x8 => { 240 (u64::from(self.table_entries[index].vector_ctl) << 32) 241 | u64::from(self.table_entries[index].msg_data) 242 } 243 _ => { 244 error!("invalid offset"); 245 0 246 } 247 }; 248 249 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); 250 LittleEndian::write_u64(data, value); 251 } 252 _ => { 253 error!("invalid data length"); 254 } 255 } 256 } 257 258 pub fn write_table(&mut self, offset: u64, data: &[u8]) { 259 assert!((data.len() == 4 || data.len() == 8)); 260 261 let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; 262 let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; 263 264 // Store the value of the entry before modification 265 let mut old_entry: Option<MsixTableEntry> = None; 266 267 match data.len() { 268 4 => { 269 let value = LittleEndian::read_u32(data); 270 match modulo_offset { 271 0x0 => self.table_entries[index].msg_addr_lo = value, 272 0x4 => self.table_entries[index].msg_addr_hi = value, 273 0x8 => self.table_entries[index].msg_data = value, 274 0xc => { 275 old_entry = Some(self.table_entries[index].clone()); 276 self.table_entries[index].vector_ctl = value; 277 } 278 _ => error!("invalid offset"), 279 }; 280 281 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); 282 } 283 8 => { 284 let value = LittleEndian::read_u64(data); 285 match modulo_offset { 286 0x0 => { 287 self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32; 288 self.table_entries[index].msg_addr_hi = (value >> 32) as u32; 289 } 290 0x8 => { 291 old_entry = Some(self.table_entries[index].clone()); 292 self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; 293 self.table_entries[index].vector_ctl = (value >> 32) as u32; 294 } 295 _ => error!("invalid offset"), 296 }; 297 298 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); 299 } 300 _ => error!("invalid data length"), 301 }; 302 303 // Update interrupt routes 304 if self.enabled && !self.masked { 305 let table_entry = &self.table_entries[index]; 306 307 let config = MsiIrqSourceConfig { 308 high_addr: table_entry.msg_addr_hi, 309 low_addr: table_entry.msg_addr_lo, 310 data: table_entry.msg_data, 311 devid: self.devid, 312 }; 313 314 if let Err(e) = self.interrupt_source_group.update( 315 index as InterruptIndex, 316 InterruptSourceConfig::MsiIrq(config), 317 ) { 318 error!("Failed updating vector: {:?}", e); 319 } 320 321 if table_entry.masked() { 322 if let Err(e) = self.interrupt_source_group.mask(index as InterruptIndex) { 323 error!("Failed masking vector: {:?}", e); 324 } 325 } else if let Err(e) = self.interrupt_source_group.unmask(index as InterruptIndex) { 326 error!("Failed unmasking vector: {:?}", e); 327 } 328 } 329 330 // After the MSI-X table entry has been updated, it is necessary to 331 // check if the vector control masking bit has changed. In case the 332 // bit has been flipped from 1 to 0, we need to inject a MSI message 333 // if the corresponding pending bit from the PBA is set. Once the MSI 334 // has been injected, the pending bit in the PBA needs to be cleared. 335 // All of this is valid only if MSI-X has not been masked for the whole 336 // device. 337 if let Some(old_entry) = old_entry { 338 // Check if bit has been flipped 339 if !self.masked() 340 && old_entry.masked() 341 && !self.table_entries[index].masked() 342 && self.get_pba_bit(index as u16) == 1 343 { 344 self.inject_msix_and_clear_pba(index); 345 } 346 } 347 } 348 349 pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { 350 assert!((data.len() == 4 || data.len() == 8)); 351 352 let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; 353 let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; 354 355 match data.len() { 356 4 => { 357 let value: u32 = match modulo_offset { 358 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 359 0x4 => (self.pba_entries[index] >> 32) as u32, 360 _ => { 361 error!("invalid offset"); 362 0 363 } 364 }; 365 366 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); 367 LittleEndian::write_u32(data, value); 368 } 369 8 => { 370 let value: u64 = match modulo_offset { 371 0x0 => self.pba_entries[index], 372 _ => { 373 error!("invalid offset"); 374 0 375 } 376 }; 377 378 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); 379 LittleEndian::write_u64(data, value); 380 } 381 _ => { 382 error!("invalid data length"); 383 } 384 } 385 } 386 387 pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) { 388 error!("Pending Bit Array is read only"); 389 } 390 391 pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { 392 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 393 394 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 395 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 396 let mut mask: u64 = (1 << shift) as u64; 397 398 if reset { 399 mask = !mask; 400 self.pba_entries[index] &= mask; 401 } else { 402 self.pba_entries[index] |= mask; 403 } 404 } 405 406 fn get_pba_bit(&self, vector: u16) -> u8 { 407 assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); 408 409 let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; 410 let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; 411 412 ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 413 } 414 415 fn inject_msix_and_clear_pba(&mut self, vector: usize) { 416 // Inject the MSI message 417 match self 418 .interrupt_source_group 419 .trigger(vector as InterruptIndex) 420 { 421 Ok(_) => debug!("MSI-X injected on vector control flip"), 422 Err(e) => error!("failed to inject MSI-X: {}", e), 423 } 424 425 // Clear the bit from PBA 426 self.set_pba_bit(vector as u16, true); 427 } 428 } 429 430 impl Pausable for MsixConfig {} 431 432 impl Snapshottable for MsixConfig { 433 fn id(&self) -> String { 434 String::from("msix_config") 435 } 436 437 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 438 Snapshot::new_from_versioned_state(&self.id(), &self.state()) 439 } 440 441 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 442 self.set_state(&snapshot.to_versioned_state(&self.id())?) 443 .map_err(|e| { 444 MigratableError::Restore(anyhow!( 445 "Could not restore state for {}: {:?}", 446 self.id(), 447 e 448 )) 449 }) 450 } 451 } 452 453 #[allow(dead_code)] 454 #[repr(packed)] 455 #[derive(Clone, Copy, Default)] 456 pub struct MsixCap { 457 // Message Control Register 458 // 10-0: MSI-X Table size 459 // 13-11: Reserved 460 // 14: Mask. Mask all MSI-X when set. 461 // 15: Enable. Enable all MSI-X when set. 462 pub msg_ctl: u16, 463 // Table. Contains the offset and the BAR indicator (BIR) 464 // 2-0: Table BAR indicator (BIR). Can be 0 to 5. 465 // 31-3: Table offset in the BAR pointed by the BIR. 466 pub table: u32, 467 // Pending Bit Array. Contains the offset and the BAR indicator (BIR) 468 // 2-0: PBA BAR indicator (BIR). Can be 0 to 5. 469 // 31-3: PBA offset in the BAR pointed by the BIR. 470 pub pba: u32, 471 } 472 473 // SAFETY: All members are simple numbers and any value is valid. 474 unsafe impl ByteValued for MsixCap {} 475 476 impl PciCapability for MsixCap { 477 fn bytes(&self) -> &[u8] { 478 self.as_slice() 479 } 480 481 fn id(&self) -> PciCapabilityId { 482 PciCapabilityId::MsiX 483 } 484 } 485 486 impl MsixCap { 487 pub fn new( 488 table_pci_bar: u8, 489 table_size: u16, 490 table_off: u32, 491 pba_pci_bar: u8, 492 pba_off: u32, 493 ) -> Self { 494 assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE); 495 496 // Set the table size and enable MSI-X. 497 let msg_ctl: u16 = 0x8000u16 + table_size - 1; 498 499 MsixCap { 500 msg_ctl, 501 table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8), 502 pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8), 503 } 504 } 505 506 pub fn set_msg_ctl(&mut self, data: u16) { 507 self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)) 508 | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)); 509 } 510 511 pub fn masked(&self) -> bool { 512 (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1 513 } 514 515 pub fn enabled(&self) -> bool { 516 (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1 517 } 518 519 pub fn table_offset(&self) -> u32 { 520 self.table & 0xffff_fff8 521 } 522 523 pub fn pba_offset(&self) -> u32 { 524 self.pba & 0xffff_fff8 525 } 526 527 pub fn table_bir(&self) -> u32 { 528 self.table & 0x7 529 } 530 531 pub fn pba_bir(&self) -> u32 { 532 self.pba & 0x7 533 } 534 535 pub fn table_size(&self) -> u16 { 536 (self.msg_ctl & 0x7ff) + 1 537 } 538 539 pub fn table_range(&self) -> (u64, u64) { 540 // The table takes 16 bytes per entry. 541 let size = self.table_size() as u64 * 16; 542 (self.table_offset() as u64, size) 543 } 544 545 pub fn pba_range(&self) -> (u64, u64) { 546 // The table takes 1 bit per entry modulo 8 bytes. 547 let size = ((self.table_size() as u64 / 64) + 1) * 8; 548 (self.pba_offset() as u64, size) 549 } 550 } 551