// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//

//! Emulation of the PCI MSI-X capability: the vector table, the Pending Bit
//! Array (PBA), and the capability registers themselves, including the wiring
//! of table/control updates through to the VMM interrupt routing layer.

use crate::{PciCapability, PciCapabilityId};
use byteorder::{ByteOrder, LittleEndian};
use std::io;
use std::result;
use std::sync::Arc;
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_device::interrupt::{
    InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
};
use vm_memory::ByteValued;
use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped};

// Upper bound on the number of MSI-X vectors this implementation supports
// per device (enforced by assertions in `MsixConfig::new` and the PBA
// accessors, and by `MsixCap::new`).
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Each MSI-X table entry is 16 bytes wide: offsets into the table are split
// into (entry index, offset within entry) using this modulo.
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// The PBA is addressed in 8-byte (u64) chunks.
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
// One u64 PBA entry carries the pending bits for 64 vectors.
const BITS_PER_PBA_ENTRY: usize = 64;
// Bit positions of the Function Mask and MSI-X Enable bits in the MSI-X
// Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
const MSIX_ENABLE_BIT: u8 = 15;
const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
/// Size in bytes of a single MSI-X table entry.
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
/// Identifier used for the `MsixConfig` migration snapshot section.
pub const MSIX_CONFIG_ID: &str = "msix_config";

/// Errors that can occur while (re)programming interrupt routes.
#[derive(Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}

/// One entry of the MSI-X table: message address (lo/hi), message data and
/// the Vector Control word (bit 0 of which is the per-vector mask).
#[derive(Debug, Clone, Versionize, Eq, PartialEq)]
pub struct MsixTableEntry {
    pub msg_addr_lo: u32,
    pub msg_addr_hi: u32,
    pub msg_data: u32,
    pub vector_ctl: u32,
}

impl MsixTableEntry {
    /// Returns true if this vector is masked (Vector Control bit 0 set).
    pub fn masked(&self) -> bool {
        self.vector_ctl & 0x1 == 0x1
    }
}

impl Default for MsixTableEntry {
    fn default() -> Self {
        MsixTableEntry {
            msg_addr_lo: 0,
            msg_addr_hi: 0,
            msg_data: 0,
            // Vectors start out masked (Vector Control mask bit set), which
            // matches the reset state the guest expects.
            vector_ctl: 0x1,
        }
    }
}

/// Serializable snapshot of an `MsixConfig`, used for save/restore.
#[derive(Versionize)]
pub struct MsixConfigState {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    masked: bool,
    enabled: bool,
}

impl VersionMapped for MsixConfigState {}

/// Runtime MSI-X state for one device: the vector table, the PBA, and the
/// device-level Function Mask / MSI-X Enable bits, plus the interrupt source
/// group used to program and trigger the routes.
pub struct MsixConfig {
    pub table_entries: Vec<MsixTableEntry>,
    pub pba_entries: Vec<u64>,
    pub devid: u32,
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    masked: bool,
    enabled: bool,
}

impl MsixConfig {
    /// Creates a new `MsixConfig` with `msix_vectors` entries.
    ///
    /// If `state` is provided (migration restore), the table/PBA contents and
    /// the masked/enabled flags are taken from it, and — when the restored
    /// device has MSI-X enabled and not function-masked — every unmasked
    /// vector's route is re-programmed and enabled.
    ///
    /// Without `state`, the table is filled with default (masked) entries and
    /// the device starts function-masked and disabled.
    ///
    /// # Errors
    ///
    /// Returns an `Error` if updating or enabling an interrupt route fails
    /// during restore.
    ///
    /// # Panics
    ///
    /// Panics if `msix_vectors` exceeds `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(
        msix_vectors: u16,
        interrupt_source_group: Arc<dyn InterruptSourceGroup>,
        devid: u32,
        state: Option<MsixConfigState>,
    ) -> result::Result<Self, Error> {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);

        let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
            if state.enabled && !state.masked {
                for (idx, table_entry) in state.table_entries.iter().enumerate() {
                    // Per-vector masked entries are skipped; their routes
                    // will be programmed when the guest unmasks them.
                    if table_entry.masked() {
                        continue;
                    }

                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid,
                    };

                    interrupt_source_group
                        .update(
                            idx as InterruptIndex,
                            InterruptSourceConfig::MsiIrq(config),
                            state.masked,
                        )
                        .map_err(Error::UpdateInterruptRoute)?;

                    interrupt_source_group
                        .enable()
                        .map_err(Error::EnableInterruptRoute)?;
                }
            }

            (
                state.table_entries,
                state.pba_entries,
                state.masked,
                state.enabled,
            )
        } else {
            let mut table_entries: Vec<MsixTableEntry> = Vec::new();
            table_entries.resize_with(msix_vectors as usize, Default::default);
            let mut pba_entries: Vec<u64> = Vec::new();
            // One pending bit per vector, 64 bits per u64 entry; the "+ 1"
            // rounds up so a partial final entry is always available.
            let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
            pba_entries.resize_with(num_pba_entries, Default::default);

            // Fresh devices come up function-masked and disabled.
            (table_entries, pba_entries, true, false)
        };

        Ok(MsixConfig {
            table_entries,
            pba_entries,
            devid,
            interrupt_source_group,
            masked,
            enabled,
        })
    }

    /// Captures the current state for migration snapshotting.
    fn state(&self) -> MsixConfigState {
        MsixConfigState {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
        }
    }

    /// Returns the device-level Function Mask bit.
    pub fn masked(&self) -> bool {
        self.masked
    }

    /// Returns the device-level MSI-X Enable bit.
    pub fn enabled(&self) -> bool {
        self.enabled
    }

    /// Handles a guest write to the MSI-X Message Control register,
    /// extracting the Function Mask and MSI-X Enable bits and reconciling
    /// the interrupt routes with the new state.
    pub fn set_msg_ctl(&mut self, reg: u16) {
        let old_masked = self.masked;
        let old_enabled = self.enabled;

        self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
        self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;

        // Update interrupt routing
        if old_masked != self.masked || old_enabled != self.enabled {
            if self.enabled && !self.masked {
                // Device just became enabled-and-unmasked: (re)program the
                // route of every vector from the current table contents.
                debug!("MSI-X enabled for device 0x{:x}", self.devid);
                for (idx, table_entry) in self.table_entries.iter().enumerate() {
                    let config = MsiIrqSourceConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid: self.devid,
                    };

                    if let Err(e) = self.interrupt_source_group.update(
                        idx as InterruptIndex,
                        InterruptSourceConfig::MsiIrq(config),
                        table_entry.masked(),
                    ) {
                        error!("Failed updating vector: {:?}", e);
                    }
                }
            } else if old_enabled || !old_masked {
                // Device left the active state (was enabled, or was not
                // function-masked): tear down delivery as a whole.
                debug!("MSI-X disabled for device 0x{:x}", self.devid);
                if let Err(e) = self.interrupt_source_group.disable() {
                    error!("Failed disabling irq_fd: {:?}", e);
                }
            }
        }

        // If the Function Mask bit was set, and has just been cleared, it's
        // important to go through the entire PBA to check if there was any
        // pending MSI-X message to inject, given that the vector is not
        // masked.
        if old_masked && !self.masked {
            // The table is cloned because injection mutates the PBA (self)
            // while we iterate over the entries.
            for (index, entry) in self.table_entries.clone().iter().enumerate() {
                if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
            }
        }
    }

    /// Handles a guest read from the MSI-X table region.
    ///
    /// `offset` is relative to the start of the table; only 4- and 8-byte
    /// naturally-aligned accesses are meaningful (others log an error and
    /// read back as 0).
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is neither 4 nor 8.
    pub fn read_table(&self, offset: u64, data: &mut [u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        // Split the offset into (entry index, byte offset within the entry).
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                // 8-byte reads cover either the full 64-bit message address
                // or the (data, vector control) pair.
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    /// Handles a guest write to the MSI-X table region, then reconciles
    /// interrupt routing and pending-bit injection with the new entry value.
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is neither 4 nor 8.
    pub fn write_table(&mut self, offset: u64, data: &[u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = LittleEndian::read_u32(data);
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => {
                        self.table_entries[index].vector_ctl = value;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            8 => {
                let value = LittleEndian::read_u64(data);
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            _ => error!("invalid data length"),
        };

        let table_entry = &self.table_entries[index];

        // Optimisation to avoid excessive updates
        if &old_entry == table_entry {
            return;
        }

        // Update interrupt routes
        // Optimisation: only update routes if the entry is not masked;
        // this is safe because if the entry is masked (starts masked as per spec)
        // in the table then it won't be triggered. (See: #4273)
        if self.enabled && !self.masked && !table_entry.masked() {
            let config = MsiIrqSourceConfig {
                high_addr: table_entry.msg_addr_hi,
                low_addr: table_entry.msg_addr_lo,
                data: table_entry.msg_data,
                devid: self.devid,
            };

            if let Err(e) = self.interrupt_source_group.update(
                index as InterruptIndex,
                InterruptSourceConfig::MsiIrq(config),
                table_entry.masked(),
            ) {
                error!("Failed updating vector: {:?}", e);
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked()
            && self.enabled()
            && old_entry.masked()
            && !table_entry.masked()
            && self.get_pba_bit(index as u16) == 1
        {
            self.inject_msix_and_clear_pba(index);
        }
    }

    /// Handles a guest read from the PBA region.
    ///
    /// `offset` is relative to the start of the PBA; only 4- and 8-byte
    /// naturally-aligned accesses are meaningful (others log an error and
    /// read back as 0).
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is neither 4 nor 8.
    pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };

                debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
                LittleEndian::write_u64(data, value);
            }
            _ => {
                error!("invalid data length");
            }
        }
    }

    /// Guest writes to the PBA are rejected: the Pending Bit Array is
    /// read-only from the guest's point of view.
    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    /// Sets (`reset == false`) or clears (`reset == true`) the pending bit
    /// for `vector` in the PBA.
    ///
    /// # Panics
    ///
    /// Panics if `vector` is out of the supported range.
    pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = (1 << shift) as u64;

        if reset {
            // Clearing: AND with the inverted mask.
            mask = !mask;
            self.pba_entries[index] &= mask;
        } else {
            self.pba_entries[index] |= mask;
        }
    }

    /// Returns the pending bit (0 or 1) for `vector`.
    ///
    /// # Panics
    ///
    /// Panics if `vector` is out of the supported range.
    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    /// Triggers the MSI-X message for `vector` and clears its PBA pending
    /// bit. Trigger failures are logged, not propagated: the pending bit is
    /// cleared regardless.
    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        // Inject the MSI message
        match self
            .interrupt_source_group
            .trigger(vector as InterruptIndex)
        {
            Ok(_) => debug!("MSI-X injected on vector control flip"),
            Err(e) => error!("failed to inject MSI-X: {}", e),
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, true);
    }
}

// No pause-specific behavior is needed; the trait's default methods suffice.
impl Pausable for MsixConfig {}

impl Snapshottable for MsixConfig {
    fn id(&self) -> String {
        String::from(MSIX_CONFIG_ID)
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.state())
    }
}

/// The MSI-X capability structure as it appears in PCI config space
/// (excluding the generic capability header). `repr(packed)` so it can be
/// exposed byte-for-byte via `ByteValued::as_slice`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default, Versionize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}

// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for MsixCap {}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::MsiX
    }
}

impl MsixCap {
    /// Builds an MSI-X capability advertising `table_size` vectors, with the
    /// table at `table_off` within BAR `table_pci_bar` and the PBA at
    /// `pba_off` within BAR `pba_pci_bar`. Offsets are 8-byte aligned and
    /// BIRs truncated to 3 bits, as the register layout requires.
    ///
    /// # Panics
    ///
    /// Panics if `table_size` is not below `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        // 0x8000 sets the Enable bit; the table size field is encoded as N-1.
        let msg_ctl: u16 = 0x8000u16 + table_size - 1;

        MsixCap {
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }

    /// Updates only the writable Message Control bits (Function Mask and
    /// MSI-X Enable); all other bits are preserved.
    pub fn set_msg_ctl(&mut self, data: u16) {
        self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
            | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
    }

    /// Returns the Function Mask bit from Message Control.
    pub fn masked(&self) -> bool {
        (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
    }

    /// Returns the MSI-X Enable bit from Message Control.
    pub fn enabled(&self) -> bool {
        (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
    }

    /// Returns the table offset within its BAR (BIR bits cleared).
    pub fn table_offset(&self) -> u32 {
        self.table & 0xffff_fff8
    }

    /// Returns the PBA offset within its BAR (BIR bits cleared).
    pub fn pba_offset(&self) -> u32 {
        self.pba & 0xffff_fff8
    }

    /// Replaces the table offset, keeping the BIR bits.
    /// NOTE(review): assumes `addr` is 8-byte aligned so the `+=` cannot
    /// carry into the BIR bits — callers should guarantee alignment.
    pub fn table_set_offset(&mut self, addr: u32) {
        self.table &= 0x7;
        self.table += addr;
    }

    /// Replaces the PBA offset, keeping the BIR bits.
    /// NOTE(review): same alignment assumption as `table_set_offset`.
    pub fn pba_set_offset(&mut self, addr: u32) {
        self.pba &= 0x7;
        self.pba += addr;
    }

    /// Returns the table BAR indicator (BIR).
    pub fn table_bir(&self) -> u32 {
        self.table & 0x7
    }

    /// Returns the PBA BAR indicator (BIR).
    pub fn pba_bir(&self) -> u32 {
        self.pba & 0x7
    }

    /// Returns the number of table entries (the N-1 encoded field, plus 1).
    pub fn table_size(&self) -> u16 {
        (self.msg_ctl & 0x7ff) + 1
    }

    /// Returns the (offset, size) byte range of the MSI-X table in its BAR.
    pub fn table_range(&self) -> (u64, u64) {
        // The table takes 16 bytes per entry.
        let size = self.table_size() as u64 * 16;
        (self.table_offset() as u64, size)
    }

    /// Returns the (offset, size) byte range of the PBA in its BAR.
    pub fn pba_range(&self) -> (u64, u64) {
        // The table takes 1 bit per entry modulo 8 bytes.
        let size = ((self.table_size() as u64 / 64) + 1) * 8;
        (self.pba_offset() as u64, size)
    }
}