xref: /cloud-hypervisor/pci/src/msix.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use crate::{PciCapability, PciCapabilityId};
7 use anyhow::anyhow;
8 use byteorder::{ByteOrder, LittleEndian};
9 use std::io;
10 use std::result;
11 use std::sync::Arc;
12 use versionize::{VersionMap, Versionize, VersionizeResult};
13 use versionize_derive::Versionize;
14 use vm_device::interrupt::{
15     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
16 };
17 use vm_memory::ByteValued;
18 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped};
19 
/// Upper bound on the number of MSI-X vectors a single device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Byte stride used to split an MSI-X table offset into an entry index and
/// an offset within that entry.
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// Byte stride used to split a PBA offset into an entry index and an offset
/// within that entry.
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits held by one `u64` PBA entry.
const BITS_PER_PBA_ENTRY: usize = 64;
/// Position of the Function Mask bit in the Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
/// Position of the MSI-X Enable bit in the Message Control register.
const MSIX_ENABLE_BIT: u8 = 15;
/// Message Control mask selecting only the Function Mask bit.
const FUNCTION_MASK_MASK: u16 = 1u16 << FUNCTION_MASK_BIT;
/// Message Control mask selecting only the MSI-X Enable bit.
const MSIX_ENABLE_MASK: u16 = 1u16 << MSIX_ENABLE_BIT;
/// Size in bytes of one MSI-X table entry.
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
29 
/// Failures reported while re-programming interrupt routes from a restored
/// state.
#[derive(Debug)]
enum Error {
    /// The interrupt source group could not be enabled.
    EnableInterruptRoute(io::Error),
    /// The route of a vector could not be updated.
    UpdateInterruptRoute(io::Error),
}
37 
38 #[derive(Debug, Clone, Versionize)]
39 pub struct MsixTableEntry {
40     pub msg_addr_lo: u32,
41     pub msg_addr_hi: u32,
42     pub msg_data: u32,
43     pub vector_ctl: u32,
44 }
45 
46 impl MsixTableEntry {
47     pub fn masked(&self) -> bool {
48         self.vector_ctl & 0x1 == 0x1
49     }
50 }
51 
52 impl Default for MsixTableEntry {
53     fn default() -> Self {
54         MsixTableEntry {
55             msg_addr_lo: 0,
56             msg_addr_hi: 0,
57             msg_data: 0,
58             vector_ctl: 0x1,
59         }
60     }
61 }
62 
63 #[derive(Versionize)]
64 struct MsixConfigState {
65     table_entries: Vec<MsixTableEntry>,
66     pba_entries: Vec<u64>,
67     masked: bool,
68     enabled: bool,
69 }
70 
71 impl VersionMapped for MsixConfigState {}
72 
73 pub struct MsixConfig {
74     pub table_entries: Vec<MsixTableEntry>,
75     pub pba_entries: Vec<u64>,
76     pub devid: u32,
77     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
78     masked: bool,
79     enabled: bool,
80 }
81 
82 impl MsixConfig {
83     pub fn new(
84         msix_vectors: u16,
85         interrupt_source_group: Arc<dyn InterruptSourceGroup>,
86         devid: u32,
87     ) -> Self {
88         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
89 
90         let mut table_entries: Vec<MsixTableEntry> = Vec::new();
91         table_entries.resize_with(msix_vectors as usize, Default::default);
92         let mut pba_entries: Vec<u64> = Vec::new();
93         let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
94         pba_entries.resize_with(num_pba_entries, Default::default);
95 
96         MsixConfig {
97             table_entries,
98             pba_entries,
99             devid,
100             interrupt_source_group,
101             masked: true,
102             enabled: false,
103         }
104     }
105 
106     fn state(&self) -> MsixConfigState {
107         MsixConfigState {
108             table_entries: self.table_entries.clone(),
109             pba_entries: self.pba_entries.clone(),
110             masked: self.masked,
111             enabled: self.enabled,
112         }
113     }
114 
115     fn set_state(&mut self, state: &MsixConfigState) -> result::Result<(), Error> {
116         self.table_entries = state.table_entries.clone();
117         self.pba_entries = state.pba_entries.clone();
118         self.masked = state.masked;
119         self.enabled = state.enabled;
120 
121         if self.enabled && !self.masked {
122             for (idx, table_entry) in self.table_entries.iter().enumerate() {
123                 if table_entry.masked() {
124                     continue;
125                 }
126 
127                 let config = MsiIrqSourceConfig {
128                     high_addr: table_entry.msg_addr_hi,
129                     low_addr: table_entry.msg_addr_lo,
130                     data: table_entry.msg_data,
131                     devid: self.devid,
132                 };
133 
134                 self.interrupt_source_group
135                     .update(idx as InterruptIndex, InterruptSourceConfig::MsiIrq(config))
136                     .map_err(Error::UpdateInterruptRoute)?;
137 
138                 self.interrupt_source_group
139                     .enable()
140                     .map_err(Error::EnableInterruptRoute)?;
141             }
142         }
143 
144         Ok(())
145     }
146 
147     pub fn masked(&self) -> bool {
148         self.masked
149     }
150 
151     pub fn enabled(&self) -> bool {
152         self.enabled
153     }
154 
155     pub fn set_msg_ctl(&mut self, reg: u16) {
156         let old_masked = self.masked;
157         let old_enabled = self.enabled;
158 
159         self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
160         self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;
161 
162         // Update interrupt routing
163         if old_masked != self.masked || old_enabled != self.enabled {
164             if self.enabled && !self.masked {
165                 debug!("MSI-X enabled for device 0x{:x}", self.devid);
166                 for (idx, table_entry) in self.table_entries.iter().enumerate() {
167                     let config = MsiIrqSourceConfig {
168                         high_addr: table_entry.msg_addr_hi,
169                         low_addr: table_entry.msg_addr_lo,
170                         data: table_entry.msg_data,
171                         devid: self.devid,
172                     };
173 
174                     if let Err(e) = self
175                         .interrupt_source_group
176                         .update(idx as InterruptIndex, InterruptSourceConfig::MsiIrq(config))
177                     {
178                         error!("Failed updating vector: {:?}", e);
179                     }
180 
181                     if table_entry.masked() {
182                         if let Err(e) = self.interrupt_source_group.mask(idx as InterruptIndex) {
183                             error!("Failed masking vector: {:?}", e);
184                         }
185                     } else if let Err(e) = self.interrupt_source_group.unmask(idx as InterruptIndex)
186                     {
187                         error!("Failed unmasking vector: {:?}", e);
188                     }
189                 }
190             } else if old_enabled || !old_masked {
191                 debug!("MSI-X disabled for device 0x{:x}", self.devid);
192                 if let Err(e) = self.interrupt_source_group.disable() {
193                     error!("Failed disabling irq_fd: {:?}", e);
194                 }
195             }
196         }
197 
198         // If the Function Mask bit was set, and has just been cleared, it's
199         // important to go through the entire PBA to check if there was any
200         // pending MSI-X message to inject, given that the vector is not
201         // masked.
202         if old_masked && !self.masked {
203             for (index, entry) in self.table_entries.clone().iter().enumerate() {
204                 if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
205                     self.inject_msix_and_clear_pba(index);
206                 }
207             }
208         }
209     }
210 
211     pub fn read_table(&self, offset: u64, data: &mut [u8]) {
212         assert!((data.len() == 4 || data.len() == 8));
213 
214         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
215         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
216 
217         match data.len() {
218             4 => {
219                 let value = match modulo_offset {
220                     0x0 => self.table_entries[index].msg_addr_lo,
221                     0x4 => self.table_entries[index].msg_addr_hi,
222                     0x8 => self.table_entries[index].msg_data,
223                     0xc => self.table_entries[index].vector_ctl,
224                     _ => {
225                         error!("invalid offset");
226                         0
227                     }
228                 };
229 
230                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
231                 LittleEndian::write_u32(data, value);
232             }
233             8 => {
234                 let value = match modulo_offset {
235                     0x0 => {
236                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
237                             | u64::from(self.table_entries[index].msg_addr_lo)
238                     }
239                     0x8 => {
240                         (u64::from(self.table_entries[index].vector_ctl) << 32)
241                             | u64::from(self.table_entries[index].msg_data)
242                     }
243                     _ => {
244                         error!("invalid offset");
245                         0
246                     }
247                 };
248 
249                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
250                 LittleEndian::write_u64(data, value);
251             }
252             _ => {
253                 error!("invalid data length");
254             }
255         }
256     }
257 
258     pub fn write_table(&mut self, offset: u64, data: &[u8]) {
259         assert!((data.len() == 4 || data.len() == 8));
260 
261         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
262         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
263 
264         // Store the value of the entry before modification
265         let mut old_entry: Option<MsixTableEntry> = None;
266 
267         match data.len() {
268             4 => {
269                 let value = LittleEndian::read_u32(data);
270                 match modulo_offset {
271                     0x0 => self.table_entries[index].msg_addr_lo = value,
272                     0x4 => self.table_entries[index].msg_addr_hi = value,
273                     0x8 => self.table_entries[index].msg_data = value,
274                     0xc => {
275                         old_entry = Some(self.table_entries[index].clone());
276                         self.table_entries[index].vector_ctl = value;
277                     }
278                     _ => error!("invalid offset"),
279                 };
280 
281                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
282             }
283             8 => {
284                 let value = LittleEndian::read_u64(data);
285                 match modulo_offset {
286                     0x0 => {
287                         self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
288                         self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
289                     }
290                     0x8 => {
291                         old_entry = Some(self.table_entries[index].clone());
292                         self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
293                         self.table_entries[index].vector_ctl = (value >> 32) as u32;
294                     }
295                     _ => error!("invalid offset"),
296                 };
297 
298                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
299             }
300             _ => error!("invalid data length"),
301         };
302 
303         // Update interrupt routes
304         if self.enabled && !self.masked {
305             let table_entry = &self.table_entries[index];
306 
307             let config = MsiIrqSourceConfig {
308                 high_addr: table_entry.msg_addr_hi,
309                 low_addr: table_entry.msg_addr_lo,
310                 data: table_entry.msg_data,
311                 devid: self.devid,
312             };
313 
314             if let Err(e) = self.interrupt_source_group.update(
315                 index as InterruptIndex,
316                 InterruptSourceConfig::MsiIrq(config),
317             ) {
318                 error!("Failed updating vector: {:?}", e);
319             }
320 
321             if table_entry.masked() {
322                 if let Err(e) = self.interrupt_source_group.mask(index as InterruptIndex) {
323                     error!("Failed masking vector: {:?}", e);
324                 }
325             } else if let Err(e) = self.interrupt_source_group.unmask(index as InterruptIndex) {
326                 error!("Failed unmasking vector: {:?}", e);
327             }
328         }
329 
330         // After the MSI-X table entry has been updated, it is necessary to
331         // check if the vector control masking bit has changed. In case the
332         // bit has been flipped from 1 to 0, we need to inject a MSI message
333         // if the corresponding pending bit from the PBA is set. Once the MSI
334         // has been injected, the pending bit in the PBA needs to be cleared.
335         // All of this is valid only if MSI-X has not been masked for the whole
336         // device.
337         if let Some(old_entry) = old_entry {
338             // Check if bit has been flipped
339             if !self.masked()
340                 && old_entry.masked()
341                 && !self.table_entries[index].masked()
342                 && self.get_pba_bit(index as u16) == 1
343             {
344                 self.inject_msix_and_clear_pba(index);
345             }
346         }
347     }
348 
349     pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
350         assert!((data.len() == 4 || data.len() == 8));
351 
352         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
353         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
354 
355         match data.len() {
356             4 => {
357                 let value: u32 = match modulo_offset {
358                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
359                     0x4 => (self.pba_entries[index] >> 32) as u32,
360                     _ => {
361                         error!("invalid offset");
362                         0
363                     }
364                 };
365 
366                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
367                 LittleEndian::write_u32(data, value);
368             }
369             8 => {
370                 let value: u64 = match modulo_offset {
371                     0x0 => self.pba_entries[index],
372                     _ => {
373                         error!("invalid offset");
374                         0
375                     }
376                 };
377 
378                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
379                 LittleEndian::write_u64(data, value);
380             }
381             _ => {
382                 error!("invalid data length");
383             }
384         }
385     }
386 
387     pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
388         error!("Pending Bit Array is read only");
389     }
390 
391     pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
392         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
393 
394         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
395         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
396         let mut mask: u64 = (1 << shift) as u64;
397 
398         if reset {
399             mask = !mask;
400             self.pba_entries[index] &= mask;
401         } else {
402             self.pba_entries[index] |= mask;
403         }
404     }
405 
406     fn get_pba_bit(&self, vector: u16) -> u8 {
407         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
408 
409         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
410         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
411 
412         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
413     }
414 
415     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
416         // Inject the MSI message
417         match self
418             .interrupt_source_group
419             .trigger(vector as InterruptIndex)
420         {
421             Ok(_) => debug!("MSI-X injected on vector control flip"),
422             Err(e) => error!("failed to inject MSI-X: {}", e),
423         }
424 
425         // Clear the bit from PBA
426         self.set_pba_bit(vector as u16, true);
427     }
428 }
429 
430 impl Pausable for MsixConfig {}
431 
432 impl Snapshottable for MsixConfig {
433     fn id(&self) -> String {
434         String::from("msix_config")
435     }
436 
437     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
438         Snapshot::new_from_versioned_state(&self.id(), &self.state())
439     }
440 
441     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
442         self.set_state(&snapshot.to_versioned_state(&self.id())?)
443             .map_err(|e| {
444                 MigratableError::Restore(anyhow!(
445                     "Could not restore state for {}: {:?}",
446                     self.id(),
447                     e
448                 ))
449             })
450     }
451 }
452 
/// MSI-X capability registers in their packed in-memory layout: Message
/// Control, then the Table and the PBA offset/BIR registers.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default)]
pub struct MsixCap {
    /// Message Control Register
    ///   10-0:  MSI-X Table size
    ///   13-11: Reserved
    ///   14:    Mask. Mask all MSI-X when set.
    ///   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    /// Table. Contains the offset and the BAR indicator (BIR)
    ///   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    ///   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    /// Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    ///   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    ///   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}
472 
473 // SAFETY: All members are simple numbers and any value is valid.
474 unsafe impl ByteValued for MsixCap {}
475 
476 impl PciCapability for MsixCap {
477     fn bytes(&self) -> &[u8] {
478         self.as_slice()
479     }
480 
481     fn id(&self) -> PciCapabilityId {
482         PciCapabilityId::MsiX
483     }
484 }
485 
486 impl MsixCap {
487     pub fn new(
488         table_pci_bar: u8,
489         table_size: u16,
490         table_off: u32,
491         pba_pci_bar: u8,
492         pba_off: u32,
493     ) -> Self {
494         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
495 
496         // Set the table size and enable MSI-X.
497         let msg_ctl: u16 = 0x8000u16 + table_size - 1;
498 
499         MsixCap {
500             msg_ctl,
501             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
502             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
503         }
504     }
505 
506     pub fn set_msg_ctl(&mut self, data: u16) {
507         self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
508             | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
509     }
510 
511     pub fn masked(&self) -> bool {
512         (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
513     }
514 
515     pub fn enabled(&self) -> bool {
516         (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
517     }
518 
519     pub fn table_offset(&self) -> u32 {
520         self.table & 0xffff_fff8
521     }
522 
523     pub fn pba_offset(&self) -> u32 {
524         self.pba & 0xffff_fff8
525     }
526 
527     pub fn table_bir(&self) -> u32 {
528         self.table & 0x7
529     }
530 
531     pub fn pba_bir(&self) -> u32 {
532         self.pba & 0x7
533     }
534 
535     pub fn table_size(&self) -> u16 {
536         (self.msg_ctl & 0x7ff) + 1
537     }
538 
539     pub fn table_range(&self) -> (u64, u64) {
540         // The table takes 16 bytes per entry.
541         let size = self.table_size() as u64 * 16;
542         (self.table_offset() as u64, size)
543     }
544 
545     pub fn pba_range(&self) -> (u64, u64) {
546         // The table takes 1 bit per entry modulo 8 bytes.
547         let size = ((self.table_size() as u64 / 64) + 1) * 8;
548         (self.pba_offset() as u64, size)
549     }
550 }
551