xref: /cloud-hypervisor/pci/src/msix.rs (revision 2b05753716936506ed440863fe6a29dfc7a427e2)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use std::sync::Arc;
7 use std::{io, result};
8 
9 use byteorder::{ByteOrder, LittleEndian};
10 use serde::{Deserialize, Serialize};
11 use thiserror::Error;
12 use vm_device::interrupt::{
13     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
14 };
15 use vm_memory::ByteValued;
16 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable};
17 
18 use crate::{PciCapability, PciCapabilityId};
19 
/// Upper bound on the number of MSI-X vectors accepted for a single device.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Divisor/modulus applied to a table offset to obtain the entry index and
/// the offset within that entry (one table entry is 16 bytes).
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// Divisor/modulus applied to a PBA offset to obtain the 64-bit word index
/// and the offset within that word (one PBA word is 8 bytes).
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits stored in each `u64` PBA word.
const BITS_PER_PBA_ENTRY: usize = 64;
/// Bit position of the Function Mask flag in the Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
/// Bit position of the MSI-X Enable flag in the Message Control register.
const MSIX_ENABLE_BIT: u8 = 15;
/// Bit mask selecting the Function Mask flag.
const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
/// Bit mask selecting the MSI-X Enable flag.
const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
/// Size in bytes of one MSI-X table entry.
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
/// Identifier reported by `MsixConfig` through `Snapshottable::id`.
pub const MSIX_CONFIG_ID: &str = "msix_config";
30 
/// Errors returned while programming interrupt routes for MSI-X vectors.
#[derive(Error, Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    #[error("Failed enabling the interrupt route")]
    EnableInterruptRoute(#[source] io::Error),
    /// Failed updating the interrupt route.
    #[error("Failed updating the interrupt route")]
    UpdateInterruptRoute(#[source] io::Error),
}
40 
/// One 16-byte entry of the MSI-X table, stored as four 32-bit words.
///
/// The words sit at byte offsets 0x0, 0x4, 0x8 and 0xc of the entry, in the
/// order in which the fields are declared here.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct MsixTableEntry {
    /// Lower 32 bits of the message address (entry offset 0x0).
    pub msg_addr_lo: u32,
    /// Upper 32 bits of the message address (entry offset 0x4).
    pub msg_addr_hi: u32,
    /// Message data word (entry offset 0x8).
    pub msg_data: u32,
    /// Vector control word (entry offset 0xc); bit 0 is the per-vector mask.
    pub vector_ctl: u32,
}
48 
49 impl MsixTableEntry {
50     pub fn masked(&self) -> bool {
51         self.vector_ctl & 0x1 == 0x1
52     }
53 }
54 
55 impl Default for MsixTableEntry {
56     fn default() -> Self {
57         MsixTableEntry {
58             msg_addr_lo: 0,
59             msg_addr_hi: 0,
60             msg_data: 0,
61             vector_ctl: 0x1,
62         }
63     }
64 }
65 
/// Serializable snapshot of an `MsixConfig`.
///
/// Produced by `MsixConfig::state` and accepted by `MsixConfig::new` to
/// rebuild a configuration from previously saved contents.
#[derive(Serialize, Deserialize)]
pub struct MsixConfigState {
    // Copy of the MSI-X table, one entry per vector.
    table_entries: Vec<MsixTableEntry>,
    // Copy of the Pending Bit Array, 64 vectors per word.
    pba_entries: Vec<u64>,
    // Function Mask flag (bit 14 of the Message Control register).
    masked: bool,
    // MSI-X Enable flag (bit 15 of the Message Control register).
    enabled: bool,
}
73 
/// Runtime state of a device's MSI-X table, Pending Bit Array and the
/// function-wide enable/mask flags, together with the interrupt source group
/// used to program and trigger the vectors.
pub struct MsixConfig {
    /// MSI-X table, indexed by vector number.
    pub table_entries: Vec<MsixTableEntry>,
    /// Pending Bit Array; bit `v % 64` of word `v / 64` belongs to vector `v`.
    pub pba_entries: Vec<u64>,
    /// Device identifier copied into every `MsiIrqSourceConfig` built here.
    pub devid: u32,
    // Backend used to update, enable, disable and trigger the vectors.
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    // Function Mask flag taken from bit 14 of the Message Control register.
    masked: bool,
    // MSI-X Enable flag taken from bit 15 of the Message Control register.
    enabled: bool,
}
82 
83 impl MsixConfig {
84     pub fn new(
85         msix_vectors: u16,
86         interrupt_source_group: Arc<dyn InterruptSourceGroup>,
87         devid: u32,
88         state: Option<MsixConfigState>,
89     ) -> result::Result<Self, Error> {
90         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
91 
92         let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
93             if state.enabled && !state.masked {
94                 for (idx, table_entry) in state.table_entries.iter().enumerate() {
95                     if table_entry.masked() {
96                         continue;
97                     }
98 
99                     let config = MsiIrqSourceConfig {
100                         high_addr: table_entry.msg_addr_hi,
101                         low_addr: table_entry.msg_addr_lo,
102                         data: table_entry.msg_data,
103                         devid,
104                     };
105 
106                     interrupt_source_group
107                         .update(
108                             idx as InterruptIndex,
109                             InterruptSourceConfig::MsiIrq(config),
110                             state.masked,
111                             true,
112                         )
113                         .map_err(Error::UpdateInterruptRoute)?;
114 
115                     interrupt_source_group
116                         .enable()
117                         .map_err(Error::EnableInterruptRoute)?;
118                 }
119             }
120 
121             (
122                 state.table_entries,
123                 state.pba_entries,
124                 state.masked,
125                 state.enabled,
126             )
127         } else {
128             let mut table_entries: Vec<MsixTableEntry> = Vec::new();
129             table_entries.resize_with(msix_vectors as usize, Default::default);
130             let mut pba_entries: Vec<u64> = Vec::new();
131             let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
132             pba_entries.resize_with(num_pba_entries, Default::default);
133 
134             (table_entries, pba_entries, true, false)
135         };
136 
137         Ok(MsixConfig {
138             table_entries,
139             pba_entries,
140             devid,
141             interrupt_source_group,
142             masked,
143             enabled,
144         })
145     }
146 
147     fn state(&self) -> MsixConfigState {
148         MsixConfigState {
149             table_entries: self.table_entries.clone(),
150             pba_entries: self.pba_entries.clone(),
151             masked: self.masked,
152             enabled: self.enabled,
153         }
154     }
155 
    /// Returns the Function Mask flag last recorded by `new` or `set_msg_ctl`.
    pub fn masked(&self) -> bool {
        self.masked
    }
159 
    /// Returns the MSI-X Enable flag last recorded by `new` or `set_msg_ctl`.
    pub fn enabled(&self) -> bool {
        self.enabled
    }
163 
164     pub fn set_msg_ctl(&mut self, reg: u16) {
165         let old_masked = self.masked;
166         let old_enabled = self.enabled;
167 
168         self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
169         self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;
170 
171         // Update interrupt routing
172         if old_masked != self.masked || old_enabled != self.enabled {
173             if self.enabled && !self.masked {
174                 debug!("MSI-X enabled for device 0x{:x}", self.devid);
175                 for (idx, table_entry) in self.table_entries.iter().enumerate() {
176                     let config = MsiIrqSourceConfig {
177                         high_addr: table_entry.msg_addr_hi,
178                         low_addr: table_entry.msg_addr_lo,
179                         data: table_entry.msg_data,
180                         devid: self.devid,
181                     };
182 
183                     if let Err(e) = self.interrupt_source_group.update(
184                         idx as InterruptIndex,
185                         InterruptSourceConfig::MsiIrq(config),
186                         table_entry.masked(),
187                         true,
188                     ) {
189                         error!("Failed updating vector: {:?}", e);
190                     }
191                 }
192             } else if old_enabled || !old_masked {
193                 debug!("MSI-X disabled for device 0x{:x}", self.devid);
194                 if let Err(e) = self.interrupt_source_group.disable() {
195                     error!("Failed disabling irq_fd: {:?}", e);
196                 }
197             }
198         }
199 
200         // If the Function Mask bit was set, and has just been cleared, it's
201         // important to go through the entire PBA to check if there was any
202         // pending MSI-X message to inject, given that the vector is not
203         // masked.
204         if old_masked && !self.masked {
205             for (index, entry) in self.table_entries.clone().iter().enumerate() {
206                 if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
207                     self.inject_msix_and_clear_pba(index);
208                 }
209             }
210         }
211     }
212 
213     pub fn read_table(&self, offset: u64, data: &mut [u8]) {
214         assert!((data.len() == 4 || data.len() == 8));
215 
216         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
217         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
218 
219         if index >= self.table_entries.len() {
220             debug!("Invalid MSI-X table entry index {index}");
221             data.copy_from_slice(&[0xff; 8][..data.len()]);
222             return;
223         }
224 
225         match data.len() {
226             4 => {
227                 let value = match modulo_offset {
228                     0x0 => self.table_entries[index].msg_addr_lo,
229                     0x4 => self.table_entries[index].msg_addr_hi,
230                     0x8 => self.table_entries[index].msg_data,
231                     0xc => self.table_entries[index].vector_ctl,
232                     _ => {
233                         error!("invalid offset");
234                         0
235                     }
236                 };
237 
238                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
239                 LittleEndian::write_u32(data, value);
240             }
241             8 => {
242                 let value = match modulo_offset {
243                     0x0 => {
244                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
245                             | u64::from(self.table_entries[index].msg_addr_lo)
246                     }
247                     0x8 => {
248                         (u64::from(self.table_entries[index].vector_ctl) << 32)
249                             | u64::from(self.table_entries[index].msg_data)
250                     }
251                     _ => {
252                         error!("invalid offset");
253                         0
254                     }
255                 };
256 
257                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
258                 LittleEndian::write_u64(data, value);
259             }
260             _ => {
261                 error!("invalid data length");
262             }
263         }
264     }
265 
    /// Serves a 4- or 8-byte little-endian write to the MSI-X table.
    ///
    /// `offset` is relative to the start of the table. Writes beyond the last
    /// entry are ignored. After the entry has been updated:
    ///
    /// * nothing further happens if the entry did not actually change;
    /// * if MSI-X is enabled, the function is not masked and the entry itself
    ///   is not masked, the new route is pushed to the interrupt source group;
    /// * if the entry's mask bit went from set to clear while its pending bit
    ///   is set (and the function is enabled and unmasked), the interrupt is
    ///   injected and the pending bit is cleared.
    ///
    /// # Panics
    ///
    /// Panics if `data` is neither 4 nor 8 bytes long.
    pub fn write_table(&mut self, offset: u64, data: &[u8]) {
        assert!((data.len() == 4 || data.len() == 8));

        // Split the offset into the entry index and the byte offset inside
        // that 16-byte entry.
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            debug!("Invalid MSI-X table entry index {index}");
            return;
        }

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                // 32-bit access: exactly one of the four words is replaced.
                let value = LittleEndian::read_u32(data);
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => {
                        self.table_entries[index].vector_ctl = value;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            8 => {
                // 64-bit access: the low half goes to the word at the given
                // offset and the high half to the word that follows it.
                let value = LittleEndian::read_u64(data);
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };

                debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
            }
            // Not reachable in practice: the assertion at the top of the
            // function only lets lengths 4 and 8 through.
            _ => error!("invalid data length"),
        };

        let table_entry = &self.table_entries[index];

        // Optimisation to avoid excessive updates
        if &old_entry == table_entry {
            return;
        }

        // Update interrupt routes
        // Optimisation: only update routes if the entry is not masked;
        // this is safe because if the entry is masked (starts masked as per spec)
        // in the table then it won't be triggered. (See: #4273)
        if self.enabled && !self.masked && !table_entry.masked() {
            let config = MsiIrqSourceConfig {
                high_addr: table_entry.msg_addr_hi,
                low_addr: table_entry.msg_addr_lo,
                data: table_entry.msg_data,
                devid: self.devid,
            };

            if let Err(e) = self.interrupt_source_group.update(
                index as InterruptIndex,
                InterruptSourceConfig::MsiIrq(config),
                table_entry.masked(),
                true,
            ) {
                // Route failures are logged only; this function has no error
                // return.
                error!("Failed updating vector: {:?}", e);
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked()
            && self.enabled()
            && old_entry.masked()
            && !table_entry.masked()
            && self.get_pba_bit(index as u16) == 1
        {
            self.inject_msix_and_clear_pba(index);
        }
    }
361 
362     pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
363         assert!((data.len() == 4 || data.len() == 8));
364 
365         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
366         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
367 
368         if index >= self.pba_entries.len() {
369             debug!("Invalid MSI-X PBA entry index {index}");
370             data.copy_from_slice(&[0xff; 8][..data.len()]);
371             return;
372         }
373 
374         match data.len() {
375             4 => {
376                 let value: u32 = match modulo_offset {
377                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
378                     0x4 => (self.pba_entries[index] >> 32) as u32,
379                     _ => {
380                         error!("invalid offset");
381                         0
382                     }
383                 };
384 
385                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
386                 LittleEndian::write_u32(data, value);
387             }
388             8 => {
389                 let value: u64 = match modulo_offset {
390                     0x0 => self.pba_entries[index],
391                     _ => {
392                         error!("invalid offset");
393                         0
394                     }
395                 };
396 
397                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
398                 LittleEndian::write_u64(data, value);
399             }
400             _ => {
401                 error!("invalid data length");
402             }
403         }
404     }
405 
    /// Rejects writes to the Pending Bit Array: the arguments are ignored
    /// and an error is logged.
    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }
409 
410     pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
411         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
412 
413         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
414         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
415         let mut mask: u64 = (1 << shift) as u64;
416 
417         if reset {
418             mask = !mask;
419             self.pba_entries[index] &= mask;
420         } else {
421             self.pba_entries[index] |= mask;
422         }
423     }
424 
425     fn get_pba_bit(&self, vector: u16) -> u8 {
426         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
427 
428         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
429         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
430 
431         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
432     }
433 
434     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
435         // Inject the MSI message
436         match self
437             .interrupt_source_group
438             .trigger(vector as InterruptIndex)
439         {
440             Ok(_) => debug!("MSI-X injected on vector control flip"),
441             Err(e) => error!("failed to inject MSI-X: {}", e),
442         }
443 
444         // Clear the bit from PBA
445         self.set_pba_bit(vector as u16, true);
446     }
447 }
448 
// No methods are overridden here, so whatever defaults `Pausable` provides
// apply to `MsixConfig`.
impl Pausable for MsixConfig {}
450 
451 impl Snapshottable for MsixConfig {
452     fn id(&self) -> String {
453         String::from(MSIX_CONFIG_ID)
454     }
455 
456     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
457         Snapshot::new_from_state(&self.state())
458     }
459 }
460 
/// In-memory image of the MSI-X capability body: the Message Control
/// register followed by the Table and PBA offset/BIR registers.
///
/// The struct is `repr(C, packed)`, so the three fields are laid out back to
/// back with no padding, in declaration order.
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Clone, Copy, Default, Serialize, Deserialize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size, stored as the number of entries minus one
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}
480 
// SAFETY: All members are simple numbers and any value is valid. The struct
// is also `repr(C, packed)`, so there are no padding bytes between fields.
unsafe impl ByteValued for MsixCap {}
483 
impl PciCapability for MsixCap {
    /// Returns the capability body as the byte slice produced by `as_slice`.
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    /// Identifies this capability as MSI-X.
    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::MsiX
    }
}
493 
494 impl MsixCap {
495     pub fn new(
496         table_pci_bar: u8,
497         table_size: u16,
498         table_off: u32,
499         pba_pci_bar: u8,
500         pba_off: u32,
501     ) -> Self {
502         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
503 
504         // Set the table size and enable MSI-X.
505         let msg_ctl: u16 = 0x8000u16 + table_size - 1;
506 
507         MsixCap {
508             msg_ctl,
509             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
510             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
511         }
512     }
513 
514     pub fn set_msg_ctl(&mut self, data: u16) {
515         self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
516             | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
517     }
518 
519     pub fn masked(&self) -> bool {
520         (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
521     }
522 
523     pub fn enabled(&self) -> bool {
524         (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
525     }
526 
527     pub fn table_offset(&self) -> u32 {
528         self.table & 0xffff_fff8
529     }
530 
531     pub fn pba_offset(&self) -> u32 {
532         self.pba & 0xffff_fff8
533     }
534 
535     pub fn table_set_offset(&mut self, addr: u32) {
536         self.table &= 0x7;
537         self.table += addr;
538     }
539 
540     pub fn pba_set_offset(&mut self, addr: u32) {
541         self.pba &= 0x7;
542         self.pba += addr;
543     }
544 
545     pub fn table_bir(&self) -> u32 {
546         self.table & 0x7
547     }
548 
549     pub fn pba_bir(&self) -> u32 {
550         self.pba & 0x7
551     }
552 
553     pub fn table_size(&self) -> u16 {
554         (self.msg_ctl & 0x7ff) + 1
555     }
556 
557     pub fn table_range(&self) -> (u64, u64) {
558         // The table takes 16 bytes per entry.
559         let size = self.table_size() as u64 * 16;
560         (self.table_offset() as u64, size)
561     }
562 
563     pub fn pba_range(&self) -> (u64, u64) {
564         // The table takes 1 bit per entry modulo 8 bytes.
565         let size = ((self.table_size() as u64 / 64) + 1) * 8;
566         (self.pba_offset() as u64, size)
567     }
568 }
569