xref: /cloud-hypervisor/pci/src/msix.rs (revision 61e57e1cb149de03ae1e0b799b9e5ba9a4a63ace)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use std::sync::Arc;
7 use std::{io, result};
8 
9 use byteorder::{ByteOrder, LittleEndian};
10 use serde::{Deserialize, Serialize};
11 use vm_device::interrupt::{
12     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
13 };
14 use vm_memory::ByteValued;
15 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable};
16 
17 use crate::{PciCapability, PciCapabilityId};
18 
/// Largest number of MSI-X vectors a single device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Divisor/modulus that splits a table offset into an entry index and an
/// offset inside that entry.
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// Divisor/modulus that splits a PBA offset into a 64-bit word index and an
/// offset inside that word.
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits stored in one PBA word.
const BITS_PER_PBA_ENTRY: usize = 64;
/// Position of the Function Mask bit in the Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
/// Position of the MSI-X Enable bit in the Message Control register.
const MSIX_ENABLE_BIT: u8 = 15;
/// Message Control mask selecting the Function Mask bit.
const FUNCTION_MASK_MASK: u16 = 1u16 << FUNCTION_MASK_BIT;
/// Message Control mask selecting the MSI-X Enable bit.
const MSIX_ENABLE_MASK: u16 = 1u16 << MSIX_ENABLE_BIT;
/// Size in bytes of one MSI-X table entry.
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
/// Identifier under which the MSI-X configuration is snapshotted.
pub const MSIX_CONFIG_ID: &str = "msix_config";
29 
/// Errors reported while programming MSI-X interrupt routes.
///
/// Each variant carries the `io::Error` returned by the interrupt source
/// group; it is also reachable through `std::error::Error::source`.
#[derive(Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}

impl std::fmt::Display for Error {
    /// Writes a short description of the failed operation; the underlying
    /// I/O error is exposed via `source()` rather than repeated here.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::EnableInterruptRoute(_) => "Failed enabling the interrupt route",
            Self::UpdateInterruptRoute(_) => "Failed updating the interrupt route",
        };
        f.write_str(message)
    }
}

impl std::error::Error for Error {
    /// Returns the I/O error that caused the failure.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::EnableInterruptRoute(e) | Self::UpdateInterruptRoute(e) => Some(e),
        }
    }
}
37 
/// One 16-byte entry of the MSI-X table, stored as four 32-bit words.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct MsixTableEntry {
    /// Lower 32 bits of the message address.
    pub msg_addr_lo: u32,
    /// Upper 32 bits of the message address.
    pub msg_addr_hi: u32,
    /// Message data word.
    pub msg_data: u32,
    /// Vector control word; bit 0 is the per-vector mask bit.
    pub vector_ctl: u32,
}
45 
46 impl MsixTableEntry {
47     pub fn masked(&self) -> bool {
48         self.vector_ctl & 0x1 == 0x1
49     }
50 }
51 
52 impl Default for MsixTableEntry {
53     fn default() -> Self {
54         MsixTableEntry {
55             msg_addr_lo: 0,
56             msg_addr_hi: 0,
57             msg_data: 0,
58             vector_ctl: 0x1,
59         }
60     }
61 }
62 
/// Serializable copy of the parts of `MsixConfig` that are saved in a
/// snapshot and accepted back by `MsixConfig::new`.
#[derive(Serialize, Deserialize)]
pub struct MsixConfigState {
    /// Saved MSI-X table.
    table_entries: Vec<MsixTableEntry>,
    /// Saved Pending Bit Array words.
    pba_entries: Vec<u64>,
    /// Saved function-level mask flag.
    masked: bool,
    /// Saved MSI-X enable flag.
    enabled: bool,
}
70 
/// Runtime MSI-X state of a device: the vector table, the Pending Bit Array
/// and the function-level mask/enable flags.
pub struct MsixConfig {
    /// MSI-X table, indexed by vector number.
    pub table_entries: Vec<MsixTableEntry>,
    /// Pending Bit Array; each `u64` holds the pending bits of 64 vectors.
    pub pba_entries: Vec<u64>,
    /// Device identifier copied into every `MsiIrqSourceConfig` built from
    /// the table.
    pub devid: u32,
    /// Interrupt source group used to update, enable, disable and trigger
    /// the vectors.
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    /// Function Mask flag (bit 14 of the Message Control register).
    masked: bool,
    /// MSI-X Enable flag (bit 15 of the Message Control register).
    enabled: bool,
}
79 
80 impl MsixConfig {
81     pub fn new(
82         msix_vectors: u16,
83         interrupt_source_group: Arc<dyn InterruptSourceGroup>,
84         devid: u32,
85         state: Option<MsixConfigState>,
86     ) -> result::Result<Self, Error> {
87         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
88 
89         let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
90             if state.enabled && !state.masked {
91                 for (idx, table_entry) in state.table_entries.iter().enumerate() {
92                     if table_entry.masked() {
93                         continue;
94                     }
95 
96                     let config = MsiIrqSourceConfig {
97                         high_addr: table_entry.msg_addr_hi,
98                         low_addr: table_entry.msg_addr_lo,
99                         data: table_entry.msg_data,
100                         devid,
101                     };
102 
103                     interrupt_source_group
104                         .update(
105                             idx as InterruptIndex,
106                             InterruptSourceConfig::MsiIrq(config),
107                             state.masked,
108                             true,
109                         )
110                         .map_err(Error::UpdateInterruptRoute)?;
111 
112                     interrupt_source_group
113                         .enable()
114                         .map_err(Error::EnableInterruptRoute)?;
115                 }
116             }
117 
118             (
119                 state.table_entries,
120                 state.pba_entries,
121                 state.masked,
122                 state.enabled,
123             )
124         } else {
125             let mut table_entries: Vec<MsixTableEntry> = Vec::new();
126             table_entries.resize_with(msix_vectors as usize, Default::default);
127             let mut pba_entries: Vec<u64> = Vec::new();
128             let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
129             pba_entries.resize_with(num_pba_entries, Default::default);
130 
131             (table_entries, pba_entries, true, false)
132         };
133 
134         Ok(MsixConfig {
135             table_entries,
136             pba_entries,
137             devid,
138             interrupt_source_group,
139             masked,
140             enabled,
141         })
142     }
143 
144     fn state(&self) -> MsixConfigState {
145         MsixConfigState {
146             table_entries: self.table_entries.clone(),
147             pba_entries: self.pba_entries.clone(),
148             masked: self.masked,
149             enabled: self.enabled,
150         }
151     }
152 
153     pub fn masked(&self) -> bool {
154         self.masked
155     }
156 
157     pub fn enabled(&self) -> bool {
158         self.enabled
159     }
160 
161     pub fn set_msg_ctl(&mut self, reg: u16) {
162         let old_masked = self.masked;
163         let old_enabled = self.enabled;
164 
165         self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
166         self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;
167 
168         // Update interrupt routing
169         if old_masked != self.masked || old_enabled != self.enabled {
170             if self.enabled && !self.masked {
171                 debug!("MSI-X enabled for device 0x{:x}", self.devid);
172                 for (idx, table_entry) in self.table_entries.iter().enumerate() {
173                     let config = MsiIrqSourceConfig {
174                         high_addr: table_entry.msg_addr_hi,
175                         low_addr: table_entry.msg_addr_lo,
176                         data: table_entry.msg_data,
177                         devid: self.devid,
178                     };
179 
180                     if let Err(e) = self.interrupt_source_group.update(
181                         idx as InterruptIndex,
182                         InterruptSourceConfig::MsiIrq(config),
183                         table_entry.masked(),
184                         true,
185                     ) {
186                         error!("Failed updating vector: {:?}", e);
187                     }
188                 }
189             } else if old_enabled || !old_masked {
190                 debug!("MSI-X disabled for device 0x{:x}", self.devid);
191                 if let Err(e) = self.interrupt_source_group.disable() {
192                     error!("Failed disabling irq_fd: {:?}", e);
193                 }
194             }
195         }
196 
197         // If the Function Mask bit was set, and has just been cleared, it's
198         // important to go through the entire PBA to check if there was any
199         // pending MSI-X message to inject, given that the vector is not
200         // masked.
201         if old_masked && !self.masked {
202             for (index, entry) in self.table_entries.clone().iter().enumerate() {
203                 if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
204                     self.inject_msix_and_clear_pba(index);
205                 }
206             }
207         }
208     }
209 
210     pub fn read_table(&self, offset: u64, data: &mut [u8]) {
211         assert!((data.len() == 4 || data.len() == 8));
212 
213         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
214         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
215 
216         if index >= self.table_entries.len() {
217             debug!("Invalid MSI-X table entry index {index}");
218             data.copy_from_slice(&[0xff; 8][..data.len()]);
219             return;
220         }
221 
222         match data.len() {
223             4 => {
224                 let value = match modulo_offset {
225                     0x0 => self.table_entries[index].msg_addr_lo,
226                     0x4 => self.table_entries[index].msg_addr_hi,
227                     0x8 => self.table_entries[index].msg_data,
228                     0xc => self.table_entries[index].vector_ctl,
229                     _ => {
230                         error!("invalid offset");
231                         0
232                     }
233                 };
234 
235                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
236                 LittleEndian::write_u32(data, value);
237             }
238             8 => {
239                 let value = match modulo_offset {
240                     0x0 => {
241                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
242                             | u64::from(self.table_entries[index].msg_addr_lo)
243                     }
244                     0x8 => {
245                         (u64::from(self.table_entries[index].vector_ctl) << 32)
246                             | u64::from(self.table_entries[index].msg_data)
247                     }
248                     _ => {
249                         error!("invalid offset");
250                         0
251                     }
252                 };
253 
254                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
255                 LittleEndian::write_u64(data, value);
256             }
257             _ => {
258                 error!("invalid data length");
259             }
260         }
261     }
262 
263     pub fn write_table(&mut self, offset: u64, data: &[u8]) {
264         assert!((data.len() == 4 || data.len() == 8));
265 
266         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
267         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
268 
269         if index >= self.table_entries.len() {
270             debug!("Invalid MSI-X table entry index {index}");
271             return;
272         }
273 
274         // Store the value of the entry before modification
275         let old_entry = self.table_entries[index].clone();
276 
277         match data.len() {
278             4 => {
279                 let value = LittleEndian::read_u32(data);
280                 match modulo_offset {
281                     0x0 => self.table_entries[index].msg_addr_lo = value,
282                     0x4 => self.table_entries[index].msg_addr_hi = value,
283                     0x8 => self.table_entries[index].msg_data = value,
284                     0xc => {
285                         self.table_entries[index].vector_ctl = value;
286                     }
287                     _ => error!("invalid offset"),
288                 };
289 
290                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
291             }
292             8 => {
293                 let value = LittleEndian::read_u64(data);
294                 match modulo_offset {
295                     0x0 => {
296                         self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
297                         self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
298                     }
299                     0x8 => {
300                         self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
301                         self.table_entries[index].vector_ctl = (value >> 32) as u32;
302                     }
303                     _ => error!("invalid offset"),
304                 };
305 
306                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
307             }
308             _ => error!("invalid data length"),
309         };
310 
311         let table_entry = &self.table_entries[index];
312 
313         // Optimisation to avoid excessive updates
314         if &old_entry == table_entry {
315             return;
316         }
317 
318         // Update interrupt routes
319         // Optimisation: only update routes if the entry is not masked;
320         // this is safe because if the entry is masked (starts masked as per spec)
321         // in the table then it won't be triggered. (See: #4273)
322         if self.enabled && !self.masked && !table_entry.masked() {
323             let config = MsiIrqSourceConfig {
324                 high_addr: table_entry.msg_addr_hi,
325                 low_addr: table_entry.msg_addr_lo,
326                 data: table_entry.msg_data,
327                 devid: self.devid,
328             };
329 
330             if let Err(e) = self.interrupt_source_group.update(
331                 index as InterruptIndex,
332                 InterruptSourceConfig::MsiIrq(config),
333                 table_entry.masked(),
334                 true,
335             ) {
336                 error!("Failed updating vector: {:?}", e);
337             }
338         }
339 
340         // After the MSI-X table entry has been updated, it is necessary to
341         // check if the vector control masking bit has changed. In case the
342         // bit has been flipped from 1 to 0, we need to inject a MSI message
343         // if the corresponding pending bit from the PBA is set. Once the MSI
344         // has been injected, the pending bit in the PBA needs to be cleared.
345         // All of this is valid only if MSI-X has not been masked for the whole
346         // device.
347 
348         // Check if bit has been flipped
349         if !self.masked()
350             && self.enabled()
351             && old_entry.masked()
352             && !table_entry.masked()
353             && self.get_pba_bit(index as u16) == 1
354         {
355             self.inject_msix_and_clear_pba(index);
356         }
357     }
358 
359     pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
360         assert!((data.len() == 4 || data.len() == 8));
361 
362         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
363         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
364 
365         if index >= self.pba_entries.len() {
366             debug!("Invalid MSI-X PBA entry index {index}");
367             data.copy_from_slice(&[0xff; 8][..data.len()]);
368             return;
369         }
370 
371         match data.len() {
372             4 => {
373                 let value: u32 = match modulo_offset {
374                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
375                     0x4 => (self.pba_entries[index] >> 32) as u32,
376                     _ => {
377                         error!("invalid offset");
378                         0
379                     }
380                 };
381 
382                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
383                 LittleEndian::write_u32(data, value);
384             }
385             8 => {
386                 let value: u64 = match modulo_offset {
387                     0x0 => self.pba_entries[index],
388                     _ => {
389                         error!("invalid offset");
390                         0
391                     }
392                 };
393 
394                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
395                 LittleEndian::write_u64(data, value);
396             }
397             _ => {
398                 error!("invalid data length");
399             }
400         }
401     }
402 
403     pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
404         error!("Pending Bit Array is read only");
405     }
406 
407     pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
408         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
409 
410         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
411         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
412         let mut mask: u64 = (1 << shift) as u64;
413 
414         if reset {
415             mask = !mask;
416             self.pba_entries[index] &= mask;
417         } else {
418             self.pba_entries[index] |= mask;
419         }
420     }
421 
422     fn get_pba_bit(&self, vector: u16) -> u8 {
423         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
424 
425         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
426         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
427 
428         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
429     }
430 
431     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
432         // Inject the MSI message
433         match self
434             .interrupt_source_group
435             .trigger(vector as InterruptIndex)
436         {
437             Ok(_) => debug!("MSI-X injected on vector control flip"),
438             Err(e) => error!("failed to inject MSI-X: {}", e),
439         }
440 
441         // Clear the bit from PBA
442         self.set_pba_bit(vector as u16, true);
443     }
444 }
445 
// No method is overridden here, so the trait's provided implementations apply.
impl Pausable for MsixConfig {}
447 
448 impl Snapshottable for MsixConfig {
449     fn id(&self) -> String {
450         String::from(MSIX_CONFIG_ID)
451     }
452 
453     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
454         Snapshot::new_from_state(&self.state())
455     }
456 }
457 
458 #[allow(dead_code)]
459 #[repr(packed)]
460 #[derive(Clone, Copy, Default, Serialize, Deserialize)]
461 pub struct MsixCap {
462     // Message Control Register
463     //   10-0:  MSI-X Table size
464     //   13-11: Reserved
465     //   14:    Mask. Mask all MSI-X when set.
466     //   15:    Enable. Enable all MSI-X when set.
467     pub msg_ctl: u16,
468     // Table. Contains the offset and the BAR indicator (BIR)
469     //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
470     //   31-3: Table offset in the BAR pointed by the BIR.
471     pub table: u32,
472     // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
473     //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
474     //   31-3: PBA offset in the BAR pointed by the BIR.
475     pub pba: u32,
476 }
477 
478 // SAFETY: All members are simple numbers and any value is valid.
479 unsafe impl ByteValued for MsixCap {}
480 
481 impl PciCapability for MsixCap {
482     fn bytes(&self) -> &[u8] {
483         self.as_slice()
484     }
485 
486     fn id(&self) -> PciCapabilityId {
487         PciCapabilityId::MsiX
488     }
489 }
490 
491 impl MsixCap {
492     pub fn new(
493         table_pci_bar: u8,
494         table_size: u16,
495         table_off: u32,
496         pba_pci_bar: u8,
497         pba_off: u32,
498     ) -> Self {
499         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
500 
501         // Set the table size and enable MSI-X.
502         let msg_ctl: u16 = 0x8000u16 + table_size - 1;
503 
504         MsixCap {
505             msg_ctl,
506             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
507             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
508         }
509     }
510 
511     pub fn set_msg_ctl(&mut self, data: u16) {
512         self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
513             | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
514     }
515 
516     pub fn masked(&self) -> bool {
517         (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
518     }
519 
520     pub fn enabled(&self) -> bool {
521         (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
522     }
523 
524     pub fn table_offset(&self) -> u32 {
525         self.table & 0xffff_fff8
526     }
527 
528     pub fn pba_offset(&self) -> u32 {
529         self.pba & 0xffff_fff8
530     }
531 
532     pub fn table_set_offset(&mut self, addr: u32) {
533         self.table &= 0x7;
534         self.table += addr;
535     }
536 
537     pub fn pba_set_offset(&mut self, addr: u32) {
538         self.pba &= 0x7;
539         self.pba += addr;
540     }
541 
542     pub fn table_bir(&self) -> u32 {
543         self.table & 0x7
544     }
545 
546     pub fn pba_bir(&self) -> u32 {
547         self.pba & 0x7
548     }
549 
550     pub fn table_size(&self) -> u16 {
551         (self.msg_ctl & 0x7ff) + 1
552     }
553 
554     pub fn table_range(&self) -> (u64, u64) {
555         // The table takes 16 bytes per entry.
556         let size = self.table_size() as u64 * 16;
557         (self.table_offset() as u64, size)
558     }
559 
560     pub fn pba_range(&self) -> (u64, u64) {
561         // The table takes 1 bit per entry modulo 8 bytes.
562         let size = ((self.table_size() as u64 / 64) + 1) * 8;
563         (self.pba_offset() as u64, size)
564     }
565 }
566