xref: /cloud-hypervisor/pci/src/msix.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use std::io;
7 use std::result;
8 use std::sync::Arc;
9 
10 use byteorder::{ByteOrder, LittleEndian};
11 use serde::Deserialize;
12 use serde::Serialize;
13 use vm_device::interrupt::{
14     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
15 };
16 use vm_memory::ByteValued;
17 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable};
18 
19 use crate::{PciCapability, PciCapabilityId};
20 
21 const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
22 const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
23 const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
24 const BITS_PER_PBA_ENTRY: usize = 64;
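// Bit positions of the Function Mask and MSI-X Enable bits in the MSI-X
// Message Control register.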
25 const FUNCTION_MASK_BIT: u8 = 14;
26 const MSIX_ENABLE_BIT: u8 = 15;
27 const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
28 const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
29 pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
30 pub const MSIX_CONFIG_ID: &str = "msix_config";
31 
32 #[derive(Debug)]
33 pub enum Error {
34     /// Failed enabling the interrupt route.
35     EnableInterruptRoute(io::Error),
36     /// Failed updating the interrupt route.
37     UpdateInterruptRoute(io::Error),
38 }
39 
40 #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
41 pub struct MsixTableEntry {
42     pub msg_addr_lo: u32,
43     pub msg_addr_hi: u32,
44     pub msg_data: u32,
45     pub vector_ctl: u32,
46 }
47 
48 impl MsixTableEntry {
49     pub fn masked(&self) -> bool {
50         self.vector_ctl & 0x1 == 0x1
51     }
52 }
53 
54 impl Default for MsixTableEntry {
55     fn default() -> Self {
56         MsixTableEntry {
57             msg_addr_lo: 0,
58             msg_addr_hi: 0,
59             msg_data: 0,
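            // Vector Control bit 0 (Mask) set: entries start masked, as the
            // PCI spec requires after reset.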
60             vector_ctl: 0x1,
61         }
62     }
63 }
64 
65 #[derive(Serialize, Deserialize)]
66 pub struct MsixConfigState {
67     table_entries: Vec<MsixTableEntry>,
68     pba_entries: Vec<u64>,
69     masked: bool,
70     enabled: bool,
71 }
72 
73 pub struct MsixConfig {
74     pub table_entries: Vec<MsixTableEntry>,
75     pub pba_entries: Vec<u64>,
76     pub devid: u32,
77     interrupt_source_group: Arc<dyn InterruptSourceGroup>,
78     masked: bool,
79     enabled: bool,
80 }
81 
82 impl MsixConfig {
83     pub fn new(
84         msix_vectors: u16,
85         interrupt_source_group: Arc<dyn InterruptSourceGroup>,
86         devid: u32,
87         state: Option<MsixConfigState>,
88     ) -> result::Result<Self, Error> {
89         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
90 
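        // Restore from a snapshot when one is provided, re-programming and
        // enabling the interrupt routes for every unmasked vector if MSI-X was
        // enabled and not function-masked; otherwise start with a disabled,
        // function-masked configuration in which every vector is masked.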
91         let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
92             if state.enabled && !state.masked {
93                 for (idx, table_entry) in state.table_entries.iter().enumerate() {
94                     if table_entry.masked() {
95                         continue;
96                     }
97 
98                     let config = MsiIrqSourceConfig {
99                         high_addr: table_entry.msg_addr_hi,
100                         low_addr: table_entry.msg_addr_lo,
101                         data: table_entry.msg_data,
102                         devid,
103                     };
104 
105                     interrupt_source_group
106                         .update(
107                             idx as InterruptIndex,
108                             InterruptSourceConfig::MsiIrq(config),
109                             state.masked,
110                             true,
111                         )
112                         .map_err(Error::UpdateInterruptRoute)?;
113 
114                     interrupt_source_group
115                         .enable()
116                         .map_err(Error::EnableInterruptRoute)?;
117                 }
118             }
119 
120             (
121                 state.table_entries,
122                 state.pba_entries,
123                 state.masked,
124                 state.enabled,
125             )
126         } else {
127             let mut table_entries: Vec<MsixTableEntry> = Vec::new();
128             table_entries.resize_with(msix_vectors as usize, Default::default);
129             let mut pba_entries: Vec<u64> = Vec::new();
130             let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
131             pba_entries.resize_with(num_pba_entries, Default::default);
132 
133             (table_entries, pba_entries, true, false)
134         };
135 
136         Ok(MsixConfig {
137             table_entries,
138             pba_entries,
139             devid,
140             interrupt_source_group,
141             masked,
142             enabled,
143         })
144     }
145 
146     fn state(&self) -> MsixConfigState {
147         MsixConfigState {
148             table_entries: self.table_entries.clone(),
149             pba_entries: self.pba_entries.clone(),
150             masked: self.masked,
151             enabled: self.enabled,
152         }
153     }
154 
155     pub fn masked(&self) -> bool {
156         self.masked
157     }
158 
159     pub fn enabled(&self) -> bool {
160         self.enabled
161     }
162 
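    // Handle a guest write to the MSI-X Message Control word: track the Enable
    // and Function Mask bits, program or disable the interrupt routes
    // accordingly, and flush any pending vectors once the function is unmasked.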
163     pub fn set_msg_ctl(&mut self, reg: u16) {
164         let old_masked = self.masked;
165         let old_enabled = self.enabled;
166 
167         self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
168         self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;
169 
170         // Update interrupt routing
171         if old_masked != self.masked || old_enabled != self.enabled {
172             if self.enabled && !self.masked {
173                 debug!("MSI-X enabled for device 0x{:x}", self.devid);
174                 for (idx, table_entry) in self.table_entries.iter().enumerate() {
175                     let config = MsiIrqSourceConfig {
176                         high_addr: table_entry.msg_addr_hi,
177                         low_addr: table_entry.msg_addr_lo,
178                         data: table_entry.msg_data,
179                         devid: self.devid,
180                     };
181 
182                     if let Err(e) = self.interrupt_source_group.update(
183                         idx as InterruptIndex,
184                         InterruptSourceConfig::MsiIrq(config),
185                         table_entry.masked(),
186                         true,
187                     ) {
188                         error!("Failed updating vector: {:?}", e);
189                     }
190                 }
191             } else if old_enabled || !old_masked {
192                 debug!("MSI-X disabled for device 0x{:x}", self.devid);
193                 if let Err(e) = self.interrupt_source_group.disable() {
194                     error!("Failed disabling irq_fd: {:?}", e);
195                 }
196             }
197         }
198 
199         // If the Function Mask bit was set and has just been cleared, it is
200         // important to go through the entire PBA and check for any pending
201         // MSI-X message to inject, provided the corresponding vector is not
202         // itself masked.
203         if old_masked && !self.masked {
204             for (index, entry) in self.table_entries.clone().iter().enumerate() {
205                 if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
206                     self.inject_msix_and_clear_pba(index);
207                 }
208             }
209         }
210     }
211 
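    // MMIO accesses to the table are decoded into an entry index (each entry
    // is 16 bytes) and an offset within that entry.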
212     pub fn read_table(&self, offset: u64, data: &mut [u8]) {
213         assert!((data.len() == 4 || data.len() == 8));
214 
215         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
216         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
217 
218         if index >= self.table_entries.len() {
219             debug!("Invalid MSI-X table entry index {index}");
220             data.copy_from_slice(&[0xff; 8][..data.len()]);
221             return;
222         }
223 
224         match data.len() {
225             4 => {
226                 let value = match modulo_offset {
227                     0x0 => self.table_entries[index].msg_addr_lo,
228                     0x4 => self.table_entries[index].msg_addr_hi,
229                     0x8 => self.table_entries[index].msg_data,
230                     0xc => self.table_entries[index].vector_ctl,
231                     _ => {
232                         error!("invalid offset");
233                         0
234                     }
235                 };
236 
237                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
238                 LittleEndian::write_u32(data, value);
239             }
240             8 => {
241                 let value = match modulo_offset {
242                     0x0 => {
243                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
244                             | u64::from(self.table_entries[index].msg_addr_lo)
245                     }
246                     0x8 => {
247                         (u64::from(self.table_entries[index].vector_ctl) << 32)
248                             | u64::from(self.table_entries[index].msg_data)
249                     }
250                     _ => {
251                         error!("invalid offset");
252                         0
253                     }
254                 };
255 
256                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
257                 LittleEndian::write_u64(data, value);
258             }
259             _ => {
260                 error!("invalid data length");
261             }
262         }
263     }
264 
265     pub fn write_table(&mut self, offset: u64, data: &[u8]) {
266         assert!((data.len() == 4 || data.len() == 8));
267 
268         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
269         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
270 
271         if index >= self.table_entries.len() {
272             debug!("Invalid MSI-X table entry index {index}");
273             return;
274         }
275 
276         // Store the value of the entry before modification
277         let old_entry = self.table_entries[index].clone();
278 
279         match data.len() {
280             4 => {
281                 let value = LittleEndian::read_u32(data);
282                 match modulo_offset {
283                     0x0 => self.table_entries[index].msg_addr_lo = value,
284                     0x4 => self.table_entries[index].msg_addr_hi = value,
285                     0x8 => self.table_entries[index].msg_data = value,
286                     0xc => {
287                         self.table_entries[index].vector_ctl = value;
288                     }
289                     _ => error!("invalid offset"),
290                 };
291 
292                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
293             }
294             8 => {
295                 let value = LittleEndian::read_u64(data);
296                 match modulo_offset {
297                     0x0 => {
298                         self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
299                         self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
300                     }
301                     0x8 => {
302                         self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
303                         self.table_entries[index].vector_ctl = (value >> 32) as u32;
304                     }
305                     _ => error!("invalid offset"),
306                 };
307 
308                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
309             }
310             _ => error!("invalid data length"),
311         };
312 
313         let table_entry = &self.table_entries[index];
314 
315         // Optimisation to avoid excessive updates
316         if &old_entry == table_entry {
317             return;
318         }
319 
320         // Update interrupt routes
321         // Optimisation: only update the route if the entry is not masked;
322         // this is safe because a masked entry (entries start masked, as per
323         // the spec) cannot be triggered. (See: #4273)
324         if self.enabled && !self.masked && !table_entry.masked() {
325             let config = MsiIrqSourceConfig {
326                 high_addr: table_entry.msg_addr_hi,
327                 low_addr: table_entry.msg_addr_lo,
328                 data: table_entry.msg_data,
329                 devid: self.devid,
330             };
331 
332             if let Err(e) = self.interrupt_source_group.update(
333                 index as InterruptIndex,
334                 InterruptSourceConfig::MsiIrq(config),
335                 table_entry.masked(),
336                 true,
337             ) {
338                 error!("Failed updating vector: {:?}", e);
339             }
340         }
341 
342         // After the MSI-X table entry has been updated, it is necessary to
343         // check if the vector control masking bit has changed. In case the
344         // bit has been flipped from 1 to 0, we need to inject an MSI message
345         // if the corresponding pending bit from the PBA is set. Once the MSI
346         // has been injected, the pending bit in the PBA needs to be cleared.
347         // All of this is valid only if MSI-X has not been masked for the whole
348         // device.
349 
350         // Check if bit has been flipped
351         if !self.masked()
352             && self.enabled()
353             && old_entry.masked()
354             && !table_entry.masked()
355             && self.get_pba_bit(index as u16) == 1
356         {
357             self.inject_msix_and_clear_pba(index);
358         }
359     }
360 
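    // The PBA is read-only from the guest; reads are decoded into an 8-byte
    // entry index and an offset within that entry.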
361     pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
362         assert!((data.len() == 4 || data.len() == 8));
363 
364         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
365         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
366 
367         if index >= self.pba_entries.len() {
368             debug!("Invalid MSI-X PBA entry index {index}");
369             data.copy_from_slice(&[0xff; 8][..data.len()]);
370             return;
371         }
372 
373         match data.len() {
374             4 => {
375                 let value: u32 = match modulo_offset {
376                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
377                     0x4 => (self.pba_entries[index] >> 32) as u32,
378                     _ => {
379                         error!("invalid offset");
380                         0
381                     }
382                 };
383 
384                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
385                 LittleEndian::write_u32(data, value);
386             }
387             8 => {
388                 let value: u64 = match modulo_offset {
389                     0x0 => self.pba_entries[index],
390                     _ => {
391                         error!("invalid offset");
392                         0
393                     }
394                 };
395 
396                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
397                 LittleEndian::write_u64(data, value);
398             }
399             _ => {
400                 error!("invalid data length");
401             }
402         }
403     }
404 
405     pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
406         error!("Pending Bit Array is read only");
407     }
408 
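    // Set (or clear, when `reset` is true) the pending bit for `vector`. The
    // PBA packs one pending bit per vector into 64-bit entries.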
409     pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
410         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
411 
412         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
413         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
414         let mut mask: u64 = 1u64 << shift;
415 
416         if reset {
417             mask = !mask;
418             self.pba_entries[index] &= mask;
419         } else {
420             self.pba_entries[index] |= mask;
421         }
422     }
423 
424     fn get_pba_bit(&self, vector: u16) -> u8 {
425         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
426 
427         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
428         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
429 
430         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
431     }
432 
433     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
434         // Inject the MSI message
435         match self
436             .interrupt_source_group
437             .trigger(vector as InterruptIndex)
438         {
439             Ok(_) => debug!("MSI-X injected on vector control flip"),
440             Err(e) => error!("failed to inject MSI-X: {}", e),
441         }
442 
443         // Clear the bit from PBA
444         self.set_pba_bit(vector as u16, true);
445     }
446 }
447 
448 impl Pausable for MsixConfig {}
449 
450 impl Snapshottable for MsixConfig {
451     fn id(&self) -> String {
452         String::from(MSIX_CONFIG_ID)
453     }
454 
455     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
456         Snapshot::new_from_state(&self.state())
457     }
458 }
459 
460 #[allow(dead_code)]
461 #[repr(packed)]
462 #[derive(Clone, Copy, Default, Serialize, Deserialize)]
463 pub struct MsixCap {
464     // Message Control Register
465     //   10-0:  MSI-X Table size
466     //   13-11: Reserved
467     //   14:    Mask. Mask all MSI-X when set.
468     //   15:    Enable. Enable all MSI-X when set.
469     pub msg_ctl: u16,
470     // Table. Contains the offset and the BAR indicator (BIR)
471     //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
472     //   31-3: Table offset in the BAR pointed by the BIR.
473     pub table: u32,
474     // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
475     //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
476     //   31-3: PBA offset in the BAR pointed by the BIR.
477     pub pba: u32,
478 }
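// Illustrative encoding with hypothetical values: a table at offset 0x2000 in
// BAR 1 is encoded as table = 0x2000 | 0x1 = 0x2001, and a PBA at offset
// 0x3000 in the same BAR as pba = 0x3000 | 0x1 = 0x3001.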
479 
480 // SAFETY: All members are simple numbers and any value is valid.
481 unsafe impl ByteValued for MsixCap {}
482 
483 impl PciCapability for MsixCap {
484     fn bytes(&self) -> &[u8] {
485         self.as_slice()
486     }
487 
488     fn id(&self) -> PciCapabilityId {
489         PciCapabilityId::MsiX
490     }
491 }
492 
493 impl MsixCap {
494     pub fn new(
495         table_pci_bar: u8,
496         table_size: u16,
497         table_off: u32,
498         pba_pci_bar: u8,
499         pba_off: u32,
500     ) -> Self {
501         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
502 
503         // Set the table size (encoded as N-1, per the spec) and enable MSI-X.
504         let msg_ctl: u16 = 0x8000u16 + table_size - 1;
505 
506         MsixCap {
507             msg_ctl,
508             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
509             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
510         }
511     }
512 
513     pub fn set_msg_ctl(&mut self, data: u16) {
514         self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
515             | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
516     }
517 
518     pub fn masked(&self) -> bool {
519         (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
520     }
521 
522     pub fn enabled(&self) -> bool {
523         (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
524     }
525 
526     pub fn table_offset(&self) -> u32 {
527         self.table & 0xffff_fff8
528     }
529 
530     pub fn pba_offset(&self) -> u32 {
531         self.pba & 0xffff_fff8
532     }
533 
534     pub fn table_set_offset(&mut self, addr: u32) {
535         self.table &= 0x7;
536         self.table += addr;
537     }
538 
539     pub fn pba_set_offset(&mut self, addr: u32) {
540         self.pba &= 0x7;
541         self.pba += addr;
542     }
543 
544     pub fn table_bir(&self) -> u32 {
545         self.table & 0x7
546     }
547 
548     pub fn pba_bir(&self) -> u32 {
549         self.pba & 0x7
550     }
551 
552     pub fn table_size(&self) -> u16 {
553         (self.msg_ctl & 0x7ff) + 1
554     }
555 
556     pub fn table_range(&self) -> (u64, u64) {
557         // The table takes 16 bytes per entry.
558         let size = self.table_size() as u64 * 16;
559         (self.table_offset() as u64, size)
560     }
561 
562     pub fn pba_range(&self) -> (u64, u64) {
563         // The PBA holds 1 bit per table entry, sized in 8-byte (64-bit) chunks.
564         let size = ((self.table_size() as u64 / 64) + 1) * 8;
565         (self.pba_offset() as u64, size)
566     }
567 }
568
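// A minimal sketch exercising the pure helpers above; the table/PBA layout
// values used here are arbitrary examples.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_msix_cap_encoding() {
        // 8-entry table at offset 0x2000 in BAR 0, PBA at offset 0x3000 in BAR 0.
        let cap = MsixCap::new(0, 8, 0x2000, 0, 0x3000);

        // MSI-X starts enabled and not function-masked.
        assert!(cap.enabled());
        assert!(!cap.masked());

        // The table size is stored N-1 encoded in the Message Control register.
        assert_eq!(cap.table_size(), 8);

        // Offset and BAR indicator are packed into a single register.
        assert_eq!(cap.table_bir(), 0);
        assert_eq!(cap.table_offset(), 0x2000);
        assert_eq!(cap.pba_offset(), 0x3000);

        // 8 entries of 16 bytes each; the PBA is rounded up to one 8-byte word.
        assert_eq!(cap.table_range(), (0x2000, 128));
        assert_eq!(cap.pba_range(), (0x3000, 8));
    }

    #[test]
    fn test_table_entry_default_masked() {
        // Table entries are masked by default, as the PCI spec requires.
        assert!(MsixTableEntry::default().masked());
    }
}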