xref: /cloud-hypervisor/pci/src/msix.rs (revision 6f8bd27cf7629733582d930519e98d19e90afb16)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use crate::{PciCapability, PciCapabilityId};
7 use anyhow::anyhow;
8 use byteorder::{ByteOrder, LittleEndian};
9 use std::io;
10 use std::result;
11 use std::sync::Arc;
12 use versionize::{VersionMap, Versionize, VersionizeResult};
13 use versionize_derive::Versionize;
14 use vm_device::interrupt::{
15     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
16 };
17 use vm_memory::ByteValued;
18 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped};
19 
// Largest number of MSI-X vectors a single device may expose; enforced by the
// assertions in this file.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Divisor/modulus used to split an MSI-X table offset into an entry index and
// a byte offset inside that entry.
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// Divisor/modulus used to split a PBA offset into a 64-bit word index and a
// byte offset inside that word.
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
// Number of pending bits stored in each `u64` element of the PBA.
const BITS_PER_PBA_ENTRY: usize = 64;
// Position of the Function Mask bit in the Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
// Position of the MSI-X Enable bit in the Message Control register.
const MSIX_ENABLE_BIT: u8 = 15;
// Bit masks derived from the two bit positions above.
const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16;
const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16;
/// Size in bytes of one MSI-X table entry (four 32-bit fields).
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
/// Identifier under which the MSI-X configuration is snapshotted and restored.
pub const MSIX_CONFIG_ID: &str = "msix_config";
30 
/// Errors returned while programming interrupt routes from a saved MSI-X
/// state.
#[derive(Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}
38 
/// One entry of the MSI-X table, stored as its four 32-bit fields.
#[derive(Debug, Clone, Versionize, Eq, PartialEq)]
pub struct MsixTableEntry {
    /// Lower 32 bits of the message address.
    pub msg_addr_lo: u32,
    /// Upper 32 bits of the message address.
    pub msg_addr_hi: u32,
    /// Message data.
    pub msg_data: u32,
    /// Vector control; bit 0 is the per-vector mask bit.
    pub vector_ctl: u32,
}
46 
47 impl MsixTableEntry {
48     pub fn masked(&self) -> bool {
49         self.vector_ctl & 0x1 == 0x1
50     }
51 }
52 
53 impl Default for MsixTableEntry {
54     fn default() -> Self {
55         MsixTableEntry {
56             msg_addr_lo: 0,
57             msg_addr_hi: 0,
58             msg_data: 0,
59             vector_ctl: 0x1,
60         }
61     }
62 }
63 
/// Serializable copy of the `MsixConfig` fields that are saved in a snapshot
/// and applied again on restore.
#[derive(Versionize)]
pub struct MsixConfigState {
    // Copy of the MSI-X table.
    table_entries: Vec<MsixTableEntry>,
    // Copy of the Pending Bit Array words.
    pba_entries: Vec<u64>,
    // Function Mask state at snapshot time.
    masked: bool,
    // MSI-X Enable state at snapshot time.
    enabled: bool,
}

// Marker implementation; no trait items are overridden here.
impl VersionMapped for MsixConfigState {}
73 
/// Runtime MSI-X state of a device: the table, the Pending Bit Array and the
/// function-wide mask/enable flags, plus the interrupt source group used to
/// program and trigger the vectors.
pub struct MsixConfig {
    /// MSI-X table, indexed by vector.
    pub table_entries: Vec<MsixTableEntry>,
    /// Pending Bit Array, 64 pending bits per element.
    pub pba_entries: Vec<u64>,
    /// Device identifier placed in every `MsiIrqSourceConfig` built here.
    pub devid: u32,
    // Backend used to update, enable, disable and trigger the vectors.
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    // Function Mask flag (bit 14 of the Message Control register).
    masked: bool,
    // MSI-X Enable flag (bit 15 of the Message Control register).
    enabled: bool,
}
82 
83 impl MsixConfig {
84     pub fn new(
85         msix_vectors: u16,
86         interrupt_source_group: Arc<dyn InterruptSourceGroup>,
87         devid: u32,
88         state: Option<MsixConfigState>,
89     ) -> result::Result<Self, Error> {
90         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
91 
92         let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
93             if state.enabled && !state.masked {
94                 for (idx, table_entry) in state.table_entries.iter().enumerate() {
95                     if table_entry.masked() {
96                         continue;
97                     }
98 
99                     let config = MsiIrqSourceConfig {
100                         high_addr: table_entry.msg_addr_hi,
101                         low_addr: table_entry.msg_addr_lo,
102                         data: table_entry.msg_data,
103                         devid,
104                     };
105 
106                     interrupt_source_group
107                         .update(
108                             idx as InterruptIndex,
109                             InterruptSourceConfig::MsiIrq(config),
110                             state.masked,
111                         )
112                         .map_err(Error::UpdateInterruptRoute)?;
113 
114                     interrupt_source_group
115                         .enable()
116                         .map_err(Error::EnableInterruptRoute)?;
117                 }
118             }
119 
120             (
121                 state.table_entries,
122                 state.pba_entries,
123                 state.masked,
124                 state.enabled,
125             )
126         } else {
127             let mut table_entries: Vec<MsixTableEntry> = Vec::new();
128             table_entries.resize_with(msix_vectors as usize, Default::default);
129             let mut pba_entries: Vec<u64> = Vec::new();
130             let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
131             pba_entries.resize_with(num_pba_entries, Default::default);
132 
133             (table_entries, pba_entries, true, false)
134         };
135 
136         Ok(MsixConfig {
137             table_entries,
138             pba_entries,
139             devid,
140             interrupt_source_group,
141             masked,
142             enabled,
143         })
144     }
145 
146     fn state(&self) -> MsixConfigState {
147         MsixConfigState {
148             table_entries: self.table_entries.clone(),
149             pba_entries: self.pba_entries.clone(),
150             masked: self.masked,
151             enabled: self.enabled,
152         }
153     }
154 
155     fn set_state(&mut self, state: &MsixConfigState) -> result::Result<(), Error> {
156         self.table_entries = state.table_entries.clone();
157         self.pba_entries = state.pba_entries.clone();
158         self.masked = state.masked;
159         self.enabled = state.enabled;
160 
161         if self.enabled && !self.masked {
162             for (idx, table_entry) in self.table_entries.iter().enumerate() {
163                 if table_entry.masked() {
164                     continue;
165                 }
166 
167                 let config = MsiIrqSourceConfig {
168                     high_addr: table_entry.msg_addr_hi,
169                     low_addr: table_entry.msg_addr_lo,
170                     data: table_entry.msg_data,
171                     devid: self.devid,
172                 };
173 
174                 self.interrupt_source_group
175                     .update(
176                         idx as InterruptIndex,
177                         InterruptSourceConfig::MsiIrq(config),
178                         self.masked,
179                     )
180                     .map_err(Error::UpdateInterruptRoute)?;
181 
182                 self.interrupt_source_group
183                     .enable()
184                     .map_err(Error::EnableInterruptRoute)?;
185             }
186         }
187 
188         Ok(())
189     }
190 
    /// Returns the function-wide mask flag, as last set from the Message
    /// Control register or from a restored state.
    pub fn masked(&self) -> bool {
        self.masked
    }
194 
    /// Returns the MSI-X enable flag, as last set from the Message Control
    /// register or from a restored state.
    pub fn enabled(&self) -> bool {
        self.enabled
    }
198 
199     pub fn set_msg_ctl(&mut self, reg: u16) {
200         let old_masked = self.masked;
201         let old_enabled = self.enabled;
202 
203         self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
204         self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;
205 
206         // Update interrupt routing
207         if old_masked != self.masked || old_enabled != self.enabled {
208             if self.enabled && !self.masked {
209                 debug!("MSI-X enabled for device 0x{:x}", self.devid);
210                 for (idx, table_entry) in self.table_entries.iter().enumerate() {
211                     let config = MsiIrqSourceConfig {
212                         high_addr: table_entry.msg_addr_hi,
213                         low_addr: table_entry.msg_addr_lo,
214                         data: table_entry.msg_data,
215                         devid: self.devid,
216                     };
217 
218                     if let Err(e) = self.interrupt_source_group.update(
219                         idx as InterruptIndex,
220                         InterruptSourceConfig::MsiIrq(config),
221                         table_entry.masked(),
222                     ) {
223                         error!("Failed updating vector: {:?}", e);
224                     }
225                 }
226             } else if old_enabled || !old_masked {
227                 debug!("MSI-X disabled for device 0x{:x}", self.devid);
228                 if let Err(e) = self.interrupt_source_group.disable() {
229                     error!("Failed disabling irq_fd: {:?}", e);
230                 }
231             }
232         }
233 
234         // If the Function Mask bit was set, and has just been cleared, it's
235         // important to go through the entire PBA to check if there was any
236         // pending MSI-X message to inject, given that the vector is not
237         // masked.
238         if old_masked && !self.masked {
239             for (index, entry) in self.table_entries.clone().iter().enumerate() {
240                 if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
241                     self.inject_msix_and_clear_pba(index);
242                 }
243             }
244         }
245     }
246 
247     pub fn read_table(&self, offset: u64, data: &mut [u8]) {
248         assert!((data.len() == 4 || data.len() == 8));
249 
250         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
251         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
252 
253         match data.len() {
254             4 => {
255                 let value = match modulo_offset {
256                     0x0 => self.table_entries[index].msg_addr_lo,
257                     0x4 => self.table_entries[index].msg_addr_hi,
258                     0x8 => self.table_entries[index].msg_data,
259                     0xc => self.table_entries[index].vector_ctl,
260                     _ => {
261                         error!("invalid offset");
262                         0
263                     }
264                 };
265 
266                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
267                 LittleEndian::write_u32(data, value);
268             }
269             8 => {
270                 let value = match modulo_offset {
271                     0x0 => {
272                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
273                             | u64::from(self.table_entries[index].msg_addr_lo)
274                     }
275                     0x8 => {
276                         (u64::from(self.table_entries[index].vector_ctl) << 32)
277                             | u64::from(self.table_entries[index].msg_data)
278                     }
279                     _ => {
280                         error!("invalid offset");
281                         0
282                     }
283                 };
284 
285                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
286                 LittleEndian::write_u64(data, value);
287             }
288             _ => {
289                 error!("invalid data length");
290             }
291         }
292     }
293 
294     pub fn write_table(&mut self, offset: u64, data: &[u8]) {
295         assert!((data.len() == 4 || data.len() == 8));
296 
297         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
298         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
299 
300         // Store the value of the entry before modification
301         let old_entry = self.table_entries[index].clone();
302 
303         match data.len() {
304             4 => {
305                 let value = LittleEndian::read_u32(data);
306                 match modulo_offset {
307                     0x0 => self.table_entries[index].msg_addr_lo = value,
308                     0x4 => self.table_entries[index].msg_addr_hi = value,
309                     0x8 => self.table_entries[index].msg_data = value,
310                     0xc => {
311                         self.table_entries[index].vector_ctl = value;
312                     }
313                     _ => error!("invalid offset"),
314                 };
315 
316                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
317             }
318             8 => {
319                 let value = LittleEndian::read_u64(data);
320                 match modulo_offset {
321                     0x0 => {
322                         self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
323                         self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
324                     }
325                     0x8 => {
326                         self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
327                         self.table_entries[index].vector_ctl = (value >> 32) as u32;
328                     }
329                     _ => error!("invalid offset"),
330                 };
331 
332                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
333             }
334             _ => error!("invalid data length"),
335         };
336 
337         let table_entry = &self.table_entries[index];
338 
339         // Optimisation to avoid excessive updates
340         if &old_entry == table_entry {
341             return;
342         }
343 
344         // Update interrupt routes
345         // Optimisation: only update routes if the entry is not masked;
346         // this is safe because if the entry is masked (starts masked as per spec)
347         // in the table then it won't be triggered. (See: #4273)
348         if self.enabled && !self.masked && !table_entry.masked() {
349             let config = MsiIrqSourceConfig {
350                 high_addr: table_entry.msg_addr_hi,
351                 low_addr: table_entry.msg_addr_lo,
352                 data: table_entry.msg_data,
353                 devid: self.devid,
354             };
355 
356             if let Err(e) = self.interrupt_source_group.update(
357                 index as InterruptIndex,
358                 InterruptSourceConfig::MsiIrq(config),
359                 table_entry.masked(),
360             ) {
361                 error!("Failed updating vector: {:?}", e);
362             }
363         }
364 
365         // After the MSI-X table entry has been updated, it is necessary to
366         // check if the vector control masking bit has changed. In case the
367         // bit has been flipped from 1 to 0, we need to inject a MSI message
368         // if the corresponding pending bit from the PBA is set. Once the MSI
369         // has been injected, the pending bit in the PBA needs to be cleared.
370         // All of this is valid only if MSI-X has not been masked for the whole
371         // device.
372 
373         // Check if bit has been flipped
374         if !self.masked()
375             && self.enabled()
376             && old_entry.masked()
377             && !table_entry.masked()
378             && self.get_pba_bit(index as u16) == 1
379         {
380             self.inject_msix_and_clear_pba(index);
381         }
382     }
383 
384     pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
385         assert!((data.len() == 4 || data.len() == 8));
386 
387         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
388         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
389 
390         match data.len() {
391             4 => {
392                 let value: u32 = match modulo_offset {
393                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
394                     0x4 => (self.pba_entries[index] >> 32) as u32,
395                     _ => {
396                         error!("invalid offset");
397                         0
398                     }
399                 };
400 
401                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
402                 LittleEndian::write_u32(data, value);
403             }
404             8 => {
405                 let value: u64 = match modulo_offset {
406                     0x0 => self.pba_entries[index],
407                     _ => {
408                         error!("invalid offset");
409                         0
410                     }
411                 };
412 
413                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
414                 LittleEndian::write_u64(data, value);
415             }
416             _ => {
417                 error!("invalid data length");
418             }
419         }
420     }
421 
    /// Rejects a write to the Pending Bit Array: both arguments are ignored
    /// and an error is logged, leaving the PBA unchanged.
    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }
425 
426     pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
427         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
428 
429         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
430         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
431         let mut mask: u64 = (1 << shift) as u64;
432 
433         if reset {
434             mask = !mask;
435             self.pba_entries[index] &= mask;
436         } else {
437             self.pba_entries[index] |= mask;
438         }
439     }
440 
441     fn get_pba_bit(&self, vector: u16) -> u8 {
442         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
443 
444         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
445         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
446 
447         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
448     }
449 
450     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
451         // Inject the MSI message
452         match self
453             .interrupt_source_group
454             .trigger(vector as InterruptIndex)
455         {
456             Ok(_) => debug!("MSI-X injected on vector control flip"),
457             Err(e) => error!("failed to inject MSI-X: {}", e),
458         }
459 
460         // Clear the bit from PBA
461         self.set_pba_bit(vector as u16, true);
462     }
463 }
464 
// No trait items are overridden: whatever defaults `Pausable` provides apply.
impl Pausable for MsixConfig {}
466 
467 impl Snapshottable for MsixConfig {
468     fn id(&self) -> String {
469         String::from(MSIX_CONFIG_ID)
470     }
471 
472     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
473         Snapshot::new_from_versioned_state(&self.id(), &self.state())
474     }
475 
476     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
477         self.set_state(&snapshot.to_versioned_state(&self.id())?)
478             .map_err(|e| {
479                 MigratableError::Restore(anyhow!(
480                     "Could not restore state for {}: {:?}",
481                     self.id(),
482                     e
483                 ))
484             })
485     }
486 }
487 
/// Body of the MSI-X capability structure as exposed through
/// `PciCapability::bytes`: the Message Control register followed by the
/// Table and PBA offset/BIR registers, laid out without padding.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default, Versionize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}
507 
// SAFETY: All members are simple numbers and any value is valid. The struct
// is `#[repr(packed)]`, so there are no padding bytes between the fields.
unsafe impl ByteValued for MsixCap {}
510 
511 impl PciCapability for MsixCap {
512     fn bytes(&self) -> &[u8] {
513         self.as_slice()
514     }
515 
516     fn id(&self) -> PciCapabilityId {
517         PciCapabilityId::MsiX
518     }
519 }
520 
521 impl MsixCap {
522     pub fn new(
523         table_pci_bar: u8,
524         table_size: u16,
525         table_off: u32,
526         pba_pci_bar: u8,
527         pba_off: u32,
528     ) -> Self {
529         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
530 
531         // Set the table size and enable MSI-X.
532         let msg_ctl: u16 = 0x8000u16 + table_size - 1;
533 
534         MsixCap {
535             msg_ctl,
536             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
537             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
538         }
539     }
540 
541     pub fn set_msg_ctl(&mut self, data: u16) {
542         self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
543             | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
544     }
545 
546     pub fn masked(&self) -> bool {
547         (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
548     }
549 
550     pub fn enabled(&self) -> bool {
551         (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
552     }
553 
554     pub fn table_offset(&self) -> u32 {
555         self.table & 0xffff_fff8
556     }
557 
558     pub fn pba_offset(&self) -> u32 {
559         self.pba & 0xffff_fff8
560     }
561 
562     pub fn table_bir(&self) -> u32 {
563         self.table & 0x7
564     }
565 
566     pub fn pba_bir(&self) -> u32 {
567         self.pba & 0x7
568     }
569 
570     pub fn table_size(&self) -> u16 {
571         (self.msg_ctl & 0x7ff) + 1
572     }
573 
574     pub fn table_range(&self) -> (u64, u64) {
575         // The table takes 16 bytes per entry.
576         let size = self.table_size() as u64 * 16;
577         (self.table_offset() as u64, size)
578     }
579 
580     pub fn pba_range(&self) -> (u64, u64) {
581         // The table takes 1 bit per entry modulo 8 bytes.
582         let size = ((self.table_size() as u64 / 64) + 1) * 8;
583         (self.pba_offset() as u64, size)
584     }
585 }
586