xref: /cloud-hypervisor/pci/src/msix.rs (revision 5e52729453cb62edbe4fb3a4aa24f8cca31e667e)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 
6 use crate::{PciCapability, PciCapabilityId};
7 use byteorder::{ByteOrder, LittleEndian};
8 use std::io;
9 use std::result;
10 use std::sync::Arc;
11 use versionize::{VersionMap, Versionize, VersionizeResult};
12 use versionize_derive::Versionize;
13 use vm_device::interrupt::{
14     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
15 };
16 use vm_memory::ByteValued;
17 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, VersionMapped};
18 
/// Upper bound on the number of MSI-X vectors a single device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Byte stride used to split a table offset into (entry index, offset in entry).
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// Byte stride used to split a PBA offset into (entry index, offset in entry).
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits held by one `u64` PBA entry.
const BITS_PER_PBA_ENTRY: usize = 64;
/// Position of the Function Mask bit in the Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
/// Position of the MSI-X Enable bit in the Message Control register.
const MSIX_ENABLE_BIT: u8 = 15;
/// Message Control register mask selecting the Function Mask bit.
const FUNCTION_MASK_MASK: u16 = 1u16 << FUNCTION_MASK_BIT;
/// Message Control register mask selecting the MSI-X Enable bit.
const MSIX_ENABLE_MASK: u16 = 1u16 << MSIX_ENABLE_BIT;
/// Size in bytes of one MSI-X table entry.
pub const MSIX_TABLE_ENTRY_SIZE: usize = 16;
/// Identifier under which the MSI-X configuration is snapshotted.
pub const MSIX_CONFIG_ID: &str = "msix_config";
29 
/// Errors reported while programming MSI-X interrupt routes.
#[derive(Debug)]
pub enum Error {
    /// Failed enabling the interrupt route.
    EnableInterruptRoute(io::Error),
    /// Failed updating the interrupt route.
    UpdateInterruptRoute(io::Error),
}

impl std::fmt::Display for Error {
    /// Writes a short description of the failed operation; the underlying
    /// I/O error is exposed through `source()` rather than repeated here.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Error::EnableInterruptRoute(_) => "Failed enabling the interrupt route",
            Error::UpdateInterruptRoute(_) => "Failed updating the interrupt route",
        };
        f.write_str(description)
    }
}

impl std::error::Error for Error {
    /// Returns the I/O error that caused the failure.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::EnableInterruptRoute(e) | Error::UpdateInterruptRoute(e) => Some(e),
        }
    }
}
37 
/// One 16-byte entry of the MSI-X table, stored as four 32-bit words in the
/// order they appear at entry offsets 0x0, 0x4, 0x8 and 0xc.
#[derive(Debug, Clone, Versionize, Eq, PartialEq)]
pub struct MsixTableEntry {
    /// Low 32 bits of the message address (entry offset 0x0).
    pub msg_addr_lo: u32,
    /// High 32 bits of the message address (entry offset 0x4).
    pub msg_addr_hi: u32,
    /// Message data (entry offset 0x8).
    pub msg_data: u32,
    /// Vector control word (entry offset 0xc); bit 0 is the per-vector mask.
    pub vector_ctl: u32,
}
45 
46 impl MsixTableEntry {
47     pub fn masked(&self) -> bool {
48         self.vector_ctl & 0x1 == 0x1
49     }
50 }
51 
52 impl Default for MsixTableEntry {
53     fn default() -> Self {
54         MsixTableEntry {
55             msg_addr_lo: 0,
56             msg_addr_hi: 0,
57             msg_data: 0,
58             vector_ctl: 0x1,
59         }
60     }
61 }
62 
/// Serializable copy of the parts of `MsixConfig` that are saved in a
/// snapshot and handed back to `MsixConfig::new` on restore.
#[derive(Versionize)]
pub struct MsixConfigState {
    /// Saved MSI-X table, one entry per vector.
    table_entries: Vec<MsixTableEntry>,
    /// Saved Pending Bit Array, 64 pending bits per element.
    pba_entries: Vec<u64>,
    /// Saved function-level mask flag.
    masked: bool,
    /// Saved MSI-X enable flag.
    enabled: bool,
}

// No overrides: the trait's default items are used as-is.
impl VersionMapped for MsixConfigState {}
72 
/// Runtime MSI-X state of a device: the vector table, the Pending Bit Array
/// and the function-level mask/enable flags, together with the interrupt
/// source group used to program and trigger the vectors.
pub struct MsixConfig {
    /// MSI-X table, indexed by vector number.
    pub table_entries: Vec<MsixTableEntry>,
    /// Pending Bit Array; vector `v` is bit `v % 64` of element `v / 64`.
    pub pba_entries: Vec<u64>,
    /// Device identifier copied into every `MsiIrqSourceConfig` built here.
    pub devid: u32,
    /// Backend used to update, enable, disable and trigger interrupt routes.
    interrupt_source_group: Arc<dyn InterruptSourceGroup>,
    /// Function Mask bit as last written through `set_msg_ctl`.
    masked: bool,
    /// MSI-X Enable bit as last written through `set_msg_ctl`.
    enabled: bool,
}
81 
82 impl MsixConfig {
83     pub fn new(
84         msix_vectors: u16,
85         interrupt_source_group: Arc<dyn InterruptSourceGroup>,
86         devid: u32,
87         state: Option<MsixConfigState>,
88     ) -> result::Result<Self, Error> {
89         assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
90 
91         let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state {
92             if state.enabled && !state.masked {
93                 for (idx, table_entry) in state.table_entries.iter().enumerate() {
94                     if table_entry.masked() {
95                         continue;
96                     }
97 
98                     let config = MsiIrqSourceConfig {
99                         high_addr: table_entry.msg_addr_hi,
100                         low_addr: table_entry.msg_addr_lo,
101                         data: table_entry.msg_data,
102                         devid,
103                     };
104 
105                     interrupt_source_group
106                         .update(
107                             idx as InterruptIndex,
108                             InterruptSourceConfig::MsiIrq(config),
109                             state.masked,
110                         )
111                         .map_err(Error::UpdateInterruptRoute)?;
112 
113                     interrupt_source_group
114                         .enable()
115                         .map_err(Error::EnableInterruptRoute)?;
116                 }
117             }
118 
119             (
120                 state.table_entries,
121                 state.pba_entries,
122                 state.masked,
123                 state.enabled,
124             )
125         } else {
126             let mut table_entries: Vec<MsixTableEntry> = Vec::new();
127             table_entries.resize_with(msix_vectors as usize, Default::default);
128             let mut pba_entries: Vec<u64> = Vec::new();
129             let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1;
130             pba_entries.resize_with(num_pba_entries, Default::default);
131 
132             (table_entries, pba_entries, true, false)
133         };
134 
135         Ok(MsixConfig {
136             table_entries,
137             pba_entries,
138             devid,
139             interrupt_source_group,
140             masked,
141             enabled,
142         })
143     }
144 
145     fn state(&self) -> MsixConfigState {
146         MsixConfigState {
147             table_entries: self.table_entries.clone(),
148             pba_entries: self.pba_entries.clone(),
149             masked: self.masked,
150             enabled: self.enabled,
151         }
152     }
153 
154     pub fn masked(&self) -> bool {
155         self.masked
156     }
157 
158     pub fn enabled(&self) -> bool {
159         self.enabled
160     }
161 
162     pub fn set_msg_ctl(&mut self, reg: u16) {
163         let old_masked = self.masked;
164         let old_enabled = self.enabled;
165 
166         self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
167         self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;
168 
169         // Update interrupt routing
170         if old_masked != self.masked || old_enabled != self.enabled {
171             if self.enabled && !self.masked {
172                 debug!("MSI-X enabled for device 0x{:x}", self.devid);
173                 for (idx, table_entry) in self.table_entries.iter().enumerate() {
174                     let config = MsiIrqSourceConfig {
175                         high_addr: table_entry.msg_addr_hi,
176                         low_addr: table_entry.msg_addr_lo,
177                         data: table_entry.msg_data,
178                         devid: self.devid,
179                     };
180 
181                     if let Err(e) = self.interrupt_source_group.update(
182                         idx as InterruptIndex,
183                         InterruptSourceConfig::MsiIrq(config),
184                         table_entry.masked(),
185                     ) {
186                         error!("Failed updating vector: {:?}", e);
187                     }
188                 }
189             } else if old_enabled || !old_masked {
190                 debug!("MSI-X disabled for device 0x{:x}", self.devid);
191                 if let Err(e) = self.interrupt_source_group.disable() {
192                     error!("Failed disabling irq_fd: {:?}", e);
193                 }
194             }
195         }
196 
197         // If the Function Mask bit was set, and has just been cleared, it's
198         // important to go through the entire PBA to check if there was any
199         // pending MSI-X message to inject, given that the vector is not
200         // masked.
201         if old_masked && !self.masked {
202             for (index, entry) in self.table_entries.clone().iter().enumerate() {
203                 if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
204                     self.inject_msix_and_clear_pba(index);
205                 }
206             }
207         }
208     }
209 
210     pub fn read_table(&self, offset: u64, data: &mut [u8]) {
211         assert!((data.len() == 4 || data.len() == 8));
212 
213         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
214         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
215 
216         match data.len() {
217             4 => {
218                 let value = match modulo_offset {
219                     0x0 => self.table_entries[index].msg_addr_lo,
220                     0x4 => self.table_entries[index].msg_addr_hi,
221                     0x8 => self.table_entries[index].msg_data,
222                     0xc => self.table_entries[index].vector_ctl,
223                     _ => {
224                         error!("invalid offset");
225                         0
226                     }
227                 };
228 
229                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
230                 LittleEndian::write_u32(data, value);
231             }
232             8 => {
233                 let value = match modulo_offset {
234                     0x0 => {
235                         (u64::from(self.table_entries[index].msg_addr_hi) << 32)
236                             | u64::from(self.table_entries[index].msg_addr_lo)
237                     }
238                     0x8 => {
239                         (u64::from(self.table_entries[index].vector_ctl) << 32)
240                             | u64::from(self.table_entries[index].msg_data)
241                     }
242                     _ => {
243                         error!("invalid offset");
244                         0
245                     }
246                 };
247 
248                 debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value);
249                 LittleEndian::write_u64(data, value);
250             }
251             _ => {
252                 error!("invalid data length");
253             }
254         }
255     }
256 
257     pub fn write_table(&mut self, offset: u64, data: &[u8]) {
258         assert!((data.len() == 4 || data.len() == 8));
259 
260         let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
261         let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
262 
263         // Store the value of the entry before modification
264         let old_entry = self.table_entries[index].clone();
265 
266         match data.len() {
267             4 => {
268                 let value = LittleEndian::read_u32(data);
269                 match modulo_offset {
270                     0x0 => self.table_entries[index].msg_addr_lo = value,
271                     0x4 => self.table_entries[index].msg_addr_hi = value,
272                     0x8 => self.table_entries[index].msg_data = value,
273                     0xc => {
274                         self.table_entries[index].vector_ctl = value;
275                     }
276                     _ => error!("invalid offset"),
277                 };
278 
279                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
280             }
281             8 => {
282                 let value = LittleEndian::read_u64(data);
283                 match modulo_offset {
284                     0x0 => {
285                         self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
286                         self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
287                     }
288                     0x8 => {
289                         self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
290                         self.table_entries[index].vector_ctl = (value >> 32) as u32;
291                     }
292                     _ => error!("invalid offset"),
293                 };
294 
295                 debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value);
296             }
297             _ => error!("invalid data length"),
298         };
299 
300         let table_entry = &self.table_entries[index];
301 
302         // Optimisation to avoid excessive updates
303         if &old_entry == table_entry {
304             return;
305         }
306 
307         // Update interrupt routes
308         // Optimisation: only update routes if the entry is not masked;
309         // this is safe because if the entry is masked (starts masked as per spec)
310         // in the table then it won't be triggered. (See: #4273)
311         if self.enabled && !self.masked && !table_entry.masked() {
312             let config = MsiIrqSourceConfig {
313                 high_addr: table_entry.msg_addr_hi,
314                 low_addr: table_entry.msg_addr_lo,
315                 data: table_entry.msg_data,
316                 devid: self.devid,
317             };
318 
319             if let Err(e) = self.interrupt_source_group.update(
320                 index as InterruptIndex,
321                 InterruptSourceConfig::MsiIrq(config),
322                 table_entry.masked(),
323             ) {
324                 error!("Failed updating vector: {:?}", e);
325             }
326         }
327 
328         // After the MSI-X table entry has been updated, it is necessary to
329         // check if the vector control masking bit has changed. In case the
330         // bit has been flipped from 1 to 0, we need to inject a MSI message
331         // if the corresponding pending bit from the PBA is set. Once the MSI
332         // has been injected, the pending bit in the PBA needs to be cleared.
333         // All of this is valid only if MSI-X has not been masked for the whole
334         // device.
335 
336         // Check if bit has been flipped
337         if !self.masked()
338             && self.enabled()
339             && old_entry.masked()
340             && !table_entry.masked()
341             && self.get_pba_bit(index as u16) == 1
342         {
343             self.inject_msix_and_clear_pba(index);
344         }
345     }
346 
347     pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) {
348         assert!((data.len() == 4 || data.len() == 8));
349 
350         let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
351         let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
352 
353         match data.len() {
354             4 => {
355                 let value: u32 = match modulo_offset {
356                     0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
357                     0x4 => (self.pba_entries[index] >> 32) as u32,
358                     _ => {
359                         error!("invalid offset");
360                         0
361                     }
362                 };
363 
364                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
365                 LittleEndian::write_u32(data, value);
366             }
367             8 => {
368                 let value: u64 = match modulo_offset {
369                     0x0 => self.pba_entries[index],
370                     _ => {
371                         error!("invalid offset");
372                         0
373                     }
374                 };
375 
376                 debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value);
377                 LittleEndian::write_u64(data, value);
378             }
379             _ => {
380                 error!("invalid data length");
381             }
382         }
383     }
384 
385     pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
386         error!("Pending Bit Array is read only");
387     }
388 
389     pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
390         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
391 
392         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
393         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
394         let mut mask: u64 = (1 << shift) as u64;
395 
396         if reset {
397             mask = !mask;
398             self.pba_entries[index] &= mask;
399         } else {
400             self.pba_entries[index] |= mask;
401         }
402     }
403 
404     fn get_pba_bit(&self, vector: u16) -> u8 {
405         assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
406 
407         let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
408         let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
409 
410         ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
411     }
412 
413     fn inject_msix_and_clear_pba(&mut self, vector: usize) {
414         // Inject the MSI message
415         match self
416             .interrupt_source_group
417             .trigger(vector as InterruptIndex)
418         {
419             Ok(_) => debug!("MSI-X injected on vector control flip"),
420             Err(e) => error!("failed to inject MSI-X: {}", e),
421         }
422 
423         // Clear the bit from PBA
424         self.set_pba_bit(vector as u16, true);
425     }
426 }
427 
// No overrides: `MsixConfig` relies on the trait's default items.
impl Pausable for MsixConfig {}
429 
430 impl Snapshottable for MsixConfig {
431     fn id(&self) -> String {
432         String::from(MSIX_CONFIG_ID)
433     }
434 
435     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
436         Snapshot::new_from_versioned_state(&self.state())
437     }
438 }
439 
/// Register contents of the MSI-X capability as exposed in PCI configuration
/// space: Message Control followed by the Table and PBA locators.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Clone, Copy, Default, Versionize)]
pub struct MsixCap {
    // Message Control Register
    //   10-0:  MSI-X Table size (stored as size - 1, see `table_size()`)
    //   13-11: Reserved
    //   14:    Mask. Mask all MSI-X when set.
    //   15:    Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    // Table. Contains the offset and the BAR indicator (BIR)
    //   2-0:  Table BAR indicator (BIR). Can be 0 to 5.
    //   31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    //   2-0:  PBA BAR indicator (BIR). Can be 0 to 5.
    //   31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}

// SAFETY: All members are simple numbers and any value is valid. The struct
// is `#[repr(packed)]`, so there is no padding between the fields.
unsafe impl ByteValued for MsixCap {}
462 
463 impl PciCapability for MsixCap {
464     fn bytes(&self) -> &[u8] {
465         self.as_slice()
466     }
467 
468     fn id(&self) -> PciCapabilityId {
469         PciCapabilityId::MsiX
470     }
471 }
472 
473 impl MsixCap {
474     pub fn new(
475         table_pci_bar: u8,
476         table_size: u16,
477         table_off: u32,
478         pba_pci_bar: u8,
479         pba_off: u32,
480     ) -> Self {
481         assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
482 
483         // Set the table size and enable MSI-X.
484         let msg_ctl: u16 = 0x8000u16 + table_size - 1;
485 
486         MsixCap {
487             msg_ctl,
488             table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
489             pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
490         }
491     }
492 
493     pub fn set_msg_ctl(&mut self, data: u16) {
494         self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK))
495             | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK));
496     }
497 
498     pub fn masked(&self) -> bool {
499         (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1
500     }
501 
502     pub fn enabled(&self) -> bool {
503         (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1
504     }
505 
506     pub fn table_offset(&self) -> u32 {
507         self.table & 0xffff_fff8
508     }
509 
510     pub fn pba_offset(&self) -> u32 {
511         self.pba & 0xffff_fff8
512     }
513 
514     pub fn table_bir(&self) -> u32 {
515         self.table & 0x7
516     }
517 
518     pub fn pba_bir(&self) -> u32 {
519         self.pba & 0x7
520     }
521 
522     pub fn table_size(&self) -> u16 {
523         (self.msg_ctl & 0x7ff) + 1
524     }
525 
526     pub fn table_range(&self) -> (u64, u64) {
527         // The table takes 16 bytes per entry.
528         let size = self.table_size() as u64 * 16;
529         (self.table_offset() as u64, size)
530     }
531 
532     pub fn pba_range(&self) -> (u64, u64) {
533         // The table takes 1 bit per entry modulo 8 bytes.
534         let size = ((self.table_size() as u64 / 64) + 1) * 8;
535         (self.pba_offset() as u64, size)
536     }
537 }
538