xref: /cloud-hypervisor/hypervisor/src/arch/x86/emulator/mod.rs (revision 3f3489e38e32a652241e889a9a1f6c67823d584b)
1 //
2 // Copyright © 2020 Intel Corporation
3 //
4 // SPDX-License-Identifier: Apache-2.0
5 //
6 
7 use anyhow::Context;
8 use iced_x86::*;
9 
10 use crate::arch::emulator::{EmulationError, EmulationResult, PlatformEmulator, PlatformError};
11 use crate::arch::x86::emulator::instructions::*;
12 use crate::arch::x86::regs::{CR0_PE, EFER_LMA};
13 use crate::arch::x86::{
14     segment_type_expand_down, segment_type_ro, Exception, SegmentRegister, SpecialRegisters,
15 };
16 use crate::StandardRegisters;
17 
18 #[macro_use]
19 mod instructions;
20 
/// x86 CPU modes
///
/// A fieldless enum; `Clone`/`Copy` are derived so callers can keep and
/// compare a mode value without moving it out (e.g. the value returned by
/// `CpuStateManager::mode()`).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CpuMode {
    /// Real mode
    Real,

    /// Virtual 8086 mode
    Virtual8086,

    /// 16-bit protected mode
    Protected16,

    /// 32-bit protected mode
    Protected,

    /// 64-bit mode, a.k.a. long mode
    Long,
}
39 
/// CpuStateManager manages an x86 CPU state.
///
/// Instruction emulation handlers get a mutable reference to
/// a `CpuStateManager` implementation, representing the current state of the
/// CPU they have to emulate an instruction stream against. Usually those
/// handlers will modify the CPU state by modifying `CpuState` and it is up to
/// the handler caller to commit those changes back by invoking a
/// `PlatformEmulator` implementation `set_state()` method.
///
pub trait CpuStateManager: Clone {
    /// Reads a CPU register.
    ///
    /// # Arguments
    ///
    /// * `reg` - A general purpose, control or debug register.
    fn read_reg(&self, reg: Register) -> Result<u64, PlatformError>;

    /// Write to a CPU register.
    ///
    /// # Arguments
    ///
    /// * `reg` - A general purpose, control or debug register.
    /// * `val` - The value to load.
    fn write_reg(&mut self, reg: Register, val: u64) -> Result<(), PlatformError>;

    /// Reads a segment register.
    ///
    /// # Arguments
    ///
    /// * `reg` - A segment register.
    fn read_segment(&self, reg: Register) -> Result<SegmentRegister, PlatformError>;

    /// Write to a segment register.
    ///
    /// # Arguments
    ///
    /// * `reg` - A segment register.
    /// * `segment_reg` - The segment register value to load.
    fn write_segment(
        &mut self,
        reg: Register,
        segment_reg: SegmentRegister,
    ) -> Result<(), PlatformError>;

    /// Get the CPU instruction pointer.
    fn ip(&self) -> u64;

    /// Set the CPU instruction pointer.
    ///
    /// # Arguments
    ///
    /// * `ip` - The CPU instruction pointer.
    fn set_ip(&mut self, ip: u64);

    /// Get the CPU Extended Feature Enable Register.
    fn efer(&self) -> u64;

    /// Set the CPU Extended Feature Enable Register.
    ///
    /// # Arguments
    ///
    /// * `efer` - The CPU EFER value.
    fn set_efer(&mut self, efer: u64);

    /// Get the CPU flags.
    fn flags(&self) -> u64;

    /// Set the CPU flags.
    ///
    /// # Arguments
    ///
    /// * `flags` - The CPU flags
    fn set_flags(&mut self, flags: u64);

    /// Get the CPU mode.
    fn mode(&self) -> Result<CpuMode, PlatformError>;

    /// Translate a logical (segmented) address into a linear (virtual) one.
    ///
    /// # Arguments
    ///
    /// * `segment` - Which segment to use for linearization
    /// * `logical_addr` - The logical address to be translated
    /// * `write` - Whether the resulting address will be written to; writes
    ///   to a read-only segment are rejected in protected/real mode.
    ///
    /// # Errors
    ///
    /// * `PlatformError::InvalidAddress` when the offset fails the segment
    ///   limit check (or the base + offset addition overflows in long mode).
    /// * `PlatformError::UnsupportedCpuMode` for modes this default
    ///   implementation does not handle (Virtual8086, 16-bit protected).
    fn linearize(
        &self,
        segment: Register,
        logical_addr: u64,
        write: bool,
    ) -> Result<u64, PlatformError> {
        let segment_register = self.read_segment(segment)?;
        let mode = self.mode()?;

        match mode {
            CpuMode::Long => {
                // In long mode segmentation is essentially flat: the linear
                // address is just base + offset, with the addition checked
                // for overflow.
                // TODO Check that we got a canonical address.
                Ok(logical_addr
                    .checked_add(segment_register.base)
                    .ok_or_else(|| {
                        PlatformError::InvalidAddress(anyhow!(
                            "Logical address {:#x} cannot be linearized with segment {:#x?}",
                            logical_addr,
                            segment_register
                        ))
                    })?)
            }

            CpuMode::Protected | CpuMode::Real => {
                let segment_type = segment_register.segment_type();

                // Must not write to a read-only segment.
                if segment_type_ro(segment_type) && write {
                    return Err(PlatformError::InvalidAddress(anyhow!(
                        "Cannot write to a read-only segment"
                    )));
                }

                // Offsets are at most 32 bits wide outside of long mode.
                let logical_addr = logical_addr & 0xffff_ffffu64;
                // When the granularity bit is set the limit is counted in
                // 4 KiB units, i.e. scaled by 2^12 (low 12 bits all valid).
                let mut segment_limit: u32 = if segment_register.granularity() != 0 {
                    (segment_register.limit << 12) | 0xfff
                } else {
                    segment_register.limit
                };

                // Expand-down segment
                // NOTE(review): per the Intel SDM, valid offsets in an
                // expand-down segment are those *above* the limit (up to
                // 0xffff or 0xffff_ffff depending on the D/B bit). This
                // check rejects offsets >= limit instead, which reads as
                // the inverse — confirm the intended semantics.
                if segment_type_expand_down(segment_type) {
                    if logical_addr >= segment_limit.into() {
                        return Err(PlatformError::InvalidAddress(anyhow!(
                            "{:#x} is off limits {:#x} (expand down)",
                            logical_addr,
                            segment_limit
                        )));
                    }

                    // The effective upper bound for the final check depends
                    // on the segment's default operation size (D/B) bit.
                    if segment_register.db() != 0 {
                        segment_limit = 0xffffffff
                    } else {
                        segment_limit = 0xffff
                    }
                }

                if logical_addr > segment_limit.into() {
                    return Err(PlatformError::InvalidAddress(anyhow!(
                        "{:#x} is off limits {:#x}",
                        logical_addr,
                        segment_limit
                    )));
                }

                // base + offset may intentionally wrap around the 64-bit
                // space; no fault is raised here for that.
                Ok(logical_addr.wrapping_add(segment_register.base))
            }

            _ => Err(PlatformError::UnsupportedCpuMode(anyhow!("{:?}", mode))),
        }
    }
}
195 
// Bit masks used to truncate a raw register value to a given operand width.
const REGISTER_MASK_64: u64 = 0xffff_ffff_ffff_ffffu64;
const REGISTER_MASK_32: u64 = 0xffff_ffffu64;
const REGISTER_MASK_16: u64 = 0xffffu64;
const REGISTER_MASK_8: u64 = 0xffu64;

// Keep the bits of `$reg` selected by `$mask` and OR in `$value`.
// `$value` is expected to already be masked/shifted into position by the
// caller (see `write_reg()`).
macro_rules! set_reg {
    ($reg:expr, $mask:expr, $value:expr) => {
        $reg = ($reg & $mask) | $value
    };
}
206 
#[derive(Clone, Debug)]
/// A minimal, emulated CPU state.
///
/// Hypervisors needing x86 emulation can choose to either use their own
/// CPU state structures and implement the CpuStateManager interface for it,
/// or use `EmulatorCpuState`. The latter implies creating a new state
/// `EmulatorCpuState` instance for each platform `cpu_state()` call, which
/// might be less efficient.
pub struct EmulatorCpuState {
    /// General purpose registers, RIP and RFLAGS.
    pub regs: StandardRegisters,
    /// Segment, control and model-specific registers (EFER, ...).
    pub sregs: SpecialRegisters,
}
219 
impl CpuStateManager for EmulatorCpuState {
    fn read_reg(&self, reg: Register) -> Result<u64, PlatformError> {
        // Map any register alias (e.g. EAX/AX/AL/AH) to the raw 64-bit
        // value of the architectural register it is part of.
        let mut reg_value: u64 = match reg {
            Register::RAX | Register::EAX | Register::AX | Register::AL | Register::AH => {
                self.regs.get_rax()
            }
            Register::RBX | Register::EBX | Register::BX | Register::BL | Register::BH => {
                self.regs.get_rbx()
            }
            Register::RCX | Register::ECX | Register::CX | Register::CL | Register::CH => {
                self.regs.get_rcx()
            }
            Register::RDX | Register::EDX | Register::DX | Register::DL | Register::DH => {
                self.regs.get_rdx()
            }
            Register::RSP | Register::ESP | Register::SP => self.regs.get_rsp(),
            Register::RBP | Register::EBP | Register::BP => self.regs.get_rbp(),
            Register::RSI | Register::ESI | Register::SI | Register::SIL => self.regs.get_rsi(),
            Register::RDI | Register::EDI | Register::DI | Register::DIL => self.regs.get_rdi(),
            Register::R8 | Register::R8D | Register::R8W | Register::R8L => self.regs.get_r8(),
            Register::R9 | Register::R9D | Register::R9W | Register::R9L => self.regs.get_r9(),
            Register::R10 | Register::R10D | Register::R10W | Register::R10L => self.regs.get_r10(),
            Register::R11 | Register::R11D | Register::R11W | Register::R11L => self.regs.get_r11(),
            Register::R12 | Register::R12D | Register::R12W | Register::R12L => self.regs.get_r12(),
            Register::R13 | Register::R13D | Register::R13W | Register::R13L => self.regs.get_r13(),
            Register::R14 | Register::R14D | Register::R14W | Register::R14L => self.regs.get_r14(),
            Register::R15 | Register::R15D | Register::R15W | Register::R15L => self.regs.get_r15(),
            Register::CR0 => self.sregs.cr0,
            Register::CR2 => self.sregs.cr2,
            Register::CR3 => self.sregs.cr3,
            Register::CR4 => self.sregs.cr4,
            Register::CR8 => self.sregs.cr8,

            r => {
                return Err(PlatformError::InvalidRegister(anyhow!(
                    "read_reg invalid GPR {:?}",
                    r
                )))
            }
        };

        // Narrow the raw value down to the width the caller asked for. The
        // high-byte registers (AH/BH/CH/DH) live in bits 8..16 and must be
        // shifted down first.
        reg_value = if reg.is_gpr64() || reg.is_cr() {
            reg_value
        } else if reg.is_gpr32() {
            reg_value & REGISTER_MASK_32
        } else if reg.is_gpr16() {
            reg_value & REGISTER_MASK_16
        } else if reg.is_gpr8() {
            if reg == Register::AH
                || reg == Register::BH
                || reg == Register::CH
                || reg == Register::DH
            {
                (reg_value >> 8) & REGISTER_MASK_8
            } else {
                reg_value & REGISTER_MASK_8
            }
        } else {
            return Err(PlatformError::InvalidRegister(anyhow!(
                "read_reg invalid GPR {:?}",
                reg
            )));
        };

        debug!("Register read: {:#x} from {:?}", reg_value, reg);

        Ok(reg_value)
    }

    fn write_reg(&mut self, reg: Register, val: u64) -> Result<(), PlatformError> {
        debug!("Register write: {:#x} to {:?}", val, reg);

        // SDM Vol 1 - 3.4.1.1
        //
        // 8-bit and 16-bit operands generate an 8-bit or 16-bit result.
        // The upper 56 bits or 48 bits (respectively) of the destination
        // general-purpose register are not modified by the operation.
        //
        // `reg_value` is the new value already masked and shifted into
        // position, and `mask` selects the destination bits to preserve.
        // Note that 32-bit writes clear the whole destination register
        // (upper 32 bits zeroed), hence the all-clearing !REGISTER_MASK_64.
        let (reg_value, mask): (u64, u64) = if reg.is_gpr64() || reg.is_cr() {
            (val, !REGISTER_MASK_64)
        } else if reg.is_gpr32() {
            (val & REGISTER_MASK_32, !REGISTER_MASK_64)
        } else if reg.is_gpr16() {
            (val & REGISTER_MASK_16, !REGISTER_MASK_16)
        } else if reg.is_gpr8() {
            if reg == Register::AH
                || reg == Register::BH
                || reg == Register::CH
                || reg == Register::DH
            {
                ((val & REGISTER_MASK_8) << 8, !(REGISTER_MASK_8 << 8))
            } else {
                (val & REGISTER_MASK_8, !REGISTER_MASK_8)
            }
        } else {
            return Err(PlatformError::InvalidRegister(anyhow!(
                "write_reg invalid register {:?}",
                reg
            )));
        };

        // Merge the new bits into the destination register.
        match reg {
            Register::RAX | Register::EAX | Register::AX | Register::AL | Register::AH => {
                self.regs.set_rax((self.regs.get_rax() & mask) | reg_value);
            }
            Register::RBX | Register::EBX | Register::BX | Register::BL | Register::BH => {
                self.regs.set_rbx((self.regs.get_rbx() & mask) | reg_value);
            }
            Register::RCX | Register::ECX | Register::CX | Register::CL | Register::CH => {
                self.regs.set_rcx((self.regs.get_rcx() & mask) | reg_value);
            }
            Register::RDX | Register::EDX | Register::DX | Register::DL | Register::DH => {
                self.regs.set_rdx((self.regs.get_rdx() & mask) | reg_value);
            }
            Register::RSP | Register::ESP | Register::SP => {
                self.regs.set_rsp((self.regs.get_rsp() & mask) | reg_value);
            }
            Register::RBP | Register::EBP | Register::BP => {
                self.regs.set_rbp((self.regs.get_rbp() & mask) | reg_value);
            }
            Register::RSI | Register::ESI | Register::SI | Register::SIL => {
                self.regs.set_rsi((self.regs.get_rsi() & mask) | reg_value);
            }
            Register::RDI | Register::EDI | Register::DI | Register::DIL => {
                self.regs.set_rdi((self.regs.get_rdi() & mask) | reg_value);
            }
            Register::R8 | Register::R8D | Register::R8W | Register::R8L => {
                self.regs.set_r8((self.regs.get_r8() & mask) | reg_value);
            }
            Register::R9 | Register::R9D | Register::R9W | Register::R9L => {
                self.regs.set_r9((self.regs.get_r9() & mask) | reg_value);
            }
            Register::R10 | Register::R10D | Register::R10W | Register::R10L => {
                self.regs.set_r10((self.regs.get_r10() & mask) | reg_value);
            }
            Register::R11 | Register::R11D | Register::R11W | Register::R11L => {
                self.regs.set_r11((self.regs.get_r11() & mask) | reg_value);
            }
            Register::R12 | Register::R12D | Register::R12W | Register::R12L => {
                self.regs.set_r12((self.regs.get_r12() & mask) | reg_value);
            }
            Register::R13 | Register::R13D | Register::R13W | Register::R13L => {
                self.regs.set_r13((self.regs.get_r13() & mask) | reg_value);
            }
            Register::R14 | Register::R14D | Register::R14W | Register::R14L => {
                self.regs.set_r14((self.regs.get_r14() & mask) | reg_value);
            }
            Register::R15 | Register::R15D | Register::R15W | Register::R15L => {
                self.regs.set_r15((self.regs.get_r15() & mask) | reg_value);
            }
            Register::CR0 => set_reg!(self.sregs.cr0, mask, reg_value),
            Register::CR2 => set_reg!(self.sregs.cr2, mask, reg_value),
            Register::CR3 => set_reg!(self.sregs.cr3, mask, reg_value),
            Register::CR4 => set_reg!(self.sregs.cr4, mask, reg_value),
            Register::CR8 => set_reg!(self.sregs.cr8, mask, reg_value),
            _ => {
                return Err(PlatformError::InvalidRegister(anyhow!(
                    "write_reg invalid register {:?}",
                    reg
                )))
            }
        }

        Ok(())
    }

    fn read_segment(&self, reg: Register) -> Result<SegmentRegister, PlatformError> {
        if !reg.is_segment_register() {
            return Err(PlatformError::InvalidRegister(anyhow!(
                "read_segment {:?} is not a segment register",
                reg
            )));
        }

        match reg {
            Register::CS => Ok(self.sregs.cs),
            Register::DS => Ok(self.sregs.ds),
            Register::ES => Ok(self.sregs.es),
            Register::FS => Ok(self.sregs.fs),
            Register::GS => Ok(self.sregs.gs),
            Register::SS => Ok(self.sregs.ss),
            r => Err(PlatformError::InvalidRegister(anyhow!(
                "read_segment invalid register {:?}",
                r
            ))),
        }
    }

    fn write_segment(
        &mut self,
        reg: Register,
        segment_register: SegmentRegister,
    ) -> Result<(), PlatformError> {
        if !reg.is_segment_register() {
            return Err(PlatformError::InvalidRegister(anyhow!("{:?}", reg)));
        }

        match reg {
            Register::CS => self.sregs.cs = segment_register,
            Register::DS => self.sregs.ds = segment_register,
            Register::ES => self.sregs.es = segment_register,
            Register::FS => self.sregs.fs = segment_register,
            Register::GS => self.sregs.gs = segment_register,
            Register::SS => self.sregs.ss = segment_register,
            r => return Err(PlatformError::InvalidRegister(anyhow!("{:?}", r))),
        }

        Ok(())
    }

    fn ip(&self) -> u64 {
        self.regs.get_rip()
    }

    fn set_ip(&mut self, ip: u64) {
        self.regs.set_rip(ip);
    }

    fn efer(&self) -> u64 {
        self.sregs.efer
    }

    fn set_efer(&mut self, efer: u64) {
        self.sregs.efer = efer
    }

    fn flags(&self) -> u64 {
        self.regs.get_rflags()
    }

    fn set_flags(&mut self, flags: u64) {
        self.regs.set_rflags(flags);
    }

    // Derive the CPU mode from CR0.PE and EFER.LMA.
    //
    // NOTE: this implementation never reports `Virtual8086` or
    // `Protected16`; only Real, Protected and Long are distinguished.
    fn mode(&self) -> Result<CpuMode, PlatformError> {
        let efer = self.efer();
        let cr0 = self.read_reg(Register::CR0)?;
        let mut mode = CpuMode::Real;

        // CR0.PE set -> protected mode.
        if (cr0 & CR0_PE) == CR0_PE {
            mode = CpuMode::Protected;
        }

        // EFER.LMA set -> long mode, which is only architecturally valid
        // on top of protected mode.
        if (efer & EFER_LMA) == EFER_LMA {
            if mode != CpuMode::Protected {
                return Err(PlatformError::InvalidState(anyhow!(
                    "Protection must be enabled in long mode"
                )));
            }

            mode = CpuMode::Long;
        }

        Ok(mode)
    }
}
475 
/// An x86 instruction emulator, generic over the `CpuStateManager`
/// implementation the hosting hypervisor provides.
pub struct Emulator<'a, T: CpuStateManager> {
    // Platform callbacks used to fetch instruction bytes, access guest
    // memory and get/set the vCPU state.
    platform: &'a mut dyn PlatformEmulator<CpuState = T>,
}
479 
// Reduce repetition, see its invocation in get_handler().
// Expands a list of (module, Code variant) pairs into a `match` mapping
// each supported iced-x86 `Code` to a boxed instruction handler of the
// same name from that module; any other opcode maps to `None`.
macro_rules! gen_handler_match {
    ($value: ident, $( ($module:ident, $code:ident) ),* ) => {
        match $value {
            $(
                Code::$code => Some(Box::new($module::$code)),
            )*
            _ => None,
        }
    };
}
491 
impl<T: CpuStateManager> Emulator<'_, T> {
    /// Creates an emulator bound to the given platform callbacks.
    pub fn new(platform: &mut dyn PlatformEmulator<CpuState = T>) -> Emulator<'_, T> {
        Emulator { platform }
    }

    /// Maps an iced-x86 `Code` to its emulation handler, or `None` if the
    /// instruction is not (yet) supported by this emulator.
    fn get_handler(code: Code) -> Option<Box<dyn InstructionHandler<T>>> {
        let handler: Option<Box<dyn InstructionHandler<T>>> = gen_handler_match!(
            code,
            // CMP
            (cmp, Cmp_rm32_r32),
            (cmp, Cmp_rm8_r8),
            (cmp, Cmp_rm32_imm8),
            (cmp, Cmp_rm64_r64),
            // MOV
            (mov, Mov_r8_rm8),
            (mov, Mov_r8_imm8),
            (mov, Mov_r16_imm16),
            (mov, Mov_r16_rm16),
            (mov, Mov_r32_imm32),
            (mov, Mov_r32_rm32),
            (mov, Mov_r64_imm64),
            (mov, Mov_r64_rm64),
            (mov, Mov_rm8_imm8),
            (mov, Mov_rm8_r8),
            (mov, Mov_rm16_imm16),
            (mov, Mov_rm16_r16),
            (mov, Mov_rm32_imm32),
            (mov, Mov_rm32_r32),
            (mov, Mov_rm64_imm32),
            (mov, Mov_rm64_r64),
            // MOVZX
            (mov, Movzx_r16_rm8),
            (mov, Movzx_r32_rm8),
            (mov, Movzx_r64_rm8),
            (mov, Movzx_r32_rm16),
            (mov, Movzx_r64_rm16),
            // MOV MOFFS
            (mov, Mov_moffs16_AX),
            (mov, Mov_AX_moffs16),
            (mov, Mov_moffs32_EAX),
            (mov, Mov_EAX_moffs32),
            (mov, Mov_moffs64_RAX),
            (mov, Mov_RAX_moffs64),
            // MOVS
            (movs, Movsq_m64_m64),
            (movs, Movsd_m32_m32),
            (movs, Movsw_m16_m16),
            (movs, Movsb_m8_m8),
            // OR
            (or, Or_rm8_r8),
            // STOS
            (stos, Stosb_m8_AL),
            (stos, Stosw_m16_AX),
            (stos, Stosd_m32_EAX),
            (stos, Stosq_m64_RAX)
        );

        handler
    }

    // Decodes and emulates instructions from `insn_stream`, refetching from
    // guest memory when the stream runs short. At most `num_insn`
    // instructions are emulated when given. Returns the resulting CPU
    // state; the caller is responsible for committing it back through the
    // platform's `set_cpu_state()`.
    fn emulate_insn_stream(
        &mut self,
        cpu_id: usize,
        insn_stream: &[u8],
        num_insn: Option<usize>,
    ) -> EmulationResult<T, Exception> {
        // Work on a snapshot of the vCPU state.
        let mut state = self
            .platform
            .cpu_state(cpu_id)
            .map_err(EmulationError::PlatformEmulationError)?;
        let mut decoder = Decoder::new(64, insn_stream, DecoderOptions::NONE);
        let mut insn = Instruction::default();
        let mut num_insn_emulated: usize = 0;
        let mut fetched_insn_stream: [u8; 16] = [0; 16];
        let mut last_decoded_ip: u64 = state.ip();
        let mut stop_emulation: bool = false;

        decoder.set_ip(state.ip());

        // NOTE(review): with `num_insn == None` this loop only terminates
        // via a decoding/emulation error — confirm callers bound it.
        while !stop_emulation {
            decoder.decode_out(&mut insn);

            if decoder.last_error() == DecoderError::NoMoreBytes {
                // The decoder is missing some bytes to decode the current
                // instruction, for example because the instruction stream
                // crosses a page boundary.
                // We fetch 16 more bytes from the instruction segment and
                // decode/emulate the failing instruction from that fresh
                // window. NOTE(review): despite the original wording, the
                // loop is not unconditionally terminated here — decoding
                // carries on from the refetched stream, refetching again
                // if needed.
                debug!(
                    "Fetching {} bytes from {:#x}",
                    fetched_insn_stream.len(),
                    last_decoded_ip
                );

                // fetched_insn_stream is 16 bytes long, enough to contain
                // any complete x86 instruction.
                self.platform
                    .fetch(last_decoded_ip, &mut fetched_insn_stream)
                    .map_err(EmulationError::PlatformEmulationError)?;

                debug!("Fetched {:x?}", fetched_insn_stream);

                // Once we have the new stream, we must create a new decoder
                // and emulate one last instruction from the last decoded IP.
                decoder = Decoder::new(64, &fetched_insn_stream, DecoderOptions::NONE);
                decoder.set_ip(last_decoded_ip);
                decoder.decode_out(&mut insn);
                if decoder.last_error() != DecoderError::None {
                    // NOTE(review): after a failed decode `insn.code()` is
                    // likely INVALID, so this error mostly flags that
                    // decoding failed rather than which opcode did.
                    return Err(EmulationError::InstructionFetchingError(anyhow!(
                        "{:?}",
                        insn.code()
                    )));
                }
            }

            // Emulate the decoded instruction
            Emulator::get_handler(insn.code())
                .ok_or_else(|| {
                    EmulationError::UnsupportedInstruction(anyhow!(
                        "{:?} {:x?}",
                        insn.code(),
                        insn_stream
                    ))
                })?
                .emulate(&insn, &mut state, self.platform)
                .context(anyhow!(
                    "Failed to emulate {:?} {:x?}",
                    insn.code(),
                    insn_stream
                ))?;

            // Remember where decoding resumed so a later refetch starts
            // from the right guest address.
            last_decoded_ip = decoder.ip();
            num_insn_emulated += 1;

            if let Some(num_insn) = num_insn {
                if num_insn_emulated >= num_insn {
                    // Exit the decoding loop, do not decode the next instruction.
                    stop_emulation = true;
                }
            }
        }

        // Advance the snapshot's IP past everything we emulated.
        state.set_ip(decoder.ip());
        Ok(state)
    }

    /// Emulate all instructions from the instructions stream.
    pub fn emulate(&mut self, cpu_id: usize, insn_stream: &[u8]) -> EmulationResult<T, Exception> {
        self.emulate_insn_stream(cpu_id, insn_stream, None)
    }

    /// Only emulate the first instruction from the stream.
    ///
    /// This is useful for cases where we get readahead instruction stream
    /// but implicitly must only emulate the first instruction, and then return
    /// to the guest.
    pub fn emulate_first_insn(
        &mut self,
        cpu_id: usize,
        insn_stream: &[u8],
    ) -> EmulationResult<T, Exception> {
        self.emulate_insn_stream(cpu_id, insn_stream, Some(1))
    }
}
657 
#[cfg(test)]
mod mock_vmm {
    use std::sync::{Arc, Mutex};

    use super::*;
    use crate::arch::x86::emulator::EmulatorCpuState as CpuState;
    use crate::arch::x86::gdt::{gdt_entry, segment_from_gdt};
    use crate::StandardRegisters;

    /// A mock platform backed by a flat 8 KiB memory buffer and a single
    /// shared CPU state, used to unit test the emulator.
    #[derive(Debug, Clone)]
    pub struct MockVmm {
        memory: Vec<u8>,
        state: Arc<Mutex<CpuState>>,
    }

    pub type MockResult = Result<(), EmulationError<Exception>>;

    impl MockVmm {
        /// Builds a mock vCPU at `ip` with optional initial register values
        /// and an optional (address, bytes) region preloaded into memory.
        pub fn new(ip: u64, regs: Vec<(Register, u64)>, memory: Option<(u64, &[u8])>) -> MockVmm {
            // Ignore the error if a logger was already installed.
            let _ = env_logger::try_init();
            // Flat code/data segments covering the full 4 GiB space.
            let cs_reg = segment_from_gdt(gdt_entry(0xc09b, 0, 0xffffffff), 1);
            let ds_reg = segment_from_gdt(gdt_entry(0xc093, 0, 0xffffffff), 2);
            let es_reg = segment_from_gdt(gdt_entry(0xc093, 0, 0xffffffff), 3);
            // Pick the register layout matching the enabled hypervisor.
            cfg_if::cfg_if! {
                if #[cfg(feature = "kvm")] {
                    let std_regs: StandardRegisters = kvm_bindings::kvm_regs::default().into();
                } else if #[cfg(feature = "mshv")] {
                    let std_regs: StandardRegisters = mshv_bindings::StandardRegisters::default().into();
                } else {
                    panic!("Unsupported hypervisor type!")
                }
            };
            let mut initial_state = CpuState {
                regs: std_regs,
                sregs: SpecialRegisters::default(),
            };
            initial_state.set_ip(ip);
            initial_state.write_segment(Register::CS, cs_reg).unwrap();
            initial_state.write_segment(Register::DS, ds_reg).unwrap();
            initial_state.write_segment(Register::ES, es_reg).unwrap();
            for (reg, value) in regs {
                initial_state.write_reg(reg, value).unwrap();
            }

            let mut vmm = MockVmm {
                memory: vec![0; 8192],
                state: Arc::new(Mutex::new(initial_state)),
            };

            if let Some(mem) = memory {
                vmm.write_memory(mem.0, mem.1).unwrap();
            }

            vmm
        }

        /// Runs the emulator over `insn` (refetching from mock memory when
        /// the slice is exhausted) and commits the resulting state.
        /// When emulating the whole stream (`num_insn == None`), asserts
        /// that the IP advanced by exactly the stream length.
        pub fn emulate_insn(
            &mut self,
            cpu_id: usize,
            insn: &[u8],
            num_insn: Option<usize>,
        ) -> MockResult {
            let ip = self.cpu_state(cpu_id).unwrap().ip();
            let mut emulator = Emulator::new(self);

            let new_state = emulator.emulate_insn_stream(cpu_id, insn, num_insn)?;
            if num_insn.is_none() {
                assert_eq!(ip + insn.len() as u64, new_state.ip());
            }

            self.set_cpu_state(cpu_id, new_state).unwrap();

            Ok(())
        }

        /// Convenience wrapper emulating only the first instruction.
        pub fn emulate_first_insn(&mut self, cpu_id: usize, insn: &[u8]) -> MockResult {
            self.emulate_insn(cpu_id, insn, Some(1))
        }
    }

    impl PlatformEmulator for MockVmm {
        type CpuState = CpuState;

        // Reads straight out of the flat memory buffer; panics (slice
        // bounds) on out-of-range accesses, which is fine for tests.
        fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
            debug!(
                "Memory read {} bytes from [{:#x} -> {:#x}]",
                data.len(),
                gva,
                gva + data.len() as u64 - 1
            );
            data.copy_from_slice(&self.memory[gva as usize..gva as usize + data.len()]);
            Ok(())
        }

        fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
            debug!(
                "Memory write {} bytes at [{:#x} -> {:#x}]",
                data.len(),
                gva,
                gva + data.len() as u64 - 1
            );
            self.memory[gva as usize..gva as usize + data.len()].copy_from_slice(data);

            Ok(())
        }

        // There is a single shared state regardless of `cpu_id`.
        fn cpu_state(&self, _cpu_id: usize) -> Result<CpuState, PlatformError> {
            Ok(self.state.lock().unwrap().clone())
        }

        fn set_cpu_state(
            &self,
            _cpu_id: usize,
            state: Self::CpuState,
        ) -> Result<(), PlatformError> {
            *self.state.lock().unwrap() = state;
            Ok(())
        }

        // Linearize the IP through CS, then read the bytes from memory.
        fn fetch(&self, ip: u64, instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
            let rip = self
                .state
                .lock()
                .unwrap()
                .linearize(Register::CS, ip, false)?;
            self.read_memory(rip, instruction_bytes)
        }
    }
}
787 
788 #[cfg(test)]
789 mod tests {
790     use super::*;
791     use crate::arch::x86::emulator::mock_vmm::*;
792 
793     #[test]
794     // Emulate executing an empty stream. Instructions should be fetched from
795     // memory.
796     //
797     // mov rax, 0x1000
798     // mov rbx, qword ptr [rax+10h]
799     fn test_empty_instruction_stream() {
800         let target_rax: u64 = 0x1000;
801         let target_rbx: u64 = 0x1234567812345678;
802         let ip: u64 = 0x1000;
803         let cpu_id = 0;
804         let memory = [
805             // Code at IP
806             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
807             0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
808             // Padding
809             0x00, 0x00, 0x00, 0x00, 0x00, // Padding is all zeroes
810             // Data at IP + 0x10 (0x1234567812345678 in LE)
811             0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
812         ];
813 
814         let mut vmm = MockVmm::new(ip, vec![], Some((ip, &memory)));
815         vmm.emulate_insn(cpu_id, &[], Some(2)).unwrap();
816 
817         let rax: u64 = vmm
818             .cpu_state(cpu_id)
819             .unwrap()
820             .read_reg(Register::RAX)
821             .unwrap();
822         assert_eq!(rax, target_rax);
823 
824         let rbx: u64 = vmm
825             .cpu_state(cpu_id)
826             .unwrap()
827             .read_reg(Register::RBX)
828             .unwrap();
829         assert_eq!(rbx, target_rbx);
830     }
831 
832     #[test]
833     // Emulate executing an empty stream. Instructions should be fetched from
834     // memory. The emulation should abort.
835     //
836     // mov rax, 0x1000
837     // mov rbx, qword ptr [rax+10h]
838     // ... garbage ...
839     fn test_empty_instruction_stream_bad() {
840         let ip: u64 = 0x1000;
841         let cpu_id = 0;
842         let memory = [
843             // Code at IP
844             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
845             0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
846             // Padding
847             0xff, 0xff, 0xff, 0xff, 0xff, // Garbage
848             // Data at IP + 0x10 (0x1234567812345678 in LE)
849             0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
850         ];
851 
852         let mut vmm = MockVmm::new(ip, vec![], Some((ip, &memory)));
853         vmm.emulate_insn(cpu_id, &[], None).unwrap_err();
854     }
855 
856     #[test]
857     // Emulate truncated instruction stream, which should cause a fetch.
858     //
859     // mov rax, 0x1000
860     // mov rbx, qword ptr [rax+10h]
861     // Test with a first instruction truncated.
862     fn test_fetch_first_instruction() {
863         let target_rax: u64 = 0x1000;
864         let ip: u64 = 0x1000;
865         let cpu_id = 0;
866         let memory = [
867             // Code at IP
868             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
869             0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
870             // Padding
871             0x00, 0x00, 0x00, 0x00, 0x00, // Padding is all zeroes
872             // Data at IP + 0x10 (0x1234567812345678 in LE)
873             0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
874         ];
875         let insn = [
876             // First instruction is truncated
877             0x48, 0xc7, 0xc0, 0x00, // mov rax, 0x1000 -- Missing bytes: 0x00, 0x10, 0x00, 0x00,
878         ];
879 
880         let mut vmm = MockVmm::new(ip, vec![], Some((ip, &memory)));
881         vmm.emulate_insn(cpu_id, &insn, Some(2)).unwrap();
882 
883         let rax: u64 = vmm
884             .cpu_state(cpu_id)
885             .unwrap()
886             .read_reg(Register::RAX)
887             .unwrap();
888         assert_eq!(rax, target_rax);
889     }
890 
891     #[test]
892     // Emulate truncated instruction stream, which should cause a fetch.
893     //
894     // mov rax, 0x1000
895     // mov rbx, qword ptr [rax+10h]
896     // Test with a 2nd instruction truncated.
897     fn test_fetch_second_instruction() {
898         let target_rax: u64 = 0x1234567812345678;
899         let ip: u64 = 0x1000;
900         let cpu_id = 0;
901         let memory = [
902             // Code at IP
903             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
904             0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
905             // Padding
906             0x00, 0x00, 0x00, 0x00, 0x00, // Padding is all zeroes
907             // Data at IP + 0x10 (0x1234567812345678 in LE)
908             0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
909         ];
910         let insn = [
911             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
912             0x48, 0x8b, // Truncated mov rbx, qword ptr [rax+10h] -- missing [0x58, 0x10]
913         ];
914 
915         let mut vmm = MockVmm::new(ip, vec![], Some((ip, &memory)));
916         vmm.emulate_insn(cpu_id, &insn, Some(2)).unwrap();
917 
918         let rbx: u64 = vmm
919             .cpu_state(cpu_id)
920             .unwrap()
921             .read_reg(Register::RBX)
922             .unwrap();
923         assert_eq!(rbx, target_rax);
924     }
925 
926     #[test]
927     // Emulate only one instruction.
928     //
929     // mov rax, 0x1000
930     // mov rbx, qword ptr [rax+10h]
931     // The emulation should stop after the first instruction.
932     fn test_emulate_one_instruction() {
933         let target_rax: u64 = 0x1000;
934         let ip: u64 = 0x1000;
935         let cpu_id = 0;
936         let memory = [
937             // Code at IP
938             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
939             0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
940             // Padding
941             0x00, 0x00, 0x00, 0x00, 0x00, // Padding is all zeroes
942             // Data at IP + 0x10 (0x1234567812345678 in LE)
943             0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12,
944         ];
945         let insn = [
946             0x48, 0xc7, 0xc0, 0x00, 0x10, 0x00, 0x00, // mov rax, 0x1000
947             0x48, 0x8b, 0x58, 0x10, // mov rbx, qword ptr [rax+10h]
948         ];
949 
950         let mut vmm = MockVmm::new(ip, vec![], Some((ip, &memory)));
951         vmm.emulate_insn(cpu_id, &insn, Some(1)).unwrap();
952 
953         let new_ip: u64 = vmm.cpu_state(cpu_id).unwrap().ip();
954         assert_eq!(new_ip, ip + 0x7 /* length of mov rax,0x1000 */);
955 
956         let rax: u64 = vmm
957             .cpu_state(cpu_id)
958             .unwrap()
959             .read_reg(Register::RAX)
960             .unwrap();
961         assert_eq!(rax, target_rax);
962 
963         // The second instruction is not executed so RBX should be zero.
964         let rbx: u64 = vmm
965             .cpu_state(cpu_id)
966             .unwrap()
967             .read_reg(Register::RBX)
968             .unwrap();
969         assert_eq!(rbx, 0);
970     }
971 
972     #[test]
973     // Emulate truncated instruction stream, which should cause a fetch.
974     //
975     // mov rax, 0x1000
976     // Test with a first instruction truncated and a bad fetched instruction.
977     // Verify that the instruction emulation returns an error.
978     fn test_fetch_bad_insn() {
979         let ip: u64 = 0x1000;
980         let cpu_id = 0;
981         let memory = [
982             // Code at IP
983             0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
984             0xff, 0xff,
985         ];
986         let insn = [
987             // First instruction is truncated
988             0x48, 0xc7, 0xc0, 0x00, // mov rax, 0x1000 -- Missing bytes: 0x00, 0x10, 0x00, 0x00,
989         ];
990 
991         let mut vmm = MockVmm::new(ip, vec![], Some((ip, &memory)));
992         vmm.emulate_first_insn(cpu_id, &insn).unwrap_err();
993     }
994 }
995