xref: /cloud-hypervisor/test_infra/src/lib.rs (revision b440cb7d2330770cd415b63544a371d4caa2db3a)
1 // Copyright © 2021 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 use once_cell::sync::Lazy;
7 use ssh2::Session;
8 use std::env;
9 use std::ffi::OsStr;
10 use std::fmt::Debug;
11 use std::fs;
12 use std::io;
13 use std::io::{Read, Write};
14 use std::net::TcpListener;
15 use std::net::TcpStream;
16 use std::os::unix::fs::PermissionsExt;
17 use std::os::unix::io::AsRawFd;
18 use std::path::Path;
19 use std::process::{Child, Command, ExitStatus, Output, Stdio};
20 use std::str::FromStr;
21 use std::sync::Mutex;
22 use std::thread;
23 use vmm_sys_util::tempdir::TempDir;
24 
25 #[derive(Debug)]
26 pub enum Error {
27     Parsing(std::num::ParseIntError),
28     SshCommand(SshCommandError),
29     WaitForBoot(WaitForBootError),
30 }
31 
32 impl From<SshCommandError> for Error {
33     fn from(e: SshCommandError) -> Self {
34         Self::SshCommand(e)
35     }
36 }
37 
/// Addresses and ports used to talk to one guest (and the nested guests it
/// may host).
pub struct GuestNetworkConfig {
    /// IP address of the guest; SSH commands target it and the boot
    /// notification must originate from it.
    pub guest_ip: String,
    /// IP address of the first nested (L2) guest.
    pub l2_guest_ip1: String,
    /// IP address of the second nested (L2) guest.
    pub l2_guest_ip2: String,
    /// IP address of the third nested (L2) guest.
    pub l2_guest_ip3: String,
    /// IP address of the host side of the guest's network.
    pub host_ip: String,
    /// MAC address of the guest's network device.
    pub guest_mac: String,
    /// MAC address of the first nested (L2) guest.
    pub l2_guest_mac1: String,
    /// MAC address of the second nested (L2) guest.
    pub l2_guest_mac2: String,
    /// MAC address of the third nested (L2) guest.
    pub l2_guest_mac3: String,
    /// Host TCP port on which the boot notification is awaited.
    pub tcp_listener_port: u16,
}
50 
/// Text substituted for `@DEFAULT_TCP_LISTENER_MESSAGE` in the cloud-init
/// user-data template.
pub const DEFAULT_TCP_LISTENER_MESSAGE: &str = "booted";
/// Base host port for boot notifications; each guest adds its id to it.
pub const DEFAULT_TCP_LISTENER_PORT: u16 = 8000;
/// Seconds to wait for the boot notification when no custom timeout is given.
pub const DEFAULT_TCP_LISTENER_TIMEOUT: i32 = 120;
54 
/// Reasons why waiting for a guest's boot notification can fail.
#[derive(Debug)]
pub enum WaitForBootError {
    /// `epoll::wait` failed with an error other than `EAGAIN`/`EINTR`.
    EpollWait(io::Error),
    /// Binding the TCP listener failed.
    Listen(io::Error),
    /// No connection arrived before the timeout expired.
    EpollWaitTimeout,
    /// A connection arrived, but not from the expected guest address.
    WrongGuestAddr,
    /// Accepting the incoming connection failed.
    Accept(io::Error),
}
63 
64 impl GuestNetworkConfig {
65     pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), WaitForBootError> {
66         let start = std::time::Instant::now();
67         // The 'port' is unique per 'GUEST' and listening to wild-card ip avoids retrying on 'TcpListener::bind()'
68         let listen_addr = format!("0.0.0.0:{}", self.tcp_listener_port);
69         let expected_guest_addr = self.guest_ip.as_str();
70         let mut s = String::new();
71         let timeout = match custom_timeout {
72             Some(t) => t,
73             None => DEFAULT_TCP_LISTENER_TIMEOUT,
74         };
75 
76         match (|| -> Result<(), WaitForBootError> {
77             let listener =
78                 TcpListener::bind(&listen_addr.as_str()).map_err(WaitForBootError::Listen)?;
79             listener
80                 .set_nonblocking(true)
81                 .expect("Cannot set non-blocking for tcp listener");
82 
83             // Reply on epoll w/ timeout to wait for guest connections faithfully
84             let epoll_fd = epoll::create(true).expect("Cannot create epoll fd");
85             epoll::ctl(
86                 epoll_fd,
87                 epoll::ControlOptions::EPOLL_CTL_ADD,
88                 listener.as_raw_fd(),
89                 epoll::Event::new(epoll::Events::EPOLLIN, 0),
90             )
91             .expect("Cannot add 'tcp_listener' event to epoll");
92             let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 1];
93             loop {
94                 let num_events = match epoll::wait(epoll_fd, timeout * 1000_i32, &mut events[..]) {
95                     Ok(num_events) => Ok(num_events),
96                     Err(e) => match e.raw_os_error() {
97                         Some(libc::EAGAIN) | Some(libc::EINTR) => continue,
98                         _ => Err(e),
99                     },
100                 }
101                 .map_err(WaitForBootError::EpollWait)?;
102                 if num_events == 0 {
103                     return Err(WaitForBootError::EpollWaitTimeout);
104                 }
105                 break;
106             }
107 
108             match listener.accept() {
109                 Ok((_, addr)) => {
110                     // Make sure the connection is from the expected 'guest_addr'
111                     if addr.ip() != std::net::IpAddr::from_str(expected_guest_addr).unwrap() {
112                         s = format!(
113                             "Expecting the guest ip '{}' while being connected with ip '{}'",
114                             expected_guest_addr,
115                             addr.ip()
116                         );
117                         return Err(WaitForBootError::WrongGuestAddr);
118                     }
119 
120                     Ok(())
121                 }
122                 Err(e) => {
123                     s = "TcpListener::accept() failed".to_string();
124                     Err(WaitForBootError::Accept(e))
125                 }
126             }
127         })() {
128             Err(e) => {
129                 let duration = start.elapsed();
130                 eprintln!(
131                     "\n\n==== Start 'wait_vm_boot' (FAILED) ====\n\n\
132                  duration =\"{:?}, timeout = {}s\"\n\
133                  listen_addr=\"{}\"\n\
134                  expected_guest_addr=\"{}\"\n\
135                  message=\"{}\"\n\
136                  error=\"{:?}\"\n\
137                  \n==== End 'wait_vm_boot' outout ====\n\n",
138                     duration, timeout, listen_addr, expected_guest_addr, s, e
139                 );
140 
141                 Err(e)
142             }
143             Ok(_) => Ok(()),
144         }
145     }
146 }
147 
/// Selects which of a guest's disk images is being asked for.
pub enum DiskType {
    /// The disk holding the guest operating system.
    OperatingSystem,
    /// The disk carrying the cloud-init configuration.
    CloudInit,
}
152 
153 pub trait DiskConfig {
154     fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig);
155     fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String;
156     fn disk(&self, disk_type: DiskType) -> Option<String>;
157 }
158 
/// Disk configuration for an Ubuntu guest: an OS image plus a cloud-init image.
#[derive(Clone)]
pub struct UbuntuDiskConfig {
    // Path of the per-guest copy of the OS image; empty until `prepare_files` runs.
    osdisk_path: String,
    // Path of the generated cloud-init image; empty until `prepare_files` runs.
    cloudinit_path: String,
    // File name of the base OS image inside the workloads directory.
    image_name: String,
}
165 
166 impl UbuntuDiskConfig {
167     pub fn new(image_name: String) -> Self {
168         UbuntuDiskConfig {
169             image_name,
170             osdisk_path: String::new(),
171             cloudinit_path: String::new(),
172         }
173     }
174 }
175 
/// Disk configuration for a Windows guest backed by a device-mapper snapshot.
pub struct WindowsDiskConfig {
    // File name of the base Windows image inside the workloads directory.
    image_name: String,
    // Path of the snapshot device handed to the guest; empty until `prepare_files` runs.
    osdisk_path: String,
    // Loopback device backing the copy-on-write file.
    loopback_device: String,
    // Name of the device-mapper target wrapping the loopback device.
    windows_snapshot_cow: String,
    // Name of the device-mapper snapshot target.
    windows_snapshot: String,
}
183 
184 impl WindowsDiskConfig {
185     pub fn new(image_name: String) -> Self {
186         WindowsDiskConfig {
187             image_name,
188             osdisk_path: String::new(),
189             loopback_device: String::new(),
190             windows_snapshot_cow: String::new(),
191             windows_snapshot: String::new(),
192         }
193     }
194 }
195 
196 impl Drop for WindowsDiskConfig {
197     fn drop(&mut self) {
198         // dmsetup remove windows-snapshot-1
199         std::process::Command::new("dmsetup")
200             .arg("remove")
201             .arg(self.windows_snapshot.as_str())
202             .output()
203             .expect("Expect removing Windows snapshot with 'dmsetup' to succeed");
204 
205         // dmsetup remove windows-snapshot-cow-1
206         std::process::Command::new("dmsetup")
207             .arg("remove")
208             .arg(self.windows_snapshot_cow.as_str())
209             .output()
210             .expect("Expect removing Windows snapshot CoW with 'dmsetup' to succeed");
211 
212         // losetup -d <loopback_device>
213         std::process::Command::new("losetup")
214             .args(&["-d", self.loopback_device.as_str()])
215             .output()
216             .expect("Expect removing loopback device to succeed");
217     }
218 }
219 
220 impl DiskConfig for UbuntuDiskConfig {
221     fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String {
222         let cloudinit_file_path =
223             String::from(tmp_dir.as_path().join("cloudinit").to_str().unwrap());
224 
225         let cloud_init_directory = tmp_dir.as_path().join("cloud-init").join("ubuntu");
226 
227         fs::create_dir_all(&cloud_init_directory)
228             .expect("Expect creating cloud-init directory to succeed");
229 
230         let source_file_dir = std::env::current_dir()
231             .unwrap()
232             .join("test_data")
233             .join("cloud-init")
234             .join("ubuntu");
235 
236         vec!["meta-data"].iter().for_each(|x| {
237             rate_limited_copy(source_file_dir.join(x), cloud_init_directory.join(x))
238                 .expect("Expect copying cloud-init meta-data to succeed");
239         });
240 
241         let mut user_data_string = String::new();
242         fs::File::open(source_file_dir.join("user-data"))
243             .unwrap()
244             .read_to_string(&mut user_data_string)
245             .expect("Expected reading user-data file in to succeed");
246         user_data_string = user_data_string.replace(
247             "@DEFAULT_TCP_LISTENER_MESSAGE",
248             DEFAULT_TCP_LISTENER_MESSAGE,
249         );
250         user_data_string = user_data_string.replace("@HOST_IP", &network.host_ip);
251         user_data_string =
252             user_data_string.replace("@TCP_LISTENER_PORT", &network.tcp_listener_port.to_string());
253 
254         fs::File::create(cloud_init_directory.join("user-data"))
255             .unwrap()
256             .write_all(user_data_string.as_bytes())
257             .expect("Expected writing out user-data to succeed");
258 
259         let mut network_config_string = String::new();
260 
261         fs::File::open(source_file_dir.join("network-config"))
262             .unwrap()
263             .read_to_string(&mut network_config_string)
264             .expect("Expected reading network-config file in to succeed");
265 
266         network_config_string = network_config_string.replace("192.168.2.1", &network.host_ip);
267         network_config_string = network_config_string.replace("192.168.2.2", &network.guest_ip);
268         network_config_string = network_config_string.replace("192.168.2.3", &network.l2_guest_ip1);
269         network_config_string = network_config_string.replace("192.168.2.4", &network.l2_guest_ip2);
270         network_config_string = network_config_string.replace("192.168.2.5", &network.l2_guest_ip3);
271         network_config_string =
272             network_config_string.replace("12:34:56:78:90:ab", &network.guest_mac);
273         network_config_string =
274             network_config_string.replace("de:ad:be:ef:12:34", &network.l2_guest_mac1);
275         network_config_string =
276             network_config_string.replace("de:ad:be:ef:34:56", &network.l2_guest_mac2);
277         network_config_string =
278             network_config_string.replace("de:ad:be:ef:56:78", &network.l2_guest_mac3);
279 
280         fs::File::create(cloud_init_directory.join("network-config"))
281             .unwrap()
282             .write_all(network_config_string.as_bytes())
283             .expect("Expected writing out network-config to succeed");
284 
285         std::process::Command::new("mkdosfs")
286             .args(&["-n", "cidata"])
287             .args(&["-C", cloudinit_file_path.as_str()])
288             .arg("8192")
289             .output()
290             .expect("Expect creating disk image to succeed");
291 
292         vec!["user-data", "meta-data", "network-config"]
293             .iter()
294             .for_each(|x| {
295                 std::process::Command::new("mcopy")
296                     .arg("-o")
297                     .args(&["-i", cloudinit_file_path.as_str()])
298                     .args(&["-s", cloud_init_directory.join(x).to_str().unwrap(), "::"])
299                     .output()
300                     .expect("Expect copying files to disk image to succeed");
301             });
302 
303         cloudinit_file_path
304     }
305 
306     fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig) {
307         let mut workload_path = dirs::home_dir().unwrap();
308         workload_path.push("workloads");
309 
310         let mut osdisk_base_path = workload_path;
311         osdisk_base_path.push(&self.image_name);
312 
313         let osdisk_path = String::from(tmp_dir.as_path().join("osdisk.img").to_str().unwrap());
314         let cloudinit_path = self.prepare_cloudinit(tmp_dir, network);
315 
316         rate_limited_copy(osdisk_base_path, &osdisk_path)
317             .expect("copying of OS source disk image failed");
318 
319         self.cloudinit_path = cloudinit_path;
320         self.osdisk_path = osdisk_path;
321     }
322 
323     fn disk(&self, disk_type: DiskType) -> Option<String> {
324         match disk_type {
325             DiskType::OperatingSystem => Some(self.osdisk_path.clone()),
326             DiskType::CloudInit => Some(self.cloudinit_path.clone()),
327         }
328     }
329 }
330 
331 impl DiskConfig for WindowsDiskConfig {
332     fn prepare_cloudinit(&self, _tmp_dir: &TempDir, _network: &GuestNetworkConfig) -> String {
333         String::new()
334     }
335 
336     fn prepare_files(&mut self, tmp_dir: &TempDir, _network: &GuestNetworkConfig) {
337         let mut workload_path = dirs::home_dir().unwrap();
338         workload_path.push("workloads");
339 
340         let mut osdisk_path = workload_path;
341         osdisk_path.push(&self.image_name);
342 
343         let osdisk_blk_size = fs::metadata(osdisk_path)
344             .expect("Expect retrieving Windows image metadata")
345             .len()
346             >> 9;
347 
348         let snapshot_cow_path =
349             String::from(tmp_dir.as_path().join("snapshot_cow").to_str().unwrap());
350 
351         // Create and truncate CoW file for device mapper
352         let cow_file_size: u64 = 1 << 30;
353         let cow_file_blk_size = cow_file_size >> 9;
354         let cow_file = std::fs::File::create(snapshot_cow_path.as_str())
355             .expect("Expect creating CoW image to succeed");
356         cow_file
357             .set_len(cow_file_size)
358             .expect("Expect truncating CoW image to succeed");
359 
360         // losetup --find --show /tmp/snapshot_cow
361         let loopback_device = std::process::Command::new("losetup")
362             .arg("--find")
363             .arg("--show")
364             .arg(snapshot_cow_path.as_str())
365             .output()
366             .expect("Expect creating loopback device from snapshot CoW image to succeed");
367 
368         self.loopback_device = String::from_utf8_lossy(&loopback_device.stdout)
369             .trim()
370             .to_string();
371 
372         let random_extension = tmp_dir.as_path().file_name().unwrap();
373         let windows_snapshot_cow = format!(
374             "windows-snapshot-cow-{}",
375             random_extension.to_str().unwrap()
376         );
377 
378         // dmsetup create windows-snapshot-cow-1 --table '0 2097152 linear /dev/loop1 0'
379         std::process::Command::new("dmsetup")
380             .arg("create")
381             .arg(windows_snapshot_cow.as_str())
382             .args(&[
383                 "--table",
384                 format!("0 {} linear {} 0", cow_file_blk_size, self.loopback_device).as_str(),
385             ])
386             .output()
387             .expect("Expect creating Windows snapshot CoW with 'dmsetup' to succeed");
388 
389         let windows_snapshot = format!("windows-snapshot-{}", random_extension.to_str().unwrap());
390 
391         // dmsetup mknodes
392         std::process::Command::new("dmsetup")
393             .arg("mknodes")
394             .output()
395             .expect("Expect device mapper nodes to be ready");
396 
397         // dmsetup create windows-snapshot-1 --table '0 41943040 snapshot /dev/mapper/windows-base /dev/mapper/windows-snapshot-cow-1 P 8'
398         std::process::Command::new("dmsetup")
399             .arg("create")
400             .arg(windows_snapshot.as_str())
401             .args(&[
402                 "--table",
403                 format!(
404                     "0 {} snapshot /dev/mapper/windows-base /dev/mapper/{} P 8",
405                     osdisk_blk_size,
406                     windows_snapshot_cow.as_str()
407                 )
408                 .as_str(),
409             ])
410             .output()
411             .expect("Expect creating Windows snapshot with 'dmsetup' to succeed");
412 
413         // dmsetup mknodes
414         std::process::Command::new("dmsetup")
415             .arg("mknodes")
416             .output()
417             .expect("Expect device mapper nodes to be ready");
418 
419         self.osdisk_path = format!("/dev/mapper/{}", windows_snapshot);
420         self.windows_snapshot_cow = windows_snapshot_cow;
421         self.windows_snapshot = windows_snapshot;
422     }
423 
424     fn disk(&self, disk_type: DiskType) -> Option<String> {
425         match disk_type {
426             DiskType::OperatingSystem => Some(self.osdisk_path.clone()),
427             DiskType::CloudInit => None,
428         }
429     }
430 }
431 
432 pub fn rate_limited_copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<u64> {
433     for i in 0..10 {
434         let free_bytes = unsafe {
435             let mut stats = std::mem::MaybeUninit::zeroed();
436             let fs_name = std::ffi::CString::new("/tmp").unwrap();
437             libc::statvfs(fs_name.as_ptr(), stats.as_mut_ptr());
438 
439             let free_blocks = stats.assume_init().f_bfree;
440             let block_size = stats.assume_init().f_bsize;
441 
442             free_blocks * block_size
443         };
444 
445         // Make sure there is at least 6 GiB of space
446         if free_bytes < 6 << 30 {
447             eprintln!(
448                 "Not enough space on disk ({}). Attempt {} of 10. Sleeping.",
449                 free_bytes, i
450             );
451             thread::sleep(std::time::Duration::new(60, 0));
452             continue;
453         }
454 
455         match fs::copy(&from, &to) {
456             Err(e) => {
457                 if let Some(errno) = e.raw_os_error() {
458                     if errno == libc::ENOSPC {
459                         eprintln!("Copy returned ENOSPC. Attempt {} of 10. Sleeping.", i);
460                         thread::sleep(std::time::Duration::new(60, 0));
461                         continue;
462                     }
463                 }
464                 return Err(e);
465             }
466             Ok(i) => return Ok(i),
467         }
468     }
469     Err(io::Error::last_os_error())
470 }
471 
/// Turns the outcome of a test body and of its child process into a test
/// verdict.
///
/// `r` is the result of the test body (for example from `catch_unwind`) and
/// `output` is what the child process produced. The function returns
/// normally when the body succeeded and the child either exited
/// successfully or was terminated by signal 9 (the expected effect of
/// `child.kill()`).
///
/// # Panics
///
/// In every other case the child's exit code or signal, stdout and stderr
/// are printed to stderr and the function panics with "Test failed".
pub fn handle_child_output(
    r: Result<(), std::boxed::Box<dyn std::any::Any + std::marker::Send>>,
    output: &std::process::Output,
) {
    use std::os::unix::process::ExitStatusExt;

    let body_succeeded = r.is_ok();
    let status = output.status;

    if body_succeeded && status.success() {
        return;
    }

    if let Some(code) = status.code() {
        eprintln!("\n\n==== child exit code: {} ====", code);
    } else {
        let signal = status.signal();

        // Don't treat child.kill() as a problem
        if body_succeeded && signal == Some(9) {
            return;
        }

        eprintln!("==== child killed by signal: {} ====", signal.unwrap());
    }

    let stdout_text = String::from_utf8_lossy(&output.stdout);
    eprintln!(
        "\n\n==== Start child stdout ====\n\n{}\n\n==== End child stdout ====",
        stdout_text
    );
    let stderr_text = String::from_utf8_lossy(&output.stderr);
    eprintln!(
        "\n\n==== Start child stderr ====\n\n{}\n\n==== End child stderr ====",
        stderr_text
    );

    panic!("Test failed")
}
509 
/// User name and password used to authenticate an SSH session.
#[derive(Debug)]
pub struct PasswordAuth {
    /// Account to log in as.
    pub username: String,
    /// Password of that account.
    pub password: String,
}
515 
/// Default number of attempts for an SSH command before giving up.
pub const DEFAULT_SSH_RETRIES: u8 = 6;
/// Default base delay in seconds between SSH attempts; the actual sleep is
/// this value multiplied by the number of failures so far.
pub const DEFAULT_SSH_TIMEOUT: u8 = 10;
518 
519 #[derive(Debug)]
520 pub enum SshCommandError {
521     Connection(std::io::Error),
522     Handshake(ssh2::Error),
523     Authentication(ssh2::Error),
524     ChannelSession(ssh2::Error),
525     Command(ssh2::Error),
526     ExitStatus(ssh2::Error),
527     NonZeroExitStatus(i32),
528     FileRead(std::io::Error),
529     FileMetadata(std::io::Error),
530     ScpSend(ssh2::Error),
531     WriteAll(std::io::Error),
532     SendEof(ssh2::Error),
533     WaitEof(ssh2::Error),
534 }
535 
536 fn scp_to_guest_with_auth(
537     path: &Path,
538     remote_path: &Path,
539     auth: &PasswordAuth,
540     ip: &str,
541     retries: u8,
542     timeout: u8,
543 ) -> Result<(), SshCommandError> {
544     let mut counter = 0;
545     loop {
546         match (|| -> Result<(), SshCommandError> {
547             let tcp =
548                 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?;
549             let mut sess = Session::new().unwrap();
550             sess.set_tcp_stream(tcp);
551             sess.handshake().map_err(SshCommandError::Handshake)?;
552 
553             sess.userauth_password(&auth.username, &auth.password)
554                 .map_err(SshCommandError::Authentication)?;
555             assert!(sess.authenticated());
556 
557             let content = fs::read(path).map_err(SshCommandError::FileRead)?;
558             let mode = fs::metadata(path)
559                 .map_err(SshCommandError::FileMetadata)?
560                 .permissions()
561                 .mode()
562                 & 0o777;
563 
564             let mut channel = sess
565                 .scp_send(remote_path, mode as i32, content.len() as u64, None)
566                 .map_err(SshCommandError::ScpSend)?;
567             channel
568                 .write_all(&content)
569                 .map_err(SshCommandError::WriteAll)?;
570             channel.send_eof().map_err(SshCommandError::SendEof)?;
571             channel.wait_eof().map_err(SshCommandError::WaitEof)?;
572 
573             // Intentionally ignore these results here as their failure
574             // does not precipitate a repeat
575             let _ = channel.close();
576             let _ = channel.wait_close();
577 
578             Ok(())
579         })() {
580             Ok(_) => break,
581             Err(e) => {
582                 counter += 1;
583                 if counter >= retries {
584                     eprintln!(
585                         "\n\n==== Start scp command output (FAILED) ====\n\n\
586                          path =\"{:?}\"\n\
587                          remote_path =\"{:?}\"\n\
588                          auth=\"{:#?}\"\n\
589                          ip=\"{}\"\n\
590                          error=\"{:?}\"\n\
591                          \n==== End scp command outout ====\n\n",
592                         path, remote_path, auth, ip, e
593                     );
594 
595                     return Err(e);
596                 }
597             }
598         };
599         thread::sleep(std::time::Duration::new((timeout * counter).into(), 0));
600     }
601     Ok(())
602 }
603 
604 pub fn scp_to_guest(
605     path: &Path,
606     remote_path: &Path,
607     ip: &str,
608     retries: u8,
609     timeout: u8,
610 ) -> Result<(), SshCommandError> {
611     scp_to_guest_with_auth(
612         path,
613         remote_path,
614         &PasswordAuth {
615             username: String::from("cloud"),
616             password: String::from("cloud123"),
617         },
618         ip,
619         retries,
620         timeout,
621     )
622 }
623 
624 pub fn ssh_command_ip_with_auth(
625     command: &str,
626     auth: &PasswordAuth,
627     ip: &str,
628     retries: u8,
629     timeout: u8,
630 ) -> Result<String, SshCommandError> {
631     let mut s = String::new();
632 
633     let mut counter = 0;
634     loop {
635         match (|| -> Result<(), SshCommandError> {
636             let tcp =
637                 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?;
638             let mut sess = Session::new().unwrap();
639             sess.set_tcp_stream(tcp);
640             sess.handshake().map_err(SshCommandError::Handshake)?;
641 
642             sess.userauth_password(&auth.username, &auth.password)
643                 .map_err(SshCommandError::Authentication)?;
644             assert!(sess.authenticated());
645 
646             let mut channel = sess
647                 .channel_session()
648                 .map_err(SshCommandError::ChannelSession)?;
649             channel.exec(command).map_err(SshCommandError::Command)?;
650 
651             // Intentionally ignore these results here as their failure
652             // does not precipitate a repeat
653             let _ = channel.read_to_string(&mut s);
654             let _ = channel.close();
655             let _ = channel.wait_close();
656 
657             let status = channel.exit_status().map_err(SshCommandError::ExitStatus)?;
658 
659             if status != 0 {
660                 Err(SshCommandError::NonZeroExitStatus(status))
661             } else {
662                 Ok(())
663             }
664         })() {
665             Ok(_) => break,
666             Err(e) => {
667                 counter += 1;
668                 if counter >= retries {
669                     eprintln!(
670                         "\n\n==== Start ssh command output (FAILED) ====\n\n\
671                          command=\"{}\"\n\
672                          auth=\"{:#?}\"\n\
673                          ip=\"{}\"\n\
674                          output=\"{}\"\n\
675                          error=\"{:?}\"\n\
676                          \n==== End ssh command outout ====\n\n",
677                         command, auth, ip, s, e
678                     );
679 
680                     return Err(e);
681                 }
682             }
683         };
684         thread::sleep(std::time::Duration::new((timeout * counter).into(), 0));
685     }
686     Ok(s)
687 }
688 
689 pub fn ssh_command_ip(
690     command: &str,
691     ip: &str,
692     retries: u8,
693     timeout: u8,
694 ) -> Result<String, SshCommandError> {
695     ssh_command_ip_with_auth(
696         command,
697         &PasswordAuth {
698             username: String::from("cloud"),
699             password: String::from("cloud123"),
700         },
701         ip,
702         retries,
703         timeout,
704     )
705 }
706 
/// Runs `command` on the host through `bash -c` and returns its exit status.
///
/// # Panics
///
/// Panics if `bash` cannot be started.
pub fn exec_host_command_status(command: &str) -> ExitStatus {
    match Command::new("bash").arg("-c").arg(command).status() {
        Ok(status) => status,
        Err(_) => panic!("Expected '{}' to run", command),
    }
}
713 
/// Runs `command` on the host through `bash -c` and returns its exit status
/// together with the captured stdout and stderr.
///
/// # Panics
///
/// Panics if `bash` cannot be started.
pub fn exec_host_command_output(command: &str) -> Output {
    match Command::new("bash").arg("-c").arg(command).output() {
        Ok(output) => output,
        Err(_) => panic!("Expected '{}' to run", command),
    }
}
720 
/// Pipe buffer size in bytes (32 MiB).
pub const PIPE_SIZE: i32 = 32 * 1024 * 1024;
722 
723 static NEXT_VM_ID: Lazy<Mutex<u8>> = Lazy::new(|| Mutex::new(1));
724 
725 pub struct Guest {
726     pub tmp_dir: TempDir,
727     pub disk_config: Box<dyn DiskConfig>,
728     pub network: GuestNetworkConfig,
729 }
730 
731 // Safe to implement as we know we have no interior mutability
732 impl std::panic::RefUnwindSafe for Guest {}
733 
734 impl Guest {
735     pub fn new_from_ip_range(mut disk_config: Box<dyn DiskConfig>, class: &str, id: u8) -> Self {
736         let tmp_dir = TempDir::new_with_prefix("/tmp/ch").unwrap();
737 
738         let network = GuestNetworkConfig {
739             guest_ip: format!("{}.{}.2", class, id),
740             l2_guest_ip1: format!("{}.{}.3", class, id),
741             l2_guest_ip2: format!("{}.{}.4", class, id),
742             l2_guest_ip3: format!("{}.{}.5", class, id),
743             host_ip: format!("{}.{}.1", class, id),
744             guest_mac: format!("12:34:56:78:90:{:02x}", id),
745             l2_guest_mac1: format!("de:ad:be:ef:12:{:02x}", id),
746             l2_guest_mac2: format!("de:ad:be:ef:34:{:02x}", id),
747             l2_guest_mac3: format!("de:ad:be:ef:56:{:02x}", id),
748             tcp_listener_port: DEFAULT_TCP_LISTENER_PORT + id as u16,
749         };
750 
751         disk_config.prepare_files(&tmp_dir, &network);
752 
753         Guest {
754             tmp_dir,
755             disk_config,
756             network,
757         }
758     }
759 
760     pub fn new(disk_config: Box<dyn DiskConfig>) -> Self {
761         let mut guard = NEXT_VM_ID.lock().unwrap();
762         let id = *guard;
763         *guard = id + 1;
764 
765         Self::new_from_ip_range(disk_config, "192.168", id)
766     }
767 
768     pub fn default_net_string(&self) -> String {
769         format!(
770             "tap=,mac={},ip={},mask=255.255.255.0",
771             self.network.guest_mac, self.network.host_ip
772         )
773     }
774 
775     pub fn default_net_string_w_iommu(&self) -> String {
776         format!(
777             "tap=,mac={},ip={},mask=255.255.255.0,iommu=on",
778             self.network.guest_mac, self.network.host_ip
779         )
780     }
781 
782     pub fn ssh_command(&self, command: &str) -> Result<String, SshCommandError> {
783         ssh_command_ip(
784             command,
785             &self.network.guest_ip,
786             DEFAULT_SSH_RETRIES,
787             DEFAULT_SSH_TIMEOUT,
788         )
789     }
790 
791     #[cfg(target_arch = "x86_64")]
792     pub fn ssh_command_l1(&self, command: &str) -> Result<String, SshCommandError> {
793         ssh_command_ip(
794             command,
795             &self.network.guest_ip,
796             DEFAULT_SSH_RETRIES,
797             DEFAULT_SSH_TIMEOUT,
798         )
799     }
800 
801     #[cfg(target_arch = "x86_64")]
802     pub fn ssh_command_l2_1(&self, command: &str) -> Result<String, SshCommandError> {
803         ssh_command_ip(
804             command,
805             &self.network.l2_guest_ip1,
806             DEFAULT_SSH_RETRIES,
807             DEFAULT_SSH_TIMEOUT,
808         )
809     }
810 
811     #[cfg(target_arch = "x86_64")]
812     pub fn ssh_command_l2_2(&self, command: &str) -> Result<String, SshCommandError> {
813         ssh_command_ip(
814             command,
815             &self.network.l2_guest_ip2,
816             DEFAULT_SSH_RETRIES,
817             DEFAULT_SSH_TIMEOUT,
818         )
819     }
820 
821     #[cfg(target_arch = "x86_64")]
822     pub fn ssh_command_l2_3(&self, command: &str) -> Result<String, SshCommandError> {
823         ssh_command_ip(
824             command,
825             &self.network.l2_guest_ip3,
826             DEFAULT_SSH_RETRIES,
827             DEFAULT_SSH_TIMEOUT,
828         )
829     }
830 
831     pub fn api_create_body(&self, cpu_count: u8, kernel_path: &str, kernel_cmd: &str) -> String {
832         format! {"{{\"cpus\":{{\"boot_vcpus\":{},\"max_vcpus\":{}}},\"kernel\":{{\"path\":\"{}\"}},\"cmdline\":{{\"args\": \"{}\"}},\"net\":[{{\"ip\":\"{}\", \"mask\":\"255.255.255.0\", \"mac\":\"{}\"}}], \"disks\":[{{\"path\":\"{}\"}}, {{\"path\":\"{}\"}}]}}",
833                  cpu_count,
834                  cpu_count,
835                  kernel_path,
836                  kernel_cmd,
837                  self.network.host_ip,
838                  self.network.guest_mac,
839                  self.disk_config.disk(DiskType::OperatingSystem).unwrap().as_str(),
840                  self.disk_config.disk(DiskType::CloudInit).unwrap().as_str(),
841         }
842     }
843 
844     pub fn get_cpu_count(&self) -> Result<u32, Error> {
845         self.ssh_command("grep -c processor /proc/cpuinfo")?
846             .trim()
847             .parse()
848             .map_err(Error::Parsing)
849     }
850 
851     #[cfg(target_arch = "x86_64")]
852     pub fn get_initial_apicid(&self) -> Result<u32, Error> {
853         self.ssh_command("grep \"initial apicid\" /proc/cpuinfo | grep -o \"[0-9]*\"")?
854             .trim()
855             .parse()
856             .map_err(Error::Parsing)
857     }
858 
859     pub fn get_total_memory(&self) -> Result<u32, Error> {
860         self.ssh_command("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")?
861             .trim()
862             .parse()
863             .map_err(Error::Parsing)
864     }
865 
866     #[cfg(target_arch = "x86_64")]
867     pub fn get_total_memory_l2(&self) -> Result<u32, Error> {
868         self.ssh_command_l2_1("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")?
869             .trim()
870             .parse()
871             .map_err(Error::Parsing)
872     }
873 
874     pub fn get_numa_node_memory(&self, node_id: usize) -> Result<u32, Error> {
875         self.ssh_command(
876             format!(
877                 "grep MemTotal /sys/devices/system/node/node{}/meminfo \
878                         | cut -d \":\" -f 2 | grep -o \"[0-9]*\"",
879                 node_id
880             )
881             .as_str(),
882         )?
883         .trim()
884         .parse()
885         .map_err(Error::Parsing)
886     }
887 
888     pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), Error> {
889         self.network
890             .wait_vm_boot(custom_timeout)
891             .map_err(Error::WaitForBoot)
892     }
893 
894     pub fn check_numa_node_cpus(&self, node_id: usize, cpus: Vec<usize>) -> Result<(), Error> {
895         for cpu in cpus.iter() {
896             let cmd = format!(
897                 "[ -d \"/sys/devices/system/node/node{}/cpu{}\" ]",
898                 node_id, cpu
899             );
900             self.ssh_command(cmd.as_str())?;
901         }
902 
903         Ok(())
904     }
905 
906     pub fn check_numa_node_distances(
907         &self,
908         node_id: usize,
909         distances: &str,
910     ) -> Result<bool, Error> {
911         let cmd = format!("cat /sys/devices/system/node/node{}/distance", node_id);
912         if self.ssh_command(cmd.as_str())?.trim() == distances {
913             Ok(true)
914         } else {
915             Ok(false)
916         }
917     }
918 
919     pub fn check_numa_common(
920         &self,
921         mem_ref: Option<&[u32]>,
922         node_ref: Option<&[Vec<usize>]>,
923         distance_ref: Option<&[&str]>,
924     ) {
925         if let Some(mem_ref) = mem_ref {
926             // Check each NUMA node has been assigned the right amount of
927             // memory.
928             for (i, &m) in mem_ref.iter().enumerate() {
929                 assert!(self.get_numa_node_memory(i).unwrap_or_default() > m);
930             }
931         }
932 
933         if let Some(node_ref) = node_ref {
934             // Check each NUMA node has been assigned the right CPUs set.
935             for (i, n) in node_ref.iter().enumerate() {
936                 self.check_numa_node_cpus(i, n.clone()).unwrap();
937             }
938         }
939 
940         if let Some(distance_ref) = distance_ref {
941             // Check each NUMA node has been assigned the right distances.
942             for (i, &d) in distance_ref.iter().enumerate() {
943                 assert!(self.check_numa_node_distances(i, d).unwrap());
944             }
945         }
946     }
947 
948     #[cfg(target_arch = "x86_64")]
949     pub fn check_sgx_support(&self) -> Result<(), Error> {
950         self.ssh_command(
951             "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX: \
952                     Software Guard Extensions supported = true'",
953         )?;
954         self.ssh_command(
955             "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX_LC: \
956                     SGX launch config supported = true'",
957         )?;
958         self.ssh_command(
959             "cpuid -l 0x12 -s 0 | tr -s [:space:] | grep -q 'SGX1 \
960                     supported = true'",
961         )?;
962 
963         Ok(())
964     }
965 
966     pub fn get_pci_bridge_class(&self) -> Result<String, Error> {
967         Ok(self
968             .ssh_command("cat /sys/bus/pci/devices/0000:00:00.0/class")?
969             .trim()
970             .to_string())
971     }
972 
973     pub fn get_pci_device_ids(&self) -> Result<String, Error> {
974         Ok(self
975             .ssh_command("cat /sys/bus/pci/devices/*/device")?
976             .trim()
977             .to_string())
978     }
979 
980     pub fn get_pci_vendor_ids(&self) -> Result<String, Error> {
981         Ok(self
982             .ssh_command("cat /sys/bus/pci/devices/*/vendor")?
983             .trim()
984             .to_string())
985     }
986 
987     pub fn does_device_vendor_pair_match(
988         &self,
989         device_id: &str,
990         vendor_id: &str,
991     ) -> Result<bool, Error> {
992         // We are checking if console device's device id and vendor id pair matches
993         let devices = self.get_pci_device_ids()?;
994         let devices: Vec<&str> = devices.split('\n').collect();
995         let vendors = self.get_pci_vendor_ids()?;
996         let vendors: Vec<&str> = vendors.split('\n').collect();
997 
998         for (index, d_id) in devices.iter().enumerate() {
999             if *d_id == device_id {
1000                 if let Some(v_id) = vendors.get(index) {
1001                     if *v_id == vendor_id {
1002                         return Ok(true);
1003                     }
1004                 }
1005             }
1006         }
1007 
1008         Ok(false)
1009     }
1010 
1011     pub fn check_vsock(&self, socket: &str) {
1012         // Listen from guest on vsock CID=3 PORT=16
1013         // SOCKET-LISTEN:<domain>:<protocol>:<local-address>
1014         let guest_ip = self.network.guest_ip.clone();
1015         let listen_socat = thread::spawn(move || {
1016             ssh_command_ip("sudo socat - SOCKET-LISTEN:40:0:x00x00x10x00x00x00x03x00x00x00x00x00x00x00 > vsock_log", &guest_ip, DEFAULT_SSH_RETRIES, DEFAULT_SSH_TIMEOUT).unwrap();
1017         });
1018 
1019         // Make sure socat is listening, which might take a few second on slow systems
1020         thread::sleep(std::time::Duration::new(10, 0));
1021 
1022         // Write something to vsock from the host
1023         assert!(exec_host_command_status(&format!(
1024             "echo -e \"CONNECT 16\\nHelloWorld!\" | socat - UNIX-CONNECT:{}",
1025             socket
1026         ))
1027         .success());
1028 
1029         // Wait for the thread to terminate.
1030         listen_socat.join().unwrap();
1031 
1032         assert_eq!(
1033             self.ssh_command("cat vsock_log").unwrap().trim(),
1034             "HelloWorld!"
1035         );
1036     }
1037 
1038     #[cfg(target_arch = "x86_64")]
1039     pub fn check_nvidia_gpu(&self) {
1040         // Run CUDA sample to validate it can find the device
1041         let device_query_result = self
1042             .ssh_command("sudo /root/NVIDIA_CUDA-11.3_Samples/bin/x86_64/linux/release/deviceQuery")
1043             .unwrap();
1044         assert!(device_query_result.contains("Detected 1 CUDA Capable device"));
1045         assert!(device_query_result.contains("Device 0: \"NVIDIA Tesla T4\""));
1046         assert!(device_query_result.contains("Result = PASS"));
1047 
1048         // Run NVIDIA DCGM Diagnostics to validate the device is functional
1049         self.ssh_command("sudo nv-hostengine").unwrap();
1050 
1051         assert!(self
1052             .ssh_command("sudo dcgmi discovery -l")
1053             .unwrap()
1054             .contains("Name: NVIDIA Tesla T4"));
1055         assert_eq!(
1056             self.ssh_command("sudo dcgmi diag -r 'diagnostic' | grep Pass | wc -l")
1057                 .unwrap()
1058                 .trim(),
1059             "10"
1060         );
1061     }
1062 
1063     pub fn reboot_linux(&self, current_reboot_count: u32, custom_timeout: Option<i32>) {
1064         let list_boots_cmd = "sudo journalctl --list-boots | wc -l";
1065         let boot_count = self
1066             .ssh_command(list_boots_cmd)
1067             .unwrap()
1068             .trim()
1069             .parse::<u32>()
1070             .unwrap_or_default();
1071 
1072         assert_eq!(boot_count, current_reboot_count + 1);
1073         self.ssh_command("sudo reboot").unwrap();
1074 
1075         self.wait_vm_boot(custom_timeout).unwrap();
1076         let boot_count = self
1077             .ssh_command(list_boots_cmd)
1078             .unwrap()
1079             .trim()
1080             .parse::<u32>()
1081             .unwrap_or_default();
1082         assert_eq!(boot_count, current_reboot_count + 2);
1083     }
1084 
1085     pub fn enable_memory_hotplug(&self) {
1086         self.ssh_command("echo online | sudo tee /sys/devices/system/memory/auto_online_blocks")
1087             .unwrap();
1088     }
1089 
1090     pub fn check_devices_common(
1091         &self,
1092         socket: Option<&String>,
1093         console_text: Option<&String>,
1094         pmem_path: Option<&String>,
1095     ) {
1096         // Check block devices are readable
1097         self.ssh_command("sudo dd if=/dev/vda of=/dev/null bs=1M iflag=direct count=1024")
1098             .unwrap();
1099         self.ssh_command("sudo dd if=/dev/vdb of=/dev/null bs=1M iflag=direct count=8")
1100             .unwrap();
1101         // Check if the rng device is readable
1102         self.ssh_command("sudo head -c 1000 /dev/hwrng > /dev/null")
1103             .unwrap();
1104         // Check vsock
1105         if let Some(socket) = socket {
1106             self.check_vsock(socket.as_str());
1107         }
1108         // Check if the console is usable
1109         if let Some(console_text) = console_text {
1110             let console_cmd = format!("echo {} | sudo tee /dev/hvc0", console_text);
1111             self.ssh_command(&console_cmd).unwrap();
1112         }
1113         // The net device is 'automatically' exercised through the above 'ssh' commands
1114 
1115         // Check if the pmem device is usable
1116         if let Some(pmem_path) = pmem_path {
1117             assert_eq!(
1118                 self.ssh_command(&format!("ls {}", pmem_path))
1119                     .unwrap()
1120                     .trim(),
1121                 pmem_path
1122             );
1123             assert_eq!(
1124                 self.ssh_command(&format!("sudo mount {} /mnt", pmem_path))
1125                     .unwrap(),
1126                 ""
1127             );
1128             assert_eq!(self.ssh_command("ls /mnt").unwrap(), "lost+found\n");
1129             self.ssh_command("echo test123 | sudo tee /mnt/test")
1130                 .unwrap();
1131             assert_eq!(self.ssh_command("sudo umount /mnt").unwrap(), "");
1132             assert_eq!(self.ssh_command("ls /mnt").unwrap(), "");
1133 
1134             assert_eq!(
1135                 self.ssh_command(&format!("sudo mount {} /mnt", pmem_path))
1136                     .unwrap(),
1137                 ""
1138             );
1139             assert_eq!(
1140                 self.ssh_command("sudo cat /mnt/test || true")
1141                     .unwrap()
1142                     .trim(),
1143                 "test123"
1144             );
1145             self.ssh_command("sudo rm /mnt/test").unwrap();
1146             assert_eq!(self.ssh_command("sudo umount /mnt").unwrap(), "");
1147         }
1148     }
1149 }
1150 
/// Verbosity selector that maps to a command-line flag.
///
/// The `Display` form (and therefore `to_string()`) is the flag itself:
/// an empty string for `Warn`, `-v` for `Info` and `-vv` for `Debug`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VerbosityLevel {
    /// No verbosity flag.
    Warn,
    /// Rendered as `-v`.
    Info,
    /// Rendered as `-vv`.
    Debug,
}

impl Default for VerbosityLevel {
    /// The default level is `Warn`, i.e. no flag.
    fn default() -> Self {
        Self::Warn
    }
}

// `Display` is implemented rather than `ToString` directly: the standard
// blanket impl still provides `to_string()`, and the type additionally becomes
// usable with `{}` in format strings.
impl std::fmt::Display for VerbosityLevel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Warn => "",
            Self::Info => "-v",
            Self::Debug => "-vv",
        })
    }
}
1173 
/// Builder around a [`Command`] that launches a binary on behalf of a
/// [`Guest`]; configured through chained setters and started with `spawn`.
pub struct GuestCommand<'a> {
    /// Underlying process command that accumulates arguments.
    command: Command,
    /// Guest whose disk and network configuration feed the default arguments.
    guest: &'a Guest,
    /// When `true`, `spawn` pipes the child's stdout and stderr.
    capture_output: bool,
    /// When `true`, `spawn` prints the full command line before launching.
    print_cmd: bool,
    /// Verbosity flag appended by `spawn`.
    verbosity: VerbosityLevel,
}
1181 
1182 impl<'a> GuestCommand<'a> {
1183     pub fn new(guest: &'a Guest) -> Self {
1184         Self::new_with_binary_path(guest, &clh_command("cloud-hypervisor"))
1185     }
1186 
1187     pub fn new_with_binary_path(guest: &'a Guest, binary_path: &str) -> Self {
1188         Self {
1189             command: Command::new(binary_path),
1190             guest,
1191             capture_output: false,
1192             print_cmd: true,
1193             verbosity: VerbosityLevel::Info,
1194         }
1195     }
1196 
1197     pub fn verbosity(&mut self, verbosity: VerbosityLevel) -> &mut Self {
1198         self.verbosity = verbosity;
1199         self
1200     }
1201 
1202     pub fn capture_output(&mut self) -> &mut Self {
1203         self.capture_output = true;
1204         self
1205     }
1206 
1207     pub fn set_print_cmd(&mut self, print_cmd: bool) -> &mut Self {
1208         self.print_cmd = print_cmd;
1209         self
1210     }
1211 
1212     pub fn spawn(&mut self) -> io::Result<Child> {
1213         use VerbosityLevel::*;
1214         match &self.verbosity {
1215             Warn => {}
1216             Info => {
1217                 self.command.arg("-v");
1218             }
1219             Debug => {
1220                 self.command.arg("-vv");
1221             }
1222         };
1223 
1224         if self.print_cmd {
1225             println!(
1226                 "\n\n==== Start cloud-hypervisor command-line ====\n\n\
1227                      {:?}\n\
1228                      \n==== End cloud-hypervisor command-line ====\n\n",
1229                 self.command
1230             );
1231         }
1232 
1233         if self.capture_output {
1234             let child = self
1235                 .command
1236                 .stderr(Stdio::piped())
1237                 .stdout(Stdio::piped())
1238                 .spawn()
1239                 .unwrap();
1240 
1241             let fd = child.stdout.as_ref().unwrap().as_raw_fd();
1242             let pipesize = unsafe { libc::fcntl(fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };
1243             let fd = child.stderr.as_ref().unwrap().as_raw_fd();
1244             let pipesize1 = unsafe { libc::fcntl(fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };
1245 
1246             if pipesize >= PIPE_SIZE && pipesize1 >= PIPE_SIZE {
1247                 Ok(child)
1248             } else {
1249                 Err(std::io::Error::new(
1250                     std::io::ErrorKind::Other,
1251                     "resizing pipe w/ 'fnctl' failed!",
1252                 ))
1253             }
1254         } else {
1255             self.command.spawn()
1256         }
1257     }
1258 
1259     pub fn args<I, S>(&mut self, args: I) -> &mut Self
1260     where
1261         I: IntoIterator<Item = S>,
1262         S: AsRef<OsStr>,
1263     {
1264         self.command.args(args);
1265         self
1266     }
1267 
1268     pub fn default_disks(&mut self) -> &mut Self {
1269         if self.guest.disk_config.disk(DiskType::CloudInit).is_some() {
1270             self.args(&[
1271                 "--disk",
1272                 format!(
1273                     "path={}",
1274                     self.guest
1275                         .disk_config
1276                         .disk(DiskType::OperatingSystem)
1277                         .unwrap()
1278                 )
1279                 .as_str(),
1280                 format!(
1281                     "path={}",
1282                     self.guest.disk_config.disk(DiskType::CloudInit).unwrap()
1283                 )
1284                 .as_str(),
1285             ])
1286         } else {
1287             self.args(&[
1288                 "--disk",
1289                 format!(
1290                     "path={}",
1291                     self.guest
1292                         .disk_config
1293                         .disk(DiskType::OperatingSystem)
1294                         .unwrap()
1295                 )
1296                 .as_str(),
1297             ])
1298         }
1299     }
1300 
1301     pub fn default_net(&mut self) -> &mut Self {
1302         self.args(&["--net", self.guest.default_net_string().as_str()])
1303     }
1304 }
1305 
/// Returns the relative path `target/<triple>/release/<cmd>`.
///
/// `<triple>` is taken from the `BUILD_TARGET` environment variable; when the
/// variable is unset or not valid Unicode, `x86_64-unknown-linux-gnu` is used.
pub fn clh_command(cmd: &str) -> String {
    // The fallback is built lazily so nothing is allocated for it when
    // `BUILD_TARGET` is present.
    let target =
        env::var("BUILD_TARGET").unwrap_or_else(|_| "x86_64-unknown-linux-gnu".to_string());
    format!("target/{}/release/{}", target, cmd)
}
1312