xref: /cloud-hypervisor/test_infra/src/lib.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // Copyright © 2021 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate lazy_static;
8 
9 use ssh2::Session;
10 use std::env;
11 use std::ffi::OsStr;
12 use std::fs;
13 use std::io;
14 use std::io::{Read, Write};
15 use std::net::TcpListener;
16 use std::net::TcpStream;
17 use std::os::unix::fs::PermissionsExt;
18 use std::os::unix::io::AsRawFd;
19 use std::path::Path;
20 use std::process::{Child, Command, ExitStatus, Output, Stdio};
21 use std::str::FromStr;
22 use std::sync::Mutex;
23 use std::thread;
24 use vmm_sys_util::tempdir::TempDir;
25 
/// Crate-level error type returned by the guest helper methods.
#[derive(Debug)]
pub enum Error {
    /// Text could not be parsed as an integer.
    Parsing(std::num::ParseIntError),
    /// An SSH or SCP operation against a guest failed.
    SshCommand(SshCommandError),
    /// Waiting for the guest's boot notification failed.
    WaitForBoot(WaitForBootError),
}
32 
33 impl From<SshCommandError> for Error {
34     fn from(e: SshCommandError) -> Self {
35         Self::SshCommand(e)
36     }
37 }
38 
/// Network parameters (addresses, MACs and boot-notification port) of one test guest.
pub struct GuestNetworkConfig {
    /// IP address of the guest; `wait_vm_boot` expects the boot connection to come from it.
    pub guest_ip: String,
    /// First "l2" guest IP (presumably a nested guest — confirm against the tests).
    pub l2_guest_ip1: String,
    /// Second "l2" guest IP.
    pub l2_guest_ip2: String,
    /// Third "l2" guest IP.
    pub l2_guest_ip3: String,
    /// IP address of the host side of the guest network.
    pub host_ip: String,
    /// MAC address of the guest.
    pub guest_mac: String,
    /// MAC address paired with `l2_guest_ip1`.
    pub l2_guest_mac1: String,
    /// MAC address paired with `l2_guest_ip2`.
    pub l2_guest_mac2: String,
    /// MAC address paired with `l2_guest_ip3`.
    pub l2_guest_mac3: String,
    /// TCP port on which `wait_vm_boot` listens for the guest's connection.
    pub tcp_listener_port: u16,
}
51 
/// Text substituted for the `@DEFAULT_TCP_LISTENER_MESSAGE` placeholder in the cloud-init user-data.
pub const DEFAULT_TCP_LISTENER_MESSAGE: &str = "booted";
/// Base TCP port for boot notifications; the guest id is added to it to get a per-guest port.
pub const DEFAULT_TCP_LISTENER_PORT: u16 = 8000;
/// Default time, in seconds, that `wait_vm_boot` waits for the guest to connect.
pub const DEFAULT_TCP_LISTENER_TIMEOUT: i32 = 80;
55 
/// Failure modes of `GuestNetworkConfig::wait_vm_boot`.
#[derive(Debug)]
pub enum WaitForBootError {
    /// `epoll::wait` failed with an error other than `EAGAIN`/`EINTR`.
    EpollWait(std::io::Error),
    /// Binding the TCP listener failed.
    Listen(std::io::Error),
    /// No connection arrived before the timeout expired.
    EpollWaitTimeout,
    /// A connection arrived, but not from the expected guest IP.
    WrongGuestAddr,
    /// Accepting the incoming connection failed.
    Accept(std::io::Error),
}
64 
65 impl GuestNetworkConfig {
66     pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), WaitForBootError> {
67         let start = std::time::Instant::now();
68         // The 'port' is unique per 'GUEST' and listening to wild-card ip avoids retrying on 'TcpListener::bind()'
69         let listen_addr = format!("0.0.0.0:{}", self.tcp_listener_port);
70         let expected_guest_addr = self.guest_ip.as_str();
71         let mut s = String::new();
72         let timeout = match custom_timeout {
73             Some(t) => t,
74             None => DEFAULT_TCP_LISTENER_TIMEOUT,
75         };
76 
77         match (|| -> Result<(), WaitForBootError> {
78             let listener =
79                 TcpListener::bind(&listen_addr.as_str()).map_err(WaitForBootError::Listen)?;
80             listener
81                 .set_nonblocking(true)
82                 .expect("Cannot set non-blocking for tcp listener");
83 
84             // Reply on epoll w/ timeout to wait for guest connections faithfully
85             let epoll_fd = epoll::create(true).expect("Cannot create epoll fd");
86             epoll::ctl(
87                 epoll_fd,
88                 epoll::ControlOptions::EPOLL_CTL_ADD,
89                 listener.as_raw_fd(),
90                 epoll::Event::new(epoll::Events::EPOLLIN, 0),
91             )
92             .expect("Cannot add 'tcp_listener' event to epoll");
93             let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 1];
94             loop {
95                 let num_events = match epoll::wait(epoll_fd, timeout * 1000_i32, &mut events[..]) {
96                     Ok(num_events) => Ok(num_events),
97                     Err(e) => match e.raw_os_error() {
98                         Some(libc::EAGAIN) | Some(libc::EINTR) => continue,
99                         _ => Err(e),
100                     },
101                 }
102                 .map_err(WaitForBootError::EpollWait)?;
103                 if num_events == 0 {
104                     return Err(WaitForBootError::EpollWaitTimeout);
105                 }
106                 break;
107             }
108 
109             match listener.accept() {
110                 Ok((_, addr)) => {
111                     // Make sure the connection is from the expected 'guest_addr'
112                     if addr.ip() != std::net::IpAddr::from_str(expected_guest_addr).unwrap() {
113                         s = format!(
114                             "Expecting the guest ip '{}' while being connected with ip '{}'",
115                             expected_guest_addr,
116                             addr.ip()
117                         );
118                         return Err(WaitForBootError::WrongGuestAddr);
119                     }
120 
121                     Ok(())
122                 }
123                 Err(e) => {
124                     s = "TcpListener::accept() failed".to_string();
125                     Err(WaitForBootError::Accept(e))
126                 }
127             }
128         })() {
129             Err(e) => {
130                 let duration = start.elapsed();
131                 eprintln!(
132                     "\n\n==== Start 'wait_vm_boot' (FAILED) ====\n\n\
133                  duration =\"{:?}, timeout = {}s\"\n\
134                  listen_addr=\"{}\"\n\
135                  expected_guest_addr=\"{}\"\n\
136                  message=\"{}\"\n\
137                  error=\"{:?}\"\n\
138                  \n==== End 'wait_vm_boot' outout ====\n\n",
139                     duration, timeout, listen_addr, expected_guest_addr, s, e
140                 );
141 
142                 Err(e)
143             }
144             Ok(_) => Ok(()),
145         }
146     }
147 }
148 
/// Selects which of a guest's disks is being asked for in `DiskConfig::disk`.
pub enum DiskType {
    /// The disk holding the guest operating system.
    OperatingSystem,
    /// The cloud-init configuration disk.
    CloudInit,
}
153 
/// Describes how the disks of a test guest are prepared and where they end up.
pub trait DiskConfig {
    /// Prepares the guest's disks, using `tmp_dir` for per-guest files.
    fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig);
    /// Builds the cloud-init image for `network` and returns its path
    /// (implementations without cloud-init return an empty string).
    fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String;
    /// Returns the path of the requested disk, or `None` when this configuration has no such disk.
    fn disk(&self, disk_type: DiskType) -> Option<String>;
}
159 
/// Disk configuration for an Ubuntu guest: a private copy of the OS image plus a cloud-init disk.
#[derive(Clone)]
pub struct UbuntuDiskConfig {
    /// Path of the per-guest OS disk copy; empty until `prepare_files` has run.
    osdisk_path: String,
    /// Path of the generated cloud-init image; empty until `prepare_files` has run.
    cloudinit_path: String,
    /// File name of the source image, looked up under `~/workloads`.
    image_name: String,
}
166 
167 impl UbuntuDiskConfig {
168     pub fn new(image_name: String) -> Self {
169         UbuntuDiskConfig {
170             image_name,
171             osdisk_path: String::new(),
172             cloudinit_path: String::new(),
173         }
174     }
175 }
176 
/// Disk configuration for a Windows guest backed by a device-mapper snapshot.
///
/// All fields except `image_name` are empty until `prepare_files` has run.
pub struct WindowsDiskConfig {
    /// File name of the source image, looked up under `~/workloads`.
    image_name: String,
    /// Path of the snapshot device handed to the guest (`/dev/mapper/<windows_snapshot>`).
    osdisk_path: String,
    /// Loopback device backing the copy-on-write file, as printed by `losetup`.
    loopback_device: String,
    /// Device-mapper name of the linear copy-on-write target.
    windows_snapshot_cow: String,
    /// Device-mapper name of the snapshot target.
    windows_snapshot: String,
}
184 
185 impl WindowsDiskConfig {
186     pub fn new(image_name: String) -> Self {
187         WindowsDiskConfig {
188             image_name,
189             osdisk_path: String::new(),
190             loopback_device: String::new(),
191             windows_snapshot_cow: String::new(),
192             windows_snapshot: String::new(),
193         }
194     }
195 }
196 
197 impl Drop for WindowsDiskConfig {
198     fn drop(&mut self) {
199         // dmsetup remove windows-snapshot-1
200         std::process::Command::new("dmsetup")
201             .arg("remove")
202             .arg(self.windows_snapshot.as_str())
203             .output()
204             .expect("Expect removing Windows snapshot with 'dmsetup' to succeed");
205 
206         // dmsetup remove windows-snapshot-cow-1
207         std::process::Command::new("dmsetup")
208             .arg("remove")
209             .arg(self.windows_snapshot_cow.as_str())
210             .output()
211             .expect("Expect removing Windows snapshot CoW with 'dmsetup' to succeed");
212 
213         // losetup -d <loopback_device>
214         std::process::Command::new("losetup")
215             .args(&["-d", self.loopback_device.as_str()])
216             .output()
217             .expect("Expect removing loopback device to succeed");
218     }
219 }
220 
221 impl DiskConfig for UbuntuDiskConfig {
222     fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String {
223         let cloudinit_file_path =
224             String::from(tmp_dir.as_path().join("cloudinit").to_str().unwrap());
225 
226         let cloud_init_directory = tmp_dir.as_path().join("cloud-init").join("ubuntu");
227 
228         fs::create_dir_all(&cloud_init_directory)
229             .expect("Expect creating cloud-init directory to succeed");
230 
231         let source_file_dir = std::env::current_dir()
232             .unwrap()
233             .join("test_data")
234             .join("cloud-init")
235             .join("ubuntu");
236 
237         vec!["meta-data"].iter().for_each(|x| {
238             rate_limited_copy(source_file_dir.join(x), cloud_init_directory.join(x))
239                 .expect("Expect copying cloud-init meta-data to succeed");
240         });
241 
242         let mut user_data_string = String::new();
243         fs::File::open(source_file_dir.join("user-data"))
244             .unwrap()
245             .read_to_string(&mut user_data_string)
246             .expect("Expected reading user-data file in to succeed");
247         user_data_string = user_data_string.replace(
248             "@DEFAULT_TCP_LISTENER_MESSAGE",
249             DEFAULT_TCP_LISTENER_MESSAGE,
250         );
251         user_data_string = user_data_string.replace("@HOST_IP", &network.host_ip);
252         user_data_string =
253             user_data_string.replace("@TCP_LISTENER_PORT", &network.tcp_listener_port.to_string());
254 
255         fs::File::create(cloud_init_directory.join("user-data"))
256             .unwrap()
257             .write_all(user_data_string.as_bytes())
258             .expect("Expected writing out user-data to succeed");
259 
260         let mut network_config_string = String::new();
261 
262         fs::File::open(source_file_dir.join("network-config"))
263             .unwrap()
264             .read_to_string(&mut network_config_string)
265             .expect("Expected reading network-config file in to succeed");
266 
267         network_config_string = network_config_string.replace("192.168.2.1", &network.host_ip);
268         network_config_string = network_config_string.replace("192.168.2.2", &network.guest_ip);
269         network_config_string = network_config_string.replace("192.168.2.3", &network.l2_guest_ip1);
270         network_config_string = network_config_string.replace("192.168.2.4", &network.l2_guest_ip2);
271         network_config_string = network_config_string.replace("192.168.2.5", &network.l2_guest_ip3);
272         network_config_string =
273             network_config_string.replace("12:34:56:78:90:ab", &network.guest_mac);
274         network_config_string =
275             network_config_string.replace("de:ad:be:ef:12:34", &network.l2_guest_mac1);
276         network_config_string =
277             network_config_string.replace("de:ad:be:ef:34:56", &network.l2_guest_mac2);
278         network_config_string =
279             network_config_string.replace("de:ad:be:ef:56:78", &network.l2_guest_mac3);
280 
281         fs::File::create(cloud_init_directory.join("network-config"))
282             .unwrap()
283             .write_all(network_config_string.as_bytes())
284             .expect("Expected writing out network-config to succeed");
285 
286         std::process::Command::new("mkdosfs")
287             .args(&["-n", "cidata"])
288             .args(&["-C", cloudinit_file_path.as_str()])
289             .arg("8192")
290             .output()
291             .expect("Expect creating disk image to succeed");
292 
293         vec!["user-data", "meta-data", "network-config"]
294             .iter()
295             .for_each(|x| {
296                 std::process::Command::new("mcopy")
297                     .arg("-o")
298                     .args(&["-i", cloudinit_file_path.as_str()])
299                     .args(&["-s", cloud_init_directory.join(x).to_str().unwrap(), "::"])
300                     .output()
301                     .expect("Expect copying files to disk image to succeed");
302             });
303 
304         cloudinit_file_path
305     }
306 
307     fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig) {
308         let mut workload_path = dirs::home_dir().unwrap();
309         workload_path.push("workloads");
310 
311         let mut osdisk_base_path = workload_path;
312         osdisk_base_path.push(&self.image_name);
313 
314         let osdisk_path = String::from(tmp_dir.as_path().join("osdisk.img").to_str().unwrap());
315         let cloudinit_path = self.prepare_cloudinit(tmp_dir, network);
316 
317         rate_limited_copy(osdisk_base_path, &osdisk_path)
318             .expect("copying of OS source disk image failed");
319 
320         self.cloudinit_path = cloudinit_path;
321         self.osdisk_path = osdisk_path;
322     }
323 
324     fn disk(&self, disk_type: DiskType) -> Option<String> {
325         match disk_type {
326             DiskType::OperatingSystem => Some(self.osdisk_path.clone()),
327             DiskType::CloudInit => Some(self.cloudinit_path.clone()),
328         }
329     }
330 }
331 
impl DiskConfig for WindowsDiskConfig {
    /// No cloud-init image is produced for this configuration; always returns an empty string.
    fn prepare_cloudinit(&self, _tmp_dir: &TempDir, _network: &GuestNetworkConfig) -> String {
        String::new()
    }

    /// Sets up a device-mapper snapshot for this guest.
    ///
    /// The sequence is: create a 1 GiB sparse copy-on-write file in `tmp_dir`, attach it
    /// to a loopback device, expose that device as a linear device-mapper target, then
    /// create a snapshot target of `/dev/mapper/windows-base` that stores its changes in
    /// the copy-on-write target. The resulting `/dev/mapper/<snapshot>` path becomes the
    /// OS disk.
    ///
    /// Panics if the image metadata cannot be read, the copy-on-write file cannot be
    /// created or sized, or any of the external commands cannot be started. The exit
    /// status of the commands is not checked.
    fn prepare_files(&mut self, tmp_dir: &TempDir, _network: &GuestNetworkConfig) {
        let mut workload_path = dirs::home_dir().unwrap();
        workload_path.push("workloads");

        let mut osdisk_path = workload_path;
        osdisk_path.push(&self.image_name);

        // Size of the source image in 512-byte sectors (>> 9 divides by 512).
        let osdisk_blk_size = fs::metadata(osdisk_path)
            .expect("Expect retrieving Windows image metadata")
            .len()
            >> 9;

        let snapshot_cow_path =
            String::from(tmp_dir.as_path().join("snapshot_cow").to_str().unwrap());

        // Create and truncate CoW file for device mapper
        let cow_file_size: u64 = 1 << 30;
        let cow_file_blk_size = cow_file_size >> 9;
        let cow_file = std::fs::File::create(snapshot_cow_path.as_str())
            .expect("Expect creating CoW image to succeed");
        cow_file
            .set_len(cow_file_size)
            .expect("Expect truncating CoW image to succeed");

        // losetup --find --show /tmp/snapshot_cow
        let loopback_device = std::process::Command::new("losetup")
            .arg("--find")
            .arg("--show")
            .arg(snapshot_cow_path.as_str())
            .output()
            .expect("Expect creating loopback device from snapshot CoW image to succeed");

        // `losetup --show` prints the allocated device path on stdout.
        // It is stored right away so that `Drop` can detach it even if a later step panics.
        self.loopback_device = String::from_utf8_lossy(&loopback_device.stdout)
            .trim()
            .to_string();

        // The last component of the temporary directory is reused as a suffix for the
        // device-mapper names created below.
        let random_extension = tmp_dir.as_path().file_name().unwrap();
        let windows_snapshot_cow = format!(
            "windows-snapshot-cow-{}",
            random_extension.to_str().unwrap()
        );

        // dmsetup create windows-snapshot-cow-1 --table '0 2097152 linear /dev/loop1 0'
        std::process::Command::new("dmsetup")
            .arg("create")
            .arg(windows_snapshot_cow.as_str())
            .args(&[
                "--table",
                format!("0 {} linear {} 0", cow_file_blk_size, self.loopback_device).as_str(),
            ])
            .output()
            .expect("Expect creating Windows snapshot CoW with 'dmsetup' to succeed");

        let windows_snapshot = format!("windows-snapshot-{}", random_extension.to_str().unwrap());

        // dmsetup mknodes
        std::process::Command::new("dmsetup")
            .arg("mknodes")
            .output()
            .expect("Expect device mapper nodes to be ready");

        // dmsetup create windows-snapshot-1 --table '0 41943040 snapshot /dev/mapper/windows-base /dev/mapper/windows-snapshot-cow-1 P 8'
        std::process::Command::new("dmsetup")
            .arg("create")
            .arg(windows_snapshot.as_str())
            .args(&[
                "--table",
                format!(
                    "0 {} snapshot /dev/mapper/windows-base /dev/mapper/{} P 8",
                    osdisk_blk_size,
                    windows_snapshot_cow.as_str()
                )
                .as_str(),
            ])
            .output()
            .expect("Expect creating Windows snapshot with 'dmsetup' to succeed");

        // dmsetup mknodes
        std::process::Command::new("dmsetup")
            .arg("mknodes")
            .output()
            .expect("Expect device mapper nodes to be ready");

        // Record the device names; `Drop` uses them to remove the targets again.
        self.osdisk_path = format!("/dev/mapper/{}", windows_snapshot);
        self.windows_snapshot_cow = windows_snapshot_cow;
        self.windows_snapshot = windows_snapshot;
    }

    /// Returns the snapshot device path for the OS disk; there is no cloud-init disk.
    fn disk(&self, disk_type: DiskType) -> Option<String> {
        match disk_type {
            DiskType::OperatingSystem => Some(self.osdisk_path.clone()),
            DiskType::CloudInit => None,
        }
    }
}
432 
433 pub fn rate_limited_copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<u64> {
434     for i in 0..10 {
435         let free_bytes = unsafe {
436             let mut stats = std::mem::MaybeUninit::zeroed();
437             let fs_name = std::ffi::CString::new("/tmp").unwrap();
438             libc::statvfs(fs_name.as_ptr(), stats.as_mut_ptr());
439 
440             let free_blocks = stats.assume_init().f_bfree;
441             let block_size = stats.assume_init().f_bsize;
442 
443             free_blocks * block_size
444         };
445 
446         // Make sure there is at least 6 GiB of space
447         if free_bytes < 6 << 30 {
448             eprintln!(
449                 "Not enough space on disk ({}). Attempt {} of 10. Sleeping.",
450                 free_bytes, i
451             );
452             thread::sleep(std::time::Duration::new(60, 0));
453             continue;
454         }
455 
456         match fs::copy(&from, &to) {
457             Err(e) => {
458                 if let Some(errno) = e.raw_os_error() {
459                     if errno == libc::ENOSPC {
460                         eprintln!("Copy returned ENOSPC. Attempt {} of 10. Sleeping.", i);
461                         thread::sleep(std::time::Duration::new(60, 0));
462                         continue;
463                     }
464                 }
465                 return Err(e);
466             }
467             Ok(i) => return Ok(i),
468         }
469     }
470     Err(io::Error::last_os_error())
471 }
472 
/// Decides whether a test run that involved a child process succeeded, and panics if not.
///
/// `r` is the outcome of the test body (an `Err` carries a caught panic payload) and
/// `output` is the collected output of the child process.
///
/// Returns quietly when the test body succeeded and the child either exited
/// successfully or was terminated by signal 9 (the result of `child.kill()`).
/// Otherwise the child's exit code or signal, its stdout and its stderr are written to
/// stderr and the function panics with "Test failed".
pub fn handle_child_output(
    r: Result<(), std::boxed::Box<dyn std::any::Any + std::marker::Send>>,
    output: &std::process::Output,
) {
    use std::os::unix::process::ExitStatusExt;

    let body_succeeded = r.is_ok();
    let status = &output.status;

    if body_succeeded && status.success() {
        return;
    }

    if let Some(code) = status.code() {
        eprintln!("\n\n==== child exit code: {} ====", code);
    } else {
        // No exit code: the child was terminated by a signal.
        // Don't treat child.kill() as a problem
        if body_succeeded && status.signal() == Some(9) {
            return;
        }

        eprintln!(
            "==== child killed by signal: {} ====",
            status.signal().unwrap()
        );
    }

    let child_stdout = String::from_utf8_lossy(&output.stdout);
    eprintln!(
        "\n\n==== Start child stdout ====\n\n{}\n\n==== End child stdout ====",
        child_stdout
    );
    let child_stderr = String::from_utf8_lossy(&output.stderr);
    eprintln!(
        "\n\n==== Start child stderr ====\n\n{}\n\n==== End child stderr ====",
        child_stderr
    );

    panic!("Test failed")
}
510 
/// Username/password pair used for SSH password authentication against a guest.
///
/// Note: the derived `Debug` output includes the password, and the SSH/SCP helpers
/// print it in their failure diagnostics.
#[derive(Debug)]
pub struct PasswordAuth {
    /// Login name.
    pub username: String,
    /// Plain-text password.
    pub password: String,
}
516 
/// Default number of attempts made by the SSH helpers before giving up.
pub const DEFAULT_SSH_RETRIES: u8 = 6;
/// Default base delay, in seconds, between SSH attempts; it is multiplied by the number
/// of failed attempts so far.
pub const DEFAULT_SSH_TIMEOUT: u8 = 10;
519 
/// Failure modes of the SSH and SCP helpers; each variant names the step that failed.
#[derive(Debug)]
pub enum SshCommandError {
    /// The TCP connection to port 22 could not be established.
    Connection(std::io::Error),
    /// The SSH handshake failed.
    Handshake(ssh2::Error),
    /// Password authentication failed.
    Authentication(ssh2::Error),
    /// A session channel could not be opened.
    ChannelSession(ssh2::Error),
    /// The command could not be executed on the channel.
    Command(ssh2::Error),
    /// The command's exit status could not be retrieved.
    ExitStatus(ssh2::Error),
    /// The command ran but exited with the contained non-zero status.
    NonZeroExitStatus(i32),
    /// The local file to upload could not be read.
    FileRead(std::io::Error),
    /// The metadata of the local file to upload could not be read.
    FileMetadata(std::io::Error),
    /// The SCP upload could not be started.
    ScpSend(ssh2::Error),
    /// Writing the file contents to the SCP channel failed.
    WriteAll(std::io::Error),
    /// Sending EOF on the SCP channel failed.
    SendEof(ssh2::Error),
    /// Waiting for the remote EOF on the SCP channel failed.
    WaitEof(ssh2::Error),
}
536 
537 fn scp_to_guest_with_auth(
538     path: &Path,
539     remote_path: &Path,
540     auth: &PasswordAuth,
541     ip: &str,
542     retries: u8,
543     timeout: u8,
544 ) -> Result<(), SshCommandError> {
545     let mut counter = 0;
546     loop {
547         match (|| -> Result<(), SshCommandError> {
548             let tcp =
549                 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?;
550             let mut sess = Session::new().unwrap();
551             sess.set_tcp_stream(tcp);
552             sess.handshake().map_err(SshCommandError::Handshake)?;
553 
554             sess.userauth_password(&auth.username, &auth.password)
555                 .map_err(SshCommandError::Authentication)?;
556             assert!(sess.authenticated());
557 
558             let content = fs::read(path).map_err(SshCommandError::FileRead)?;
559             let mode = fs::metadata(path)
560                 .map_err(SshCommandError::FileMetadata)?
561                 .permissions()
562                 .mode()
563                 & 0o777;
564 
565             let mut channel = sess
566                 .scp_send(remote_path, mode as i32, content.len() as u64, None)
567                 .map_err(SshCommandError::ScpSend)?;
568             channel
569                 .write_all(&content)
570                 .map_err(SshCommandError::WriteAll)?;
571             channel.send_eof().map_err(SshCommandError::SendEof)?;
572             channel.wait_eof().map_err(SshCommandError::WaitEof)?;
573 
574             // Intentionally ignore these results here as their failure
575             // does not precipitate a repeat
576             let _ = channel.close();
577             let _ = channel.wait_close();
578 
579             Ok(())
580         })() {
581             Ok(_) => break,
582             Err(e) => {
583                 counter += 1;
584                 if counter >= retries {
585                     eprintln!(
586                         "\n\n==== Start scp command output (FAILED) ====\n\n\
587                          path =\"{:?}\"\n\
588                          remote_path =\"{:?}\"\n\
589                          auth=\"{:#?}\"\n\
590                          ip=\"{}\"\n\
591                          error=\"{:?}\"\n\
592                          \n==== End scp command outout ====\n\n",
593                         path, remote_path, auth, ip, e
594                     );
595 
596                     return Err(e);
597                 }
598             }
599         };
600         thread::sleep(std::time::Duration::new((timeout * counter).into(), 0));
601     }
602     Ok(())
603 }
604 
605 pub fn scp_to_guest(
606     path: &Path,
607     remote_path: &Path,
608     ip: &str,
609     retries: u8,
610     timeout: u8,
611 ) -> Result<(), SshCommandError> {
612     scp_to_guest_with_auth(
613         path,
614         remote_path,
615         &PasswordAuth {
616             username: String::from("cloud"),
617             password: String::from("cloud123"),
618         },
619         ip,
620         retries,
621         timeout,
622     )
623 }
624 
625 pub fn ssh_command_ip_with_auth(
626     command: &str,
627     auth: &PasswordAuth,
628     ip: &str,
629     retries: u8,
630     timeout: u8,
631 ) -> Result<String, SshCommandError> {
632     let mut s = String::new();
633 
634     let mut counter = 0;
635     loop {
636         match (|| -> Result<(), SshCommandError> {
637             let tcp =
638                 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?;
639             let mut sess = Session::new().unwrap();
640             sess.set_tcp_stream(tcp);
641             sess.handshake().map_err(SshCommandError::Handshake)?;
642 
643             sess.userauth_password(&auth.username, &auth.password)
644                 .map_err(SshCommandError::Authentication)?;
645             assert!(sess.authenticated());
646 
647             let mut channel = sess
648                 .channel_session()
649                 .map_err(SshCommandError::ChannelSession)?;
650             channel.exec(command).map_err(SshCommandError::Command)?;
651 
652             // Intentionally ignore these results here as their failure
653             // does not precipitate a repeat
654             let _ = channel.read_to_string(&mut s);
655             let _ = channel.close();
656             let _ = channel.wait_close();
657 
658             let status = channel.exit_status().map_err(SshCommandError::ExitStatus)?;
659 
660             if status != 0 {
661                 Err(SshCommandError::NonZeroExitStatus(status))
662             } else {
663                 Ok(())
664             }
665         })() {
666             Ok(_) => break,
667             Err(e) => {
668                 counter += 1;
669                 if counter >= retries {
670                     eprintln!(
671                         "\n\n==== Start ssh command output (FAILED) ====\n\n\
672                          command=\"{}\"\n\
673                          auth=\"{:#?}\"\n\
674                          ip=\"{}\"\n\
675                          output=\"{}\"\n\
676                          error=\"{:?}\"\n\
677                          \n==== End ssh command outout ====\n\n",
678                         command, auth, ip, s, e
679                     );
680 
681                     return Err(e);
682                 }
683             }
684         };
685         thread::sleep(std::time::Duration::new((timeout * counter).into(), 0));
686     }
687     Ok(s)
688 }
689 
690 pub fn ssh_command_ip(
691     command: &str,
692     ip: &str,
693     retries: u8,
694     timeout: u8,
695 ) -> Result<String, SshCommandError> {
696     ssh_command_ip_with_auth(
697         command,
698         &PasswordAuth {
699             username: String::from("cloud"),
700             password: String::from("cloud123"),
701         },
702         ip,
703         retries,
704         timeout,
705     )
706 }
707 
/// Runs `command` on the host through `bash -c` and returns its exit status.
///
/// The child inherits the caller's stdio.
///
/// # Panics
///
/// Panics if `bash` cannot be started.
pub fn exec_host_command_status(command: &str) -> ExitStatus {
    let mut shell = std::process::Command::new("bash");
    shell.arg("-c").arg(command);

    match shell.status() {
        Ok(status) => status,
        Err(_) => panic!("Expected '{}' to run", command),
    }
}
714 
/// Runs `command` on the host through `bash -c` and returns its captured output
/// (exit status, stdout and stderr).
///
/// # Panics
///
/// Panics if `bash` cannot be started.
pub fn exec_host_command_output(command: &str) -> Output {
    let mut shell = std::process::Command::new("bash");
    shell.arg("-c").arg(command);

    match shell.output() {
        Ok(captured) => captured,
        Err(_) => panic!("Expected '{}' to run", command),
    }
}
721 
/// Pipe buffer size of 32 MiB (`32 << 20` bytes).
// NOTE(review): the consumer of this constant is outside this part of the file —
// presumably used to enlarge the pipes capturing child output; confirm at the use site.
pub const PIPE_SIZE: i32 = 32 << 20;
723 
lazy_static! {
    // Id handed to the next guest created through `Guest::new`. Starts at 1 and is
    // incremented on every call; the id determines the guest's subnet, MAC suffix and
    // boot-notification port.
    static ref NEXT_VM_ID: Mutex<u8> = Mutex::new(1);
}
727 
/// A test guest: its scratch directory, its disks and its network parameters.
pub struct Guest {
    /// Per-guest temporary directory (created under `/tmp` with the `ch` prefix).
    pub tmp_dir: TempDir,
    /// Disk configuration, already prepared when the guest is constructed.
    pub disk_config: Box<dyn DiskConfig>,
    /// Addresses, MACs and boot-notification port assigned to this guest.
    pub network: GuestNetworkConfig,
}
733 
// Safe to implement as we know we have no interior mutability.
// This marker lets `&Guest` be captured by closures passed to `std::panic::catch_unwind`.
// NOTE(review): the claim relies on every `DiskConfig` implementation stored in
// `disk_config` (and on `TempDir`) being free of interior mutability — confirm when
// adding new implementations.
impl std::panic::RefUnwindSafe for Guest {}
736 
737 impl Guest {
738     pub fn new_from_ip_range(mut disk_config: Box<dyn DiskConfig>, class: &str, id: u8) -> Self {
739         let tmp_dir = TempDir::new_with_prefix("/tmp/ch").unwrap();
740 
741         let network = GuestNetworkConfig {
742             guest_ip: format!("{}.{}.2", class, id),
743             l2_guest_ip1: format!("{}.{}.3", class, id),
744             l2_guest_ip2: format!("{}.{}.4", class, id),
745             l2_guest_ip3: format!("{}.{}.5", class, id),
746             host_ip: format!("{}.{}.1", class, id),
747             guest_mac: format!("12:34:56:78:90:{:02x}", id),
748             l2_guest_mac1: format!("de:ad:be:ef:12:{:02x}", id),
749             l2_guest_mac2: format!("de:ad:be:ef:34:{:02x}", id),
750             l2_guest_mac3: format!("de:ad:be:ef:56:{:02x}", id),
751             tcp_listener_port: DEFAULT_TCP_LISTENER_PORT + id as u16,
752         };
753 
754         disk_config.prepare_files(&tmp_dir, &network);
755 
756         Guest {
757             tmp_dir,
758             disk_config,
759             network,
760         }
761     }
762 
763     pub fn new(disk_config: Box<dyn DiskConfig>) -> Self {
764         let mut guard = NEXT_VM_ID.lock().unwrap();
765         let id = *guard;
766         *guard = id + 1;
767 
768         Self::new_from_ip_range(disk_config, "192.168", id)
769     }
770 
771     pub fn default_net_string(&self) -> String {
772         format!(
773             "tap=,mac={},ip={},mask=255.255.255.0",
774             self.network.guest_mac, self.network.host_ip
775         )
776     }
777 
778     pub fn default_net_string_w_iommu(&self) -> String {
779         format!(
780             "tap=,mac={},ip={},mask=255.255.255.0,iommu=on",
781             self.network.guest_mac, self.network.host_ip
782         )
783     }
784 
785     pub fn ssh_command(&self, command: &str) -> Result<String, SshCommandError> {
786         ssh_command_ip(
787             command,
788             &self.network.guest_ip,
789             DEFAULT_SSH_RETRIES,
790             DEFAULT_SSH_TIMEOUT,
791         )
792     }
793 
794     #[cfg(target_arch = "x86_64")]
795     pub fn ssh_command_l1(&self, command: &str) -> Result<String, SshCommandError> {
796         ssh_command_ip(
797             command,
798             &self.network.guest_ip,
799             DEFAULT_SSH_RETRIES,
800             DEFAULT_SSH_TIMEOUT,
801         )
802     }
803 
804     #[cfg(target_arch = "x86_64")]
805     pub fn ssh_command_l2_1(&self, command: &str) -> Result<String, SshCommandError> {
806         ssh_command_ip(
807             command,
808             &self.network.l2_guest_ip1,
809             DEFAULT_SSH_RETRIES,
810             DEFAULT_SSH_TIMEOUT,
811         )
812     }
813 
814     #[cfg(target_arch = "x86_64")]
815     pub fn ssh_command_l2_2(&self, command: &str) -> Result<String, SshCommandError> {
816         ssh_command_ip(
817             command,
818             &self.network.l2_guest_ip2,
819             DEFAULT_SSH_RETRIES,
820             DEFAULT_SSH_TIMEOUT,
821         )
822     }
823 
824     #[cfg(target_arch = "x86_64")]
825     pub fn ssh_command_l2_3(&self, command: &str) -> Result<String, SshCommandError> {
826         ssh_command_ip(
827             command,
828             &self.network.l2_guest_ip3,
829             DEFAULT_SSH_RETRIES,
830             DEFAULT_SSH_TIMEOUT,
831         )
832     }
833 
    /// Builds the JSON text describing a VM configuration for this guest.
    ///
    /// The document always contains:
    /// * `cpus`: `boot_vcpus` and `max_vcpus`, both set to `cpu_count`;
    /// * `net`: one entry using `self.network.host_ip` as `ip`, a fixed
    ///   `255.255.255.0` mask and `self.network.guest_mac` as `mac`;
    /// * `disks`: the operating-system disk followed by the cloud-init disk.
    ///
    /// Which of the path/cmdline arguments is used depends on the build:
    /// * x86_64 without the `mshv` feature: `kernel.path` is `_fw_path` and
    ///   the command line is left empty (`_kernel_path` / `_kernel_cmd` are
    ///   ignored);
    /// * aarch64, or any build with the `mshv` feature: `kernel.path` is
    ///   `_kernel_path` and the command line is `_kernel_cmd` (`_fw_path` is
    ///   ignored).
    ///
    /// Values are interpolated verbatim; no JSON escaping is performed.
    ///
    /// # Panics
    ///
    /// Panics if the disk configuration has no operating-system disk or no
    /// cloud-init disk (both lookups are `unwrap`ped).
    pub fn api_create_body(
        &self,
        cpu_count: u8,
        _fw_path: &str,
        _kernel_path: &str,
        _kernel_cmd: &str,
    ) -> String {
        // Firmware boot: the "kernel" slot carries the firmware path and no
        // command line is passed.
        #[cfg(all(target_arch = "x86_64", not(feature = "mshv")))]
        format! {"{{\"cpus\":{{\"boot_vcpus\":{},\"max_vcpus\":{}}},\"kernel\":{{\"path\":\"{}\"}},\"cmdline\":{{\"args\": \"\"}},\"net\":[{{\"ip\":\"{}\", \"mask\":\"255.255.255.0\", \"mac\":\"{}\"}}], \"disks\":[{{\"path\":\"{}\"}}, {{\"path\":\"{}\"}}]}}",
                 cpu_count,
                 cpu_count,
                 _fw_path,
                 self.network.host_ip,
                 self.network.guest_mac,
                 self.disk_config.disk(DiskType::OperatingSystem).unwrap().as_str(),
                 self.disk_config.disk(DiskType::CloudInit).unwrap().as_str(),
        }

        // Direct kernel boot: both the kernel path and its command line are
        // forwarded.
        #[cfg(any(target_arch = "aarch64", feature = "mshv"))]
        format! {"{{\"cpus\":{{\"boot_vcpus\":{},\"max_vcpus\":{}}},\"kernel\":{{\"path\":\"{}\"}},\"cmdline\":{{\"args\": \"{}\"}},\"net\":[{{\"ip\":\"{}\", \"mask\":\"255.255.255.0\", \"mac\":\"{}\"}}], \"disks\":[{{\"path\":\"{}\"}}, {{\"path\":\"{}\"}}]}}",
                 cpu_count,
                 cpu_count,
                 _kernel_path,
                 _kernel_cmd,
                 self.network.host_ip,
                 self.network.guest_mac,
                 self.disk_config.disk(DiskType::OperatingSystem).unwrap().as_str(),
                 self.disk_config.disk(DiskType::CloudInit).unwrap().as_str(),
        }
    }
864 
865     pub fn get_cpu_count(&self) -> Result<u32, Error> {
866         self.ssh_command("grep -c processor /proc/cpuinfo")?
867             .trim()
868             .parse()
869             .map_err(Error::Parsing)
870     }
871 
872     #[cfg(target_arch = "x86_64")]
873     pub fn get_initial_apicid(&self) -> Result<u32, Error> {
874         self.ssh_command("grep \"initial apicid\" /proc/cpuinfo | grep -o \"[0-9]*\"")?
875             .trim()
876             .parse()
877             .map_err(Error::Parsing)
878     }
879 
880     pub fn get_total_memory(&self) -> Result<u32, Error> {
881         self.ssh_command("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")?
882             .trim()
883             .parse()
884             .map_err(Error::Parsing)
885     }
886 
887     #[cfg(target_arch = "x86_64")]
888     pub fn get_total_memory_l2(&self) -> Result<u32, Error> {
889         self.ssh_command_l2_1("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")?
890             .trim()
891             .parse()
892             .map_err(Error::Parsing)
893     }
894 
895     pub fn get_numa_node_memory(&self, node_id: usize) -> Result<u32, Error> {
896         self.ssh_command(
897             format!(
898                 "grep MemTotal /sys/devices/system/node/node{}/meminfo \
899                         | cut -d \":\" -f 2 | grep -o \"[0-9]*\"",
900                 node_id
901             )
902             .as_str(),
903         )?
904         .trim()
905         .parse()
906         .map_err(Error::Parsing)
907     }
908 
909     pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), Error> {
910         self.network
911             .wait_vm_boot(custom_timeout)
912             .map_err(Error::WaitForBoot)
913     }
914 
915     pub fn check_numa_node_cpus(&self, node_id: usize, cpus: Vec<usize>) -> Result<(), Error> {
916         for cpu in cpus.iter() {
917             let cmd = format!(
918                 "[ -d \"/sys/devices/system/node/node{}/cpu{}\" ]",
919                 node_id, cpu
920             );
921             self.ssh_command(cmd.as_str())?;
922         }
923 
924         Ok(())
925     }
926 
927     pub fn check_numa_node_distances(
928         &self,
929         node_id: usize,
930         distances: &str,
931     ) -> Result<bool, Error> {
932         let cmd = format!("cat /sys/devices/system/node/node{}/distance", node_id);
933         if self.ssh_command(cmd.as_str())?.trim() == distances {
934             Ok(true)
935         } else {
936             Ok(false)
937         }
938     }
939 
940     pub fn check_numa_common(
941         &self,
942         mem_ref: Option<&[u32]>,
943         node_ref: Option<&[Vec<usize>]>,
944         distance_ref: Option<&[&str]>,
945     ) {
946         if let Some(mem_ref) = mem_ref {
947             // Check each NUMA node has been assigned the right amount of
948             // memory.
949             for (i, &m) in mem_ref.iter().enumerate() {
950                 assert!(self.get_numa_node_memory(i).unwrap_or_default() > m);
951             }
952         }
953 
954         if let Some(node_ref) = node_ref {
955             // Check each NUMA node has been assigned the right CPUs set.
956             for (i, n) in node_ref.iter().enumerate() {
957                 self.check_numa_node_cpus(i, n.clone()).unwrap();
958             }
959         }
960 
961         if let Some(distance_ref) = distance_ref {
962             // Check each NUMA node has been assigned the right distances.
963             for (i, &d) in distance_ref.iter().enumerate() {
964                 assert!(self.check_numa_node_distances(i, d).unwrap());
965             }
966         }
967     }
968 
969     #[cfg(target_arch = "x86_64")]
970     pub fn check_sgx_support(&self) -> Result<(), Error> {
971         self.ssh_command(
972             "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX: \
973                     Software Guard Extensions supported = true'",
974         )?;
975         self.ssh_command(
976             "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX_LC: \
977                     SGX launch config supported = true'",
978         )?;
979         self.ssh_command(
980             "cpuid -l 0x12 -s 0 | tr -s [:space:] | grep -q 'SGX1 \
981                     supported = true'",
982         )?;
983 
984         Ok(())
985     }
986 
987     pub fn get_entropy(&self) -> Result<u32, Error> {
988         self.ssh_command("cat /proc/sys/kernel/random/entropy_avail")?
989             .trim()
990             .parse()
991             .map_err(Error::Parsing)
992     }
993 
994     pub fn get_pci_bridge_class(&self) -> Result<String, Error> {
995         Ok(self
996             .ssh_command("cat /sys/bus/pci/devices/0000:00:00.0/class")?
997             .trim()
998             .to_string())
999     }
1000 
1001     pub fn get_pci_device_ids(&self) -> Result<String, Error> {
1002         Ok(self
1003             .ssh_command("cat /sys/bus/pci/devices/*/device")?
1004             .trim()
1005             .to_string())
1006     }
1007 
1008     pub fn get_pci_vendor_ids(&self) -> Result<String, Error> {
1009         Ok(self
1010             .ssh_command("cat /sys/bus/pci/devices/*/vendor")?
1011             .trim()
1012             .to_string())
1013     }
1014 
1015     pub fn does_device_vendor_pair_match(
1016         &self,
1017         device_id: &str,
1018         vendor_id: &str,
1019     ) -> Result<bool, Error> {
1020         // We are checking if console device's device id and vendor id pair matches
1021         let devices = self.get_pci_device_ids()?;
1022         let devices: Vec<&str> = devices.split('\n').collect();
1023         let vendors = self.get_pci_vendor_ids()?;
1024         let vendors: Vec<&str> = vendors.split('\n').collect();
1025 
1026         for (index, d_id) in devices.iter().enumerate() {
1027             if *d_id == device_id {
1028                 if let Some(v_id) = vendors.get(index) {
1029                     if *v_id == vendor_id {
1030                         return Ok(true);
1031                     }
1032                 }
1033             }
1034         }
1035 
1036         Ok(false)
1037     }
1038 
1039     pub fn valid_virtio_fs_cache_size(
1040         &self,
1041         dax: bool,
1042         cache_size: Option<u64>,
1043     ) -> Result<bool, Error> {
1044         // SHM region is called different things depending on kernel
1045         let shm_region = self
1046             .ssh_command("sudo grep 'virtio[0-9]\\|virtio-pci-shm' /proc/iomem || true")?
1047             .trim()
1048             .to_string();
1049 
1050         if shm_region.is_empty() {
1051             return Ok(!dax);
1052         }
1053 
1054         // From this point, the region is not empty, hence it is an error
1055         // if DAX is off.
1056         if !dax {
1057             return Ok(false);
1058         }
1059 
1060         let cache = if let Some(cache) = cache_size {
1061             cache
1062         } else {
1063             // 8Gib by default
1064             0x0002_0000_0000
1065         };
1066 
1067         let args: Vec<&str> = shm_region.split(':').collect();
1068         if args.is_empty() {
1069             return Ok(false);
1070         }
1071 
1072         let args: Vec<&str> = args[0].trim().split('-').collect();
1073         if args.len() != 2 {
1074             return Ok(false);
1075         }
1076 
1077         let start_addr = u64::from_str_radix(args[0], 16).map_err(Error::Parsing)?;
1078         let end_addr = u64::from_str_radix(args[1], 16).map_err(Error::Parsing)?;
1079 
1080         Ok(cache == (end_addr - start_addr + 1))
1081     }
1082 
1083     pub fn check_vsock(&self, socket: &str) {
1084         // Listen from guest on vsock CID=3 PORT=16
1085         // SOCKET-LISTEN:<domain>:<protocol>:<local-address>
1086         let guest_ip = self.network.guest_ip.clone();
1087         let listen_socat = thread::spawn(move || {
1088             ssh_command_ip("sudo socat - SOCKET-LISTEN:40:0:x00x00x10x00x00x00x03x00x00x00x00x00x00x00 > vsock_log", &guest_ip, DEFAULT_SSH_RETRIES, DEFAULT_SSH_TIMEOUT).unwrap();
1089         });
1090 
1091         // Make sure socat is listening, which might take a few second on slow systems
1092         thread::sleep(std::time::Duration::new(10, 0));
1093 
1094         // Write something to vsock from the host
1095         assert!(exec_host_command_status(&format!(
1096             "echo -e \"CONNECT 16\\nHelloWorld!\" | socat - UNIX-CONNECT:{}",
1097             socket
1098         ))
1099         .success());
1100 
1101         // Wait for the thread to terminate.
1102         listen_socat.join().unwrap();
1103 
1104         assert_eq!(
1105             self.ssh_command("cat vsock_log").unwrap().trim(),
1106             "HelloWorld!"
1107         );
1108     }
1109 
1110     #[cfg(target_arch = "x86_64")]
1111     pub fn check_nvidia_gpu(&self) {
1112         // Run CUDA sample to validate it can find the device
1113         let device_query_result = self
1114             .ssh_command("sudo /root/NVIDIA_CUDA-11.3_Samples/bin/x86_64/linux/release/deviceQuery")
1115             .unwrap();
1116         assert!(device_query_result.contains("Detected 1 CUDA Capable device"));
1117         assert!(device_query_result.contains("Device 0: \"NVIDIA Tesla T4\""));
1118         assert!(device_query_result.contains("Result = PASS"));
1119 
1120         // Run NVIDIA DCGM Diagnostics to validate the device is functional
1121         self.ssh_command("sudo nv-hostengine").unwrap();
1122 
1123         assert!(self
1124             .ssh_command("sudo dcgmi discovery -l")
1125             .unwrap()
1126             .contains("Name: NVIDIA Tesla T4"));
1127         assert_eq!(
1128             self.ssh_command("sudo dcgmi diag -r 'diagnostic' | grep Pass | wc -l")
1129                 .unwrap()
1130                 .trim(),
1131             "10"
1132         );
1133     }
1134 
1135     pub fn reboot_linux(&self, current_reboot_count: u32, custom_timeout: Option<i32>) {
1136         let list_boots_cmd = "sudo journalctl --list-boots | wc -l";
1137         let boot_count = self
1138             .ssh_command(list_boots_cmd)
1139             .unwrap()
1140             .trim()
1141             .parse::<u32>()
1142             .unwrap_or_default();
1143 
1144         assert_eq!(boot_count, current_reboot_count + 1);
1145         self.ssh_command("sudo reboot").unwrap();
1146 
1147         self.wait_vm_boot(custom_timeout).unwrap();
1148         let boot_count = self
1149             .ssh_command(list_boots_cmd)
1150             .unwrap()
1151             .trim()
1152             .parse::<u32>()
1153             .unwrap_or_default();
1154         assert_eq!(boot_count, current_reboot_count + 2);
1155     }
1156 
1157     pub fn enable_memory_hotplug(&self) {
1158         self.ssh_command("echo online | sudo tee /sys/devices/system/memory/auto_online_blocks")
1159             .unwrap();
1160     }
1161 
1162     pub fn check_devices_common(&self, socket: Option<&String>, console_text: Option<&String>) {
1163         // Check block devices are readable
1164         self.ssh_command("sudo dd if=/dev/vda of=/dev/null bs=1M iflag=direct count=1024")
1165             .unwrap();
1166         self.ssh_command("sudo dd if=/dev/vdb of=/dev/null bs=1M iflag=direct count=8")
1167             .unwrap();
1168         // Check if the rng device is readable
1169         self.ssh_command("sudo head -c 1000 /dev/hwrng > /dev/null")
1170             .unwrap();
1171         // Check vsock
1172         if let Some(socket) = socket {
1173             self.check_vsock(socket.as_str());
1174         }
1175         // Check if the console is usable
1176         if let Some(console_text) = console_text {
1177             let console_cmd = format!("echo {} | sudo tee /dev/hvc0", console_text);
1178             self.ssh_command(&console_cmd).unwrap();
1179         }
1180         // The net device is 'automatically' exercised through the above 'ssh' commands
1181     }
1182 }
1183 
/// Builder that assembles and spawns a VMM process for a given [`Guest`].
pub struct GuestCommand<'a> {
    // Process command being assembled; arguments are appended through the
    // builder methods.
    command: Command,
    // Guest whose disk and network settings feed the default arguments.
    guest: &'a Guest,
    // When true, `spawn` pipes the child's stdout and stderr.
    capture_output: bool,
    // When true, `spawn` prints the command line before starting the child.
    print_cmd: bool,
}
1190 
1191 impl<'a> GuestCommand<'a> {
1192     pub fn new(guest: &'a Guest) -> Self {
1193         Self::new_with_binary_name(guest, "cloud-hypervisor")
1194     }
1195 
1196     pub fn new_with_binary_name(guest: &'a Guest, binary_name: &str) -> Self {
1197         Self {
1198             command: Command::new(clh_command(binary_name)),
1199             guest,
1200             capture_output: false,
1201             print_cmd: true,
1202         }
1203     }
1204 
1205     pub fn capture_output(&mut self) -> &mut Self {
1206         self.capture_output = true;
1207         self
1208     }
1209 
1210     pub fn set_print_cmd(&mut self, print_cmd: bool) -> &mut Self {
1211         self.print_cmd = print_cmd;
1212         self
1213     }
1214 
1215     pub fn spawn(&mut self) -> io::Result<Child> {
1216         if self.print_cmd {
1217             println!(
1218                 "\n\n==== Start cloud-hypervisor command-line ====\n\n\
1219                      {:?}\n\
1220                      \n==== End cloud-hypervisor command-line ====\n\n",
1221                 self.command
1222             );
1223         }
1224 
1225         if self.capture_output {
1226             let child = self
1227                 .command
1228                 .arg("-v")
1229                 .stderr(Stdio::piped())
1230                 .stdout(Stdio::piped())
1231                 .spawn()
1232                 .unwrap();
1233 
1234             let fd = child.stdout.as_ref().unwrap().as_raw_fd();
1235             let pipesize = unsafe { libc::fcntl(fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };
1236             let fd = child.stderr.as_ref().unwrap().as_raw_fd();
1237             let pipesize1 = unsafe { libc::fcntl(fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };
1238 
1239             if pipesize >= PIPE_SIZE && pipesize1 >= PIPE_SIZE {
1240                 Ok(child)
1241             } else {
1242                 Err(std::io::Error::new(
1243                     std::io::ErrorKind::Other,
1244                     "resizing pipe w/ 'fnctl' failed!",
1245                 ))
1246             }
1247         } else {
1248             self.command.arg("-v").spawn()
1249         }
1250     }
1251 
1252     pub fn args<I, S>(&mut self, args: I) -> &mut Self
1253     where
1254         I: IntoIterator<Item = S>,
1255         S: AsRef<OsStr>,
1256     {
1257         self.command.args(args);
1258         self
1259     }
1260 
1261     pub fn default_disks(&mut self) -> &mut Self {
1262         if self.guest.disk_config.disk(DiskType::CloudInit).is_some() {
1263             self.args(&[
1264                 "--disk",
1265                 format!(
1266                     "path={}",
1267                     self.guest
1268                         .disk_config
1269                         .disk(DiskType::OperatingSystem)
1270                         .unwrap()
1271                 )
1272                 .as_str(),
1273                 format!(
1274                     "path={}",
1275                     self.guest.disk_config.disk(DiskType::CloudInit).unwrap()
1276                 )
1277                 .as_str(),
1278             ])
1279         } else {
1280             self.args(&[
1281                 "--disk",
1282                 format!(
1283                     "path={}",
1284                     self.guest
1285                         .disk_config
1286                         .disk(DiskType::OperatingSystem)
1287                         .unwrap()
1288                 )
1289                 .as_str(),
1290             ])
1291         }
1292     }
1293 
1294     pub fn default_net(&mut self) -> &mut Self {
1295         self.args(&["--net", self.guest.default_net_string().as_str()])
1296     }
1297 }
1298 
/// Returns the relative path of the release binary `cmd`.
///
/// The target triple is taken from the `BUILD_TARGET` environment variable;
/// when it is unset (or not valid Unicode) `x86_64-unknown-linux-gnu` is
/// used.
pub fn clh_command(cmd: &str) -> String {
    // `map_or_else` builds the fallback path only when it is actually needed.
    env::var("BUILD_TARGET").map_or_else(
        |_| format!("target/x86_64-unknown-linux-gnu/release/{}", cmd),
        |target| format!("target/{}/release/{}", target, cmd),
    )
}
1305