1 // Copyright © 2021 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 use once_cell::sync::Lazy; 7 use ssh2::Session; 8 use std::env; 9 use std::ffi::OsStr; 10 use std::fmt::Debug; 11 use std::fs; 12 use std::io; 13 use std::io::{Read, Write}; 14 use std::net::TcpListener; 15 use std::net::TcpStream; 16 use std::os::unix::fs::PermissionsExt; 17 use std::os::unix::io::AsRawFd; 18 use std::path::Path; 19 use std::process::{Child, Command, ExitStatus, Output, Stdio}; 20 use std::str::FromStr; 21 use std::sync::Mutex; 22 use std::thread; 23 use vmm_sys_util::tempdir::TempDir; 24 25 #[derive(Debug)] 26 pub enum Error { 27 Parsing(std::num::ParseIntError), 28 SshCommand(SshCommandError), 29 WaitForBoot(WaitForBootError), 30 } 31 32 impl From<SshCommandError> for Error { 33 fn from(e: SshCommandError) -> Self { 34 Self::SshCommand(e) 35 } 36 } 37 38 pub struct GuestNetworkConfig { 39 pub guest_ip: String, 40 pub l2_guest_ip1: String, 41 pub l2_guest_ip2: String, 42 pub l2_guest_ip3: String, 43 pub host_ip: String, 44 pub guest_mac: String, 45 pub l2_guest_mac1: String, 46 pub l2_guest_mac2: String, 47 pub l2_guest_mac3: String, 48 pub tcp_listener_port: u16, 49 } 50 51 pub const DEFAULT_TCP_LISTENER_MESSAGE: &str = "booted"; 52 pub const DEFAULT_TCP_LISTENER_PORT: u16 = 8000; 53 pub const DEFAULT_TCP_LISTENER_TIMEOUT: i32 = 120; 54 55 #[derive(Debug)] 56 pub enum WaitForBootError { 57 EpollWait(std::io::Error), 58 Listen(std::io::Error), 59 EpollWaitTimeout, 60 WrongGuestAddr, 61 Accept(std::io::Error), 62 } 63 64 impl GuestNetworkConfig { 65 pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), WaitForBootError> { 66 let start = std::time::Instant::now(); 67 // The 'port' is unique per 'GUEST' and listening to wild-card ip avoids retrying on 'TcpListener::bind()' 68 let listen_addr = format!("0.0.0.0:{}", self.tcp_listener_port); 69 let expected_guest_addr = self.guest_ip.as_str(); 70 let mut s = String::new(); 71 let timeout = 
match custom_timeout { 72 Some(t) => t, 73 None => DEFAULT_TCP_LISTENER_TIMEOUT, 74 }; 75 76 match (|| -> Result<(), WaitForBootError> { 77 let listener = 78 TcpListener::bind(&listen_addr.as_str()).map_err(WaitForBootError::Listen)?; 79 listener 80 .set_nonblocking(true) 81 .expect("Cannot set non-blocking for tcp listener"); 82 83 // Reply on epoll w/ timeout to wait for guest connections faithfully 84 let epoll_fd = epoll::create(true).expect("Cannot create epoll fd"); 85 epoll::ctl( 86 epoll_fd, 87 epoll::ControlOptions::EPOLL_CTL_ADD, 88 listener.as_raw_fd(), 89 epoll::Event::new(epoll::Events::EPOLLIN, 0), 90 ) 91 .expect("Cannot add 'tcp_listener' event to epoll"); 92 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 1]; 93 loop { 94 let num_events = match epoll::wait(epoll_fd, timeout * 1000_i32, &mut events[..]) { 95 Ok(num_events) => Ok(num_events), 96 Err(e) => match e.raw_os_error() { 97 Some(libc::EAGAIN) | Some(libc::EINTR) => continue, 98 _ => Err(e), 99 }, 100 } 101 .map_err(WaitForBootError::EpollWait)?; 102 if num_events == 0 { 103 return Err(WaitForBootError::EpollWaitTimeout); 104 } 105 break; 106 } 107 108 match listener.accept() { 109 Ok((_, addr)) => { 110 // Make sure the connection is from the expected 'guest_addr' 111 if addr.ip() != std::net::IpAddr::from_str(expected_guest_addr).unwrap() { 112 s = format!( 113 "Expecting the guest ip '{}' while being connected with ip '{}'", 114 expected_guest_addr, 115 addr.ip() 116 ); 117 return Err(WaitForBootError::WrongGuestAddr); 118 } 119 120 Ok(()) 121 } 122 Err(e) => { 123 s = "TcpListener::accept() failed".to_string(); 124 Err(WaitForBootError::Accept(e)) 125 } 126 } 127 })() { 128 Err(e) => { 129 let duration = start.elapsed(); 130 eprintln!( 131 "\n\n==== Start 'wait_vm_boot' (FAILED) ====\n\n\ 132 duration =\"{:?}, timeout = {}s\"\n\ 133 listen_addr=\"{}\"\n\ 134 expected_guest_addr=\"{}\"\n\ 135 message=\"{}\"\n\ 136 error=\"{:?}\"\n\ 137 \n==== End 'wait_vm_boot' outout 
====\n\n", 138 duration, timeout, listen_addr, expected_guest_addr, s, e 139 ); 140 141 Err(e) 142 } 143 Ok(_) => Ok(()), 144 } 145 } 146 } 147 148 pub enum DiskType { 149 OperatingSystem, 150 CloudInit, 151 } 152 153 pub trait DiskConfig { 154 fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig); 155 fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String; 156 fn disk(&self, disk_type: DiskType) -> Option<String>; 157 } 158 159 #[derive(Clone)] 160 pub struct UbuntuDiskConfig { 161 osdisk_path: String, 162 cloudinit_path: String, 163 image_name: String, 164 } 165 166 impl UbuntuDiskConfig { 167 pub fn new(image_name: String) -> Self { 168 UbuntuDiskConfig { 169 image_name, 170 osdisk_path: String::new(), 171 cloudinit_path: String::new(), 172 } 173 } 174 } 175 176 pub struct WindowsDiskConfig { 177 image_name: String, 178 osdisk_path: String, 179 loopback_device: String, 180 windows_snapshot_cow: String, 181 windows_snapshot: String, 182 } 183 184 impl WindowsDiskConfig { 185 pub fn new(image_name: String) -> Self { 186 WindowsDiskConfig { 187 image_name, 188 osdisk_path: String::new(), 189 loopback_device: String::new(), 190 windows_snapshot_cow: String::new(), 191 windows_snapshot: String::new(), 192 } 193 } 194 } 195 196 impl Drop for WindowsDiskConfig { 197 fn drop(&mut self) { 198 // dmsetup remove windows-snapshot-1 199 std::process::Command::new("dmsetup") 200 .arg("remove") 201 .arg(self.windows_snapshot.as_str()) 202 .output() 203 .expect("Expect removing Windows snapshot with 'dmsetup' to succeed"); 204 205 // dmsetup remove windows-snapshot-cow-1 206 std::process::Command::new("dmsetup") 207 .arg("remove") 208 .arg(self.windows_snapshot_cow.as_str()) 209 .output() 210 .expect("Expect removing Windows snapshot CoW with 'dmsetup' to succeed"); 211 212 // losetup -d <loopback_device> 213 std::process::Command::new("losetup") 214 .args(&["-d", self.loopback_device.as_str()]) 215 .output() 216 
.expect("Expect removing loopback device to succeed"); 217 } 218 } 219 220 impl DiskConfig for UbuntuDiskConfig { 221 fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String { 222 let cloudinit_file_path = 223 String::from(tmp_dir.as_path().join("cloudinit").to_str().unwrap()); 224 225 let cloud_init_directory = tmp_dir.as_path().join("cloud-init").join("ubuntu"); 226 227 fs::create_dir_all(&cloud_init_directory) 228 .expect("Expect creating cloud-init directory to succeed"); 229 230 let source_file_dir = std::env::current_dir() 231 .unwrap() 232 .join("test_data") 233 .join("cloud-init") 234 .join("ubuntu"); 235 236 vec!["meta-data"].iter().for_each(|x| { 237 rate_limited_copy(source_file_dir.join(x), cloud_init_directory.join(x)) 238 .expect("Expect copying cloud-init meta-data to succeed"); 239 }); 240 241 let mut user_data_string = String::new(); 242 fs::File::open(source_file_dir.join("user-data")) 243 .unwrap() 244 .read_to_string(&mut user_data_string) 245 .expect("Expected reading user-data file in to succeed"); 246 user_data_string = user_data_string.replace( 247 "@DEFAULT_TCP_LISTENER_MESSAGE", 248 DEFAULT_TCP_LISTENER_MESSAGE, 249 ); 250 user_data_string = user_data_string.replace("@HOST_IP", &network.host_ip); 251 user_data_string = 252 user_data_string.replace("@TCP_LISTENER_PORT", &network.tcp_listener_port.to_string()); 253 254 fs::File::create(cloud_init_directory.join("user-data")) 255 .unwrap() 256 .write_all(user_data_string.as_bytes()) 257 .expect("Expected writing out user-data to succeed"); 258 259 let mut network_config_string = String::new(); 260 261 fs::File::open(source_file_dir.join("network-config")) 262 .unwrap() 263 .read_to_string(&mut network_config_string) 264 .expect("Expected reading network-config file in to succeed"); 265 266 network_config_string = network_config_string.replace("192.168.2.1", &network.host_ip); 267 network_config_string = network_config_string.replace("192.168.2.2", 
&network.guest_ip); 268 network_config_string = network_config_string.replace("192.168.2.3", &network.l2_guest_ip1); 269 network_config_string = network_config_string.replace("192.168.2.4", &network.l2_guest_ip2); 270 network_config_string = network_config_string.replace("192.168.2.5", &network.l2_guest_ip3); 271 network_config_string = 272 network_config_string.replace("12:34:56:78:90:ab", &network.guest_mac); 273 network_config_string = 274 network_config_string.replace("de:ad:be:ef:12:34", &network.l2_guest_mac1); 275 network_config_string = 276 network_config_string.replace("de:ad:be:ef:34:56", &network.l2_guest_mac2); 277 network_config_string = 278 network_config_string.replace("de:ad:be:ef:56:78", &network.l2_guest_mac3); 279 280 fs::File::create(cloud_init_directory.join("network-config")) 281 .unwrap() 282 .write_all(network_config_string.as_bytes()) 283 .expect("Expected writing out network-config to succeed"); 284 285 std::process::Command::new("mkdosfs") 286 .args(&["-n", "cidata"]) 287 .args(&["-C", cloudinit_file_path.as_str()]) 288 .arg("8192") 289 .output() 290 .expect("Expect creating disk image to succeed"); 291 292 vec!["user-data", "meta-data", "network-config"] 293 .iter() 294 .for_each(|x| { 295 std::process::Command::new("mcopy") 296 .arg("-o") 297 .args(&["-i", cloudinit_file_path.as_str()]) 298 .args(&["-s", cloud_init_directory.join(x).to_str().unwrap(), "::"]) 299 .output() 300 .expect("Expect copying files to disk image to succeed"); 301 }); 302 303 cloudinit_file_path 304 } 305 306 fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig) { 307 let mut workload_path = dirs::home_dir().unwrap(); 308 workload_path.push("workloads"); 309 310 let mut osdisk_base_path = workload_path; 311 osdisk_base_path.push(&self.image_name); 312 313 let osdisk_path = String::from(tmp_dir.as_path().join("osdisk.img").to_str().unwrap()); 314 let cloudinit_path = self.prepare_cloudinit(tmp_dir, network); 315 316 
rate_limited_copy(osdisk_base_path, &osdisk_path) 317 .expect("copying of OS source disk image failed"); 318 319 self.cloudinit_path = cloudinit_path; 320 self.osdisk_path = osdisk_path; 321 } 322 323 fn disk(&self, disk_type: DiskType) -> Option<String> { 324 match disk_type { 325 DiskType::OperatingSystem => Some(self.osdisk_path.clone()), 326 DiskType::CloudInit => Some(self.cloudinit_path.clone()), 327 } 328 } 329 } 330 331 impl DiskConfig for WindowsDiskConfig { 332 fn prepare_cloudinit(&self, _tmp_dir: &TempDir, _network: &GuestNetworkConfig) -> String { 333 String::new() 334 } 335 336 fn prepare_files(&mut self, tmp_dir: &TempDir, _network: &GuestNetworkConfig) { 337 let mut workload_path = dirs::home_dir().unwrap(); 338 workload_path.push("workloads"); 339 340 let mut osdisk_path = workload_path; 341 osdisk_path.push(&self.image_name); 342 343 let osdisk_blk_size = fs::metadata(osdisk_path) 344 .expect("Expect retrieving Windows image metadata") 345 .len() 346 >> 9; 347 348 let snapshot_cow_path = 349 String::from(tmp_dir.as_path().join("snapshot_cow").to_str().unwrap()); 350 351 // Create and truncate CoW file for device mapper 352 let cow_file_size: u64 = 1 << 30; 353 let cow_file_blk_size = cow_file_size >> 9; 354 let cow_file = std::fs::File::create(snapshot_cow_path.as_str()) 355 .expect("Expect creating CoW image to succeed"); 356 cow_file 357 .set_len(cow_file_size) 358 .expect("Expect truncating CoW image to succeed"); 359 360 // losetup --find --show /tmp/snapshot_cow 361 let loopback_device = std::process::Command::new("losetup") 362 .arg("--find") 363 .arg("--show") 364 .arg(snapshot_cow_path.as_str()) 365 .output() 366 .expect("Expect creating loopback device from snapshot CoW image to succeed"); 367 368 self.loopback_device = String::from_utf8_lossy(&loopback_device.stdout) 369 .trim() 370 .to_string(); 371 372 let random_extension = tmp_dir.as_path().file_name().unwrap(); 373 let windows_snapshot_cow = format!( 374 "windows-snapshot-cow-{}", 
375 random_extension.to_str().unwrap() 376 ); 377 378 // dmsetup create windows-snapshot-cow-1 --table '0 2097152 linear /dev/loop1 0' 379 std::process::Command::new("dmsetup") 380 .arg("create") 381 .arg(windows_snapshot_cow.as_str()) 382 .args(&[ 383 "--table", 384 format!("0 {} linear {} 0", cow_file_blk_size, self.loopback_device).as_str(), 385 ]) 386 .output() 387 .expect("Expect creating Windows snapshot CoW with 'dmsetup' to succeed"); 388 389 let windows_snapshot = format!("windows-snapshot-{}", random_extension.to_str().unwrap()); 390 391 // dmsetup mknodes 392 std::process::Command::new("dmsetup") 393 .arg("mknodes") 394 .output() 395 .expect("Expect device mapper nodes to be ready"); 396 397 // dmsetup create windows-snapshot-1 --table '0 41943040 snapshot /dev/mapper/windows-base /dev/mapper/windows-snapshot-cow-1 P 8' 398 std::process::Command::new("dmsetup") 399 .arg("create") 400 .arg(windows_snapshot.as_str()) 401 .args(&[ 402 "--table", 403 format!( 404 "0 {} snapshot /dev/mapper/windows-base /dev/mapper/{} P 8", 405 osdisk_blk_size, 406 windows_snapshot_cow.as_str() 407 ) 408 .as_str(), 409 ]) 410 .output() 411 .expect("Expect creating Windows snapshot with 'dmsetup' to succeed"); 412 413 // dmsetup mknodes 414 std::process::Command::new("dmsetup") 415 .arg("mknodes") 416 .output() 417 .expect("Expect device mapper nodes to be ready"); 418 419 self.osdisk_path = format!("/dev/mapper/{}", windows_snapshot); 420 self.windows_snapshot_cow = windows_snapshot_cow; 421 self.windows_snapshot = windows_snapshot; 422 } 423 424 fn disk(&self, disk_type: DiskType) -> Option<String> { 425 match disk_type { 426 DiskType::OperatingSystem => Some(self.osdisk_path.clone()), 427 DiskType::CloudInit => None, 428 } 429 } 430 } 431 432 pub fn rate_limited_copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<u64> { 433 for i in 0..10 { 434 let free_bytes = unsafe { 435 let mut stats = std::mem::MaybeUninit::zeroed(); 436 let fs_name = 
std::ffi::CString::new("/tmp").unwrap(); 437 libc::statvfs(fs_name.as_ptr(), stats.as_mut_ptr()); 438 439 let free_blocks = stats.assume_init().f_bfree; 440 let block_size = stats.assume_init().f_bsize; 441 442 free_blocks * block_size 443 }; 444 445 // Make sure there is at least 6 GiB of space 446 if free_bytes < 6 << 30 { 447 eprintln!( 448 "Not enough space on disk ({}). Attempt {} of 10. Sleeping.", 449 free_bytes, i 450 ); 451 thread::sleep(std::time::Duration::new(60, 0)); 452 continue; 453 } 454 455 match fs::copy(&from, &to) { 456 Err(e) => { 457 if let Some(errno) = e.raw_os_error() { 458 if errno == libc::ENOSPC { 459 eprintln!("Copy returned ENOSPC. Attempt {} of 10. Sleeping.", i); 460 thread::sleep(std::time::Duration::new(60, 0)); 461 continue; 462 } 463 } 464 return Err(e); 465 } 466 Ok(i) => return Ok(i), 467 } 468 } 469 Err(io::Error::last_os_error()) 470 } 471 472 pub fn handle_child_output( 473 r: Result<(), std::boxed::Box<dyn std::any::Any + std::marker::Send>>, 474 output: &std::process::Output, 475 ) { 476 use std::os::unix::process::ExitStatusExt; 477 if r.is_ok() && output.status.success() { 478 return; 479 } 480 481 match output.status.code() { 482 None => { 483 // Don't treat child.kill() as a problem 484 if output.status.signal() == Some(9) && r.is_ok() { 485 return; 486 } 487 488 eprintln!( 489 "==== child killed by signal: {} ====", 490 output.status.signal().unwrap() 491 ); 492 } 493 Some(code) => { 494 eprintln!("\n\n==== child exit code: {} ====", code); 495 } 496 } 497 498 eprintln!( 499 "\n\n==== Start child stdout ====\n\n{}\n\n==== End child stdout ====", 500 String::from_utf8_lossy(&output.stdout) 501 ); 502 eprintln!( 503 "\n\n==== Start child stderr ====\n\n{}\n\n==== End child stderr ====", 504 String::from_utf8_lossy(&output.stderr) 505 ); 506 507 panic!("Test failed") 508 } 509 510 #[derive(Debug)] 511 pub struct PasswordAuth { 512 pub username: String, 513 pub password: String, 514 } 515 516 pub const 
DEFAULT_SSH_RETRIES: u8 = 6; 517 pub const DEFAULT_SSH_TIMEOUT: u8 = 10; 518 519 #[derive(Debug)] 520 pub enum SshCommandError { 521 Connection(std::io::Error), 522 Handshake(ssh2::Error), 523 Authentication(ssh2::Error), 524 ChannelSession(ssh2::Error), 525 Command(ssh2::Error), 526 ExitStatus(ssh2::Error), 527 NonZeroExitStatus(i32), 528 FileRead(std::io::Error), 529 FileMetadata(std::io::Error), 530 ScpSend(ssh2::Error), 531 WriteAll(std::io::Error), 532 SendEof(ssh2::Error), 533 WaitEof(ssh2::Error), 534 } 535 536 fn scp_to_guest_with_auth( 537 path: &Path, 538 remote_path: &Path, 539 auth: &PasswordAuth, 540 ip: &str, 541 retries: u8, 542 timeout: u8, 543 ) -> Result<(), SshCommandError> { 544 let mut counter = 0; 545 loop { 546 match (|| -> Result<(), SshCommandError> { 547 let tcp = 548 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?; 549 let mut sess = Session::new().unwrap(); 550 sess.set_tcp_stream(tcp); 551 sess.handshake().map_err(SshCommandError::Handshake)?; 552 553 sess.userauth_password(&auth.username, &auth.password) 554 .map_err(SshCommandError::Authentication)?; 555 assert!(sess.authenticated()); 556 557 let content = fs::read(path).map_err(SshCommandError::FileRead)?; 558 let mode = fs::metadata(path) 559 .map_err(SshCommandError::FileMetadata)? 
560 .permissions() 561 .mode() 562 & 0o777; 563 564 let mut channel = sess 565 .scp_send(remote_path, mode as i32, content.len() as u64, None) 566 .map_err(SshCommandError::ScpSend)?; 567 channel 568 .write_all(&content) 569 .map_err(SshCommandError::WriteAll)?; 570 channel.send_eof().map_err(SshCommandError::SendEof)?; 571 channel.wait_eof().map_err(SshCommandError::WaitEof)?; 572 573 // Intentionally ignore these results here as their failure 574 // does not precipitate a repeat 575 let _ = channel.close(); 576 let _ = channel.wait_close(); 577 578 Ok(()) 579 })() { 580 Ok(_) => break, 581 Err(e) => { 582 counter += 1; 583 if counter >= retries { 584 eprintln!( 585 "\n\n==== Start scp command output (FAILED) ====\n\n\ 586 path =\"{:?}\"\n\ 587 remote_path =\"{:?}\"\n\ 588 auth=\"{:#?}\"\n\ 589 ip=\"{}\"\n\ 590 error=\"{:?}\"\n\ 591 \n==== End scp command outout ====\n\n", 592 path, remote_path, auth, ip, e 593 ); 594 595 return Err(e); 596 } 597 } 598 }; 599 thread::sleep(std::time::Duration::new((timeout * counter).into(), 0)); 600 } 601 Ok(()) 602 } 603 604 pub fn scp_to_guest( 605 path: &Path, 606 remote_path: &Path, 607 ip: &str, 608 retries: u8, 609 timeout: u8, 610 ) -> Result<(), SshCommandError> { 611 scp_to_guest_with_auth( 612 path, 613 remote_path, 614 &PasswordAuth { 615 username: String::from("cloud"), 616 password: String::from("cloud123"), 617 }, 618 ip, 619 retries, 620 timeout, 621 ) 622 } 623 624 pub fn ssh_command_ip_with_auth( 625 command: &str, 626 auth: &PasswordAuth, 627 ip: &str, 628 retries: u8, 629 timeout: u8, 630 ) -> Result<String, SshCommandError> { 631 let mut s = String::new(); 632 633 let mut counter = 0; 634 loop { 635 match (|| -> Result<(), SshCommandError> { 636 let tcp = 637 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?; 638 let mut sess = Session::new().unwrap(); 639 sess.set_tcp_stream(tcp); 640 sess.handshake().map_err(SshCommandError::Handshake)?; 641 642 
sess.userauth_password(&auth.username, &auth.password) 643 .map_err(SshCommandError::Authentication)?; 644 assert!(sess.authenticated()); 645 646 let mut channel = sess 647 .channel_session() 648 .map_err(SshCommandError::ChannelSession)?; 649 channel.exec(command).map_err(SshCommandError::Command)?; 650 651 // Intentionally ignore these results here as their failure 652 // does not precipitate a repeat 653 let _ = channel.read_to_string(&mut s); 654 let _ = channel.close(); 655 let _ = channel.wait_close(); 656 657 let status = channel.exit_status().map_err(SshCommandError::ExitStatus)?; 658 659 if status != 0 { 660 Err(SshCommandError::NonZeroExitStatus(status)) 661 } else { 662 Ok(()) 663 } 664 })() { 665 Ok(_) => break, 666 Err(e) => { 667 counter += 1; 668 if counter >= retries { 669 eprintln!( 670 "\n\n==== Start ssh command output (FAILED) ====\n\n\ 671 command=\"{}\"\n\ 672 auth=\"{:#?}\"\n\ 673 ip=\"{}\"\n\ 674 output=\"{}\"\n\ 675 error=\"{:?}\"\n\ 676 \n==== End ssh command outout ====\n\n", 677 command, auth, ip, s, e 678 ); 679 680 return Err(e); 681 } 682 } 683 }; 684 thread::sleep(std::time::Duration::new((timeout * counter).into(), 0)); 685 } 686 Ok(s) 687 } 688 689 pub fn ssh_command_ip( 690 command: &str, 691 ip: &str, 692 retries: u8, 693 timeout: u8, 694 ) -> Result<String, SshCommandError> { 695 ssh_command_ip_with_auth( 696 command, 697 &PasswordAuth { 698 username: String::from("cloud"), 699 password: String::from("cloud123"), 700 }, 701 ip, 702 retries, 703 timeout, 704 ) 705 } 706 707 pub fn exec_host_command_status(command: &str) -> ExitStatus { 708 std::process::Command::new("bash") 709 .args(&["-c", command]) 710 .status() 711 .unwrap_or_else(|_| panic!("Expected '{}' to run", command)) 712 } 713 714 pub fn exec_host_command_output(command: &str) -> Output { 715 std::process::Command::new("bash") 716 .args(&["-c", command]) 717 .output() 718 .unwrap_or_else(|_| panic!("Expected '{}' to run", command)) 719 } 720 721 pub const PIPE_SIZE: 
i32 = 32 << 20; 722 723 static NEXT_VM_ID: Lazy<Mutex<u8>> = Lazy::new(|| Mutex::new(1)); 724 725 pub struct Guest { 726 pub tmp_dir: TempDir, 727 pub disk_config: Box<dyn DiskConfig>, 728 pub network: GuestNetworkConfig, 729 } 730 731 // Safe to implement as we know we have no interior mutability 732 impl std::panic::RefUnwindSafe for Guest {} 733 734 impl Guest { 735 pub fn new_from_ip_range(mut disk_config: Box<dyn DiskConfig>, class: &str, id: u8) -> Self { 736 let tmp_dir = TempDir::new_with_prefix("/tmp/ch").unwrap(); 737 738 let network = GuestNetworkConfig { 739 guest_ip: format!("{}.{}.2", class, id), 740 l2_guest_ip1: format!("{}.{}.3", class, id), 741 l2_guest_ip2: format!("{}.{}.4", class, id), 742 l2_guest_ip3: format!("{}.{}.5", class, id), 743 host_ip: format!("{}.{}.1", class, id), 744 guest_mac: format!("12:34:56:78:90:{:02x}", id), 745 l2_guest_mac1: format!("de:ad:be:ef:12:{:02x}", id), 746 l2_guest_mac2: format!("de:ad:be:ef:34:{:02x}", id), 747 l2_guest_mac3: format!("de:ad:be:ef:56:{:02x}", id), 748 tcp_listener_port: DEFAULT_TCP_LISTENER_PORT + id as u16, 749 }; 750 751 disk_config.prepare_files(&tmp_dir, &network); 752 753 Guest { 754 tmp_dir, 755 disk_config, 756 network, 757 } 758 } 759 760 pub fn new(disk_config: Box<dyn DiskConfig>) -> Self { 761 let mut guard = NEXT_VM_ID.lock().unwrap(); 762 let id = *guard; 763 *guard = id + 1; 764 765 Self::new_from_ip_range(disk_config, "192.168", id) 766 } 767 768 pub fn default_net_string(&self) -> String { 769 format!( 770 "tap=,mac={},ip={},mask=255.255.255.0", 771 self.network.guest_mac, self.network.host_ip 772 ) 773 } 774 775 pub fn default_net_string_w_iommu(&self) -> String { 776 format!( 777 "tap=,mac={},ip={},mask=255.255.255.0,iommu=on", 778 self.network.guest_mac, self.network.host_ip 779 ) 780 } 781 782 pub fn ssh_command(&self, command: &str) -> Result<String, SshCommandError> { 783 ssh_command_ip( 784 command, 785 &self.network.guest_ip, 786 DEFAULT_SSH_RETRIES, 787 
DEFAULT_SSH_TIMEOUT, 788 ) 789 } 790 791 #[cfg(target_arch = "x86_64")] 792 pub fn ssh_command_l1(&self, command: &str) -> Result<String, SshCommandError> { 793 ssh_command_ip( 794 command, 795 &self.network.guest_ip, 796 DEFAULT_SSH_RETRIES, 797 DEFAULT_SSH_TIMEOUT, 798 ) 799 } 800 801 #[cfg(target_arch = "x86_64")] 802 pub fn ssh_command_l2_1(&self, command: &str) -> Result<String, SshCommandError> { 803 ssh_command_ip( 804 command, 805 &self.network.l2_guest_ip1, 806 DEFAULT_SSH_RETRIES, 807 DEFAULT_SSH_TIMEOUT, 808 ) 809 } 810 811 #[cfg(target_arch = "x86_64")] 812 pub fn ssh_command_l2_2(&self, command: &str) -> Result<String, SshCommandError> { 813 ssh_command_ip( 814 command, 815 &self.network.l2_guest_ip2, 816 DEFAULT_SSH_RETRIES, 817 DEFAULT_SSH_TIMEOUT, 818 ) 819 } 820 821 #[cfg(target_arch = "x86_64")] 822 pub fn ssh_command_l2_3(&self, command: &str) -> Result<String, SshCommandError> { 823 ssh_command_ip( 824 command, 825 &self.network.l2_guest_ip3, 826 DEFAULT_SSH_RETRIES, 827 DEFAULT_SSH_TIMEOUT, 828 ) 829 } 830 831 pub fn api_create_body(&self, cpu_count: u8, kernel_path: &str, kernel_cmd: &str) -> String { 832 format! {"{{\"cpus\":{{\"boot_vcpus\":{},\"max_vcpus\":{}}},\"kernel\":{{\"path\":\"{}\"}},\"cmdline\":{{\"args\": \"{}\"}},\"net\":[{{\"ip\":\"{}\", \"mask\":\"255.255.255.0\", \"mac\":\"{}\"}}], \"disks\":[{{\"path\":\"{}\"}}, {{\"path\":\"{}\"}}]}}", 833 cpu_count, 834 cpu_count, 835 kernel_path, 836 kernel_cmd, 837 self.network.host_ip, 838 self.network.guest_mac, 839 self.disk_config.disk(DiskType::OperatingSystem).unwrap().as_str(), 840 self.disk_config.disk(DiskType::CloudInit).unwrap().as_str(), 841 } 842 } 843 844 pub fn get_cpu_count(&self) -> Result<u32, Error> { 845 self.ssh_command("grep -c processor /proc/cpuinfo")? 
846 .trim() 847 .parse() 848 .map_err(Error::Parsing) 849 } 850 851 #[cfg(target_arch = "x86_64")] 852 pub fn get_initial_apicid(&self) -> Result<u32, Error> { 853 self.ssh_command("grep \"initial apicid\" /proc/cpuinfo | grep -o \"[0-9]*\"")? 854 .trim() 855 .parse() 856 .map_err(Error::Parsing) 857 } 858 859 pub fn get_total_memory(&self) -> Result<u32, Error> { 860 self.ssh_command("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")? 861 .trim() 862 .parse() 863 .map_err(Error::Parsing) 864 } 865 866 #[cfg(target_arch = "x86_64")] 867 pub fn get_total_memory_l2(&self) -> Result<u32, Error> { 868 self.ssh_command_l2_1("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")? 869 .trim() 870 .parse() 871 .map_err(Error::Parsing) 872 } 873 874 pub fn get_numa_node_memory(&self, node_id: usize) -> Result<u32, Error> { 875 self.ssh_command( 876 format!( 877 "grep MemTotal /sys/devices/system/node/node{}/meminfo \ 878 | cut -d \":\" -f 2 | grep -o \"[0-9]*\"", 879 node_id 880 ) 881 .as_str(), 882 )? 
883 .trim() 884 .parse() 885 .map_err(Error::Parsing) 886 } 887 888 pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), Error> { 889 self.network 890 .wait_vm_boot(custom_timeout) 891 .map_err(Error::WaitForBoot) 892 } 893 894 pub fn check_numa_node_cpus(&self, node_id: usize, cpus: Vec<usize>) -> Result<(), Error> { 895 for cpu in cpus.iter() { 896 let cmd = format!( 897 "[ -d \"/sys/devices/system/node/node{}/cpu{}\" ]", 898 node_id, cpu 899 ); 900 self.ssh_command(cmd.as_str())?; 901 } 902 903 Ok(()) 904 } 905 906 pub fn check_numa_node_distances( 907 &self, 908 node_id: usize, 909 distances: &str, 910 ) -> Result<bool, Error> { 911 let cmd = format!("cat /sys/devices/system/node/node{}/distance", node_id); 912 if self.ssh_command(cmd.as_str())?.trim() == distances { 913 Ok(true) 914 } else { 915 Ok(false) 916 } 917 } 918 919 pub fn check_numa_common( 920 &self, 921 mem_ref: Option<&[u32]>, 922 node_ref: Option<&[Vec<usize>]>, 923 distance_ref: Option<&[&str]>, 924 ) { 925 if let Some(mem_ref) = mem_ref { 926 // Check each NUMA node has been assigned the right amount of 927 // memory. 928 for (i, &m) in mem_ref.iter().enumerate() { 929 assert!(self.get_numa_node_memory(i).unwrap_or_default() > m); 930 } 931 } 932 933 if let Some(node_ref) = node_ref { 934 // Check each NUMA node has been assigned the right CPUs set. 935 for (i, n) in node_ref.iter().enumerate() { 936 self.check_numa_node_cpus(i, n.clone()).unwrap(); 937 } 938 } 939 940 if let Some(distance_ref) = distance_ref { 941 // Check each NUMA node has been assigned the right distances. 
942 for (i, &d) in distance_ref.iter().enumerate() { 943 assert!(self.check_numa_node_distances(i, d).unwrap()); 944 } 945 } 946 } 947 948 #[cfg(target_arch = "x86_64")] 949 pub fn check_sgx_support(&self) -> Result<(), Error> { 950 self.ssh_command( 951 "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX: \ 952 Software Guard Extensions supported = true'", 953 )?; 954 self.ssh_command( 955 "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX_LC: \ 956 SGX launch config supported = true'", 957 )?; 958 self.ssh_command( 959 "cpuid -l 0x12 -s 0 | tr -s [:space:] | grep -q 'SGX1 \ 960 supported = true'", 961 )?; 962 963 Ok(()) 964 } 965 966 pub fn get_pci_bridge_class(&self) -> Result<String, Error> { 967 Ok(self 968 .ssh_command("cat /sys/bus/pci/devices/0000:00:00.0/class")? 969 .trim() 970 .to_string()) 971 } 972 973 pub fn get_pci_device_ids(&self) -> Result<String, Error> { 974 Ok(self 975 .ssh_command("cat /sys/bus/pci/devices/*/device")? 976 .trim() 977 .to_string()) 978 } 979 980 pub fn get_pci_vendor_ids(&self) -> Result<String, Error> { 981 Ok(self 982 .ssh_command("cat /sys/bus/pci/devices/*/vendor")? 
983 .trim() 984 .to_string()) 985 } 986 987 pub fn does_device_vendor_pair_match( 988 &self, 989 device_id: &str, 990 vendor_id: &str, 991 ) -> Result<bool, Error> { 992 // We are checking if console device's device id and vendor id pair matches 993 let devices = self.get_pci_device_ids()?; 994 let devices: Vec<&str> = devices.split('\n').collect(); 995 let vendors = self.get_pci_vendor_ids()?; 996 let vendors: Vec<&str> = vendors.split('\n').collect(); 997 998 for (index, d_id) in devices.iter().enumerate() { 999 if *d_id == device_id { 1000 if let Some(v_id) = vendors.get(index) { 1001 if *v_id == vendor_id { 1002 return Ok(true); 1003 } 1004 } 1005 } 1006 } 1007 1008 Ok(false) 1009 } 1010 1011 pub fn check_vsock(&self, socket: &str) { 1012 // Listen from guest on vsock CID=3 PORT=16 1013 // SOCKET-LISTEN:<domain>:<protocol>:<local-address> 1014 let guest_ip = self.network.guest_ip.clone(); 1015 let listen_socat = thread::spawn(move || { 1016 ssh_command_ip("sudo socat - SOCKET-LISTEN:40:0:x00x00x10x00x00x00x03x00x00x00x00x00x00x00 > vsock_log", &guest_ip, DEFAULT_SSH_RETRIES, DEFAULT_SSH_TIMEOUT).unwrap(); 1017 }); 1018 1019 // Make sure socat is listening, which might take a few second on slow systems 1020 thread::sleep(std::time::Duration::new(10, 0)); 1021 1022 // Write something to vsock from the host 1023 assert!(exec_host_command_status(&format!( 1024 "echo -e \"CONNECT 16\\nHelloWorld!\" | socat - UNIX-CONNECT:{}", 1025 socket 1026 )) 1027 .success()); 1028 1029 // Wait for the thread to terminate. 1030 listen_socat.join().unwrap(); 1031 1032 assert_eq!( 1033 self.ssh_command("cat vsock_log").unwrap().trim(), 1034 "HelloWorld!" 
);
    }

    /// Checks from inside the guest that an NVIDIA GPU is visible and healthy.
    ///
    /// Runs the CUDA `deviceQuery` sample, starts `nv-hostengine`, then uses
    /// `dcgmi` to list the device and to run the 'diagnostic' suite.
    ///
    /// # Panics
    ///
    /// Panics if any SSH command fails, if the output does not report a single
    /// "NVIDIA Tesla T4" device, or if the diagnostic output does not contain
    /// exactly ten lines matching `Pass`.
    #[cfg(target_arch = "x86_64")]
    pub fn check_nvidia_gpu(&self) {
        // Run CUDA sample to validate it can find the device
        let device_query_result = self
            .ssh_command("sudo /root/NVIDIA_CUDA-11.3_Samples/bin/x86_64/linux/release/deviceQuery")
            .unwrap();
        assert!(device_query_result.contains("Detected 1 CUDA Capable device"));
        assert!(device_query_result.contains("Device 0: \"NVIDIA Tesla T4\""));
        assert!(device_query_result.contains("Result = PASS"));

        // Run NVIDIA DCGM Diagnostics to validate the device is functional
        self.ssh_command("sudo nv-hostengine").unwrap();

        assert!(self
            .ssh_command("sudo dcgmi discovery -l")
            .unwrap()
            .contains("Name: NVIDIA Tesla T4"));
        assert_eq!(
            self.ssh_command("sudo dcgmi diag -r 'diagnostic' | grep Pass | wc -l")
                .unwrap()
                .trim(),
            "10"
        );
    }

    /// Reboots the guest and checks, through the journal, that exactly one
    /// additional boot was recorded.
    ///
    /// * `current_reboot_count` - number of reboots already performed; the
    ///   journal must list `current_reboot_count + 1` boots before the reboot
    ///   and `current_reboot_count + 2` afterwards.
    /// * `custom_timeout` - forwarded unchanged to `wait_vm_boot`.
    ///
    /// # Panics
    ///
    /// Panics if an SSH command fails, if waiting for the boot fails, or if
    /// either boot-count assertion does not hold.
    pub fn reboot_linux(&self, current_reboot_count: u32, custom_timeout: Option<i32>) {
        // Output that does not parse as a number is counted as 0 boots, which
        // then trips the assertions below.
        let boot_count = || {
            self.ssh_command("sudo journalctl --list-boots | wc -l")
                .unwrap()
                .trim()
                .parse::<u32>()
                .unwrap_or_default()
        };

        assert_eq!(boot_count(), current_reboot_count + 1);
        self.ssh_command("sudo reboot").unwrap();

        self.wait_vm_boot(custom_timeout).unwrap();
        assert_eq!(boot_count(), current_reboot_count + 2);
    }

    /// Writes `online` to the guest's
    /// `/sys/devices/system/memory/auto_online_blocks` file.
    ///
    /// # Panics
    ///
    /// Panics if the SSH command fails.
    pub fn enable_memory_hotplug(&self) {
        self.ssh_command("echo online | sudo tee /sys/devices/system/memory/auto_online_blocks")
            .unwrap();
    }

    /// Exercises the guest devices shared by most test configurations.
    ///
    /// Always reads from `/dev/vda`, `/dev/vdb` and `/dev/hwrng`. The remaining
    /// checks only run when the matching argument is `Some`:
    ///
    /// * `socket` - passed to `check_vsock`.
    /// * `console_text` - text echoed into `/dev/hvc0`.
    /// * `pmem_path` - device that is mounted on `/mnt`, written to, unmounted
    ///   and mounted again to confirm the written file is still there.
    ///
    /// # Panics
    ///
    /// Panics if any SSH command fails or any expected output does not match.
    pub fn check_devices_common(
        &self,
        socket: Option<&String>,
        console_text: Option<&String>,
        pmem_path: Option<&String>,
    ) {
        // Check block devices are readable
        self.ssh_command("sudo dd if=/dev/vda of=/dev/null bs=1M iflag=direct count=1024")
            .unwrap();
        self.ssh_command("sudo dd if=/dev/vdb of=/dev/null bs=1M iflag=direct count=8")
            .unwrap();
        // Check if the rng device is readable
        self.ssh_command("sudo head -c 1000 /dev/hwrng > /dev/null")
            .unwrap();
        // Check vsock
        if let Some(socket) = socket {
            self.check_vsock(socket.as_str());
        }
        // Check if the console is usable
        if let Some(console_text) = console_text {
            let console_cmd = format!("echo {} | sudo tee /dev/hvc0", console_text);
            self.ssh_command(&console_cmd).unwrap();
        }
        // The net device is 'automatically' exercised through the above 'ssh' commands

        // Check if the pmem device is usable
        if let Some(pmem_path) = pmem_path {
            assert_eq!(
                self.ssh_command(&format!("ls {}", pmem_path))
                    .unwrap()
                    .trim(),
                pmem_path
            );
            assert_eq!(
                self.ssh_command(&format!("sudo mount {} /mnt", pmem_path))
                    .unwrap(),
                ""
            );
            assert_eq!(self.ssh_command("ls /mnt").unwrap(), "lost+found\n");
            self.ssh_command("echo test123 | sudo tee /mnt/test")
                .unwrap();
            assert_eq!(self.ssh_command("sudo umount /mnt").unwrap(), "");
            assert_eq!(self.ssh_command("ls /mnt").unwrap(), "");

            // Mount a second time: the file written above must still be there
            assert_eq!(
                self.ssh_command(&format!("sudo mount {} /mnt", pmem_path))
                    .unwrap(),
                ""
            );
            assert_eq!(
                self.ssh_command("sudo cat /mnt/test || true")
                    .unwrap()
                    .trim(),
                "test123"
            );
            self.ssh_command("sudo rm /mnt/test").unwrap();
            assert_eq!(self.ssh_command("sudo umount /mnt").unwrap(), "");
        }
    }
}

/// Verbosity requested from the launched binary.
pub enum VerbosityLevel {
    /// No verbosity flag is passed.
    Warn,
    /// Passes `-v`.
    Info,
    /// Passes `-vv`.
    Debug,
}

impl Default for VerbosityLevel {
    fn default() -> Self {
        Self::Warn
    }
}

/// Formats the level as the command-line flag it maps to (`""`, `-v`, `-vv`).
///
/// Implementing `Display` rather than `ToString` keeps `to_string()` available
/// through the standard blanket implementation.
impl std::fmt::Display for VerbosityLevel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Warn => "",
            Self::Info => "-v",
            Self::Debug => "-vv",
        })
    }
}

/// Builder around [`Command`] for launching a binary on behalf of a [`Guest`].
pub struct GuestCommand<'a> {
    command: Command,
    guest: &'a Guest,
    capture_output: bool,
    print_cmd: bool,
    verbosity: VerbosityLevel,
}

impl<'a> GuestCommand<'a> {
    /// Creates a command for the `cloud-hypervisor` binary located through
    /// [`clh_command`].
    pub fn new(guest: &'a Guest) -> Self {
        Self::new_with_binary_path(guest, &clh_command("cloud-hypervisor"))
    }

    /// Creates a command for the binary at `binary_path`.
    ///
    /// Output capture starts disabled, command-line printing enabled, and the
    /// verbosity set to [`VerbosityLevel::Info`].
    pub fn new_with_binary_path(guest: &'a Guest, binary_path: &str) -> Self {
        Self {
            command: Command::new(binary_path),
            guest,
            capture_output: false,
            print_cmd: true,
            verbosity: VerbosityLevel::Info,
        }
    }

    /// Sets the verbosity flag that [`spawn`](Self::spawn) will append.
    pub fn verbosity(&mut self, verbosity: VerbosityLevel) -> &mut Self {
        self.verbosity = verbosity;
        self
    }

    /// Requests that stdout and stderr of the child be piped.
    pub fn capture_output(&mut self) -> &mut Self {
        self.capture_output = true;
        self
    }

    /// Enables or disables printing the command line before spawning.
    pub fn set_print_cmd(&mut self, print_cmd: bool) -> &mut Self {
        self.print_cmd = print_cmd;
        self
    }

    /// Spawns the configured command.
    ///
    /// The verbosity flag is appended to the underlying command on every call,
    /// so spawning the same `GuestCommand` twice passes the flag twice.
    ///
    /// # Errors
    ///
    /// Returns the error from spawning the process. With output capture
    /// enabled, also returns an error when either pipe cannot be resized to
    /// `PIPE_SIZE`; the child is killed and reaped first, because the caller
    /// never receives a handle to it.
    pub fn spawn(&mut self) -> io::Result<Child> {
        // Reuse the flag mapping from `to_string()`; `Warn` maps to no flag
        let verbosity_flag = self.verbosity.to_string();
        if !verbosity_flag.is_empty() {
            self.command.arg(verbosity_flag);
        }

        if self.print_cmd {
            println!(
                "\n\n==== Start cloud-hypervisor command-line ====\n\n\
                 {:?}\n\
                 \n==== End cloud-hypervisor command-line ====\n\n",
                self.command
            );
        }

        if !self.capture_output {
            return self.command.spawn();
        }

        let mut child = self
            .command
            .stderr(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()?;

        let stdout_fd = child
            .stdout
            .as_ref()
            .expect("stdout was configured as piped")
            .as_raw_fd();
        let stderr_fd = child
            .stderr
            .as_ref()
            .expect("stderr was configured as piped")
            .as_raw_fd();
        // SAFETY: both descriptors belong to `child`, which is still alive
        // here, and `F_SETPIPE_SZ` only takes an integer argument.
        let stdout_size = unsafe { libc::fcntl(stdout_fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };
        // SAFETY: same as above.
        let stderr_size = unsafe { libc::fcntl(stderr_fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };

        if stdout_size >= PIPE_SIZE && stderr_size >= PIPE_SIZE {
            Ok(child)
        } else {
            // Best-effort cleanup: avoid leaving an orphaned process behind
            let _ = child.kill();
            let _ = child.wait();
            Err(io::Error::new(
                io::ErrorKind::Other,
                "resizing pipe w/ 'fcntl' failed!",
            ))
        }
    }

    /// Appends `args` to the underlying command.
    pub fn args<I, S>(&mut self, args: I) -> &mut Self
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        self.command.args(args);
        self
    }

    /// Appends `--disk` with the guest's operating-system disk, followed by
    /// the cloud-init disk when the guest's disk configuration provides one.
    ///
    /// # Panics
    ///
    /// Panics if the disk configuration has no operating-system disk.
    pub fn default_disks(&mut self) -> &mut Self {
        // Copy the shared reference out so `self.command` can be mutated below
        let guest = self.guest;
        let os_disk = format!(
            "path={}",
            guest.disk_config.disk(DiskType::OperatingSystem).unwrap()
        );
        self.command.arg("--disk").arg(os_disk);

        if let Some(cloud_init_disk) = guest.disk_config.disk(DiskType::CloudInit) {
            self.command.arg(format!("path={}", cloud_init_disk));
        }
        self
    }

    /// Appends `--net` with the guest's default network string.
    pub fn default_net(&mut self) -> &mut Self {
        let net = self.guest.default_net_string();
        self.args(&["--net", net.as_str()])
    }
}

/// Returns the relative path of the release binary `cmd`.
///
/// The target directory comes from the `BUILD_TARGET` environment variable;
/// when it cannot be read, `x86_64-unknown-linux-gnu` is used.
pub fn clh_command(cmd: &str) -> String {
    env::var("BUILD_TARGET").map_or_else(
        |_| format!("target/x86_64-unknown-linux-gnu/release/{}", cmd),
        |target| format!("target/{}/release/{}", target, cmd),
    )
}