1 // Copyright © 2021 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate lazy_static; 8 9 use ssh2::Session; 10 use std::env; 11 use std::ffi::OsStr; 12 use std::fs; 13 use std::io; 14 use std::io::{Read, Write}; 15 use std::net::TcpListener; 16 use std::net::TcpStream; 17 use std::os::unix::fs::PermissionsExt; 18 use std::os::unix::io::AsRawFd; 19 use std::path::Path; 20 use std::process::{Child, Command, ExitStatus, Output, Stdio}; 21 use std::str::FromStr; 22 use std::sync::Mutex; 23 use std::thread; 24 use vmm_sys_util::tempdir::TempDir; 25 26 #[derive(Debug)] 27 pub enum Error { 28 Parsing(std::num::ParseIntError), 29 SshCommand(SshCommandError), 30 WaitForBoot(WaitForBootError), 31 } 32 33 impl From<SshCommandError> for Error { 34 fn from(e: SshCommandError) -> Self { 35 Self::SshCommand(e) 36 } 37 } 38 39 pub struct GuestNetworkConfig { 40 pub guest_ip: String, 41 pub l2_guest_ip1: String, 42 pub l2_guest_ip2: String, 43 pub l2_guest_ip3: String, 44 pub host_ip: String, 45 pub guest_mac: String, 46 pub l2_guest_mac1: String, 47 pub l2_guest_mac2: String, 48 pub l2_guest_mac3: String, 49 pub tcp_listener_port: u16, 50 } 51 52 pub const DEFAULT_TCP_LISTENER_MESSAGE: &str = "booted"; 53 pub const DEFAULT_TCP_LISTENER_PORT: u16 = 8000; 54 pub const DEFAULT_TCP_LISTENER_TIMEOUT: i32 = 80; 55 56 #[derive(Debug)] 57 pub enum WaitForBootError { 58 EpollWait(std::io::Error), 59 Listen(std::io::Error), 60 EpollWaitTimeout, 61 WrongGuestAddr, 62 Accept(std::io::Error), 63 } 64 65 impl GuestNetworkConfig { 66 pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), WaitForBootError> { 67 let start = std::time::Instant::now(); 68 // The 'port' is unique per 'GUEST' and listening to wild-card ip avoids retrying on 'TcpListener::bind()' 69 let listen_addr = format!("0.0.0.0:{}", self.tcp_listener_port); 70 let expected_guest_addr = self.guest_ip.as_str(); 71 let mut s = String::new(); 72 let timeout = match 
custom_timeout { 73 Some(t) => t, 74 None => DEFAULT_TCP_LISTENER_TIMEOUT, 75 }; 76 77 match (|| -> Result<(), WaitForBootError> { 78 let listener = 79 TcpListener::bind(&listen_addr.as_str()).map_err(WaitForBootError::Listen)?; 80 listener 81 .set_nonblocking(true) 82 .expect("Cannot set non-blocking for tcp listener"); 83 84 // Reply on epoll w/ timeout to wait for guest connections faithfully 85 let epoll_fd = epoll::create(true).expect("Cannot create epoll fd"); 86 epoll::ctl( 87 epoll_fd, 88 epoll::ControlOptions::EPOLL_CTL_ADD, 89 listener.as_raw_fd(), 90 epoll::Event::new(epoll::Events::EPOLLIN, 0), 91 ) 92 .expect("Cannot add 'tcp_listener' event to epoll"); 93 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 1]; 94 loop { 95 let num_events = match epoll::wait(epoll_fd, timeout * 1000_i32, &mut events[..]) { 96 Ok(num_events) => Ok(num_events), 97 Err(e) => match e.raw_os_error() { 98 Some(libc::EAGAIN) | Some(libc::EINTR) => continue, 99 _ => Err(e), 100 }, 101 } 102 .map_err(WaitForBootError::EpollWait)?; 103 if num_events == 0 { 104 return Err(WaitForBootError::EpollWaitTimeout); 105 } 106 break; 107 } 108 109 match listener.accept() { 110 Ok((_, addr)) => { 111 // Make sure the connection is from the expected 'guest_addr' 112 if addr.ip() != std::net::IpAddr::from_str(expected_guest_addr).unwrap() { 113 s = format!( 114 "Expecting the guest ip '{}' while being connected with ip '{}'", 115 expected_guest_addr, 116 addr.ip() 117 ); 118 return Err(WaitForBootError::WrongGuestAddr); 119 } 120 121 Ok(()) 122 } 123 Err(e) => { 124 s = "TcpListener::accept() failed".to_string(); 125 Err(WaitForBootError::Accept(e)) 126 } 127 } 128 })() { 129 Err(e) => { 130 let duration = start.elapsed(); 131 eprintln!( 132 "\n\n==== Start 'wait_vm_boot' (FAILED) ====\n\n\ 133 duration =\"{:?}, timeout = {}s\"\n\ 134 listen_addr=\"{}\"\n\ 135 expected_guest_addr=\"{}\"\n\ 136 message=\"{}\"\n\ 137 error=\"{:?}\"\n\ 138 \n==== End 'wait_vm_boot' outout 
====\n\n", 139 duration, timeout, listen_addr, expected_guest_addr, s, e 140 ); 141 142 Err(e) 143 } 144 Ok(_) => Ok(()), 145 } 146 } 147 } 148 149 pub enum DiskType { 150 OperatingSystem, 151 CloudInit, 152 } 153 154 pub trait DiskConfig { 155 fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig); 156 fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String; 157 fn disk(&self, disk_type: DiskType) -> Option<String>; 158 } 159 160 #[derive(Clone)] 161 pub struct UbuntuDiskConfig { 162 osdisk_path: String, 163 cloudinit_path: String, 164 image_name: String, 165 } 166 167 impl UbuntuDiskConfig { 168 pub fn new(image_name: String) -> Self { 169 UbuntuDiskConfig { 170 image_name, 171 osdisk_path: String::new(), 172 cloudinit_path: String::new(), 173 } 174 } 175 } 176 177 pub struct WindowsDiskConfig { 178 image_name: String, 179 osdisk_path: String, 180 loopback_device: String, 181 windows_snapshot_cow: String, 182 windows_snapshot: String, 183 } 184 185 impl WindowsDiskConfig { 186 pub fn new(image_name: String) -> Self { 187 WindowsDiskConfig { 188 image_name, 189 osdisk_path: String::new(), 190 loopback_device: String::new(), 191 windows_snapshot_cow: String::new(), 192 windows_snapshot: String::new(), 193 } 194 } 195 } 196 197 impl Drop for WindowsDiskConfig { 198 fn drop(&mut self) { 199 // dmsetup remove windows-snapshot-1 200 std::process::Command::new("dmsetup") 201 .arg("remove") 202 .arg(self.windows_snapshot.as_str()) 203 .output() 204 .expect("Expect removing Windows snapshot with 'dmsetup' to succeed"); 205 206 // dmsetup remove windows-snapshot-cow-1 207 std::process::Command::new("dmsetup") 208 .arg("remove") 209 .arg(self.windows_snapshot_cow.as_str()) 210 .output() 211 .expect("Expect removing Windows snapshot CoW with 'dmsetup' to succeed"); 212 213 // losetup -d <loopback_device> 214 std::process::Command::new("losetup") 215 .args(&["-d", self.loopback_device.as_str()]) 216 .output() 217 
.expect("Expect removing loopback device to succeed"); 218 } 219 } 220 221 impl DiskConfig for UbuntuDiskConfig { 222 fn prepare_cloudinit(&self, tmp_dir: &TempDir, network: &GuestNetworkConfig) -> String { 223 let cloudinit_file_path = 224 String::from(tmp_dir.as_path().join("cloudinit").to_str().unwrap()); 225 226 let cloud_init_directory = tmp_dir.as_path().join("cloud-init").join("ubuntu"); 227 228 fs::create_dir_all(&cloud_init_directory) 229 .expect("Expect creating cloud-init directory to succeed"); 230 231 let source_file_dir = std::env::current_dir() 232 .unwrap() 233 .join("test_data") 234 .join("cloud-init") 235 .join("ubuntu"); 236 237 vec!["meta-data"].iter().for_each(|x| { 238 rate_limited_copy(source_file_dir.join(x), cloud_init_directory.join(x)) 239 .expect("Expect copying cloud-init meta-data to succeed"); 240 }); 241 242 let mut user_data_string = String::new(); 243 fs::File::open(source_file_dir.join("user-data")) 244 .unwrap() 245 .read_to_string(&mut user_data_string) 246 .expect("Expected reading user-data file in to succeed"); 247 user_data_string = user_data_string.replace( 248 "@DEFAULT_TCP_LISTENER_MESSAGE", 249 DEFAULT_TCP_LISTENER_MESSAGE, 250 ); 251 user_data_string = user_data_string.replace("@HOST_IP", &network.host_ip); 252 user_data_string = 253 user_data_string.replace("@TCP_LISTENER_PORT", &network.tcp_listener_port.to_string()); 254 255 fs::File::create(cloud_init_directory.join("user-data")) 256 .unwrap() 257 .write_all(user_data_string.as_bytes()) 258 .expect("Expected writing out user-data to succeed"); 259 260 let mut network_config_string = String::new(); 261 262 fs::File::open(source_file_dir.join("network-config")) 263 .unwrap() 264 .read_to_string(&mut network_config_string) 265 .expect("Expected reading network-config file in to succeed"); 266 267 network_config_string = network_config_string.replace("192.168.2.1", &network.host_ip); 268 network_config_string = network_config_string.replace("192.168.2.2", 
&network.guest_ip); 269 network_config_string = network_config_string.replace("192.168.2.3", &network.l2_guest_ip1); 270 network_config_string = network_config_string.replace("192.168.2.4", &network.l2_guest_ip2); 271 network_config_string = network_config_string.replace("192.168.2.5", &network.l2_guest_ip3); 272 network_config_string = 273 network_config_string.replace("12:34:56:78:90:ab", &network.guest_mac); 274 network_config_string = 275 network_config_string.replace("de:ad:be:ef:12:34", &network.l2_guest_mac1); 276 network_config_string = 277 network_config_string.replace("de:ad:be:ef:34:56", &network.l2_guest_mac2); 278 network_config_string = 279 network_config_string.replace("de:ad:be:ef:56:78", &network.l2_guest_mac3); 280 281 fs::File::create(cloud_init_directory.join("network-config")) 282 .unwrap() 283 .write_all(network_config_string.as_bytes()) 284 .expect("Expected writing out network-config to succeed"); 285 286 std::process::Command::new("mkdosfs") 287 .args(&["-n", "cidata"]) 288 .args(&["-C", cloudinit_file_path.as_str()]) 289 .arg("8192") 290 .output() 291 .expect("Expect creating disk image to succeed"); 292 293 vec!["user-data", "meta-data", "network-config"] 294 .iter() 295 .for_each(|x| { 296 std::process::Command::new("mcopy") 297 .arg("-o") 298 .args(&["-i", cloudinit_file_path.as_str()]) 299 .args(&["-s", cloud_init_directory.join(x).to_str().unwrap(), "::"]) 300 .output() 301 .expect("Expect copying files to disk image to succeed"); 302 }); 303 304 cloudinit_file_path 305 } 306 307 fn prepare_files(&mut self, tmp_dir: &TempDir, network: &GuestNetworkConfig) { 308 let mut workload_path = dirs::home_dir().unwrap(); 309 workload_path.push("workloads"); 310 311 let mut osdisk_base_path = workload_path; 312 osdisk_base_path.push(&self.image_name); 313 314 let osdisk_path = String::from(tmp_dir.as_path().join("osdisk.img").to_str().unwrap()); 315 let cloudinit_path = self.prepare_cloudinit(tmp_dir, network); 316 317 
rate_limited_copy(osdisk_base_path, &osdisk_path) 318 .expect("copying of OS source disk image failed"); 319 320 self.cloudinit_path = cloudinit_path; 321 self.osdisk_path = osdisk_path; 322 } 323 324 fn disk(&self, disk_type: DiskType) -> Option<String> { 325 match disk_type { 326 DiskType::OperatingSystem => Some(self.osdisk_path.clone()), 327 DiskType::CloudInit => Some(self.cloudinit_path.clone()), 328 } 329 } 330 } 331 332 impl DiskConfig for WindowsDiskConfig { 333 fn prepare_cloudinit(&self, _tmp_dir: &TempDir, _network: &GuestNetworkConfig) -> String { 334 String::new() 335 } 336 337 fn prepare_files(&mut self, tmp_dir: &TempDir, _network: &GuestNetworkConfig) { 338 let mut workload_path = dirs::home_dir().unwrap(); 339 workload_path.push("workloads"); 340 341 let mut osdisk_path = workload_path; 342 osdisk_path.push(&self.image_name); 343 344 let osdisk_blk_size = fs::metadata(osdisk_path) 345 .expect("Expect retrieving Windows image metadata") 346 .len() 347 >> 9; 348 349 let snapshot_cow_path = 350 String::from(tmp_dir.as_path().join("snapshot_cow").to_str().unwrap()); 351 352 // Create and truncate CoW file for device mapper 353 let cow_file_size: u64 = 1 << 30; 354 let cow_file_blk_size = cow_file_size >> 9; 355 let cow_file = std::fs::File::create(snapshot_cow_path.as_str()) 356 .expect("Expect creating CoW image to succeed"); 357 cow_file 358 .set_len(cow_file_size) 359 .expect("Expect truncating CoW image to succeed"); 360 361 // losetup --find --show /tmp/snapshot_cow 362 let loopback_device = std::process::Command::new("losetup") 363 .arg("--find") 364 .arg("--show") 365 .arg(snapshot_cow_path.as_str()) 366 .output() 367 .expect("Expect creating loopback device from snapshot CoW image to succeed"); 368 369 self.loopback_device = String::from_utf8_lossy(&loopback_device.stdout) 370 .trim() 371 .to_string(); 372 373 let random_extension = tmp_dir.as_path().file_name().unwrap(); 374 let windows_snapshot_cow = format!( 375 "windows-snapshot-cow-{}", 
376 random_extension.to_str().unwrap() 377 ); 378 379 // dmsetup create windows-snapshot-cow-1 --table '0 2097152 linear /dev/loop1 0' 380 std::process::Command::new("dmsetup") 381 .arg("create") 382 .arg(windows_snapshot_cow.as_str()) 383 .args(&[ 384 "--table", 385 format!("0 {} linear {} 0", cow_file_blk_size, self.loopback_device).as_str(), 386 ]) 387 .output() 388 .expect("Expect creating Windows snapshot CoW with 'dmsetup' to succeed"); 389 390 let windows_snapshot = format!("windows-snapshot-{}", random_extension.to_str().unwrap()); 391 392 // dmsetup mknodes 393 std::process::Command::new("dmsetup") 394 .arg("mknodes") 395 .output() 396 .expect("Expect device mapper nodes to be ready"); 397 398 // dmsetup create windows-snapshot-1 --table '0 41943040 snapshot /dev/mapper/windows-base /dev/mapper/windows-snapshot-cow-1 P 8' 399 std::process::Command::new("dmsetup") 400 .arg("create") 401 .arg(windows_snapshot.as_str()) 402 .args(&[ 403 "--table", 404 format!( 405 "0 {} snapshot /dev/mapper/windows-base /dev/mapper/{} P 8", 406 osdisk_blk_size, 407 windows_snapshot_cow.as_str() 408 ) 409 .as_str(), 410 ]) 411 .output() 412 .expect("Expect creating Windows snapshot with 'dmsetup' to succeed"); 413 414 // dmsetup mknodes 415 std::process::Command::new("dmsetup") 416 .arg("mknodes") 417 .output() 418 .expect("Expect device mapper nodes to be ready"); 419 420 self.osdisk_path = format!("/dev/mapper/{}", windows_snapshot); 421 self.windows_snapshot_cow = windows_snapshot_cow; 422 self.windows_snapshot = windows_snapshot; 423 } 424 425 fn disk(&self, disk_type: DiskType) -> Option<String> { 426 match disk_type { 427 DiskType::OperatingSystem => Some(self.osdisk_path.clone()), 428 DiskType::CloudInit => None, 429 } 430 } 431 } 432 433 pub fn rate_limited_copy<P: AsRef<Path>, Q: AsRef<Path>>(from: P, to: Q) -> io::Result<u64> { 434 for i in 0..10 { 435 let free_bytes = unsafe { 436 let mut stats = std::mem::MaybeUninit::zeroed(); 437 let fs_name = 
std::ffi::CString::new("/tmp").unwrap(); 438 libc::statvfs(fs_name.as_ptr(), stats.as_mut_ptr()); 439 440 let free_blocks = stats.assume_init().f_bfree; 441 let block_size = stats.assume_init().f_bsize; 442 443 free_blocks * block_size 444 }; 445 446 // Make sure there is at least 6 GiB of space 447 if free_bytes < 6 << 30 { 448 eprintln!( 449 "Not enough space on disk ({}). Attempt {} of 10. Sleeping.", 450 free_bytes, i 451 ); 452 thread::sleep(std::time::Duration::new(60, 0)); 453 continue; 454 } 455 456 match fs::copy(&from, &to) { 457 Err(e) => { 458 if let Some(errno) = e.raw_os_error() { 459 if errno == libc::ENOSPC { 460 eprintln!("Copy returned ENOSPC. Attempt {} of 10. Sleeping.", i); 461 thread::sleep(std::time::Duration::new(60, 0)); 462 continue; 463 } 464 } 465 return Err(e); 466 } 467 Ok(i) => return Ok(i), 468 } 469 } 470 Err(io::Error::last_os_error()) 471 } 472 473 pub fn handle_child_output( 474 r: Result<(), std::boxed::Box<dyn std::any::Any + std::marker::Send>>, 475 output: &std::process::Output, 476 ) { 477 use std::os::unix::process::ExitStatusExt; 478 if r.is_ok() && output.status.success() { 479 return; 480 } 481 482 match output.status.code() { 483 None => { 484 // Don't treat child.kill() as a problem 485 if output.status.signal() == Some(9) && r.is_ok() { 486 return; 487 } 488 489 eprintln!( 490 "==== child killed by signal: {} ====", 491 output.status.signal().unwrap() 492 ); 493 } 494 Some(code) => { 495 eprintln!("\n\n==== child exit code: {} ====", code); 496 } 497 } 498 499 eprintln!( 500 "\n\n==== Start child stdout ====\n\n{}\n\n==== End child stdout ====", 501 String::from_utf8_lossy(&output.stdout) 502 ); 503 eprintln!( 504 "\n\n==== Start child stderr ====\n\n{}\n\n==== End child stderr ====", 505 String::from_utf8_lossy(&output.stderr) 506 ); 507 508 panic!("Test failed") 509 } 510 511 #[derive(Debug)] 512 pub struct PasswordAuth { 513 pub username: String, 514 pub password: String, 515 } 516 517 pub const 
DEFAULT_SSH_RETRIES: u8 = 6; 518 pub const DEFAULT_SSH_TIMEOUT: u8 = 10; 519 520 #[derive(Debug)] 521 pub enum SshCommandError { 522 Connection(std::io::Error), 523 Handshake(ssh2::Error), 524 Authentication(ssh2::Error), 525 ChannelSession(ssh2::Error), 526 Command(ssh2::Error), 527 ExitStatus(ssh2::Error), 528 NonZeroExitStatus(i32), 529 FileRead(std::io::Error), 530 FileMetadata(std::io::Error), 531 ScpSend(ssh2::Error), 532 WriteAll(std::io::Error), 533 SendEof(ssh2::Error), 534 WaitEof(ssh2::Error), 535 } 536 537 fn scp_to_guest_with_auth( 538 path: &Path, 539 remote_path: &Path, 540 auth: &PasswordAuth, 541 ip: &str, 542 retries: u8, 543 timeout: u8, 544 ) -> Result<(), SshCommandError> { 545 let mut counter = 0; 546 loop { 547 match (|| -> Result<(), SshCommandError> { 548 let tcp = 549 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?; 550 let mut sess = Session::new().unwrap(); 551 sess.set_tcp_stream(tcp); 552 sess.handshake().map_err(SshCommandError::Handshake)?; 553 554 sess.userauth_password(&auth.username, &auth.password) 555 .map_err(SshCommandError::Authentication)?; 556 assert!(sess.authenticated()); 557 558 let content = fs::read(path).map_err(SshCommandError::FileRead)?; 559 let mode = fs::metadata(path) 560 .map_err(SshCommandError::FileMetadata)? 
561 .permissions() 562 .mode() 563 & 0o777; 564 565 let mut channel = sess 566 .scp_send(remote_path, mode as i32, content.len() as u64, None) 567 .map_err(SshCommandError::ScpSend)?; 568 channel 569 .write_all(&content) 570 .map_err(SshCommandError::WriteAll)?; 571 channel.send_eof().map_err(SshCommandError::SendEof)?; 572 channel.wait_eof().map_err(SshCommandError::WaitEof)?; 573 574 // Intentionally ignore these results here as their failure 575 // does not precipitate a repeat 576 let _ = channel.close(); 577 let _ = channel.wait_close(); 578 579 Ok(()) 580 })() { 581 Ok(_) => break, 582 Err(e) => { 583 counter += 1; 584 if counter >= retries { 585 eprintln!( 586 "\n\n==== Start scp command output (FAILED) ====\n\n\ 587 path =\"{:?}\"\n\ 588 remote_path =\"{:?}\"\n\ 589 auth=\"{:#?}\"\n\ 590 ip=\"{}\"\n\ 591 error=\"{:?}\"\n\ 592 \n==== End scp command outout ====\n\n", 593 path, remote_path, auth, ip, e 594 ); 595 596 return Err(e); 597 } 598 } 599 }; 600 thread::sleep(std::time::Duration::new((timeout * counter).into(), 0)); 601 } 602 Ok(()) 603 } 604 605 pub fn scp_to_guest( 606 path: &Path, 607 remote_path: &Path, 608 ip: &str, 609 retries: u8, 610 timeout: u8, 611 ) -> Result<(), SshCommandError> { 612 scp_to_guest_with_auth( 613 path, 614 remote_path, 615 &PasswordAuth { 616 username: String::from("cloud"), 617 password: String::from("cloud123"), 618 }, 619 ip, 620 retries, 621 timeout, 622 ) 623 } 624 625 pub fn ssh_command_ip_with_auth( 626 command: &str, 627 auth: &PasswordAuth, 628 ip: &str, 629 retries: u8, 630 timeout: u8, 631 ) -> Result<String, SshCommandError> { 632 let mut s = String::new(); 633 634 let mut counter = 0; 635 loop { 636 match (|| -> Result<(), SshCommandError> { 637 let tcp = 638 TcpStream::connect(format!("{}:22", ip)).map_err(SshCommandError::Connection)?; 639 let mut sess = Session::new().unwrap(); 640 sess.set_tcp_stream(tcp); 641 sess.handshake().map_err(SshCommandError::Handshake)?; 642 643 
sess.userauth_password(&auth.username, &auth.password) 644 .map_err(SshCommandError::Authentication)?; 645 assert!(sess.authenticated()); 646 647 let mut channel = sess 648 .channel_session() 649 .map_err(SshCommandError::ChannelSession)?; 650 channel.exec(command).map_err(SshCommandError::Command)?; 651 652 // Intentionally ignore these results here as their failure 653 // does not precipitate a repeat 654 let _ = channel.read_to_string(&mut s); 655 let _ = channel.close(); 656 let _ = channel.wait_close(); 657 658 let status = channel.exit_status().map_err(SshCommandError::ExitStatus)?; 659 660 if status != 0 { 661 Err(SshCommandError::NonZeroExitStatus(status)) 662 } else { 663 Ok(()) 664 } 665 })() { 666 Ok(_) => break, 667 Err(e) => { 668 counter += 1; 669 if counter >= retries { 670 eprintln!( 671 "\n\n==== Start ssh command output (FAILED) ====\n\n\ 672 command=\"{}\"\n\ 673 auth=\"{:#?}\"\n\ 674 ip=\"{}\"\n\ 675 output=\"{}\"\n\ 676 error=\"{:?}\"\n\ 677 \n==== End ssh command outout ====\n\n", 678 command, auth, ip, s, e 679 ); 680 681 return Err(e); 682 } 683 } 684 }; 685 thread::sleep(std::time::Duration::new((timeout * counter).into(), 0)); 686 } 687 Ok(s) 688 } 689 690 pub fn ssh_command_ip( 691 command: &str, 692 ip: &str, 693 retries: u8, 694 timeout: u8, 695 ) -> Result<String, SshCommandError> { 696 ssh_command_ip_with_auth( 697 command, 698 &PasswordAuth { 699 username: String::from("cloud"), 700 password: String::from("cloud123"), 701 }, 702 ip, 703 retries, 704 timeout, 705 ) 706 } 707 708 pub fn exec_host_command_status(command: &str) -> ExitStatus { 709 std::process::Command::new("bash") 710 .args(&["-c", command]) 711 .status() 712 .unwrap_or_else(|_| panic!("Expected '{}' to run", command)) 713 } 714 715 pub fn exec_host_command_output(command: &str) -> Output { 716 std::process::Command::new("bash") 717 .args(&["-c", command]) 718 .output() 719 .unwrap_or_else(|_| panic!("Expected '{}' to run", command)) 720 } 721 722 pub const PIPE_SIZE: 
i32 = 32 << 20; 723 724 lazy_static! { 725 static ref NEXT_VM_ID: Mutex<u8> = Mutex::new(1); 726 } 727 728 pub struct Guest { 729 pub tmp_dir: TempDir, 730 pub disk_config: Box<dyn DiskConfig>, 731 pub network: GuestNetworkConfig, 732 } 733 734 // Safe to implement as we know we have no interior mutability 735 impl std::panic::RefUnwindSafe for Guest {} 736 737 impl Guest { 738 pub fn new_from_ip_range(mut disk_config: Box<dyn DiskConfig>, class: &str, id: u8) -> Self { 739 let tmp_dir = TempDir::new_with_prefix("/tmp/ch").unwrap(); 740 741 let network = GuestNetworkConfig { 742 guest_ip: format!("{}.{}.2", class, id), 743 l2_guest_ip1: format!("{}.{}.3", class, id), 744 l2_guest_ip2: format!("{}.{}.4", class, id), 745 l2_guest_ip3: format!("{}.{}.5", class, id), 746 host_ip: format!("{}.{}.1", class, id), 747 guest_mac: format!("12:34:56:78:90:{:02x}", id), 748 l2_guest_mac1: format!("de:ad:be:ef:12:{:02x}", id), 749 l2_guest_mac2: format!("de:ad:be:ef:34:{:02x}", id), 750 l2_guest_mac3: format!("de:ad:be:ef:56:{:02x}", id), 751 tcp_listener_port: DEFAULT_TCP_LISTENER_PORT + id as u16, 752 }; 753 754 disk_config.prepare_files(&tmp_dir, &network); 755 756 Guest { 757 tmp_dir, 758 disk_config, 759 network, 760 } 761 } 762 763 pub fn new(disk_config: Box<dyn DiskConfig>) -> Self { 764 let mut guard = NEXT_VM_ID.lock().unwrap(); 765 let id = *guard; 766 *guard = id + 1; 767 768 Self::new_from_ip_range(disk_config, "192.168", id) 769 } 770 771 pub fn default_net_string(&self) -> String { 772 format!( 773 "tap=,mac={},ip={},mask=255.255.255.0", 774 self.network.guest_mac, self.network.host_ip 775 ) 776 } 777 778 pub fn default_net_string_w_iommu(&self) -> String { 779 format!( 780 "tap=,mac={},ip={},mask=255.255.255.0,iommu=on", 781 self.network.guest_mac, self.network.host_ip 782 ) 783 } 784 785 pub fn ssh_command(&self, command: &str) -> Result<String, SshCommandError> { 786 ssh_command_ip( 787 command, 788 &self.network.guest_ip, 789 DEFAULT_SSH_RETRIES, 790 
DEFAULT_SSH_TIMEOUT, 791 ) 792 } 793 794 #[cfg(target_arch = "x86_64")] 795 pub fn ssh_command_l1(&self, command: &str) -> Result<String, SshCommandError> { 796 ssh_command_ip( 797 command, 798 &self.network.guest_ip, 799 DEFAULT_SSH_RETRIES, 800 DEFAULT_SSH_TIMEOUT, 801 ) 802 } 803 804 #[cfg(target_arch = "x86_64")] 805 pub fn ssh_command_l2_1(&self, command: &str) -> Result<String, SshCommandError> { 806 ssh_command_ip( 807 command, 808 &self.network.l2_guest_ip1, 809 DEFAULT_SSH_RETRIES, 810 DEFAULT_SSH_TIMEOUT, 811 ) 812 } 813 814 #[cfg(target_arch = "x86_64")] 815 pub fn ssh_command_l2_2(&self, command: &str) -> Result<String, SshCommandError> { 816 ssh_command_ip( 817 command, 818 &self.network.l2_guest_ip2, 819 DEFAULT_SSH_RETRIES, 820 DEFAULT_SSH_TIMEOUT, 821 ) 822 } 823 824 #[cfg(target_arch = "x86_64")] 825 pub fn ssh_command_l2_3(&self, command: &str) -> Result<String, SshCommandError> { 826 ssh_command_ip( 827 command, 828 &self.network.l2_guest_ip3, 829 DEFAULT_SSH_RETRIES, 830 DEFAULT_SSH_TIMEOUT, 831 ) 832 } 833 834 pub fn api_create_body( 835 &self, 836 cpu_count: u8, 837 _fw_path: &str, 838 _kernel_path: &str, 839 _kernel_cmd: &str, 840 ) -> String { 841 #[cfg(all(target_arch = "x86_64", not(feature = "mshv")))] 842 format! {"{{\"cpus\":{{\"boot_vcpus\":{},\"max_vcpus\":{}}},\"kernel\":{{\"path\":\"{}\"}},\"cmdline\":{{\"args\": \"\"}},\"net\":[{{\"ip\":\"{}\", \"mask\":\"255.255.255.0\", \"mac\":\"{}\"}}], \"disks\":[{{\"path\":\"{}\"}}, {{\"path\":\"{}\"}}]}}", 843 cpu_count, 844 cpu_count, 845 _fw_path, 846 self.network.host_ip, 847 self.network.guest_mac, 848 self.disk_config.disk(DiskType::OperatingSystem).unwrap().as_str(), 849 self.disk_config.disk(DiskType::CloudInit).unwrap().as_str(), 850 } 851 852 #[cfg(any(target_arch = "aarch64", feature = "mshv"))] 853 format! 
{"{{\"cpus\":{{\"boot_vcpus\":{},\"max_vcpus\":{}}},\"kernel\":{{\"path\":\"{}\"}},\"cmdline\":{{\"args\": \"{}\"}},\"net\":[{{\"ip\":\"{}\", \"mask\":\"255.255.255.0\", \"mac\":\"{}\"}}], \"disks\":[{{\"path\":\"{}\"}}, {{\"path\":\"{}\"}}]}}", 854 cpu_count, 855 cpu_count, 856 _kernel_path, 857 _kernel_cmd, 858 self.network.host_ip, 859 self.network.guest_mac, 860 self.disk_config.disk(DiskType::OperatingSystem).unwrap().as_str(), 861 self.disk_config.disk(DiskType::CloudInit).unwrap().as_str(), 862 } 863 } 864 865 pub fn get_cpu_count(&self) -> Result<u32, Error> { 866 self.ssh_command("grep -c processor /proc/cpuinfo")? 867 .trim() 868 .parse() 869 .map_err(Error::Parsing) 870 } 871 872 #[cfg(target_arch = "x86_64")] 873 pub fn get_initial_apicid(&self) -> Result<u32, Error> { 874 self.ssh_command("grep \"initial apicid\" /proc/cpuinfo | grep -o \"[0-9]*\"")? 875 .trim() 876 .parse() 877 .map_err(Error::Parsing) 878 } 879 880 pub fn get_total_memory(&self) -> Result<u32, Error> { 881 self.ssh_command("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")? 882 .trim() 883 .parse() 884 .map_err(Error::Parsing) 885 } 886 887 #[cfg(target_arch = "x86_64")] 888 pub fn get_total_memory_l2(&self) -> Result<u32, Error> { 889 self.ssh_command_l2_1("grep MemTotal /proc/meminfo | grep -o \"[0-9]*\"")? 890 .trim() 891 .parse() 892 .map_err(Error::Parsing) 893 } 894 895 pub fn get_numa_node_memory(&self, node_id: usize) -> Result<u32, Error> { 896 self.ssh_command( 897 format!( 898 "grep MemTotal /sys/devices/system/node/node{}/meminfo \ 899 | cut -d \":\" -f 2 | grep -o \"[0-9]*\"", 900 node_id 901 ) 902 .as_str(), 903 )? 
904 .trim() 905 .parse() 906 .map_err(Error::Parsing) 907 } 908 909 pub fn wait_vm_boot(&self, custom_timeout: Option<i32>) -> Result<(), Error> { 910 self.network 911 .wait_vm_boot(custom_timeout) 912 .map_err(Error::WaitForBoot) 913 } 914 915 pub fn check_numa_node_cpus(&self, node_id: usize, cpus: Vec<usize>) -> Result<(), Error> { 916 for cpu in cpus.iter() { 917 let cmd = format!( 918 "[ -d \"/sys/devices/system/node/node{}/cpu{}\" ]", 919 node_id, cpu 920 ); 921 self.ssh_command(cmd.as_str())?; 922 } 923 924 Ok(()) 925 } 926 927 pub fn check_numa_node_distances( 928 &self, 929 node_id: usize, 930 distances: &str, 931 ) -> Result<bool, Error> { 932 let cmd = format!("cat /sys/devices/system/node/node{}/distance", node_id); 933 if self.ssh_command(cmd.as_str())?.trim() == distances { 934 Ok(true) 935 } else { 936 Ok(false) 937 } 938 } 939 940 pub fn check_numa_common( 941 &self, 942 mem_ref: Option<&[u32]>, 943 node_ref: Option<&[Vec<usize>]>, 944 distance_ref: Option<&[&str]>, 945 ) { 946 if let Some(mem_ref) = mem_ref { 947 // Check each NUMA node has been assigned the right amount of 948 // memory. 949 for (i, &m) in mem_ref.iter().enumerate() { 950 assert!(self.get_numa_node_memory(i).unwrap_or_default() > m); 951 } 952 } 953 954 if let Some(node_ref) = node_ref { 955 // Check each NUMA node has been assigned the right CPUs set. 956 for (i, n) in node_ref.iter().enumerate() { 957 self.check_numa_node_cpus(i, n.clone()).unwrap(); 958 } 959 } 960 961 if let Some(distance_ref) = distance_ref { 962 // Check each NUMA node has been assigned the right distances. 
963 for (i, &d) in distance_ref.iter().enumerate() { 964 assert!(self.check_numa_node_distances(i, d).unwrap()); 965 } 966 } 967 } 968 969 #[cfg(target_arch = "x86_64")] 970 pub fn check_sgx_support(&self) -> Result<(), Error> { 971 self.ssh_command( 972 "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX: \ 973 Software Guard Extensions supported = true'", 974 )?; 975 self.ssh_command( 976 "cpuid -l 0x7 -s 0 | tr -s [:space:] | grep -q 'SGX_LC: \ 977 SGX launch config supported = true'", 978 )?; 979 self.ssh_command( 980 "cpuid -l 0x12 -s 0 | tr -s [:space:] | grep -q 'SGX1 \ 981 supported = true'", 982 )?; 983 984 Ok(()) 985 } 986 987 pub fn get_entropy(&self) -> Result<u32, Error> { 988 self.ssh_command("cat /proc/sys/kernel/random/entropy_avail")? 989 .trim() 990 .parse() 991 .map_err(Error::Parsing) 992 } 993 994 pub fn get_pci_bridge_class(&self) -> Result<String, Error> { 995 Ok(self 996 .ssh_command("cat /sys/bus/pci/devices/0000:00:00.0/class")? 997 .trim() 998 .to_string()) 999 } 1000 1001 pub fn get_pci_device_ids(&self) -> Result<String, Error> { 1002 Ok(self 1003 .ssh_command("cat /sys/bus/pci/devices/*/device")? 1004 .trim() 1005 .to_string()) 1006 } 1007 1008 pub fn get_pci_vendor_ids(&self) -> Result<String, Error> { 1009 Ok(self 1010 .ssh_command("cat /sys/bus/pci/devices/*/vendor")? 
.trim() .to_string()) }

    /// Reports whether a PCI device with the given device id / vendor id pair
    /// is present in the guest.
    ///
    /// The device-id and vendor-id listings are each split on `'\n'` and
    /// compared position by position: the pair matches when one position holds
    /// `device_id` in the first listing and `vendor_id` in the second.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while fetching either listing.
    pub fn does_device_vendor_pair_match(
        &self,
        device_id: &str,
        vendor_id: &str,
    ) -> Result<bool, Error> {
        // We are checking if console device's device id and vendor id pair matches
        let devices = self.get_pci_device_ids()?;
        let vendors = self.get_pci_vendor_ids()?;

        // `zip` stops at the shorter listing, so a device entry without a
        // vendor entry at the same position can never match.
        Ok(devices
            .split('\n')
            .zip(vendors.split('\n'))
            .any(|(d_id, v_id)| d_id == device_id && v_id == vendor_id))
    }

    /// Checks the virtio-fs shared-memory region seen in the guest's
    /// `/proc/iomem` against the expected DAX configuration.
    ///
    /// * No region found: valid only when `dax` is `false`.
    /// * Region found with `dax == false`: invalid.
    /// * Region found with `dax == true`: valid when the size of the first
    ///   `start-end` range equals `cache_size` (8 GiB when `None`).
    ///
    /// Output whose first field is not a single `start-end` range, or whose
    /// range has `end < start`, yields `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns the SSH error if the command fails, or [`Error::Parsing`] if a
    /// range bound is not a hexadecimal number.
    pub fn valid_virtio_fs_cache_size(
        &self,
        dax: bool,
        cache_size: Option<u64>,
    ) -> Result<bool, Error> {
        // SHM region is called different things depending on kernel
        let shm_region = self
            .ssh_command("sudo grep 'virtio[0-9]\\|virtio-pci-shm' /proc/iomem || true")?
            .trim()
            .to_string();

        if shm_region.is_empty() {
            return Ok(!dax);
        }

        // From this point, the region is not empty, hence it is an error
        // if DAX is off.
        if !dax {
            return Ok(false);
        }

        // 8Gib by default
        let cache = cache_size.unwrap_or(0x0002_0000_0000);

        // Everything before the first ':' is the "start-end" address range.
        // `split` always yields at least one item, so the fallback is never used.
        let range = shm_region.split(':').next().unwrap_or("").trim();

        // Require exactly two '-' separated fields.
        let mut bounds = range.split('-');
        let (start, end) = match (bounds.next(), bounds.next(), bounds.next()) {
            (Some(start), Some(end), None) => (start, end),
            _ => return Ok(false),
        };

        let start_addr = u64::from_str_radix(start, 16).map_err(Error::Parsing)?;
        let end_addr = u64::from_str_radix(end, 16).map_err(Error::Parsing)?;

        // The range is inclusive. Checked arithmetic keeps malformed output
        // (end < start, or a range spanning the whole u64 space) from
        // overflowing.
        let region_size = end_addr
            .checked_sub(start_addr)
            .and_then(|len| len.checked_add(1));

        Ok(region_size == Some(cache))
    }

    /// Exercises the vsock device: the guest listens with `socat`, the host
    /// writes `HelloWorld!` through the UNIX socket at `socket`, and the text
    /// logged by the guest is compared with what was sent.
    ///
    /// # Panics
    ///
    /// Panics if an SSH command fails, if the host command does not succeed,
    /// or if the guest log does not contain the expected text.
    pub fn check_vsock(&self, socket: &str) {
        // Listen from guest on vsock CID=3 PORT=16
        // SOCKET-LISTEN:<domain>:<protocol>:<local-address>
        let guest_ip = self.network.guest_ip.clone();
        let listen_socat = thread::spawn(move || {
            ssh_command_ip(
                "sudo socat - SOCKET-LISTEN:40:0:x00x00x10x00x00x00x03x00x00x00x00x00x00x00 > vsock_log",
                &guest_ip,
                DEFAULT_SSH_RETRIES,
                DEFAULT_SSH_TIMEOUT,
            )
            .unwrap();
        });

        // Make sure socat is listening, which might take a few seconds on slow systems
        thread::sleep(std::time::Duration::new(10, 0));

        // Write something to vsock from the host
        assert!(exec_host_command_status(&format!(
            "echo -e \"CONNECT 16\\nHelloWorld!\" | socat - UNIX-CONNECT:{}",
            socket
        ))
        .success());

        // Wait for the thread to terminate.
        listen_socat.join().unwrap();

        assert_eq!(
            self.ssh_command("cat vsock_log").unwrap().trim(),
            "HelloWorld!"
        );
    }

    /// Validates an NVIDIA Tesla T4 inside the guest by running the CUDA
    /// `deviceQuery` sample and then the DCGM discovery and diagnostics.
    ///
    /// # Panics
    ///
    /// Panics if an SSH command fails or any expected output is missing.
    #[cfg(target_arch = "x86_64")]
    pub fn check_nvidia_gpu(&self) {
        // Run CUDA sample to validate it can find the device
        let device_query_result = self
            .ssh_command("sudo /root/NVIDIA_CUDA-11.3_Samples/bin/x86_64/linux/release/deviceQuery")
            .unwrap();
        assert!(device_query_result.contains("Detected 1 CUDA Capable device"));
        assert!(device_query_result.contains("Device 0: \"NVIDIA Tesla T4\""));
        assert!(device_query_result.contains("Result = PASS"));

        // Run NVIDIA DCGM Diagnostics to validate the device is functional
        self.ssh_command("sudo nv-hostengine").unwrap();

        assert!(self
            .ssh_command("sudo dcgmi discovery -l")
            .unwrap()
            .contains("Name: NVIDIA Tesla T4"));
        assert_eq!(
            self.ssh_command("sudo dcgmi diag -r 'diagnostic' | grep Pass | wc -l")
                .unwrap()
                .trim(),
            "10"
        );
    }

    /// Reboots the guest and checks that the journal's boot count grew by one.
    ///
    /// `current_reboot_count` is the number of reboots performed so far; the
    /// journal must list `current_reboot_count + 1` boots before the reboot
    /// and `current_reboot_count + 2` after it. `custom_timeout` is forwarded
    /// to `wait_vm_boot`.
    ///
    /// # Panics
    ///
    /// Panics if an SSH command fails, if waiting for boot fails, or if a
    /// boot count differs from the expected value.
    pub fn reboot_linux(&self, current_reboot_count: u32, custom_timeout: Option<i32>) {
        let list_boots_cmd = "sudo journalctl --list-boots | wc -l";
        // Unparsable output counts as 0 boots, which the assertions below reject.
        let boot_count = || -> u32 {
            self.ssh_command(list_boots_cmd)
                .unwrap()
                .trim()
                .parse::<u32>()
                .unwrap_or_default()
        };

        assert_eq!(boot_count(), current_reboot_count + 1);
        self.ssh_command("sudo reboot").unwrap();

        self.wait_vm_boot(custom_timeout).unwrap();
        assert_eq!(boot_count(), current_reboot_count + 2);
    }

    /// Writes `online` to the guest's
    /// `/sys/devices/system/memory/auto_online_blocks`.
    ///
    /// # Panics
    ///
    /// Panics if the SSH command fails.
    pub fn enable_memory_hotplug(&self) {
        self.ssh_command("echo online | sudo tee /sys/devices/system/memory/auto_online_blocks")
            .unwrap();
    }

    /// Runs the device checks shared by the tests: block devices, the rng
    /// device, vsock (when `socket` is given) and the console (when
    /// `console_text` is given).
    ///
    /// # Panics
    ///
    /// Panics if any of the commands fails.
    pub fn check_devices_common(&self, socket: Option<&String>, console_text: Option<&String>) {
        // Check block devices are readable
        self.ssh_command("sudo dd if=/dev/vda of=/dev/null bs=1M iflag=direct count=1024")
            .unwrap();
        self.ssh_command("sudo dd if=/dev/vdb of=/dev/null bs=1M iflag=direct count=8")
            .unwrap();
        // Check if the rng device is readable
        self.ssh_command("sudo head -c 1000 /dev/hwrng > /dev/null")
            .unwrap();
        // Check vsock
        if let Some(socket) = socket {
            self.check_vsock(socket.as_str());
        }
        // Check if the console is usable
        if let Some(console_text) = console_text {
            let console_cmd = format!("echo {} | sudo tee /dev/hvc0", console_text);
            self.ssh_command(&console_cmd).unwrap();
        }
        // The net device is 'automatically' exercised through the above 'ssh' commands
    }
}

/// Builder for launching a hypervisor binary on behalf of a [`Guest`].
pub struct GuestCommand<'a> {
    /// The command line being assembled.
    command: Command,
    /// Guest whose disk and network settings feed the `default_*` helpers.
    guest: &'a Guest,
    /// When set, `spawn` pipes the child's stdout and stderr.
    capture_output: bool,
    /// When set, `spawn` prints the command line before launching.
    print_cmd: bool,
}

impl<'a> GuestCommand<'a> {
    /// Creates a command for the `cloud-hypervisor` binary.
    pub fn new(guest: &'a Guest) -> Self {
        Self::new_with_binary_name(guest, "cloud-hypervisor")
    }

    /// Creates a command for `binary_name`, resolved through [`clh_command`].
    /// Output capture starts disabled and command printing enabled.
    pub fn new_with_binary_name(guest: &'a Guest, binary_name: &str) -> Self {
        Self {
            command: Command::new(clh_command(binary_name)),
            guest,
            capture_output: false,
            print_cmd: true,
        }
    }

    /// Makes `spawn` pipe the child's stdout and stderr.
    pub fn capture_output(&mut self) -> &mut Self {
        self.capture_output = true;
        self
    }

    /// Chooses whether `spawn` prints the command line before launching.
    pub fn set_print_cmd(&mut self, print_cmd: bool) -> &mut Self {
        self.print_cmd = print_cmd;
        self
    }

    /// Launches the command with `-v` appended.
    ///
    /// `-v` is appended to the stored command on every call. The command line
    /// is printed first, without that `-v`, when printing is enabled. With
    /// output capture enabled, stdout and stderr are piped and both pipes are
    /// resized to `PIPE_SIZE`.
    ///
    /// # Errors
    ///
    /// Returns the error from starting the process. With output capture
    /// enabled, also returns an error when either pipe cannot be resized to
    /// at least `PIPE_SIZE`; the child is then killed and reaped first.
    pub fn spawn(&mut self) -> io::Result<Child> {
        if self.print_cmd {
            println!(
                "\n\n==== Start cloud-hypervisor command-line ====\n\n\
                 {:?}\n\
                 \n==== End cloud-hypervisor command-line ====\n\n",
                self.command
            );
        }

        self.command.arg("-v");

        if !self.capture_output {
            return self.command.spawn();
        }

        let mut child = self
            .command
            .stderr(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()?;

        let stdout_fd = child
            .stdout
            .as_ref()
            .expect("stdout was configured as piped")
            .as_raw_fd();
        // SAFETY: `stdout_fd` is a valid descriptor owned by `child`, which
        // outlives the call; F_SETPIPE_SZ takes a plain integer argument.
        let stdout_size = unsafe { libc::fcntl(stdout_fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };

        let stderr_fd = child
            .stderr
            .as_ref()
            .expect("stderr was configured as piped")
            .as_raw_fd();
        // SAFETY: same as above, for the stderr pipe.
        let stderr_size = unsafe { libc::fcntl(stderr_fd, libc::F_SETPIPE_SZ, PIPE_SIZE) };

        if stdout_size >= PIPE_SIZE && stderr_size >= PIPE_SIZE {
            Ok(child)
        } else {
            // Do not leave the process running once the caller has lost its
            // handle. Best effort: the resize failure is the error we report.
            let _ = child.kill();
            let _ = child.wait();
            Err(io::Error::new(
                io::ErrorKind::Other,
                "resizing pipe w/ 'fcntl' failed!",
            ))
        }
    }

    /// Appends `args` to the command line.
    pub fn args<I, S>(&mut self, args: I) -> &mut Self
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        self.command.args(args);
        self
    }

    /// Appends `--disk` with the guest's operating-system disk, followed by
    /// the cloud-init disk when the guest has one.
    ///
    /// # Panics
    ///
    /// Panics if the guest has no operating-system disk.
    pub fn default_disks(&mut self) -> &mut Self {
        let disk_config = &self.guest.disk_config;
        let cloud_init_disk = disk_config
            .disk(DiskType::CloudInit)
            .map(|path| format!("path={}", path));
        let os_disk = format!(
            "path={}",
            disk_config.disk(DiskType::OperatingSystem).unwrap()
        );

        let mut disk_args = vec![String::from("--disk"), os_disk];
        disk_args.extend(cloud_init_disk);
        self.args(disk_args)
    }

    /// Appends `--net` with the guest's default network string.
    pub fn default_net(&mut self) -> &mut Self {
        let net = self.guest.default_net_string();
        self.args(&["--net", net.as_str()])
    }
}

/// Returns the path of the release build of `cmd`.
///
/// The target directory comes from the `BUILD_TARGET` environment variable,
/// falling back to `x86_64-unknown-linux-gnu` when it cannot be read.
pub fn clh_command(cmd: &str) -> String {
    env::var("BUILD_TARGET").map_or_else(
        |_| format!("target/x86_64-unknown-linux-gnu/release/{}", cmd),
        |target| format!("target/{}/release/{}", target, cmd),
    )
}