xref: /cloud-hypervisor/virtio-devices/src/balloon.rs (revision 9af2968a7dc47b89bf07ea9dc5e735084efcfa3a)
1 // Copyright (c) 2020 Ant Financial
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 use super::{
16     ActivateError, ActivateResult, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
17     VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
18 };
19 use crate::seccomp_filters::{get_seccomp_filter, Thread};
20 use crate::GuestMemoryMmap;
21 use crate::{VirtioInterrupt, VirtioInterruptType};
22 use libc::EFD_NONBLOCK;
23 use seccomp::{SeccompAction, SeccompFilter};
24 use std::io;
25 use std::mem::size_of;
26 use std::os::unix::io::AsRawFd;
27 use std::result;
28 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
29 use std::sync::mpsc;
30 use std::sync::{Arc, Barrier, Mutex};
31 use std::thread;
32 use vm_memory::GuestMemory;
33 use vm_memory::{
34     Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
35     GuestMemoryError,
36 };
37 use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
38 use vmm_sys_util::eventfd::EventFd;
39 
// Maximum number of descriptors in each of the balloon virtqueues.
const QUEUE_SIZE: u16 = 128;
// The device uses two virtqueues: index 0 is the inflate queue and index 1 is
// the deflate queue (see `BalloonEpollHandler::process_queue`).
const NUM_QUEUES: usize = 2;
// Per-queue maximum sizes handed to `VirtioCommon`.
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES];

// A resize request is pending on the resize EventFd.
const RESIZE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
// New descriptors are pending on the inflate virtqueue.
const INFLATE_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;
// New descriptors are pending on the deflate virtqueue.
const DEFLATE_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3;

// Shift converting a balloon page frame number (PFN) into a guest physical
// address, i.e. one balloon page is `1 << VIRTIO_BALLOON_PFN_SHIFT` bytes.
const VIRTIO_BALLOON_PFN_SHIFT: u64 = 12;

// Feature bit number: deflate balloon on guest OOM.
const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u64 = 2;
56 
// Errors reported by the virtio-balloon device and its epoll worker.
#[derive(Debug)]
pub enum Error {
    // Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    // Guest gave us a write-only descriptor where the protocol requires a
    // readable one.
    UnexpectedWriteOnlyDescriptor,
    // Guest sent us an invalid request.
    InvalidRequest,
    // The fallocate() call failed.
    FallocateFail(std::io::Error),
    // The madvise() call failed.
    MadviseFail(std::io::Error),
    // Writing to an EventFd failed.
    EventFdWriteFail(std::io::Error),
    // Cloning an EventFd (try_clone) failed.
    EventFdTryCloneFail(std::io::Error),
    // Receiving from the mpsc channel failed.
    MpscRecvFail(mpsc::RecvError),
    // Invalid argument passed to resize.
    ResizeInval(String),
    // process_queue was called with an unexpected event type (carried in the
    // variant).
    ProcessQueueWrongEvType(u16),
    // Triggering the virtio interrupt failed.
    FailedSignal(io::Error),
}
82 
// Device configuration space layout, taken from
// include/uapi/linux/virtio_balloon.h.
//
// Both fields are page counts, where one page is
// `1 << VIRTIO_BALLOON_PFN_SHIFT` bytes.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
struct VirtioBalloonConfig {
    // Number of pages host wants Guest to give up.
    num_pages: u32,
    // Number of pages we've actually got in balloon.
    actual: u32,
}
92 
// Byte offset and byte length of the `actual` field inside
// `VirtioBalloonConfig`; `write_config` only accepts writes matching exactly
// this window.
const CONFIG_ACTUAL_OFFSET: u64 = 4;
const CONFIG_ACTUAL_SIZE: usize = 4;

// SAFETY: VirtioBalloonConfig is `repr(C)`, contains only two `u32` fields
// (plain data) and therefore has no implicit padding.
unsafe impl ByteValued for VirtioBalloonConfig {}
98 
99 struct VirtioBalloonResizeReceiver {
100     size: Arc<AtomicU64>,
101     tx: mpsc::Sender<Result<(), Error>>,
102     evt: EventFd,
103 }
104 
105 impl VirtioBalloonResizeReceiver {
106     fn get_size(&self) -> u64 {
107         self.size.load(Ordering::Acquire)
108     }
109 
110     fn send(&self, r: Result<(), Error>) -> Result<(), mpsc::SendError<Result<(), Error>>> {
111         self.tx.send(r)
112     }
113 }
114 
115 struct VirtioBalloonResize {
116     size: Arc<AtomicU64>,
117     tx: mpsc::Sender<Result<(), Error>>,
118     rx: mpsc::Receiver<Result<(), Error>>,
119     evt: EventFd,
120 }
121 
122 impl VirtioBalloonResize {
123     pub fn new() -> io::Result<Self> {
124         let (tx, rx) = mpsc::channel();
125 
126         Ok(Self {
127             size: Arc::new(AtomicU64::new(0)),
128             tx,
129             rx,
130             evt: EventFd::new(EFD_NONBLOCK)?,
131         })
132     }
133 
134     pub fn get_receiver(&self) -> Result<VirtioBalloonResizeReceiver, Error> {
135         Ok(VirtioBalloonResizeReceiver {
136             size: self.size.clone(),
137             tx: self.tx.clone(),
138             evt: self.evt.try_clone().map_err(Error::EventFdTryCloneFail)?,
139         })
140     }
141 
142     pub fn work(&self, size: u64) -> Result<(), Error> {
143         self.size.store(size, Ordering::Release);
144         self.evt.write(1).map_err(Error::EventFdWriteFail)?;
145         self.rx.recv().map_err(Error::MpscRecvFail)?
146     }
147 }
148 
// State owned by the balloon worker thread, which services resize requests
// and the inflate/deflate virtqueues.
struct BalloonEpollHandler {
    // Device configuration shared with the `Balloon` device object.
    config: Arc<Mutex<VirtioBalloonConfig>>,
    // Worker-side half of the resize handshake.
    resize_receiver: VirtioBalloonResizeReceiver,
    // Virtqueues: index 0 is the inflate queue, index 1 the deflate queue.
    queues: Vec<Queue>,
    // Guest memory handle used to read descriptors and resolve guest addresses.
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    // Callback used to raise queue and config interrupts.
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    // EventFd signalled when the inflate queue has new descriptors.
    inflate_queue_evt: EventFd,
    // EventFd signalled when the deflate queue has new descriptors.
    deflate_queue_evt: EventFd,
    // Passed to `EpollHelper::new` together with `pause_evt`.
    kill_evt: EventFd,
    pause_evt: EventFd,
}
160 
161 impl BalloonEpollHandler {
162     fn signal(
163         &self,
164         int_type: &VirtioInterruptType,
165         queue: Option<&Queue>,
166     ) -> result::Result<(), Error> {
167         self.interrupt_cb.trigger(int_type, queue).map_err(|e| {
168             error!("Failed to signal used queue: {:?}", e);
169             Error::FailedSignal(e)
170         })
171     }
172 
173     fn process_queue(&mut self, ev_type: u16) -> result::Result<(), Error> {
174         let queue_index = match ev_type {
175             INFLATE_QUEUE_EVENT => 0,
176             DEFLATE_QUEUE_EVENT => 1,
177             _ => return Err(Error::ProcessQueueWrongEvType(ev_type)),
178         };
179 
180         let mut used_desc_heads = [0; QUEUE_SIZE as usize];
181         let mut used_count = 0;
182         let mem = self.mem.memory();
183         for avail_desc in self.queues[queue_index].iter(&mem) {
184             used_desc_heads[used_count] = avail_desc.index;
185             used_count += 1;
186 
187             let data_chunk_size = size_of::<u32>();
188 
189             // The head contains the request type which MUST be readable.
190             if avail_desc.is_write_only() {
191                 error!("The head contains the request type is not right");
192                 return Err(Error::UnexpectedWriteOnlyDescriptor);
193             }
194             if avail_desc.len as usize % data_chunk_size != 0 {
195                 error!("the request size {} is not right", avail_desc.len);
196                 return Err(Error::InvalidRequest);
197             }
198 
199             let mut offset = 0u64;
200             while offset < avail_desc.len as u64 {
201                 let addr = avail_desc.addr.checked_add(offset).unwrap();
202                 let pfn: u32 = mem.read_obj(addr).map_err(Error::GuestMemory)?;
203                 offset += data_chunk_size as u64;
204 
205                 let gpa = (pfn as u64) << VIRTIO_BALLOON_PFN_SHIFT;
206                 if let Ok(hva) = mem.get_host_address(GuestAddress(gpa)) {
207                     let advice = match ev_type {
208                         INFLATE_QUEUE_EVENT => {
209                             let region =
210                                 mem.find_region(GuestAddress(gpa))
211                                     .ok_or(Error::GuestMemory(
212                                         GuestMemoryError::InvalidGuestAddress(GuestAddress(gpa)),
213                                     ))?;
214                             if let Some(f_off) = region.file_offset() {
215                                 let offset = hva as usize - region.as_ptr() as usize;
216                                 let res = unsafe {
217                                     libc::fallocate64(
218                                         f_off.file().as_raw_fd(),
219                                         libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
220                                         (offset as u64 + f_off.start()) as libc::off64_t,
221                                         (1 << VIRTIO_BALLOON_PFN_SHIFT) as libc::off64_t,
222                                     )
223                                 };
224 
225                                 if res != 0 {
226                                     return Err(Error::FallocateFail(io::Error::last_os_error()));
227                                 }
228                             }
229                             libc::MADV_DONTNEED
230                         }
231                         DEFLATE_QUEUE_EVENT => libc::MADV_WILLNEED,
232                         _ => return Err(Error::ProcessQueueWrongEvType(ev_type)),
233                     };
234                     // Need unsafe to do syscall madvise
235                     let res = unsafe {
236                         libc::madvise(
237                             hva as *mut libc::c_void,
238                             (1 << VIRTIO_BALLOON_PFN_SHIFT) as libc::size_t,
239                             advice,
240                         )
241                     };
242                     if res != 0 {
243                         return Err(Error::MadviseFail(io::Error::last_os_error()));
244                     }
245                 } else {
246                     error!("Address 0x{:x} is not available", gpa);
247                     return Err(Error::InvalidRequest);
248                 }
249             }
250         }
251 
252         for &desc_index in &used_desc_heads[..used_count] {
253             self.queues[queue_index].add_used(&mem, desc_index, 0);
254         }
255         if used_count > 0 {
256             self.signal(&VirtioInterruptType::Queue, Some(&self.queues[queue_index]))?;
257         }
258 
259         Ok(())
260     }
261 
262     fn run(
263         &mut self,
264         paused: Arc<AtomicBool>,
265         paused_sync: Arc<Barrier>,
266     ) -> result::Result<(), EpollHelperError> {
267         let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
268         helper.add_event(self.resize_receiver.evt.as_raw_fd(), RESIZE_EVENT)?;
269         helper.add_event(self.inflate_queue_evt.as_raw_fd(), INFLATE_QUEUE_EVENT)?;
270         helper.add_event(self.deflate_queue_evt.as_raw_fd(), DEFLATE_QUEUE_EVENT)?;
271         helper.run(paused, paused_sync, self)?;
272 
273         Ok(())
274     }
275 }
276 
277 impl EpollHelperHandler for BalloonEpollHandler {
278     fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
279         let ev_type = event.data as u16;
280         match ev_type {
281             RESIZE_EVENT => {
282                 if let Err(e) = self.resize_receiver.evt.read() {
283                     error!("Failed to get resize event: {:?}", e);
284                     return true;
285                 }
286                 let mut signal_error = false;
287                 let r = {
288                     let mut config = self.config.lock().unwrap();
289                     config.num_pages =
290                         (self.resize_receiver.get_size() >> VIRTIO_BALLOON_PFN_SHIFT) as u32;
291                     if let Err(e) = self.signal(&VirtioInterruptType::Config, None) {
292                         signal_error = true;
293                         Err(e)
294                     } else {
295                         Ok(())
296                     }
297                 };
298                 if let Err(e) = &r {
299                     // This error will send back to resize caller.
300                     error!("Handle resize event get error: {:?}", e);
301                 }
302                 if let Err(e) = self.resize_receiver.send(r) {
303                     error!("Sending \"resize\" generated error: {:?}", e);
304                     return true;
305                 }
306                 if signal_error {
307                     return true;
308                 }
309             }
310             INFLATE_QUEUE_EVENT => {
311                 if let Err(e) = self.inflate_queue_evt.read() {
312                     error!("Failed to get inflate queue event: {:?}", e);
313                     return true;
314                 } else if let Err(e) = self.process_queue(ev_type) {
315                     error!("Failed to signal used inflate queue: {:?}", e);
316                     return true;
317                 }
318             }
319             DEFLATE_QUEUE_EVENT => {
320                 if let Err(e) = self.deflate_queue_evt.read() {
321                     error!("Failed to get deflate queue event: {:?}", e);
322                     return true;
323                 } else if let Err(e) = self.process_queue(ev_type) {
324                     error!("Failed to signal used deflate queue: {:?}", e);
325                     return true;
326                 }
327             }
328             _ => {
329                 error!("Unknown event for virtio-balloon");
330                 return true;
331             }
332         }
333 
334         false
335     }
336 }
337 
// Virtio balloon device: lets the host ask the guest to give up (inflate) or
// take back (deflate) memory pages.
pub struct Balloon {
    // State shared by virtio device implementations (features, queues, worker
    // threads, pause/kill EventFds).
    common: VirtioCommon,
    // Device identifier; also used as the worker thread name.
    id: String,
    // Requester side of the resize handshake with the worker thread.
    resize: VirtioBalloonResize,
    // Device configuration space, shared with the worker thread.
    config: Arc<Mutex<VirtioBalloonConfig>>,
    // Action used when building the worker thread's seccomp filter.
    seccomp_action: SeccompAction,
}
346 
347 impl Balloon {
348     // Create a new virtio-balloon.
349     pub fn new(
350         id: String,
351         size: u64,
352         deflate_on_oom: bool,
353         seccomp_action: SeccompAction,
354     ) -> io::Result<Self> {
355         let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;
356         if deflate_on_oom {
357             avail_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM;
358         }
359 
360         let config = VirtioBalloonConfig {
361             num_pages: (size >> VIRTIO_BALLOON_PFN_SHIFT) as u32,
362             ..Default::default()
363         };
364 
365         Ok(Balloon {
366             common: VirtioCommon {
367                 device_type: VirtioDeviceType::Balloon as u32,
368                 avail_features,
369                 paused_sync: Some(Arc::new(Barrier::new(2))),
370                 queue_sizes: QUEUE_SIZES.to_vec(),
371                 min_queues: NUM_QUEUES as u16,
372                 ..Default::default()
373             },
374             id,
375             resize: VirtioBalloonResize::new()?,
376             config: Arc::new(Mutex::new(config)),
377             seccomp_action,
378         })
379     }
380 
381     pub fn resize(&self, size: u64) -> Result<(), Error> {
382         self.resize.work(size)
383     }
384 
385     // Get the actual size of the virtio-balloon.
386     pub fn get_actual(&self) -> u64 {
387         (self.config.lock().unwrap().actual as u64) << VIRTIO_BALLOON_PFN_SHIFT
388     }
389 }
390 
391 impl Drop for Balloon {
392     fn drop(&mut self) {
393         if let Some(kill_evt) = self.common.kill_evt.take() {
394             // Ignore the result because there is nothing we can do about it.
395             let _ = kill_evt.write(1);
396         }
397     }
398 }
399 
400 impl VirtioDevice for Balloon {
401     fn device_type(&self) -> u32 {
402         self.common.device_type
403     }
404 
405     fn queue_max_sizes(&self) -> &[u16] {
406         &self.common.queue_sizes
407     }
408 
409     fn features(&self) -> u64 {
410         self.common.avail_features
411     }
412 
413     fn ack_features(&mut self, value: u64) {
414         self.common.ack_features(value)
415     }
416 
417     fn read_config(&self, offset: u64, data: &mut [u8]) {
418         self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data);
419     }
420 
421     fn write_config(&mut self, offset: u64, data: &[u8]) {
422         // The "actual" field is the only mutable field
423         if offset != CONFIG_ACTUAL_OFFSET || data.len() != CONFIG_ACTUAL_SIZE {
424             error!(
425                 "Attempt to write to read-only field: offset {:x} length {}",
426                 offset,
427                 data.len()
428             );
429             return;
430         }
431 
432         self.write_config_helper(self.config.lock().unwrap().as_mut_slice(), offset, data);
433     }
434 
435     fn activate(
436         &mut self,
437         mem: GuestMemoryAtomic<GuestMemoryMmap>,
438         interrupt_cb: Arc<dyn VirtioInterrupt>,
439         queues: Vec<Queue>,
440         mut queue_evts: Vec<EventFd>,
441     ) -> ActivateResult {
442         self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
443         let (kill_evt, pause_evt) = self.common.dup_eventfds();
444 
445         let mut handler = BalloonEpollHandler {
446             config: self.config.clone(),
447             resize_receiver: self.resize.get_receiver().map_err(|e| {
448                 error!("failed to clone resize EventFd: {:?}", e);
449                 ActivateError::BadActivate
450             })?,
451             queues,
452             mem,
453             interrupt_cb,
454             inflate_queue_evt: queue_evts.remove(0),
455             deflate_queue_evt: queue_evts.remove(0),
456             kill_evt,
457             pause_evt,
458         };
459 
460         let paused = self.common.paused.clone();
461         let paused_sync = self.common.paused_sync.clone();
462         let mut epoll_threads = Vec::new();
463         let virtio_balloon_seccomp_filter =
464             get_seccomp_filter(&self.seccomp_action, Thread::VirtioBalloon)
465                 .map_err(ActivateError::CreateSeccompFilter)?;
466         thread::Builder::new()
467             .name(self.id.clone())
468             .spawn(move || {
469                 if let Err(e) = SeccompFilter::apply(virtio_balloon_seccomp_filter) {
470                     error!("Error applying seccomp filter: {:?}", e);
471                 } else if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
472                     error!("Error running worker: {:?}", e);
473                 }
474             })
475             .map(|thread| epoll_threads.push(thread))
476             .map_err(|e| {
477                 error!("failed to clone virtio-balloon epoll thread: {}", e);
478                 ActivateError::BadActivate
479             })?;
480         self.common.epoll_threads = Some(epoll_threads);
481 
482         event!("virtio-device", "activated", "id", &self.id);
483         Ok(())
484     }
485 
486     fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
487         let result = self.common.reset();
488         event!("virtio-device", "reset", "id", &self.id);
489         result
490     }
491 }
492 
// Pause/resume support: both operations are delegated to the shared
// `VirtioCommon` state.
impl Pausable for Balloon {
    // Pauses the device via the common virtio state.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    // Resumes the device via the common virtio state.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}
502 
// Snapshot support: only the identifier is provided here; every other method
// falls back to the trait's default implementation.
impl Snapshottable for Balloon {
    // Returns a copy of the device identifier.
    fn id(&self) -> String {
        self.id.clone()
    }
}
// No balloon-specific behavior: the traits' default methods are used.
impl Transportable for Balloon {}
impl Migratable for Balloon {}
510