10bbc82e4SYishai Hadas // SPDX-License-Identifier: GPL-2.0-only
20bbc82e4SYishai Hadas /*
30bbc82e4SYishai Hadas * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
40bbc82e4SYishai Hadas */
50bbc82e4SYishai Hadas
60bbc82e4SYishai Hadas #include <linux/device.h>
70bbc82e4SYishai Hadas #include <linux/module.h>
80bbc82e4SYishai Hadas #include <linux/mutex.h>
90bbc82e4SYishai Hadas #include <linux/pci.h>
100bbc82e4SYishai Hadas #include <linux/pm_runtime.h>
110bbc82e4SYishai Hadas #include <linux/types.h>
120bbc82e4SYishai Hadas #include <linux/uaccess.h>
130bbc82e4SYishai Hadas #include <linux/vfio.h>
140bbc82e4SYishai Hadas #include <linux/vfio_pci_core.h>
150bbc82e4SYishai Hadas #include <linux/virtio_pci.h>
160bbc82e4SYishai Hadas #include <linux/virtio_net.h>
170bbc82e4SYishai Hadas #include <linux/virtio_pci_admin.h>
180bbc82e4SYishai Hadas #include <linux/anon_inodes.h>
190bbc82e4SYishai Hadas
200bbc82e4SYishai Hadas #include "common.h"
210bbc82e4SYishai Hadas
/*
 * Device specification max parts size: 2^(bit width of the
 * parts_size.size field of the metadata result) - 1.
 */
#define MAX_LOAD_SIZE							\
	(BIT_ULL(BITS_PER_TYPE(						\
		((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1)

/* Size of the first buffer allocated on the target side */
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
280bbc82e4SYishai Hadas
296cea64b1SYishai Hadas static int
306cea64b1SYishai Hadas virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
316cea64b1SYishai Hadas u32 ctx_size);
326cea64b1SYishai Hadas
330bbc82e4SYishai Hadas static struct page *
virtiovf_get_migration_page(struct virtiovf_data_buffer * buf,unsigned long offset)340bbc82e4SYishai Hadas virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
350bbc82e4SYishai Hadas unsigned long offset)
360bbc82e4SYishai Hadas {
370bbc82e4SYishai Hadas unsigned long cur_offset = 0;
380bbc82e4SYishai Hadas struct scatterlist *sg;
390bbc82e4SYishai Hadas unsigned int i;
400bbc82e4SYishai Hadas
410bbc82e4SYishai Hadas /* All accesses are sequential */
420bbc82e4SYishai Hadas if (offset < buf->last_offset || !buf->last_offset_sg) {
430bbc82e4SYishai Hadas buf->last_offset = 0;
440bbc82e4SYishai Hadas buf->last_offset_sg = buf->table.sgt.sgl;
450bbc82e4SYishai Hadas buf->sg_last_entry = 0;
460bbc82e4SYishai Hadas }
470bbc82e4SYishai Hadas
480bbc82e4SYishai Hadas cur_offset = buf->last_offset;
490bbc82e4SYishai Hadas
500bbc82e4SYishai Hadas for_each_sg(buf->last_offset_sg, sg,
510bbc82e4SYishai Hadas buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
520bbc82e4SYishai Hadas if (offset < sg->length + cur_offset) {
530bbc82e4SYishai Hadas buf->last_offset_sg = sg;
540bbc82e4SYishai Hadas buf->sg_last_entry += i;
550bbc82e4SYishai Hadas buf->last_offset = cur_offset;
560bbc82e4SYishai Hadas return nth_page(sg_page(sg),
570bbc82e4SYishai Hadas (offset - cur_offset) / PAGE_SIZE);
580bbc82e4SYishai Hadas }
590bbc82e4SYishai Hadas cur_offset += sg->length;
600bbc82e4SYishai Hadas }
610bbc82e4SYishai Hadas return NULL;
620bbc82e4SYishai Hadas }
630bbc82e4SYishai Hadas
virtiovf_add_migration_pages(struct virtiovf_data_buffer * buf,unsigned int npages)640bbc82e4SYishai Hadas static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
650bbc82e4SYishai Hadas unsigned int npages)
660bbc82e4SYishai Hadas {
670bbc82e4SYishai Hadas unsigned int to_alloc = npages;
680bbc82e4SYishai Hadas struct page **page_list;
690bbc82e4SYishai Hadas unsigned long filled;
700bbc82e4SYishai Hadas unsigned int to_fill;
710bbc82e4SYishai Hadas int ret;
720bbc82e4SYishai Hadas int i;
730bbc82e4SYishai Hadas
740bbc82e4SYishai Hadas to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
750bbc82e4SYishai Hadas page_list = kvcalloc(to_fill, sizeof(*page_list), GFP_KERNEL_ACCOUNT);
760bbc82e4SYishai Hadas if (!page_list)
770bbc82e4SYishai Hadas return -ENOMEM;
780bbc82e4SYishai Hadas
790bbc82e4SYishai Hadas do {
80*6bf9b5b4SLuiz Capitulino filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
810bbc82e4SYishai Hadas page_list);
820bbc82e4SYishai Hadas if (!filled) {
830bbc82e4SYishai Hadas ret = -ENOMEM;
840bbc82e4SYishai Hadas goto err;
850bbc82e4SYishai Hadas }
860bbc82e4SYishai Hadas to_alloc -= filled;
870bbc82e4SYishai Hadas ret = sg_alloc_append_table_from_pages(&buf->table, page_list,
880bbc82e4SYishai Hadas filled, 0, filled << PAGE_SHIFT, UINT_MAX,
890bbc82e4SYishai Hadas SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT);
900bbc82e4SYishai Hadas
910bbc82e4SYishai Hadas if (ret)
920bbc82e4SYishai Hadas goto err_append;
930bbc82e4SYishai Hadas buf->allocated_length += filled * PAGE_SIZE;
940bbc82e4SYishai Hadas /* clean input for another bulk allocation */
950bbc82e4SYishai Hadas memset(page_list, 0, filled * sizeof(*page_list));
960bbc82e4SYishai Hadas to_fill = min_t(unsigned int, to_alloc,
970bbc82e4SYishai Hadas PAGE_SIZE / sizeof(*page_list));
980bbc82e4SYishai Hadas } while (to_alloc > 0);
990bbc82e4SYishai Hadas
1000bbc82e4SYishai Hadas kvfree(page_list);
1010bbc82e4SYishai Hadas return 0;
1020bbc82e4SYishai Hadas
1030bbc82e4SYishai Hadas err_append:
1040bbc82e4SYishai Hadas for (i = filled - 1; i >= 0; i--)
1050bbc82e4SYishai Hadas __free_page(page_list[i]);
1060bbc82e4SYishai Hadas err:
1070bbc82e4SYishai Hadas kvfree(page_list);
1080bbc82e4SYishai Hadas return ret;
1090bbc82e4SYishai Hadas }
1100bbc82e4SYishai Hadas
virtiovf_free_data_buffer(struct virtiovf_data_buffer * buf)1110bbc82e4SYishai Hadas static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
1120bbc82e4SYishai Hadas {
1130bbc82e4SYishai Hadas struct sg_page_iter sg_iter;
1140bbc82e4SYishai Hadas
115*6bf9b5b4SLuiz Capitulino /* Undo alloc_pages_bulk() */
1160bbc82e4SYishai Hadas for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
1170bbc82e4SYishai Hadas __free_page(sg_page_iter_page(&sg_iter));
1180bbc82e4SYishai Hadas sg_free_append_table(&buf->table);
1190bbc82e4SYishai Hadas kfree(buf);
1200bbc82e4SYishai Hadas }
1210bbc82e4SYishai Hadas
1220bbc82e4SYishai Hadas static struct virtiovf_data_buffer *
virtiovf_alloc_data_buffer(struct virtiovf_migration_file * migf,size_t length)1230bbc82e4SYishai Hadas virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length)
1240bbc82e4SYishai Hadas {
1250bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf;
1260bbc82e4SYishai Hadas int ret;
1270bbc82e4SYishai Hadas
1280bbc82e4SYishai Hadas buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
1290bbc82e4SYishai Hadas if (!buf)
1300bbc82e4SYishai Hadas return ERR_PTR(-ENOMEM);
1310bbc82e4SYishai Hadas
1320bbc82e4SYishai Hadas ret = virtiovf_add_migration_pages(buf,
1330bbc82e4SYishai Hadas DIV_ROUND_UP_ULL(length, PAGE_SIZE));
1340bbc82e4SYishai Hadas if (ret)
1350bbc82e4SYishai Hadas goto end;
1360bbc82e4SYishai Hadas
1370bbc82e4SYishai Hadas buf->migf = migf;
1380bbc82e4SYishai Hadas return buf;
1390bbc82e4SYishai Hadas end:
1400bbc82e4SYishai Hadas virtiovf_free_data_buffer(buf);
1410bbc82e4SYishai Hadas return ERR_PTR(ret);
1420bbc82e4SYishai Hadas }
1430bbc82e4SYishai Hadas
virtiovf_put_data_buffer(struct virtiovf_data_buffer * buf)1440bbc82e4SYishai Hadas static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf)
1450bbc82e4SYishai Hadas {
1460bbc82e4SYishai Hadas spin_lock_irq(&buf->migf->list_lock);
1470bbc82e4SYishai Hadas list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
1480bbc82e4SYishai Hadas spin_unlock_irq(&buf->migf->list_lock);
1490bbc82e4SYishai Hadas }
1500bbc82e4SYishai Hadas
1510bbc82e4SYishai Hadas static int
virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device * virtvdev,u8 type,u32 * obj_id)1520bbc82e4SYishai Hadas virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type,
1530bbc82e4SYishai Hadas u32 *obj_id)
1540bbc82e4SYishai Hadas {
1550bbc82e4SYishai Hadas return virtio_pci_admin_obj_create(virtvdev->core_device.pdev,
1560bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id);
1570bbc82e4SYishai Hadas }
1580bbc82e4SYishai Hadas
1590bbc82e4SYishai Hadas static void
virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device * virtvdev,u32 obj_id)1600bbc82e4SYishai Hadas virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
1610bbc82e4SYishai Hadas {
1620bbc82e4SYishai Hadas virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev,
1630bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
1640bbc82e4SYishai Hadas }
1650bbc82e4SYishai Hadas
1666cea64b1SYishai Hadas static struct virtiovf_data_buffer *
virtiovf_get_data_buffer(struct virtiovf_migration_file * migf,size_t length)1676cea64b1SYishai Hadas virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
1686cea64b1SYishai Hadas {
1696cea64b1SYishai Hadas struct virtiovf_data_buffer *buf, *temp_buf;
1706cea64b1SYishai Hadas struct list_head free_list;
1716cea64b1SYishai Hadas
1726cea64b1SYishai Hadas INIT_LIST_HEAD(&free_list);
1736cea64b1SYishai Hadas
1746cea64b1SYishai Hadas spin_lock_irq(&migf->list_lock);
1756cea64b1SYishai Hadas list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
1766cea64b1SYishai Hadas list_del_init(&buf->buf_elm);
1776cea64b1SYishai Hadas if (buf->allocated_length >= length) {
1786cea64b1SYishai Hadas spin_unlock_irq(&migf->list_lock);
1796cea64b1SYishai Hadas goto found;
1806cea64b1SYishai Hadas }
1816cea64b1SYishai Hadas /*
1826cea64b1SYishai Hadas * Prevent holding redundant buffers. Put in a free
1836cea64b1SYishai Hadas * list and call at the end not under the spin lock
1846cea64b1SYishai Hadas * (&migf->list_lock) to minimize its scope usage.
1856cea64b1SYishai Hadas */
1866cea64b1SYishai Hadas list_add(&buf->buf_elm, &free_list);
1876cea64b1SYishai Hadas }
1886cea64b1SYishai Hadas spin_unlock_irq(&migf->list_lock);
1896cea64b1SYishai Hadas buf = virtiovf_alloc_data_buffer(migf, length);
1906cea64b1SYishai Hadas
1916cea64b1SYishai Hadas found:
1926cea64b1SYishai Hadas while ((temp_buf = list_first_entry_or_null(&free_list,
1936cea64b1SYishai Hadas struct virtiovf_data_buffer, buf_elm))) {
1946cea64b1SYishai Hadas list_del(&temp_buf->buf_elm);
1956cea64b1SYishai Hadas virtiovf_free_data_buffer(temp_buf);
1966cea64b1SYishai Hadas }
1976cea64b1SYishai Hadas
1986cea64b1SYishai Hadas return buf;
1996cea64b1SYishai Hadas }
2006cea64b1SYishai Hadas
virtiovf_clean_migf_resources(struct virtiovf_migration_file * migf)2010bbc82e4SYishai Hadas static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
2020bbc82e4SYishai Hadas {
2030bbc82e4SYishai Hadas struct virtiovf_data_buffer *entry;
2040bbc82e4SYishai Hadas
2050bbc82e4SYishai Hadas if (migf->buf) {
2060bbc82e4SYishai Hadas virtiovf_free_data_buffer(migf->buf);
2070bbc82e4SYishai Hadas migf->buf = NULL;
2080bbc82e4SYishai Hadas }
2090bbc82e4SYishai Hadas
2100bbc82e4SYishai Hadas if (migf->buf_header) {
2110bbc82e4SYishai Hadas virtiovf_free_data_buffer(migf->buf_header);
2120bbc82e4SYishai Hadas migf->buf_header = NULL;
2130bbc82e4SYishai Hadas }
2140bbc82e4SYishai Hadas
2150bbc82e4SYishai Hadas list_splice(&migf->avail_list, &migf->buf_list);
2160bbc82e4SYishai Hadas
2170bbc82e4SYishai Hadas while ((entry = list_first_entry_or_null(&migf->buf_list,
2180bbc82e4SYishai Hadas struct virtiovf_data_buffer, buf_elm))) {
2190bbc82e4SYishai Hadas list_del(&entry->buf_elm);
2200bbc82e4SYishai Hadas virtiovf_free_data_buffer(entry);
2210bbc82e4SYishai Hadas }
2220bbc82e4SYishai Hadas
2230bbc82e4SYishai Hadas if (migf->has_obj_id)
2240bbc82e4SYishai Hadas virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id);
2250bbc82e4SYishai Hadas }
2260bbc82e4SYishai Hadas
virtiovf_disable_fd(struct virtiovf_migration_file * migf)2270bbc82e4SYishai Hadas static void virtiovf_disable_fd(struct virtiovf_migration_file *migf)
2280bbc82e4SYishai Hadas {
2290bbc82e4SYishai Hadas mutex_lock(&migf->lock);
2300bbc82e4SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR;
2310bbc82e4SYishai Hadas migf->filp->f_pos = 0;
2320bbc82e4SYishai Hadas mutex_unlock(&migf->lock);
2330bbc82e4SYishai Hadas }
2340bbc82e4SYishai Hadas
virtiovf_disable_fds(struct virtiovf_pci_core_device * virtvdev)2350bbc82e4SYishai Hadas static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev)
2360bbc82e4SYishai Hadas {
2370bbc82e4SYishai Hadas if (virtvdev->resuming_migf) {
2380bbc82e4SYishai Hadas virtiovf_disable_fd(virtvdev->resuming_migf);
2390bbc82e4SYishai Hadas virtiovf_clean_migf_resources(virtvdev->resuming_migf);
2400bbc82e4SYishai Hadas fput(virtvdev->resuming_migf->filp);
2410bbc82e4SYishai Hadas virtvdev->resuming_migf = NULL;
2420bbc82e4SYishai Hadas }
2430bbc82e4SYishai Hadas if (virtvdev->saving_migf) {
2440bbc82e4SYishai Hadas virtiovf_disable_fd(virtvdev->saving_migf);
2450bbc82e4SYishai Hadas virtiovf_clean_migf_resources(virtvdev->saving_migf);
2460bbc82e4SYishai Hadas fput(virtvdev->saving_migf->filp);
2470bbc82e4SYishai Hadas virtvdev->saving_migf = NULL;
2480bbc82e4SYishai Hadas }
2490bbc82e4SYishai Hadas }
2500bbc82e4SYishai Hadas
2510bbc82e4SYishai Hadas /*
2520bbc82e4SYishai Hadas * This function is called in all state_mutex unlock cases to
2530bbc82e4SYishai Hadas * handle a 'deferred_reset' if exists.
2540bbc82e4SYishai Hadas */
virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device * virtvdev)2550bbc82e4SYishai Hadas static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
2560bbc82e4SYishai Hadas {
2570bbc82e4SYishai Hadas again:
2580bbc82e4SYishai Hadas spin_lock(&virtvdev->reset_lock);
2590bbc82e4SYishai Hadas if (virtvdev->deferred_reset) {
2600bbc82e4SYishai Hadas virtvdev->deferred_reset = false;
2610bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock);
2620bbc82e4SYishai Hadas virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
2630bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev);
2640bbc82e4SYishai Hadas goto again;
2650bbc82e4SYishai Hadas }
2660bbc82e4SYishai Hadas mutex_unlock(&virtvdev->state_mutex);
2670bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock);
2680bbc82e4SYishai Hadas }
2690bbc82e4SYishai Hadas
virtiovf_migration_reset_done(struct pci_dev * pdev)2700bbc82e4SYishai Hadas void virtiovf_migration_reset_done(struct pci_dev *pdev)
2710bbc82e4SYishai Hadas {
2720bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);
2730bbc82e4SYishai Hadas
2740bbc82e4SYishai Hadas if (!virtvdev->migrate_cap)
2750bbc82e4SYishai Hadas return;
2760bbc82e4SYishai Hadas
2770bbc82e4SYishai Hadas /*
2780bbc82e4SYishai Hadas * As the higher VFIO layers are holding locks across reset and using
2790bbc82e4SYishai Hadas * those same locks with the mm_lock we need to prevent ABBA deadlock
2800bbc82e4SYishai Hadas * with the state_mutex and mm_lock.
2810bbc82e4SYishai Hadas * In case the state_mutex was taken already we defer the cleanup work
2820bbc82e4SYishai Hadas * to the unlock flow of the other running context.
2830bbc82e4SYishai Hadas */
2840bbc82e4SYishai Hadas spin_lock(&virtvdev->reset_lock);
2850bbc82e4SYishai Hadas virtvdev->deferred_reset = true;
2860bbc82e4SYishai Hadas if (!mutex_trylock(&virtvdev->state_mutex)) {
2870bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock);
2880bbc82e4SYishai Hadas return;
2890bbc82e4SYishai Hadas }
2900bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock);
2910bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev);
2920bbc82e4SYishai Hadas }
2930bbc82e4SYishai Hadas
virtiovf_release_file(struct inode * inode,struct file * filp)2940bbc82e4SYishai Hadas static int virtiovf_release_file(struct inode *inode, struct file *filp)
2950bbc82e4SYishai Hadas {
2960bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data;
2970bbc82e4SYishai Hadas
2980bbc82e4SYishai Hadas virtiovf_disable_fd(migf);
2990bbc82e4SYishai Hadas mutex_destroy(&migf->lock);
3000bbc82e4SYishai Hadas kfree(migf);
3010bbc82e4SYishai Hadas return 0;
3020bbc82e4SYishai Hadas }
3030bbc82e4SYishai Hadas
3040bbc82e4SYishai Hadas static struct virtiovf_data_buffer *
virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file * migf,loff_t pos,bool * end_of_data)3050bbc82e4SYishai Hadas virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf,
3060bbc82e4SYishai Hadas loff_t pos, bool *end_of_data)
3070bbc82e4SYishai Hadas {
3080bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf;
3090bbc82e4SYishai Hadas bool found = false;
3100bbc82e4SYishai Hadas
3110bbc82e4SYishai Hadas *end_of_data = false;
3120bbc82e4SYishai Hadas spin_lock_irq(&migf->list_lock);
3130bbc82e4SYishai Hadas if (list_empty(&migf->buf_list)) {
3140bbc82e4SYishai Hadas *end_of_data = true;
3150bbc82e4SYishai Hadas goto end;
3160bbc82e4SYishai Hadas }
3170bbc82e4SYishai Hadas
3180bbc82e4SYishai Hadas buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer,
3190bbc82e4SYishai Hadas buf_elm);
3200bbc82e4SYishai Hadas if (pos >= buf->start_pos &&
3210bbc82e4SYishai Hadas pos < buf->start_pos + buf->length) {
3220bbc82e4SYishai Hadas found = true;
3230bbc82e4SYishai Hadas goto end;
3240bbc82e4SYishai Hadas }
3250bbc82e4SYishai Hadas
3260bbc82e4SYishai Hadas /*
3270bbc82e4SYishai Hadas * As we use a stream based FD we may expect having the data always
3280bbc82e4SYishai Hadas * on first chunk
3290bbc82e4SYishai Hadas */
3300bbc82e4SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR;
3310bbc82e4SYishai Hadas
3320bbc82e4SYishai Hadas end:
3330bbc82e4SYishai Hadas spin_unlock_irq(&migf->list_lock);
3340bbc82e4SYishai Hadas return found ? buf : NULL;
3350bbc82e4SYishai Hadas }
3360bbc82e4SYishai Hadas
virtiovf_buf_read(struct virtiovf_data_buffer * vhca_buf,char __user ** buf,size_t * len,loff_t * pos)3370bbc82e4SYishai Hadas static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf,
3380bbc82e4SYishai Hadas char __user **buf, size_t *len, loff_t *pos)
3390bbc82e4SYishai Hadas {
3400bbc82e4SYishai Hadas unsigned long offset;
3410bbc82e4SYishai Hadas ssize_t done = 0;
3420bbc82e4SYishai Hadas size_t copy_len;
3430bbc82e4SYishai Hadas
3440bbc82e4SYishai Hadas copy_len = min_t(size_t,
3450bbc82e4SYishai Hadas vhca_buf->start_pos + vhca_buf->length - *pos, *len);
3460bbc82e4SYishai Hadas while (copy_len) {
3470bbc82e4SYishai Hadas size_t page_offset;
3480bbc82e4SYishai Hadas struct page *page;
3490bbc82e4SYishai Hadas size_t page_len;
3500bbc82e4SYishai Hadas u8 *from_buff;
3510bbc82e4SYishai Hadas int ret;
3520bbc82e4SYishai Hadas
3530bbc82e4SYishai Hadas offset = *pos - vhca_buf->start_pos;
3540bbc82e4SYishai Hadas page_offset = offset % PAGE_SIZE;
3550bbc82e4SYishai Hadas offset -= page_offset;
3560bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, offset);
3570bbc82e4SYishai Hadas if (!page)
3580bbc82e4SYishai Hadas return -EINVAL;
3590bbc82e4SYishai Hadas page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
3600bbc82e4SYishai Hadas from_buff = kmap_local_page(page);
3610bbc82e4SYishai Hadas ret = copy_to_user(*buf, from_buff + page_offset, page_len);
3620bbc82e4SYishai Hadas kunmap_local(from_buff);
3630bbc82e4SYishai Hadas if (ret)
3640bbc82e4SYishai Hadas return -EFAULT;
3650bbc82e4SYishai Hadas *pos += page_len;
3660bbc82e4SYishai Hadas *len -= page_len;
3670bbc82e4SYishai Hadas *buf += page_len;
3680bbc82e4SYishai Hadas done += page_len;
3690bbc82e4SYishai Hadas copy_len -= page_len;
3700bbc82e4SYishai Hadas }
3710bbc82e4SYishai Hadas
3720bbc82e4SYishai Hadas if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
3730bbc82e4SYishai Hadas spin_lock_irq(&vhca_buf->migf->list_lock);
3740bbc82e4SYishai Hadas list_del_init(&vhca_buf->buf_elm);
3750bbc82e4SYishai Hadas list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
3760bbc82e4SYishai Hadas spin_unlock_irq(&vhca_buf->migf->list_lock);
3770bbc82e4SYishai Hadas }
3780bbc82e4SYishai Hadas
3790bbc82e4SYishai Hadas return done;
3800bbc82e4SYishai Hadas }
3810bbc82e4SYishai Hadas
virtiovf_save_read(struct file * filp,char __user * buf,size_t len,loff_t * pos)3820bbc82e4SYishai Hadas static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len,
3830bbc82e4SYishai Hadas loff_t *pos)
3840bbc82e4SYishai Hadas {
3850bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data;
3860bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf;
3876cea64b1SYishai Hadas bool first_loop_call = true;
3880bbc82e4SYishai Hadas bool end_of_data;
3890bbc82e4SYishai Hadas ssize_t done = 0;
3900bbc82e4SYishai Hadas
3910bbc82e4SYishai Hadas if (pos)
3920bbc82e4SYishai Hadas return -ESPIPE;
3930bbc82e4SYishai Hadas pos = &filp->f_pos;
3940bbc82e4SYishai Hadas
3950bbc82e4SYishai Hadas mutex_lock(&migf->lock);
3960bbc82e4SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
3970bbc82e4SYishai Hadas done = -ENODEV;
3980bbc82e4SYishai Hadas goto out_unlock;
3990bbc82e4SYishai Hadas }
4000bbc82e4SYishai Hadas
4010bbc82e4SYishai Hadas while (len) {
4020bbc82e4SYishai Hadas ssize_t count;
4030bbc82e4SYishai Hadas
4040bbc82e4SYishai Hadas vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
4056cea64b1SYishai Hadas if (first_loop_call) {
4066cea64b1SYishai Hadas first_loop_call = false;
4076cea64b1SYishai Hadas /* Temporary end of file as part of PRE_COPY */
4086cea64b1SYishai Hadas if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
4096cea64b1SYishai Hadas done = -ENOMSG;
4106cea64b1SYishai Hadas goto out_unlock;
4116cea64b1SYishai Hadas }
4126cea64b1SYishai Hadas if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
4136cea64b1SYishai Hadas done = -EINVAL;
4146cea64b1SYishai Hadas goto out_unlock;
4156cea64b1SYishai Hadas }
4166cea64b1SYishai Hadas }
4176cea64b1SYishai Hadas
4180bbc82e4SYishai Hadas if (end_of_data)
4190bbc82e4SYishai Hadas goto out_unlock;
4200bbc82e4SYishai Hadas
4210bbc82e4SYishai Hadas if (!vhca_buf) {
4220bbc82e4SYishai Hadas done = -EINVAL;
4230bbc82e4SYishai Hadas goto out_unlock;
4240bbc82e4SYishai Hadas }
4250bbc82e4SYishai Hadas
4260bbc82e4SYishai Hadas count = virtiovf_buf_read(vhca_buf, &buf, &len, pos);
4270bbc82e4SYishai Hadas if (count < 0) {
4280bbc82e4SYishai Hadas done = count;
4290bbc82e4SYishai Hadas goto out_unlock;
4300bbc82e4SYishai Hadas }
4310bbc82e4SYishai Hadas done += count;
4320bbc82e4SYishai Hadas }
4330bbc82e4SYishai Hadas
4340bbc82e4SYishai Hadas out_unlock:
4350bbc82e4SYishai Hadas mutex_unlock(&migf->lock);
4360bbc82e4SYishai Hadas return done;
4370bbc82e4SYishai Hadas }
4380bbc82e4SYishai Hadas
virtiovf_precopy_ioctl(struct file * filp,unsigned int cmd,unsigned long arg)4396cea64b1SYishai Hadas static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
4406cea64b1SYishai Hadas unsigned long arg)
4416cea64b1SYishai Hadas {
4426cea64b1SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data;
4436cea64b1SYishai Hadas struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
4446cea64b1SYishai Hadas struct vfio_precopy_info info = {};
4456cea64b1SYishai Hadas loff_t *pos = &filp->f_pos;
4466cea64b1SYishai Hadas bool end_of_data = false;
4476cea64b1SYishai Hadas unsigned long minsz;
4486cea64b1SYishai Hadas u32 ctx_size = 0;
4496cea64b1SYishai Hadas int ret;
4506cea64b1SYishai Hadas
4516cea64b1SYishai Hadas if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
4526cea64b1SYishai Hadas return -ENOTTY;
4536cea64b1SYishai Hadas
4546cea64b1SYishai Hadas minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
4556cea64b1SYishai Hadas if (copy_from_user(&info, (void __user *)arg, minsz))
4566cea64b1SYishai Hadas return -EFAULT;
4576cea64b1SYishai Hadas
4586cea64b1SYishai Hadas if (info.argsz < minsz)
4596cea64b1SYishai Hadas return -EINVAL;
4606cea64b1SYishai Hadas
4616cea64b1SYishai Hadas mutex_lock(&virtvdev->state_mutex);
4626cea64b1SYishai Hadas if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
4636cea64b1SYishai Hadas virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
4646cea64b1SYishai Hadas ret = -EINVAL;
4656cea64b1SYishai Hadas goto err_state_unlock;
4666cea64b1SYishai Hadas }
4676cea64b1SYishai Hadas
4686cea64b1SYishai Hadas /*
4696cea64b1SYishai Hadas * The virtio specification does not include a PRE_COPY concept.
4706cea64b1SYishai Hadas * Since we can expect the data to remain the same for a certain period,
4716cea64b1SYishai Hadas * we use a rate limiter mechanism before making a call to the device.
4726cea64b1SYishai Hadas */
4736cea64b1SYishai Hadas if (__ratelimit(&migf->pre_copy_rl_state)) {
4746cea64b1SYishai Hadas
4756cea64b1SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
4766cea64b1SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
4776cea64b1SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
4786cea64b1SYishai Hadas &ctx_size);
4796cea64b1SYishai Hadas if (ret)
4806cea64b1SYishai Hadas goto err_state_unlock;
4816cea64b1SYishai Hadas }
4826cea64b1SYishai Hadas
4836cea64b1SYishai Hadas mutex_lock(&migf->lock);
4846cea64b1SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
4856cea64b1SYishai Hadas ret = -ENODEV;
4866cea64b1SYishai Hadas goto err_migf_unlock;
4876cea64b1SYishai Hadas }
4886cea64b1SYishai Hadas
4896cea64b1SYishai Hadas if (migf->pre_copy_initial_bytes > *pos) {
4906cea64b1SYishai Hadas info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
4916cea64b1SYishai Hadas } else {
4926cea64b1SYishai Hadas info.dirty_bytes = migf->max_pos - *pos;
4936cea64b1SYishai Hadas if (!info.dirty_bytes)
4946cea64b1SYishai Hadas end_of_data = true;
4956cea64b1SYishai Hadas info.dirty_bytes += ctx_size;
4966cea64b1SYishai Hadas }
4976cea64b1SYishai Hadas
4986cea64b1SYishai Hadas if (!end_of_data || !ctx_size) {
4996cea64b1SYishai Hadas mutex_unlock(&migf->lock);
5006cea64b1SYishai Hadas goto done;
5016cea64b1SYishai Hadas }
5026cea64b1SYishai Hadas
5036cea64b1SYishai Hadas mutex_unlock(&migf->lock);
5046cea64b1SYishai Hadas /*
5056cea64b1SYishai Hadas * We finished transferring the current state and the device has a
5066cea64b1SYishai Hadas * dirty state, read a new state.
5076cea64b1SYishai Hadas */
5086cea64b1SYishai Hadas ret = virtiovf_read_device_context_chunk(migf, ctx_size);
5096cea64b1SYishai Hadas if (ret)
5106cea64b1SYishai Hadas /*
5116cea64b1SYishai Hadas * The machine is running, and context size could be grow, so no reason to mark
5126cea64b1SYishai Hadas * the device state as VIRTIOVF_MIGF_STATE_ERROR.
5136cea64b1SYishai Hadas */
5146cea64b1SYishai Hadas goto err_state_unlock;
5156cea64b1SYishai Hadas
5166cea64b1SYishai Hadas done:
5176cea64b1SYishai Hadas virtiovf_state_mutex_unlock(virtvdev);
5186cea64b1SYishai Hadas if (copy_to_user((void __user *)arg, &info, minsz))
5196cea64b1SYishai Hadas return -EFAULT;
5206cea64b1SYishai Hadas return 0;
5216cea64b1SYishai Hadas
5226cea64b1SYishai Hadas err_migf_unlock:
5236cea64b1SYishai Hadas mutex_unlock(&migf->lock);
5246cea64b1SYishai Hadas err_state_unlock:
5256cea64b1SYishai Hadas virtiovf_state_mutex_unlock(virtvdev);
5266cea64b1SYishai Hadas return ret;
5276cea64b1SYishai Hadas }
5286cea64b1SYishai Hadas
5290bbc82e4SYishai Hadas static const struct file_operations virtiovf_save_fops = {
5300bbc82e4SYishai Hadas .owner = THIS_MODULE,
5310bbc82e4SYishai Hadas .read = virtiovf_save_read,
5326cea64b1SYishai Hadas .unlocked_ioctl = virtiovf_precopy_ioctl,
5336cea64b1SYishai Hadas .compat_ioctl = compat_ptr_ioctl,
5340bbc82e4SYishai Hadas .release = virtiovf_release_file,
5350bbc82e4SYishai Hadas };
5360bbc82e4SYishai Hadas
5370bbc82e4SYishai Hadas static int
virtiovf_add_buf_header(struct virtiovf_data_buffer * header_buf,u32 data_size)5380bbc82e4SYishai Hadas virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf,
5390bbc82e4SYishai Hadas u32 data_size)
5400bbc82e4SYishai Hadas {
5410bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = header_buf->migf;
5420bbc82e4SYishai Hadas struct virtiovf_migration_header header = {};
5430bbc82e4SYishai Hadas struct page *page;
5440bbc82e4SYishai Hadas u8 *to_buff;
5450bbc82e4SYishai Hadas
5460bbc82e4SYishai Hadas header.record_size = cpu_to_le64(data_size);
5470bbc82e4SYishai Hadas header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY);
5480bbc82e4SYishai Hadas header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA);
5490bbc82e4SYishai Hadas page = virtiovf_get_migration_page(header_buf, 0);
5500bbc82e4SYishai Hadas if (!page)
5510bbc82e4SYishai Hadas return -EINVAL;
5520bbc82e4SYishai Hadas to_buff = kmap_local_page(page);
5530bbc82e4SYishai Hadas memcpy(to_buff, &header, sizeof(header));
5540bbc82e4SYishai Hadas kunmap_local(to_buff);
5550bbc82e4SYishai Hadas header_buf->length = sizeof(header);
5560bbc82e4SYishai Hadas header_buf->start_pos = header_buf->migf->max_pos;
5570bbc82e4SYishai Hadas migf->max_pos += header_buf->length;
5580bbc82e4SYishai Hadas spin_lock_irq(&migf->list_lock);
5590bbc82e4SYishai Hadas list_add_tail(&header_buf->buf_elm, &migf->buf_list);
5600bbc82e4SYishai Hadas spin_unlock_irq(&migf->list_lock);
5610bbc82e4SYishai Hadas return 0;
5620bbc82e4SYishai Hadas }
5630bbc82e4SYishai Hadas
5640bbc82e4SYishai Hadas static int
virtiovf_read_device_context_chunk(struct virtiovf_migration_file * migf,u32 ctx_size)5650bbc82e4SYishai Hadas virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
5660bbc82e4SYishai Hadas u32 ctx_size)
5670bbc82e4SYishai Hadas {
5680bbc82e4SYishai Hadas struct virtiovf_data_buffer *header_buf;
5690bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf;
5700bbc82e4SYishai Hadas bool unmark_end = false;
5710bbc82e4SYishai Hadas struct scatterlist *sg;
5720bbc82e4SYishai Hadas unsigned int i;
5730bbc82e4SYishai Hadas u32 res_size;
5740bbc82e4SYishai Hadas int nent;
5750bbc82e4SYishai Hadas int ret;
5760bbc82e4SYishai Hadas
5776cea64b1SYishai Hadas buf = virtiovf_get_data_buffer(migf, ctx_size);
5780bbc82e4SYishai Hadas if (IS_ERR(buf))
5790bbc82e4SYishai Hadas return PTR_ERR(buf);
5800bbc82e4SYishai Hadas
5810bbc82e4SYishai Hadas /* Find the total count of SG entries which satisfies the size */
5820bbc82e4SYishai Hadas nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size);
5830bbc82e4SYishai Hadas if (nent <= 0) {
5840bbc82e4SYishai Hadas ret = -EINVAL;
5850bbc82e4SYishai Hadas goto out;
5860bbc82e4SYishai Hadas }
5870bbc82e4SYishai Hadas
5880bbc82e4SYishai Hadas /*
5890bbc82e4SYishai Hadas * Iterate to that SG entry and mark it as last (if it's not already)
5900bbc82e4SYishai Hadas * to let underlay layers iterate only till that entry.
5910bbc82e4SYishai Hadas */
5920bbc82e4SYishai Hadas for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
5930bbc82e4SYishai Hadas ;
5940bbc82e4SYishai Hadas
5950bbc82e4SYishai Hadas if (!sg_is_last(sg)) {
5960bbc82e4SYishai Hadas unmark_end = true;
5970bbc82e4SYishai Hadas sg_mark_end(sg);
5980bbc82e4SYishai Hadas }
5990bbc82e4SYishai Hadas
6000bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
6010bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS,
6020bbc82e4SYishai Hadas migf->obj_id,
6030bbc82e4SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
6040bbc82e4SYishai Hadas buf->table.sgt.sgl, &res_size);
6050bbc82e4SYishai Hadas /* Restore the original SG mark end */
6060bbc82e4SYishai Hadas if (unmark_end)
6070bbc82e4SYishai Hadas sg_unmark_end(sg);
6080bbc82e4SYishai Hadas if (ret)
6090bbc82e4SYishai Hadas goto out;
6100bbc82e4SYishai Hadas
6110bbc82e4SYishai Hadas buf->length = res_size;
6126cea64b1SYishai Hadas header_buf = virtiovf_get_data_buffer(migf,
6130bbc82e4SYishai Hadas sizeof(struct virtiovf_migration_header));
6140bbc82e4SYishai Hadas if (IS_ERR(header_buf)) {
6150bbc82e4SYishai Hadas ret = PTR_ERR(header_buf);
6160bbc82e4SYishai Hadas goto out;
6170bbc82e4SYishai Hadas }
6180bbc82e4SYishai Hadas
6190bbc82e4SYishai Hadas ret = virtiovf_add_buf_header(header_buf, res_size);
6200bbc82e4SYishai Hadas if (ret)
6210bbc82e4SYishai Hadas goto out_header;
6220bbc82e4SYishai Hadas
6230bbc82e4SYishai Hadas buf->start_pos = buf->migf->max_pos;
6240bbc82e4SYishai Hadas migf->max_pos += buf->length;
6250bbc82e4SYishai Hadas spin_lock(&migf->list_lock);
6260bbc82e4SYishai Hadas list_add_tail(&buf->buf_elm, &migf->buf_list);
6270bbc82e4SYishai Hadas spin_unlock_irq(&migf->list_lock);
6280bbc82e4SYishai Hadas return 0;
6290bbc82e4SYishai Hadas
6300bbc82e4SYishai Hadas out_header:
6310bbc82e4SYishai Hadas virtiovf_put_data_buffer(header_buf);
6320bbc82e4SYishai Hadas out:
6330bbc82e4SYishai Hadas virtiovf_put_data_buffer(buf);
6340bbc82e4SYishai Hadas return ret;
6350bbc82e4SYishai Hadas }
6360bbc82e4SYishai Hadas
6376cea64b1SYishai Hadas static int
virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device * virtvdev)6386cea64b1SYishai Hadas virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
6396cea64b1SYishai Hadas {
6406cea64b1SYishai Hadas struct virtiovf_migration_file *migf = virtvdev->saving_migf;
6416cea64b1SYishai Hadas u32 ctx_size;
6426cea64b1SYishai Hadas int ret;
6436cea64b1SYishai Hadas
6446cea64b1SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
6456cea64b1SYishai Hadas return -ENODEV;
6466cea64b1SYishai Hadas
6476cea64b1SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
6486cea64b1SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
6496cea64b1SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
6506cea64b1SYishai Hadas &ctx_size);
6516cea64b1SYishai Hadas if (ret)
6526cea64b1SYishai Hadas goto err;
6536cea64b1SYishai Hadas
6546cea64b1SYishai Hadas if (!ctx_size) {
6556cea64b1SYishai Hadas ret = -EINVAL;
6566cea64b1SYishai Hadas goto err;
6576cea64b1SYishai Hadas }
6586cea64b1SYishai Hadas
6596cea64b1SYishai Hadas ret = virtiovf_read_device_context_chunk(migf, ctx_size);
6606cea64b1SYishai Hadas if (ret)
6616cea64b1SYishai Hadas goto err;
6626cea64b1SYishai Hadas
6636cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
6646cea64b1SYishai Hadas return 0;
6656cea64b1SYishai Hadas
6666cea64b1SYishai Hadas err:
6676cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR;
6686cea64b1SYishai Hadas return ret;
6696cea64b1SYishai Hadas }
6706cea64b1SYishai Hadas
6710bbc82e4SYishai Hadas static struct virtiovf_migration_file *
virtiovf_pci_save_device_data(struct virtiovf_pci_core_device * virtvdev,bool pre_copy)6726cea64b1SYishai Hadas virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
6736cea64b1SYishai Hadas bool pre_copy)
6740bbc82e4SYishai Hadas {
6750bbc82e4SYishai Hadas struct virtiovf_migration_file *migf;
6760bbc82e4SYishai Hadas u32 ctx_size;
6770bbc82e4SYishai Hadas u32 obj_id;
6780bbc82e4SYishai Hadas int ret;
6790bbc82e4SYishai Hadas
6800bbc82e4SYishai Hadas migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
6810bbc82e4SYishai Hadas if (!migf)
6820bbc82e4SYishai Hadas return ERR_PTR(-ENOMEM);
6830bbc82e4SYishai Hadas
6840bbc82e4SYishai Hadas migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf,
6850bbc82e4SYishai Hadas O_RDONLY);
6860bbc82e4SYishai Hadas if (IS_ERR(migf->filp)) {
6870bbc82e4SYishai Hadas ret = PTR_ERR(migf->filp);
6880bbc82e4SYishai Hadas kfree(migf);
6890bbc82e4SYishai Hadas return ERR_PTR(ret);
6900bbc82e4SYishai Hadas }
6910bbc82e4SYishai Hadas
6920bbc82e4SYishai Hadas stream_open(migf->filp->f_inode, migf->filp);
6930bbc82e4SYishai Hadas mutex_init(&migf->lock);
6940bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->buf_list);
6950bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->avail_list);
6960bbc82e4SYishai Hadas spin_lock_init(&migf->list_lock);
6970bbc82e4SYishai Hadas migf->virtvdev = virtvdev;
6980bbc82e4SYishai Hadas
6990bbc82e4SYishai Hadas lockdep_assert_held(&virtvdev->state_mutex);
7000bbc82e4SYishai Hadas ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
7010bbc82e4SYishai Hadas &obj_id);
7020bbc82e4SYishai Hadas if (ret)
7030bbc82e4SYishai Hadas goto out;
7040bbc82e4SYishai Hadas
7050bbc82e4SYishai Hadas migf->obj_id = obj_id;
7060bbc82e4SYishai Hadas /* Mark as having a valid obj id which can be even 0 */
7070bbc82e4SYishai Hadas migf->has_obj_id = true;
7080bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
7090bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
7100bbc82e4SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
7110bbc82e4SYishai Hadas &ctx_size);
7120bbc82e4SYishai Hadas if (ret)
7130bbc82e4SYishai Hadas goto out_clean;
7140bbc82e4SYishai Hadas
7150bbc82e4SYishai Hadas if (!ctx_size) {
7160bbc82e4SYishai Hadas ret = -EINVAL;
7170bbc82e4SYishai Hadas goto out_clean;
7180bbc82e4SYishai Hadas }
7190bbc82e4SYishai Hadas
7200bbc82e4SYishai Hadas ret = virtiovf_read_device_context_chunk(migf, ctx_size);
7210bbc82e4SYishai Hadas if (ret)
7220bbc82e4SYishai Hadas goto out_clean;
7230bbc82e4SYishai Hadas
7246cea64b1SYishai Hadas if (pre_copy) {
7256cea64b1SYishai Hadas migf->pre_copy_initial_bytes = migf->max_pos;
7266cea64b1SYishai Hadas /* Arbitrarily set the pre-copy rate limit to 1-second intervals */
7276cea64b1SYishai Hadas ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
7286cea64b1SYishai Hadas /* Prevent any rate messages upon its usage */
7296cea64b1SYishai Hadas ratelimit_set_flags(&migf->pre_copy_rl_state,
7306cea64b1SYishai Hadas RATELIMIT_MSG_ON_RELEASE);
7316cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
7326cea64b1SYishai Hadas } else {
7336cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
7346cea64b1SYishai Hadas }
7356cea64b1SYishai Hadas
7360bbc82e4SYishai Hadas return migf;
7370bbc82e4SYishai Hadas
7380bbc82e4SYishai Hadas out_clean:
7390bbc82e4SYishai Hadas virtiovf_clean_migf_resources(migf);
7400bbc82e4SYishai Hadas out:
7410bbc82e4SYishai Hadas fput(migf->filp);
7420bbc82e4SYishai Hadas return ERR_PTR(ret);
7430bbc82e4SYishai Hadas }
7440bbc82e4SYishai Hadas
7450bbc82e4SYishai Hadas /*
7460bbc82e4SYishai Hadas * Set the required object header at the beginning of the buffer.
7470bbc82e4SYishai Hadas * The actual device parts data will be written post of the header offset.
7480bbc82e4SYishai Hadas */
virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer * vhca_buf)7490bbc82e4SYishai Hadas static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
7500bbc82e4SYishai Hadas {
7510bbc82e4SYishai Hadas struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {};
7520bbc82e4SYishai Hadas struct page *page;
7530bbc82e4SYishai Hadas u8 *to_buff;
7540bbc82e4SYishai Hadas
7550bbc82e4SYishai Hadas obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS);
7560bbc82e4SYishai Hadas obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id);
7570bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, 0);
7580bbc82e4SYishai Hadas if (!page)
7590bbc82e4SYishai Hadas return -EINVAL;
7600bbc82e4SYishai Hadas to_buff = kmap_local_page(page);
7610bbc82e4SYishai Hadas memcpy(to_buff, &obj_hdr, sizeof(obj_hdr));
7620bbc82e4SYishai Hadas kunmap_local(to_buff);
7630bbc82e4SYishai Hadas
7640bbc82e4SYishai Hadas /* Mark the buffer as including the header object data */
7650bbc82e4SYishai Hadas vhca_buf->include_header_object = 1;
7660bbc82e4SYishai Hadas return 0;
7670bbc82e4SYishai Hadas }
7680bbc82e4SYishai Hadas
7690bbc82e4SYishai Hadas static int
virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer * vhca_buf,const char __user ** buf,size_t * len,loff_t * pos,ssize_t * done)7700bbc82e4SYishai Hadas virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf,
7710bbc82e4SYishai Hadas const char __user **buf, size_t *len,
7720bbc82e4SYishai Hadas loff_t *pos, ssize_t *done)
7730bbc82e4SYishai Hadas {
7740bbc82e4SYishai Hadas unsigned long offset;
7750bbc82e4SYishai Hadas size_t page_offset;
7760bbc82e4SYishai Hadas struct page *page;
7770bbc82e4SYishai Hadas size_t page_len;
7780bbc82e4SYishai Hadas u8 *to_buff;
7790bbc82e4SYishai Hadas int ret;
7800bbc82e4SYishai Hadas
7810bbc82e4SYishai Hadas offset = *pos - vhca_buf->start_pos;
7820bbc82e4SYishai Hadas
7830bbc82e4SYishai Hadas if (vhca_buf->include_header_object)
7840bbc82e4SYishai Hadas /* The buffer holds the object header, update the offset accordingly */
7850bbc82e4SYishai Hadas offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
7860bbc82e4SYishai Hadas
7870bbc82e4SYishai Hadas page_offset = offset % PAGE_SIZE;
7880bbc82e4SYishai Hadas
7890bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, offset - page_offset);
7900bbc82e4SYishai Hadas if (!page)
7910bbc82e4SYishai Hadas return -EINVAL;
7920bbc82e4SYishai Hadas
7930bbc82e4SYishai Hadas page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
7940bbc82e4SYishai Hadas to_buff = kmap_local_page(page);
7950bbc82e4SYishai Hadas ret = copy_from_user(to_buff + page_offset, *buf, page_len);
7960bbc82e4SYishai Hadas kunmap_local(to_buff);
7970bbc82e4SYishai Hadas if (ret)
7980bbc82e4SYishai Hadas return -EFAULT;
7990bbc82e4SYishai Hadas
8000bbc82e4SYishai Hadas *pos += page_len;
8010bbc82e4SYishai Hadas *done += page_len;
8020bbc82e4SYishai Hadas *buf += page_len;
8030bbc82e4SYishai Hadas *len -= page_len;
8040bbc82e4SYishai Hadas vhca_buf->length += page_len;
8050bbc82e4SYishai Hadas return 0;
8060bbc82e4SYishai Hadas }
8070bbc82e4SYishai Hadas
8080bbc82e4SYishai Hadas static ssize_t
virtiovf_resume_read_chunk(struct virtiovf_migration_file * migf,struct virtiovf_data_buffer * vhca_buf,size_t chunk_size,const char __user ** buf,size_t * len,loff_t * pos,ssize_t * done,bool * has_work)8090bbc82e4SYishai Hadas virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf,
8100bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf,
8110bbc82e4SYishai Hadas size_t chunk_size, const char __user **buf,
8120bbc82e4SYishai Hadas size_t *len, loff_t *pos, ssize_t *done,
8130bbc82e4SYishai Hadas bool *has_work)
8140bbc82e4SYishai Hadas {
8150bbc82e4SYishai Hadas size_t copy_len, to_copy;
8160bbc82e4SYishai Hadas int ret;
8170bbc82e4SYishai Hadas
8180bbc82e4SYishai Hadas to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length);
8190bbc82e4SYishai Hadas copy_len = to_copy;
8200bbc82e4SYishai Hadas while (to_copy) {
8210bbc82e4SYishai Hadas ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
8220bbc82e4SYishai Hadas pos, done);
8230bbc82e4SYishai Hadas if (ret)
8240bbc82e4SYishai Hadas return ret;
8250bbc82e4SYishai Hadas }
8260bbc82e4SYishai Hadas
8270bbc82e4SYishai Hadas *len -= copy_len;
8280bbc82e4SYishai Hadas if (vhca_buf->length == chunk_size) {
8290bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK;
8300bbc82e4SYishai Hadas migf->max_pos += chunk_size;
8310bbc82e4SYishai Hadas *has_work = true;
8320bbc82e4SYishai Hadas }
8330bbc82e4SYishai Hadas
8340bbc82e4SYishai Hadas return 0;
8350bbc82e4SYishai Hadas }
8360bbc82e4SYishai Hadas
8370bbc82e4SYishai Hadas static int
virtiovf_resume_read_header_data(struct virtiovf_migration_file * migf,struct virtiovf_data_buffer * vhca_buf,const char __user ** buf,size_t * len,loff_t * pos,ssize_t * done)8380bbc82e4SYishai Hadas virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf,
8390bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf,
8400bbc82e4SYishai Hadas const char __user **buf, size_t *len,
8410bbc82e4SYishai Hadas loff_t *pos, ssize_t *done)
8420bbc82e4SYishai Hadas {
8430bbc82e4SYishai Hadas size_t copy_len, to_copy;
8440bbc82e4SYishai Hadas size_t required_data;
8450bbc82e4SYishai Hadas int ret;
8460bbc82e4SYishai Hadas
8470bbc82e4SYishai Hadas required_data = migf->record_size - vhca_buf->length;
8480bbc82e4SYishai Hadas to_copy = min_t(size_t, *len, required_data);
8490bbc82e4SYishai Hadas copy_len = to_copy;
8500bbc82e4SYishai Hadas while (to_copy) {
8510bbc82e4SYishai Hadas ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
8520bbc82e4SYishai Hadas pos, done);
8530bbc82e4SYishai Hadas if (ret)
8540bbc82e4SYishai Hadas return ret;
8550bbc82e4SYishai Hadas }
8560bbc82e4SYishai Hadas
8570bbc82e4SYishai Hadas *len -= copy_len;
8580bbc82e4SYishai Hadas if (vhca_buf->length == migf->record_size) {
8590bbc82e4SYishai Hadas switch (migf->record_tag) {
8600bbc82e4SYishai Hadas default:
8610bbc82e4SYishai Hadas /* Optional tag */
8620bbc82e4SYishai Hadas break;
8630bbc82e4SYishai Hadas }
8640bbc82e4SYishai Hadas
8650bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
8660bbc82e4SYishai Hadas migf->max_pos += migf->record_size;
8670bbc82e4SYishai Hadas vhca_buf->length = 0;
8680bbc82e4SYishai Hadas }
8690bbc82e4SYishai Hadas
8700bbc82e4SYishai Hadas return 0;
8710bbc82e4SYishai Hadas }
8720bbc82e4SYishai Hadas
8730bbc82e4SYishai Hadas static int
virtiovf_resume_read_header(struct virtiovf_migration_file * migf,struct virtiovf_data_buffer * vhca_buf,const char __user ** buf,size_t * len,loff_t * pos,ssize_t * done,bool * has_work)8740bbc82e4SYishai Hadas virtiovf_resume_read_header(struct virtiovf_migration_file *migf,
8750bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf,
8760bbc82e4SYishai Hadas const char __user **buf,
8770bbc82e4SYishai Hadas size_t *len, loff_t *pos,
8780bbc82e4SYishai Hadas ssize_t *done, bool *has_work)
8790bbc82e4SYishai Hadas {
8800bbc82e4SYishai Hadas struct page *page;
8810bbc82e4SYishai Hadas size_t copy_len;
8820bbc82e4SYishai Hadas u8 *to_buff;
8830bbc82e4SYishai Hadas int ret;
8840bbc82e4SYishai Hadas
8850bbc82e4SYishai Hadas copy_len = min_t(size_t, *len,
8860bbc82e4SYishai Hadas sizeof(struct virtiovf_migration_header) - vhca_buf->length);
8870bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, 0);
8880bbc82e4SYishai Hadas if (!page)
8890bbc82e4SYishai Hadas return -EINVAL;
8900bbc82e4SYishai Hadas to_buff = kmap_local_page(page);
8910bbc82e4SYishai Hadas ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
8920bbc82e4SYishai Hadas if (ret) {
8930bbc82e4SYishai Hadas ret = -EFAULT;
8940bbc82e4SYishai Hadas goto end;
8950bbc82e4SYishai Hadas }
8960bbc82e4SYishai Hadas
8970bbc82e4SYishai Hadas *buf += copy_len;
8980bbc82e4SYishai Hadas *pos += copy_len;
8990bbc82e4SYishai Hadas *done += copy_len;
9000bbc82e4SYishai Hadas *len -= copy_len;
9010bbc82e4SYishai Hadas vhca_buf->length += copy_len;
9020bbc82e4SYishai Hadas if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) {
9030bbc82e4SYishai Hadas u64 record_size;
9040bbc82e4SYishai Hadas u32 flags;
9050bbc82e4SYishai Hadas
9060bbc82e4SYishai Hadas record_size = le64_to_cpup((__le64 *)to_buff);
9070bbc82e4SYishai Hadas if (record_size > MAX_LOAD_SIZE) {
9080bbc82e4SYishai Hadas ret = -ENOMEM;
9090bbc82e4SYishai Hadas goto end;
9100bbc82e4SYishai Hadas }
9110bbc82e4SYishai Hadas
9120bbc82e4SYishai Hadas migf->record_size = record_size;
9130bbc82e4SYishai Hadas flags = le32_to_cpup((__le32 *)(to_buff +
9140bbc82e4SYishai Hadas offsetof(struct virtiovf_migration_header, flags)));
9150bbc82e4SYishai Hadas migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
9160bbc82e4SYishai Hadas offsetof(struct virtiovf_migration_header, tag)));
9170bbc82e4SYishai Hadas switch (migf->record_tag) {
9180bbc82e4SYishai Hadas case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA:
9190bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK;
9200bbc82e4SYishai Hadas break;
9210bbc82e4SYishai Hadas default:
9220bbc82e4SYishai Hadas if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
9230bbc82e4SYishai Hadas ret = -EOPNOTSUPP;
9240bbc82e4SYishai Hadas goto end;
9250bbc82e4SYishai Hadas }
9260bbc82e4SYishai Hadas /* We may read and skip this optional record data */
9270bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA;
9280bbc82e4SYishai Hadas }
9290bbc82e4SYishai Hadas
9300bbc82e4SYishai Hadas migf->max_pos += vhca_buf->length;
9310bbc82e4SYishai Hadas vhca_buf->length = 0;
9320bbc82e4SYishai Hadas *has_work = true;
9330bbc82e4SYishai Hadas }
9340bbc82e4SYishai Hadas end:
9350bbc82e4SYishai Hadas kunmap_local(to_buff);
9360bbc82e4SYishai Hadas return ret;
9370bbc82e4SYishai Hadas }
9380bbc82e4SYishai Hadas
virtiovf_resume_write(struct file * filp,const char __user * buf,size_t len,loff_t * pos)9390bbc82e4SYishai Hadas static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf,
9400bbc82e4SYishai Hadas size_t len, loff_t *pos)
9410bbc82e4SYishai Hadas {
9420bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data;
9430bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf = migf->buf;
9440bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header;
9450bbc82e4SYishai Hadas unsigned int orig_length;
9460bbc82e4SYishai Hadas bool has_work = false;
9470bbc82e4SYishai Hadas ssize_t done = 0;
9480bbc82e4SYishai Hadas int ret = 0;
9490bbc82e4SYishai Hadas
9500bbc82e4SYishai Hadas if (pos)
9510bbc82e4SYishai Hadas return -ESPIPE;
9520bbc82e4SYishai Hadas
9530bbc82e4SYishai Hadas pos = &filp->f_pos;
9540bbc82e4SYishai Hadas if (*pos < vhca_buf->start_pos)
9550bbc82e4SYishai Hadas return -EINVAL;
9560bbc82e4SYishai Hadas
9570bbc82e4SYishai Hadas mutex_lock(&migf->virtvdev->state_mutex);
9580bbc82e4SYishai Hadas mutex_lock(&migf->lock);
9590bbc82e4SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
9600bbc82e4SYishai Hadas done = -ENODEV;
9610bbc82e4SYishai Hadas goto out_unlock;
9620bbc82e4SYishai Hadas }
9630bbc82e4SYishai Hadas
9640bbc82e4SYishai Hadas while (len || has_work) {
9650bbc82e4SYishai Hadas has_work = false;
9660bbc82e4SYishai Hadas switch (migf->load_state) {
9670bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_READ_HEADER:
9680bbc82e4SYishai Hadas ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
9690bbc82e4SYishai Hadas &len, pos, &done, &has_work);
9700bbc82e4SYishai Hadas if (ret)
9710bbc82e4SYishai Hadas goto out_unlock;
9720bbc82e4SYishai Hadas break;
9730bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA:
9740bbc82e4SYishai Hadas if (vhca_buf_header->allocated_length < migf->record_size) {
9750bbc82e4SYishai Hadas virtiovf_free_data_buffer(vhca_buf_header);
9760bbc82e4SYishai Hadas
9770bbc82e4SYishai Hadas migf->buf_header = virtiovf_alloc_data_buffer(migf,
9780bbc82e4SYishai Hadas migf->record_size);
9790bbc82e4SYishai Hadas if (IS_ERR(migf->buf_header)) {
9800bbc82e4SYishai Hadas ret = PTR_ERR(migf->buf_header);
9810bbc82e4SYishai Hadas migf->buf_header = NULL;
9820bbc82e4SYishai Hadas goto out_unlock;
9830bbc82e4SYishai Hadas }
9840bbc82e4SYishai Hadas
9850bbc82e4SYishai Hadas vhca_buf_header = migf->buf_header;
9860bbc82e4SYishai Hadas }
9870bbc82e4SYishai Hadas
9880bbc82e4SYishai Hadas vhca_buf_header->start_pos = migf->max_pos;
9890bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA;
9900bbc82e4SYishai Hadas break;
9910bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
9920bbc82e4SYishai Hadas ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
9930bbc82e4SYishai Hadas &buf, &len, pos, &done);
9940bbc82e4SYishai Hadas if (ret)
9950bbc82e4SYishai Hadas goto out_unlock;
9960bbc82e4SYishai Hadas break;
9970bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
9980bbc82e4SYishai Hadas {
9990bbc82e4SYishai Hadas u32 cmd_size = migf->record_size +
10000bbc82e4SYishai Hadas sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
10010bbc82e4SYishai Hadas
10020bbc82e4SYishai Hadas /*
10030bbc82e4SYishai Hadas * The DMA map/unmap is managed in virtio layer, we just need to extend
10040bbc82e4SYishai Hadas * the SG pages to hold the extra required chunk data.
10050bbc82e4SYishai Hadas */
10060bbc82e4SYishai Hadas if (vhca_buf->allocated_length < cmd_size) {
10070bbc82e4SYishai Hadas ret = virtiovf_add_migration_pages(vhca_buf,
10080bbc82e4SYishai Hadas DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
10090bbc82e4SYishai Hadas PAGE_SIZE));
10100bbc82e4SYishai Hadas if (ret)
10110bbc82e4SYishai Hadas goto out_unlock;
10120bbc82e4SYishai Hadas }
10130bbc82e4SYishai Hadas
10140bbc82e4SYishai Hadas vhca_buf->start_pos = migf->max_pos;
10150bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK;
10160bbc82e4SYishai Hadas break;
10170bbc82e4SYishai Hadas }
10180bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_READ_CHUNK:
10190bbc82e4SYishai Hadas ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
10200bbc82e4SYishai Hadas &buf, &len, pos, &done, &has_work);
10210bbc82e4SYishai Hadas if (ret)
10220bbc82e4SYishai Hadas goto out_unlock;
10230bbc82e4SYishai Hadas break;
10240bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_LOAD_CHUNK:
10250bbc82e4SYishai Hadas /* Mark the last SG entry and set its length */
10260bbc82e4SYishai Hadas sg_mark_end(vhca_buf->last_offset_sg);
10270bbc82e4SYishai Hadas orig_length = vhca_buf->last_offset_sg->length;
10280bbc82e4SYishai Hadas /* Length should include the resource object command header */
10290bbc82e4SYishai Hadas vhca_buf->last_offset_sg->length = vhca_buf->length +
10300bbc82e4SYishai Hadas sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
10310bbc82e4SYishai Hadas vhca_buf->last_offset;
10320bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
10330bbc82e4SYishai Hadas vhca_buf->table.sgt.sgl);
10340bbc82e4SYishai Hadas /* Restore the original SG data */
10350bbc82e4SYishai Hadas vhca_buf->last_offset_sg->length = orig_length;
10360bbc82e4SYishai Hadas sg_unmark_end(vhca_buf->last_offset_sg);
10370bbc82e4SYishai Hadas if (ret)
10380bbc82e4SYishai Hadas goto out_unlock;
10390bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
10400bbc82e4SYishai Hadas /* be ready for reading the next chunk */
10410bbc82e4SYishai Hadas vhca_buf->length = 0;
10420bbc82e4SYishai Hadas break;
10430bbc82e4SYishai Hadas default:
10440bbc82e4SYishai Hadas break;
10450bbc82e4SYishai Hadas }
10460bbc82e4SYishai Hadas }
10470bbc82e4SYishai Hadas
10480bbc82e4SYishai Hadas out_unlock:
10490bbc82e4SYishai Hadas if (ret)
10500bbc82e4SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR;
10510bbc82e4SYishai Hadas mutex_unlock(&migf->lock);
10520bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(migf->virtvdev);
10530bbc82e4SYishai Hadas return ret ? ret : done;
10540bbc82e4SYishai Hadas }
10550bbc82e4SYishai Hadas
10560bbc82e4SYishai Hadas static const struct file_operations virtiovf_resume_fops = {
10570bbc82e4SYishai Hadas .owner = THIS_MODULE,
10580bbc82e4SYishai Hadas .write = virtiovf_resume_write,
10590bbc82e4SYishai Hadas .release = virtiovf_release_file,
10600bbc82e4SYishai Hadas };
10610bbc82e4SYishai Hadas
10620bbc82e4SYishai Hadas static struct virtiovf_migration_file *
virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device * virtvdev)10630bbc82e4SYishai Hadas virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev)
10640bbc82e4SYishai Hadas {
10650bbc82e4SYishai Hadas struct virtiovf_migration_file *migf;
10660bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf;
10670bbc82e4SYishai Hadas u32 obj_id;
10680bbc82e4SYishai Hadas int ret;
10690bbc82e4SYishai Hadas
10700bbc82e4SYishai Hadas migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
10710bbc82e4SYishai Hadas if (!migf)
10720bbc82e4SYishai Hadas return ERR_PTR(-ENOMEM);
10730bbc82e4SYishai Hadas
10740bbc82e4SYishai Hadas migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf,
10750bbc82e4SYishai Hadas O_WRONLY);
10760bbc82e4SYishai Hadas if (IS_ERR(migf->filp)) {
10770bbc82e4SYishai Hadas ret = PTR_ERR(migf->filp);
10780bbc82e4SYishai Hadas kfree(migf);
10790bbc82e4SYishai Hadas return ERR_PTR(ret);
10800bbc82e4SYishai Hadas }
10810bbc82e4SYishai Hadas
10820bbc82e4SYishai Hadas stream_open(migf->filp->f_inode, migf->filp);
10830bbc82e4SYishai Hadas mutex_init(&migf->lock);
10840bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->buf_list);
10850bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->avail_list);
10860bbc82e4SYishai Hadas spin_lock_init(&migf->list_lock);
10870bbc82e4SYishai Hadas
10880bbc82e4SYishai Hadas buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE);
10890bbc82e4SYishai Hadas if (IS_ERR(buf)) {
10900bbc82e4SYishai Hadas ret = PTR_ERR(buf);
10910bbc82e4SYishai Hadas goto out;
10920bbc82e4SYishai Hadas }
10930bbc82e4SYishai Hadas
10940bbc82e4SYishai Hadas migf->buf = buf;
10950bbc82e4SYishai Hadas
10960bbc82e4SYishai Hadas buf = virtiovf_alloc_data_buffer(migf,
10970bbc82e4SYishai Hadas sizeof(struct virtiovf_migration_header));
10980bbc82e4SYishai Hadas if (IS_ERR(buf)) {
10990bbc82e4SYishai Hadas ret = PTR_ERR(buf);
11000bbc82e4SYishai Hadas goto out_clean;
11010bbc82e4SYishai Hadas }
11020bbc82e4SYishai Hadas
11030bbc82e4SYishai Hadas migf->buf_header = buf;
11040bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
11050bbc82e4SYishai Hadas
11060bbc82e4SYishai Hadas migf->virtvdev = virtvdev;
11070bbc82e4SYishai Hadas ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
11080bbc82e4SYishai Hadas &obj_id);
11090bbc82e4SYishai Hadas if (ret)
11100bbc82e4SYishai Hadas goto out_clean;
11110bbc82e4SYishai Hadas
11120bbc82e4SYishai Hadas migf->obj_id = obj_id;
11130bbc82e4SYishai Hadas /* Mark as having a valid obj id which can be even 0 */
11140bbc82e4SYishai Hadas migf->has_obj_id = true;
11150bbc82e4SYishai Hadas ret = virtiovf_set_obj_cmd_header(migf->buf);
11160bbc82e4SYishai Hadas if (ret)
11170bbc82e4SYishai Hadas goto out_clean;
11180bbc82e4SYishai Hadas
11190bbc82e4SYishai Hadas return migf;
11200bbc82e4SYishai Hadas
11210bbc82e4SYishai Hadas out_clean:
11220bbc82e4SYishai Hadas virtiovf_clean_migf_resources(migf);
11230bbc82e4SYishai Hadas out:
11240bbc82e4SYishai Hadas fput(migf->filp);
11250bbc82e4SYishai Hadas return ERR_PTR(ret);
11260bbc82e4SYishai Hadas }
11270bbc82e4SYishai Hadas
11280bbc82e4SYishai Hadas static struct file *
virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device * virtvdev,u32 new)11290bbc82e4SYishai Hadas virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
11300bbc82e4SYishai Hadas u32 new)
11310bbc82e4SYishai Hadas {
11320bbc82e4SYishai Hadas u32 cur = virtvdev->mig_state;
11330bbc82e4SYishai Hadas int ret;
11340bbc82e4SYishai Hadas
11350bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
11360bbc82e4SYishai Hadas /* NOP */
11370bbc82e4SYishai Hadas return NULL;
11380bbc82e4SYishai Hadas }
11390bbc82e4SYishai Hadas
11400bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
11410bbc82e4SYishai Hadas /* NOP */
11420bbc82e4SYishai Hadas return NULL;
11430bbc82e4SYishai Hadas }
11440bbc82e4SYishai Hadas
11456cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
11466cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
11470bbc82e4SYishai Hadas ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
11480bbc82e4SYishai Hadas BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
11490bbc82e4SYishai Hadas if (ret)
11500bbc82e4SYishai Hadas return ERR_PTR(ret);
11510bbc82e4SYishai Hadas return NULL;
11520bbc82e4SYishai Hadas }
11530bbc82e4SYishai Hadas
11546cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
11556cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
11560bbc82e4SYishai Hadas ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
11570bbc82e4SYishai Hadas if (ret)
11580bbc82e4SYishai Hadas return ERR_PTR(ret);
11590bbc82e4SYishai Hadas return NULL;
11600bbc82e4SYishai Hadas }
11610bbc82e4SYishai Hadas
11620bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
11630bbc82e4SYishai Hadas struct virtiovf_migration_file *migf;
11640bbc82e4SYishai Hadas
11656cea64b1SYishai Hadas migf = virtiovf_pci_save_device_data(virtvdev, false);
11660bbc82e4SYishai Hadas if (IS_ERR(migf))
11670bbc82e4SYishai Hadas return ERR_CAST(migf);
11680bbc82e4SYishai Hadas get_file(migf->filp);
11690bbc82e4SYishai Hadas virtvdev->saving_migf = migf;
11700bbc82e4SYishai Hadas return migf->filp;
11710bbc82e4SYishai Hadas }
11720bbc82e4SYishai Hadas
11736cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
11746cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
11756cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
11760bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev);
11770bbc82e4SYishai Hadas return NULL;
11780bbc82e4SYishai Hadas }
11790bbc82e4SYishai Hadas
11800bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
11810bbc82e4SYishai Hadas struct virtiovf_migration_file *migf;
11820bbc82e4SYishai Hadas
11830bbc82e4SYishai Hadas migf = virtiovf_pci_resume_device_data(virtvdev);
11840bbc82e4SYishai Hadas if (IS_ERR(migf))
11850bbc82e4SYishai Hadas return ERR_CAST(migf);
11860bbc82e4SYishai Hadas get_file(migf->filp);
11870bbc82e4SYishai Hadas virtvdev->resuming_migf = migf;
11880bbc82e4SYishai Hadas return migf->filp;
11890bbc82e4SYishai Hadas }
11900bbc82e4SYishai Hadas
11910bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
11920bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev);
11930bbc82e4SYishai Hadas return NULL;
11940bbc82e4SYishai Hadas }
11950bbc82e4SYishai Hadas
11966cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
11976cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
11986cea64b1SYishai Hadas new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
11996cea64b1SYishai Hadas struct virtiovf_migration_file *migf;
12006cea64b1SYishai Hadas
12016cea64b1SYishai Hadas migf = virtiovf_pci_save_device_data(virtvdev, true);
12026cea64b1SYishai Hadas if (IS_ERR(migf))
12036cea64b1SYishai Hadas return ERR_CAST(migf);
12046cea64b1SYishai Hadas get_file(migf->filp);
12056cea64b1SYishai Hadas virtvdev->saving_migf = migf;
12066cea64b1SYishai Hadas return migf->filp;
12076cea64b1SYishai Hadas }
12086cea64b1SYishai Hadas
12096cea64b1SYishai Hadas if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
12106cea64b1SYishai Hadas ret = virtiovf_pci_save_device_final_data(virtvdev);
12116cea64b1SYishai Hadas return ret ? ERR_PTR(ret) : NULL;
12126cea64b1SYishai Hadas }
12136cea64b1SYishai Hadas
12140bbc82e4SYishai Hadas /*
12150bbc82e4SYishai Hadas * vfio_mig_get_next_state() does not use arcs other than the above
12160bbc82e4SYishai Hadas */
12170bbc82e4SYishai Hadas WARN_ON(true);
12180bbc82e4SYishai Hadas return ERR_PTR(-EINVAL);
12190bbc82e4SYishai Hadas }
12200bbc82e4SYishai Hadas
12210bbc82e4SYishai Hadas static struct file *
virtiovf_pci_set_device_state(struct vfio_device * vdev,enum vfio_device_mig_state new_state)12220bbc82e4SYishai Hadas virtiovf_pci_set_device_state(struct vfio_device *vdev,
12230bbc82e4SYishai Hadas enum vfio_device_mig_state new_state)
12240bbc82e4SYishai Hadas {
12250bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = container_of(
12260bbc82e4SYishai Hadas vdev, struct virtiovf_pci_core_device, core_device.vdev);
12270bbc82e4SYishai Hadas enum vfio_device_mig_state next_state;
12280bbc82e4SYishai Hadas struct file *res = NULL;
12290bbc82e4SYishai Hadas int ret;
12300bbc82e4SYishai Hadas
12310bbc82e4SYishai Hadas mutex_lock(&virtvdev->state_mutex);
12320bbc82e4SYishai Hadas while (new_state != virtvdev->mig_state) {
12330bbc82e4SYishai Hadas ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state,
12340bbc82e4SYishai Hadas new_state, &next_state);
12350bbc82e4SYishai Hadas if (ret) {
12360bbc82e4SYishai Hadas res = ERR_PTR(ret);
12370bbc82e4SYishai Hadas break;
12380bbc82e4SYishai Hadas }
12390bbc82e4SYishai Hadas res = virtiovf_pci_step_device_state_locked(virtvdev, next_state);
12400bbc82e4SYishai Hadas if (IS_ERR(res))
12410bbc82e4SYishai Hadas break;
12420bbc82e4SYishai Hadas virtvdev->mig_state = next_state;
12430bbc82e4SYishai Hadas if (WARN_ON(res && new_state != virtvdev->mig_state)) {
12440bbc82e4SYishai Hadas fput(res);
12450bbc82e4SYishai Hadas res = ERR_PTR(-EINVAL);
12460bbc82e4SYishai Hadas break;
12470bbc82e4SYishai Hadas }
12480bbc82e4SYishai Hadas }
12490bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev);
12500bbc82e4SYishai Hadas return res;
12510bbc82e4SYishai Hadas }
12520bbc82e4SYishai Hadas
virtiovf_pci_get_device_state(struct vfio_device * vdev,enum vfio_device_mig_state * curr_state)12530bbc82e4SYishai Hadas static int virtiovf_pci_get_device_state(struct vfio_device *vdev,
12540bbc82e4SYishai Hadas enum vfio_device_mig_state *curr_state)
12550bbc82e4SYishai Hadas {
12560bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = container_of(
12570bbc82e4SYishai Hadas vdev, struct virtiovf_pci_core_device, core_device.vdev);
12580bbc82e4SYishai Hadas
12590bbc82e4SYishai Hadas mutex_lock(&virtvdev->state_mutex);
12600bbc82e4SYishai Hadas *curr_state = virtvdev->mig_state;
12610bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev);
12620bbc82e4SYishai Hadas return 0;
12630bbc82e4SYishai Hadas }
12640bbc82e4SYishai Hadas
virtiovf_pci_get_data_size(struct vfio_device * vdev,unsigned long * stop_copy_length)12650bbc82e4SYishai Hadas static int virtiovf_pci_get_data_size(struct vfio_device *vdev,
12660bbc82e4SYishai Hadas unsigned long *stop_copy_length)
12670bbc82e4SYishai Hadas {
12680bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = container_of(
12690bbc82e4SYishai Hadas vdev, struct virtiovf_pci_core_device, core_device.vdev);
12700bbc82e4SYishai Hadas bool obj_id_exists;
12710bbc82e4SYishai Hadas u32 res_size;
12720bbc82e4SYishai Hadas u32 obj_id;
12730bbc82e4SYishai Hadas int ret;
12740bbc82e4SYishai Hadas
12750bbc82e4SYishai Hadas mutex_lock(&virtvdev->state_mutex);
12760bbc82e4SYishai Hadas obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id;
12770bbc82e4SYishai Hadas if (!obj_id_exists) {
12780bbc82e4SYishai Hadas ret = virtiovf_pci_alloc_obj_id(virtvdev,
12790bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
12800bbc82e4SYishai Hadas &obj_id);
12810bbc82e4SYishai Hadas if (ret)
12820bbc82e4SYishai Hadas goto end;
12830bbc82e4SYishai Hadas } else {
12840bbc82e4SYishai Hadas obj_id = virtvdev->saving_migf->obj_id;
12850bbc82e4SYishai Hadas }
12860bbc82e4SYishai Hadas
12870bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
12880bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
12890bbc82e4SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
12900bbc82e4SYishai Hadas &res_size);
12910bbc82e4SYishai Hadas if (!ret)
12920bbc82e4SYishai Hadas *stop_copy_length = res_size;
12930bbc82e4SYishai Hadas
12940bbc82e4SYishai Hadas /*
12950bbc82e4SYishai Hadas * We can't leave this obj_id alive if didn't exist before, otherwise, it might
12960bbc82e4SYishai Hadas * stay alive, even without an active migration flow (e.g. migration was cancelled)
12970bbc82e4SYishai Hadas */
12980bbc82e4SYishai Hadas if (!obj_id_exists)
12990bbc82e4SYishai Hadas virtiovf_pci_free_obj_id(virtvdev, obj_id);
13000bbc82e4SYishai Hadas end:
13010bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev);
13020bbc82e4SYishai Hadas return ret;
13030bbc82e4SYishai Hadas }
13040bbc82e4SYishai Hadas
13050bbc82e4SYishai Hadas static const struct vfio_migration_ops virtvdev_pci_mig_ops = {
13060bbc82e4SYishai Hadas .migration_set_state = virtiovf_pci_set_device_state,
13070bbc82e4SYishai Hadas .migration_get_state = virtiovf_pci_get_device_state,
13080bbc82e4SYishai Hadas .migration_get_data_size = virtiovf_pci_get_data_size,
13090bbc82e4SYishai Hadas };
13100bbc82e4SYishai Hadas
virtiovf_set_migratable(struct virtiovf_pci_core_device * virtvdev)13110bbc82e4SYishai Hadas void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
13120bbc82e4SYishai Hadas {
13130bbc82e4SYishai Hadas virtvdev->migrate_cap = 1;
13140bbc82e4SYishai Hadas mutex_init(&virtvdev->state_mutex);
13150bbc82e4SYishai Hadas spin_lock_init(&virtvdev->reset_lock);
13160bbc82e4SYishai Hadas virtvdev->core_device.vdev.migration_flags =
13170bbc82e4SYishai Hadas VFIO_MIGRATION_STOP_COPY |
13186cea64b1SYishai Hadas VFIO_MIGRATION_P2P |
13196cea64b1SYishai Hadas VFIO_MIGRATION_PRE_COPY;
13200bbc82e4SYishai Hadas virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
13210bbc82e4SYishai Hadas }
13220bbc82e4SYishai Hadas
virtiovf_open_migration(struct virtiovf_pci_core_device * virtvdev)13230bbc82e4SYishai Hadas void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev)
13240bbc82e4SYishai Hadas {
13250bbc82e4SYishai Hadas if (!virtvdev->migrate_cap)
13260bbc82e4SYishai Hadas return;
13270bbc82e4SYishai Hadas
13280bbc82e4SYishai Hadas virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
13290bbc82e4SYishai Hadas }
13300bbc82e4SYishai Hadas
virtiovf_close_migration(struct virtiovf_pci_core_device * virtvdev)13310bbc82e4SYishai Hadas void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev)
13320bbc82e4SYishai Hadas {
13330bbc82e4SYishai Hadas if (!virtvdev->migrate_cap)
13340bbc82e4SYishai Hadas return;
13350bbc82e4SYishai Hadas
13360bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev);
13370bbc82e4SYishai Hadas }
1338