1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright (c) 2025, Google LLC.
5 * Pasha Tatashin <pasha.tatashin@soleen.com>
6 */
7
8 /**
9 * DOC: Live Update Orchestrator (LUO)
10 *
11 * Live Update is a specialized, kexec-based reboot process that allows a
12 * running kernel to be updated from one version to another while preserving
13 * the state of selected resources and keeping designated hardware devices
14 * operational. For these devices, DMA activity may continue throughout the
15 * kernel transition.
16 *
17 * While the primary use case driving this work is supporting live updates of
18 * the Linux kernel when it is used as a hypervisor in cloud environments, the
19 * LUO framework itself is designed to be workload-agnostic. Live Update
20 * facilitates a full kernel version upgrade for any type of system.
21 *
22 * For example, a non-hypervisor system running an in-memory cache like
23 * memcached with many gigabytes of data can use LUO. The userspace service
24 * can place its cache into a memfd, have its state preserved by LUO, and
25 * restore it immediately after the kernel kexec.
26 *
27 * Whether the system is running virtual machines, containers, a
28 * high-performance database, or networking services, LUO's primary goal is to
29 * enable a full kernel update by preserving critical userspace state and
30 * keeping essential devices operational.
31 *
32 * The core of LUO is a mechanism that tracks the progress of a live update,
33 * along with a callback API that allows other kernel subsystems to participate
34 * in the process. Example subsystems that can hook into LUO include: kvm,
35 * iommu, interrupts, vfio, participating filesystems, and memory management.
36 *
37 * LUO uses Kexec Handover to transfer memory state from the current kernel to
38 * the next kernel. For more details see Documentation/core-api/kho/index.rst.
39 */
40
41 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42
43 #include <linux/atomic.h>
44 #include <linux/errno.h>
45 #include <linux/file.h>
46 #include <linux/fs.h>
47 #include <linux/init.h>
48 #include <linux/io.h>
49 #include <linux/kernel.h>
50 #include <linux/kexec_handover.h>
51 #include <linux/kho/abi/luo.h>
52 #include <linux/kobject.h>
53 #include <linux/libfdt.h>
54 #include <linux/liveupdate.h>
55 #include <linux/miscdevice.h>
56 #include <linux/mm.h>
57 #include <linux/sizes.h>
58 #include <linux/string.h>
59 #include <linux/unaligned.h>
60
61 #include "kexec_handover_internal.h"
62 #include "luo_internal.h"
63
/*
 * File-local LUO state.
 *
 * enabled:        live update on/off flag; parsed from the "liveupdate"
 *                 command-line parameter and cleared when KHO is disabled
 *                 or when LUO setup fails.
 * fdt_out:        outgoing LUO FDT; set by luo_fdt_setup() once the tree has
 *                 been added to KHO.
 * fdt_in:         incoming LUO FDT retrieved from KHO by
 *                 luo_early_startup(), if one was handed over.
 * liveupdate_num: live update number read from the incoming FDT; the
 *                 outgoing FDT stores this value plus one.
 */
static struct {
	bool enabled;
	void *fdt_out;
	void *fdt_in;
	u64 liveupdate_num;
} luo_global;
70
early_liveupdate_param(char * buf)71 static int __init early_liveupdate_param(char *buf)
72 {
73 return kstrtobool(buf, &luo_global.enabled);
74 }
75 early_param("liveupdate", early_liveupdate_param);
76
luo_early_startup(void)77 static int __init luo_early_startup(void)
78 {
79 phys_addr_t fdt_phys;
80 int err, ln_size;
81 const void *ptr;
82
83 if (!kho_is_enabled()) {
84 if (liveupdate_enabled())
85 pr_warn("Disabling liveupdate because KHO is disabled\n");
86 luo_global.enabled = false;
87 return 0;
88 }
89
90 /* Retrieve LUO subtree, and verify its format. */
91 err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
92 if (err) {
93 if (err != -ENOENT) {
94 pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
95 LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
96 return err;
97 }
98
99 return 0;
100 }
101
102 luo_global.fdt_in = phys_to_virt(fdt_phys);
103 err = fdt_node_check_compatible(luo_global.fdt_in, 0,
104 LUO_FDT_COMPATIBLE);
105 if (err) {
106 pr_err("FDT '%s' is incompatible with '%s' [%d]\n",
107 LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err);
108
109 return -EINVAL;
110 }
111
112 ln_size = 0;
113 ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM,
114 &ln_size);
115 if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) {
116 pr_err("Unable to get live update number '%s' [%d]\n",
117 LUO_FDT_LIVEUPDATE_NUM, ln_size);
118
119 return -EINVAL;
120 }
121
122 luo_global.liveupdate_num = get_unaligned((u64 *)ptr);
123 pr_info("Retrieved live update data, liveupdate number: %lld\n",
124 luo_global.liveupdate_num);
125
126 err = luo_session_setup_incoming(luo_global.fdt_in);
127 if (err)
128 return err;
129
130 err = luo_flb_setup_incoming(luo_global.fdt_in);
131
132 return err;
133 }
134
liveupdate_early_init(void)135 static int __init liveupdate_early_init(void)
136 {
137 int err;
138
139 err = luo_early_startup();
140 if (err) {
141 luo_global.enabled = false;
142 luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n",
143 ERR_PTR(err));
144 }
145
146 return err;
147 }
148 early_initcall(liveupdate_early_init);
149
150 /* Called during boot to create outgoing LUO fdt tree */
luo_fdt_setup(void)151 static int __init luo_fdt_setup(void)
152 {
153 const u64 ln = luo_global.liveupdate_num + 1;
154 void *fdt_out;
155 int err;
156
157 fdt_out = kho_alloc_preserve(LUO_FDT_SIZE);
158 if (IS_ERR(fdt_out)) {
159 pr_err("failed to allocate/preserve FDT memory\n");
160 return PTR_ERR(fdt_out);
161 }
162
163 err = fdt_create(fdt_out, LUO_FDT_SIZE);
164 err |= fdt_finish_reservemap(fdt_out);
165 err |= fdt_begin_node(fdt_out, "");
166 err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
167 err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
168 err |= luo_session_setup_outgoing(fdt_out);
169 err |= luo_flb_setup_outgoing(fdt_out);
170 err |= fdt_end_node(fdt_out);
171 err |= fdt_finish(fdt_out);
172 if (err)
173 goto exit_free;
174
175 err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out);
176 if (err)
177 goto exit_free;
178 luo_global.fdt_out = fdt_out;
179
180 return 0;
181
182 exit_free:
183 kho_unpreserve_free(fdt_out);
184 pr_err("failed to prepare LUO FDT: %d\n", err);
185
186 return err;
187 }
188
189 /*
190 * late initcall because it initializes the outgoing tree that is needed only
191 * once userspace starts using /dev/liveupdate.
192 */
luo_late_startup(void)193 static int __init luo_late_startup(void)
194 {
195 int err;
196
197 if (!liveupdate_enabled())
198 return 0;
199
200 err = luo_fdt_setup();
201 if (err)
202 luo_global.enabled = false;
203
204 return err;
205 }
206 late_initcall(luo_late_startup);
207
208 /* Public Functions */
209
210 /**
211 * liveupdate_reboot() - Kernel reboot notifier for live update final
212 * serialization.
213 *
214 * This function is invoked directly from the reboot() syscall pathway
215 * if kexec is in progress.
216 *
217 * If any callback fails, this function aborts KHO, undoes the freeze()
218 * callbacks, and returns an error.
219 */
liveupdate_reboot(void)220 int liveupdate_reboot(void)
221 {
222 int err;
223
224 if (!liveupdate_enabled())
225 return 0;
226
227 err = luo_session_serialize();
228 if (err)
229 return err;
230
231 luo_flb_serialize();
232
233 err = kho_finalize();
234 if (err) {
235 pr_err("kho_finalize failed %d\n", err);
236 /*
237 * kho_finalize() may return libfdt errors, to aboid passing to
238 * userspace unknown errors, change this to EAGAIN.
239 */
240 err = -EAGAIN;
241 }
242
243 return err;
244 }
245
246 /**
247 * liveupdate_enabled - Check if the live update feature is enabled.
248 *
249 * This function returns the state of the live update feature flag, which
250 * can be controlled via the ``liveupdate`` kernel command-line parameter.
251 *
252 * @return true if live update is enabled, false otherwise.
253 */
liveupdate_enabled(void)254 bool liveupdate_enabled(void)
255 {
256 return luo_global.enabled;
257 }
258
259 /**
260 * DOC: LUO ioctl Interface
261 *
262 * The IOCTL user-space control interface for the LUO subsystem.
263 * It registers a character device, typically found at ``/dev/liveupdate``,
264 * which allows a userspace agent to manage the LUO state machine and its
265 * associated resources, such as preservable file descriptors.
266 *
267 * To ensure that the state machine is controlled by a single entity, access
268 * to this device is exclusive: only one process is permitted to have
269 * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will
270 * fail with -EBUSY until the first process closes its file descriptor.
271 * This singleton model simplifies state management by preventing conflicting
272 * commands from multiple userspace agents.
273 */
274
/*
 * State of the /dev/liveupdate misc device.
 *
 * miscdev: the registered misc device; luo_open() and luo_release() get back
 *          to this structure from it with container_of().
 * in_use:  0 when the device is free, 1 while it is held open; this is what
 *          makes luo_open() return -EBUSY to a second opener.
 */
struct luo_device_state {
	struct miscdevice miscdev;
	atomic_t in_use;
};
279
luo_ioctl_create_session(struct luo_ucmd * ucmd)280 static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
281 {
282 struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
283 struct file *file;
284 int err;
285
286 argp->fd = get_unused_fd_flags(O_CLOEXEC);
287 if (argp->fd < 0)
288 return argp->fd;
289
290 err = luo_session_create(argp->name, &file);
291 if (err)
292 goto err_put_fd;
293
294 err = luo_ucmd_respond(ucmd, sizeof(*argp));
295 if (err)
296 goto err_put_file;
297
298 fd_install(argp->fd, file);
299
300 return 0;
301
302 err_put_file:
303 fput(file);
304 err_put_fd:
305 put_unused_fd(argp->fd);
306
307 return err;
308 }
309
luo_ioctl_retrieve_session(struct luo_ucmd * ucmd)310 static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd)
311 {
312 struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd;
313 struct file *file;
314 int err;
315
316 argp->fd = get_unused_fd_flags(O_CLOEXEC);
317 if (argp->fd < 0)
318 return argp->fd;
319
320 err = luo_session_retrieve(argp->name, &file);
321 if (err < 0)
322 goto err_put_fd;
323
324 err = luo_ucmd_respond(ucmd, sizeof(*argp));
325 if (err)
326 goto err_put_file;
327
328 fd_install(argp->fd, file);
329
330 return 0;
331
332 err_put_file:
333 fput(file);
334 err_put_fd:
335 put_unused_fd(argp->fd);
336
337 return err;
338 }
339
luo_open(struct inode * inodep,struct file * filep)340 static int luo_open(struct inode *inodep, struct file *filep)
341 {
342 struct luo_device_state *ldev = container_of(filep->private_data,
343 struct luo_device_state,
344 miscdev);
345
346 if (atomic_cmpxchg(&ldev->in_use, 0, 1))
347 return -EBUSY;
348
349 /* Always return -EIO to user if deserialization fail */
350 if (luo_session_deserialize()) {
351 atomic_set(&ldev->in_use, 0);
352 return -EIO;
353 }
354
355 return 0;
356 }
357
luo_release(struct inode * inodep,struct file * filep)358 static int luo_release(struct inode *inodep, struct file *filep)
359 {
360 struct luo_device_state *ldev = container_of(filep->private_data,
361 struct luo_device_state,
362 miscdev);
363 atomic_set(&ldev->in_use, 0);
364
365 return 0;
366 }
367
/*
 * Kernel-side buffer that luo_ioctl() copies the user's command structure
 * into. It has one member per supported ioctl; IOCTL_OP() breaks the build if
 * a command structure is larger than this union.
 */
union ucmd_buffer {
	struct liveupdate_ioctl_create_session create;
	struct liveupdate_ioctl_retrieve_session retrieve;
};
372
/*
 * Description of one /dev/liveupdate ioctl.
 *
 * size:      size of the kernel's version of the command structure; used as
 *            the destination size for copy_struct_from_user().
 * min_size:  smallest structure size userspace may pass; anything smaller is
 *            rejected with -EINVAL.
 * ioctl_num: full ioctl command number, which must match the cmd received.
 * execute:   handler run on the copied-in command.
 */
struct luo_ioctl_op {
	unsigned int size;
	unsigned int min_size;
	unsigned int ioctl_num;
	int (*execute)(struct luo_ucmd *ucmd);
};
379
/*
 * Build one luo_ioctl_ops[] entry, placed at the ioctl's number relative to
 * LIVEUPDATE_CMD_BASE.
 *
 * _ioctl:  full ioctl command number.
 * _fn:     handler for the command.
 * _struct: command structure type; the BUILD_BUG_ON_ZERO() term contributes
 *          zero to .size and fails the build if _struct does not fit into
 *          union ucmd_buffer.
 * _last:   last member of _struct that userspace must supply; .min_size is
 *          the offset of the end of that member.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = {                            \
		.size = sizeof(_struct) +                                      \
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
					  sizeof(_struct)),                    \
		.min_size = offsetofend(_struct, _last),                       \
		.ioctl_num = _ioctl,                                           \
		.execute = _fn,                                                \
	}
389
/*
 * Dispatch table for luo_ioctl(), indexed by ioctl number minus
 * LIVEUPDATE_CMD_BASE. For both commands "name" is the last member userspace
 * is required to provide.
 */
static const struct luo_ioctl_op luo_ioctl_ops[] = {
	IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session,
		 struct liveupdate_ioctl_create_session, name),
	IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session,
		 struct liveupdate_ioctl_retrieve_session, name),
};
396
luo_ioctl(struct file * filep,unsigned int cmd,unsigned long arg)397 static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
398 {
399 const struct luo_ioctl_op *op;
400 struct luo_ucmd ucmd = {};
401 union ucmd_buffer buf;
402 unsigned int nr;
403 int err;
404
405 nr = _IOC_NR(cmd);
406 if (nr - LIVEUPDATE_CMD_BASE >= ARRAY_SIZE(luo_ioctl_ops))
407 return -EINVAL;
408
409 ucmd.ubuffer = (void __user *)arg;
410 err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
411 if (err)
412 return err;
413
414 op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE];
415 if (op->ioctl_num != cmd)
416 return -ENOIOCTLCMD;
417 if (ucmd.user_size < op->min_size)
418 return -EINVAL;
419
420 ucmd.cmd = &buf;
421 err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
422 ucmd.user_size);
423 if (err)
424 return err;
425
426 return op->execute(&ucmd);
427 }
428
/* File operations of the /dev/liveupdate misc device. */
static const struct file_operations luo_fops = {
	.owner = THIS_MODULE,
	.open = luo_open,
	.release = luo_release,
	.unlocked_ioctl = luo_ioctl,
};
435
/*
 * The single /dev/liveupdate device instance: a dynamically numbered misc
 * device named "liveupdate" that starts out not in use.
 */
static struct luo_device_state luo_dev = {
	.miscdev = {
		.minor = MISC_DYNAMIC_MINOR,
		.name = "liveupdate",
		.fops = &luo_fops,
	},
	.in_use = ATOMIC_INIT(0),
};
444
liveupdate_ioctl_init(void)445 static int __init liveupdate_ioctl_init(void)
446 {
447 if (!liveupdate_enabled())
448 return 0;
449
450 return misc_register(&luo_dev.miscdev);
451 }
452 late_initcall(liveupdate_ioctl_init);
453