xref: /qemu/qga/commands-linux.c (revision 3e9793ab01904144c204589811e0e879109a9713)
1 /*
2  * QEMU Guest Agent Linux-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qga-qapi-commands.h"
17 #include "qapi/error.h"
18 #include "commands-common.h"
19 #include "cutils.h"
20 #include <mntent.h>
21 #include <sys/ioctl.h>
22 #include <mntent.h>
23 #include <linux/nvme_ioctl.h>
24 #include "block/nvme.h"
25 
26 #ifdef CONFIG_LIBUDEV
27 #include <libudev.h>
28 #endif
29 
30 #ifdef HAVE_GETIFADDRS
31 #include <net/if.h>
32 #endif
33 
34 #include <sys/statvfs.h>
35 
36 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/*
 * Look up the device numbers of the node at @devpath.
 *
 * @devmajor and @devminor are always reset to 0 first and are only filled
 * in when @devpath is a block device.
 *
 * Returns 0 for a block device, -2 when @devpath is a directory (which is
 * how a bind mount shows up), and -1 when stat() fails or the node is of
 * any other type.
 */
static int dev_major_minor(const char *devpath,
                           unsigned int *devmajor, unsigned int *devminor)
{
    struct stat sb;

    *devmajor = 0;
    *devminor = 0;

    if (stat(devpath, &sb) < 0) {
        slog("failed to stat device file '%s': %s", devpath, strerror(errno));
        return -1;
    }

    if (S_ISDIR(sb.st_mode)) {
        /* A directory as mount source means this is a bind mount */
        return -2;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return -1;
    }

    *devmajor = major(sb.st_rdev);
    *devminor = minor(sb.st_rdev);
    return 0;
}
60 
61 /*
62  * Check if we already have the devmajor:devminor in the mounts
63  * If thats the case return true.
64  */
dev_exists(FsMountList * mounts,unsigned int devmajor,unsigned int devminor)65 static bool dev_exists(FsMountList *mounts, unsigned int devmajor, unsigned int devminor)
66 {
67     FsMount *mount;
68 
69     QTAILQ_FOREACH(mount, mounts, next) {
70         if (mount->devmajor == devmajor && mount->devminor == devminor) {
71             return true;
72         }
73     }
74     return false;
75 }
76 
build_fs_mount_list_from_mtab(FsMountList * mounts,Error ** errp)77 static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
78 {
79     struct mntent *ment;
80     FsMount *mount;
81     char const *mtab = "/proc/self/mounts";
82     FILE *fp;
83     unsigned int devmajor, devminor;
84 
85     fp = setmntent(mtab, "r");
86     if (!fp) {
87         error_setg(errp, "failed to open mtab file: '%s'", mtab);
88         return false;
89     }
90 
91     while ((ment = getmntent(fp))) {
92         /*
93          * An entry which device name doesn't start with a '/' is
94          * either a dummy file system or a network file system.
95          * Add special handling for smbfs and cifs as is done by
96          * coreutils as well.
97          */
98         if ((ment->mnt_fsname[0] != '/') ||
99             (strcmp(ment->mnt_type, "smbfs") == 0) ||
100             (strcmp(ment->mnt_type, "cifs") == 0)) {
101             continue;
102         }
103         if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
104             /* Skip bind mounts */
105             continue;
106         }
107         if (dev_exists(mounts, devmajor, devminor)) {
108             /* Skip already existing devices (bind mounts) */
109             continue;
110         }
111 
112         mount = g_new0(FsMount, 1);
113         mount->dirname = g_strdup(ment->mnt_dir);
114         mount->devtype = g_strdup(ment->mnt_type);
115         mount->devmajor = devmajor;
116         mount->devminor = devminor;
117 
118         QTAILQ_INSERT_TAIL(mounts, mount, next);
119     }
120 
121     endmntent(fp);
122     return true;
123 }
124 
/*
 * Decode, in place, the backslash escapes found in a mount table field.
 *
 * "\\" collapses to a single backslash and "\NNN" (three octal digits,
 * the first one in 0..3) is replaced by the byte with that value. Any
 * other backslash is copied through unchanged.
 *
 * Bytes name[0] .. name[len] inclusive are processed, so when name[len]
 * is the NUL terminator it is carried over to the shortened result.
 */
static void decode_mntname(char *name, int len)
{
    int rd = 0;
    int wr = 0;

    while (rd <= len) {
        char c = name[rd];

        if (c == '\\') {
            if (name[rd + 1] == '\\') {
                /* escaped backslash: emit one, skip the second */
                rd++;
            } else if (name[rd + 1] >= '0' && name[rd + 1] <= '3' &&
                       name[rd + 2] >= '0' && name[rd + 2] <= '7' &&
                       name[rd + 3] >= '0' && name[rd + 3] <= '7') {
                /* three-digit octal escape */
                c = (name[rd + 1] - '0') * 64 +
                    (name[rd + 2] - '0') * 8 +
                    (name[rd + 3] - '0');
                rd += 3;
            }
        }

        name[wr++] = c;
        rd++;
    }
}
146 
147 /*
148  * Walk the mount table and build a list of local file systems
149  */
build_fs_mount_list(FsMountList * mounts,Error ** errp)150 bool build_fs_mount_list(FsMountList *mounts, Error **errp)
151 {
152     FsMount *mount;
153     char const *mountinfo = "/proc/self/mountinfo";
154     FILE *fp;
155     char *line = NULL, *dash;
156     size_t n;
157     char check;
158     unsigned int devmajor, devminor;
159     int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;
160 
161     fp = fopen(mountinfo, "r");
162     if (!fp) {
163         return build_fs_mount_list_from_mtab(mounts, errp);
164     }
165 
166     while (getline(&line, &n, fp) != -1) {
167         ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
168                      &devmajor, &devminor, &dir_s, &dir_e, &check);
169         if (ret < 3) {
170             continue;
171         }
172         dash = strstr(line + dir_e, " - ");
173         if (!dash) {
174             continue;
175         }
176         ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
177                      &type_s, &type_e, &dev_s, &dev_e, &check);
178         if (ret < 1) {
179             continue;
180         }
181         line[dir_e] = 0;
182         dash[type_e] = 0;
183         dash[dev_e] = 0;
184         decode_mntname(line + dir_s, dir_e - dir_s);
185         decode_mntname(dash + dev_s, dev_e - dev_s);
186         if (devmajor == 0) {
187             /* btrfs reports major number = 0 */
188             if (strcmp("btrfs", dash + type_s) != 0 ||
189                 dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
190                 continue;
191             }
192         }
193 
194         if (dev_exists(mounts, devmajor, devminor)) {
195             /* Skip already existing devices (bind mounts) */
196             continue;
197         }
198 
199         mount = g_new0(FsMount, 1);
200         mount->dirname = g_strdup(line + dir_s);
201         mount->devtype = g_strdup(dash + type_s);
202         mount->devmajor = devmajor;
203         mount->devminor = devminor;
204 
205         QTAILQ_INSERT_TAIL(mounts, mount, next);
206     }
207     free(line);
208 
209     fclose(fp);
210     return true;
211 }
212 #endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
213 
214 #ifdef CONFIG_FSFREEZE
215 /*
216  * Walk list of mounted file systems in the guest, and freeze the ones which
217  * are real local file systems.
218  */
qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,strList * mountpoints,FsMountList mounts,Error ** errp)219 int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
220                                           strList *mountpoints,
221                                           FsMountList mounts,
222                                           Error **errp)
223 {
224     struct FsMount *mount;
225     strList *list;
226     int fd, ret, i = 0;
227 
228     QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
229         /* To issue fsfreeze in the reverse order of mounts, check if the
230          * mount is listed in the list here */
231         if (has_mountpoints) {
232             for (list = mountpoints; list; list = list->next) {
233                 if (strcmp(list->value, mount->dirname) == 0) {
234                     break;
235                 }
236             }
237             if (!list) {
238                 continue;
239             }
240         }
241 
242         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
243         if (fd == -1) {
244             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
245             return -1;
246         }
247 
248         /* we try to cull filesystems we know won't work in advance, but other
249          * filesystems may not implement fsfreeze for less obvious reasons.
250          * these will report EOPNOTSUPP. we simply ignore these when tallying
251          * the number of frozen filesystems.
252          * if a filesystem is mounted more than once (aka bind mount) a
253          * consecutive attempt to freeze an already frozen filesystem will
254          * return EBUSY.
255          *
256          * any other error means a failure to freeze a filesystem we
257          * expect to be freezable, so return an error in those cases
258          * and return system to thawed state.
259          */
260         ret = ioctl(fd, FIFREEZE);
261         if (ret == -1) {
262             if (errno != EOPNOTSUPP && errno != EBUSY) {
263                 error_setg_errno(errp, errno, "failed to freeze %s",
264                                  mount->dirname);
265                 close(fd);
266                 return -1;
267             }
268         } else {
269             i++;
270         }
271         close(fd);
272     }
273     return i;
274 }
275 
qmp_guest_fsfreeze_do_thaw(Error ** errp)276 int qmp_guest_fsfreeze_do_thaw(Error **errp)
277 {
278     int ret;
279     FsMountList mounts;
280     FsMount *mount;
281     int fd, i = 0, logged;
282     Error *local_err = NULL;
283 
284     QTAILQ_INIT(&mounts);
285     if (!build_fs_mount_list(&mounts, &local_err)) {
286         error_propagate(errp, local_err);
287         return -1;
288     }
289 
290     QTAILQ_FOREACH(mount, &mounts, next) {
291         logged = false;
292         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
293         if (fd == -1) {
294             continue;
295         }
296         /* we have no way of knowing whether a filesystem was actually unfrozen
297          * as a result of a successful call to FITHAW, only that if an error
298          * was returned the filesystem was *not* unfrozen by that particular
299          * call.
300          *
301          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
302          * to unfreeze, continuing issuing FITHAW until an error is returned,
303          * in which case either the filesystem is in an unfreezable state, or,
304          * more likely, it was thawed previously (and remains so afterward).
305          *
306          * also, since the most recent successful call is the one that did
307          * the actual unfreeze, we can use this to provide an accurate count
308          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
309          * may * be useful for determining whether a filesystem was unfrozen
310          * during the freeze/thaw phase by a process other than qemu-ga.
311          */
312         do {
313             ret = ioctl(fd, FITHAW);
314             if (ret == 0 && !logged) {
315                 i++;
316                 logged = true;
317             }
318         } while (ret == 0);
319         close(fd);
320     }
321 
322     free_fs_mount_list(&mounts);
323 
324     return i;
325 }
326 #endif /* CONFIG_FSFREEZE */
327 
328 #if defined(CONFIG_FSFREEZE)
329 
get_pci_driver(char const * syspath,int pathlen,Error ** errp)330 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
331 {
332     char *path;
333     char *dpath;
334     char *driver = NULL;
335     char buf[PATH_MAX];
336     ssize_t len;
337 
338     path = g_strndup(syspath, pathlen);
339     dpath = g_strdup_printf("%s/driver", path);
340     len = readlink(dpath, buf, sizeof(buf) - 1);
341     if (len != -1) {
342         buf[len] = 0;
343         driver = g_path_get_basename(buf);
344     }
345     g_free(dpath);
346     g_free(path);
347     return driver;
348 }
349 
/*
 * qsort() comparison callback for arrays of unsigned int.
 * Returns -1, 0 or 1 when *_a is less than, equal to or greater than *_b.
 */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int *lhs = _a;
    const unsigned int *rhs = _b;

    if (*lhs < *rhs) {
        return -1;
    }
    return *lhs > *rhs;
}
357 
358 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
build_hosts(char const * syspath,char const * host,bool ata,unsigned int * hosts,int hosts_max,Error ** errp)359 static int build_hosts(char const *syspath, char const *host, bool ata,
360                        unsigned int *hosts, int hosts_max, Error **errp)
361 {
362     char *path;
363     DIR *dir;
364     struct dirent *entry;
365     int i = 0;
366 
367     path = g_strndup(syspath, host - syspath);
368     dir = opendir(path);
369     if (!dir) {
370         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
371         g_free(path);
372         return -1;
373     }
374 
375     while (i < hosts_max) {
376         entry = readdir(dir);
377         if (!entry) {
378             break;
379         }
380         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
381             ++i;
382         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
383             ++i;
384         }
385     }
386 
387     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
388 
389     g_free(path);
390     closedir(dir);
391     return i;
392 }
393 
394 /*
395  * Store disk device info for devices on the PCI bus.
396  * Returns true if information has been stored, or false for failure.
397  */
build_guest_fsinfo_for_pci_dev(char const * syspath,GuestDiskAddress * disk,Error ** errp)398 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
399                                            GuestDiskAddress *disk,
400                                            Error **errp)
401 {
402     unsigned int pci[4], host, hosts[8], tgt[3];
403     int i, nhosts = 0, pcilen;
404     GuestPCIAddress *pciaddr = disk->pci_controller;
405     bool has_ata = false, has_host = false, has_tgt = false;
406     char *p, *q, *driver = NULL;
407     bool ret = false;
408 
409     p = strstr(syspath, "/devices/pci");
410     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
411                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
412         g_debug("only pci device is supported: sysfs path '%s'", syspath);
413         return false;
414     }
415 
416     p += 12 + pcilen;
417     while (true) {
418         driver = get_pci_driver(syspath, p - syspath, errp);
419         if (driver && (g_str_equal(driver, "ata_piix") ||
420                        g_str_equal(driver, "sym53c8xx") ||
421                        g_str_equal(driver, "virtio-pci") ||
422                        g_str_equal(driver, "ahci") ||
423                        g_str_equal(driver, "nvme") ||
424                        g_str_equal(driver, "xhci_hcd") ||
425                        g_str_equal(driver, "ehci-pci"))) {
426             break;
427         }
428 
429         g_free(driver);
430         if (sscanf(p, "/%x:%x:%x.%x%n",
431                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
432             p += pcilen;
433             continue;
434         }
435 
436         g_debug("unsupported driver or sysfs path '%s'", syspath);
437         return false;
438     }
439 
440     p = strstr(syspath, "/target");
441     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
442                     tgt, tgt + 1, tgt + 2) == 3) {
443         has_tgt = true;
444     }
445 
446     p = strstr(syspath, "/ata");
447     if (p) {
448         q = p + 4;
449         has_ata = true;
450     } else {
451         p = strstr(syspath, "/host");
452         q = p + 5;
453     }
454     if (p && sscanf(q, "%u", &host) == 1) {
455         has_host = true;
456         nhosts = build_hosts(syspath, p, has_ata, hosts,
457                              ARRAY_SIZE(hosts), errp);
458         if (nhosts < 0) {
459             goto cleanup;
460         }
461     }
462 
463     pciaddr->domain = pci[0];
464     pciaddr->bus = pci[1];
465     pciaddr->slot = pci[2];
466     pciaddr->function = pci[3];
467 
468     if (strcmp(driver, "ata_piix") == 0) {
469         /* a host per ide bus, target*:0:<unit>:0 */
470         if (!has_host || !has_tgt) {
471             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
472             goto cleanup;
473         }
474         for (i = 0; i < nhosts; i++) {
475             if (host == hosts[i]) {
476                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
477                 disk->bus = i;
478                 disk->unit = tgt[1];
479                 break;
480             }
481         }
482         if (i >= nhosts) {
483             g_debug("no host for '%s' (driver '%s')", syspath, driver);
484             goto cleanup;
485         }
486     } else if (strcmp(driver, "sym53c8xx") == 0) {
487         /* scsi(LSI Logic): target*:0:<unit>:0 */
488         if (!has_tgt) {
489             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
490             goto cleanup;
491         }
492         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
493         disk->unit = tgt[1];
494     } else if (strcmp(driver, "virtio-pci") == 0) {
495         if (has_tgt) {
496             /* virtio-scsi: target*:0:0:<unit> */
497             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
498             disk->unit = tgt[2];
499         } else {
500             /* virtio-blk: 1 disk per 1 device */
501             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
502         }
503     } else if (strcmp(driver, "ahci") == 0) {
504         /* ahci: 1 host per 1 unit */
505         if (!has_host || !has_tgt) {
506             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
507             goto cleanup;
508         }
509         for (i = 0; i < nhosts; i++) {
510             if (host == hosts[i]) {
511                 disk->unit = i;
512                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
513                 break;
514             }
515         }
516         if (i >= nhosts) {
517             g_debug("no host for '%s' (driver '%s')", syspath, driver);
518             goto cleanup;
519         }
520     } else if (strcmp(driver, "nvme") == 0) {
521         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
522     } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
523         disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
524     } else {
525         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
526         goto cleanup;
527     }
528 
529     ret = true;
530 
531 cleanup:
532     g_free(driver);
533     return ret;
534 }
535 
536 /*
537  * Store disk device info for non-PCI virtio devices (for example s390x
538  * channel I/O devices). Returns true if information has been stored, or
539  * false for failure.
540  */
build_guest_fsinfo_for_nonpci_virtio(char const * syspath,GuestDiskAddress * disk,Error ** errp)541 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
542                                                  GuestDiskAddress *disk,
543                                                  Error **errp)
544 {
545     unsigned int tgt[3];
546     char *p;
547 
548     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
549         g_debug("Unsupported virtio device '%s'", syspath);
550         return false;
551     }
552 
553     p = strstr(syspath, "/target");
554     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
555                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
556         /* virtio-scsi: target*:0:<target>:<unit> */
557         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
558         disk->bus = tgt[0];
559         disk->target = tgt[1];
560         disk->unit = tgt[2];
561     } else {
562         /* virtio-blk: 1 disk per 1 device */
563         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
564     }
565 
566     return true;
567 }
568 
569 /*
570  * Store disk device info for CCW devices (s390x channel I/O devices).
571  * Returns true if information has been stored, or false for failure.
572  */
build_guest_fsinfo_for_ccw_dev(char const * syspath,GuestDiskAddress * disk,Error ** errp)573 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
574                                            GuestDiskAddress *disk,
575                                            Error **errp)
576 {
577     unsigned int cssid, ssid, subchno, devno;
578     char *p;
579 
580     p = strstr(syspath, "/devices/css");
581     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
582                      &cssid, &ssid, &subchno, &devno) < 4) {
583         g_debug("could not parse ccw device sysfs path: %s", syspath);
584         return false;
585     }
586 
587     disk->ccw_address = g_new0(GuestCCWAddress, 1);
588     disk->ccw_address->cssid = cssid;
589     disk->ccw_address->ssid = ssid;
590     disk->ccw_address->subchno = subchno;
591     disk->ccw_address->devno = devno;
592 
593     if (strstr(p, "/virtio")) {
594         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
595     }
596 
597     return true;
598 }
599 
600 /* Store disk device info specified by @sysfs into @fs */
build_guest_fsinfo_for_real_device(char const * syspath,GuestFilesystemInfo * fs,Error ** errp)601 static void build_guest_fsinfo_for_real_device(char const *syspath,
602                                                GuestFilesystemInfo *fs,
603                                                Error **errp)
604 {
605     GuestDiskAddress *disk;
606     GuestPCIAddress *pciaddr;
607     bool has_hwinf;
608 #ifdef CONFIG_LIBUDEV
609     struct udev *udev = NULL;
610     struct udev_device *udevice = NULL;
611 #endif
612 
613     pciaddr = g_new0(GuestPCIAddress, 1);
614     pciaddr->domain = -1;                       /* -1 means field is invalid */
615     pciaddr->bus = -1;
616     pciaddr->slot = -1;
617     pciaddr->function = -1;
618 
619     disk = g_new0(GuestDiskAddress, 1);
620     disk->pci_controller = pciaddr;
621     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
622 
623 #ifdef CONFIG_LIBUDEV
624     udev = udev_new();
625     udevice = udev_device_new_from_syspath(udev, syspath);
626     if (udev == NULL || udevice == NULL) {
627         g_debug("failed to query udev");
628     } else {
629         const char *devnode, *serial;
630         devnode = udev_device_get_devnode(udevice);
631         if (devnode != NULL) {
632             disk->dev = g_strdup(devnode);
633         }
634         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
635         if (serial != NULL && *serial != 0) {
636             disk->serial = g_strdup(serial);
637         }
638     }
639 
640     udev_unref(udev);
641     udev_device_unref(udevice);
642 #endif
643 
644     if (strstr(syspath, "/devices/pci")) {
645         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
646     } else if (strstr(syspath, "/devices/css")) {
647         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
648     } else if (strstr(syspath, "/virtio")) {
649         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
650     } else {
651         g_debug("Unsupported device type for '%s'", syspath);
652         has_hwinf = false;
653     }
654 
655     if (has_hwinf || disk->dev || disk->serial) {
656         QAPI_LIST_PREPEND(fs->disk, disk);
657     } else {
658         qapi_free_GuestDiskAddress(disk);
659     }
660 }
661 
662 static void build_guest_fsinfo_for_device(char const *devpath,
663                                           GuestFilesystemInfo *fs,
664                                           Error **errp);
665 
666 /* Store a list of slave devices of virtual volume specified by @syspath into
667  * @fs */
build_guest_fsinfo_for_virtual_device(char const * syspath,GuestFilesystemInfo * fs,Error ** errp)668 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
669                                                   GuestFilesystemInfo *fs,
670                                                   Error **errp)
671 {
672     Error *err = NULL;
673     DIR *dir;
674     char *dirpath;
675     struct dirent *entry;
676 
677     dirpath = g_strdup_printf("%s/slaves", syspath);
678     dir = opendir(dirpath);
679     if (!dir) {
680         if (errno != ENOENT) {
681             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
682         }
683         g_free(dirpath);
684         return;
685     }
686 
687     for (;;) {
688         errno = 0;
689         entry = readdir(dir);
690         if (entry == NULL) {
691             if (errno) {
692                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
693             }
694             break;
695         }
696 
697         if (entry->d_type == DT_LNK) {
698             char *path;
699 
700             g_debug(" slave device '%s'", entry->d_name);
701             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
702             build_guest_fsinfo_for_device(path, fs, &err);
703             g_free(path);
704 
705             if (err) {
706                 error_propagate(errp, err);
707                 break;
708             }
709         }
710     }
711 
712     g_free(dirpath);
713     closedir(dir);
714 }
715 
is_disk_virtual(const char * devpath,Error ** errp)716 static bool is_disk_virtual(const char *devpath, Error **errp)
717 {
718     g_autofree char *syspath = realpath(devpath, NULL);
719 
720     if (!syspath) {
721         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
722         return false;
723     }
724     return strstr(syspath, "/devices/virtual/block/") != NULL;
725 }
726 
727 /* Dispatch to functions for virtual/real device */
build_guest_fsinfo_for_device(char const * devpath,GuestFilesystemInfo * fs,Error ** errp)728 static void build_guest_fsinfo_for_device(char const *devpath,
729                                           GuestFilesystemInfo *fs,
730                                           Error **errp)
731 {
732     ERRP_GUARD();
733     g_autofree char *syspath = NULL;
734     bool is_virtual = false;
735 
736     syspath = realpath(devpath, NULL);
737     if (!syspath) {
738         if (errno != ENOENT) {
739             error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
740             return;
741         }
742 
743         /* ENOENT: This devpath may not exist because of container config */
744         if (!fs->name) {
745             fs->name = g_path_get_basename(devpath);
746         }
747         return;
748     }
749 
750     if (!fs->name) {
751         fs->name = g_path_get_basename(syspath);
752     }
753 
754     g_debug("  parse sysfs path '%s'", syspath);
755     is_virtual = is_disk_virtual(syspath, errp);
756     if (*errp != NULL) {
757         return;
758     }
759     if (is_virtual) {
760         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
761     } else {
762         build_guest_fsinfo_for_real_device(syspath, fs, errp);
763     }
764 }
765 
766 #ifdef CONFIG_LIBUDEV
767 
768 /*
769  * Wrapper around build_guest_fsinfo_for_device() for getting just
770  * the disk address.
771  */
get_disk_address(const char * syspath,Error ** errp)772 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
773 {
774     g_autoptr(GuestFilesystemInfo) fs = NULL;
775 
776     fs = g_new0(GuestFilesystemInfo, 1);
777     build_guest_fsinfo_for_device(syspath, fs, errp);
778     if (fs->disk != NULL) {
779         return g_steal_pointer(&fs->disk->value);
780     }
781     return NULL;
782 }
783 
/*
 * Return a newly allocated copy of the udev "DM_NAME" property of the
 * device at @syspath, or NULL when udev cannot be queried, the property
 * is missing, or the property is empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev *udev = udev_new();
    struct udev_device *udevice = NULL;
    char *result = NULL;

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            const char *alias = udev_device_get_property_value(
                udevice, "DM_NAME");

            /*
             * NULL means there was an error and empty string means there
             * is no alias. With no alias NULL is returned rather than an
             * empty string.
             */
            if (alias == NULL) {
                g_debug("failed to query udev for device alias for: %s",
                    syspath);
            } else if (*alias != 0) {
                result = g_strdup(alias);
            }
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return result;
}
819 
/*
 * Return a newly allocated copy of the device node that udev reports
 * for @syspath, or NULL when udev cannot be queried.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev *udev = udev_new();
    struct udev_device *udevice = NULL;
    char *devnode = NULL;

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            devnode = g_strdup(udev_device_get_devnode(udevice));
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return devnode;
}
844 
/*
 * Fill disk->dependencies with the device nodes of the entries found
 * under "<disk_dir>/slaves".
 *
 * disk->has_dependencies is set as soon as that directory can be
 * opened, even when it turns out to be empty. Entries without a device
 * node are skipped.
 */
static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
{
    g_autofree char *deps_dir = g_strdup_printf("%s/slaves", disk_dir);
    const gchar *name;
    GDir *dir;

    /* List dependent disks */
    g_debug("  listing entries in: %s", deps_dir);
    dir = g_dir_open(deps_dir, 0, NULL);
    if (dir == NULL) {
        g_debug("failed to list entries in %s", deps_dir);
        return;
    }

    disk->has_dependencies = true;
    for (name = g_dir_read_name(dir); name != NULL;
         name = g_dir_read_name(dir)) {
        g_autofree char *dep_dir = g_strdup_printf("%s/%s", deps_dir, name);
        char *dev_name = get_device_for_syspath(dep_dir);

        if (dev_name == NULL) {
            continue;
        }

        /* Add dependent disks */
        g_debug("  adding dependent device: %s", dev_name);
        QAPI_LIST_PREPEND(disk->dependencies, dev_name);
    }
    g_dir_close(dir);
}
874 
875 /*
876  * Detect partitions subdirectory, name is "<disk_name><number>" or
877  * "<disk_name>p<number>"
878  *
879  * @disk_name -- last component of /sys path (e.g. sda)
880  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
881  * @disk_dev -- device node of the disk (e.g. /dev/sda)
882  */
get_disk_partitions(GuestDiskInfoList * list,const char * disk_name,const char * disk_dir,const char * disk_dev)883 static GuestDiskInfoList *get_disk_partitions(
884     GuestDiskInfoList *list,
885     const char *disk_name, const char *disk_dir,
886     const char *disk_dev)
887 {
888     GuestDiskInfoList *ret = list;
889     struct dirent *de_disk;
890     DIR *dp_disk = NULL;
891     size_t len = strlen(disk_name);
892 
893     dp_disk = opendir(disk_dir);
894     while ((de_disk = readdir(dp_disk)) != NULL) {
895         g_autofree char *partition_dir = NULL;
896         char *dev_name;
897         GuestDiskInfo *partition;
898 
899         if (!(de_disk->d_type & DT_DIR)) {
900             continue;
901         }
902 
903         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
904             ((*(de_disk->d_name + len) == 'p' &&
905             isdigit(*(de_disk->d_name + len + 1))) ||
906                 isdigit(*(de_disk->d_name + len))))) {
907             continue;
908         }
909 
910         partition_dir = g_strdup_printf("%s/%s",
911             disk_dir, de_disk->d_name);
912         dev_name = get_device_for_syspath(partition_dir);
913         if (dev_name == NULL) {
914             g_debug("Failed to get device name for syspath: %s",
915                 disk_dir);
916             continue;
917         }
918         partition = g_new0(GuestDiskInfo, 1);
919         partition->name = dev_name;
920         partition->partition = true;
921         partition->has_dependencies = true;
922         /* Add parent disk as dependent for easier tracking of hierarchy */
923         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
924 
925         QAPI_LIST_PREPEND(ret, partition);
926     }
927     closedir(dp_disk);
928 
929     return ret;
930 }
931 
/*
 * Fetch the NVMe SMART log of @disk through an admin "get log page"
 * ioctl on the device node disk->name and, on success, store the decoded
 * values in a newly allocated disk->smart.
 *
 * Failures to open the device or to run the ioctl are only reported via
 * g_debug(); disk->smart is not touched in that case.
 */
static void get_nvme_smart(GuestDiskInfo *disk)
{
    NvmeSmartLog page = {0};
    GuestNVMeSmart *nvme;
    int dev_fd;
    struct nvme_admin_cmd get_log = {
        .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
        .nsid = NVME_NSID_BROADCAST,
        .addr = (uintptr_t)&page,
        .data_len = sizeof(page),
        /* log id, RAE bit, and the page size in 32-bit words minus one */
        .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
                 | (((sizeof(page) >> 2) - 1) << 16)
    };

    dev_fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
    if (dev_fd == -1) {
        g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
        return;
    }

    if (ioctl(dev_fd, NVME_IOCTL_ADMIN_CMD, &get_log)) {
        /* log first: close() below must not disturb errno before it's used */
        g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
        close(dev_fd);
        return;
    }
    /* The log page has been copied into 'page'; the device is done with */
    close(dev_fd);

    disk->smart = g_new0(GuestDiskSmart, 1);
    disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
    nvme = &disk->smart->u.nvme;

    /* Single-byte fields need no byte swapping */
    nvme->critical_warning = page.critical_warning;
    nvme->available_spare = page.available_spare;
    nvme->available_spare_threshold = page.available_spare_threshold;
    nvme->percentage_used = page.percentage_used;

    nvme->temperature = lduw_le_p(&page.temperature); /* unaligned field */

    /* Each counter is exposed as a low/high pair of 64-bit values */
    nvme->data_units_read_lo = le64_to_cpu(page.data_units_read[0]);
    nvme->data_units_read_hi = le64_to_cpu(page.data_units_read[1]);
    nvme->data_units_written_lo = le64_to_cpu(page.data_units_written[0]);
    nvme->data_units_written_hi = le64_to_cpu(page.data_units_written[1]);
    nvme->host_read_commands_lo = le64_to_cpu(page.host_read_commands[0]);
    nvme->host_read_commands_hi = le64_to_cpu(page.host_read_commands[1]);
    nvme->host_write_commands_lo = le64_to_cpu(page.host_write_commands[0]);
    nvme->host_write_commands_hi = le64_to_cpu(page.host_write_commands[1]);
    nvme->controller_busy_time_lo = le64_to_cpu(page.controller_busy_time[0]);
    nvme->controller_busy_time_hi = le64_to_cpu(page.controller_busy_time[1]);
    nvme->power_cycles_lo = le64_to_cpu(page.power_cycles[0]);
    nvme->power_cycles_hi = le64_to_cpu(page.power_cycles[1]);
    nvme->power_on_hours_lo = le64_to_cpu(page.power_on_hours[0]);
    nvme->power_on_hours_hi = le64_to_cpu(page.power_on_hours[1]);
    nvme->unsafe_shutdowns_lo = le64_to_cpu(page.unsafe_shutdowns[0]);
    nvme->unsafe_shutdowns_hi = le64_to_cpu(page.unsafe_shutdowns[1]);
    nvme->media_errors_lo = le64_to_cpu(page.media_errors[0]);
    nvme->media_errors_hi = le64_to_cpu(page.media_errors[1]);
    nvme->number_of_error_log_entries_lo =
        le64_to_cpu(page.number_of_error_log_entries[0]);
    nvme->number_of_error_log_entries_hi =
        le64_to_cpu(page.number_of_error_log_entries[1]);
}
992 
/*
 * Collect SMART data for @disk when a collector exists for its bus type.
 * Only disks with a known address on the NVMe bus are handled.
 */
static void get_disk_smart(GuestDiskInfo *disk)
{
    if (!disk->address) {
        return;
    }
    if (disk->address->bus_type != GUEST_DISK_BUS_TYPE_NVME) {
        return;
    }

    get_nvme_smart(disk);
}
1000 
qmp_guest_get_disks(Error ** errp)1001 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1002 {
1003     GuestDiskInfoList *ret = NULL;
1004     GuestDiskInfo *disk;
1005     DIR *dp = NULL;
1006     struct dirent *de = NULL;
1007 
1008     g_debug("listing /sys/block directory");
1009     dp = opendir("/sys/block");
1010     if (dp == NULL) {
1011         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
1012         return NULL;
1013     }
1014     while ((de = readdir(dp)) != NULL) {
1015         g_autofree char *disk_dir = NULL, *line = NULL,
1016             *size_path = NULL;
1017         char *dev_name;
1018         Error *local_err = NULL;
1019         if (de->d_type != DT_LNK) {
1020             g_debug("  skipping entry: %s", de->d_name);
1021             continue;
1022         }
1023 
1024         /* Check size and skip zero-sized disks */
1025         g_debug("  checking disk size");
1026         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1027         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1028             g_debug("  failed to read disk size");
1029             continue;
1030         }
1031         if (g_strcmp0(line, "0\n") == 0) {
1032             g_debug("  skipping zero-sized disk");
1033             continue;
1034         }
1035 
1036         g_debug("  adding %s", de->d_name);
1037         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1038         dev_name = get_device_for_syspath(disk_dir);
1039         if (dev_name == NULL) {
1040             g_debug("Failed to get device name for syspath: %s",
1041                 disk_dir);
1042             continue;
1043         }
1044         disk = g_new0(GuestDiskInfo, 1);
1045         disk->name = dev_name;
1046         disk->partition = false;
1047         disk->alias = get_alias_for_syspath(disk_dir);
1048         QAPI_LIST_PREPEND(ret, disk);
1049 
1050         /* Get address for non-virtual devices */
1051         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1052         if (local_err != NULL) {
1053             g_debug("  failed to check disk path, ignoring error: %s",
1054                 error_get_pretty(local_err));
1055             error_free(local_err);
1056             local_err = NULL;
1057             /* Don't try to get the address */
1058             is_virtual = true;
1059         }
1060         if (!is_virtual) {
1061             disk->address = get_disk_address(disk_dir, &local_err);
1062             if (local_err != NULL) {
1063                 g_debug("  failed to get device info, ignoring error: %s",
1064                     error_get_pretty(local_err));
1065                 error_free(local_err);
1066                 local_err = NULL;
1067             }
1068         }
1069 
1070         get_disk_deps(disk_dir, disk);
1071         get_disk_smart(disk);
1072         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1073     }
1074 
1075     closedir(dp);
1076 
1077     return ret;
1078 }
1079 
1080 #endif
1081 
1082 /* Return a list of the disk device(s)' info which @mount lies on */
build_guest_fsinfo(struct FsMount * mount,Error ** errp)1083 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1084                                                Error **errp)
1085 {
1086     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1087     struct statvfs buf;
1088     unsigned long used, nonroot_total, fr_size;
1089     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1090                                     mount->devmajor, mount->devminor);
1091 
1092     fs->mountpoint = g_strdup(mount->dirname);
1093     fs->type = g_strdup(mount->devtype);
1094     build_guest_fsinfo_for_device(devpath, fs, errp);
1095 
1096     if (statvfs(fs->mountpoint, &buf) == 0) {
1097         fr_size = buf.f_frsize;
1098         used = buf.f_blocks - buf.f_bfree;
1099         nonroot_total = used + buf.f_bavail;
1100         fs->used_bytes = used * fr_size;
1101         fs->total_bytes = nonroot_total * fr_size;
1102         fs->total_bytes_privileged = buf.f_blocks * fr_size;
1103 
1104         fs->has_total_bytes = true;
1105         fs->has_total_bytes_privileged = true;
1106         fs->has_used_bytes = true;
1107     }
1108 
1109     g_free(devpath);
1110 
1111     return fs;
1112 }
1113 
qmp_guest_get_fsinfo(Error ** errp)1114 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1115 {
1116     FsMountList mounts;
1117     struct FsMount *mount;
1118     GuestFilesystemInfoList *ret = NULL;
1119     Error *local_err = NULL;
1120 
1121     QTAILQ_INIT(&mounts);
1122     if (!build_fs_mount_list(&mounts, &local_err)) {
1123         error_propagate(errp, local_err);
1124         return NULL;
1125     }
1126 
1127     QTAILQ_FOREACH(mount, &mounts, next) {
1128         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1129 
1130         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1131         if (local_err) {
1132             error_propagate(errp, local_err);
1133             qapi_free_GuestFilesystemInfoList(ret);
1134             ret = NULL;
1135             break;
1136         }
1137     }
1138 
1139     free_fs_mount_list(&mounts);
1140     return ret;
1141 }
1142 #endif /* CONFIG_FSFREEZE */
1143 
1144 #if defined(CONFIG_FSTRIM)
1145 /*
1146  * Walk list of mounted file systems in the guest, and trim them.
1147  */
1148 GuestFilesystemTrimResponse *
qmp_guest_fstrim(bool has_minimum,int64_t minimum,Error ** errp)1149 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1150 {
1151     GuestFilesystemTrimResponse *response;
1152     GuestFilesystemTrimResult *result;
1153     int ret = 0;
1154     FsMountList mounts;
1155     struct FsMount *mount;
1156     int fd;
1157     struct fstrim_range r;
1158 
1159     slog("guest-fstrim called");
1160 
1161     QTAILQ_INIT(&mounts);
1162     if (!build_fs_mount_list(&mounts, errp)) {
1163         return NULL;
1164     }
1165 
1166     response = g_malloc0(sizeof(*response));
1167 
1168     QTAILQ_FOREACH(mount, &mounts, next) {
1169         result = g_malloc0(sizeof(*result));
1170         result->path = g_strdup(mount->dirname);
1171 
1172         QAPI_LIST_PREPEND(response->paths, result);
1173 
1174         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1175         if (fd == -1) {
1176             result->error = g_strdup_printf("failed to open: %s",
1177                                             strerror(errno));
1178             continue;
1179         }
1180 
1181         /* We try to cull filesystems we know won't work in advance, but other
1182          * filesystems may not implement fstrim for less obvious reasons.
1183          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1184          * will be reported (e.g. CD-ROMs).
1185          * Any other error means an unexpected error.
1186          */
1187         r.start = 0;
1188         r.len = -1;
1189         r.minlen = has_minimum ? minimum : 0;
1190         ret = ioctl(fd, FITRIM, &r);
1191         if (ret == -1) {
1192             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1193                 result->error = g_strdup("trim not supported");
1194             } else {
1195                 result->error = g_strdup_printf("failed to trim: %s",
1196                                                 strerror(errno));
1197             }
1198             close(fd);
1199             continue;
1200         }
1201 
1202         result->has_minimum = true;
1203         result->minimum = r.minlen;
1204         result->has_trimmed = true;
1205         result->trimmed = r.len;
1206         close(fd);
1207     }
1208 
1209     free_fs_mount_list(&mounts);
1210     return response;
1211 }
1212 #endif /* CONFIG_FSTRIM */
1213 
/* sysfs file that is read to probe, and written to enter, a sleep state */
#define LINUX_SYS_STATE_FILE "/sys/power/state"

/* Exit status of the pm-is-supported helper is compared against these */
#define SUSPEND_SUPPORTED 0
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Suspend flavours handled by the guest-suspend-* commands.
 *
 * The numeric values are used as indexes into the per-mode string tables
 * of the suspend helpers, so they must stay 0, 1, 2 in this order.
 */
typedef enum {
    SUSPEND_MODE_DISK = 0,
    SUSPEND_MODE_RAM = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1223 
1224 /*
1225  * Executes a command in a child process using g_spawn_sync,
1226  * returning an int >= 0 representing the exit status of the
1227  * process.
1228  *
1229  * If the program wasn't found in path, returns -1.
1230  *
1231  * If a problem happened when creating the child process,
1232  * returns -1 and errp is set.
1233  */
run_process_child(const char * command[],Error ** errp)1234 static int run_process_child(const char *command[], Error **errp)
1235 {
1236     int exit_status, spawn_flag;
1237     GError *g_err = NULL;
1238     bool success;
1239 
1240     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1241                  G_SPAWN_STDERR_TO_DEV_NULL;
1242 
1243     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1244                             NULL, NULL, NULL, NULL,
1245                             &exit_status, &g_err);
1246 
1247     if (success) {
1248         return WEXITSTATUS(exit_status);
1249     }
1250 
1251     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1252         error_setg(errp, "failed to create child process, error '%s'",
1253                    g_err->message);
1254     }
1255 
1256     g_error_free(g_err);
1257     return -1;
1258 }
1259 
systemd_supports_mode(SuspendMode mode,Error ** errp)1260 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1261 {
1262     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1263                                      "systemd-hybrid-sleep"};
1264     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1265     int status;
1266 
1267     status = run_process_child(cmd, errp);
1268 
1269     /*
1270      * systemctl status uses LSB return codes so we can expect
1271      * status > 0 and be ok. To assert if the guest has support
1272      * for the selected suspend mode, status should be < 4. 4 is
1273      * the code for unknown service status, the return value when
1274      * the service does not exist. A common value is status = 3
1275      * (program is not running).
1276      */
1277     if (status > 0 && status < 4) {
1278         return true;
1279     }
1280 
1281     return false;
1282 }
1283 
systemd_suspend(SuspendMode mode,Error ** errp)1284 static void systemd_suspend(SuspendMode mode, Error **errp)
1285 {
1286     Error *local_err = NULL;
1287     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1288     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1289     int status;
1290 
1291     status = run_process_child(cmd, &local_err);
1292 
1293     if (status == 0) {
1294         return;
1295     }
1296 
1297     if ((status == -1) && !local_err) {
1298         error_setg(errp, "the helper program 'systemctl %s' was not found",
1299                    systemctl_args[mode]);
1300         return;
1301     }
1302 
1303     if (local_err) {
1304         error_propagate(errp, local_err);
1305     } else {
1306         error_setg(errp, "the helper program 'systemctl %s' returned an "
1307                    "unexpected exit status code (%d)",
1308                    systemctl_args[mode], status);
1309     }
1310 }
1311 
pmutils_supports_mode(SuspendMode mode,Error ** errp)1312 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1313 {
1314     Error *local_err = NULL;
1315     const char *pmutils_args[3] = {"--hibernate", "--suspend",
1316                                    "--suspend-hybrid"};
1317     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1318     int status;
1319 
1320     status = run_process_child(cmd, &local_err);
1321 
1322     if (status == SUSPEND_SUPPORTED) {
1323         return true;
1324     }
1325 
1326     if ((status == -1) && !local_err) {
1327         return false;
1328     }
1329 
1330     if (local_err) {
1331         error_propagate(errp, local_err);
1332     } else {
1333         error_setg(errp,
1334                    "the helper program '%s' returned an unexpected exit"
1335                    " status code (%d)", "pm-is-supported", status);
1336     }
1337 
1338     return false;
1339 }
1340 
pmutils_suspend(SuspendMode mode,Error ** errp)1341 static void pmutils_suspend(SuspendMode mode, Error **errp)
1342 {
1343     Error *local_err = NULL;
1344     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1345                                        "pm-suspend-hybrid"};
1346     const char *cmd[2] = {pmutils_binaries[mode], NULL};
1347     int status;
1348 
1349     status = run_process_child(cmd, &local_err);
1350 
1351     if (status == 0) {
1352         return;
1353     }
1354 
1355     if ((status == -1) && !local_err) {
1356         error_setg(errp, "the helper program '%s' was not found",
1357                    pmutils_binaries[mode]);
1358         return;
1359     }
1360 
1361     if (local_err) {
1362         error_propagate(errp, local_err);
1363     } else {
1364         error_setg(errp,
1365                    "the helper program '%s' returned an unexpected exit"
1366                    " status code (%d)", pmutils_binaries[mode], status);
1367     }
1368 }
1369 
linux_sys_state_supports_mode(SuspendMode mode,Error ** errp)1370 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1371 {
1372     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1373     const char *sysfile_str = sysfile_strs[mode];
1374     char buf[32]; /* hopefully big enough */
1375     int fd;
1376     ssize_t ret;
1377 
1378     if (!sysfile_str) {
1379         error_setg(errp, "unknown guest suspend mode");
1380         return false;
1381     }
1382 
1383     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1384     if (fd < 0) {
1385         return false;
1386     }
1387 
1388     ret = read(fd, buf, sizeof(buf) - 1);
1389     close(fd);
1390     if (ret <= 0) {
1391         return false;
1392     }
1393     buf[ret] = '\0';
1394 
1395     if (strstr(buf, sysfile_str)) {
1396         return true;
1397     }
1398     return false;
1399 }
1400 
linux_sys_state_suspend(SuspendMode mode,Error ** errp)1401 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1402 {
1403     g_autoptr(GError) local_gerr = NULL;
1404     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1405     const char *sysfile_str = sysfile_strs[mode];
1406 
1407     if (!sysfile_str) {
1408         error_setg(errp, "unknown guest suspend mode");
1409         return;
1410     }
1411 
1412     if (!g_file_set_contents(LINUX_SYS_STATE_FILE, sysfile_str,
1413                              -1, &local_gerr)) {
1414         error_setg(errp, "suspend: cannot write to '%s': %s",
1415                    LINUX_SYS_STATE_FILE, local_gerr->message);
1416         return;
1417     }
1418 }
1419 
guest_suspend(SuspendMode mode,Error ** errp)1420 static void guest_suspend(SuspendMode mode, Error **errp)
1421 {
1422     Error *local_err = NULL;
1423     bool mode_supported = false;
1424 
1425     if (systemd_supports_mode(mode, &local_err)) {
1426         mode_supported = true;
1427         systemd_suspend(mode, &local_err);
1428 
1429         if (!local_err) {
1430             return;
1431         }
1432     }
1433 
1434     error_free(local_err);
1435     local_err = NULL;
1436 
1437     if (pmutils_supports_mode(mode, &local_err)) {
1438         mode_supported = true;
1439         pmutils_suspend(mode, &local_err);
1440 
1441         if (!local_err) {
1442             return;
1443         }
1444     }
1445 
1446     error_free(local_err);
1447     local_err = NULL;
1448 
1449     if (linux_sys_state_supports_mode(mode, &local_err)) {
1450         mode_supported = true;
1451         linux_sys_state_suspend(mode, &local_err);
1452     }
1453 
1454     if (!mode_supported) {
1455         error_free(local_err);
1456         error_setg(errp,
1457                    "the requested suspend mode is not supported by the guest");
1458     } else {
1459         error_propagate(errp, local_err);
1460     }
1461 }
1462 
qmp_guest_suspend_disk(Error ** errp)1463 void qmp_guest_suspend_disk(Error **errp)
1464 {
1465     guest_suspend(SUSPEND_MODE_DISK, errp);
1466 }
1467 
qmp_guest_suspend_ram(Error ** errp)1468 void qmp_guest_suspend_ram(Error **errp)
1469 {
1470     guest_suspend(SUSPEND_MODE_RAM, errp);
1471 }
1472 
qmp_guest_suspend_hybrid(Error ** errp)1473 void qmp_guest_suspend_hybrid(Error **errp)
1474 {
1475     guest_suspend(SUSPEND_MODE_HYBRID, errp);
1476 }
1477 
1478 /* Transfer online/offline status between @vcpu and the guest system.
1479  *
1480  * On input either @errp or *@errp must be NULL.
1481  *
1482  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1483  * - R: vcpu->logical_id
1484  * - W: vcpu->online
1485  * - W: vcpu->can_offline
1486  *
1487  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1488  * - R: vcpu->logical_id
1489  * - R: vcpu->online
1490  *
1491  * Written members remain unmodified on error.
1492  */
transfer_vcpu(GuestLogicalProcessor * vcpu,bool sys2vcpu,char * dirpath,Error ** errp)1493 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1494                           char *dirpath, Error **errp)
1495 {
1496     int fd;
1497     int res;
1498     int dirfd;
1499     static const char fn[] = "online";
1500 
1501     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1502     if (dirfd == -1) {
1503         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1504         return;
1505     }
1506 
1507     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1508     if (fd == -1) {
1509         if (errno != ENOENT) {
1510             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1511         } else if (sys2vcpu) {
1512             vcpu->online = true;
1513             vcpu->can_offline = false;
1514         } else if (!vcpu->online) {
1515             error_setg(errp, "logical processor #%" PRId64 " can't be "
1516                        "offlined", vcpu->logical_id);
1517         } /* otherwise pretend successful re-onlining */
1518     } else {
1519         unsigned char status;
1520 
1521         res = pread(fd, &status, 1, 0);
1522         if (res == -1) {
1523             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1524         } else if (res == 0) {
1525             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1526                        fn);
1527         } else if (sys2vcpu) {
1528             vcpu->online = (status != '0');
1529             vcpu->can_offline = true;
1530         } else if (vcpu->online != (status != '0')) {
1531             status = '0' + vcpu->online;
1532             if (pwrite(fd, &status, 1, 0) == -1) {
1533                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1534                                  fn);
1535             }
1536         } /* otherwise pretend successful re-(on|off)-lining */
1537 
1538         res = close(fd);
1539         g_assert(res == 0);
1540     }
1541 
1542     res = close(dirfd);
1543     g_assert(res == 0);
1544 }
1545 
qmp_guest_get_vcpus(Error ** errp)1546 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1547 {
1548     GuestLogicalProcessorList *head, **tail;
1549     const char *cpu_dir = "/sys/devices/system/cpu";
1550     const gchar *line;
1551     g_autoptr(GDir) cpu_gdir = NULL;
1552     Error *local_err = NULL;
1553 
1554     head = NULL;
1555     tail = &head;
1556     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
1557 
1558     if (cpu_gdir == NULL) {
1559         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
1560         return NULL;
1561     }
1562 
1563     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
1564         GuestLogicalProcessor *vcpu;
1565         int64_t id;
1566         if (sscanf(line, "cpu%" PRId64, &id)) {
1567             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
1568                                                     "cpu%" PRId64 "/", id);
1569             vcpu = g_malloc0(sizeof *vcpu);
1570             vcpu->logical_id = id;
1571             vcpu->has_can_offline = true; /* lolspeak ftw */
1572             transfer_vcpu(vcpu, true, path, &local_err);
1573             QAPI_LIST_APPEND(tail, vcpu);
1574         }
1575     }
1576 
1577     if (local_err == NULL) {
1578         /* there's no guest with zero VCPUs */
1579         g_assert(head != NULL);
1580         return head;
1581     }
1582 
1583     qapi_free_GuestLogicalProcessorList(head);
1584     error_propagate(errp, local_err);
1585     return NULL;
1586 }
1587 
qmp_guest_set_vcpus(GuestLogicalProcessorList * vcpus,Error ** errp)1588 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1589 {
1590     int64_t processed;
1591     Error *local_err = NULL;
1592 
1593     processed = 0;
1594     while (vcpus != NULL) {
1595         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1596                                      vcpus->value->logical_id);
1597 
1598         transfer_vcpu(vcpus->value, false, path, &local_err);
1599         g_free(path);
1600         if (local_err != NULL) {
1601             break;
1602         }
1603         ++processed;
1604         vcpus = vcpus->next;
1605     }
1606 
1607     if (local_err != NULL) {
1608         if (processed == 0) {
1609             error_propagate(errp, local_err);
1610         } else {
1611             error_free(local_err);
1612         }
1613     }
1614 
1615     return processed;
1616 }
1617 
1618 
/*
 * Read up to @size bytes from offset 0 of the file @pathname, resolved
 * relative to the directory descriptor @dirfd, into @buf.  No terminating
 * NUL is added.
 *
 * @errp is set when the file cannot be opened, the read fails, or the
 * file is empty.
 *
 * On return errno holds the error code of the failing openat()/pread(),
 * or 0 when neither of them failed.  It is explicitly preserved across
 * the error reporting and the final close(), because a successful call
 * is still allowed to modify errno.
 */
static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
                               int size, Error **errp)
{
    int saved_errno;
    int fd;
    int res;

    errno = 0;
    fd = openat(dirfd, pathname, O_RDONLY);
    if (fd == -1) {
        saved_errno = errno;
        error_setg_errno(errp, saved_errno, "open sysfs file \"%s\"",
                         pathname);
        errno = saved_errno;
        return;
    }

    res = pread(fd, buf, size, 0);
    saved_errno = (res == -1) ? errno : 0;
    if (res == -1) {
        error_setg_errno(errp, saved_errno, "pread sysfs file \"%s\"",
                         pathname);
    } else if (res == 0) {
        error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
    }
    close(fd);
    errno = saved_errno;
}
1640 
/*
 * Write @size bytes from @buf at offset 0 of the file @pathname, resolved
 * relative to the directory descriptor @dirfd.
 *
 * @errp is set when the file cannot be opened or the write fails.
 *
 * On return errno holds the error code of the failing openat()/pwrite(),
 * or 0 when neither of them failed.  It is explicitly preserved across
 * the error reporting and the final close(), because a successful call
 * is still allowed to modify errno.
 */
static void ga_write_sysfs_file(int dirfd, const char *pathname,
                                const char *buf, int size, Error **errp)
{
    int saved_errno;
    int fd;

    errno = 0;
    fd = openat(dirfd, pathname, O_WRONLY);
    if (fd == -1) {
        saved_errno = errno;
        error_setg_errno(errp, saved_errno, "open sysfs file \"%s\"",
                         pathname);
        errno = saved_errno;
        return;
    }

    if (pwrite(fd, buf, size, 0) == -1) {
        saved_errno = errno;
        error_setg_errno(errp, saved_errno, "pwrite sysfs file \"%s\"",
                         pathname);
    } else {
        saved_errno = 0;
    }

    close(fd);
    errno = saved_errno;
}
1659 
1660 /* Transfer online/offline status between @mem_blk and the guest system.
1661  *
1662  * On input either @errp or *@errp must be NULL.
1663  *
1664  * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
1665  * - R: mem_blk->phys_index
1666  * - W: mem_blk->online
1667  * - W: mem_blk->can_offline
1668  *
1669  * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
1670  * - R: mem_blk->phys_index
1671  * - R: mem_blk->online
1672  *-  R: mem_blk->can_offline
1673  * Written members remain unmodified on error.
1674  */
transfer_memory_block(GuestMemoryBlock * mem_blk,bool sys2memblk,GuestMemoryBlockResponse * result,Error ** errp)1675 static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
1676                                   GuestMemoryBlockResponse *result,
1677                                   Error **errp)
1678 {
1679     char *dirpath;
1680     int dirfd;
1681     char *status;
1682     Error *local_err = NULL;
1683 
1684     if (!sys2memblk) {
1685         DIR *dp;
1686 
1687         if (!result) {
1688             error_setg(errp, "Internal error, 'result' should not be NULL");
1689             return;
1690         }
1691         errno = 0;
1692         dp = opendir("/sys/devices/system/memory/");
1693          /* if there is no 'memory' directory in sysfs,
1694          * we think this VM does not support online/offline memory block,
1695          * any other solution?
1696          */
1697         if (!dp) {
1698             if (errno == ENOENT) {
1699                 result->response =
1700                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
1701             }
1702             goto out1;
1703         }
1704         closedir(dp);
1705     }
1706 
1707     dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
1708                               mem_blk->phys_index);
1709     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1710     if (dirfd == -1) {
1711         if (sys2memblk) {
1712             error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1713         } else {
1714             if (errno == ENOENT) {
1715                 result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
1716             } else {
1717                 result->response =
1718                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1719             }
1720         }
1721         g_free(dirpath);
1722         goto out1;
1723     }
1724     g_free(dirpath);
1725 
1726     status = g_malloc0(10);
1727     ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
1728     if (local_err) {
1729         /* treat with sysfs file that not exist in old kernel */
1730         if (errno == ENOENT) {
1731             error_free(local_err);
1732             if (sys2memblk) {
1733                 mem_blk->online = true;
1734                 mem_blk->can_offline = false;
1735             } else if (!mem_blk->online) {
1736                 result->response =
1737                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
1738             }
1739         } else {
1740             if (sys2memblk) {
1741                 error_propagate(errp, local_err);
1742             } else {
1743                 error_free(local_err);
1744                 result->response =
1745                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1746             }
1747         }
1748         goto out2;
1749     }
1750 
1751     if (sys2memblk) {
1752         char removable = '0';
1753 
1754         mem_blk->online = (strncmp(status, "online", 6) == 0);
1755 
1756         ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
1757         if (local_err) {
1758             /* if no 'removable' file, it doesn't support offline mem blk */
1759             if (errno == ENOENT) {
1760                 error_free(local_err);
1761                 mem_blk->can_offline = false;
1762             } else {
1763                 error_propagate(errp, local_err);
1764             }
1765         } else {
1766             mem_blk->can_offline = (removable != '0');
1767         }
1768     } else {
1769         if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
1770             const char *new_state = mem_blk->online ? "online" : "offline";
1771 
1772             ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
1773                                 &local_err);
1774             if (local_err) {
1775                 error_free(local_err);
1776                 result->response =
1777                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1778                 goto out2;
1779             }
1780 
1781             result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
1782             result->has_error_code = false;
1783         } /* otherwise pretend successful re-(on|off)-lining */
1784     }
1785     g_free(status);
1786     close(dirfd);
1787     return;
1788 
1789 out2:
1790     g_free(status);
1791     close(dirfd);
1792 out1:
1793     if (!sys2memblk) {
1794         result->has_error_code = true;
1795         result->error_code = errno;
1796     }
1797 }
1798 
qmp_guest_get_memory_blocks(Error ** errp)1799 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
1800 {
1801     GuestMemoryBlockList *head, **tail;
1802     Error *local_err = NULL;
1803     struct dirent *de;
1804     DIR *dp;
1805 
1806     head = NULL;
1807     tail = &head;
1808 
1809     dp = opendir("/sys/devices/system/memory/");
1810     if (!dp) {
1811         /* it's ok if this happens to be a system that doesn't expose
1812          * memory blocks via sysfs, but otherwise we should report
1813          * an error
1814          */
1815         if (errno != ENOENT) {
1816             error_setg_errno(errp, errno, "Can't open directory"
1817                              "\"/sys/devices/system/memory/\"");
1818         }
1819         return NULL;
1820     }
1821 
1822     /* Note: the phys_index of memory block may be discontinuous,
1823      * this is because a memblk is the unit of the Sparse Memory design, which
1824      * allows discontinuous memory ranges (ex. NUMA), so here we should
1825      * traverse the memory block directory.
1826      */
1827     while ((de = readdir(dp)) != NULL) {
1828         GuestMemoryBlock *mem_blk;
1829 
1830         if ((strncmp(de->d_name, "memory", 6) != 0) ||
1831             !(de->d_type & DT_DIR)) {
1832             continue;
1833         }
1834 
1835         mem_blk = g_malloc0(sizeof *mem_blk);
1836         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
1837         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
1838         mem_blk->has_can_offline = true; /* lolspeak ftw */
1839         transfer_memory_block(mem_blk, true, NULL, &local_err);
1840         if (local_err) {
1841             break;
1842         }
1843 
1844         QAPI_LIST_APPEND(tail, mem_blk);
1845     }
1846 
1847     closedir(dp);
1848     if (local_err == NULL) {
1849         /* there's no guest with zero memory blocks */
1850         if (head == NULL) {
1851             error_setg(errp, "guest reported zero memory blocks!");
1852         }
1853         return head;
1854     }
1855 
1856     qapi_free_GuestMemoryBlockList(head);
1857     error_propagate(errp, local_err);
1858     return NULL;
1859 }
1860 
1861 GuestMemoryBlockResponseList *
qmp_guest_set_memory_blocks(GuestMemoryBlockList * mem_blks,Error ** errp)1862 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
1863 {
1864     GuestMemoryBlockResponseList *head, **tail;
1865     Error *local_err = NULL;
1866 
1867     head = NULL;
1868     tail = &head;
1869 
1870     while (mem_blks != NULL) {
1871         GuestMemoryBlockResponse *result;
1872         GuestMemoryBlock *current_mem_blk = mem_blks->value;
1873 
1874         result = g_malloc0(sizeof(*result));
1875         result->phys_index = current_mem_blk->phys_index;
1876         transfer_memory_block(current_mem_blk, false, result, &local_err);
1877         if (local_err) { /* should never happen */
1878             goto err;
1879         }
1880 
1881         QAPI_LIST_APPEND(tail, result);
1882         mem_blks = mem_blks->next;
1883     }
1884 
1885     return head;
1886 err:
1887     qapi_free_GuestMemoryBlockResponseList(head);
1888     error_propagate(errp, local_err);
1889     return NULL;
1890 }
1891 
qmp_guest_get_memory_block_info(Error ** errp)1892 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
1893 {
1894     Error *local_err = NULL;
1895     char *dirpath;
1896     int dirfd;
1897     char *buf;
1898     GuestMemoryBlockInfo *info;
1899 
1900     dirpath = g_strdup_printf("/sys/devices/system/memory/");
1901     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1902     if (dirfd == -1) {
1903         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1904         g_free(dirpath);
1905         return NULL;
1906     }
1907     g_free(dirpath);
1908 
1909     buf = g_malloc0(20);
1910     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
1911     close(dirfd);
1912     if (local_err) {
1913         g_free(buf);
1914         error_propagate(errp, local_err);
1915         return NULL;
1916     }
1917 
1918     info = g_new0(GuestMemoryBlockInfo, 1);
1919     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
1920 
1921     g_free(buf);
1922 
1923     return info;
1924 }
1925 
1926 #define MAX_NAME_LEN 128
guest_get_diskstats(Error ** errp)1927 static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
1928 {
1929     GuestDiskStatsInfoList *head = NULL, **tail = &head;
1930     const char *diskstats = "/proc/diskstats";
1931     FILE *fp;
1932     size_t n;
1933     char *line = NULL;
1934 
1935     fp = fopen(diskstats, "r");
1936     if (fp  == NULL) {
1937         error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
1938         return NULL;
1939     }
1940 
1941     while (getline(&line, &n, fp) != -1) {
1942         g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
1943         g_autofree GuestDiskStats *diskstat = NULL;
1944         char dev_name[MAX_NAME_LEN];
1945         unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
1946         unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
1947         unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
1948         unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
1949         unsigned int major, minor;
1950         int i;
1951 
1952         i = sscanf(line, "%u %u %s %lu %lu %lu"
1953                    "%lu %lu %lu %lu %u %u %u %u"
1954                    "%lu %lu %lu %u %lu %u",
1955                    &major, &minor, dev_name,
1956                    &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
1957                    &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
1958                    &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
1959                    &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
1960                    &fl_ios, &fl_ticks);
1961 
1962         if (i < 7) {
1963             continue;
1964         }
1965 
1966         diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
1967         diskstatinfo->name = g_strdup(dev_name);
1968         diskstatinfo->major = major;
1969         diskstatinfo->minor = minor;
1970 
1971         diskstat = g_new0(GuestDiskStats, 1);
1972         if (i == 7) {
1973             diskstat->has_read_ios = true;
1974             diskstat->read_ios = rd_ios;
1975             diskstat->has_read_sectors = true;
1976             diskstat->read_sectors = rd_merges_or_rd_sec;
1977             diskstat->has_write_ios = true;
1978             diskstat->write_ios = rd_sec_or_wr_ios;
1979             diskstat->has_write_sectors = true;
1980             diskstat->write_sectors = rd_ticks_or_wr_sec;
1981         }
1982         if (i >= 14) {
1983             diskstat->has_read_ios = true;
1984             diskstat->read_ios = rd_ios;
1985             diskstat->has_read_sectors = true;
1986             diskstat->read_sectors = rd_sec_or_wr_ios;
1987             diskstat->has_read_merges = true;
1988             diskstat->read_merges = rd_merges_or_rd_sec;
1989             diskstat->has_read_ticks = true;
1990             diskstat->read_ticks = rd_ticks_or_wr_sec;
1991             diskstat->has_write_ios = true;
1992             diskstat->write_ios = wr_ios;
1993             diskstat->has_write_sectors = true;
1994             diskstat->write_sectors = wr_sec;
1995             diskstat->has_write_merges = true;
1996             diskstat->write_merges = wr_merges;
1997             diskstat->has_write_ticks = true;
1998             diskstat->write_ticks = wr_ticks;
1999             diskstat->has_ios_pgr = true;
2000             diskstat->ios_pgr = ios_pgr;
2001             diskstat->has_total_ticks = true;
2002             diskstat->total_ticks = tot_ticks;
2003             diskstat->has_weight_ticks = true;
2004             diskstat->weight_ticks = rq_ticks;
2005         }
2006         if (i >= 18) {
2007             diskstat->has_discard_ios = true;
2008             diskstat->discard_ios = dc_ios;
2009             diskstat->has_discard_merges = true;
2010             diskstat->discard_merges = dc_merges;
2011             diskstat->has_discard_sectors = true;
2012             diskstat->discard_sectors = dc_sec;
2013             diskstat->has_discard_ticks = true;
2014             diskstat->discard_ticks = dc_ticks;
2015         }
2016         if (i >= 20) {
2017             diskstat->has_flush_ios = true;
2018             diskstat->flush_ios = fl_ios;
2019             diskstat->has_flush_ticks = true;
2020             diskstat->flush_ticks = fl_ticks;
2021         }
2022 
2023         diskstatinfo->stats = g_steal_pointer(&diskstat);
2024         QAPI_LIST_APPEND(tail, diskstatinfo);
2025         diskstatinfo = NULL;
2026     }
2027     free(line);
2028     fclose(fp);
2029     return head;
2030 }
2031 
qmp_guest_get_diskstats(Error ** errp)2032 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2033 {
2034     return guest_get_diskstats(errp);
2035 }
2036 
qmp_guest_get_cpustats(Error ** errp)2037 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2038 {
2039     GuestCpuStatsList *head = NULL, **tail = &head;
2040     const char *cpustats = "/proc/stat";
2041     int clk_tck = sysconf(_SC_CLK_TCK);
2042     FILE *fp;
2043     size_t n;
2044     char *line = NULL;
2045 
2046     fp = fopen(cpustats, "r");
2047     if (fp  == NULL) {
2048         error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2049         return NULL;
2050     }
2051 
2052     while (getline(&line, &n, fp) != -1) {
2053         GuestCpuStats *cpustat = NULL;
2054         GuestLinuxCpuStats *linuxcpustat;
2055         int i;
2056         unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2057         unsigned long nice, guest_nice;
2058         char name[64];
2059 
2060         i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2061                    name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2062                    &steal, &guest, &guest_nice);
2063 
2064         /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2065         if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2066             continue;
2067         }
2068 
2069         if (i < 5) {
2070             slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2071             break;
2072         }
2073 
2074         cpustat = g_new0(GuestCpuStats, 1);
2075         cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2076 
2077         linuxcpustat = &cpustat->u.q_linux;
2078         linuxcpustat->cpu = atoi(&name[3]);
2079         linuxcpustat->user = user * 1000 / clk_tck;
2080         linuxcpustat->nice = nice * 1000 / clk_tck;
2081         linuxcpustat->system = system * 1000 / clk_tck;
2082         linuxcpustat->idle = idle * 1000 / clk_tck;
2083 
2084         if (i > 5) {
2085             linuxcpustat->has_iowait = true;
2086             linuxcpustat->iowait = iowait * 1000 / clk_tck;
2087         }
2088 
2089         if (i > 6) {
2090             linuxcpustat->has_irq = true;
2091             linuxcpustat->irq = irq * 1000 / clk_tck;
2092             linuxcpustat->has_softirq = true;
2093             linuxcpustat->softirq = softirq * 1000 / clk_tck;
2094         }
2095 
2096         if (i > 8) {
2097             linuxcpustat->has_steal = true;
2098             linuxcpustat->steal = steal * 1000 / clk_tck;
2099         }
2100 
2101         if (i > 9) {
2102             linuxcpustat->has_guest = true;
2103             linuxcpustat->guest = guest * 1000 / clk_tck;
2104         }
2105 
2106         if (i > 10) {
2107             linuxcpustat->has_guest = true;
2108             linuxcpustat->guest = guest * 1000 / clk_tck;
2109             linuxcpustat->has_guestnice = true;
2110             linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2111         }
2112 
2113         QAPI_LIST_APPEND(tail, cpustat);
2114     }
2115 
2116     free(line);
2117     fclose(fp);
2118     return head;
2119 }
2120 
/*
 * Convert a raw address into a newly allocated presentation string.
 *
 * @hex_value: when @is_ipv6 is non-zero, a NUL-terminated string of 32
 *             hexadecimal digits (two per address byte); otherwise a
 *             pointer to a 32-bit word holding the IPv4 address.
 * @is_ipv6:   selects the interpretation of @hex_value.
 *
 * The IPv4 word is used byte-for-byte in memory order, i.e. its first byte
 * in memory becomes the first octet of the dotted quad.  On little-endian
 * hosts this matches printing the least significant byte first.
 * NOTE(review): this presumes the word was parsed from a hex dump of the
 * address in network byte order - confirm against the producer of the data.
 *
 * Returns: a g_malloc'ed string the caller must g_free(), or NULL when the
 * IPv6 digits cannot be parsed or the address cannot be formatted.
 */
static char *hex_to_ip_address(const void *hex_value, int is_ipv6)
{
    if (is_ipv6) {
        char addr[INET6_ADDRSTRLEN];
        struct in6_addr in6;
        const char *hex_str = (const char *)hex_value;
        int i;

        /* two hex digits per byte; a short string ends the scan with NULL */
        for (i = 0; i < 16; i++) {
            if (sscanf(&hex_str[i * 2], "%02hhx", &in6.s6_addr[i]) != 1) {
                return NULL;
            }
        }
        if (inet_ntop(AF_INET6, &in6, addr, sizeof(addr)) == NULL) {
            return NULL;
        }

        return g_strdup(addr);
    } else {
        char addr[INET_ADDRSTRLEN];
        struct in_addr in4;

        /* copy the bytes instead of casting away const and re-typing */
        memcpy(&in4.s_addr, hex_value, sizeof(in4.s_addr));
        if (inet_ntop(AF_INET, &in4, addr, sizeof(addr)) == NULL) {
            return NULL;
        }

        return g_strdup(addr);
    }
}
2147 
qmp_guest_network_get_route(Error ** errp)2148 GuestNetworkRouteList *qmp_guest_network_get_route(Error **errp)
2149 {
2150     GuestNetworkRouteList *head = NULL, **tail = &head;
2151     const char *route_files[] = {"/proc/net/route", "/proc/net/ipv6_route"};
2152     FILE *fp;
2153     size_t n = 0;
2154     char *line = NULL;
2155     int firstLine;
2156     int is_ipv6;
2157     int i;
2158     char iface[IFNAMSIZ];
2159 
2160     for (i = 0; i < 2; i++) {
2161         firstLine = 1;
2162         is_ipv6 = (i == 1);
2163         fp = fopen(route_files[i], "r");
2164         if (fp == NULL) {
2165             error_setg_errno(errp, errno, "open(\"%s\")", route_files[i]);
2166             continue;
2167         }
2168 
2169         while (getline(&line, &n, fp) != -1) {
2170             if (firstLine && !is_ipv6) {
2171                 firstLine = 0;
2172                 continue;
2173             }
2174             g_autoptr(GuestNetworkRoute) route = g_new0(GuestNetworkRoute, 1);
2175 
2176             if (is_ipv6) {
2177                 char destination[33], source[33], next_hop[33];
2178                 int des_prefixlen, src_prefixlen, metric, refcnt, use, flags;
2179                 if (sscanf(line, "%32s %x %32s %x %32s %x %x %x %x %s",
2180                            destination, &des_prefixlen, source,
2181                            &src_prefixlen, next_hop, &metric, &refcnt,
2182                            &use, &flags, iface) != 10) {
2183                     continue;
2184                 }
2185 
2186                 route->destination = hex_to_ip_address(destination, 1);
2187                 if (route->destination == NULL) {
2188                     continue;
2189                 }
2190                 route->iface = g_strdup(iface);
2191                 route->source = hex_to_ip_address(source, 1);
2192                 route->nexthop = hex_to_ip_address(next_hop, 1);
2193                 route->desprefixlen = g_strdup_printf("%d", des_prefixlen);
2194                 route->srcprefixlen = g_strdup_printf("%d", src_prefixlen);
2195                 route->metric = metric;
2196                 route->has_flags = true;
2197                 route->flags = flags;
2198                 route->has_refcnt = true;
2199                 route->refcnt = refcnt;
2200                 route->has_use = true;
2201                 route->use = use;
2202                 route->version = 6;
2203             } else {
2204                 unsigned int destination, gateway, mask, flags;
2205                 int refcnt, use, metric, mtu, window, irtt;
2206                 if (sscanf(line, "%s %X %X %x %d %d %d %X %d %d %d",
2207                            iface, &destination, &gateway, &flags, &refcnt,
2208                            &use, &metric, &mask, &mtu, &window, &irtt) != 11) {
2209                     continue;
2210                 }
2211 
2212                 route->destination = hex_to_ip_address(&destination, 0);
2213                 if (route->destination == NULL) {
2214                     continue;
2215                 }
2216                 route->iface = g_strdup(iface);
2217                 route->gateway = hex_to_ip_address(&gateway, 0);
2218                 route->mask = hex_to_ip_address(&mask, 0);
2219                 route->metric = metric;
2220                 route->has_flags = true;
2221                 route->flags = flags;
2222                 route->has_refcnt = true;
2223                 route->refcnt = refcnt;
2224                 route->has_use = true;
2225                 route->use = use;
2226                 route->has_mtu = true;
2227                 route->mtu = mtu;
2228                 route->has_window = true;
2229                 route->window = window;
2230                 route->has_irtt = true;
2231                 route->irtt = irtt;
2232                 route->version = 4;
2233             }
2234 
2235             QAPI_LIST_APPEND(tail, route);
2236             route = NULL;
2237         }
2238 
2239         fclose(fp);
2240     }
2241 
2242     free(line);
2243     return head;
2244 }
2245