16482a961SGreg Kurz /* 26482a961SGreg Kurz * 9p utilities 36482a961SGreg Kurz * 46482a961SGreg Kurz * Copyright IBM, Corp. 2017 56482a961SGreg Kurz * 66482a961SGreg Kurz * Authors: 76482a961SGreg Kurz * Greg Kurz <groug@kaod.org> 86482a961SGreg Kurz * 96482a961SGreg Kurz * This work is licensed under the terms of the GNU GPL, version 2 or later. 106482a961SGreg Kurz * See the COPYING file in the top-level directory. 116482a961SGreg Kurz */ 126482a961SGreg Kurz 136482a961SGreg Kurz #ifndef QEMU_9P_UTIL_H 146482a961SGreg Kurz #define QEMU_9P_UTIL_H 156482a961SGreg Kurz 16f6b0de53SChristian Schoenebeck #include "qemu/error-report.h" 17f6b0de53SChristian Schoenebeck 184751fd53SGreg Kurz #ifdef O_PATH 194751fd53SGreg Kurz #define O_PATH_9P_UTIL O_PATH 204751fd53SGreg Kurz #else 214751fd53SGreg Kurz #define O_PATH_9P_UTIL 0 224751fd53SGreg Kurz #endif 234751fd53SGreg Kurz 24e5c88e22SChristian Schoenebeck #if !defined(CONFIG_LINUX) 25e5c88e22SChristian Schoenebeck 26e5c88e22SChristian Schoenebeck /* 27e5c88e22SChristian Schoenebeck * Generates a Linux device number (a.k.a. dev_t) for given device major 28e5c88e22SChristian Schoenebeck * and minor numbers. 29e5c88e22SChristian Schoenebeck * 30e5c88e22SChristian Schoenebeck * To be more precise: it generates a device number in glibc's format 31e5c88e22SChristian Schoenebeck * (MMMM_Mmmm_mmmM_MMmm, 64 bits) actually, which is compatible with 32e5c88e22SChristian Schoenebeck * Linux's format (mmmM_MMmm, 32 bits), as described in <bits/sysmacros.h>. 33e5c88e22SChristian Schoenebeck */ 34e5c88e22SChristian Schoenebeck static inline uint64_t makedev_dotl(uint32_t dev_major, uint32_t dev_minor) 35e5c88e22SChristian Schoenebeck { 36e5c88e22SChristian Schoenebeck uint64_t dev; 37e5c88e22SChristian Schoenebeck 38e5c88e22SChristian Schoenebeck // from glibc sysmacros.h: 39e5c88e22SChristian Schoenebeck dev = (((uint64_t) (dev_major & 0x00000fffu)) << 8); 40e5c88e22SChristian Schoenebeck dev |= (((uint64_t) (dev_major & 0xfffff000u)) << 32); 41e5c88e22SChristian Schoenebeck dev |= (((uint64_t) (dev_minor & 0x000000ffu)) << 0); 42e5c88e22SChristian Schoenebeck dev |= (((uint64_t) (dev_minor & 0xffffff00u)) << 12); 43e5c88e22SChristian Schoenebeck return dev; 44e5c88e22SChristian Schoenebeck } 45e5c88e22SChristian Schoenebeck 46e5c88e22SChristian Schoenebeck #endif 47e5c88e22SChristian Schoenebeck 48e5c88e22SChristian Schoenebeck /* 49e5c88e22SChristian Schoenebeck * Converts given device number from host's device number format to Linux 50e5c88e22SChristian Schoenebeck * device number format. As both the size of type dev_t and encoding of 5128cbbdd2SMichael Tokarev * dev_t is system dependent, we have to convert them for Linux guests if 52e5c88e22SChristian Schoenebeck * host is not running Linux. 53e5c88e22SChristian Schoenebeck */ 54e5c88e22SChristian Schoenebeck static inline uint64_t host_dev_to_dotl_dev(dev_t dev) 55e5c88e22SChristian Schoenebeck { 56e5c88e22SChristian Schoenebeck #ifdef CONFIG_LINUX 57e5c88e22SChristian Schoenebeck return dev; 58e5c88e22SChristian Schoenebeck #else 59e5c88e22SChristian Schoenebeck return makedev_dotl(major(dev), minor(dev)); 60e5c88e22SChristian Schoenebeck #endif 61e5c88e22SChristian Schoenebeck } 62e5c88e22SChristian Schoenebeck 63951fe2f8SChristian Schoenebeck /* Translates errno from host -> Linux if needed */ 64951fe2f8SChristian Schoenebeck static inline int errno_to_dotl(int err) { 65951fe2f8SChristian Schoenebeck #if defined(CONFIG_LINUX) 66951fe2f8SChristian Schoenebeck /* nothing to translate (Linux -> Linux) */ 67951fe2f8SChristian Schoenebeck #elif defined(CONFIG_DARWIN) 68951fe2f8SChristian Schoenebeck /* 69951fe2f8SChristian Schoenebeck * translation mandatory for macOS hosts 70951fe2f8SChristian Schoenebeck * 71951fe2f8SChristian Schoenebeck * FIXME: Only most important errnos translated here yet, this should be 72951fe2f8SChristian Schoenebeck * extended to as many errnos being translated as possible in future. 73951fe2f8SChristian Schoenebeck */ 74951fe2f8SChristian Schoenebeck if (err == ENAMETOOLONG) { 75951fe2f8SChristian Schoenebeck err = 36; /* ==ENAMETOOLONG on Linux */ 76951fe2f8SChristian Schoenebeck } else if (err == ENOTEMPTY) { 77951fe2f8SChristian Schoenebeck err = 39; /* ==ENOTEMPTY on Linux */ 78951fe2f8SChristian Schoenebeck } else if (err == ELOOP) { 79951fe2f8SChristian Schoenebeck err = 40; /* ==ELOOP on Linux */ 80951fe2f8SChristian Schoenebeck } else if (err == ENOATTR) { 81951fe2f8SChristian Schoenebeck err = 61; /* ==ENODATA on Linux */ 82951fe2f8SChristian Schoenebeck } else if (err == ENOTSUP) { 83951fe2f8SChristian Schoenebeck err = 95; /* ==EOPNOTSUPP on Linux */ 84951fe2f8SChristian Schoenebeck } else if (err == EOPNOTSUPP) { 85951fe2f8SChristian Schoenebeck err = 95; /* ==EOPNOTSUPP on Linux */ 86951fe2f8SChristian Schoenebeck } 87951fe2f8SChristian Schoenebeck #else 88951fe2f8SChristian Schoenebeck #error Missing errno translation to Linux for this host system 89951fe2f8SChristian Schoenebeck #endif 90951fe2f8SChristian Schoenebeck return err; 91951fe2f8SChristian Schoenebeck } 92951fe2f8SChristian Schoenebeck 93b5989326SKeno Fischer #ifdef CONFIG_DARWIN 94b5989326SKeno Fischer #define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0) 95b5989326SKeno Fischer #else 96b5989326SKeno Fischer #define qemu_fgetxattr fgetxattr 97b5989326SKeno Fischer #endif 98b5989326SKeno Fischer 996ca60cd7SBin Meng #define qemu_openat openat 100f6b0de53SChristian Schoenebeck #define qemu_fstat fstat 1016ca60cd7SBin Meng #define qemu_fstatat fstatat 1026ca60cd7SBin Meng #define qemu_mkdirat mkdirat 1036ca60cd7SBin Meng #define qemu_renameat renameat 1046ca60cd7SBin Meng #define qemu_utimensat utimensat 1056ca60cd7SBin Meng #define qemu_unlinkat unlinkat 106*371a269fSGreg Kurz #define qemu_futimens futimens 1076ca60cd7SBin Meng 1086482a961SGreg Kurz static inline void close_preserve_errno(int fd) 1096482a961SGreg Kurz { 1106482a961SGreg Kurz int serrno = errno; 1116482a961SGreg Kurz close(fd); 1126482a961SGreg Kurz errno = serrno; 1136482a961SGreg Kurz } 1146482a961SGreg Kurz 115f6b0de53SChristian Schoenebeck /** 116f6b0de53SChristian Schoenebeck * close_if_special_file() - Close @fd if neither regular file nor directory. 117f6b0de53SChristian Schoenebeck * 118f6b0de53SChristian Schoenebeck * @fd: file descriptor of open file 119f6b0de53SChristian Schoenebeck * Return: 0 on regular file or directory, -1 otherwise 120f6b0de53SChristian Schoenebeck * 121f6b0de53SChristian Schoenebeck * CVE-2023-2861: Prohibit opening any special file directly on host 122f6b0de53SChristian Schoenebeck * (especially device files), as a compromised client could potentially gain 123f6b0de53SChristian Schoenebeck * access outside exported tree under certain, unsafe setups. We expect 124f6b0de53SChristian Schoenebeck * client to handle I/O on special files exclusively on guest side. 125f6b0de53SChristian Schoenebeck */ 126f6b0de53SChristian Schoenebeck static inline int close_if_special_file(int fd) 127f6b0de53SChristian Schoenebeck { 128f6b0de53SChristian Schoenebeck struct stat stbuf; 129f6b0de53SChristian Schoenebeck 130f6b0de53SChristian Schoenebeck if (qemu_fstat(fd, &stbuf) < 0) { 131f6b0de53SChristian Schoenebeck close_preserve_errno(fd); 132f6b0de53SChristian Schoenebeck return -1; 133f6b0de53SChristian Schoenebeck } 134f6b0de53SChristian Schoenebeck if (!S_ISREG(stbuf.st_mode) && !S_ISDIR(stbuf.st_mode)) { 135f6b0de53SChristian Schoenebeck error_report_once( 136f6b0de53SChristian Schoenebeck "9p: broken or compromised client detected; attempt to open " 137f6b0de53SChristian Schoenebeck "special file (i.e. neither regular file, nor directory)" 138f6b0de53SChristian Schoenebeck ); 139f6b0de53SChristian Schoenebeck close(fd); 140f6b0de53SChristian Schoenebeck errno = ENXIO; 141f6b0de53SChristian Schoenebeck return -1; 142f6b0de53SChristian Schoenebeck } 143f6b0de53SChristian Schoenebeck 144f6b0de53SChristian Schoenebeck return 0; 145f6b0de53SChristian Schoenebeck } 146f6b0de53SChristian Schoenebeck 1476482a961SGreg Kurz static inline int openat_dir(int dirfd, const char *name) 1486482a961SGreg Kurz { 1496ca60cd7SBin Meng return qemu_openat(dirfd, name, 1504751fd53SGreg Kurz O_DIRECTORY | O_RDONLY | O_NOFOLLOW | O_PATH_9P_UTIL); 1516482a961SGreg Kurz } 1526482a961SGreg Kurz 1536482a961SGreg Kurz static inline int openat_file(int dirfd, const char *name, int flags, 1546482a961SGreg Kurz mode_t mode) 1556482a961SGreg Kurz { 1566482a961SGreg Kurz int fd, serrno, ret; 1576482a961SGreg Kurz 15867a71e3bSKeno Fischer #ifndef CONFIG_DARWIN 159a5804fcfSOmar Sandoval again: 16067a71e3bSKeno Fischer #endif 1616ca60cd7SBin Meng fd = qemu_openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, 1626482a961SGreg Kurz mode); 1636482a961SGreg Kurz if (fd == -1) { 16467a71e3bSKeno Fischer #ifndef CONFIG_DARWIN 165a5804fcfSOmar Sandoval if (errno == EPERM && (flags & O_NOATIME)) { 166a5804fcfSOmar Sandoval /* 167a5804fcfSOmar Sandoval * The client passed O_NOATIME but we lack permissions to honor it. 168a5804fcfSOmar Sandoval * Rather than failing the open, fall back without O_NOATIME. This 169a5804fcfSOmar Sandoval * doesn't break the semantics on the client side, as the Linux 170a5804fcfSOmar Sandoval * open(2) man page notes that O_NOATIME "may not be effective on 171a5804fcfSOmar Sandoval * all filesystems". In particular, NFS and other network 172a5804fcfSOmar Sandoval * filesystems ignore it entirely. 173a5804fcfSOmar Sandoval */ 174a5804fcfSOmar Sandoval flags &= ~O_NOATIME; 175a5804fcfSOmar Sandoval goto again; 176a5804fcfSOmar Sandoval } 17767a71e3bSKeno Fischer #endif 1786482a961SGreg Kurz return -1; 1796482a961SGreg Kurz } 1806482a961SGreg Kurz 181d06a9d84SChristian Schoenebeck /* Only if O_PATH is not set ... */ 182d06a9d84SChristian Schoenebeck if (!(flags & O_PATH_9P_UTIL)) { 183d06a9d84SChristian Schoenebeck /* 184d06a9d84SChristian Schoenebeck * Prevent I/O on special files (device files, etc.) on host side, 185d06a9d84SChristian Schoenebeck * however it is safe and required to allow opening them with O_PATH, 186d06a9d84SChristian Schoenebeck * as this is limited to (required) path based operations only. 187d06a9d84SChristian Schoenebeck */ 188f6b0de53SChristian Schoenebeck if (close_if_special_file(fd) < 0) { 189f6b0de53SChristian Schoenebeck return -1; 190f6b0de53SChristian Schoenebeck } 191f6b0de53SChristian Schoenebeck 1926482a961SGreg Kurz serrno = errno; 193d06a9d84SChristian Schoenebeck /* 194d06a9d84SChristian Schoenebeck * O_NONBLOCK was only needed to open the file. Let's drop it. We don't 195d06a9d84SChristian Schoenebeck * do that with O_PATH since fcntl(F_SETFL) isn't supported, and 196d06a9d84SChristian Schoenebeck * openat() ignored it anyway. 1974751fd53SGreg Kurz */ 1986482a961SGreg Kurz ret = fcntl(fd, F_SETFL, flags); 1996482a961SGreg Kurz assert(!ret); 2006482a961SGreg Kurz errno = serrno; 201d06a9d84SChristian Schoenebeck } 2026482a961SGreg Kurz return fd; 2036482a961SGreg Kurz } 2046482a961SGreg Kurz 20556ad3e54SGreg Kurz ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, 20656ad3e54SGreg Kurz void *value, size_t size); 2073e36aba7SGreg Kurz int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, 2083e36aba7SGreg Kurz void *value, size_t size, int flags); 209ec70b956SKeno Fischer ssize_t flistxattrat_nofollow(int dirfd, const char *filename, 210ec70b956SKeno Fischer char *list, size_t size); 211ec70b956SKeno Fischer ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, 212ec70b956SKeno Fischer const char *name); 2136482a961SGreg Kurz 214041b0945SChristian Schoenebeck /* 2156b3b279bSKeno Fischer * Darwin has d_seekoff, which appears to function similarly to d_off. 2166b3b279bSKeno Fischer * However, it does not appear to be supported on all file systems, 2176b3b279bSKeno Fischer * so ensure it is manually injected earlier and call here when 2186b3b279bSKeno Fischer * needed. 2196b3b279bSKeno Fischer */ 2206b3b279bSKeno Fischer static inline off_t qemu_dirent_off(struct dirent *dent) 2216b3b279bSKeno Fischer { 2226b3b279bSKeno Fischer #ifdef CONFIG_DARWIN 2236b3b279bSKeno Fischer return dent->d_seekoff; 2246b3b279bSKeno Fischer #else 2256b3b279bSKeno Fischer return dent->d_off; 2266b3b279bSKeno Fischer #endif 2276b3b279bSKeno Fischer } 2286b3b279bSKeno Fischer 2291983d8b0SChristian Schoenebeck /** 230041b0945SChristian Schoenebeck * qemu_dirent_dup() - Duplicate directory entry @dent. 231041b0945SChristian Schoenebeck * 232041b0945SChristian Schoenebeck * @dent: original directory entry to be duplicated 233041b0945SChristian Schoenebeck * Return: duplicated directory entry which should be freed with g_free() 2341983d8b0SChristian Schoenebeck * 2351983d8b0SChristian Schoenebeck * It is highly recommended to use this function instead of open coding 23663ce31c3SChristian Schoenebeck * duplication of dirent objects, because the actual struct dirent 23763ce31c3SChristian Schoenebeck * size may be bigger or shorter than sizeof(struct dirent) and correct 2381983d8b0SChristian Schoenebeck * handling is platform specific (see gitlab issue #841). 2391983d8b0SChristian Schoenebeck */ 2401983d8b0SChristian Schoenebeck static inline struct dirent *qemu_dirent_dup(struct dirent *dent) 2411983d8b0SChristian Schoenebeck { 2421983d8b0SChristian Schoenebeck size_t sz = 0; 2431983d8b0SChristian Schoenebeck #if defined _DIRENT_HAVE_D_RECLEN 2441983d8b0SChristian Schoenebeck /* Avoid use of strlen() if platform supports d_reclen. */ 2451983d8b0SChristian Schoenebeck sz = dent->d_reclen; 2461983d8b0SChristian Schoenebeck #endif 2471983d8b0SChristian Schoenebeck /* 2481983d8b0SChristian Schoenebeck * Test sz for zero even if d_reclen is available 2491983d8b0SChristian Schoenebeck * because some drivers may set d_reclen to zero. 2501983d8b0SChristian Schoenebeck */ 2511983d8b0SChristian Schoenebeck if (sz == 0) { 2521983d8b0SChristian Schoenebeck /* Fallback to the most portable way. */ 2531983d8b0SChristian Schoenebeck sz = offsetof(struct dirent, d_name) + 2541983d8b0SChristian Schoenebeck strlen(dent->d_name) + 1; 2551983d8b0SChristian Schoenebeck } 2561983d8b0SChristian Schoenebeck return g_memdup(dent, sz); 2571983d8b0SChristian Schoenebeck } 2581983d8b0SChristian Schoenebeck 259029ed1bdSKeno Fischer /* 260029ed1bdSKeno Fischer * As long as mknodat is not available on macOS, this workaround 261029ed1bdSKeno Fischer * using pthread_fchdir_np is needed. qemu_mknodat is defined in 262029ed1bdSKeno Fischer * os-posix.c. pthread_fchdir_np is weakly linked here as a guard 263029ed1bdSKeno Fischer * in case it disappears in future macOS versions, because it is 264029ed1bdSKeno Fischer * is a private API. 265029ed1bdSKeno Fischer */ 266029ed1bdSKeno Fischer #if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP 267029ed1bdSKeno Fischer int pthread_fchdir_np(int fd) __attribute__((weak_import)); 268029ed1bdSKeno Fischer #endif 269029ed1bdSKeno Fischer int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev); 2706b3b279bSKeno Fischer 2719a0dd4b3SChristian Schoenebeck /* 2729a0dd4b3SChristian Schoenebeck * Returns a newly allocated string presentation of open() flags, intended 2739a0dd4b3SChristian Schoenebeck * for debugging (tracing) purposes only. 2749a0dd4b3SChristian Schoenebeck */ 2759a0dd4b3SChristian Schoenebeck char *qemu_open_flags_tostr(int flags); 2769a0dd4b3SChristian Schoenebeck 2776482a961SGreg Kurz #endif 278