1 /* 2 * QEMU seccomp mode 2 support with libseccomp 3 * 4 * Copyright IBM, Corp. 2012 5 * 6 * Authors: 7 * Eduardo Otubo <eotubo@br.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qapi/error.h" 18 #include "qemu/config-file.h" 19 #include "qemu/option.h" 20 #include "qemu/module.h" 21 #include <sys/prctl.h> 22 #include <seccomp.h> 23 #include "sysemu/seccomp.h" 24 #include <linux/seccomp.h> 25 26 /* For some architectures (notably ARM) cacheflush is not supported until 27 * libseccomp 2.2.3, but configure enforces that we are using a more recent 28 * version on those hosts, so it is OK for this check to be less strict. 29 */ 30 #if SCMP_VER_MAJOR >= 3 31 #define HAVE_CACHEFLUSH 32 #elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2 33 #define HAVE_CACHEFLUSH 34 #endif 35 36 struct QemuSeccompSyscall { 37 int32_t num; 38 uint8_t set; 39 uint8_t narg; 40 const struct scmp_arg_cmp *arg_cmp; 41 }; 42 43 const struct scmp_arg_cmp sched_setscheduler_arg[] = { 44 /* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */ 45 { .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE } 46 }; 47 48 static const struct QemuSeccompSyscall denylist[] = { 49 /* default set of syscalls that should get blocked */ 50 { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT }, 51 { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT }, 52 { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT }, 53 { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT }, 54 { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT }, 55 { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT }, 56 { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT }, 57 { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT }, 58 { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT }, 59 { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT }, 60 { SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT }, 61 { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT }, 62 { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT }, 63 { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT }, 64 { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT }, 65 { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT }, 66 { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT }, 67 { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT }, 68 { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT }, 69 { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT }, 70 { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT }, 71 { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT }, 72 /* obsolete */ 73 { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE }, 74 { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE }, 75 { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE }, 76 { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE }, 77 { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE }, 78 { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE }, 79 { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE }, 80 { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE }, 81 { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE }, 82 { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE }, 83 { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE }, 84 /* privileged */ 85 { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED }, 86 { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED }, 87 { SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED }, 88 { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED }, 89 { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED }, 90 { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED }, 91 { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED }, 92 { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED }, 93 { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED }, 94 { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED }, 95 /* spawn */ 96 { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN }, 97 { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN }, 98 { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN }, 99 /* resource control */ 100 { SCMP_SYS(getpriority), QEMU_SECCOMP_SET_RESOURCECTL }, 101 { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL }, 102 { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL }, 103 { SCMP_SYS(sched_getparam), QEMU_SECCOMP_SET_RESOURCECTL }, 104 { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL, 105 ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg }, 106 { SCMP_SYS(sched_getscheduler), QEMU_SECCOMP_SET_RESOURCECTL }, 107 { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, 108 { SCMP_SYS(sched_getaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, 109 { SCMP_SYS(sched_get_priority_max), QEMU_SECCOMP_SET_RESOURCECTL }, 110 { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, 111 }; 112 113 static inline __attribute__((unused)) int 114 qemu_seccomp(unsigned int operation, unsigned int flags, void *args) 115 { 116 #ifdef __NR_seccomp 117 return syscall(__NR_seccomp, operation, flags, args); 118 #else 119 errno = ENOSYS; 120 return -1; 121 #endif 122 } 123 124 static uint32_t qemu_seccomp_get_action(int set) 125 { 126 switch (set) { 127 case QEMU_SECCOMP_SET_DEFAULT: 128 case QEMU_SECCOMP_SET_OBSOLETE: 129 case QEMU_SECCOMP_SET_PRIVILEGED: 130 case QEMU_SECCOMP_SET_SPAWN: { 131 #if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ 132 defined(SECCOMP_RET_KILL_PROCESS) 133 static int kill_process = -1; 134 if (kill_process == -1) { 135 uint32_t action = SECCOMP_RET_KILL_PROCESS; 136 137 if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { 138 kill_process = 1; 139 } else { 140 kill_process = 0; 141 } 142 } 143 if (kill_process == 1) { 144 return SCMP_ACT_KILL_PROCESS; 145 } 146 #endif 147 return SCMP_ACT_TRAP; 148 } 149 150 case QEMU_SECCOMP_SET_RESOURCECTL: 151 return SCMP_ACT_ERRNO(EPERM); 152 153 default: 154 g_assert_not_reached(); 155 } 156 } 157 158 159 static int seccomp_start(uint32_t seccomp_opts, Error **errp) 160 { 161 int rc = -1; 162 unsigned int i = 0; 163 scmp_filter_ctx ctx; 164 165 ctx = seccomp_init(SCMP_ACT_ALLOW); 166 if (ctx == NULL) { 167 error_setg(errp, "failed to initialize seccomp context"); 168 goto seccomp_return; 169 } 170 171 rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); 172 if (rc != 0) { 173 error_setg_errno(errp, -rc, 174 "failed to set seccomp thread synchronization"); 175 goto seccomp_return; 176 } 177 178 for (i = 0; i < ARRAY_SIZE(denylist); i++) { 179 uint32_t action; 180 if (!(seccomp_opts & denylist[i].set)) { 181 continue; 182 } 183 184 action = qemu_seccomp_get_action(denylist[i].set); 185 rc = seccomp_rule_add_array(ctx, action, denylist[i].num, 186 denylist[i].narg, denylist[i].arg_cmp); 187 if (rc < 0) { 188 error_setg_errno(errp, -rc, 189 "failed to add seccomp denylist rules"); 190 goto seccomp_return; 191 } 192 } 193 194 rc = seccomp_load(ctx); 195 if (rc < 0) { 196 error_setg_errno(errp, -rc, 197 "failed to load seccomp syscall filter in kernel"); 198 } 199 200 seccomp_return: 201 seccomp_release(ctx); 202 return rc < 0 ? -1 : 0; 203 } 204 205 int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) 206 { 207 if (qemu_opt_get_bool(opts, "enable", false)) { 208 uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT 209 | QEMU_SECCOMP_SET_OBSOLETE; 210 const char *value = NULL; 211 212 value = qemu_opt_get(opts, "obsolete"); 213 if (value) { 214 if (g_str_equal(value, "allow")) { 215 seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE; 216 } else if (g_str_equal(value, "deny")) { 217 /* this is the default option, this if is here 218 * to provide a little bit of consistency for 219 * the command line */ 220 } else { 221 error_setg(errp, "invalid argument for obsolete"); 222 return -1; 223 } 224 } 225 226 value = qemu_opt_get(opts, "elevateprivileges"); 227 if (value) { 228 if (g_str_equal(value, "deny")) { 229 seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; 230 } else if (g_str_equal(value, "children")) { 231 seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; 232 233 /* calling prctl directly because we're 234 * not sure if host has CAP_SYS_ADMIN set*/ 235 if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { 236 error_setg(errp, "failed to set no_new_privs aborting"); 237 return -1; 238 } 239 } else if (g_str_equal(value, "allow")) { 240 /* default value */ 241 } else { 242 error_setg(errp, "invalid argument for elevateprivileges"); 243 return -1; 244 } 245 } 246 247 value = qemu_opt_get(opts, "spawn"); 248 if (value) { 249 if (g_str_equal(value, "deny")) { 250 seccomp_opts |= QEMU_SECCOMP_SET_SPAWN; 251 } else if (g_str_equal(value, "allow")) { 252 /* default value */ 253 } else { 254 error_setg(errp, "invalid argument for spawn"); 255 return -1; 256 } 257 } 258 259 value = qemu_opt_get(opts, "resourcecontrol"); 260 if (value) { 261 if (g_str_equal(value, "deny")) { 262 seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL; 263 } else if (g_str_equal(value, "allow")) { 264 /* default value */ 265 } else { 266 error_setg(errp, "invalid argument for resourcecontrol"); 267 return -1; 268 } 269 } 270 271 if (seccomp_start(seccomp_opts, errp) < 0) { 272 return -1; 273 } 274 } 275 276 return 0; 277 } 278 279 static QemuOptsList qemu_sandbox_opts = { 280 .name = "sandbox", 281 .implied_opt_name = "enable", 282 .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head), 283 .desc = { 284 { 285 .name = "enable", 286 .type = QEMU_OPT_BOOL, 287 }, 288 { 289 .name = "obsolete", 290 .type = QEMU_OPT_STRING, 291 }, 292 { 293 .name = "elevateprivileges", 294 .type = QEMU_OPT_STRING, 295 }, 296 { 297 .name = "spawn", 298 .type = QEMU_OPT_STRING, 299 }, 300 { 301 .name = "resourcecontrol", 302 .type = QEMU_OPT_STRING, 303 }, 304 { /* end of list */ } 305 }, 306 }; 307 308 static void seccomp_register(void) 309 { 310 bool add = false; 311 312 /* FIXME: use seccomp_api_get() >= 2 check when released */ 313 314 #if defined(SECCOMP_FILTER_FLAG_TSYNC) 315 int check; 316 317 /* check host TSYNC capability, it returns errno == ENOSYS if unavailable */ 318 check = qemu_seccomp(SECCOMP_SET_MODE_FILTER, 319 SECCOMP_FILTER_FLAG_TSYNC, NULL); 320 if (check < 0 && errno == EFAULT) { 321 add = true; 322 } 323 #endif 324 325 if (add) { 326 qemu_add_opts(&qemu_sandbox_opts); 327 } 328 } 329 opts_init(seccomp_register); 330