1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Ioctl to enable verity on a file
4 *
5 * Copyright 2019 Google LLC
6 */
7
8 #include "fsverity_private.h"
9
10 #include <linux/export.h>
11 #include <linux/mount.h>
12 #include <linux/sched/signal.h>
13 #include <linux/uaccess.h>
14
15 struct block_buffer {
16 u32 filled;
17 bool is_root_hash;
18 u8 *data;
19 };
20
21 /* Hash a block, writing the result to the next level's pending block buffer. */
hash_one_block(const struct merkle_tree_params * params,struct block_buffer * cur)22 static int hash_one_block(const struct merkle_tree_params *params,
23 struct block_buffer *cur)
24 {
25 struct block_buffer *next = cur + 1;
26
27 /*
28 * Safety check to prevent a buffer overflow in case of a filesystem bug
29 * that allows the file size to change despite deny_write_access(), or a
30 * bug in the Merkle tree logic itself
31 */
32 if (WARN_ON_ONCE(next->is_root_hash && next->filled != 0))
33 return -EINVAL;
34
35 /* Zero-pad the block if it's shorter than the block size. */
36 memset(&cur->data[cur->filled], 0, params->block_size - cur->filled);
37
38 fsverity_hash_block(params, cur->data, &next->data[next->filled]);
39 next->filled += params->digest_size;
40 cur->filled = 0;
41 return 0;
42 }
43
write_merkle_tree_block(struct file * file,const u8 * buf,unsigned long index,const struct merkle_tree_params * params)44 static int write_merkle_tree_block(struct file *file, const u8 *buf,
45 unsigned long index,
46 const struct merkle_tree_params *params)
47 {
48 struct inode *inode = file_inode(file);
49 u64 pos = (u64)index << params->log_blocksize;
50 int err;
51
52 err = inode->i_sb->s_vop->write_merkle_tree_block(file, buf, pos,
53 params->block_size);
54 if (err)
55 fsverity_err(inode, "Error %d writing Merkle tree block %lu",
56 err, index);
57 return err;
58 }
59
60 /*
61 * Build the Merkle tree for the given file using the given parameters, and
62 * return the root hash in @root_hash.
63 *
64 * The tree is written to a filesystem-specific location as determined by the
65 * ->write_merkle_tree_block() method. However, the blocks that comprise the
66 * tree are the same for all filesystems.
67 */
build_merkle_tree(struct file * filp,const struct merkle_tree_params * params,u8 * root_hash)68 static int build_merkle_tree(struct file *filp,
69 const struct merkle_tree_params *params,
70 u8 *root_hash)
71 {
72 struct inode *inode = file_inode(filp);
73 const u64 data_size = inode->i_size;
74 const int num_levels = params->num_levels;
75 struct block_buffer _buffers[1 + FS_VERITY_MAX_LEVELS + 1] = {};
76 struct block_buffer *buffers = &_buffers[1];
77 unsigned long level_offset[FS_VERITY_MAX_LEVELS];
78 int level;
79 u64 offset;
80 int err;
81
82 if (data_size == 0) {
83 /* Empty file is a special case; root hash is all 0's */
84 memset(root_hash, 0, params->digest_size);
85 return 0;
86 }
87
88 /*
89 * Allocate the block buffers. Buffer "-1" is for data blocks.
90 * Buffers 0 <= level < num_levels are for the actual tree levels.
91 * Buffer 'num_levels' is for the root hash.
92 */
93 for (level = -1; level < num_levels; level++) {
94 buffers[level].data = kzalloc(params->block_size, GFP_KERNEL);
95 if (!buffers[level].data) {
96 err = -ENOMEM;
97 goto out;
98 }
99 }
100 buffers[num_levels].data = root_hash;
101 buffers[num_levels].is_root_hash = true;
102
103 BUILD_BUG_ON(sizeof(level_offset) != sizeof(params->level_start));
104 memcpy(level_offset, params->level_start, sizeof(level_offset));
105
106 /* Hash each data block, also hashing the tree blocks as they fill up */
107 for (offset = 0; offset < data_size; offset += params->block_size) {
108 ssize_t bytes_read;
109 loff_t pos = offset;
110
111 buffers[-1].filled = min_t(u64, params->block_size,
112 data_size - offset);
113 bytes_read = __kernel_read(filp, buffers[-1].data,
114 buffers[-1].filled, &pos);
115 if (bytes_read < 0) {
116 err = bytes_read;
117 fsverity_err(inode, "Error %d reading file data", err);
118 goto out;
119 }
120 if (bytes_read != buffers[-1].filled) {
121 err = -EINVAL;
122 fsverity_err(inode, "Short read of file data");
123 goto out;
124 }
125 err = hash_one_block(params, &buffers[-1]);
126 if (err)
127 goto out;
128 for (level = 0; level < num_levels; level++) {
129 if (buffers[level].filled + params->digest_size <=
130 params->block_size) {
131 /* Next block at @level isn't full yet */
132 break;
133 }
134 /* Next block at @level is full */
135
136 err = hash_one_block(params, &buffers[level]);
137 if (err)
138 goto out;
139 err = write_merkle_tree_block(filp,
140 buffers[level].data,
141 level_offset[level],
142 params);
143 if (err)
144 goto out;
145 level_offset[level]++;
146 }
147 if (fatal_signal_pending(current)) {
148 err = -EINTR;
149 goto out;
150 }
151 cond_resched();
152 }
153 /* Finish all nonempty pending tree blocks. */
154 for (level = 0; level < num_levels; level++) {
155 if (buffers[level].filled != 0) {
156 err = hash_one_block(params, &buffers[level]);
157 if (err)
158 goto out;
159 err = write_merkle_tree_block(filp,
160 buffers[level].data,
161 level_offset[level],
162 params);
163 if (err)
164 goto out;
165 }
166 }
167 /* The root hash was filled by the last call to hash_one_block(). */
168 if (WARN_ON_ONCE(buffers[num_levels].filled != params->digest_size)) {
169 err = -EINVAL;
170 goto out;
171 }
172 err = 0;
173 out:
174 for (level = -1; level < num_levels; level++)
175 kfree(buffers[level].data);
176 return err;
177 }
178
enable_verity(struct file * filp,const struct fsverity_enable_arg * arg)179 static int enable_verity(struct file *filp,
180 const struct fsverity_enable_arg *arg)
181 {
182 struct inode *inode = file_inode(filp);
183 const struct fsverity_operations *vops = inode->i_sb->s_vop;
184 struct merkle_tree_params params = { };
185 struct fsverity_descriptor *desc;
186 size_t desc_size = struct_size(desc, signature, arg->sig_size);
187 struct fsverity_info *vi;
188 int err;
189
190 /* Start initializing the fsverity_descriptor */
191 desc = kzalloc(desc_size, GFP_KERNEL);
192 if (!desc)
193 return -ENOMEM;
194 desc->version = 1;
195 desc->hash_algorithm = arg->hash_algorithm;
196 desc->log_blocksize = ilog2(arg->block_size);
197
198 /* Get the salt if the user provided one */
199 if (arg->salt_size &&
200 copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr),
201 arg->salt_size)) {
202 err = -EFAULT;
203 goto out;
204 }
205 desc->salt_size = arg->salt_size;
206
207 /* Get the builtin signature if the user provided one */
208 if (arg->sig_size &&
209 copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr),
210 arg->sig_size)) {
211 err = -EFAULT;
212 goto out;
213 }
214 desc->sig_size = cpu_to_le32(arg->sig_size);
215
216 desc->data_size = cpu_to_le64(inode->i_size);
217
218 /* Prepare the Merkle tree parameters */
219 err = fsverity_init_merkle_tree_params(¶ms, inode,
220 arg->hash_algorithm,
221 desc->log_blocksize,
222 desc->salt, desc->salt_size);
223 if (err)
224 goto out;
225
226 trace_fsverity_enable(inode, ¶ms);
227
228 /*
229 * Start enabling verity on this file, serialized by the inode lock.
230 * Fail if verity is already enabled or is already being enabled.
231 */
232 inode_lock(inode);
233 if (IS_VERITY(inode))
234 err = -EEXIST;
235 else
236 err = vops->begin_enable_verity(filp);
237 inode_unlock(inode);
238 if (err)
239 goto out;
240
241 /*
242 * Build the Merkle tree. Don't hold the inode lock during this, since
243 * on huge files this may take a very long time and we don't want to
244 * force unrelated syscalls like chown() to block forever. We don't
245 * need the inode lock here because deny_write_access() already prevents
246 * the file from being written to or truncated, and we still serialize
247 * ->begin_enable_verity() and ->end_enable_verity() using the inode
248 * lock and only allow one process to be here at a time on a given file.
249 */
250 BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
251 err = build_merkle_tree(filp, ¶ms, desc->root_hash);
252 if (err) {
253 fsverity_err(inode, "Error %d building Merkle tree", err);
254 goto rollback;
255 }
256
257 /*
258 * Create the fsverity_info. Don't bother trying to save work by
259 * reusing the merkle_tree_params from above. Instead, just create the
260 * fsverity_info from the fsverity_descriptor as if it were just loaded
261 * from disk. This is simpler, and it serves as an extra check that the
262 * metadata we're writing is valid before actually enabling verity.
263 */
264 vi = fsverity_create_info(inode, desc);
265 if (IS_ERR(vi)) {
266 err = PTR_ERR(vi);
267 goto rollback;
268 }
269
270 trace_fsverity_tree_done(inode, vi, ¶ms);
271
272 /*
273 * Add the fsverity_info into the hash table before finishing the
274 * initialization so that we don't have to undo the enabling when memory
275 * allocation for the hash table fails. This is safe because looking up
276 * the fsverity_info always first checks the S_VERITY flag on the inode,
277 * which will only be set at the very end of the ->end_enable_verity
278 * method.
279 */
280 err = fsverity_set_info(vi);
281 if (err) {
282 fsverity_free_info(vi);
283 goto rollback;
284 }
285
286 /*
287 * Tell the filesystem to finish enabling verity on the file.
288 * Serialized with ->begin_enable_verity() by the inode lock. The file
289 * system needs to set the S_VERITY flag on the inode at the very end of
290 * the method, at which point the fsverity information can be accessed
291 * by other threads.
292 */
293 inode_lock(inode);
294 err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
295 inode_unlock(inode);
296 if (err) {
297 fsverity_err(inode, "%ps() failed with err %d",
298 vops->end_enable_verity, err);
299 fsverity_remove_info(vi);
300 } else if (WARN_ON_ONCE(!IS_VERITY(inode))) {
301 fsverity_remove_info(vi);
302 err = -EINVAL;
303 }
304 out:
305 kfree(params.hashstate);
306 kfree(desc);
307 return err;
308
309 rollback:
310 inode_lock(inode);
311 (void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
312 inode_unlock(inode);
313 goto out;
314 }
315
316 /**
317 * fsverity_ioctl_enable() - enable verity on a file
318 * @filp: file to enable verity on
319 * @uarg: user pointer to fsverity_enable_arg
320 *
321 * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of
322 * Documentation/filesystems/fsverity.rst for the documentation.
323 *
324 * Return: 0 on success, -errno on failure
325 */
fsverity_ioctl_enable(struct file * filp,const void __user * uarg)326 int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
327 {
328 struct inode *inode = file_inode(filp);
329 struct fsverity_enable_arg arg;
330 int err;
331
332 if (copy_from_user(&arg, uarg, sizeof(arg)))
333 return -EFAULT;
334
335 if (arg.version != 1)
336 return -EINVAL;
337
338 if (arg.__reserved1 ||
339 memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
340 return -EINVAL;
341
342 if (!is_power_of_2(arg.block_size))
343 return -EINVAL;
344
345 if (arg.salt_size > sizeof_field(struct fsverity_descriptor, salt))
346 return -EMSGSIZE;
347
348 if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE)
349 return -EMSGSIZE;
350
351 /*
352 * Require a regular file with write access. But the actual fd must
353 * still be readonly so that we can lock out all writers. This is
354 * needed to guarantee that no writable fds exist to the file once it
355 * has verity enabled, and to stabilize the data being hashed.
356 */
357
358 err = file_permission(filp, MAY_WRITE);
359 if (err)
360 return err;
361 /*
362 * __kernel_read() is used while building the Merkle tree. So, we can't
363 * allow file descriptors that were opened for ioctl access only, using
364 * the special nonstandard access mode 3. O_RDONLY only, please!
365 */
366 if (!(filp->f_mode & FMODE_READ))
367 return -EBADF;
368
369 if (IS_APPEND(inode))
370 return -EPERM;
371
372 if (S_ISDIR(inode->i_mode))
373 return -EISDIR;
374
375 if (!S_ISREG(inode->i_mode))
376 return -EINVAL;
377
378 err = mnt_want_write_file(filp);
379 if (err) /* -EROFS */
380 return err;
381
382 err = deny_write_access(filp);
383 if (err) /* -ETXTBSY */
384 goto out_drop_write;
385
386 err = enable_verity(filp, &arg);
387
388 /*
389 * We no longer drop the inode's pagecache after enabling verity. This
390 * used to be done to try to avoid a race condition where pages could be
391 * evicted after being used in the Merkle tree construction, then
392 * re-instantiated by a concurrent read. Such pages are unverified, and
393 * the backing storage could have filled them with different content, so
394 * they shouldn't be used to fulfill reads once verity is enabled.
395 *
396 * But, dropping the pagecache has a big performance impact, and it
397 * doesn't fully solve the race condition anyway. So for those reasons,
398 * and also because this race condition isn't very important relatively
399 * speaking (especially for small-ish files, where the chance of a page
400 * being used, evicted, *and* re-instantiated all while enabling verity
401 * is quite small), we no longer drop the inode's pagecache.
402 */
403
404 /*
405 * allow_write_access() is needed to pair with deny_write_access().
406 * Regardless, the filesystem won't allow writing to verity files.
407 */
408 allow_write_access(filp);
409 out_drop_write:
410 mnt_drop_write_file(filp);
411 return err;
412 }
413 EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);
414