/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"

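/*
 * Default mode, chosen at configure time:
 * 0 = disabled, 1 = enabled, 2 = one-shot (disable after the first report).
 */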
#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

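/*
 * Early initialization: restrict the machine to a single CPU (the page
 * hiding/single-stepping scheme relies on only one CPU touching tracked
 * pages) and run the built-in self-test before any tracked allocations
 * can happen.
 */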
int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	if (!str)
		return -EINVAL;

	if (sscanf(str, "%d", &kmemcheck_enabled) != 1)
		return -EINVAL;
	return 0;
}

early_param("kmemcheck", param_kmemcheck);

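/*
 * Make the page containing @address present again so that the faulting
 * instruction can complete. Returns 1 if the address is tracked by
 * kmemcheck, 0 otherwise.
 */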
int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

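/*
 * Clear the present bit for the page containing @address again. Returns 1
 * if the address is tracked by kmemcheck, 0 otherwise.
 */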
int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

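/*
 * Per-CPU state carried from the page fault handler (which reveals the
 * tracked pages) to the debug trap handler (which hides them again once
 * the faulting instruction has been single-stepped).
 */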
struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

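/* Is a kmemcheck-initiated single step in progress on this CPU? */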
bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

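/* Reveal all saved addresses; returns how many of them were tracked. */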
static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

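/* Hide all saved addresses again; returns how many of them were tracked. */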
static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * We single-step the faulting instruction by setting TF. The IF
	 * needs to be cleared as well, so that the instruction can run
	 * "uninterrupted". Otherwise, we might take an interrupt and start
	 * executing that before we've had a chance to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

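/*
 * Un-hide @n pages starting at @p for good: set the present bit and clear
 * the kmemcheck "hidden" flag.
 */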
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

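/*
 * Hide @n pages starting at @p: clear the present bit and set the
 * kmemcheck "hidden" flag, so that every access faults into kmemcheck.
 */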
void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

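/*
 * Returns true if the @size bytes at @addr are all initialized, or if the
 * memory is not tracked by kmemcheck at all.
 */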
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	/* +1: next_addr is the (inclusive) address of the last byte. */
	kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
}

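/*
 * Access may NOT cross page boundary. A write makes the destination bytes
 * initialized; there is nothing to check.
 */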
static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	/* +1: next_addr is the (inclusive) address of the last byte. */
	kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

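/*
 * Decode the instruction at regs->ip and simulate its effect on the shadow
 * memory. The fallback address/method reported by the page fault handler is
 * used when the opcode itself does not identify the memory operand(s).
 */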
static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

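/*
 * Called from the page fault handler. Returns true if the fault was caused
 * by a page kmemcheck is tracking and has been handled here. Bit 1 of
 * @error_code is set for write accesses.
 */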
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	WARN_ON_ONCE(in_nmi());

	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

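/*
 * Called from the debug exception handler. Returns true if the single-step
 * trap belonged to kmemcheck, in which case the tracked pages are hidden
 * again.
 */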
bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}