// SPDX-License-Identifier: GPL-2.0
/*
 * KMSAN initialization routines.
 *
 * Copyright (C) 2017-2021 Google LLC
 * Author: Alexander Potapenko <glider@google.com>
 *
 */

#include "kmsan.h"

#include <asm/sections.h>
#include <linux/mm.h>
#include <linux/memblock.h>

#include "../internal.h"

#define NUM_FUTURE_RANGES 128
struct start_end_pair {
	u64 start, end;
};

static struct start_end_pair start_end_pairs[NUM_FUTURE_RANGES] __initdata;
static int future_index __initdata;

/*
 * Record a range of memory for which the metadata pages will be created once
 * the page allocator becomes available.
 */
static void __init kmsan_record_future_shadow_range(void *start, void *end)
{
	u64 nstart = (u64)start, nend = (u64)end, cstart, cend;
	bool merged = false;

	KMSAN_WARN_ON(future_index == NUM_FUTURE_RANGES);
	KMSAN_WARN_ON((nstart >= nend) ||
		      /* Virtual address 0 is valid on s390. */
		      (!IS_ENABLED(CONFIG_S390) && !nstart) || !nend);
	nstart = ALIGN_DOWN(nstart, PAGE_SIZE);
	nend = ALIGN(nend, PAGE_SIZE);

	/*
	 * Scan the existing ranges to see if any of them overlaps with
	 * [start, end). In that case, merge the two ranges instead of
	 * creating a new one.
	 * The number of ranges is less than 20, so there is no need to organize
	 * them into a more intelligent data structure.
	 */
	for (int i = 0; i < future_index; i++) {
		cstart = start_end_pairs[i].start;
		cend = start_end_pairs[i].end;
		if ((cstart < nstart && cend < nstart) ||
		    (cstart > nend && cend > nend))
			/* ranges are disjoint - do not merge */
			continue;
		start_end_pairs[i].start = min(nstart, cstart);
		start_end_pairs[i].end = max(nend, cend);
		merged = true;
		break;
	}
	if (merged)
		return;
	start_end_pairs[future_index].start = nstart;
	start_end_pairs[future_index].end = nend;
	future_index++;
}

/*
 * Initialize the shadow for existing mappings during kernel initialization.
 * These include kernel text/data sections, NODE_DATA and future ranges
 * registered while creating other data (e.g. percpu).
 *
 * Allocations via memblock can be only done before slab is initialized.
 */
void __init kmsan_init_shadow(void)
{
	const size_t nd_size = sizeof(pg_data_t);
	phys_addr_t p_start, p_end;
	u64 loop;
	int nid;

	for_each_reserved_mem_range(loop, &p_start, &p_end)
		kmsan_record_future_shadow_range(phys_to_virt(p_start),
						 phys_to_virt(p_end));
	/* Allocate shadow for .data */
	kmsan_record_future_shadow_range(_sdata, _edata);

	for_each_online_node(nid)
		kmsan_record_future_shadow_range(
			NODE_DATA(nid), (char *)NODE_DATA(nid) + nd_size);

	for (int i = 0; i < future_index; i++)
		kmsan_init_alloc_meta_for_range(
			(void *)start_end_pairs[i].start,
			(void *)start_end_pairs[i].end);
}

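/*
 * A pair of metadata page blocks held back by kmsan_memblock_free_pages()
 * until a third block of the same order arrives (see the comment below).
 */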
struct metadata_page_pair {
	struct page *shadow, *origin;
};
static struct metadata_page_pair held_back[NR_PAGE_ORDERS] __initdata;

/*
 * Eager metadata allocation. When the memblock allocator is freeing pages to
 * pagealloc, we use 2/3 of them as metadata for the remaining 1/3.
 * We store the pointers to the returned blocks of pages in held_back[] grouped
 * by their order: when kmsan_memblock_free_pages() is called for the first
 * time with a certain order, it is reserved as a shadow block, for the second
 * time - as an origin block. On the third time the incoming block receives its
 * shadow and origin ranges from the previously saved shadow and origin blocks,
 * after which held_back[order] can be used again.
 *
 * At the very end there may be leftover blocks in held_back[]. They are
 * collected later by kmsan_memblock_discard().
 */
bool kmsan_memblock_free_pages(struct page *page, unsigned int order)
{
	struct page *shadow, *origin;

	if (!held_back[order].shadow) {
		held_back[order].shadow = page;
		return false;
	}
	if (!held_back[order].origin) {
		held_back[order].origin = page;
		return false;
	}
	shadow = held_back[order].shadow;
	origin = held_back[order].origin;
	kmsan_setup_meta(page, shadow, origin, order);

	held_back[order].shadow = NULL;
	held_back[order].origin = NULL;
	return true;
}

#define MAX_BLOCKS 8
struct smallstack {
	struct page *items[MAX_BLOCKS];
	int index;
	int order;
};

static struct smallstack collect = {
	.index = 0,
	.order = MAX_PAGE_ORDER,
};

static void smallstack_push(struct smallstack *stack, struct page *pages)
{
	KMSAN_WARN_ON(stack->index == MAX_BLOCKS);
	stack->items[stack->index] = pages;
	stack->index++;
}
#undef MAX_BLOCKS

static struct page *smallstack_pop(struct smallstack *stack)
{
	struct page *ret;

	KMSAN_WARN_ON(stack->index == 0);
	stack->index--;
	ret = stack->items[stack->index];
	stack->items[stack->index] = NULL;
	return ret;
}

/*
 * Pop page blocks from @collect in triples: use two of them as shadow and
 * origin metadata for the third one, then release that third block to the
 * page allocator.
 */
static void do_collection(void)
{
	struct page *page, *shadow, *origin;

	while (collect.index >= 3) {
		page = smallstack_pop(&collect);
		shadow = smallstack_pop(&collect);
		origin = smallstack_pop(&collect);
		kmsan_setup_meta(page, shadow, origin, collect.order);
		__free_pages_core(page, collect.order, MEMINIT_EARLY);
	}
}

/*
 * Split every block remaining on @collect into two blocks of the next lower
 * order, so that collection can continue at collect.order - 1.
 */
static void collect_split(void)
{
	struct smallstack tmp = {
		.order = collect.order - 1,
		.index = 0,
	};
	struct page *page;

	if (!collect.order)
		return;
	while (collect.index) {
		page = smallstack_pop(&collect);
		smallstack_push(&tmp, &page[0]);
		smallstack_push(&tmp, &page[1 << tmp.order]);
	}
	__memcpy(&collect, &tmp, sizeof(tmp));
}

/*
 * Memblock is about to go away. Split the page blocks left over in held_back[]
 * and return 1/3 of that memory to the system.
 */
static void kmsan_memblock_discard(void)
{
	/*
	 * For each order=N:
	 *  - push held_back[N].shadow and .origin to @collect;
	 *  - while there are >= 3 elements in @collect, do garbage collection:
	 *    - pop 3 ranges from @collect;
	 *    - use two of them as shadow and origin for the third one;
	 *    - repeat;
	 *  - split each remaining element from @collect into 2 ranges of
	 *    order=N-1,
	 *  - repeat.
	 */
	collect.order = MAX_PAGE_ORDER;
	for (int i = MAX_PAGE_ORDER; i >= 0; i--) {
		if (held_back[i].shadow)
			smallstack_push(&collect, held_back[i].shadow);
		if (held_back[i].origin)
			smallstack_push(&collect, held_back[i].origin);
		held_back[i].shadow = NULL;
		held_back[i].origin = NULL;
		do_collection();
		collect_split();
	}
}

void __init kmsan_init_runtime(void)
{
	/* Assuming current is init_task */
	kmsan_internal_task_create(current);
	kmsan_memblock_discard();
	pr_info("Starting KernelMemorySanitizer\n");
	pr_info("ATTENTION: KMSAN is a debugging tool! Do not use it on production machines!\n");
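	/* Let the KMSAN hooks throughout the kernel start doing real work. */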
	kmsan_enabled = true;
}