// SPDX-License-Identifier: GPL-2.0
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/huge_mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <linux/uaccess.h>
#include "internal.h"

#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)

enum kpage_operation {
	KPAGE_FLAGS,
	KPAGE_COUNT,
	KPAGE_CGROUP,
};

static inline unsigned long get_max_dump_pfn(void)
{
#ifdef CONFIG_SPARSEMEM
	/*
	 * The memmap of early sections is completely populated and marked
	 * online even if max_pfn does not fall on a section boundary -
	 * pfn_to_online_page() will succeed on all pages. Allow inspecting
	 * these memmaps.
	 */
	return round_up(max_pfn, PAGES_PER_SECTION);
#else
	return max_pfn;
#endif
}

static ssize_t kpage_read(struct file *file, char __user *buf,
		size_t count, loff_t *ppos,
		enum kpage_operation op)
{
	const unsigned long max_dump_pfn = get_max_dump_pfn();
	u64 __user *out = (u64 __user *)buf;
	struct page *page;
	unsigned long src = *ppos;
	unsigned long pfn;
	ssize_t ret = 0;
	u64 info;

	pfn = src / KPMSIZE;
	if (src & KPMMASK || count & KPMMASK)
		return -EINVAL;
	if (src >= max_dump_pfn * KPMSIZE)
		return 0;
	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

	while (count > 0) {
		/*
		 * TODO: ZONE_DEVICE support requires to identify
		 * memmaps that were actually initialized.
		 */
		page = pfn_to_online_page(pfn);

		if (page) {
			switch (op) {
			case KPAGE_FLAGS:
				info = stable_page_flags(page);
				break;
			case KPAGE_COUNT:
				if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
					info = folio_precise_page_mapcount(page_folio(page), page);
				else
					info = folio_average_page_mapcount(page_folio(page));
				break;
			case KPAGE_CGROUP:
				info = page_cgroup_ino(page);
				break;
			default:
				info = 0;
				break;
			}
		} else
			info = 0;

		if (put_user(info, out)) {
			ret = -EFAULT;
			break;
		}

		pfn++;
		out++;
		count -= KPMSIZE;

		cond_resched();
	}

	*ppos += (char __user *)out - buf;
	if (!ret)
		ret = (char __user *)out - buf;
	return ret;
}
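
/*
 * Illustrative userspace sketch (not part of this file): kpage_read()
 * interprets the file offset as pfn * KPMSIZE, so a reader looks up a
 * single PFN by reading one u64 at offset pfn * 8. The helper name
 * read_kpagecount() and the minimal error handling below are only an
 * example of that offset convention, not an established API.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	static int read_kpagecount(unsigned long pfn, uint64_t *val)
 *	{
 *		int fd = open("/proc/kpagecount", O_RDONLY);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = pread(fd, val, sizeof(*val), pfn * sizeof(uint64_t));
 *		close(fd);
 *		return n == sizeof(*val) ? 0 : -1;
 *	}
 */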
/* /proc/kpagecount - an array exposing page mapcounts
 *
 * Each entry is a u64 representing the corresponding
 * physical page mapcount.
 */
static ssize_t kpagecount_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	return kpage_read(file, buf, count, ppos, KPAGE_COUNT);
}

static const struct proc_ops kpagecount_proc_ops = {
	.proc_flags = PROC_ENTRY_PERMANENT,
	.proc_lseek = mem_lseek,
	.proc_read = kpagecount_read,
};


static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
	return ((kflags >> kbit) & 1) << ubit;
}

u64 stable_page_flags(const struct page *page)
{
	const struct folio *folio;
	unsigned long k;
	unsigned long mapping;
	bool is_anon;
	u64 u = 0;

	/*
	 * pseudo flag: KPF_NOPAGE
	 * it differentiates a memory hole from a page with no flags
	 */
	if (!page)
		return 1 << KPF_NOPAGE;
	folio = page_folio(page);

	k = folio->flags;
	mapping = (unsigned long)folio->mapping;
	is_anon = mapping & PAGE_MAPPING_ANON;

	/*
	 * pseudo flags for the well known (anonymous) memory mapped pages
	 */
	if (page_mapped(page))
		u |= 1 << KPF_MMAP;
	if (is_anon) {
		u |= 1 << KPF_ANON;
		if (mapping & PAGE_MAPPING_KSM)
			u |= 1 << KPF_KSM;
	}

	/*
	 * compound pages: export both head/tail info
	 * they together define a compound page's start/end pos and order
	 */
	if (page == &folio->page)
		u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head);
	else
		u |= 1 << KPF_COMPOUND_TAIL;
	if (folio_test_hugetlb(folio))
		u |= 1 << KPF_HUGE;
	else if (folio_test_large(folio) &&
		 folio_test_large_rmappable(folio)) {
		/* Note: we indicate any THPs here, not just PMD-sized ones */
		u |= 1 << KPF_THP;
	} else if (is_huge_zero_folio(folio)) {
		u |= 1 << KPF_ZERO_PAGE;
		u |= 1 << KPF_THP;
	} else if (is_zero_folio(folio)) {
		u |= 1 << KPF_ZERO_PAGE;
	}

	/*
	 * Caveats on high order pages: PG_buddy and PG_slab will only be set
	 * on the head page.
	 */
	if (PageBuddy(page))
		u |= 1 << KPF_BUDDY;
	else if (page_count(page) == 0 && is_free_buddy_page(page))
		u |= 1 << KPF_BUDDY;

	if (PageOffline(page))
		u |= 1 << KPF_OFFLINE;
	if (PageTable(page))
		u |= 1 << KPF_PGTABLE;
	if (folio_test_slab(folio))
		u |= 1 << KPF_SLAB;

#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
	u |= kpf_copy_bit(k, KPF_IDLE, PG_idle);
#else
	if (folio_test_idle(folio))
		u |= 1 << KPF_IDLE;
#endif

	u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
	u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
	u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
	u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);

	u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
	u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
	u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
	u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);

#define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache))
	if ((k & SWAPCACHE) == SWAPCACHE)
		u |= 1 << KPF_SWAPCACHE;
	u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);

	u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
	u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);

#ifdef CONFIG_MEMORY_FAILURE
	if (u & (1 << KPF_HUGE))
		u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
	else
		u |= kpf_copy_bit(page->flags, KPF_HWPOISON, PG_hwpoison);
#endif

	u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
	u |= kpf_copy_bit(k, KPF_OWNER_2, PG_owner_2);
	u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
	u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
	u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
#ifdef CONFIG_ARCH_USES_PG_ARCH_2
	u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
#endif
#ifdef CONFIG_ARCH_USES_PG_ARCH_3
	u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3);
#endif

	return u;
}
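
/*
 * Illustrative userspace sketch (not part of this file): the value
 * stable_page_flags() computes for a PFN is exported through
 * /proc/kpageflags at offset pfn * 8 and can be tested against the
 * KPF_* bit numbers from the uapi header <linux/kernel-page-flags.h>.
 * The helper below is hypothetical; it assumes fd is an open file
 * descriptor on /proc/kpageflags.
 *
 *	#include <linux/kernel-page-flags.h>
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	static bool pfn_is_huge_or_thp(int fd, unsigned long pfn)
 *	{
 *		uint64_t flags = 0;
 *
 *		if (pread(fd, &flags, sizeof(flags),
 *			  pfn * sizeof(flags)) != sizeof(flags))
 *			return false;
 *		return flags & ((1ULL << KPF_HUGE) | (1ULL << KPF_THP));
 *	}
 */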
/* /proc/kpageflags - an array exposing page flags
 *
 * Each entry is a u64 representing the corresponding
 * physical page flags.
 */
static ssize_t kpageflags_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	return kpage_read(file, buf, count, ppos, KPAGE_FLAGS);
}

static const struct proc_ops kpageflags_proc_ops = {
	.proc_flags = PROC_ENTRY_PERMANENT,
	.proc_lseek = mem_lseek,
	.proc_read = kpageflags_read,
};

#ifdef CONFIG_MEMCG
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	return kpage_read(file, buf, count, ppos, KPAGE_CGROUP);
}
static const struct proc_ops kpagecgroup_proc_ops = {
	.proc_flags = PROC_ENTRY_PERMANENT,
	.proc_lseek = mem_lseek,
	.proc_read = kpagecgroup_read,
};
#endif /* CONFIG_MEMCG */

static int __init proc_page_init(void)
{
	proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
	proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
#ifdef CONFIG_MEMCG
	proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
#endif
	return 0;
}
fs_initcall(proc_page_init);
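
/*
 * Illustrative userspace sketch (not part of this file): each
 * /proc/kpagecgroup entry is the inode number of the memory cgroup the
 * page is charged to (see page_cgroup_ino() above), so it can be
 * compared with st_ino of a cgroup directory. The helper name and the
 * caller-supplied cgroup path are only an example; kpagecgroup_fd is
 * assumed to be open on /proc/kpagecgroup.
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	static bool pfn_charged_to(int kpagecgroup_fd, unsigned long pfn,
 *				   const char *cgroup_dir)
 *	{
 *		struct stat st;
 *		uint64_t ino = 0;
 *
 *		if (stat(cgroup_dir, &st) < 0)
 *			return false;
 *		if (pread(kpagecgroup_fd, &ino, sizeof(ino),
 *			  pfn * sizeof(ino)) != sizeof(ino))
 *			return false;
 *		return ino == st.st_ino;
 *	}
 */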