/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PAGE_64_H
#define _ASM_X86_PAGE_64_H

#include <asm/page_64_types.h>

#ifndef __ASSEMBLER__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#include <linux/kmsan-checks.h>
#include <linux/mmdebug.h>

/* duplicate of the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;

extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
extern unsigned long direct_map_physmem_end;

static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	/* use the carry flag to determine if x was < __START_KERNEL_map */
	x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));

	return x;
}
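
/*
 * Worked example of the carry trick above (illustrative values only; assumes
 * the default 4-level, non-KASLR layout with
 * PAGE_OFFSET == 0xffff888000000000, __START_KERNEL_map == 0xffffffff80000000
 * and phys_base == 0):
 *
 * - Kernel-text address x = __START_KERNEL_map + 0x100000:
 *   y = 0x100000 does not wrap, so x > y and the result is y + phys_base,
 *   i.e. the offset into the kernel image plus its physical load address.
 *
 * - Direct-map address x = PAGE_OFFSET + 0x1000:
 *   y = x - __START_KERNEL_map wraps around, so y > x and we add
 *   (__START_KERNEL_map - PAGE_OFFSET) instead; the two adjustments cancel
 *   and the result is x - PAGE_OFFSET == 0x1000.
 */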

#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x)	__phys_addr_nodebug(x)
#endif

static inline unsigned long __phys_addr_symbol(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	/* only check upper bounds since lower bounds will trigger carry */
	VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);

	return y + phys_base;
}
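
/*
 * As in __phys_addr_nodebug(), an address below __START_KERNEL_map makes the
 * subtraction in __phys_addr_symbol() wrap to a huge value, so the single
 * unsigned comparison against KERNEL_IMAGE_SIZE rejects both too-low and
 * too-high inputs.
 */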

#define __phys_reloc_hide(x)	(x)

void __clear_pages_unrolled(void *page);
KCFI_REFERENCE(__clear_pages_unrolled);

/**
 * clear_pages() - clear a page range using a kernel virtual address.
 * @addr: start address of kernel page range
 * @npages: number of pages
 *
 * Switch between three implementations of page clearing based on CPU
 * capabilities:
 *
 * - __clear_pages_unrolled(): the oldest, slowest and universally
 *   supported method. Zeroes via 8-byte MOV instructions unrolled 8x
 *   to write a 64-byte cacheline in each loop iteration.
 *
 * - "REP; STOSQ": really old CPUs had crummy REP implementations.
 *   Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
 *   trusted. The instruction writes 8 bytes per REP iteration, but
 *   CPUs can internally batch these together and do larger writes.
 *
 * - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
 *   which enumerate 'ERMS' and provide an implementation which,
 *   unlike "REP; STOSQ" above, wasn't overly picky about alignment.
 *   The instruction writes 1 byte per REP iteration, with CPUs
 *   internally batching these together into larger writes, and it is
 *   generally the fastest of the three.
 *
 * Note that when running as a guest, features exposed by the CPU
 * might be mediated by the hypervisor. So, the STOSQ variant might
 * be in active use on some systems even when the hardware enumerates
 * ERMS.
 *
 * Does absolutely no exception handling.
 */
static inline void clear_pages(void *addr, unsigned int npages)
{
	u64 len = npages * PAGE_SIZE;
	/*
	 * Clean up KMSAN metadata for the pages being cleared. The assembly call
	 * below clobbers @addr, so perform unpoisoning before it.
	 */
	kmsan_unpoison_memory(addr, len);

	/*
	 * The inline asm embeds a CALL instruction and usually that is a no-no
	 * due to the compiler not knowing that and thus being unable to track
	 * callee-clobbered registers.
	 *
	 * In this case that is fine because the registers clobbered by
	 * __clear_pages_unrolled() are part of the inline asm register
	 * specification.
	 */
	asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
				   "shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
				   "rep stosb", X86_FEATURE_ERMS)
		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
		     : "a" (0)
		     : "cc", "memory");
}
#define clear_pages clear_pages
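
/*
 * Minimal usage sketch (illustrative only): clearing the four pages backing
 * a hypothetical order-2 allocation.
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, 2);
 *
 *	if (page)
 *		clear_pages(page_address(page), 1 << 2);
 *
 * The caller must pass a mapped kernel virtual address; as documented above,
 * clear_pages() performs no exception handling.
 */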

static inline void clear_page(void *addr)
{
	clear_pages(addr, 1);
}

void copy_page(void *to, void *from);
KCFI_REFERENCE(copy_page);

/*
 * User space process size. This is the first address outside the user range.
 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen. This is worked around in the same way as the
 * Intel problem.
 *
 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 */
static __always_inline unsigned long task_size_max(void)
{
	unsigned long ret;

	alternative_io("movq %[small],%0","movq %[large],%0",
		       X86_FEATURE_LA57,
		       "=r" (ret),
		       [small] "i" ((1ul << 47)-PAGE_SIZE),
		       [large] "i" ((1ul << 56)-PAGE_SIZE));

	return ret;
}
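
/*
 * For reference, the resulting limits follow directly from the constants
 * above: (1UL << 47) - PAGE_SIZE == 0x00007ffffffff000 with 4-level paging,
 * and (1UL << 56) - PAGE_SIZE == 0x00fffffffffff000 with 5-level paging
 * (LA57), i.e. the topmost page of the user-canonical half is left unmapped
 * in each case, as described above.
 */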

#endif	/* !__ASSEMBLER__ */

#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
#endif

#endif /* _ASM_X86_PAGE_64_H */