/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <linux/objtool.h>
#include <asm/asm.h>

/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions (ERMS). It is
 * recommended to use them when possible, and we do so by default. If
 * ERMS is not available, try the "fast string" REP STOSQ variant;
 * otherwise, use the original unrolled-loop implementation.
 */
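
/*
 * For orientation, a rough sketch of the caller side (not part of this
 * file): clear_page() in arch/x86/include/asm/page_64.h picks one of
 * the three variants below via the alternatives mechanism; the exact
 * alternative_call_2() argument list varies between kernel versions.
 *
 *	static inline void clear_page(void *page)
 *	{
 *		alternative_call_2(clear_page_orig,
 *				   clear_page_rep, X86_FEATURE_REP_GOOD,
 *				   clear_page_erms, X86_FEATURE_ERMS,
 *				   "=D" (page), "D" (page),
 *				   "cc", "memory", "rax", "rcx");
 *	}
 */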

/*
 * Zero a page.
 * %rdi	- page
 */
SYM_TYPED_FUNC_START(clear_page_rep)
	movl $4096/8,%ecx	# 512 qwords per 4K page
	xorl %eax,%eax		# fill value: zero
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)

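/*
 * Zero a page with plain qword stores, for CPUs where neither ERMS nor
 * fast REP STOSQ is a win: a software-unrolled loop clearing 64 bytes
 * per iteration.
 * %rdi	- page
 */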
SYM_TYPED_FUNC_START(clear_page_orig)
	xorl   %eax,%eax
	movl   $4096/64,%ecx	# 64 iterations of 64 bytes each
	.p2align 4
.Lloop:
	decl	%ecx
#define PUT(x) movq %rax,x*8(%rdi)
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi),%rdi	# leaq leaves the flags from decl intact
	jnz	.Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)

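/*
 * Zero a page with a byte-granular REP STOSB. With ERMS, microcode
 * handles the full 4096-byte count at least as fast as the qword
 * variant, without the unrolling.
 * %rdi	- page
 */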
SYM_TYPED_FUNC_START(clear_page_erms)
	movl $4096,%ecx		# byte count
	xorl %eax,%eax		# fill value: zero
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 * rax is zero
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
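/*
 * Caller-side sketch for orientation (see __clear_user() in
 * arch/x86/include/asm/uaccess_64.h): the fast path is a bare
 * "rep stosb", and a call to this function is patched in via
 * ALTERNATIVE() only when the CPU lacks X86_FEATURE_FSRS (Fast
 * Short REP STOSB), roughly:
 *
 *	asm volatile(
 *		"1:\n\t"
 *		ALTERNATIVE("rep stosb",
 *			    "call rep_stos_alternative",
 *			    ALT_NOT(X86_FEATURE_FSRS))
 *		"2:\n"
 *		_ASM_EXTABLE_UA(1b, 2b)
 *		: "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
 *		: "a" (0));
 *
 * Hence no ENDBR below: this is only ever reached by direct call.
 */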
SYM_FUNC_START(rep_stos_alternative)
	ANNOTATE_NOENDBR
	cmpq $64,%rcx
	jae .Lunrolled		# >= 64 bytes: unrolled 64-byte loop

	cmp $8,%ecx
	jae .Lword		# 8..63 bytes: qword stores

	testl %ecx,%ecx
	je .Lexit		# zero bytes: nothing to do

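	/*
	 * Byte-at-a-time tail. This label doubles as the fixup target
	 * for faults in the qword and unrolled paths below, since %rcx
	 * always holds the number of bytes still to be cleared.
	 */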
.Lclear_user_tail:
0:	movb %al,(%rdi)
	inc %rdi
	dec %rcx
	jnz .Lclear_user_tail
.Lexit:
	RET

	/* A fault on the byte store exits with the remaining count in %rcx. */
	_ASM_EXTABLE_UA( 0b, .Lexit)

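	/*
	 * Qword-at-a-time path, used while at least 8 bytes remain;
	 * a sub-qword remainder falls through to the byte tail.
	 */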
.Lword:
1:	movq %rax,(%rdi)
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lclear_user_tail

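	/*
	 * Main unrolled loop: 64 bytes per iteration via eight explicit
	 * qword stores, each covered by its own exception-table entry
	 * (10b..17b below).
	 */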
	.p2align 4
.Lunrolled:
10:	movq %rax,(%rdi)
11:	movq %rax,8(%rdi)
12:	movq %rax,16(%rdi)
13:	movq %rax,24(%rdi)
14:	movq %rax,32(%rdi)
15:	movq %rax,40(%rdi)
16:	movq %rax,48(%rdi)
17:	movq %rax,56(%rdi)
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lclear_user_tail
	RET

	/*
	 * If we take an exception on any of the
	 * word stores, we know that %rcx isn't zero,
	 * so we can just go to the tail clearing to
	 * get the exact count.
	 *
	 * The unrolled case might end up clearing
	 * some bytes twice. Don't care.
	 *
	 * We could use the value in %rdi to avoid
	 * a second fault on the exact count case,
	 * but do we really care? No.
	 *
	 * Finally, we could try to align %rdi at the
	 * top of the unrolling. But unaligned stores
	 * just aren't that common or expensive.
	 */
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)