1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * In-kernel FPU support functions
4  *
5  *
6  * Consider these guidelines before using in-kernel FPU functions:
7  *
8  *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
9  *     use of floating-point or vector registers and instructions.
10  *
11  *  2. For kernel_fpu_begin(), specify the vector register range you want to
12  *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
13  *
14  *     a) If your function typically runs in process-context, use the lower
15  *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
16  *     b) If your function typically runs in soft-irq or hard-irq context,
17  *	  prefer using the upper half of the vector registers, for example,
18  *	  specify KERNEL_VXR_HIGH.
19  *
 *     If you adhere to these guidelines, the vector registers of an
 *     interrupted process context do not need to be saved and restored
 *     because the register ranges are disjoint.
 *
 *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
 *     include logic to save and restore up to 16 vector registers at once.
26  *
27  *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
28  *     struct kernel_fpu states.  Vector registers that are in use by outer
29  *     levels are saved and restored.  You can minimize the save and restore
30  *     effort by choosing disjoint vector register ranges.
31  *
 *  4. To use vector floating-point instructions, specify the KERNEL_FPC
 *     flag to save and restore floating-point controls in addition to any
 *     vector register range.
 *
 *  5. To use floating-point registers and instructions only, specify the
 *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
 *     registers V0 to V15 and floating-point controls.
39  *
40  * Copyright IBM Corp. 2015
41  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
42  */
43 
44 #ifndef _ASM_S390_FPU_H
45 #define _ASM_S390_FPU_H
46 
47 #include <linux/cpufeature.h>
48 #include <linux/processor.h>
49 #include <linux/preempt.h>
50 #include <linux/string.h>
51 #include <linux/sched.h>
52 #include <asm/sigcontext.h>
53 #include <asm/fpu-types.h>
54 #include <asm/fpu-insn.h>
55 
/*
 * Bit numbers of the KERNEL_* flags: bit 0 selects the floating-point
 * controls, bits 1-4 select one group of eight vector registers each.
 */
enum {
	KERNEL_FPC_BIT		= 0,
	KERNEL_VXR_V0V7_BIT	= 1,
	KERNEL_VXR_V8V15_BIT	= 2,
	KERNEL_VXR_V16V23_BIT	= 3,
	KERNEL_VXR_V24V31_BIT	= 4,
};
63 
/* Single-bit flags: floating-point controls and four 8-register groups. */
#define KERNEL_FPC		BIT(KERNEL_FPC_BIT)
#define KERNEL_VXR_V0V7		BIT(KERNEL_VXR_V0V7_BIT)
#define KERNEL_VXR_V8V15	BIT(KERNEL_VXR_V8V15_BIT)
#define KERNEL_VXR_V16V23	BIT(KERNEL_VXR_V16V23_BIT)
#define KERNEL_VXR_V24V31	BIT(KERNEL_VXR_V24V31_BIT)

/* 16-register windows; each combines two adjacent 8-register groups. */
#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15)
#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23)
#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)

/* KERNEL_VXR: all four register groups (V0-V31). */
#define KERNEL_VXR		(KERNEL_VXR_LOW | KERNEL_VXR_HIGH)
/* KERNEL_FPR: floating-point controls plus V0-V15. */
#define KERNEL_FPR		(KERNEL_FPC | KERNEL_VXR_LOW)
76 
/*
 * Out-of-line helpers; only their declarations are part of this header.
 * In every call made from this file, @flags is a mask built from the
 * KERNEL_FPC / KERNEL_VXR_* bits.
 */

/* Save to / load from a struct fpu register image. */
void save_fpu_state(struct fpu *state, int flags);
void load_fpu_state(struct fpu *state, int flags);

/* Slow paths behind _kernel_fpu_begin() / _kernel_fpu_end(). */
void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
void __kernel_fpu_end(struct kernel_fpu *state, int flags);
81 
save_vx_regs(__vector128 * vxrs)82 static __always_inline void save_vx_regs(__vector128 *vxrs)
83 {
84 	fpu_vstm(0, 15, &vxrs[0]);
85 	fpu_vstm(16, 31, &vxrs[16]);
86 }
87 
load_vx_regs(__vector128 * vxrs)88 static __always_inline void load_vx_regs(__vector128 *vxrs)
89 {
90 	fpu_vlm(0, 15, &vxrs[0]);
91 	fpu_vlm(16, 31, &vxrs[16]);
92 }
93 
__save_fp_regs(freg_t * fprs,unsigned int offset)94 static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
95 {
96 	fpu_std(0, &fprs[0 * offset]);
97 	fpu_std(1, &fprs[1 * offset]);
98 	fpu_std(2, &fprs[2 * offset]);
99 	fpu_std(3, &fprs[3 * offset]);
100 	fpu_std(4, &fprs[4 * offset]);
101 	fpu_std(5, &fprs[5 * offset]);
102 	fpu_std(6, &fprs[6 * offset]);
103 	fpu_std(7, &fprs[7 * offset]);
104 	fpu_std(8, &fprs[8 * offset]);
105 	fpu_std(9, &fprs[9 * offset]);
106 	fpu_std(10, &fprs[10 * offset]);
107 	fpu_std(11, &fprs[11 * offset]);
108 	fpu_std(12, &fprs[12 * offset]);
109 	fpu_std(13, &fprs[13 * offset]);
110 	fpu_std(14, &fprs[14 * offset]);
111 	fpu_std(15, &fprs[15 * offset]);
112 }
113 
__load_fp_regs(freg_t * fprs,unsigned int offset)114 static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
115 {
116 	fpu_ld(0, &fprs[0 * offset]);
117 	fpu_ld(1, &fprs[1 * offset]);
118 	fpu_ld(2, &fprs[2 * offset]);
119 	fpu_ld(3, &fprs[3 * offset]);
120 	fpu_ld(4, &fprs[4 * offset]);
121 	fpu_ld(5, &fprs[5 * offset]);
122 	fpu_ld(6, &fprs[6 * offset]);
123 	fpu_ld(7, &fprs[7 * offset]);
124 	fpu_ld(8, &fprs[8 * offset]);
125 	fpu_ld(9, &fprs[9 * offset]);
126 	fpu_ld(10, &fprs[10 * offset]);
127 	fpu_ld(11, &fprs[11 * offset]);
128 	fpu_ld(12, &fprs[12 * offset]);
129 	fpu_ld(13, &fprs[13 * offset]);
130 	fpu_ld(14, &fprs[14 * offset]);
131 	fpu_ld(15, &fprs[15 * offset]);
132 }
133 
save_fp_regs(freg_t * fprs)134 static __always_inline void save_fp_regs(freg_t *fprs)
135 {
136 	__save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
137 }
138 
load_fp_regs(freg_t * fprs)139 static __always_inline void load_fp_regs(freg_t *fprs)
140 {
141 	__load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
142 }
143 
save_fp_regs_vx(__vector128 * vxrs)144 static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
145 {
146 	freg_t *fprs = (freg_t *)&vxrs[0].high;
147 
148 	__save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
149 }
150 
load_fp_regs_vx(__vector128 * vxrs)151 static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
152 {
153 	freg_t *fprs = (freg_t *)&vxrs[0].high;
154 
155 	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
156 }
157 
load_user_fpu_regs(void)158 static inline void load_user_fpu_regs(void)
159 {
160 	struct thread_struct *thread = &current->thread;
161 
162 	if (!thread->ufpu_flags)
163 		return;
164 	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
165 	thread->ufpu_flags = 0;
166 }
167 
__save_user_fpu_regs(struct thread_struct * thread,int flags)168 static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
169 {
170 	save_fpu_state(&thread->ufpu, flags);
171 	__atomic_or(flags, &thread->ufpu_flags);
172 }
173 
save_user_fpu_regs(void)174 static inline void save_user_fpu_regs(void)
175 {
176 	struct thread_struct *thread = &current->thread;
177 	int mask, flags;
178 
179 	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
180 	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
181 	if (flags)
182 		__save_user_fpu_regs(thread, flags);
183 	barrier();
184 	WRITE_ONCE(thread->kfpu_flags, mask);
185 }
186 
_kernel_fpu_begin(struct kernel_fpu * state,int flags)187 static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
188 {
189 	struct thread_struct *thread = &current->thread;
190 	int mask, uflags;
191 
192 	mask = __atomic_or(flags, &thread->kfpu_flags);
193 	state->hdr.mask = mask;
194 	uflags = READ_ONCE(thread->ufpu_flags);
195 	if ((uflags & flags) != flags)
196 		__save_user_fpu_regs(thread, ~uflags & flags);
197 	if (mask & flags)
198 		__kernel_fpu_begin(state, flags);
199 }
200 
_kernel_fpu_end(struct kernel_fpu * state,int flags)201 static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
202 {
203 	int mask = state->hdr.mask;
204 
205 	if (mask & flags)
206 		__kernel_fpu_end(state, flags);
207 	barrier();
208 	WRITE_ONCE(current->thread.kfpu_flags, mask);
209 }
210 
211 void __kernel_fpu_invalid_size(void);
212 
kernel_fpu_check_size(int flags,unsigned int size)213 static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
214 {
215 	unsigned int cnt = 0;
216 
217 	if (flags & KERNEL_VXR_V0V7)
218 		cnt += 8;
219 	if (flags & KERNEL_VXR_V8V15)
220 		cnt += 8;
221 	if (flags & KERNEL_VXR_V16V23)
222 		cnt += 8;
223 	if (flags & KERNEL_VXR_V24V31)
224 		cnt += 8;
225 	if (cnt != size)
226 		__kernel_fpu_invalid_size();
227 }
228 
/*
 * kernel_fpu_begin - open a section of in-kernel FPU / vector register use
 * @state: pointer to the caller's save area; its vxrs[] array must have
 *	   exactly eight entries per KERNEL_VXR_* group selected in @flags
 *	   (enforced through kernel_fpu_check_size())
 * @flags: mask of KERNEL_FPC / KERNEL_VXR_* bits
 *
 * Both arguments are evaluated exactly once.  The body is wrapped in
 * do { } while (0) so the macro behaves like a single statement (for
 * example as the unbraced branch of an if/else), and the temporaries use
 * names that cannot collide with ordinary caller variables such as "s".
 */
#define kernel_fpu_begin(state, flags)					\
do {									\
	typeof(state) __s = (state);					\
	int __flags = (flags);						\
									\
	kernel_fpu_check_size(__flags, ARRAY_SIZE(__s->vxrs));		\
	_kernel_fpu_begin((struct kernel_fpu *)__s, __flags);		\
} while (0)
237 
/*
 * kernel_fpu_end - close a section opened with kernel_fpu_begin()
 * @state: pointer to the save area that was passed to kernel_fpu_begin();
 *	   its vxrs[] array must have exactly eight entries per
 *	   KERNEL_VXR_* group selected in @flags (enforced through
 *	   kernel_fpu_check_size())
 * @flags: the same mask that was passed to kernel_fpu_begin()
 *
 * Both arguments are evaluated exactly once.  The body is wrapped in
 * do { } while (0) so the macro behaves like a single statement (for
 * example as the unbraced branch of an if/else), and the temporaries use
 * names that cannot collide with ordinary caller variables such as "s".
 */
#define kernel_fpu_end(state, flags)					\
do {									\
	typeof(state) __s = (state);					\
	int __flags = (flags);						\
									\
	kernel_fpu_check_size(__flags, ARRAY_SIZE(__s->vxrs));		\
	_kernel_fpu_end((struct kernel_fpu *)__s, __flags);		\
} while (0)
246 
save_kernel_fpu_regs(struct thread_struct * thread)247 static inline void save_kernel_fpu_regs(struct thread_struct *thread)
248 {
249 	if (!thread->kfpu_flags)
250 		return;
251 	save_fpu_state(&thread->kfpu, thread->kfpu_flags);
252 }
253 
restore_kernel_fpu_regs(struct thread_struct * thread)254 static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
255 {
256 	if (!thread->kfpu_flags)
257 		return;
258 	load_fpu_state(&thread->kfpu, thread->kfpu_flags);
259 }
260 
/*
 * Copy the "high" member of the first __NUM_FPRS entries of @vxrs into the
 * "ui" member of the corresponding @fprs entries.
 */
static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
{
	int nr = 0;

	while (nr < __NUM_FPRS) {
		fprs[nr].ui = vxrs[nr].high;
		nr++;
	}
}
268 
/*
 * Copy the "ui" member of the first __NUM_FPRS entries of @fprs into the
 * "high" member of the corresponding @vxrs entries.  No other member of
 * the @vxrs entries is written.
 */
static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
{
	int nr = 0;

	while (nr < __NUM_FPRS) {
		vxrs[nr].high = fprs[nr].ui;
		nr++;
	}
}
276 
/*
 * Fill @fpregs from @fpu: zero the pad field, copy the fpc value and
 * convert the vector register image into the fprs area with
 * convert_vx_to_fp().
 */
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{
	freg_t *dst = (freg_t *)&fpregs->fprs;

	fpregs->pad = 0;
	fpregs->fpc = fpu->fpc;
	convert_vx_to_fp(dst, fpu->vxrs);
}
283 
/*
 * Fill @fpu from @fpregs: copy the fpc value and convert the fprs area into
 * the vector register image with convert_fp_to_vx().
 */
static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{
	freg_t *src = (freg_t *)&fpregs->fprs;

	fpu->fpc = fpregs->fpc;
	convert_fp_to_vx(fpu->vxrs, src);
}
289 
290 #endif /* _ASM_S390_FPU_H */
291