/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 */

/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non-access miss handler
 *       can be used.
 */

#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/pgtable.h>

	.section .text.hot
	.align	16

ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

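	/*
	 * Enter real mode: the rfi sequence below reloads the IIA queues
	 * with the physical address of label 1 and installs REAL_MODE_PSW,
	 * so the whole-TLB purge runs with translation and interrupts off.
	 */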
	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl           %r1, %ipsw
	rfi
	nop

1:      load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
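	/*
	 * On machines without a split TLB, the ALTERNATIVE above patches
	 * out the instruction-TLB flush (88: up to fitdone); the data-TLB
	 * flush below then covers the combined TLB.
	 */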

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:      bv		%r0(%r2)
	nop

	/*
	 * When running in qemu, drop whole flush_tlb_all_local function and
	 * replace by one pdtlbe instruction, for which QEMU will drop all
	 * local TLB entries.
	 */
3:	pdtlbe		%r0(%sr1,%r0)
	bv,n		%r0(%r2)
	ALTERNATIVE_CODE(flush_tlb_all_local, 2, ALT_COND_RUN_ON_QEMU, 3b)
ENDPROC_CFI(flush_tlb_all_local)

	.import cache_info,data

ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice            %r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:
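	/* Flush 16 cache lines per pass; %arg2 holds the count of lines still to flush */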
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)


	.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)

/* Clear page using kernel mapping.  */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)

/* Copy page using kernel mapping.  */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)

/*
 * NOTE: Code in clear_user_page has a hard coded dependency on the
 *       maximum alias boundary being 4 Mb. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Yah, what about the PA8800 and PA8900 processors?
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical page (shifted for tlb insert) of "to" translation
 *          %r23 physical page (shifted for tlb insert) of "from" translation
 */

        /* Drop prot bits and convert to page addr for iitlbt and idtlbt */
        #define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
        .macro          convert_phys_for_tlb_insert20  phys
        extrd,u         \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
        depdi           _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
	.endm

	/*
	 * copy_user_page_asm() performs a page copy using mappings
	 * equivalent to the user page mappings.  It can be used to
	 * implement copy_user_page() but unfortunately both the `from'
	 * and `to' pages need to be flushed through mappings equivalent
	 * to the user mappings after the copy because the kernel accesses
	 * the `from' page through the kmap kernel mapping and the `to'
	 * page needs to be flushed since code can be copied.  As a
	 * result, this implementation is less efficient than the simpler
	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
	 * the flushes needed for the kernel mapping.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 *
	 */

ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
	depd		%r24,63,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 63-TMPALIAS_SIZE_BITS,1, %r29	/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 31-TMPALIAS_SIZE_BITS,1, %r29	/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
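	/* On non-SMP kernels the ALTERNATIVEs above patch the pdtlb
	   instructions into their local form (pdtlb,l) at boot */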

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)

ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)

ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, r31, %r25

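	/* %r25 now holds the address of the last cache line of the aliased
	   page; each pass below flushes 16 d-cache lines through the
	   tmpalias mapping until %r28 catches up with %r25 */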
1:	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)

ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, r31, %r25

1:      pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)

ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:      fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)

ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)

ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)

ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

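	/* %r26 is now rounded down to a cache line boundary.  The loop at
	   1: flushes 16 lines per pass while a full 16-line block still
	   fits below the end address in %r25; the loop at 2: flushes the
	   remaining lines one at a time */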
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)

ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)

ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)

ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)

ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25


1:      fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)

ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)

	.text

	/* align should cover use of rfi in disable_sr_hashing_asm and
	 * srdis_done.
	 */
	.align	256
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * Switch to real mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
	b,n		srdis_done

srdis_pcxs:

	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */

	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:

	/* Disable Space Register Hashing for PCXL */

	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:

	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */

	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */


srdis_done:
	/* Switch back to virtual mode */
	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
	load32 	   	2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

2:      bv		%r0(%r2)
	nop
ENDPROC_CFI(disable_sr_hashing_asm)

	.end