xref: /linux/arch/powerpc/lib/copypage_power7.S (revision 0be3ff0c)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
8#include <asm/page.h>
9#include <asm/ppc_asm.h>
10
/*
 * copypage_power7 - copy one page, 128 bytes per loop iteration.
 *
 * In:	r3 = destination ("to") address
 *	r4 = source ("from") address
 *
 * Two prefetch streams are programmed first, then PAGE_SIZE bytes are
 * copied by one of two loops:
 *
 *  - CONFIG_ALTIVEC and r3 != 0 after the call to enter_vmx_ops:
 *    eight lvx/stvx pairs per iteration, then a tail call to
 *    exit_vmx_ops.
 *  - otherwise: sixteen ld/std pairs per iteration through r0, r5-r12
 *    and r14-r20; r14-r20 are saved in a STACKFRAMESIZE frame around
 *    the loop and restored before returning.
 */
_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

	/*
	 * r7 = stream parameters for the load side, r10 = the same
	 * parameters tagged with stream ID 1 for the store side.
	 * NOTE(review): with 128-byte cachelines the unit counts below
	 * correspond to one 64K page and one 4K page respectively.
	 */
#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	/*
	 * lis sign-extends its immediate, so clear the upper 32 bits to
	 * leave only the GO bit set in r8.
	 */
	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	/* setup read stream 0  */
	dcbt	0,r4,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	/* eieio separates the stream descriptions from the GO request */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * Calling enter_vmx_ops: save LR at 16(r1) and park r3/r4 in the
	 * R31/R30 slots of the frame that stdu is about to create (hence
	 * the -STACKFRAMESIZE bias while r1 has not moved yet), so all
	 * three can be reloaded after the call.
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_ops
	/*
	 * Test the value left in r3 before r3 is reloaded. cr0 has to
	 * survive until the beq below; none of the ld/mtlr/li/mtctr
	 * instructions in between writes it.
	 */
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	/* CTR = number of 128-byte chunks in a page (used by both loops) */
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/* r3 was 0: use the integer loop; the frame stays allocated for it */
	beq	.Lnonvmx_copy

	/* The VMX loop saves no registers, so drop the frame before it */
	addi	r1,r1,STACKFRAMESIZE

	/* Index registers for the 2nd..8th 16-byte vector of each chunk */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	/* Load a whole 128-byte chunk into v7..v0, then store it */
	.align	5		/* 2^5 = 32-byte aligned loop entry */
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	/*
	 * LR and r1 already hold the caller's values again, so
	 * exit_vmx_ops returns directly to our caller.
	 */
	b	exit_vmx_ops		/* tail call optimise */

#else
	/*
	 * No call is made on this path, so LR is left alone; only the
	 * loop count and the frame used by .Lnonvmx_copy are set up.
	 */
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

	/*
	 * Integer copy. Entered with a STACKFRAMESIZE frame allocated and
	 * CTR loaded on every path that reaches here. r14-r20 serve as
	 * extra data registers in the loop, so they are saved in the frame
	 * first and restored afterwards.
	 */
.Lnonvmx_copy:
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* Read a whole 128-byte chunk into 16 registers, then write it out */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the borrowed registers, pop the frame and return */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr

154