1/* Out-of-line LSE atomics for AArch64 architecture.
2   Copyright (C) 2019-2020 Free Software Foundation, Inc.
3   Contributed by Linaro Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26/*
27 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
29 *
30 * There are a number of potential solutions for this problem which have
31 * been proposed and rejected for various reasons.  To recap:
32 *
33 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
34 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
35 * However, not all Linux distributions are happy with multiple builds,
36 * and anyway it has no effect on main applications.
37 *
38 * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
39 * a single copy of each function for all DSOs.  However, ARM is concerned
40 * that the branch-to-indirect-branch that is implied by using a PLT,
41 * as required by IFUNC, is too much overhead for smaller cpus.
42 *
43 * (3) Statically predicted direct branches.  This is the approach that
44 * is taken here.  These functions are linked into every DSO that uses them.
45 * All of the symbols are hidden, so that the functions are called via a
46 * direct branch.  The choice of LSE vs non-LSE is done via one byte load
47 * followed by a well-predicted direct branch.  The functions are compiled
48 * separately to minimize code size.
49 */
50
51#include "auto-target.h"
52
/* Tell the assembler to accept LSE instructions.  If the assembler is
   too old to know them (no HAVE_AS_LSE), the LSE paths below are emitted
   as raw instruction words via .inst instead.  */
#ifdef HAVE_AS_LSE
	.arch armv8-a+lse
#else
	.arch armv8-a
#endif

/* Declare the symbol gating the LSE implementations.  It is defined and
   initialized elsewhere in libgcc (presumably from the CPU feature bits
   at startup -- not visible in this file); declaring it hidden keeps the
   one-byte load in JUMP_IF_NOT_LSE a cheap, non-PLT/GOT access.  */
	.hidden	__aarch64_have_lse_atomics
62
/* Turn the size define into mnemonic fragments.

   S   - size suffix glued onto load/store mnemonics (ldxr -> ldxrb, etc.);
         empty for word and doubleword operations.
   UXT - instruction used to widen a value to full register width in the
         exclusive-loop fallbacks (a plain mov when no extension needed).
   B   - operand-size field, bits [31:30], added into the raw .inst
         encodings used when the assembler lacks LSE support.  Not defined
         for SIZE == 16, which uses the separately-encoded CASP.  */
#if SIZE == 1
# define S     b
# define UXT   uxtb
# define B     0x00000000
#elif SIZE == 2
# define S     h
# define UXT   uxth
# define B     0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT   mov
# if SIZE == 4
#  define B    0x80000000
# elif SIZE == 8
#  define B    0xc0000000
# endif
#else
# error
#endif
83
/* Turn the memory model define into mnemonic fragments.

   SUFF - function name suffix for this model.
   A    - "a" when the model has acquire semantics (ldaxr, casa, ...).
   L    - "l" when the model has release semantics (stlxr, casl, ...).
   M    - acquire/release bits for the raw CAS/CASP encodings
          (acquire = bit 22, release = bit 15).
   N    - acquire/release bits for the raw SWP/LD<op> encodings
          (acquire = bit 23, release = bit 22).  */
#if MODEL == 1
# define SUFF  _relax
# define A
# define L
# define M     0x000000
# define N     0x000000
#elif MODEL == 2
# define SUFF  _acq
# define A     a
# define L
# define M     0x400000
# define N     0x800000
#elif MODEL == 3
# define SUFF  _rel
# define A
# define L     l
# define M     0x008000
# define N     0x400000
#elif MODEL == 4
# define SUFF  _acq_rel
# define A     a
# define L     l
# define M     0x408000
# define N     0xc00000
#else
# error
#endif
111
/* Concatenate symbols.  The two-level definitions ensure the arguments
   are macro-expanded before being pasted together.  */
#define glue2_(A, B)		A ## B
#define glue2(A, B)		glue2_(A, B)
#define glue3_(A, B, C)		A ## B ## C
#define glue3(A, B, C)		glue3_(A, B, C)
#define glue4_(A, B, C, D)	A ## B ## C ## D
#define glue4(A, B, C, D)	glue4_(A, B, C, D)

/* Select the size of a register, given a regno.  s(N) is the
   operand-sized view: the 32-bit wN for sub-word and word operations,
   the full 64-bit xN for doubleword operations.  */
#define x(N)			glue2(x, N)
#define w(N)			glue2(w, N)
#if SIZE < 8
# define s(N)			w(N)
#else
# define s(N)			x(N)
#endif

/* NAME(cas) expands to e.g. __aarch64_cas4_acq_rel.  */
#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF)
/* Load/store exclusive with this model's acquire/release flavor and the
   operand-size suffix, e.g. ldaxrb / stlxrb for SIZE == 1 acquire.  */
#define LDXR			glue4(ld, A, xr, S)
#define STXR			glue4(st, L, xr, S)

/* Temporary registers used.  Other than these, only the return value
   register (x0) and the flags are modified.  x16/x17 are the AAPCS64
   intra-procedure-call scratch registers (IP0/IP1).  */
#define tmp0	16
#define tmp1	17
#define tmp2	15

/* "hint 34" is the encoding of "bti c", spelled as a hint so that
   assemblers without BTI support still accept it; it executes as a NOP
   on cores without BTI.  */
#define BTI_C	hint	34
140
141/* Start and end a function.  */
/* Open function NAME: emit into .text, 16-byte aligned, global but
   hidden so all calls resolve to direct branches within the DSO, with
   unwind info started and a BTI landing pad as the first instruction.  */
.macro	STARTFN name
	.text
	.balign	16
	.globl	\name
	.hidden	\name
	.type	\name, %function
	.cfi_startproc
\name:
	BTI_C
.endm
152
/* Close a function opened with STARTFN: end the unwind info and record
   the symbol size.  */
.macro	ENDFN name
	.cfi_endproc
	.size	\name, . - \name
.endm
157
/* Branch to LABEL if LSE is disabled, i.e. if the gate byte
   __aarch64_have_lse_atomics is zero.  Clobbers only tmp0 (x16);
   this is the "one byte load followed by a well-predicted direct
   branch" described in the header comment.  */
.macro	JUMP_IF_NOT_LSE label
	adrp	x(tmp0), __aarch64_have_lse_atomics
	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
	cbz	w(tmp0), \label
.endm
164
#ifdef L_cas

/* Compare and swap: if *ptr equals the expected value, store the
   desired value; in all cases return the value previously at *ptr.
   SIZE < 16:  s(0) = expected (also return value), s(1) = desired,
               x2 = ptr.
   SIZE == 16: x0:x1 = expected (also return value), x2:x3 = desired,
               x4 = ptr.  */
STARTFN	NAME(cas)
	JUMP_IF_NOT_LSE	8f

#if SIZE < 16
#ifdef HAVE_AS_LSE
# define CAS	glue4(cas, A, L, S)	s(0), s(1), [x2]
#else
/* Raw encoding of the CAS instruction above, for pre-LSE assemblers:
   base opcode plus the size field (B) and acquire/release bits (M).  */
# define CAS	.inst 0x08a07c41 + B + M
#endif

	CAS		/* s(0), s(1), [x2] */
	ret

	/* Fallback: load-exclusive/store-exclusive loop.  The expected
	   value is widened first so the full-register cmp matches the
	   (zero-extending, for sub-word sizes) LDXR result.  */
8:	UXT		s(tmp0), s(0)
0:	LDXR		s(0), [x2]
	cmp		s(0), s(tmp0)
	bne		1f
	STXR		w(tmp1), s(1), [x2]
	cbnz		w(tmp1), 0b	/* Store-exclusive failed; retry.  */
1:	ret

#else
#define LDXP	glue3(ld, A, xp)
#define STXP	glue3(st, L, xp)
#ifdef HAVE_AS_LSE
# define CASP	glue3(casp, A, L)	x0, x1, x2, x3, [x4]
#else
/* Raw encoding of the CASP instruction above, for pre-LSE assemblers.  */
# define CASP	.inst 0x48207c82 + M
#endif

	CASP		/* x0, x1, x2, x3, [x4] */
	ret

	/* Fallback: 128-bit load/store-exclusive-pair loop.  */
8:	mov		x(tmp0), x0
	mov		x(tmp1), x1
0:	LDXP		x0, x1, [x4]
	cmp		x0, x(tmp0)
	ccmp		x1, x(tmp1), #0, eq	/* Both halves must match.  */
	bne		1f
	STXP		w(tmp2), x2, x3, [x4]
	cbnz		w(tmp2), 0b	/* Store-exclusive failed; retry.  */
1:	ret

#endif

ENDFN	NAME(cas)
#endif
214
#ifdef L_swp
#ifdef HAVE_AS_LSE
# define SWP	glue4(swp, A, L, S)	s(0), s(0), [x1]
#else
/* Raw encoding of the SWP instruction above, for pre-LSE assemblers:
   base opcode plus the size field (B) and acquire/release bits (N).  */
# define SWP	.inst 0x38208020 + B + N
#endif

/* Atomic exchange: store s(0) to *x1 and return the value previously
   there in s(0).  */
STARTFN	NAME(swp)
	JUMP_IF_NOT_LSE	8f

	SWP		/* s(0), s(0), [x1] */
	ret

	/* Fallback: load-exclusive/store-exclusive loop.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	STXR		w(tmp1), s(tmp0), [x1]
	cbnz		w(tmp1), 0b	/* Store-exclusive failed; retry.  */
	ret

ENDFN	NAME(swp)
#endif
236
#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

/* Atomic fetch-and-<op>: apply the operation to *x1 with operand s(0)
   and return the value previously at *x1 in s(0).

   LDNM - LSE mnemonic for the operation.
   OP   - equivalent ALU instruction for the exclusive-loop fallback
          (note ldclr clears the operand's set bits, hence bic).
   OPN  - opcode field distinguishing the operation in the raw
          .inst encoding.  */
#ifdef L_ldadd
#define LDNM	ldadd
#define OP	add
#define OPN	0x0000
#elif defined(L_ldclr)
#define LDNM	ldclr
#define OP	bic
#define OPN	0x1000
#elif defined(L_ldeor)
#define LDNM	ldeor
#define OP	eor
#define OPN	0x2000
#elif defined(L_ldset)
#define LDNM	ldset
#define OP	orr
#define OPN	0x3000
#else
#error
#endif
#ifdef HAVE_AS_LSE
# define LDOP	glue4(LDNM, A, L, S)	s(0), s(0), [x1]
#else
/* Raw encoding of the LD<op> instruction above, for pre-LSE assemblers:
   base opcode plus operation (OPN), size (B) and acq/rel bits (N).  */
# define LDOP	.inst 0x38200020 + OPN + B + N
#endif

STARTFN	NAME(LDNM)
	JUMP_IF_NOT_LSE	8f

	LDOP		/* s(0), s(0), [x1] */
	ret

	/* Fallback: load-exclusive / compute / store-exclusive loop.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	OP		s(tmp1), s(0), s(tmp0)
	STXR		w(tmp2), s(tmp1), [x1]
	cbnz		w(tmp2), 0b	/* Store-exclusive failed; retry.  */
	ret

ENDFN	NAME(LDNM)
#endif
280
/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2

/* Add a NT_GNU_PROPERTY_TYPE_0 note.  Layout: namesz = 4, descsz = 16,
   type = 5 (NT_GNU_PROPERTY_TYPE_0), name = "GNU", then one property
   in the descriptor: pr_type, pr_datasz = 4, pr_data = value, plus
   4 bytes of padding to keep the descriptor 8-byte aligned.  */
#define GNU_PROPERTY(type, value)	\
  .section .note.gnu.property, "a";	\
  .p2align 3;				\
  .word 4;				\
  .word 16;				\
  .word 5;				\
  .asciz "GNU";				\
  .word type;				\
  .word 4;				\
  .word value;				\
  .word 0;

#if defined(__linux__) || defined(__FreeBSD__)
/* Mark the stack as non-executable (note section without the "x" flag).  */
.section .note.GNU-stack, "", %progbits

/* Add GNU property note if built with branch protection.  */
# ifdef __ARM_FEATURE_BTI_DEFAULT
GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI)
# endif
#endif
307