/* Out-of-line LSE atomics for AArch64 architecture.
   Copyright (C) 2019-2020 Free Software Foundation, Inc.
   Contributed by Linaro Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*
 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
 *
 * There are a number of potential solutions for this problem which have
 * been proposed and rejected for various reasons.  To recap:
 *
 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
 * However, not all Linux distributions are happy with multiple builds,
 * and anyway it has no effect on main applications.
 *
 * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
 * a single copy of each function for all DSOs.  However, ARM is concerned
 * that the branch-to-indirect-branch that is implied by using a PLT,
 * as required by IFUNC, is too much overhead for smaller cpus.
 *
 * (3) Statically predicted direct branches.  This is the approach that
 * is taken here.  These functions are linked into every DSO that uses them.
 * All of the symbols are hidden, so that the functions are called via a
 * direct branch.  The choice of LSE vs non-LSE is done via one byte load
 * followed by a well-predicted direct branch.  The functions are compiled
 * separately to minimize code size.
 */

/*
 * Build-time parameters (supplied on the command line for each object):
 *
 *   SIZE     operand size in bytes: 1, 2, 4, 8, or 16 (16 is cas only).
 *   MODEL    memory model: 1 relaxed, 2 acquire, 3 release, 4 acq_rel.
 *   L_<op>   selects which routine this object provides: L_cas, L_swp,
 *            L_ldadd, L_ldclr, L_ldeor or L_ldset.
 *
 * The routine emitted is named __aarch64_<op><SIZE><SUFF>.
 */

#include "auto-target.h"

/* Tell the assembler to accept LSE instructions.  */
#ifdef HAVE_AS_LSE
	.arch armv8-a+lse
#else
	.arch armv8-a
#endif

/* Declare the symbol gating the LSE implementations.  It is read as a
   single byte by JUMP_IF_NOT_LSE below; it is defined outside this file.  */
	.hidden	__aarch64_have_lse_atomics

/* Turn size and memory model defines into mnemonic fragments.

   S    size suffix appended to load/store-exclusive and LSE mnemonics.
   UXT  instruction used to normalise the expected value before the
	compare in the cas fallback (a plain mov for 4 bytes and up).
   B    size field (bits 31:30) added to the hand-encoded .inst words
	used when the assembler lacks LSE support.  Not defined for
	SIZE == 16, whose only encoding (CASP) does not use it.  */
#if SIZE == 1
# define S	b
# define UXT	uxtb
# define B	0x00000000
#elif SIZE == 2
# define S	h
# define UXT	uxth
# define B	0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT	mov
# if SIZE == 4
#  define B	0x80000000
# elif SIZE == 8
#  define B	0xc0000000
# endif
#else
# error
#endif

/* SUFF  symbol-name suffix for the memory model.
   A, L  acquire / release letters spliced into mnemonics.
   M     acquire/release bits for the CAS and CASP .inst encodings.
   N     acquire/release bits for the SWP and LD<op> .inst encodings.  */
#if MODEL == 1
# define SUFF	_relax
# define A
# define L
# define M	0x000000
# define N	0x000000
#elif MODEL == 2
# define SUFF	_acq
# define A	a
# define L
# define M	0x400000
# define N	0x800000
#elif MODEL == 3
# define SUFF	_rel
# define A
# define L	l
# define M	0x008000
# define N	0x400000
#elif MODEL == 4
# define SUFF	_acq_rel
# define A	a
# define L	l
# define M	0x408000
# define N	0xc00000
#else
# error
#endif

/* Concatenate symbols.  The two-level form forces the arguments to be
   macro-expanded before they are pasted together.  */
#define glue2_(A, B)		A ## B
#define glue2(A, B)		glue2_(A, B)
#define glue3_(A, B, C)		A ## B ## C
#define glue3(A, B, C)		glue3_(A, B, C)
#define glue4_(A, B, C, D)	A ## B ## C ## D
#define glue4(A, B, C, D)	glue4_(A, B, C, D)

/* Select the size of a register, given a regno.  s(N) is the register
   view matching SIZE: wN below 8 bytes, xN otherwise.  */
#define x(N)	glue2(x, N)
#define w(N)	glue2(w, N)
#if SIZE < 8
# define s(N)	w(N)
#else
# define s(N)	x(N)
#endif

/* Routine name and the load/store-exclusive mnemonics for this
   SIZE / MODEL combination.  */
#define NAME(BASE)	glue4(__aarch64_, BASE, SIZE, SUFF)
#define LDXR		glue4(ld, A, xr, S)
#define STXR		glue4(st, L, xr, S)

/* Temporary registers used.  Other than these, only the return value
   register (x0, plus x1 for the 16-byte cas) and the flags are modified.
   tmp3 and tmp4 are used only by the 16-byte cas fallback.  All of them
   are caller-saved under AAPCS64.  */
#define tmp0	16
#define tmp1	17
#define tmp2	15
#define tmp3	14
#define tmp4	13

/* BTI C landing pad, written as a HINT so that it assembles without
   requiring BTI support in the assembler.  */
#define BTI_C	hint	34

/* Start and end a function.  STARTFN places NAME in .text on a 16-byte
   boundary, makes it a hidden global function symbol, opens its CFI
   region and emits the BTI landing pad.  ENDFN closes the CFI region
   and records the symbol size.  */
.macro	STARTFN name
	.text
	.balign	16
	.globl	\name
	.hidden	\name
	.type	\name, %function
	.cfi_startproc
\name:
	BTI_C
.endm

.macro	ENDFN name
	.cfi_endproc
	.size	\name, . - \name
.endm

/* Branch to LABEL if LSE is disabled, i.e. if the byte
   __aarch64_have_lse_atomics reads as zero.  Clobbers tmp0.  */
.macro	JUMP_IF_NOT_LSE label
	adrp	x(tmp0), __aarch64_have_lse_atomics
	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
	cbz	w(tmp0), \label
.endm

#ifdef L_cas

/* Compare and swap.

   SIZE < 16:
     In:   s(0) = expected value, s(1) = desired value, x2 = address.
     Out:  s(0) = value found in memory.
   SIZE == 16:
     In:   x0:x1 = expected value, x2:x3 = desired value, x4 = address.
     Out:  x0:x1 = value found in memory.

   The desired value is stored only if the value found equals the
   expected value.  */
STARTFN	NAME(cas)
	JUMP_IF_NOT_LSE	8f

#if SIZE < 16
#ifdef HAVE_AS_LSE
# define CAS	glue4(cas, A, L, S)	s(0), s(1), [x2]
#else
# define CAS	.inst 0x08a07c41 + B + M
#endif

	CAS		/* s(0), s(1), [x2] */
	ret

	/* Load/store-exclusive fallback.  The exclusive load zero-extends
	   sub-word values, so extend the expected value the same way
	   before comparing.  */
8:	UXT		s(tmp0), s(0)
0:	LDXR		s(0), [x2]
	cmp		s(0), s(tmp0)
	bne		1f
	STXR		w(tmp1), s(1), [x2]
	cbnz		w(tmp1), 0b
1:	ret

#else
#define LDXP	glue3(ld, A, xp)
#define STXP	glue3(st, L, xp)
#ifdef HAVE_AS_LSE
# define CASP	glue3(casp, A, L)	x0, x1, x2, x3, [x4]
#else
# define CASP	.inst 0x48207c82 + M
#endif

	CASP		/* x0, x1, x2, x3, [x4] */
	ret

	/* Load/store-exclusive fallback.  The pair returned by LDXP is
	   only known to be a single-copy-atomic 128-bit read if the
	   matching store-exclusive succeeds.  So always perform the STXP:
	   on a mismatch write back the value just loaded, which leaves
	   memory unchanged but validates the read, and retry if the
	   store fails.  */
8:	mov		x(tmp0), x0
	mov		x(tmp1), x1
0:	LDXP		x0, x1, [x4]
	cmp		x0, x(tmp0)
	ccmp		x1, x(tmp1), #0, eq
	csel		x(tmp2), x2, x0, eq	/* desired if equal, else loaded */
	csel		x(tmp3), x3, x1, eq
	STXP		w(tmp4), x(tmp2), x(tmp3), [x4]
	cbnz		w(tmp4), 0b
	ret

#endif

ENDFN	NAME(cas)
#endif

#ifdef L_swp
#ifdef HAVE_AS_LSE
# define SWP	glue4(swp, A, L, S)	s(0), s(0), [x1]
#else
# define SWP	.inst 0x38208020 + B + N
#endif

/* Atomic exchange.

   In:   s(0) = new value, x1 = address.
   Out:  s(0) = previous value in memory.  */
STARTFN	NAME(swp)
	JUMP_IF_NOT_LSE	8f

	SWP		/* s(0), s(0), [x1] */
	ret

	/* Load/store-exclusive fallback: retry until the store succeeds.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	STXR		w(tmp1), s(tmp0), [x1]
	cbnz		w(tmp1), 0b
	ret

ENDFN	NAME(swp)
#endif

#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

/* LDNM  LSE mnemonic stem, also used to build the routine name.
   OP    ALU instruction computing the new value in the fallback path.
   OPN   opcode field added to the hand-encoded .inst word.  */
#ifdef L_ldadd
#define LDNM	ldadd
#define OP	add
#define OPN	0x0000
#elif defined(L_ldclr)
#define LDNM	ldclr
#define OP	bic
#define OPN	0x1000
#elif defined(L_ldeor)
#define LDNM	ldeor
#define OP	eor
#define OPN	0x2000
#elif defined(L_ldset)
#define LDNM	ldset
#define OP	orr
#define OPN	0x3000
#else
#error
#endif
#ifdef HAVE_AS_LSE
# define LDOP	glue4(LDNM, A, L, S)	s(0), s(0), [x1]
#else
# define LDOP	.inst 0x38200020 + OPN + B + N
#endif

/* Atomic fetch-and-<op>.

   In:   s(0) = operand, x1 = address.
   Out:  s(0) = previous value in memory.

   Memory is updated to (previous OP operand).  */
STARTFN	NAME(LDNM)
	JUMP_IF_NOT_LSE	8f

	LDOP		/* s(0), s(0), [x1] */
	ret

	/* Load/store-exclusive fallback: retry until the store succeeds.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	OP		s(tmp1), s(0), s(tmp0)
	STXR		w(tmp2), s(tmp1), [x1]
	cbnz		w(tmp2), 0b
	ret

ENDFN	NAME(LDNM)
#endif

/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2

/* Supported features based on the code generation options.  Every
   entry point above starts with a BTI landing pad, and none of the
   routines modifies or spills x30, so the object is compatible with
   both BTI and return-address signing whenever those are the
   defaults for this build.  */
#if defined(__ARM_FEATURE_BTI_DEFAULT)
# define BTI_FLAG FEATURE_1_BTI
#else
# define BTI_FLAG 0
#endif

#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT & 3)
# define PAC_FLAG FEATURE_1_PAC
#else
# define PAC_FLAG 0
#endif

/* Add a NT_GNU_PROPERTY_TYPE_0 note.  The words emitted are, in order:
   name size (4), descriptor size (16), note type (5), the name "GNU",
   then one property consisting of TYPE, data size (4), VALUE and one
   word of padding.  */
#define GNU_PROPERTY(type, value)	\
  .section .note.gnu.property, "a";	\
  .p2align 3;				\
  .word 4;				\
  .word 16;				\
  .word 5;				\
  .asciz "GNU";				\
  .word type;				\
  .word 4;				\
  .word value;				\
  .word 0;

#if defined(__linux__) || defined(__FreeBSD__)
.section .note.GNU-stack, "", %progbits

/* Add GNU property note if built with branch protection.  */
# if (BTI_FLAG|PAC_FLAG) != 0
GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
# endif
#endif