/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#ifndef _SYS__ATOMIC_SUBWORD_H_
#define	_SYS__ATOMIC_SUBWORD_H_

/*
 * This header is for platforms that either cannot perform sub-word atomic
 * operations or simply do not implement them.  Emulating them is not ideal:
 * it takes extra effort to ensure an atomic operation fails because of the
 * bits of the word we are trying to write rather than because of the rest
 * of the word.
 */
#ifndef _MACHINE_ATOMIC_H_
#error do not include this header, use machine/atomic.h
#endif

#include <machine/endian.h>
#ifndef _KERNEL
#include <stdbool.h>
#endif

#ifndef NBBY
#define	NBBY	8
#endif

#define	_ATOMIC_WORD_ALIGNED(p)		\
    (uint32_t *)((__uintptr_t)(p) - ((__uintptr_t)(p) % 4))

#if _BYTE_ORDER == _BIG_ENDIAN
#define	_ATOMIC_BYTE_SHIFT(p)		\
    ((3 - ((__uintptr_t)(p) % 4)) * NBBY)

#define	_ATOMIC_HWORD_SHIFT(p)		\
    ((2 - ((__uintptr_t)(p) % 4)) * NBBY)
#else
#define	_ATOMIC_BYTE_SHIFT(p)		\
    ((((__uintptr_t)(p) % 4)) * NBBY)

#define	_ATOMIC_HWORD_SHIFT(p)		\
    ((((__uintptr_t)(p) % 4)) * NBBY)
#endif
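
/*
 * For illustration: given a byte at p = 0x1003 (offset 3 within its 32-bit
 * word), _ATOMIC_WORD_ALIGNED(p) is 0x1000.  On little-endian machines
 * _ATOMIC_BYTE_SHIFT(p) is 3 * NBBY = 24, so the byte occupies bits 31..24
 * of that word; on big-endian machines it is (3 - 3) * NBBY = 0, bits 7..0.
 * Likewise, a 16-bit value at p = 0x1002 shifts by 16 (bits 31..16) on
 * little-endian and by 0 (bits 15..0) on big-endian.
 */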

#ifndef	_atomic_cmpset_masked_word
/*
 * Pass this a word-aligned address, the old and new values, and a mask of
 * the bits we care about; it will loop until the cmpset either succeeds or
 * fails because of those masked bits rather than the rest of the word.
 * old and val should already be preshifted to the proper position.
 */
static __inline int
_atomic_cmpset_masked_word(uint32_t *addr, uint32_t old, uint32_t val,
    uint32_t mask)
{
	int ret;
	uint32_t wcomp;

	wcomp = old;

	/*
	 * We'll attempt the cmpset on the entire word.  Loop here in case the
	 * operation fails because of the other bits resident in that word
	 * rather than the sub-word we're trying to operate on.  Ideally we
	 * only take one trip through here.  We have to recalculate the
	 * expected old value each time, since it is the other part of the
	 * word that changed.
	 */
	do {
		old = (*addr & ~mask) | wcomp;
		ret = atomic_fcmpset_32(addr, &old, (old & ~mask) | val);
	} while (ret == 0 && (old & mask) == wcomp);

	return (ret);
}
#endif
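
/*
 * A concrete little-endian sketch: to cmpset the byte at offset 1 of its
 * word from 0x12 to 0x34, the byte-sized wrapper below passes
 * old = 0x12 << 8, val = 0x34 << 8 and mask = 0xff << 8.  If the containing
 * word currently holds 0xaabb12cc, the loop attempts to store 0xaabb34cc;
 * a failure caused by the other three bytes changing is retried, while a
 * failure caused by the masked byte no longer being 0x12 ends the loop.
 */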

#ifndef	_atomic_fcmpset_masked_word
static __inline int
_atomic_fcmpset_masked_word(uint32_t *addr, uint32_t *old, uint32_t val,
    uint32_t mask)
{

	/*
	 * fcmpset_* is documented in atomic(9) to allow spurious failures on
	 * ll/sc architectures, i.e. failures even when the value at *addr
	 * matches *old, because the sc may fail due to parallel writes or
	 * other reasons.  We take advantage of that here and only attempt the
	 * operation once, because the caller should already be compensating
	 * for that possibility.
	 */
	*old = (*addr & ~mask) | *old;
	return (atomic_fcmpset_32(addr, old, (*old & ~mask) | val));
}
#endif
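
/*
 * A caller is expected to absorb such spurious failures itself, typically
 * with the usual atomic(9) fcmpset retry loop.  A minimal sketch, where
 * "counter" stands in for some caller-owned byte:
 *
 *	uint8_t v;
 *
 *	v = atomic_load_acq_8(&counter);
 *	while (atomic_fcmpset_8(&counter, &v, v + 1) == 0)
 *		continue;	(v is refreshed on failure; retry with it)
 */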

#ifndef atomic_cmpset_8
static __inline int
atomic_cmpset_8(__volatile uint8_t *addr, uint8_t old, uint8_t val)
{
	int shift;

	shift = _ATOMIC_BYTE_SHIFT(addr);

	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    old << shift, val << shift, 0xff << shift));
}
#endif

#ifndef atomic_fcmpset_8
static __inline int
atomic_fcmpset_8(__volatile uint8_t *addr, uint8_t *old, uint8_t val)
{
	int ret, shift;
	uint32_t wold;

	shift = _ATOMIC_BYTE_SHIFT(addr);
	wold = *old << shift;
	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    &wold, val << shift, 0xff << shift);
	if (ret == 0)
		*old = (wold >> shift) & 0xff;
	return (ret);
}
#endif

#ifndef atomic_cmpset_16
static __inline int
atomic_cmpset_16(__volatile uint16_t *addr, uint16_t old, uint16_t val)
{
	int shift;

	shift = _ATOMIC_HWORD_SHIFT(addr);

	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    old << shift, val << shift, 0xffff << shift));
}
#endif

#ifndef atomic_fcmpset_16
static __inline int
atomic_fcmpset_16(__volatile uint16_t *addr, uint16_t *old, uint16_t val)
{
	int ret, shift;
	uint32_t wold;

	shift = _ATOMIC_HWORD_SHIFT(addr);
	wold = *old << shift;
	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
	    &wold, val << shift, 0xffff << shift);
	if (ret == 0)
		*old = (wold >> shift) & 0xffff;
	return (ret);
}
#endif

#ifndef atomic_load_acq_8
static __inline uint8_t
atomic_load_acq_8(volatile uint8_t *p)
{
	int shift;
	uint8_t ret;

	shift = _ATOMIC_BYTE_SHIFT(p);
	ret = (atomic_load_acq_32(_ATOMIC_WORD_ALIGNED(p)) >> shift) & 0xff;
	return (ret);
}
#endif

#ifndef atomic_load_acq_16
static __inline uint16_t
atomic_load_acq_16(volatile uint16_t *p)
{
	int shift;
	uint16_t ret;

	shift = _ATOMIC_HWORD_SHIFT(p);
	ret = (atomic_load_acq_32(_ATOMIC_WORD_ALIGNED(p)) >> shift) &
	    0xffff;
	return (ret);
}
#endif

#undef _ATOMIC_WORD_ALIGNED
#undef _ATOMIC_BYTE_SHIFT
#undef _ATOMIC_HWORD_SHIFT

/*
 * Provide generic testandset/testandclear implementations for longs, built
 * on the fcmpset long primitives.  They may not be ideal for any given arch,
 * so machine/atomic.h can define the corresponding macro (for example,
 * atomic_testandset_long) to override one with an MD-specific version.
 *
 * (Organizationally, these aren't really subword atomics.  But atomic_common
 * is included too early in machine/atomic.h, so it isn't a good place for
 * derived primitives like this.)
 */
#ifndef atomic_testandset_acq_long
static __inline int
atomic_testandset_acq_long(volatile u_long *p, u_int v)
{
	u_long bit, old;
	bool ret;

	bit = (1ul << (v % (sizeof(*p) * NBBY)));

	old = atomic_load_acq_long(p);
	ret = false;
	while (!ret && (old & bit) == 0)
		ret = atomic_fcmpset_acq_long(p, &old, old | bit);

	return (!ret);
}
#endif

#ifndef atomic_testandset_long
static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{
	u_long bit, old;
	bool ret;

	bit = (1ul << (v % (sizeof(*p) * NBBY)));

	old = atomic_load_long(p);
	ret = false;
	while (!ret && (old & bit) == 0)
		ret = atomic_fcmpset_long(p, &old, old | bit);

	return (!ret);
}
#endif

#ifndef atomic_testandclear_long
static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{
	u_long bit, old;
	bool ret;

	bit = (1ul << (v % (sizeof(*p) * NBBY)));

	old = atomic_load_long(p);
	ret = false;
	while (!ret && (old & bit) != 0)
		ret = atomic_fcmpset_long(p, &old, old & ~bit);

	return (ret);
}
#endif
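
/*
 * Usage sketch (busy_map below is a hypothetical caller-owned word):
 * atomic_testandset_long() returns the previous state of the bit, so a zero
 * return means this caller flipped bit 5 from clear to set and now owns the
 * slot; atomic_testandclear_long() releases it again.
 *
 *	static u_long busy_map;
 *
 *	if (atomic_testandset_long(&busy_map, 5) == 0) {
 *		(slot 5 was free and is now claimed)
 *		atomic_testandclear_long(&busy_map, 5);
 *	}
 */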

#endif	/* _SYS__ATOMIC_SUBWORD_H_ */