/* automatically generated by memory-auto.sh, do not edit! */

/*
 * Copyright (c) 2005, 2006 Matt Fredette
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Matt Fredette.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

_TME_RCSID("$Id: memory-auto.sh,v 1.2 2010/02/15 15:16:28 fredette Exp $");

/* macros: */

/* the plain partial read internal macro: */
#define _tme_memory_read(type_whole, type_part, mem, offset) \
  (((type_whole) \
    *((_tme_const type_part *) \
      (_tme_cast_pointer_const(tme_uint8_t *, type_whole *, mem) \
       + (offset)))) \
   << (8 * (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG \
            ? (sizeof(type_whole) \
               - ((offset) + sizeof(type_part))) \
            : (offset))))

/* the plain partial write internal macro: */
#define _tme_memory_write(type_whole, type_part, mem, offset, x) \
  do { \
    *((type_part *) \
      (_tme_cast_pointer(tme_uint8_t *, type_whole *, mem) \
       + (offset))) \
      = (type_part) \
        (((type_whole) (x)) \
         >> (8 * (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG \
                  ? (sizeof(type_whole) \
                     - ((offset) + sizeof(type_part))) \
                  : (offset)))); \
  } while (/* CONSTCOND */ 0)

/* this tests bits in a memory address: */
#define _tme_memory_address_test(mem, bits, align_min) \
  (((bits) & ~((align_min) - 1)) & ((unsigned long) (mem)))

/* this returns a mask of all-bits-one in given type: */
#define _tme_memory_type_mask(type, shift) \
  ((type) ((((type) 0) - ((type) 1)) shift))

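/* illustrative sketch (not part of the generated interface): composing
   a 16-bit value from two byte-sized partial reads with the internal
   macro above, as the default tme_memory_read16 macro below does when
   the memory may be misaligned.  on a little-endian host byte 0
   supplies bits 0-7 and byte 1 supplies bits 8-15; the shifts are
   reversed on a big-endian host.  the function name is hypothetical: */
#if 0 /* example only */
static tme_uint16_t
example_read16_by_bytes(_tme_const tme_uint16_t *mem)
{
  return (_tme_memory_read(tme_uint16_t, tme_uint8_t, mem, 0)
          | _tme_memory_read(tme_uint16_t, tme_uint8_t, mem, 1));
}
#endif /* example only */
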
/* the bus 16-bit read slow function: */
tme_uint16_t tme_memory_bus_read16 _TME_P((_tme_const tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 16-bit write slow function: */
void tme_memory_bus_write16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 32-bit read slow function: */
tme_uint32_t tme_memory_bus_read32 _TME_P((_tme_const tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 32-bit write slow function: */
void tme_memory_bus_write32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int, unsigned int));

#ifdef TME_HAVE_INT64_T

/* the bus 64-bit read slow function: */
tme_uint64_t tme_memory_bus_read64 _TME_P((_tme_const tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 64-bit write slow function: */
void tme_memory_bus_write64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int, unsigned int));

#endif /* TME_HAVE_INT64_T */

/* the bus read buffer function and default macro implementation: */
void tme_memory_bus_read_buffer _TME_P((_tme_const tme_shared tme_uint8_t *, tme_uint8_t *, unsigned long, tme_rwlock_t *, unsigned int, unsigned int));
#define tme_memory_bus_read_buffer(mem, buffer, count, rwlock, align_min, bus_boundary) \
  do { \
    if (TME_THREADS_COOPERATIVE) { \
      memcpy((buffer), ((_tme_const tme_uint8_t *) (mem)), (count)); \
    } \
    else { \
      tme_memory_bus_read_buffer(((_tme_const tme_shared tme_uint8_t *) (mem)), ((tme_uint8_t *) _tme_audit_pointer(buffer)), (count), (rwlock), (align_min), (bus_boundary)); \
    } \
  } while (/* CONSTCOND */ 0)

/* the bus write buffer function and default macro implementation: */
void tme_memory_bus_write_buffer _TME_P((tme_shared tme_uint8_t *, _tme_const tme_uint8_t *, unsigned long, tme_rwlock_t *, unsigned int, unsigned int));
#define tme_memory_bus_write_buffer(mem, buffer, count, rwlock, align_min, bus_boundary) \
  do { \
    if (TME_THREADS_COOPERATIVE) { \
      memcpy((tme_uint8_t *) (mem), (buffer), (count)); \
    } \
    else { \
      tme_memory_bus_write_buffer(((tme_shared tme_uint8_t *) _tme_audit_pointer_shared(mem)), ((_tme_const tme_uint8_t *) _tme_audit_pointer_const(buffer)), (count), (rwlock), (align_min), (bus_boundary)); \
    } \
  } while (/* CONSTCOND */ 0)

/* the 8-bit atomic operations: */
tme_uint8_t tme_memory_atomic_add8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_sub8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_mul8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_div8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_and8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_or8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_xor8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_not8 _TME_P((tme_shared tme_uint8_t *, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_neg8 _TME_P((tme_shared tme_uint8_t *, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_xchg8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_cx8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_uint8_t, tme_rwlock_t *, unsigned int));

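/* illustrative sketch (not part of the generated interface): a typical
   compare-and-exchange retry loop built on tme_memory_atomic_cx8 above.
   this assumes that tme_memory_atomic_cx8 returns the value that was in
   memory before the exchange and that its final argument is the minimum
   alignment; the function and variable names are hypothetical: */
#if 0 /* example only */
static tme_uint8_t
example_atomic_set_bits8(tme_shared tme_uint8_t *mem,
                         tme_uint8_t bits,
                         tme_rwlock_t *rwlock)
{
  tme_uint8_t value_cmp;
  tme_uint8_t value_old;

  /* retry until no other thread has changed the memory between our
     read of the old value and our exchange: */
  value_cmp = 0;
  for (;;) {
    value_old = tme_memory_atomic_cx8(mem, value_cmp, (value_cmp | bits),
                                      rwlock, sizeof(tme_uint8_t));
    if (value_old == value_cmp) {
      break;
    }
    value_cmp = value_old;
  }
  return (value_old);
}
#endif /* example only */
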
/* the default 16-bit memory plain read macro: */
#define tme_memory_read16(mem, align_min) \
  ( \
   /* if we know at compile time that the memory is aligned \
      enough to read directly, do the single direct read. \
      \
      otherwise, if we know at compile time that the memory \
      is less aligned than the smallest acceptable parts size, \
      test if the memory is aligned enough to read directly, \
      and do the single direct read if it is: */ \
   (__tme_predict_true((_TME_ALIGNOF_INT16_T == 1 \
                        || (align_min) >= _TME_ALIGNOF_INT16_T) \
                       || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint16_t) \
                           && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0))) \
   ? \
   _tme_memory_read(tme_uint16_t, tme_uint16_t, mem, 0) \
   : \
   (_tme_memory_read(tme_uint16_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint16_t, tme_uint8_t, mem, (8 / 8))) \
  )

/* the default 16-bit memory plain write macro: */
#define tme_memory_write16(mem, x, align_min) \
  do { \
    if \
      /* if we know at compile time that the memory is aligned \
         enough to write directly, do the single direct write. \
         \
         otherwise, if we know at compile time that the memory \
         is less aligned than the smallest acceptable parts size, \
         test if the memory is aligned enough to write directly, \
         and do the single direct write if it is: */ \
      (__tme_predict_true((_TME_ALIGNOF_INT16_T == 1 \
                           || (align_min) >= _TME_ALIGNOF_INT16_T) \
                          || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint16_t) \
                              && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0))) \
      { \
        _tme_memory_write(tme_uint16_t, tme_uint16_t, mem, 0, x); \
      } \
    else \
      { \
        _tme_memory_write(tme_uint16_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint16_t, tme_uint8_t, mem, (8 / 8), x); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 16-bit memory atomic read macro: */
#define tme_memory_atomic_read16(mem, lock, align_min) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read16((_tme_const tme_uint16_t *) _tme_audit_type(mem, tme_uint16_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, and we can \
      make direct 16-bit accesses, and this memory is aligned \
      enough to make a single direct atomic access, do the single \
      direct atomic read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                       && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint16_t *)) \
   /* otherwise, we must do a slow indirect atomic read: */ \
   : \
   tme_memory_atomic_read16(mem, lock, align_min) \
  )

/* the default 16-bit memory atomic write macro: */
#define tme_memory_atomic_write16(mem, x, lock, align_min) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write16((tme_uint16_t *) _tme_cast_pointer_shared(tme_uint16_t *, tme_uint16_t *, mem), x, align_min); \
        /* otherwise, if we aren't locking for all memory accesses, and we can \
           make direct 16-bit accesses, and this memory is aligned \
           enough to make a single direct atomic access, do the single \
           direct atomic write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                          && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint16_t *)) \
          = (x); \
        /* otherwise, we must do a slow indirect atomic write: */ \
      } \
    else \
      { \
        tme_memory_atomic_write16(mem, x, lock, align_min); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 16-bit memory bus read macro: */
#define tme_memory_bus_read16(mem, lock, align_min, bus_boundary) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read16((_tme_const tme_uint16_t *) _tme_audit_type(mem, tme_uint16_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, the \
      host supports misaligned 16-bit accesses, the host's bus \
      boundary is greater than or equal to the emulated bus \
      boundary, and this memory is aligned enough, do a single \
      direct bus read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && _TME_ALIGNOF_INT16_T < sizeof(tme_uint16_t) \
                       && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                       && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint16_t *)) \
   /* otherwise, if we're locking for all memory accesses, or \
      if this memory must cross at least one host bus boundary \
      and the host bus boundary is less than the emulated bus \
      boundary, do a slow indirect atomic read: */ \
   : \
   (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                        || (sizeof(tme_uint16_t) > TME_MEMORY_BUS_BOUNDARY \
                            && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
   ? \
   tme_memory_atomic_read16(mem, lock, align_min) \
   /* otherwise, if the memory is not larger than the emulated \
      bus boundary, or if size-alignment would mean an atomic \
      host access and it is size-aligned, do a single atomic \
      read, which may be direct or slow: */ \
   : \
   (__tme_predict_true((sizeof(tme_uint16_t) <= (bus_boundary) \
                        || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                            && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) <= sizeof(tme_uint16_t))) \
                       && _tme_memory_address_test(mem, sizeof(tme_uint16_t) - 1, align_min) == 0)) \
   ? \
   tme_memory_atomic_read16(mem, lock, sizeof(tme_uint16_t)) \
   /* otherwise, we must do a slow bus read: */ \
   : \
   tme_memory_bus_read16(mem, lock, align_min, bus_boundary) \
  )

/* the default 16-bit memory bus write macro: */
#define tme_memory_bus_write16(mem, x, lock, align_min, bus_boundary) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write16((tme_uint16_t *) _tme_cast_pointer_shared(tme_uint16_t *, tme_uint16_t *, mem), x, align_min); \
        /* otherwise, if we aren't locking for all memory accesses, the \
           host supports misaligned 16-bit accesses, the host's bus \
           boundary is greater than or equal to the emulated bus \
           boundary, and this memory is aligned enough, do a single \
           direct bus write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && _TME_ALIGNOF_INT16_T < sizeof(tme_uint16_t) \
                          && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                          && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint16_t *)) \
          = (x); \
        /* otherwise, if we're locking for all memory accesses, or \
           if this memory must cross at least one host bus boundary \
           and the host bus boundary is less than the emulated bus \
           boundary, do a slow indirect atomic write: */ \
      } \
    else if \
      (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                           || (sizeof(tme_uint16_t) > TME_MEMORY_BUS_BOUNDARY \
                               && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
      { \
        tme_memory_atomic_write16(mem, x, lock, align_min); \
        /* otherwise, if the memory is not larger than the emulated \
           bus boundary, or if size-alignment would mean an atomic \
           host access and it is size-aligned, do a single atomic \
           write, which may be direct or slow: */ \
      } \
    else if \
      (__tme_predict_true((sizeof(tme_uint16_t) <= (bus_boundary) \
                           || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                               && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) <= sizeof(tme_uint16_t))) \
                          && _tme_memory_address_test(mem, sizeof(tme_uint16_t) - 1, align_min) == 0)) \
      { \
        tme_memory_atomic_write16(mem, x, lock, sizeof(tme_uint16_t)); \
        /* otherwise, we must do a slow bus write: */ \
      } \
    else \
      { \
        tme_memory_bus_write16(mem, x, lock, align_min, bus_boundary); \
      } \
  } while (/* CONSTCOND */ 0)

/* the 16-bit atomic operations: */
tme_uint16_t tme_memory_atomic_add16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_sub16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_mul16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_div16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_and16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_or16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_xor16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_not16 _TME_P((tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_neg16 _TME_P((tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_xchg16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_cx16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_read16 _TME_P((_tme_const tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int));
void tme_memory_atomic_write16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));

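/* illustrative sketch (not part of the generated interface): reading
   and updating a 16-bit register in shared, bus-visible memory with the
   default bus access macros above.  the structure, field, and rwlock
   names are hypothetical, and the alignment and bus-boundary arguments
   shown are only plausible values for a device whose register is
   16-bit aligned on a 16-bit emulated bus: */
#if 0 /* example only */
static void
example_update_csr16(tme_shared tme_uint16_t *csr,
                     tme_rwlock_t *rwlock)
{
  tme_uint16_t value;

  /* read, modify, and write back the register: */
  value = tme_memory_bus_read16(csr, rwlock,
                                sizeof(tme_uint16_t), sizeof(tme_uint16_t));
  value |= 0x0001;
  tme_memory_bus_write16(csr, value, rwlock,
                         sizeof(tme_uint16_t), sizeof(tme_uint16_t));
}
#endif /* example only */
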
/* the default 32-bit memory plain read macro: */
#define tme_memory_read32(mem, align_min) \
  ( \
   /* if we know at compile time that the memory is aligned \
      enough to read directly, do the single direct read. \
      \
      otherwise, if we know at compile time that the memory \
      is less aligned than the smallest acceptable parts size, \
      test if the memory is aligned enough to read directly, \
      and do the single direct read if it is: */ \
   (__tme_predict_true((_TME_ALIGNOF_INT32_T == 1 \
                        || (align_min) >= _TME_ALIGNOF_INT32_T) \
                       || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) \
                           && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0))) \
   ? \
   _tme_memory_read(tme_uint32_t, tme_uint32_t, mem, 0) \
   : \
   ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) <= sizeof(tme_uint8_t)) \
    && ((align_min) <= sizeof(tme_uint8_t))) \
   ? \
   (_tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (24 / 8))) \
   : \
   (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
   ? \
   (_tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint16_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (24 / 8))) \
   : \
   (_tme_memory_read(tme_uint32_t, tme_uint16_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint16_t, mem, (16 / 8))) \
  )

/* the default 32-bit memory plain write macro: */
#define tme_memory_write32(mem, x, align_min) \
  do { \
    if \
      /* if we know at compile time that the memory is aligned \
         enough to write directly, do the single direct write. \
         \
         otherwise, if we know at compile time that the memory \
         is less aligned than the smallest acceptable parts size, \
         test if the memory is aligned enough to write directly, \
         and do the single direct write if it is: */ \
      (__tme_predict_true((_TME_ALIGNOF_INT32_T == 1 \
                           || (align_min) >= _TME_ALIGNOF_INT32_T) \
                          || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) \
                              && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0))) \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint32_t, mem, 0, x); \
      } \
    else if \
      ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) <= sizeof(tme_uint8_t)) \
       && ((align_min) <= sizeof(tme_uint8_t))) \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (24 / 8), x); \
      } \
    else if \
      (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint16_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (24 / 8), x); \
      } \
    else \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint16_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint16_t, mem, (16 / 8), x); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 32-bit memory atomic read macro: */
#define tme_memory_atomic_read32(mem, lock, align_min) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read32((_tme_const tme_uint32_t *) _tme_audit_type(mem, tme_uint32_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, and we can \
      make direct 32-bit accesses, and this memory is aligned \
      enough to make a single direct atomic access, do the single \
      direct atomic read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                       && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint32_t *)) \
   /* otherwise, we must do a slow indirect atomic read: */ \
   : \
   tme_memory_atomic_read32(mem, lock, align_min) \
  )

/* the default 32-bit memory atomic write macro: */
#define tme_memory_atomic_write32(mem, x, lock, align_min) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write32((tme_uint32_t *) _tme_cast_pointer_shared(tme_uint32_t *, tme_uint32_t *, mem), x, align_min); \
        /* otherwise, if we aren't locking for all memory accesses, and we can \
           make direct 32-bit accesses, and this memory is aligned \
           enough to make a single direct atomic access, do the single \
           direct atomic write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                          && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint32_t *)) \
          = (x); \
        /* otherwise, we must do a slow indirect atomic write: */ \
      } \
    else \
      { \
        tme_memory_atomic_write32(mem, x, lock, align_min); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 32-bit memory bus read macro: */
#define tme_memory_bus_read32(mem, lock, align_min, bus_boundary) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read32((_tme_const tme_uint32_t *) _tme_audit_type(mem, tme_uint32_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, the \
      host supports misaligned 32-bit accesses, the host's bus \
      boundary is greater than or equal to the emulated bus \
      boundary, and this memory is aligned enough, do a single \
      direct bus read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && _TME_ALIGNOF_INT32_T < sizeof(tme_uint32_t) \
                       && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                       && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint32_t *)) \
   /* otherwise, if we're locking for all memory accesses, or \
      if this memory must cross at least one host bus boundary \
      and the host bus boundary is less than the emulated bus \
      boundary, do a slow indirect atomic read: */ \
   : \
   (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                        || (sizeof(tme_uint32_t) > TME_MEMORY_BUS_BOUNDARY \
                            && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
   ? \
   tme_memory_atomic_read32(mem, lock, align_min) \
   /* otherwise, if the memory is not larger than the emulated \
      bus boundary, or if size-alignment would mean an atomic \
      host access and it is size-aligned, do a single atomic \
      read, which may be direct or slow: */ \
   : \
   (__tme_predict_true((sizeof(tme_uint32_t) <= (bus_boundary) \
                        || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                            && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) <= sizeof(tme_uint32_t))) \
                       && _tme_memory_address_test(mem, sizeof(tme_uint32_t) - 1, align_min) == 0)) \
   ? \
   tme_memory_atomic_read32(mem, lock, sizeof(tme_uint32_t)) \
   /* otherwise, we must do a slow bus read: */ \
   : \
   tme_memory_bus_read32(mem, lock, align_min, bus_boundary) \
  )

/* the default 32-bit memory bus write macro: */
#define tme_memory_bus_write32(mem, x, lock, align_min, bus_boundary) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write32((tme_uint32_t *) _tme_cast_pointer_shared(tme_uint32_t *, tme_uint32_t *, mem), x, align_min); \
        /* otherwise, if we aren't locking for all memory accesses, the \
           host supports misaligned 32-bit accesses, the host's bus \
           boundary is greater than or equal to the emulated bus \
           boundary, and this memory is aligned enough, do a single \
           direct bus write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && _TME_ALIGNOF_INT32_T < sizeof(tme_uint32_t) \
                          && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                          && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint32_t *)) \
          = (x); \
        /* otherwise, if we're locking for all memory accesses, or \
           if this memory must cross at least one host bus boundary \
           and the host bus boundary is less than the emulated bus \
           boundary, do a slow indirect atomic write: */ \
      } \
    else if \
      (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                           || (sizeof(tme_uint32_t) > TME_MEMORY_BUS_BOUNDARY \
                               && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
      { \
        tme_memory_atomic_write32(mem, x, lock, align_min); \
        /* otherwise, if the memory is not larger than the emulated \
           bus boundary, or if size-alignment would mean an atomic \
           host access and it is size-aligned, do a single atomic \
           write, which may be direct or slow: */ \
      } \
    else if \
      (__tme_predict_true((sizeof(tme_uint32_t) <= (bus_boundary) \
                           || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                               && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) <= sizeof(tme_uint32_t))) \
                          && _tme_memory_address_test(mem, sizeof(tme_uint32_t) - 1, align_min) == 0)) \
      { \
        tme_memory_atomic_write32(mem, x, lock, sizeof(tme_uint32_t)); \
        /* otherwise, we must do a slow bus write: */ \
      } \
    else \
      { \
        tme_memory_bus_write32(mem, x, lock, align_min, bus_boundary); \
      } \
  } while (/* CONSTCOND */ 0)

/* the 32-bit atomic operations: */
tme_uint32_t tme_memory_atomic_add32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_sub32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_mul32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_div32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_and32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_or32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_xor32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_not32 _TME_P((tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_neg32 _TME_P((tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_xchg32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_cx32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_read32 _TME_P((_tme_const tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int));
void tme_memory_atomic_write32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));

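/* illustrative sketch (not part of the generated interface): using the
   plain 32-bit macros above on memory that is only known to be
   byte-aligned.  an align_min of one byte tells the macros to assume
   nothing about alignment, letting them fall back to smaller parts as
   needed.  the function and parameter names are hypothetical: */
#if 0 /* example only */
static void
example_copy32_unaligned(tme_uint32_t *dst,
                         _tme_const tme_uint32_t *src)
{
  tme_uint32_t value;

  /* read the possibly misaligned source and write the possibly
     misaligned destination: */
  value = tme_memory_read32(src, sizeof(tme_uint8_t));
  tme_memory_write32(dst, value, sizeof(tme_uint8_t));
}
#endif /* example only */
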
#ifdef TME_HAVE_INT64_T

/* the default 64-bit memory plain read macro: */
#define tme_memory_read64(mem, align_min) \
  ( \
   /* if we know at compile time that the memory is aligned \
      enough to read directly, do the single direct read. \
      \
      otherwise, if we know at compile time that the memory \
      is less aligned than the smallest acceptable parts size, \
      test if the memory is aligned enough to read directly, \
      and do the single direct read if it is: */ \
   (__tme_predict_true((_TME_ALIGNOF_INT64_T == 1 \
                        || (align_min) >= _TME_ALIGNOF_INT64_T) \
                       || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) \
                           && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0))) \
   ? \
   _tme_memory_read(tme_uint64_t, tme_uint64_t, mem, 0) \
   : \
   ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint8_t)) \
    && ((align_min) <= sizeof(tme_uint8_t))) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (24 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (32 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (40 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (48 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (56 / 8))) \
   : \
   (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (24 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (40 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (56 / 8))) \
   : \
   ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint16_t)) \
    && ((align_min) <= sizeof(tme_uint16_t))) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (32 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (48 / 8))) \
   : \
   (_tme_memory_address_test(mem, sizeof(tme_uint16_t), align_min) != 0) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint32_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (48 / 8))) \
   : \
   (_tme_memory_read(tme_uint64_t, tme_uint32_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint32_t, mem, (32 / 8))) \
  )

/* the default 64-bit memory plain write macro: */
#define tme_memory_write64(mem, x, align_min) \
  do { \
    if \
      /* if we know at compile time that the memory is aligned \
         enough to write directly, do the single direct write. \
         \
         otherwise, if we know at compile time that the memory \
         is less aligned than the smallest acceptable parts size, \
         test if the memory is aligned enough to write directly, \
         and do the single direct write if it is: */ \
      (__tme_predict_true((_TME_ALIGNOF_INT64_T == 1 \
                           || (align_min) >= _TME_ALIGNOF_INT64_T) \
                          || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) \
                              && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0))) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint64_t, mem, 0, x); \
      } \
    else if \
      ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint8_t)) \
       && ((align_min) <= sizeof(tme_uint8_t))) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (24 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (32 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (40 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (48 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (56 / 8), x); \
      } \
    else if \
      (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (24 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (40 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (56 / 8), x); \
      } \
    else if \
      ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint16_t)) \
       && ((align_min) <= sizeof(tme_uint16_t))) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (32 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (48 / 8), x); \
      } \
    else if \
      (_tme_memory_address_test(mem, sizeof(tme_uint16_t), align_min) != 0) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint32_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (48 / 8), x); \
      } \
    else \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint32_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint32_t, mem, (32 / 8), x); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 64-bit memory atomic read macro: */
#define tme_memory_atomic_read64(mem, lock, align_min) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read64((_tme_const tme_uint64_t *) _tme_audit_type(mem, tme_uint64_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, and we can \
      make direct 64-bit accesses, and this memory is aligned \
      enough to make a single direct atomic access, do the single \
      direct atomic read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                       && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint64_t *)) \
   /* otherwise, we must do a slow indirect atomic read: */ \
   : \
   tme_memory_atomic_read64(mem, lock, align_min) \
  )

/* the default 64-bit memory atomic write macro: */
#define tme_memory_atomic_write64(mem, x, lock, align_min) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write64((tme_uint64_t *) _tme_cast_pointer_shared(tme_uint64_t *, tme_uint64_t *, mem), x, align_min); \
        /* otherwise, if we aren't locking for all memory accesses, and we can \
           make direct 64-bit accesses, and this memory is aligned \
           enough to make a single direct atomic access, do the single \
           direct atomic write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                          && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint64_t *)) \
          = (x); \
        /* otherwise, we must do a slow indirect atomic write: */ \
      } \
    else \
      { \
        tme_memory_atomic_write64(mem, x, lock, align_min); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 64-bit memory bus read macro: */
#define tme_memory_bus_read64(mem, lock, align_min, bus_boundary) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read64((_tme_const tme_uint64_t *) _tme_audit_type(mem, tme_uint64_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, the \
      host supports misaligned 64-bit accesses, the host's bus \
      boundary is greater than or equal to the emulated bus \
      boundary, and this memory is aligned enough, do a single \
      direct bus read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && _TME_ALIGNOF_INT64_T < sizeof(tme_uint64_t) \
                       && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                       && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint64_t *)) \
   /* otherwise, if we're locking for all memory accesses, or \
      if this memory must cross at least one host bus boundary \
      and the host bus boundary is less than the emulated bus \
      boundary, do a slow indirect atomic read: */ \
   : \
   (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                        || (sizeof(tme_uint64_t) > TME_MEMORY_BUS_BOUNDARY \
                            && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
   ? \
   tme_memory_atomic_read64(mem, lock, align_min) \
   /* otherwise, if the memory is not larger than the emulated \
      bus boundary, or if size-alignment would mean an atomic \
      host access and it is size-aligned, do a single atomic \
      read, which may be direct or slow: */ \
   : \
   (__tme_predict_true((sizeof(tme_uint64_t) <= (bus_boundary) \
                        || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                            && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) <= sizeof(tme_uint64_t))) \
                       && _tme_memory_address_test(mem, sizeof(tme_uint64_t) - 1, align_min) == 0)) \
   ? \
   tme_memory_atomic_read64(mem, lock, sizeof(tme_uint64_t)) \
   /* otherwise, we must do a slow bus read: */ \
   : \
   tme_memory_bus_read64(mem, lock, align_min, bus_boundary) \
  )

/* the default 64-bit memory bus write macro: */
#define tme_memory_bus_write64(mem, x, lock, align_min, bus_boundary) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write64((tme_uint64_t *) _tme_cast_pointer_shared(tme_uint64_t *, tme_uint64_t *, mem), x, align_min); \
        /* otherwise, if we aren't locking for all memory accesses, the \
           host supports misaligned 64-bit accesses, the host's bus \
           boundary is greater than or equal to the emulated bus \
           boundary, and this memory is aligned enough, do a single \
           direct bus write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && _TME_ALIGNOF_INT64_T < sizeof(tme_uint64_t) \
                          && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                          && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint64_t *)) \
          = (x); \
        /* otherwise, if we're locking for all memory accesses, or \
           if this memory must cross at least one host bus boundary \
           and the host bus boundary is less than the emulated bus \
           boundary, do a slow indirect atomic write: */ \
      } \
    else if \
      (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                           || (sizeof(tme_uint64_t) > TME_MEMORY_BUS_BOUNDARY \
                               && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
      { \
        tme_memory_atomic_write64(mem, x, lock, align_min); \
        /* otherwise, if the memory is not larger than the emulated \
           bus boundary, or if size-alignment would mean an atomic \
           host access and it is size-aligned, do a single atomic \
           write, which may be direct or slow: */ \
      } \
    else if \
      (__tme_predict_true((sizeof(tme_uint64_t) <= (bus_boundary) \
                           || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                               && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) <= sizeof(tme_uint64_t))) \
                          && _tme_memory_address_test(mem, sizeof(tme_uint64_t) - 1, align_min) == 0)) \
      { \
        tme_memory_atomic_write64(mem, x, lock, sizeof(tme_uint64_t)); \
        /* otherwise, we must do a slow bus write: */ \
      } \
    else \
      { \
        tme_memory_bus_write64(mem, x, lock, align_min, bus_boundary); \
      } \
  } while (/* CONSTCOND */ 0)

/* the 64-bit atomic operations: */
tme_uint64_t tme_memory_atomic_add64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_sub64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_mul64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_div64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_and64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_or64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_xor64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_not64 _TME_P((tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_neg64 _TME_P((tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_xchg64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_cx64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_read64 _TME_P((_tme_const tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int));
void tme_memory_atomic_write64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));

#endif /* TME_HAVE_INT64_T */
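
/* illustrative sketch (not part of the generated interface): a
   DMA-style copy into and out of shared emulated memory with the bus
   buffer macros declared above.  the function and parameter names are
   hypothetical, and the byte alignment and 32-bit bus-boundary
   arguments are only plausible values: */
#if 0 /* example only */
static void
example_copy_buffers(tme_shared tme_uint8_t *mem,
                     tme_uint8_t *local,
                     unsigned long count,
                     tme_rwlock_t *rwlock)
{
  /* copy from the shared memory into a private buffer: */
  tme_memory_bus_read_buffer(mem, local, count, rwlock,
                             sizeof(tme_uint8_t), sizeof(tme_uint32_t));

  /* ... operate on the private copy ... */

  /* copy the private buffer back into the shared memory: */
  tme_memory_bus_write_buffer(mem, local, count, rwlock,
                              sizeof(tme_uint8_t), sizeof(tme_uint32_t));
}
#endif /* example only */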