/*
 * SPDX-License-Identifier: GPL-2.0-or-later
 * Atomic extract 64 from 128-bit, x86_64 version.
 *
 * Copyright (C) 2023 Linaro, Ltd.
 */

#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
#define X86_64_LOAD_EXTRACT_AL16_AL8_H

#ifdef CONFIG_INT128_TYPE
#include "host/atomic128-ldst.h"

/**
 * load_atom_extract_al16_or_al8:
 * @pv: host address
 * @s: object size in bytes, @s <= 8.
 *
 * Load @s bytes from @pv, when pv % s != 0.  If [p, p+s-1] does not
 * cross a 16-byte boundary then the access must be 16-byte atomic,
 * otherwise the access must be 8-byte atomic.
 */
static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
load_atom_extract_al16_or_al8(void *pv, int s)
{
    uintptr_t pi = (uintptr_t)pv;
    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
    int shr = (pi & 7) * 8;
    X86Int128Union r;

    /*
     * ptr_align % 16 is now only 0 or 8.
     * If the host supports atomic loads with VMOVDQU, then always use that,
     * making the branch highly predictable.  Otherwise we must use VMOVDQA
     * when ptr_align % 16 == 0 for 16-byte atomicity.
     */
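    /*
     * Note that when (pi & 8) != 0, ptr_align % 16 == 8, so VMOVDQA
     * (which requires a 16-byte aligned memory operand) could not be
     * used on that path in any case.
     */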
    if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
        asm("vmovdqu %1, %0" : "=x" (r.v) : "m" (*ptr_align));
    } else {
        asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align));
    }
    return int128_getlo(int128_urshift(r.s, shr));
}
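
/*
 * Usage sketch (illustrative only; the caller and the address below are
 * hypothetical, not taken from this file): for a host address such as
 * pv == 0x1004 with s == 8, the bytes [0x1004, 0x100b] do not cross a
 * 16-byte boundary, so the access must be 16-byte atomic.  In that case
 * ptr_align == 0x1000 and shr == 32, so the single 16-byte load covers
 * [0x1000, 0x100f] and the right shift leaves bytes 0x1004..0x100b in
 * the returned low 64 bits:
 *
 *     uint64_t val = load_atom_extract_al16_or_al8((void *)0x1004, 8);
 */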
#else
/* Fallback definition that must be optimized away, or error.  */
uint64_t QEMU_ERROR("unsupported atomic")
    load_atom_extract_al16_or_al8(void *pv, int s);
#endif

#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */