# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# This script generates jit/AtomicOperationsGenerated.h
#
# See the big comment in jit/AtomicOperations.h for an explanation.

import buildconfig

is_64bit = "JS_64BIT" in buildconfig.defines
cpu_arch = buildconfig.substs["CPU_ARCH"]
is_gcc = buildconfig.substs["CC_TYPE"] == "gcc"


def fmt_insn(s):
    return '"' + s + '\\n\\t"\n'
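
# For example (illustrative): fmt_insn("mfence") returns the C source fragment
#     "mfence\n\t"
# followed by a newline, so consecutive calls concatenate into a readable
# multi-line asm() template.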


def gen_seqcst(fun_name):
    if cpu_arch in ("x86", "x86_64"):
        return r"""
            INLINE_ATTR void %(fun_name)s() {
                asm volatile ("mfence\n\t" ::: "memory");
            }""" % {
            "fun_name": fun_name,
        }
    if cpu_arch == "aarch64":
        return r"""
            INLINE_ATTR void %(fun_name)s() {
                asm volatile ("dmb ish\n\t" ::: "memory");
            }""" % {
            "fun_name": fun_name,
        }
    if cpu_arch == "arm":
        return r"""
            INLINE_ATTR void %(fun_name)s() {
                asm volatile ("dmb sy\n\t" ::: "memory");
            }""" % {
            "fun_name": fun_name,
        }
    raise Exception("Unexpected arch")
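
# Illustrative sketch of the output: on x86_64, gen_seqcst("AtomicFenceSeqCst")
# expands to roughly
#
#     INLINE_ATTR void AtomicFenceSeqCst() {
#         asm volatile ("mfence\n\t" ::: "memory");
#     }
#
# with INLINE_ATTR rewritten at the end of generate_atomics_header().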


def gen_load(fun_name, cpp_type, size, barrier):
    # NOTE: the assembly code must match the generated code in:
    # - CacheIRCompiler::emitAtomicsLoadResult
    # - LIRGenerator::visitLoadUnboxedScalar
    # - CodeGenerator::visitAtomicLoad64 (on 64-bit platforms)
    # - MacroAssembler::wasmLoad
    if cpu_arch in ("x86", "x86_64"):
        insns = ""
        if barrier:
            insns += fmt_insn("mfence")
        if size == 8:
            insns += fmt_insn("movb (%[arg]), %[res]")
        elif size == 16:
            insns += fmt_insn("movw (%[arg]), %[res]")
        elif size == 32:
            insns += fmt_insn("movl (%[arg]), %[res]")
        else:
            assert size == 64
            insns += fmt_insn("movq (%[arg]), %[res]")
        if barrier:
            insns += fmt_insn("mfence")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
                %(cpp_type)s res;
                asm volatile (%(insns)s
                    : [res] "=r" (res)
                    : [arg] "r" (arg)
                    : "memory");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        if barrier:
            insns += fmt_insn("dmb ish")
        if size == 8:
            insns += fmt_insn("ldrb %w[res], [%x[arg]]")
        elif size == 16:
            insns += fmt_insn("ldrh %w[res], [%x[arg]]")
        elif size == 32:
            insns += fmt_insn("ldr %w[res], [%x[arg]]")
        else:
            assert size == 64
            insns += fmt_insn("ldr %x[res], [%x[arg]]")
        if barrier:
            insns += fmt_insn("dmb ish")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
                %(cpp_type)s res;
                asm volatile (%(insns)s
                    : [res] "=r" (res)
                    : [arg] "r" (arg)
                    : "memory");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        if barrier:
            insns += fmt_insn("dmb sy")
        if size == 8:
            insns += fmt_insn("ldrb %[res], [%[arg]]")
        elif size == 16:
            insns += fmt_insn("ldrh %[res], [%[arg]]")
        else:
            assert size == 32
            insns += fmt_insn("ldr %[res], [%[arg]]")
        if barrier:
            insns += fmt_insn("dmb sy")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(const %(cpp_type)s* arg) {
                %(cpp_type)s res;
                asm volatile (%(insns)s
                    : [res] "=r" (res)
                    : [arg] "r" (arg)
                    : "memory");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")
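
# Illustrative sketch of the output: on x86_64,
# gen_load("AtomicLoad32SeqCst", "uint32_t", 32, True) expands to roughly
#
#     INLINE_ATTR uint32_t AtomicLoad32SeqCst(const uint32_t* arg) {
#         uint32_t res;
#         asm volatile ("mfence\n\t"
#                       "movl (%[arg]), %[res]\n\t"
#                       "mfence\n\t"
#             : [res] "=r" (res)
#             : [arg] "r" (arg)
#             : "memory");
#         return res;
#     }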


def gen_store(fun_name, cpp_type, size, barrier):
    # NOTE: the assembly code must match the generated code in:
    # - CacheIRCompiler::emitAtomicsStoreResult
    # - LIRGenerator::visitStoreUnboxedScalar
    # - CodeGenerator::visitAtomicStore64 (on 64-bit platforms)
    # - MacroAssembler::wasmStore
    if cpu_arch in ("x86", "x86_64"):
        insns = ""
        if barrier:
            insns += fmt_insn("mfence")
        if size == 8:
            insns += fmt_insn("movb %[val], (%[addr])")
        elif size == 16:
            insns += fmt_insn("movw %[val], (%[addr])")
        elif size == 32:
            insns += fmt_insn("movl %[val], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("movq %[val], (%[addr])")
        if barrier:
            insns += fmt_insn("mfence")
        return """
            INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                asm volatile (%(insns)s
                    :
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory");
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        if barrier:
            insns += fmt_insn("dmb ish")
        if size == 8:
            insns += fmt_insn("strb %w[val], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("strh %w[val], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("str %w[val], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("str %x[val], [%x[addr]]")
        if barrier:
            insns += fmt_insn("dmb ish")
        return """
            INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                asm volatile (%(insns)s
                    :
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory");
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        if barrier:
            insns += fmt_insn("dmb sy")
        if size == 8:
            insns += fmt_insn("strb %[val], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("strh %[val], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("str %[val], [%[addr]]")
        if barrier:
            insns += fmt_insn("dmb sy")
        return """
            INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                asm volatile (%(insns)s
                    :
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory");
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")


def gen_exchange(fun_name, cpp_type, size):
    # NOTE: the assembly code must match the generated code in:
    # - MacroAssembler::atomicExchange
    # - MacroAssembler::atomicExchange64 (on 64-bit platforms)
    if cpu_arch in ("x86", "x86_64"):
        # Request an input/output register for `val` so that we can simply XCHG it
        # with *addr.
        insns = ""
        if size == 8:
            insns += fmt_insn("xchgb %[val], (%[addr])")
        elif size == 16:
            insns += fmt_insn("xchgw %[val], (%[addr])")
        elif size == 32:
            insns += fmt_insn("xchgl %[val], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("xchgq %[val], (%[addr])")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                asm volatile (%(insns)s
                    : [val] "+r" (val)
                    : [addr] "r" (addr)
                    : "memory");
                return val;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
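        # LL/SC retry loop: load-exclusive the old value, attempt a
        # store-exclusive of the new one, and loop (cbnz) until the store
        # succeeds, i.e. until the status register `scratch` reads 0.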
        insns = ""
        insns += fmt_insn("dmb ish")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
            insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
            insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("ldxr %w[res], [%x[addr]]")
            insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("ldxr %x[res], [%x[addr]]")
            insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]")
        insns += fmt_insn("cbnz %w[scratch], 0b")
        insns += fmt_insn("dmb ish")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                %(cpp_type)s res;
                uint32_t scratch;
                asm volatile (%(insns)s
                    : [res] "=&r"(res), [scratch] "=&r"(scratch)
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        insns += fmt_insn("dmb sy")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldrexb %[res], [%[addr]]")
            insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("ldrexh %[res], [%[addr]]")
            insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("ldrex %[res], [%[addr]]")
            insns += fmt_insn("strex %[scratch], %[val], [%[addr]]")
        insns += fmt_insn("cmp %[scratch], #1")
        insns += fmt_insn("beq 0b")
        insns += fmt_insn("dmb sy")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                %(cpp_type)s res;
                uint32_t scratch;
                asm volatile (%(insns)s
                    : [res] "=&r"(res), [scratch] "=&r"(scratch)
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")


def gen_cmpxchg(fun_name, cpp_type, size):
    # NOTE: the assembly code must match the generated code in:
    # - MacroAssembler::compareExchange
    # - MacroAssembler::compareExchange64
    if cpu_arch == "x86" and size == 64:
        # Use a +A constraint to load `oldval` into EDX:EAX as input/output.
        # `newval` is loaded into ECX:EBX.
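        # NB: "%%" below survives the Python %-formatting of this template as a
        # single "%", which is why the named asm operands are written %%[addr].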
        return r"""
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                             %(cpp_type)s oldval,
                                             %(cpp_type)s newval) {
                asm volatile ("lock; cmpxchg8b (%%[addr])\n\t"
                : "+A" (oldval)
                : [addr] "r" (addr),
                  "b" (uint32_t(newval & 0xffff'ffff)),
                  "c" (uint32_t(newval >> 32))
                : "memory", "cc");
                return oldval;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
        }
    if cpu_arch == "arm" and size == 64:
        return r"""
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                             %(cpp_type)s oldval,
                                             %(cpp_type)s newval) {
                uint32_t oldval0 = oldval & 0xffff'ffff;
                uint32_t oldval1 = oldval >> 32;
                uint32_t newval0 = newval & 0xffff'ffff;
                uint32_t newval1 = newval >> 32;
                asm volatile (
                    "dmb sy\n\t"
                    "0: ldrexd r0, r1, [%%[addr]]\n\t"
                    "cmp r0, %%[oldval0]\n\t"
                    "bne 1f\n\t"
                    "cmp r1, %%[oldval1]\n\t"
                    "bne 1f\n\t"
                    "mov r2, %%[newval0]\n\t"
                    "mov r3, %%[newval1]\n\t"
                    "strexd r4, r2, r3, [%%[addr]]\n\t"
                    "cmp r4, #1\n\t"
                    "beq 0b\n\t"
                    "1: dmb sy\n\t"
                    "mov %%[oldval0], r0\n\t"
                    "mov %%[oldval1], r1\n\t"
                    : [oldval0] "+&r" (oldval0), [oldval1] "+&r"(oldval1)
                    : [addr] "r" (addr), [newval0] "r" (newval0), [newval1] "r" (newval1)
                    : "memory", "cc", "r0", "r1", "r2", "r3", "r4");
                return uint64_t(oldval0) | (uint64_t(oldval1) << 32);
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
        }
    if cpu_arch in ("x86", "x86_64"):
        # Use a +a constraint to load `oldval` into RAX as input/output register.
        insns = ""
        if size == 8:
            insns += fmt_insn("lock; cmpxchgb %[newval], (%[addr])")
        elif size == 16:
            insns += fmt_insn("lock; cmpxchgw %[newval], (%[addr])")
        elif size == 32:
            insns += fmt_insn("lock; cmpxchgl %[newval], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("lock; cmpxchgq %[newval], (%[addr])")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                             %(cpp_type)s oldval,
                                             %(cpp_type)s newval) {
                asm volatile (%(insns)s
                    : [oldval] "+a" (oldval)
                    : [addr] "r" (addr), [newval] "r" (newval)
                    : "memory", "cc");
                return oldval;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        insns += fmt_insn("dmb ish")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("uxtb %w[scratch], %w[oldval]")
            insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
            insns += fmt_insn("cmp %w[res], %w[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxrb %w[scratch], %w[newval], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("uxth %w[scratch], %w[oldval]")
            insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
            insns += fmt_insn("cmp %w[res], %w[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxrh %w[scratch], %w[newval], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("mov %w[scratch], %w[oldval]")
            insns += fmt_insn("ldxr %w[res], [%x[addr]]")
            insns += fmt_insn("cmp %w[res], %w[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxr %w[scratch], %w[newval], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("mov %x[scratch], %x[oldval]")
            insns += fmt_insn("ldxr %x[res], [%x[addr]]")
            insns += fmt_insn("cmp %x[res], %x[scratch]")
            insns += fmt_insn("b.ne 1f")
            insns += fmt_insn("stxr %w[scratch], %x[newval], [%x[addr]]")
        insns += fmt_insn("cbnz %w[scratch], 0b")
        insns += fmt_insn("1: dmb ish")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                             %(cpp_type)s oldval,
                                             %(cpp_type)s newval) {
                %(cpp_type)s res, scratch;
                asm volatile (%(insns)s
                    : [res] "=&r" (res), [scratch] "=&r" (scratch)
                    : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        insns += fmt_insn("dmb sy")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("uxtb %[scratch], %[oldval]")
            insns += fmt_insn("ldrexb %[res], [%[addr]]")
            insns += fmt_insn("cmp %[res], %[scratch]")
            insns += fmt_insn("bne 1f")
            insns += fmt_insn("strexb %[scratch], %[newval], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("uxth %[scratch], %[oldval]")
            insns += fmt_insn("ldrexh %[res], [%[addr]]")
            insns += fmt_insn("cmp %[res], %[scratch]")
            insns += fmt_insn("bne 1f")
            insns += fmt_insn("strexh %[scratch], %[newval], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("mov %[scratch], %[oldval]")
            insns += fmt_insn("ldrex %[res], [%[addr]]")
            insns += fmt_insn("cmp %[res], %[scratch]")
            insns += fmt_insn("bne 1f")
            insns += fmt_insn("strex %[scratch], %[newval], [%[addr]]")
        insns += fmt_insn("cmp %[scratch], #1")
        insns += fmt_insn("beq 0b")
        insns += fmt_insn("1: dmb sy")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr,
                                             %(cpp_type)s oldval,
                                             %(cpp_type)s newval) {
                %(cpp_type)s res, scratch;
                asm volatile (%(insns)s
                    : [res] "=&r" (res), [scratch] "=&r" (scratch)
                    : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")


def gen_fetchop(fun_name, cpp_type, size, op):
    # NOTE: the assembly code must match the generated code in:
    # - MacroAssembler::atomicFetchOp
    # - MacroAssembler::atomicFetchOp64 (on 64-bit platforms)
    if cpu_arch in ("x86", "x86_64"):
        # The `add` operation can be optimized with XADD.
        if op == "add":
            insns = ""
            if size == 8:
                insns += fmt_insn("lock; xaddb %[val], (%[addr])")
            elif size == 16:
                insns += fmt_insn("lock; xaddw %[val], (%[addr])")
            elif size == 32:
                insns += fmt_insn("lock; xaddl %[val], (%[addr])")
            else:
                assert size == 64
                insns += fmt_insn("lock; xaddq %[val], (%[addr])")
            return """
                INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                    asm volatile (%(insns)s
                        : [val] "+&r" (val)
                        : [addr] "r" (addr)
                        : "memory", "cc");
                    return val;
                }""" % {
                "cpp_type": cpp_type,
                "fun_name": fun_name,
                "insns": insns,
            }
        # Use a +a constraint to ensure `res` is stored in RAX. This is required
        # for the CMPXCHG instruction.
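        # The OPb/OPw/OPl/OPq tokens below are placeholders; insns.replace("OP", op)
        # further down turns them into the concrete mnemonic (andl, orq, ...).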
        insns = ""
        if size == 8:
            insns += fmt_insn("movb (%[addr]), %[res]")
            insns += fmt_insn("0: movb %[res], %[scratch]")
            insns += fmt_insn("OPb %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])")
        elif size == 16:
            insns += fmt_insn("movw (%[addr]), %[res]")
            insns += fmt_insn("0: movw %[res], %[scratch]")
            insns += fmt_insn("OPw %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])")
        elif size == 32:
            insns += fmt_insn("movl (%[addr]), %[res]")
            insns += fmt_insn("0: movl %[res], %[scratch]")
            insns += fmt_insn("OPl %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])")
        else:
            assert size == 64
            insns += fmt_insn("movq (%[addr]), %[res]")
            insns += fmt_insn("0: movq %[res], %[scratch]")
            insns += fmt_insn("OPq %[val], %[scratch]")
            insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])")
        insns = insns.replace("OP", op)
        insns += fmt_insn("jnz 0b")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                %(cpp_type)s res, scratch;
                asm volatile (%(insns)s
                    : [res] "=&a" (res), [scratch] "=&r" (scratch)
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "aarch64":
        insns = ""
        insns += fmt_insn("dmb ish")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldxrb %w[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]")
        elif size == 16:
            insns += fmt_insn("ldxrh %w[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]")
        elif size == 32:
            insns += fmt_insn("ldxr %w[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]")
        else:
            assert size == 64
            insns += fmt_insn("ldxr %x[res], [%x[addr]]")
            insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]")
            insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]")
        cpu_op = op
        if cpu_op == "or":
            cpu_op = "orr"
        if cpu_op == "xor":
            cpu_op = "eor"
        insns = insns.replace("OP", cpu_op)
        insns += fmt_insn("cbnz %w[scratch2], 0b")
        insns += fmt_insn("dmb ish")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                %(cpp_type)s res;
                uintptr_t scratch1, scratch2;
                asm volatile (%(insns)s
                    : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2)
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    if cpu_arch == "arm":
        insns = ""
        insns += fmt_insn("dmb sy")
        insns += fmt_insn("0:")
        if size == 8:
            insns += fmt_insn("ldrexb %[res], [%[addr]]")
            insns += fmt_insn("OP %[scratch1], %[res], %[val]")
            insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]")
        elif size == 16:
            insns += fmt_insn("ldrexh %[res], [%[addr]]")
            insns += fmt_insn("OP %[scratch1], %[res], %[val]")
            insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]")
        else:
            assert size == 32
            insns += fmt_insn("ldrex %[res], [%[addr]]")
            insns += fmt_insn("OP %[scratch1], %[res], %[val]")
            insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]")
        cpu_op = op
        if cpu_op == "or":
            cpu_op = "orr"
        if cpu_op == "xor":
            cpu_op = "eor"
        insns = insns.replace("OP", cpu_op)
        insns += fmt_insn("cmp %[scratch2], #1")
        insns += fmt_insn("beq 0b")
        insns += fmt_insn("dmb sy")
        return """
            INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) {
                %(cpp_type)s res;
                uintptr_t scratch1, scratch2;
                asm volatile (%(insns)s
                    : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2)
                    : [addr] "r" (addr), [val] "r"(val)
                    : "memory", "cc");
                return res;
            }""" % {
            "cpp_type": cpp_type,
            "fun_name": fun_name,
            "insns": insns,
        }
    raise Exception("Unexpected arch")


def gen_copy(fun_name, cpp_type, size, unroll, direction):
    assert direction in ("down", "up")
    offset = 0
    if direction == "up":
        offset = unroll - 1
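    # "down" emits the unrolled moves at offsets 0 .. unroll-1, "up" at
    # unroll-1 .. 0; presumably so callers can pick the direction that is safe
    # for a potentially overlapping copy.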
    insns = ""
    for i in range(unroll):
        if cpu_arch in ("x86", "x86_64"):
            if size == 1:
                insns += fmt_insn("movb OFFSET(%[src]), %[scratch]")
                insns += fmt_insn("movb %[scratch], OFFSET(%[dst])")
            elif size == 4:
                insns += fmt_insn("movl OFFSET(%[src]), %[scratch]")
                insns += fmt_insn("movl %[scratch], OFFSET(%[dst])")
            else:
                assert size == 8
                insns += fmt_insn("movq OFFSET(%[src]), %[scratch]")
                insns += fmt_insn("movq %[scratch], OFFSET(%[dst])")
        elif cpu_arch == "aarch64":
            if size == 1:
                insns += fmt_insn("ldrb %w[scratch], [%x[src], OFFSET]")
                insns += fmt_insn("strb %w[scratch], [%x[dst], OFFSET]")
            else:
                assert size == 8
                insns += fmt_insn("ldr %x[scratch], [%x[src], OFFSET]")
                insns += fmt_insn("str %x[scratch], [%x[dst], OFFSET]")
        elif cpu_arch == "arm":
            if size == 1:
                insns += fmt_insn("ldrb %[scratch], [%[src], #OFFSET]")
                insns += fmt_insn("strb %[scratch], [%[dst], #OFFSET]")
            else:
                assert size == 4
                insns += fmt_insn("ldr %[scratch], [%[src], #OFFSET]")
                insns += fmt_insn("str %[scratch], [%[dst], #OFFSET]")
        else:
            raise Exception("Unexpected arch")
        insns = insns.replace("OFFSET", str(offset * size))

        if direction == "down":
            offset += 1
        else:
            offset -= 1

    return """
        INLINE_ATTR void %(fun_name)s(uint8_t* dst, const uint8_t* src) {
            %(cpp_type)s* dst_ = reinterpret_cast<%(cpp_type)s*>(dst);
            const %(cpp_type)s* src_ = reinterpret_cast<const %(cpp_type)s*>(src);
            %(cpp_type)s scratch;
            asm volatile (%(insns)s
                : [scratch] "=&r" (scratch)
                : [dst] "r" (dst_), [src] "r"(src_)
                : "memory");
        }""" % {
        "cpp_type": cpp_type,
        "fun_name": fun_name,
        "insns": insns,
    }


HEADER_TEMPLATE = """\
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_AtomicOperationsGenerated_h
#define jit_AtomicOperationsGenerated_h

/* This file is generated by jit/GenerateAtomicOperations.py. Do not edit! */

namespace js {
namespace jit {

%(contents)s

} // namespace jit
} // namespace js

#endif // jit_AtomicOperationsGenerated_h
"""


def generate_atomics_header(c_out):
    contents = ""
    if cpu_arch in ("x86", "x86_64", "arm", "aarch64"):
        contents += "#define JS_HAVE_GENERATED_ATOMIC_OPS 1"

        # `fence` performs a full memory barrier.
        contents += gen_seqcst("AtomicFenceSeqCst")

        contents += gen_load("AtomicLoad8SeqCst", "uint8_t", 8, True)
        contents += gen_load("AtomicLoad16SeqCst", "uint16_t", 16, True)
        contents += gen_load("AtomicLoad32SeqCst", "uint32_t", 32, True)
        if is_64bit:
            contents += gen_load("AtomicLoad64SeqCst", "uint64_t", 64, True)

        # These are access-atomic up to sizeof(uintptr_t).
        contents += gen_load("AtomicLoad8Unsynchronized", "uint8_t", 8, False)
        contents += gen_load("AtomicLoad16Unsynchronized", "uint16_t", 16, False)
        contents += gen_load("AtomicLoad32Unsynchronized", "uint32_t", 32, False)
        if is_64bit:
            contents += gen_load("AtomicLoad64Unsynchronized", "uint64_t", 64, False)

        contents += gen_store("AtomicStore8SeqCst", "uint8_t", 8, True)
        contents += gen_store("AtomicStore16SeqCst", "uint16_t", 16, True)
        contents += gen_store("AtomicStore32SeqCst", "uint32_t", 32, True)
        if is_64bit:
            contents += gen_store("AtomicStore64SeqCst", "uint64_t", 64, True)

        # These are access-atomic up to sizeof(uintptr_t).
        contents += gen_store("AtomicStore8Unsynchronized", "uint8_t", 8, False)
        contents += gen_store("AtomicStore16Unsynchronized", "uint16_t", 16, False)
        contents += gen_store("AtomicStore32Unsynchronized", "uint32_t", 32, False)
        if is_64bit:
            contents += gen_store("AtomicStore64Unsynchronized", "uint64_t", 64, False)

        # `exchange` takes a cell address and a value.  It stores the value in
        # the cell and returns the value previously in the cell.
        contents += gen_exchange("AtomicExchange8SeqCst", "uint8_t", 8)
        contents += gen_exchange("AtomicExchange16SeqCst", "uint16_t", 16)
        contents += gen_exchange("AtomicExchange32SeqCst", "uint32_t", 32)
        if is_64bit:
            contents += gen_exchange("AtomicExchange64SeqCst", "uint64_t", 64)

        # `cmpxchg` takes a cell address, an expected value and a replacement value.
        # If the value in the cell equals the expected value then the replacement value
        # is stored in the cell.  It always returns the value previously in the cell.
        contents += gen_cmpxchg("AtomicCmpXchg8SeqCst", "uint8_t", 8)
        contents += gen_cmpxchg("AtomicCmpXchg16SeqCst", "uint16_t", 16)
        contents += gen_cmpxchg("AtomicCmpXchg32SeqCst", "uint32_t", 32)
        contents += gen_cmpxchg("AtomicCmpXchg64SeqCst", "uint64_t", 64)

        # `add` adds a value atomically to the cell and returns the old value in the
        # cell.  (There is no `sub`; just add the negated value.)
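        # E.g. AtomicAdd32SeqCst(addr, uint32_t(-5)) subtracts 5 (mod 2**32).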
        contents += gen_fetchop("AtomicAdd8SeqCst", "uint8_t", 8, "add")
        contents += gen_fetchop("AtomicAdd16SeqCst", "uint16_t", 16, "add")
        contents += gen_fetchop("AtomicAdd32SeqCst", "uint32_t", 32, "add")
        if is_64bit:
            contents += gen_fetchop("AtomicAdd64SeqCst", "uint64_t", 64, "add")

        # `and` bitwise-ands a value atomically into the cell and returns the old value
        # in the cell.
        contents += gen_fetchop("AtomicAnd8SeqCst", "uint8_t", 8, "and")
        contents += gen_fetchop("AtomicAnd16SeqCst", "uint16_t", 16, "and")
        contents += gen_fetchop("AtomicAnd32SeqCst", "uint32_t", 32, "and")
        if is_64bit:
            contents += gen_fetchop("AtomicAnd64SeqCst", "uint64_t", 64, "and")

        # `or` bitwise-ors a value atomically into the cell and returns the old value
        # in the cell.
        contents += gen_fetchop("AtomicOr8SeqCst", "uint8_t", 8, "or")
        contents += gen_fetchop("AtomicOr16SeqCst", "uint16_t", 16, "or")
        contents += gen_fetchop("AtomicOr32SeqCst", "uint32_t", 32, "or")
        if is_64bit:
            contents += gen_fetchop("AtomicOr64SeqCst", "uint64_t", 64, "or")

        # `xor` bitwise-xors a value atomically into the cell and returns the old value
        # in the cell.
        contents += gen_fetchop("AtomicXor8SeqCst", "uint8_t", 8, "xor")
        contents += gen_fetchop("AtomicXor16SeqCst", "uint16_t", 16, "xor")
        contents += gen_fetchop("AtomicXor32SeqCst", "uint32_t", 32, "xor")
        if is_64bit:
            contents += gen_fetchop("AtomicXor64SeqCst", "uint64_t", 64, "xor")

        # See comment in jit/AtomicOperations-shared-jit.cpp for an explanation.
        wordsize = 8 if is_64bit else 4
        words_in_block = 8
        blocksize = words_in_block * wordsize
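        # That is, 8 * 8 = 64-byte blocks on 64-bit targets and 8 * 4 = 32-byte
        # blocks on 32-bit targets.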

        contents += gen_copy(
            "AtomicCopyUnalignedBlockDownUnsynchronized",
            "uint8_t",
            1,
            blocksize,
            "down",
        )
        contents += gen_copy(
            "AtomicCopyUnalignedBlockUpUnsynchronized", "uint8_t", 1, blocksize, "up"
        )

        contents += gen_copy(
            "AtomicCopyUnalignedWordDownUnsynchronized", "uint8_t", 1, wordsize, "down"
        )
        contents += gen_copy(
            "AtomicCopyUnalignedWordUpUnsynchronized", "uint8_t", 1, wordsize, "up"
        )

        contents += gen_copy(
            "AtomicCopyBlockDownUnsynchronized",
            "uintptr_t",
            wordsize,
            words_in_block,
            "down",
        )
        contents += gen_copy(
            "AtomicCopyBlockUpUnsynchronized",
            "uintptr_t",
            wordsize,
            words_in_block,
            "up",
        )

        contents += gen_copy(
            "AtomicCopyWordUnsynchronized", "uintptr_t", wordsize, 1, "down"
        )
        contents += gen_copy("AtomicCopyByteUnsynchronized", "uint8_t", 1, 1, "down")

        contents += "\n"
        contents += (
            "constexpr size_t JS_GENERATED_ATOMICS_BLOCKSIZE = "
            + str(blocksize)
            + ";\n"
        )
        contents += (
            "constexpr size_t JS_GENERATED_ATOMICS_WORDSIZE = " + str(wordsize) + ";\n"
        )

        # Work around a GCC issue on 32-bit x86 by adding MOZ_NEVER_INLINE.
        # See bug 1756347.
        if is_gcc and cpu_arch == "x86":
            contents = contents.replace("INLINE_ATTR", "MOZ_NEVER_INLINE inline")
        else:
            contents = contents.replace("INLINE_ATTR", "inline")

    c_out.write(
        HEADER_TEMPLATE
        % {
            "contents": contents,
        }
    )

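
if __name__ == "__main__":
    # Convenience sketch, not used by the build (which normally drives the
    # generate_atomics_header entry point itself): running the module directly
    # inside a build environment where `buildconfig` resolves dumps the
    # generated header to stdout for inspection.
    import sys

    generate_atomics_header(sys.stdout)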