;; GCC machine description for i386 synchronization instructions.
;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE

  UNSPEC_FILD_ATOMIC
  UNSPEC_FIST_ATOMIC

  UNSPEC_LDX_ATOMIC
  UNSPEC_STX_ATOMIC

  ;; __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG
  UNSPECV_XCHG
  UNSPECV_LOCK
])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])

(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx);

  output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem);
  return "";
}
  [(set_attr "memory" "unknown")])

(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand")]		;; model
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));

  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (is_mm_seq_cst (model))
    {
      rtx (*mfence_insn)(rtx);
      rtx mem;

      if ((TARGET_64BIT || TARGET_SSE2)
	  && (optimize_function_for_size_p (cfun)
	      || !TARGET_AVOID_MFENCE))
	mfence_insn = gen_mfence_sse2;
      else
	mfence_insn = gen_mfence_nosse;

      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
      MEM_VOLATILE_P (mem) = 1;

      emit_insn (mfence_insn (mem));
    }
  DONE;
})
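
;; For illustration, only a seq-cst C11 fence reaches the barrier
;; patterns above; weaker fences need no code at all.  A sketch
;; (illustrative function names):
;;
;;   void fence_sc (void) { __atomic_thread_fence (__ATOMIC_SEQ_CST); }
;;   void fence_ar (void) { __atomic_thread_fence (__ATOMIC_ACQ_REL); }
;;
;; fence_sc assembles to "mfence" via mfence_sse2, or to
;; "lock orl $0, (%esp)" via mfence_nosse when MFENCE is unavailable
;; or deliberately avoided; fence_ar emits no barrier instruction.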

;; ??? From volume 3, section 8.1.1 (Guaranteed Atomic Operations):
;; only beginning with the Pentium family of processors do we get any
;; guarantee of atomicity for aligned 64-bit quantities.  Beginning
;; with the P6, we get a guarantee for 64-bit accesses that do not
;; cross a cacheline boundary.
;;
;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
;;
;; Importantly, *no* processor makes atomicity guarantees for larger
;; accesses.  In particular, there's no way to perform an atomic TImode
;; move, despite the apparent applicability of MOVDQA et al.

(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])

(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_LDA))]
  ""
{
  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
  if (<MODE>mode == DImode && !TARGET_64BIT)
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
	        assign_386_stack_local (DImode, SLOT_TEMP)));
  else
    {
      rtx dst = operands[0];

      if (MEM_P (dst))
	dst = gen_reg_rtx (<MODE>mode);

      emit_move_insn (dst, operands[1]);

      /* Fix up the destination if needed.  */
      if (dst != operands[0])
	emit_move_insn (operands[0], dst);
    }
  DONE;
})
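
;; For illustration, a 64-bit atomic load on 32-bit x86 takes the FPU
;; path above rather than a cmpxchg8b loop.  A sketch (illustrative
;; name; assumes x87 or SSE is available, per the ATOMIC iterator):
;;
;;   long long f (long long *p)
;;   { return __atomic_load_n (p, __ATOMIC_SEQ_CST); }
;;
;; loads the value in a single 64-bit access (fild/fistp, or movq
;; through the stack temporary) and then splits it into the integer
;; register pair.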

(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_LDA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      if (MEM_P (dst))
	mem = dst;

      if (STACK_REG_P (tmp))
        {
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (mem, tmp));
	}

      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})

(define_expand "atomic_store<mode>"
  [(set (match_operand:ATOMIC 0 "memory_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_STA))]
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[2]));

  if (<MODE>mode == DImode && !TARGET_64BIT)
    {
      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
      /* Note that while we could perform a cmpxchg8b loop, that turns
	 out to be significantly larger than this plus a barrier.  */
      emit_insn (gen_atomic_storedi_fpu
		 (operands[0], operands[1],
	          assign_386_stack_local (DImode, SLOT_TEMP)));
    }
  else
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* For seq-cst stores, use XCHG when we lack MFENCE.  */
      if (is_mm_seq_cst (model)
	  && (!(TARGET_64BIT || TARGET_SSE2)
	      || TARGET_AVOID_MFENCE))
	{
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a store.  */
      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
					   operands[2]));
    }
  /* ... followed by an MFENCE, if required.  */
  if (is_mm_seq_cst (model))
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})
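
;; For illustration, a seq-cst store becomes either a single XCHG or a
;; plain store followed by a fence, depending on tuning.  A sketch
;; (illustrative name, x86-64):
;;
;;   void f (int *p, int v)
;;   { __atomic_store_n (p, v, __ATOMIC_SEQ_CST); }
;;
;; assembles to "xchgl %esi, (%rdi)" when MFENCE is avoided or
;; unavailable, and otherwise to "movl %esi, (%rdi); mfence".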

(define_insn "atomic_store<mode>_1"
  [(set (match_operand:SWI 0 "memory_operand" "=m")
	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
		     (match_operand:SI 2 "const_int_operand")]
		    UNSPEC_STA))]
  ""
  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
		   UNSPEC_STA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (src))
    emit_move_insn (dst, src);
  else
    {
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (dst, tmp));
	}
    }
  DONE;
})

;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations.  But the fix_trunc patterns want way more setup than we want
;; to provide.  Note that the scratch is DFmode instead of XFmode in order
;; to make it easy to allocate a scratch in either SSE or FP_REGs above.

(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_FILD_ATOMIC))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
		   UNSPEC_FIST_ATOMIC))]
  "TARGET_80387"
{
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])

(define_insn "loaddi_via_sse"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_LDX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_insn "storedi_via_sse"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
		   UNSPEC_STX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:SWI124 1 "register_operand")	;; oldval output
   (match_operand:SWI124 2 "memory_operand")	;; memory
   (match_operand:SWI124 3 "register_operand")	;; expected input
   (match_operand:SWI124 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  emit_insn
   (gen_atomic_compare_and_swap<mode>_1
    (operands[1], operands[2], operands[3], operands[4], operands[6]));
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})
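
;; For illustration (illustrative name, x86-64):
;;
;;   _Bool f (int *p, int *exp, int des)
;;   {
;;     return __atomic_compare_exchange_n (p, exp, des, 0,
;;					   __ATOMIC_SEQ_CST,
;;					   __ATOMIC_SEQ_CST);
;;   }
;;
;; centers on "lock cmpxchgl %edx, (%rdi)" with the expected value in
;; %eax, followed by sete to materialize the ZF result set above.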

(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:CASMODE 1 "register_operand")	;; oldval output
   (match_operand:CASMODE 2 "memory_operand")	;; memory
   (match_operand:CASMODE 3 "register_operand")	;; expected input
   (match_operand:CASMODE 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  if (<MODE>mode == DImode && TARGET_64BIT)
    {
      emit_insn
       (gen_atomic_compare_and_swapdi_1
	(operands[1], operands[2], operands[3], operands[4], operands[6]));
    }
  else
    {
      machine_mode hmode = <CASHMODE>mode;

      emit_insn
       (gen_atomic_compare_and_swap<mode>_doubleword
        (operands[1], operands[2], operands[3],
	 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
	 operands[6]));
    }

  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})

;; For double-word compare and swap, we are obliged to play tricks with
;; the input newval (op3:op4) because the Intel register numbering does
;; not match the gcc register numbering, so the pair must be CX:BX.

(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])

(define_insn "atomic_compare_and_swap<dwi>_doubleword"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
	(unspec_volatile:<DWI>
	  [(match_operand:<DWI> 1 "memory_operand" "+m")
	   (match_operand:<DWI> 2 "register_operand" "0")
	   (match_operand:DWIH 3 "register_operand" "b")
	   (match_operand:DWIH 4 "register_operand" "c")
	   (match_operand:SI 5 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG<doublemodesuffix>B"
  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")

(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SWI 0 "register_operand" "=a")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SWI 2 "register_operand" "0")
	   (match_operand:SWI 3 "register_operand" "<r>")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG"
  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")

(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "general_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 2 "memory_operand")
		      (match_dup 0)
		      (match_operand:SWI 3 "register_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 2)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_operand:SWI 5 "register_operand")
		     (match_operand:SWI 6 "general_operand")))]
  "(rtx_equal_p (operands[0], operands[5])
    && rtx_equal_p (operands[1], operands[6]))
   || (rtx_equal_p (operands[0], operands[6])
       && rtx_equal_p (operands[1], operands[5]))"
  [(set (match_dup 0)
	(match_dup 1))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_dup 2)
		      (match_dup 0)
		      (match_dup 3)
		      (match_dup 4)]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 2)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])

(define_peephole2
  [(parallel [(set (match_operand:SWI48 0 "register_operand")
		   (match_operand:SWI48 1 "const_int_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_operand:SWI 2 "register_operand")
		   (unspec_volatile:SWI
		     [(match_operand:SWI 3 "memory_operand")
		      (match_dup 2)
		      (match_operand:SWI 4 "register_operand")
		      (match_operand:SI 5 "const_int_operand")]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 3)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 2)
		     (match_dup 1)))]
  "REGNO (operands[0]) == REGNO (operands[2])"
  [(parallel [(set (match_dup 0)
		   (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 2)
		   (unspec_volatile:SWI
		     [(match_dup 3)
		      (match_dup 2)
		      (match_dup 4)
		      (match_dup 5)]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 3)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])

(define_expand "atomic_fetch_<logic><mode>"
  [(match_operand:SWI124 0 "register_operand")
   (any_logic:SWI124
    (match_operand:SWI124 1 "memory_operand")
    (match_operand:SWI124 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, false,
				    false);
  DONE;
})

(define_expand "atomic_<logic>_fetch<mode>"
  [(match_operand:SWI124 0 "register_operand")
   (any_logic:SWI124
    (match_operand:SWI124 1 "memory_operand")
    (match_operand:SWI124 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, true,
				    false);
  DONE;
})

(define_expand "atomic_fetch_nand<mode>"
  [(match_operand:SWI124 0 "register_operand")
   (match_operand:SWI124 1 "memory_operand")
   (match_operand:SWI124 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, false,
				    false);
  DONE;
})

(define_expand "atomic_nand_fetch<mode>"
  [(match_operand:SWI124 0 "register_operand")
   (match_operand:SWI124 1 "memory_operand")
   (match_operand:SWI124 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, true,
				    false);
  DONE;
})

(define_expand "atomic_fetch_<logic><mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (any_logic:CASMODE
    (match_operand:CASMODE 1 "memory_operand")
    (match_operand:CASMODE 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, false,
				    doubleword);
  DONE;
})

(define_expand "atomic_<logic>_fetch<mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (any_logic:CASMODE
    (match_operand:CASMODE 1 "memory_operand")
    (match_operand:CASMODE 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, true,
				    doubleword);
  DONE;
})

(define_expand "atomic_fetch_nand<mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (match_operand:CASMODE 1 "memory_operand")
   (match_operand:CASMODE 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, false,
				    doubleword);
  DONE;
})

(define_expand "atomic_nand_fetch<mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (match_operand:CASMODE 1 "memory_operand")
   (match_operand:CASMODE 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, true,
				    doubleword);
  DONE;
})
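
;; For illustration, with -mrelax-cmpxchg-loop the expanders above
;; replace the generic compare-and-swap retry loop.  A sketch
;; (illustrative name):
;;
;;   int f (int *p)
;;   { return __atomic_fetch_or (p, 4, __ATOMIC_SEQ_CST); }
;;
;; is then expanded by ix86_expand_atomic_fetch_op_loop, which
;; roughly emits an early load and compare before the "lock cmpxchg"
;; and a pause when the loaded value is not the expected one, to
;; reduce cacheline bouncing under contention.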

;; For operand 2, the nonmemory_operand predicate is used instead of
;; register_operand to allow the combiner to better optimize atomic
;; additions of constants.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(plus:SWI (match_dup 1)
		  (match_operand:SWI 2 "nonmemory_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_XADD"
  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")

;; This peephole2 and the insn that follows it optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
;; followed by a test of the flags, instead of lock xadd and a comparison.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 2 "const_int_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

;; Likewise, but for the -Os special case of *mov<mode>_or.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operand:SWI 2 "constm1_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec_volatile:SWI
	    [(match_operand:SWI 0 "memory_operand" "+m")
	     (match_operand:SI 3 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
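
;; For illustration, the peepholes and the insn above turn
;;
;;   if (__sync_fetch_and_add (&x, -1) == 1)
;;     ...
;;
;; from "movl $-1, %eax; lock xaddl %eax, x(%rip); cmpl $1, %eax" into
;; just "lock subl $1, x(%rip)" (or "lock decl"), branching directly on
;; the ZF produced by the locked instruction: the old value was 1
;; exactly when the new value is 0.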

;; Recall that xchg implicitly asserts LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
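
;; For illustration, this pattern is used when the fetched value is
;; unused (illustrative name):
;;
;;   void f (int *p)
;;   { __atomic_fetch_add (p, 1, __ATOMIC_SEQ_CST); }
;;
;; assembles to "lock addl $1, (%rdi)", or "lock incl (%rdi)" on
;; tunings that prefer inc/dec, clobbering only the flags.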

(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2inc{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_<logic><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
						    operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_reset<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 0))]
  ""
  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")