;; GCC machine description for i386 synchronization instructions.
;; Copyright (C) 2005-2018 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE

  UNSPEC_FILD_ATOMIC
  UNSPEC_FIST_ATOMIC

  UNSPEC_LDX_ATOMIC
  UNSPEC_STX_ATOMIC

  ;; __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG
  UNSPECV_XCHG
  UNSPECV_LOCK
])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])

(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  "!(TARGET_64BIT || TARGET_SSE2)"
  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
  [(set_attr "memory" "unknown")])

(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand")]		;; model
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));

  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (is_mm_seq_cst (model))
    {
      rtx (*mfence_insn)(rtx);
      rtx mem;

      if (TARGET_64BIT || TARGET_SSE2)
	mfence_insn = gen_mfence_sse2;
      else
	mfence_insn = gen_mfence_nosse;

      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
      MEM_VOLATILE_P (mem) = 1;

      emit_insn (mfence_insn (mem));
    }
  DONE;
})
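
;; For illustration, a sketch of what this expander produces (hypothetical
;; C11 source, not part of the original file):
;;
;;   __atomic_thread_fence (__ATOMIC_SEQ_CST);
;;     -> mfence                  (TARGET_64BIT || TARGET_SSE2)
;;     -> lock orl $0, (%esp)     (otherwise)
;;
;; Any weaker model needs no machine barrier on x86, so nothing is emitted.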

;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations:
;; only beginning at Pentium family processors do we get any guarantee of
;; atomicity in aligned 64-bit quantities.  Beginning at P6, we get a
;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
;;
;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
;;
;; Importantly, *no* processor makes atomicity guarantees for larger
;; accesses.  In particular, there's no way to perform an atomic TImode
;; move, despite the apparent applicability of MOVDQA et al.
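;;
;; To illustrate the consequence (a sketch; hypothetical source): on a
;; 32-bit Pentium-class target,
;;
;;   long long v;
;;   long long x = __atomic_load_n (&v, __ATOMIC_SEQ_CST);
;;
;; is done with a single 64-bit FPU or SSE load (fild/movq) through the
;; patterns below, rather than with a cmpxchg8b loop.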

(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])

(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_LDA))]
  ""
{
  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
  if (<MODE>mode == DImode && !TARGET_64BIT)
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
	        assign_386_stack_local (DImode, SLOT_TEMP)));
  else
    {
      rtx dst = operands[0];

      if (MEM_P (dst))
	dst = gen_reg_rtx (<MODE>mode);

      emit_move_insn (dst, operands[1]);

      /* Fix up the destination if needed.  */
      if (dst != operands[0])
	emit_move_insn (operands[0], dst);
    }
  DONE;
})

(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_LDA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      if (MEM_P (dst))
	mem = dst;

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (mem, tmp));
	}

      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "fp_register_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 2 "memory_operand")
	(unspec:DI [(match_dup 0)]
		   UNSPEC_FIST_ATOMIC))
   (set (match_operand:DF 3 "any_fp_register_operand")
	(match_operand:DF 4 "memory_operand"))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])
   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
  [(set (match_dup 3) (match_dup 5))]
  "operands[5] = gen_lowpart (DFmode, operands[1]);")
(define_peephole2
  [(set (match_operand:DF 0 "fp_register_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 2 "memory_operand")
	(unspec:DI [(match_dup 0)]
		   UNSPEC_FIST_ATOMIC))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 3 "any_fp_register_operand")
	(match_operand:DF 4 "memory_operand"))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])
   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
  [(const_int 0)]
{
  emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
  emit_insn (gen_memory_blockage ());
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "sse_reg_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]
		   UNSPEC_LDX_ATOMIC))
   (set (match_operand:DI 2 "memory_operand")
	(unspec:DI [(match_dup 0)]
		   UNSPEC_STX_ATOMIC))
   (set (match_operand:DF 3 "any_fp_register_operand")
	(match_operand:DF 4 "memory_operand"))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])
   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
  [(set (match_dup 3) (match_dup 5))]
  "operands[5] = gen_lowpart (DFmode, operands[1]);")

(define_peephole2
  [(set (match_operand:DF 0 "sse_reg_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]
		   UNSPEC_LDX_ATOMIC))
   (set (match_operand:DI 2 "memory_operand")
	(unspec:DI [(match_dup 0)]
		   UNSPEC_STX_ATOMIC))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 3 "any_fp_register_operand")
	(match_operand:DF 4 "memory_operand"))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (2, operands[0])
   && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
  [(const_int 0)]
{
  emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
  emit_insn (gen_memory_blockage ());
  DONE;
})

(define_expand "atomic_store<mode>"
  [(set (match_operand:ATOMIC 0 "memory_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_STA))]
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[2]));

  if (<MODE>mode == DImode && !TARGET_64BIT)
    {
      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
      /* Note that while we could perform a cmpxchg8b loop, that turns
	 out to be significantly larger than this plus a barrier.  */
      emit_insn (gen_atomic_storedi_fpu
		 (operands[0], operands[1],
	          assign_386_stack_local (DImode, SLOT_TEMP)));
    }
  else
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* For seq-cst stores, when we lack MFENCE, use XCHG.  */
      if (is_mm_seq_cst (model) && !(TARGET_64BIT || TARGET_SSE2))
	{
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a store.  */
      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
					   operands[2]));
    }
  /* ... followed by an MFENCE, if required.  */
  if (is_mm_seq_cst (model))
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})
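
;; For illustration, roughly what the expander above emits for a seq-cst
;; store (hypothetical source, not from the original file):
;;
;;   __atomic_store_n (&x, v, __ATOMIC_SEQ_CST);
;;     -> mov %v, x ; mfence      (TARGET_64BIT || TARGET_SSE2)
;;     -> xchg %v, x              (otherwise; xchg is itself a full barrier)
;;
;; Relaxed and release stores need only the plain mov.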

(define_insn "atomic_store<mode>_1"
  [(set (match_operand:SWI 0 "memory_operand" "=m")
	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
		     (match_operand:SI 2 "const_int_operand")]
		    UNSPEC_STA))]
  ""
  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
		   UNSPEC_STA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (src))
    emit_move_insn (dst, src);
  else
    {
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (dst, tmp));
	}
    }
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (match_operand:DF 2 "fp_register_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_FIST_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (3, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(set (match_dup 5) (match_dup 1))]
  "operands[5] = gen_lowpart (DFmode, operands[4]);")

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 2 "fp_register_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_FILD_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_FIST_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (4, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(const_int 0)]
{
  emit_insn (gen_memory_blockage ());
  emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
  DONE;
})

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (match_operand:DF 2 "sse_reg_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_LDX_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_STX_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (3, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(set (match_dup 5) (match_dup 1))]
  "operands[5] = gen_lowpart (DFmode, operands[4]);")

(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
	(match_operand:DF 1 "any_fp_register_operand"))
   (set (mem:BLK (scratch:SI))
	(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
   (set (match_operand:DF 2 "sse_reg_operand")
	(unspec:DF [(match_operand:DI 3 "memory_operand")]
		   UNSPEC_LDX_ATOMIC))
   (set (match_operand:DI 4 "memory_operand")
	(unspec:DI [(match_dup 2)]
		   UNSPEC_STX_ATOMIC))]
  "!TARGET_64BIT
   && peep2_reg_dead_p (4, operands[2])
   && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
  [(const_int 0)]
{
  emit_insn (gen_memory_blockage ());
  emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
  DONE;
})

;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations.  But the fix_trunc patterns want way more setup than we want
;; to provide.  Note that the scratch is DFmode instead of XFmode in order
;; to make it easy to allocate a scratch in either SSE or FP_REGs above.

(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_FILD_ATOMIC))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
		   UNSPEC_FIST_ATOMIC))]
  "TARGET_80387"
{
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])

(define_insn "loaddi_via_sse"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_LDX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_insn "storedi_via_sse"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
		   UNSPEC_STX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:SWI124 1 "register_operand")	;; oldval output
   (match_operand:SWI124 2 "memory_operand")	;; memory
   (match_operand:SWI124 3 "register_operand")	;; expected input
   (match_operand:SWI124 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  emit_insn
   (gen_atomic_compare_and_swap<mode>_1
    (operands[1], operands[2], operands[3], operands[4], operands[6]));
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})
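
;; For illustration, a sketch (hypothetical source and register choices)
;; for a 32-bit value:
;;
;;   __atomic_compare_exchange_n (&m, &expected, desired, 0,
;;                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
;;     -> mov expected, %eax
;;        lock cmpxchg %edx, m    ; desired in %edx; old value lands in %eax
;;        sete %al                ; ZF reports whether the swap happened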

(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:CASMODE 1 "register_operand")	;; oldval output
   (match_operand:CASMODE 2 "memory_operand")	;; memory
   (match_operand:CASMODE 3 "register_operand")	;; expected input
   (match_operand:CASMODE 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  if (<MODE>mode == DImode && TARGET_64BIT)
    {
      emit_insn
       (gen_atomic_compare_and_swapdi_1
	(operands[1], operands[2], operands[3], operands[4], operands[6]));
    }
  else
    {
      machine_mode hmode = <CASHMODE>mode;

      emit_insn
       (gen_atomic_compare_and_swap<mode>_doubleword
        (operands[1], operands[2], operands[3],
	 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
	 operands[6]));
    }

  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})

;; For double-word compare and swap, we are obliged to play tricks with
;; the input newval (op3:op4) because the Intel register numbering does
;; not match the gcc register numbering, so the pair must be CX:BX.
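;;
;; Concretely (for illustration): cmpxchg8b compares EDX:EAX with the
;; 8-byte memory operand, stores ECX:EBX into it on a match, and otherwise
;; loads the memory value into EDX:EAX, setting ZF either way; hence the
;; "=A", "b" and "c" constraints in the insn below.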

(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])

(define_insn "atomic_compare_and_swap<dwi>_doubleword"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
	(unspec_volatile:<DWI>
	  [(match_operand:<DWI> 1 "memory_operand" "+m")
	   (match_operand:<DWI> 2 "register_operand" "0")
	   (match_operand:DWIH 3 "register_operand" "b")
	   (match_operand:DWIH 4 "register_operand" "c")
	   (match_operand:SI 5 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG<doublemodesuffix>B"
  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")

(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SWI 0 "register_operand" "=a")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SWI 2 "register_operand" "0")
	   (match_operand:SWI 3 "register_operand" "<r>")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG"
  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")

;; For operand 2, the nonmemory_operand predicate is used instead of
;; register_operand to allow the combiner to better optimize atomic
;; additions of constants.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(plus:SWI (match_dup 1)
		  (match_operand:SWI 2 "nonmemory_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_XADD"
  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")

;; This peephole2 and the following insn optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
;; followed by testing of the flags instead of lock xadd and comparisons.
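;;
;; For illustration (hypothetical source), with N == 1:
;;
;;   if (__sync_fetch_and_add (&v, -1) == 1)
;;     ...
;;
;; becomes "lock dec v" followed by "je", since the old value was 1
;; exactly when the decremented result is zero (ZF set).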
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 2 "const_int_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

;; Likewise, but for the -Os special case of *mov<mode>_or.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operand:SWI 2 "constm1_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec_volatile:SWI
	    [(match_operand:SWI 0 "memory_operand" "+m")
	     (match_operand:SI 3 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})

;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
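;;
;; This is also why the seq-cst atomic_store expander above falls back to
;; atomic_exchange when mfence is unavailable: a plain
;;
;;   xchg %reg, mem
;;
;; both performs the store and acts as a full memory barrier, with no
;; explicit lock prefix needed.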
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2inc{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_<logic><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
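
;; For illustration (hypothetical source), the middle end routes
;;
;;   if (__atomic_fetch_or (&w, 1u << b, __ATOMIC_RELAXED) & (1u << b))
;;     ...
;;
;; through this expander, so it becomes "lock bts %b, w" with the old bit
;; read back from the carry flag (setc/jc) instead of a cmpxchg loop.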

(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
						    operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_reset<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 0))]
  ""
  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")