1;; Machine description for NVPTX.
2;; Copyright (C) 2014-2022 Free Software Foundation, Inc.
3;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Members of the "unspec" C enumeration.  Each UNSPEC_* name below can be
;; used as the code of an (unspec ...) expression in the patterns of this
;; target.
(define_c_enum "unspec" [
   UNSPEC_ARG_REG

   UNSPEC_COPYSIGN
   UNSPEC_LOG2
   UNSPEC_EXP2
   UNSPEC_SIN
   UNSPEC_COS
   UNSPEC_TANH
   UNSPEC_ISINF

   UNSPEC_FPINT_FLOOR
   UNSPEC_FPINT_BTRUNC
   UNSPEC_FPINT_CEIL
   UNSPEC_FPINT_NEARBYINT

   UNSPEC_BITREV

   UNSPEC_ALLOCA

   UNSPEC_SET_SOFTSTACK

   UNSPEC_DIM_SIZE

   UNSPEC_BIT_CONV

   UNSPEC_VOTE_BALLOT

   UNSPEC_LANEID

   UNSPEC_SHUFFLE
   UNSPEC_BR_UNIFIED
])
54
;; Members of the "unspecv" C enumeration: the UNSPECV_* codes available to
;; (unspec_volatile ...) expressions.
(define_c_enum "unspecv" [
   UNSPECV_LOCK
   UNSPECV_CAS
   UNSPECV_CAS_LOCAL
   UNSPECV_XCHG
   UNSPECV_ST
   UNSPECV_BARSYNC
   UNSPECV_WARPSYNC
   UNSPECV_UNIFORM_WARP_CHECK
   UNSPECV_MEMBAR
   UNSPECV_MEMBAR_CTA
   UNSPECV_MEMBAR_GL
   UNSPECV_DIM_POS

   UNSPECV_FORK
   UNSPECV_FORKED
   UNSPECV_JOINING
   UNSPECV_JOIN

   UNSPECV_NOUNROLL

   UNSPECV_SIMT_ENTER
   UNSPECV_SIMT_EXIT

   UNSPECV_RED_PART
])
81
;; Insn attribute "subregs_ok".  It defaults to "false"; individual patterns
;; in this file override it with (set_attr "subregs_ok" "true").
;; NOTE(review): presumably tells the back end which insns may keep subreg
;; operands -- confirm against the attribute's consumer in the C sources.
(define_attr "subregs_ok" "false,true"
  (const_string "false"))

;; Insn attribute "atomic", defaulting to "false".
(define_attr "atomic" "false,true"
  (const_string "false"))
87
;; The nvptx operand predicates, in general, do not permit subregs and accept
;; only literal constants.  This differs from the generic predicates, which
;; permit subregs and symbolic constants (as appropriate).
;; Accept an rtx whose code is REG and that also satisfies the generic
;; register_operand test for MODE.  The match_code test rejects every other
;; rtx code before the C body runs.
(define_predicate "nvptx_register_operand"
  (match_code "reg")
{
  return register_operand (op, mode);
})
96
;; Accept either a plain register (as nvptx_register_operand does) or a
;; CONCAT in DCmode or CDImode whose two halves are both such registers.
(define_predicate "nvptx_register_or_complex_di_df_register_operand"
  (ior (match_code "reg")
       (match_code "concat"))
{
  /* Anything other than a CONCAT is judged as an ordinary register.  */
  if (GET_CODE (op) != CONCAT)
    return nvptx_register_operand (op, mode);

  /* Only the two complex modes are allowed for the pair as a whole.  */
  if (GET_MODE (op) != DCmode && GET_MODE (op) != CDImode)
    return false;

  /* Both halves must themselves be acceptable registers.  */
  if (!nvptx_register_operand (XEXP (op, 0), mode))
    return false;

  return nvptx_register_operand (XEXP (op, 1), mode);
})
108
;; Accept a REG that passes register_operand, or a MEM that passes
;; memory_operand.  No other rtx codes get past the match_code test.
(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* Only MEM remains once REG has been handled.  */
  return memory_operand (op, mode);
})
115
;; Accept a REG that passes register_operand, or a CONST_INT / CONST_DOUBLE
;; that passes immediate_operand.
(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* The remaining codes are the literal constants.  */
  return immediate_operand (op, mode);
})
122
;; Matches only the integer constant zero (const0_rtx).
(define_predicate "const0_operand"
  (and (match_code "const_int")
       (match_test "op == const0_rtx")))

;; True if this operator is valid for predication.
;; Only EQ and NE are accepted.
(define_predicate "predicate_operator"
  (match_code "eq,ne"))

;; Matches an NE rtx only.
(define_predicate "ne_operator"
  (match_code "ne"))

;; The signed and unsigned integer comparison codes.
(define_predicate "nvptx_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))

;; The comparison codes accepted for floating-point compares, including the
;; unordered variants and the ORDERED / UNORDERED tests themselves.
(define_predicate "nvptx_float_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))

;; A CONST_INT whose value, read as unsigned, is less than 4.
(define_predicate "nvptx_vector_index_operand"
  (and (match_code "const_int")
       (match_test "UINTVAL (op) < 4")))
143
;; Valid call targets: any register, or a SYMBOL_REF flagged as a function.
(define_predicate "call_insn_operand"
  (match_code "symbol_ref,reg")
{
  /* Registers are always acceptable.  */
  if (REG_P (op))
    return true;

  /* Otherwise OP is a SYMBOL_REF; it must name a function.  */
  return SYMBOL_REF_FUNCTION_P (op);
})
150
;; Accept a PARALLEL in which every element after the first is a USE of a
;; register.  The first element (index 0) is not examined by this predicate.
(define_predicate "call_operation"
  (match_code "parallel")
{
  const int n_elts = XVECLEN (op, 0);

  /* Start at index 1; index 0 is left unchecked.  */
  for (int idx = 1; idx < n_elts; ++idx)
    {
      rtx use = XVECEXP (op, 0, idx);
      bool reg_use_p = GET_CODE (use) == USE && REG_P (XEXP (use, 0));

      if (!reg_use_p)
	return false;
    }

  return true;
})
167
;; Test for a function symbol ref operand: a SYMBOL_REF for which
;; SYMBOL_REF_FUNCTION_P holds.
(define_predicate "symbol_ref_function_operand"
  (match_code "symbol_ref")
{
  return SYMBOL_REF_FUNCTION_P (op);
})
174
;; Insns are predicable by default ("yes"); patterns that must not be
;; conditionally executed set this attribute to "no" explicitly.
(define_attr "predicable" "no,yes"
  (const_string "yes"))

;; Conditional-execution template: the predicate is an EQ or NE comparison
;; (predicate_operator) of a BImode register against constant zero.  Both the
;; condition string and the output template are empty.
(define_cond_exec
  [(match_operator 0 "predicate_operator"
      [(match_operand:BI 1 "nvptx_register_operand" "")
       (match_operand:BI 2 "const0_operand" "")])]
  ""
  ""
  )
185
;; Operand constraints.  P0, P1 and Pn each match one specific integer
;; constant; R matches a register; Ia matches any CONST_INT.

(define_constraint "P0"
  "An integer with the value 0."
  (and (match_code "const_int")
       (match_test "ival == 0")))

(define_constraint "P1"
  "An integer with the value 1."
  (and (match_code "const_int")
       (match_test "ival == 1")))

(define_constraint "Pn"
  "An integer with the value -1."
  (and (match_code "const_int")
       (match_test "ival == -1")))

;; The test itself only checks for rtx code REG.
(define_constraint "R"
  "A pseudo register."
  (match_code "reg"))

;; The match_test is unconditionally true, so only the rtx code matters.
(define_constraint "Ia"
  "Any integer constant."
  (and (match_code "const_int") (match_test "true")))
208
;; Mode iterators.  Each iterator expands a pattern once for every mode in
;; its list; the iterator names abbreviate the modes they contain.
(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
(define_mode_iterator QHSDIM [QI HI SI DI])
(define_mode_iterator HSDIM [HI SI DI])
(define_mode_iterator BHSDIM [BI HI SI DI])
(define_mode_iterator SDIM [SI DI])
(define_mode_iterator SDISDFM [SI DI SF DF])
(define_mode_iterator QHIM [QI HI])
(define_mode_iterator QHSIM [QI HI SI])
(define_mode_iterator SDFM [SF DF])
(define_mode_iterator HSFM [HF SF])
(define_mode_iterator SDCM [SC DC])
(define_mode_iterator BITS [SI SF])
(define_mode_iterator BITD [DI DF])
(define_mode_iterator VECIM [V2SI V2DI])

;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities.  Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])

;; Define element mode for each vector mode.
;; VECELEM gives the upper-case spelling and Vecelem the lower-case one.
(define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
(define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
231
;; We should get away with not defining memory alternatives, since we don't
;; get variables in this mode and pseudos are never spilled.
;;
;; BImode move.  Alternative 0 copies one register to another with mov.
;; Alternatives 1 and 2 load the constants 0 and 1 by emitting a setp.eq.u32
;; on two literals: "1, 0" compares unequal and "1, 1" compares equal.
(define_insn "movbi"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
	(match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
  ""
  "@
   %.\\tmov%t0\\t%0, %1;
   %.\\tsetp.eq.u32\\t%0, 1, 0;
   %.\\tsetp.eq.u32\\t%0, 1, 1;")
242
;; Vector (V2SI / V2DI) move.  The insn condition requires a register source
;; whenever the destination is memory.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Register destination, memory source: a load.  */
      return "%.\\tld%A1%u1\\t%0, %1;";

    case 2:
      /* Memory destination, register source: a store.  */
      return "%.\\tst%A0%u0\\t%0, %1;";

    default:
      /* Register destination, register or immediate source.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
256
;; Scalar integer (QI / HI / SI / DI) move.  The insn condition requires a
;; register source whenever the destination is memory.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Register destination, memory source: a load.  */
      return "%.\\tld%A1%u1\\t%0, %1;";

    case 2:
      /* Memory destination, register source: a store.  */
      return "%.\\tst%A0%u0\\t%0, %1;";

    default:
      /* Register destination, register or immediate source.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
270
;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
;; The split first loads the function symbol into the destination register
;; and then adds the constant offset to that register as a separate insn.
(define_split
  [(set (match_operand:DI 0 "nvptx_register_operand")
	(const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
			   (match_operand 2 "const_int_operand"))))]
  ""
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
  ]
  "")
281
;; Floating-point (SF / DF) move.  The insn condition requires a register
;; source whenever the destination is memory.  Note that, unlike the integer
;; move patterns, the type suffix of the load and the store is taken from
;; the register operand rather than from the memory operand.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:SDFM 1 "general_operand" "RF,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Register destination, memory source: a load.  */
      return "%.\\tld%A1%u0\\t%0, %1;";

    case 2:
      /* Memory destination, register source: a store.  */
      return "%.\\tst%A0%u1\\t%0, %1;";

    default:
      /* Register destination, register or floating constant source.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
295
;; HFmode move: register copy, load and store, each emitted as a .b16
;; operation.  The insn condition requires a register source whenever the
;; destination is memory.
(define_insn "*movhf_insn"
  [(set (match_operand:HF 0 "nonimmediate_operand" "=R,R,m")
	(match_operand:HF 1 "nonimmediate_operand" "R,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   %.\\tmov.b16\\t%0, %1;
   %.\\tld.b16\\t%0, %1;
   %.\\tst.b16\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
305
;; HFmode move expander.  Constants are materialized in SFmode and truncated
;; to HFmode; a store from a non-register source is staged through a fresh
;; HFmode register.  Everything else falls through to the pattern above.
(define_expand "movhf"
  [(set (match_operand:HF 0 "nonimmediate_operand" "")
	(match_operand:HF 1 "nonimmediate_operand" ""))]
  ""
{
  if (CONST_DOUBLE_P (operands[1]))
    {
      /* HFmode constants are loaded as SFmode values and then narrowed
	 with an explicit FLOAT_TRUNCATE.  */
      rtx sf_reg = gen_reg_rtx (SFmode);
      REAL_VALUE_TYPE value = *CONST_DOUBLE_REAL_VALUE (operands[1]);
      real_convert (&value, SFmode, &value);
      emit_move_insn (sf_reg, const_double_from_real_value (value, SFmode));

      if (REG_P (operands[0]))
	/* Truncate straight into the destination register.  */
	emit_insn (gen_truncsfhf2 (operands[0], sf_reg));
      else
	{
	  /* Truncate into a temporary, then move that to the
	     destination.  */
	  rtx hf_reg = gen_reg_rtx (HFmode);
	  emit_insn (gen_truncsfhf2 (hf_reg, sf_reg));
	  emit_move_insn (operands[0], hf_reg);
	}
      DONE;
    }

  if (MEM_P (operands[0]))
    {
      if (!REG_P (operands[1]))
	{
	  /* Stores need a register source; copy through a new pseudo.  */
	  rtx staged = gen_reg_rtx (HFmode);
	  emit_move_insn (staged, operands[1]);
	  emit_move_insn (operands[0], staged);
	  DONE;
	}
    }
})
338
;; Copy an incoming argument register, written %arN where N is the constant
;; in operand 1, into a register.  This QImode / HImode variant uses a cvt
;; from .u32.
(define_insn "load_arg_reg<mode>"
  [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
	(unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
		     UNSPEC_ARG_REG))]
  ""
  "%.\\tcvt%t0.u32\\t%0, %%ar%1;")

;; As above for SI / DI / SF / DF, where a plain mov is emitted.
(define_insn "load_arg_reg<mode>"
  [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
	(unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
			UNSPEC_ARG_REG))]
  ""
  "%.\\tmov%t0\\t%0, %%ar%1;")
352
353 (define_expand "mov<mode>"
354  [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
355	(match_operand:VECIM 1 "general_operand" ""))]
356  ""
357{
358  if (MEM_P (operands[0]) && !REG_P (operands[1]))
359    {
360      rtx tmp = gen_reg_rtx (<MODE>mode);
361      emit_move_insn (tmp, operands[1]);
362      emit_move_insn (operands[0], tmp);
363      DONE;
364    }
365})
366
;; Scalar integer and floating-point move expander.  A store whose source is
;; not a register is staged through a fresh pseudo.  Taking the address of a
;; label is reported as unsupported.
(define_expand "mov<mode>"
  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
	(match_operand:QHSDISDFM 1 "general_operand" ""))]
  ""
{
  if (MEM_P (operands[0]))
    {
      if (!REG_P (operands[1]))
	{
	  /* Stores need a register source; copy through a new pseudo.  */
	  rtx staged = gen_reg_rtx (<MODE>mode);
	  emit_move_insn (staged, operands[1]);
	  emit_move_insn (operands[0], staged);
	  DONE;
	}
    }

  /* Label values cannot be represented; diagnose and carry on with the
     default expansion.  */
  if (LABEL_REF == GET_CODE (operands[1]))
    sorry ("target cannot support label values");
})
383
;; Zero extension QImode -> HImode.  Alternative 0 converts a register with
;; cvt; alternative 1 performs an unsigned 8-bit load from memory.
(define_insn "zero_extendqihi2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
	(zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.u16.u%T1\\t%0, %1;
   %.\\tld%A1.u8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Zero extension QImode / HImode -> SImode, from a register (cvt) or
;; directly from memory (unsigned ld).
(define_insn "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
	(zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.u32.u%T1\\t%0, %1;
   %.\\tld%A1.u%T1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Zero extension QImode / HImode / SImode -> DImode, from a register (cvt)
;; or directly from memory (ld).
(define_insn "zero_extend<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
	(zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.u64.u%T1\\t%0, %1;
   %.\\tld%A1%u1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
410
;; Sign extension QImode -> HImode.  Register source only.
(define_insn "extendqihi2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
	(sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.s16.s8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Sign extension QImode / HImode -> SImode, from a register (cvt) or
;; directly from memory (signed ld).
(define_insn "extend<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
	(sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.s32.s%T1\\t%0, %1;
   %.\\tld%A1.s%T1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Sign extension QImode / HImode / SImode -> DImode, from a register (cvt)
;; or directly from memory (signed ld).
(define_insn "extend<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
	(sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.s64.s%T1\\t%0, %1;
   %.\\tld%A1.s%T1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
435
;; Truncation HImode -> QImode.  Alternative 0 converts into a register with
;; cvt; alternative 1 stores the low 8 bits straight to memory.
(define_insn "trunchiqi2"
  [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
	(truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
  ""
  "@
   %.\\tcvt%t0.u16\\t%0, %1;
   %.\\tst%A0.u8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
444
;; Truncation SImode -> QImode / HImode.  A memory destination is written
;; with a narrow store.  For a register destination, QImode uses a mov and
;; HImode uses a cvt from .u32.
(define_insn "truncsi<mode>2"
  [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
	(truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
  ""
  {
    if (which_alternative != 1)
      /* Register destination: the instruction depends on the width.  */
      return (GET_MODE (operands[0]) == QImode
	      ? "%.\\tmov%t0\\t%0, %1;"
	      : "%.\\tcvt%t0.u32\\t%0, %1;");

    /* Memory destination.  */
    return "%.\\tst%A0.u%T0\\t%0, %1;";
  }
  [(set_attr "subregs_ok" "true")])
457
;; Truncation DImode -> QImode / HImode / SImode.  Alternative 0 converts
;; into a register with cvt from .u64; alternative 1 is a narrow store.
(define_insn "truncdi<mode>2"
  [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
	(truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
  ""
  "@
   %.\\tcvt%t0.u64\\t%0, %1;
   %.\\tst%A0.u%T0\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
466
;; Sign-extensions of truncations
;; Each pattern below matches a truncate to a narrow mode immediately
;; sign-extended back to the source mode, and emits one cvt whose source
;; type is the narrow signed type.

;; Narrow mode QImode (.s8), for HI / SI / DI values.
(define_insn "*extend_trunc_<mode>2_qi"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(sign_extend:HSDIM
	 (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s%T0.s8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Narrow mode HImode (.s16), for SI / DI values.
(define_insn "*extend_trunc_<mode>2_hi"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
	(sign_extend:SDIM
	 (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s%T0.s16\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Narrow mode SImode (.s32), for DImode values.
(define_insn "*extend_trunc_di2_si"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
	(sign_extend:DI
	 (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s64.s32\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
492
;; Integer arithmetic

;; Addition for HI / SI / DI; the second input may be a register or an
;; immediate.
(define_insn "add<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tadd%t0\\t%0, %1, %2;")

;; SImode (op1 + op2) + op3 on three registers, emitted as a single vadd
;; instruction with the .add suffix.
(define_insn "*vadd_addsi4"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
			  (match_operand:SI 2 "nvptx_register_operand" "R"))
		 (match_operand:SI 3 "nvptx_register_operand" "R")))]
  ""
  "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")

;; SImode (op1 - op2) + op3 on three registers, emitted as a single vsub
;; instruction with the .add suffix.
(define_insn "*vsub_addsi4"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
			   (match_operand:SI 2 "nvptx_register_operand" "R"))
		 (match_operand:SI 3 "nvptx_register_operand" "R")))]
  ""
  "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
517
;; Subtraction for HI / SI / DI, register operands only.  HImode is emitted
;; with an explicit .s16 type instead of the mode-derived suffix.
(define_insn "sub<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		     (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
  ""
  {
    /* The HImode special case works around
       https://developer.nvidia.com/nvidia_bug/3527713.  See PR97005.  */
    return (GET_MODE (operands[0]) == HImode
	    ? "%.\\tsub.s16\\t%0, %1, %2;"
	    : "%.\\tsub%t0\\t%0, %1, %2;");
  })
531
;; Multiplication keeping the low half of the product (mul.lo).
(define_insn "mul<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.lo%t0\\t%0, %1, %2;")

;; Multiply-add, op1 * op2 + op3, emitted as one mad.lo instruction.
(define_insn "*mad<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
				(match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
		    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")

;; Signed division.
(define_insn "div<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tdiv.s%T0\\t%0, %1, %2;")

;; Unsigned division.
(define_insn "udiv<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tdiv.u%T0\\t%0, %1, %2;")
560
;; Signed remainder, emitted as rem.s.  Operand 1 must be a register (its
;; predicate accepts nothing else), so its constraint is plain "R", in line
;; with the division patterns; operand 2 may be a register or an immediate.
(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.s%T0\\t%0, %1, %2;")

;; Unsigned remainder, emitted as rem.u.  Operand constraints as for the
;; signed pattern.
(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.u%T0\\t%0, %1, %2;")
574
;; Signed minimum.
(define_insn "smin<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmin.s%T0\\t%0, %1, %2;")

;; Unsigned minimum.
(define_insn "umin<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmin.u%T0\\t%0, %1, %2;")

;; Signed maximum.
(define_insn "smax<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmax.s%T0\\t%0, %1, %2;")

;; Unsigned maximum.
(define_insn "umax<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmax.u%T0\\t%0, %1, %2;")
602
;; Signed absolute value.
(define_insn "abs<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tabs.s%T0\\t%0, %1;")

;; Signed negation.
(define_insn "neg<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tneg.s%T0\\t%0, %1;")

;; Bitwise complement of an integer register.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tnot.b%T0\\t%0, %1;")

;; Complement of a BImode predicate register.
(define_insn "one_cmplbi2"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
	(not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tnot.pred\\t%0, %1;")

;; Integer result of comparing a register for equality with zero, emitted
;; as a cnot instruction.
(define_insn "*cnot<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		  (const_int 0)))]
  ""
  "%.\\tcnot.b%T0\\t%0, %1;")

;; Bit reversal (brev) of an SImode or DImode register, represented as
;; UNSPEC_BITREV.
(define_insn "bitrev<mode>2"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
	(unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
		     UNSPEC_BITREV))]
  ""
  "%.\\tbrev.b%T0\\t%0, %1;")

;; Count leading zeros.  The result is always SImode; the instruction width
;; is taken from the input operand.
(define_insn "clz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tclz.b%T1\\t%0, %1;")
646
;; Count trailing zeros, built from the bit-reversal and count-leading-zeros
;; patterns.  The result is SImode.
(define_expand "ctz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
	(ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
  ""
{
  /* Reverse the input bits into a new pseudo, then count the leading
     zeros of the reversed value.  */
  rtx reversed = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_bitrev<mode>2 (reversed, operands[1]));
  emit_insn (gen_clz<mode>2 (operands[0], reversed));
  DONE;
})
657
;; Population count.  The result is always SImode; the instruction width is
;; taken from the input operand.
(define_insn "popcount<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tpopc.b%T1\\t%0, %1;")
663
;; Multiplication variants
;; The four patterns below are widening multiplies: both inputs are extended
;; to twice their width and the full product is produced by mul.wide.

;; Signed HImode x HImode -> SImode.
(define_insn "mulhisi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(mult:SI (sign_extend:SI
		  (match_operand:HI 1 "nvptx_register_operand" "R"))
		 (sign_extend:SI
		  (match_operand:HI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.s16\\t%0, %1, %2;")

;; Signed SImode x SImode -> DImode.
(define_insn "mulsidi3"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
	(mult:DI (sign_extend:DI
		  (match_operand:SI 1 "nvptx_register_operand" "R"))
		 (sign_extend:DI
		  (match_operand:SI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.s32\\t%0, %1, %2;")

;; Unsigned HImode x HImode -> SImode.
(define_insn "umulhisi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(mult:SI (zero_extend:SI
		  (match_operand:HI 1 "nvptx_register_operand" "R"))
		 (zero_extend:SI
		  (match_operand:HI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.u16\\t%0, %1, %2;")

;; Unsigned SImode x SImode -> DImode.
(define_insn "umulsidi3"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
	(mult:DI (zero_extend:DI
		  (match_operand:SI 1 "nvptx_register_operand" "R"))
		 (zero_extend:DI
		  (match_operand:SI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.u32\\t%0, %1, %2;")
701
;; Signed widening multiply, DImode x DImode -> TImode.  The expander emits
;; smuldi3_highpart for the upper 64 bits and muldi3 for the lower 64 bits,
;; then moves each result into the matching half of operand 0.  Both inputs
;; are sign-extended to TImode in the pattern, matching the mode of the
;; enclosing mult.
(define_expand "mulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
	(mult:TI (sign_extend:TI
		  (match_operand:DI 1 "nvptx_register_operand"))
		 (sign_extend:TI
		  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  /* Compute the two halves of the product into separate pseudos.  */
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);
  emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
  /* Assemble the TImode result from its halves.  */
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})
718
;; Unsigned widening multiply, DImode x DImode -> TImode.  The expander emits
;; umuldi3_highpart for the upper 64 bits and muldi3 for the lower 64 bits,
;; then moves each result into the matching half of operand 0.  Both inputs
;; are zero-extended to TImode in the pattern, matching the mode of the
;; enclosing mult.
(define_expand "umulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
	(mult:TI (zero_extend:TI
		  (match_operand:DI 1 "nvptx_register_operand"))
		 (zero_extend:TI
		  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  /* Compute the two halves of the product into separate pseudos.  */
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);
  emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
  /* Assemble the TImode result from its halves.  */
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})
735
;; Signed multiply returning the high half of the product (mul.hi).
(define_insn "smul<mode>3_highpart"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(smul_highpart:HSDIM
	  (match_operand:HSDIM 1 "nvptx_register_operand" "R")
	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.hi.s%T0\\t%0, %1, %2;")

;; Unsigned multiply returning the high half of the product (mul.hi).
(define_insn "umul<mode>3_highpart"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(umul_highpart:HSDIM
	  (match_operand:HSDIM 1 "nvptx_register_operand" "R")
	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.hi.u%T0\\t%0, %1, %2;")

;; The "_2" patterns below recognize the same high-part multiplies when they
;; are written out as a widening multiply, a logical right shift by the
;; narrow width, and a truncation.

;; Signed HImode high part.
(define_insn "*smulhi3_highpart_2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
	(truncate:HI
	 (lshiftrt:SI
	  (mult:SI (sign_extend:SI
		    (match_operand:HI 1 "nvptx_register_operand" "R"))
		   (sign_extend:SI
		    (match_operand:HI 2 "nvptx_register_operand" "R")))
	  (const_int 16))))]
  ""
  "%.\\tmul.hi.s16\\t%0, %1, %2;")

;; Signed SImode high part.
(define_insn "*smulsi3_highpart_2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(truncate:SI
	 (lshiftrt:DI
	  (mult:DI (sign_extend:DI
		    (match_operand:SI 1 "nvptx_register_operand" "R"))
		   (sign_extend:DI
		    (match_operand:SI 2 "nvptx_register_operand" "R")))
	  (const_int 32))))]
  ""
  "%.\\tmul.hi.s32\\t%0, %1, %2;")

;; Unsigned HImode high part.
(define_insn "*umulhi3_highpart_2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
	(truncate:HI
	 (lshiftrt:SI
	  (mult:SI (zero_extend:SI
		    (match_operand:HI 1 "nvptx_register_operand" "R"))
		   (zero_extend:SI
		    (match_operand:HI 2 "nvptx_register_operand" "R")))
	  (const_int 16))))]
  ""
  "%.\\tmul.hi.u16\\t%0, %1, %2;")

;; Unsigned SImode high part.
(define_insn "*umulsi3_highpart_2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(truncate:SI
	 (lshiftrt:DI
	  (mult:DI (zero_extend:DI
		    (match_operand:SI 1 "nvptx_register_operand" "R"))
		   (zero_extend:DI
		    (match_operand:SI 2 "nvptx_register_operand" "R")))
	  (const_int 32))))]
  ""
  "%.\\tmul.hi.u32\\t%0, %1, %2;")
799
;; Shifts
;; In all three shift patterns the shift count is an SImode register or
;; immediate, whatever the mode of the value being shifted.

;; Left shift.
(define_insn "ashl<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
		      (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tshl.b%T0\\t%0, %1, %2;")

;; Arithmetic (signed) right shift.
(define_insn "ashr<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
			(match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tshr.s%T0\\t%0, %1, %2;")

;; Logical (unsigned) right shift.
(define_insn "lshr<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
			(match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tshr.u%T0\\t%0, %1, %2;")

;; SImode rotate left, available only under TARGET_SM35.  The pattern
;; matches a rotate count masked with 31 and emits shf.l.wrap.b32 with
;; operand 1 supplied for both data inputs.
(define_insn "rotlsi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R")
		   (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
			   (const_int 31))))]
  "TARGET_SM35"
  "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;")

;; SImode rotate right, available only under TARGET_SM35.  The pattern
;; matches a rotate count masked with 31 and emits shf.r.wrap.b32 with
;; operand 1 supplied for both data inputs.
(define_insn "rotrsi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(rotatert:SI (match_operand:SI 1 "nvptx_register_operand" "R")
		     (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
			     (const_int 31))))]
  "TARGET_SM35"
  "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;")
838
;; Logical operations

;; any_logic iterates over AND, IOR and XOR.  The "logic" attribute gives
;; the instruction mnemonic (note "or" for IOR); "ilogic" gives the spelling
;; used in the pattern name (note "ior" for IOR).
(define_code_iterator any_logic [and ior xor])
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
(define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")])

;; Bitwise AND / OR / XOR on HI / SI / DI; the second input may be a register
;; or an immediate.
(define_insn "<ilogic><mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
	(any_logic:HSDIM
	  (match_operand:HSDIM 1 "nvptx_register_operand" "R")
	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\t<logic>.b%T0\\t%0, %1, %2;")

;; AND / OR / XOR on BImode predicate registers (.pred form).
(define_insn "<ilogic>bi3"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
	(any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R")
		      (match_operand:BI 2 "nvptx_register_operand" "R")))]
  ""
  "%.\\t<logic>.pred\\t%0, %1, %2;")
859
;; A logical operation on two integers that are each the (ne ... 0) image of
;; a BImode register is split into the same operation on the two predicates,
;; stored in a new BImode pseudo, followed by one conversion of that pseudo
;; to the integer mode.  The split applies only while new pseudos can still
;; be created.
(define_split
  [(set (match_operand:HSDIM 0 "nvptx_register_operand")
	(any_logic:HSDIM
	  (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand")
		    (const_int 0))
	  (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand")
		    (const_int 0))))]
  "can_create_pseudo_p ()"
  [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2)))
   (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))]
{
  /* Operand 3 is the intermediate predicate register.  */
  operands[3] = gen_reg_rtx (BImode);
})
873
;; Comparisons and branches

;; Integer compare of HI / SI / DI operands, setting a BImode predicate
;; register with setp.  The comparison code is operand 1.
(define_insn "cmp<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
	(match_operator:BI 1 "nvptx_comparison_operator"
	   [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
	    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
  ""
  "%.\\tsetp%c1\\t%0, %2, %3;")

;; Floating-point compare of SF / DF operands, setting a BImode predicate
;; register with setp.
(define_insn "*cmp<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
	(match_operator:BI 1 "nvptx_float_comparison_operator"
	   [(match_operand:SDFM 2 "nvptx_register_operand" "R")
	    (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
  ""
  "%.\\tsetp%c1\\t%0, %2, %3;")

;; HFmode compare; available only under TARGET_SM53.
(define_insn "*cmphf"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
	(match_operator:BI 1 "nvptx_float_comparison_operator"
	   [(match_operand:HF 2 "nvptx_register_operand" "R")
	    (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
  "TARGET_SM53"
  "%.\\tsetp%c1\\t%0, %2, %3;")
899
;; Unconditional branch to a label.
(define_insn "jump"
  [(set (pc)
	(label_ref (match_operand 0 "" "")))]
  ""
  "%.\\tbra\\t%l0;")

;; Branch to the label when the BImode register is nonzero.  The insn
;; carries its own predicate and is therefore marked not predicable.
(define_insn "br_true"
  [(set (pc)
	(if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
			  (const_int 0))
		      (label_ref (match_operand 1 "" ""))
		      (pc)))]
  ""
  "%j0\\tbra\\t%l1;"
  [(set_attr "predicable" "no")])

;; Branch to the label when the BImode register is zero.  Not predicable.
(define_insn "br_false"
  [(set (pc)
	(if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
			  (const_int 0))
		      (label_ref (match_operand 1 "" ""))
		      (pc)))]
  ""
  "%J0\\tbra\\t%l1;"
  [(set_attr "predicable" "no")])

;; unified conditional branch
;; Same conditions as br_true / br_false, but the predicate is wrapped in
;; UNSPEC_BR_UNIFIED and the branch is emitted as bra.uni.
(define_insn "br_true_uni"
  [(set (pc) (if_then_else
	(ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
		       UNSPEC_BR_UNIFIED) (const_int 0))
        (label_ref (match_operand 1 "" "")) (pc)))]
  ""
  "%j0\\tbra.uni\\t%l1;"
  [(set_attr "predicable" "no")])

(define_insn "br_false_uni"
  [(set (pc) (if_then_else
	(eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
		       UNSPEC_BR_UNIFIED) (const_int 0))
        (label_ref (match_operand 1 "" "")) (pc)))]
  ""
  "%J0\\tbra.uni\\t%l1;"
  [(set_attr "predicable" "no")])
944
;; Compare-and-branch on HI / SI / DI operands.  The comparison is handed to
;; nvptx_expand_compare, and the rtx it returns replaces the operator; that
;; rtx's two sub-expressions replace operands 1 and 2 before the branch
;; pattern is generated.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else (match_operator 0 "nvptx_comparison_operator"
		       [(match_operand:HSDIM 1 "nvptx_register_operand" "")
			(match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
		      (label_ref (match_operand 3 "" ""))
		      (pc)))]
  ""
{
  /* Replace the comparison, then pull its operands back out.  */
  operands[0] = nvptx_expand_compare (operands[0]);
  operands[1] = XEXP (operands[0], 0);
  operands[2] = XEXP (operands[0], 1);
})
959
;; Compare-and-branch on SF / DF operands.  The comparison is handed to
;; nvptx_expand_compare, and the rtx it returns replaces the operator; that
;; rtx's two sub-expressions replace operands 1 and 2 before the branch
;; pattern is generated.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else (match_operator 0 "nvptx_float_comparison_operator"
		       [(match_operand:SDFM 1 "nvptx_register_operand" "")
			(match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
		      (label_ref (match_operand 3 "" ""))
		      (pc)))]
  ""
{
  /* Replace the comparison, then pull its operands back out.  */
  operands[0] = nvptx_expand_compare (operands[0]);
  operands[1] = XEXP (operands[0], 0);
  operands[2] = XEXP (operands[0], 1);
})
974
;; Compare-and-branch expander for a BImode register (operand 1) tested
;; against a const0_operand (operand 2) with a predicate_operator
;; (operand 0); operand 3 is the branch target.  The preparation string is
;; empty, so the pattern is emitted exactly as written.
975(define_expand "cbranchbi4"
976  [(set (pc)
977	(if_then_else (match_operator 0 "predicate_operator"
978		       [(match_operand:BI 1 "nvptx_register_operand" "")
979			(match_operand:BI 2 "const0_operand" "")])
980		      (label_ref (match_operand 3 "" ""))
981		      (pc)))]
982  ""
983  "")
984
985;; Conditional stores
986
;; Materialize a BImode predicate (operand 1) as an integer of any QHSDIM
;; mode in operand 0: the RTL is (operand 1 != 0) and the emitted "selp"
;; selects between the literals 1 and 0 on operand 1.
987(define_insn "setcc<mode>_from_bi"
988  [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
989	(ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
990		   (const_int 0)))]
991  ""
992  "%.\\tselp%t0\\t%0, 1, 0, %1;")
993
;; Inverted form of setcc<mode>_from_bi for the HSDIM modes: the RTL is
;; (operand 1 == 0) and the emitted "selp" selects between the literals
;; 0 and 1 on operand 1 (the two literals are swapped relative to the
;; non-inverted pattern).
994(define_insn "*setcc<mode>_from_not_bi"
995  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
996	(eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
997		   (const_int 0)))]
998  ""
999  "%.\\tselp%t0\\t%0, 0, 1, %1;")
1000
;; Sign-extend a BImode predicate (operand 1) into a QHSDIM register
;; (operand 0).  Emitted as a "selp" between the literals -1 and 0 on
;; operand 1.
1001(define_insn "extendbi<mode>2"
1002  [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1003	(sign_extend:QHSDIM
1004	 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1005  ""
1006  "%.\\tselp%t0\\t%0, -1, 0, %1;")
1007
;; Zero-extend a BImode predicate (operand 1) into a QHSDIM register
;; (operand 0).  Emitted as a "selp" between the literals 1 and 0 on
;; operand 1, the same assembly text as setcc<mode>_from_bi.
1008(define_insn "zero_extendbi<mode>2"
1009  [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1010	(zero_extend:QHSDIM
1011	 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1012  ""
1013  "%.\\tselp%t0\\t%0, 1, 0, %1;")
1014
;; Integer (HSDIM) conditional move: operand 0 receives operand 2 when the
;; BImode predicate in operand 1 is non-zero, otherwise operand 3.  Both
;; value operands may be registers or immediates ("Ri").  Emitted as
;; "selp" with the values in pattern order.
1015(define_insn "sel_true<mode>"
1016  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1017	(if_then_else:HSDIM
1018	  (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1019	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1020	  (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1021  ""
1022  "%.\\tselp%t0\\t%0, %2, %3, %1;")
1023
;; Floating-point (SDFM) conditional move: operand 0 receives operand 2
;; when the BImode predicate in operand 1 is non-zero, otherwise operand 3.
;; Value operands use the "RF" constraints instead of the integer
;; variant's "Ri".  Emitted as "selp" with the values in pattern order.
1024(define_insn "sel_true<mode>"
1025  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1026	(if_then_else:SDFM
1027	  (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1028	  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1029	  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1030  ""
1031  "%.\\tselp%t0\\t%0, %2, %3, %1;")
1032
;; Integer (HSDIM) conditional move on an inverted predicate: operand 0
;; receives operand 2 when operand 1 equals zero, otherwise operand 3.
;; The inversion is implemented by printing the value operands swapped
;; (%3 before %2) in the "selp" template rather than negating operand 1.
1033(define_insn "sel_false<mode>"
1034  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1035	(if_then_else:HSDIM
1036	  (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1037	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1038	  (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1039  ""
1040  "%.\\tselp%t0\\t%0, %3, %2, %1;")
1041
;; Floating-point (SDFM) conditional move on an inverted predicate:
;; operand 0 receives operand 2 when operand 1 equals zero, otherwise
;; operand 3.  As in the integer variant, the "selp" template prints the
;; value operands swapped (%3 before %2).
1042(define_insn "sel_false<mode>"
1043  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1044	(if_then_else:SDFM
1045	  (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1046	  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1047	  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1048  ""
1049  "%.\\tselp%t0\\t%0, %3, %2, %1;")
1050
;; Code iterator over the two equality comparison codes.  Patterns written
;; with "eqne" are instantiated once for eq and once for ne, and <code> in
;; their names expands to the corresponding code name.
1051(define_code_iterator eqne [eq ne])
1052
1053;; Split negation of a predicate into a conditional move.
;; Matches the arithmetic negation of an eq/ne test of BImode operand 1
;; against zero, in any HSDIM mode.  The "#" output template together with
;; the "&& 1" split condition means the insn is not output directly; it is
;; replaced by an if_then_else on the same test that yields -1 when the
;; test holds and 0 otherwise.
1054(define_insn_and_split "*selp<mode>_neg_<code>"
1055  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1056	(neg:HSDIM (eqne:HSDIM
1057		     (match_operand:BI 1 "nvptx_register_operand" "R")
1058		     (const_int 0))))]
1059  ""
1060  "#"
1061  "&& 1"
1062  [(set (match_dup 0)
1063	(if_then_else:HSDIM
1064	  (eqne (match_dup 1) (const_int 0))
1065	  (const_int -1)
1066	  (const_int 0)))])
1067
1068;; Split bitwise not of a predicate into a conditional move.
;; Matches the one's complement of an eq/ne test of BImode operand 1
;; against zero, in any HSDIM mode.  The "#" output template together with
;; the "&& 1" split condition means the insn is not output directly; it is
;; replaced by an if_then_else on the same test that yields -2 when the
;; test holds and -1 otherwise (i.e. ~1 and ~0).
1069(define_insn_and_split "*selp<mode>_not_<code>"
1070  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1071	(not:HSDIM (eqne:HSDIM
1072		     (match_operand:BI 1 "nvptx_register_operand" "R")
1073		     (const_int 0))))]
1074  ""
1075  "#"
1076  "&& 1"
1077  [(set (match_dup 0)
1078	(if_then_else:HSDIM
1079	  (eqne (match_dup 1) (const_int 0))
1080	  (const_int -2)
1081	  (const_int -1)))])
1082
;; Integer compare producing an SImode result directly: matches the
;; negation of a comparison (nvptx_comparison_operator) of HSDIM register
;; operand 2 against non-memory operand 3.  <mode> in the name is the mode
;; of the compared operands; the destination is always SImode.  Emitted as
;; a "set" instruction with the comparison code printed from operand 1.
;; NOTE(review): matching (neg ...) implies "set" writes all-ones for true
;; -- confirm against the PTX ISA.
1083(define_insn "*setcc_int<mode>"
1084  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1085	(neg:SI
1086	  (match_operator:SI 1 "nvptx_comparison_operator"
1087	    [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1088	     (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
1089  ""
1090  "%.\\tset%t0%c1\\t%0, %2, %3;")
1091
;; Floating-point counterpart of the integer *setcc_int<mode>: matches the
;; negation of a comparison (nvptx_float_comparison_operator) of SDFM
;; register operand 2 against non-memory operand 3 ("RF"), with an SImode
;; destination.  Emitted as a "set" instruction.
1092(define_insn "*setcc_int<mode>"
1093  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1094	(neg:SI
1095	  (match_operator:SI 1 "nvptx_float_comparison_operator"
1096	    [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1097	     (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
1098  ""
1099  "%.\\tset%t0%c1\\t%0, %2, %3;")
1100
;; Integer compare whose result is written to an SFmode register: operand 1
;; is a comparison (nvptx_comparison_operator) of HSDIM register operand 2
;; against non-memory operand 3.  Unlike *setcc_int<mode> there is no
;; enclosing neg.  Emitted as a "set" instruction.
1101(define_insn "setcc_float<mode>"
1102  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1103	(match_operator:SF 1 "nvptx_comparison_operator"
1104	   [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1105	    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
1106  ""
1107  "%.\\tset%t0%c1\\t%0, %2, %3;")
1108
;; Floating-point compare whose result is written to an SFmode register:
;; operand 1 is a comparison (nvptx_float_comparison_operator) of SDFM
;; register operand 2 against non-memory operand 3 ("RF").  Emitted as a
;; "set" instruction.
1109(define_insn "setcc_float<mode>"
1110  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1111	(match_operator:SF 1 "nvptx_float_comparison_operator"
1112	   [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1113	    (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
1114  ""
1115  "%.\\tset%t0%c1\\t%0, %2, %3;")
1116
;; Store-flag expander for integer (HSDIM) comparisons into SImode
;; operand 0.  Works in two steps: a BImode comparison with the same code
;; as operand 1 is built on operands 2 and 3 and moved into a fresh BImode
;; pseudo, then that predicate is converted to SImode with
;; gen_setccsi_from_bi.  DONE suppresses the pattern written above.
1117(define_expand "cstore<mode>4"
1118  [(set (match_operand:SI 0 "nvptx_register_operand")
1119	(match_operator:SI 1 "nvptx_comparison_operator"
1120	  [(match_operand:HSDIM 2 "nvptx_register_operand")
1121	   (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
1122  ""
1123{
1124  rtx reg = gen_reg_rtx (BImode);
1125  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1126			    operands[2], operands[3]);
1127  emit_move_insn (reg, cmp);
1128  emit_insn (gen_setccsi_from_bi (operands[0], reg));
1129  DONE;
1130})
1131
;; Store-flag expander for floating-point (SDFM) comparisons into SImode
;; operand 0.  Same two-step expansion as the integer variant: build the
;; comparison in BImode, move it into a fresh BImode pseudo, then convert
;; that predicate to SImode with gen_setccsi_from_bi.  DONE suppresses the
;; pattern written above.
1132(define_expand "cstore<mode>4"
1133  [(set (match_operand:SI 0 "nvptx_register_operand")
1134	(match_operator:SI 1 "nvptx_float_comparison_operator"
1135	  [(match_operand:SDFM 2 "nvptx_register_operand")
1136	   (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
1137  ""
1138{
1139  rtx reg = gen_reg_rtx (BImode);
1140  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1141			    operands[2], operands[3]);
1142  emit_move_insn (reg, cmp);
1143  emit_insn (gen_setccsi_from_bi (operands[0], reg));
1144  DONE;
1145})
1146
;; Store-flag expander for HFmode comparisons into SImode operand 0,
;; available only when TARGET_SM53 holds.  Uses the same two-step
;; expansion as cstore<mode>4: BImode comparison into a fresh pseudo,
;; then gen_setccsi_from_bi.  DONE suppresses the pattern written above.
1147(define_expand "cstorehf4"
1148  [(set (match_operand:SI 0 "nvptx_register_operand")
1149	(match_operator:SI 1 "nvptx_float_comparison_operator"
1150	  [(match_operand:HF 2 "nvptx_register_operand")
1151	   (match_operand:HF 3 "nvptx_nonmemory_operand")]))]
1152  "TARGET_SM53"
1153{
1154  rtx reg = gen_reg_rtx (BImode);
1155  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1156			    operands[2], operands[3]);
1157  emit_move_insn (reg, cmp);
1158  emit_insn (gen_setccsi_from_bi (operands[0], reg));
1159  DONE;
1160})
1161
1162;; Calls
1163
;; Call without a result.  The whole insn is a parallel (operand 2,
;; checked by call_operation) whose first element is a call through the
;; address in operand 0 (mode P, constraint "Rs").  The assembly text is
;; produced by nvptx_output_call_insn, with NULL_RTX passed in the result
;; position.
1164(define_insn "call_insn_<mode>"
1165  [(match_parallel 2 "call_operation"
1166    [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
1167	   (match_operand 1))])]
1168  ""
1169{
1170  return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
1171})
1172
;; Call with a result.  The whole insn is a parallel (operand 3, checked
;; by call_operation) whose first element sets register operand 0 from a
;; call through the address in operand 1 (mode P, constraint "Rs").  The
;; assembly text is produced by nvptx_output_call_insn, given the result
;; register and the callee.
1173(define_insn "call_value_insn_<mode>"
1174  [(match_parallel 3 "call_operation"
1175    [(set (match_operand 0 "nvptx_register_operand" "=R")
1176	  (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
1177		(match_operand 2)))])]
1178  ""
1179{
1180  return nvptx_output_call_insn (insn, operands[0], operands[1]);
1181})
1182
;; Expander for a call with no return value.  All work is delegated to
;; nvptx_expand_call, with NULL_RTX in the result position and operand 0
;; as the callee; DONE means nothing is emitted from the pattern itself.
1183(define_expand "call"
1184 [(match_operand 0 "" "")]
1185 ""
1186{
1187  nvptx_expand_call (NULL_RTX, operands[0]);
1188  DONE;
1189})
1190
;; Expander for a call with a return value.  All work is delegated to
;; nvptx_expand_call, with operand 0 as the result and operand 1 as the
;; callee; DONE means nothing is emitted from the pattern itself.
1191(define_expand "call_value"
1192  [(match_operand 0 "" "")
1193   (match_operand 1 "" "")]
1194 ""
1195{
1196  nvptx_expand_call (operands[0], operands[1]);
1197  DONE;
1198})
1199
1200;; Floating point arithmetic.
1201
;; Floating-point addition for the SDFM modes.  Operand 1 must be a
;; register; operand 2 may be a register or a floating constant ("RF").
;; Emitted as "add".
1202(define_insn "add<mode>3"
1203  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1204	(plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1205		   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1206  ""
1207  "%.\\tadd%t0\\t%0, %1, %2;")
1208
;; Floating-point subtraction for the SDFM modes.  Unlike add<mode>3 and
;; mul<mode>3, both source operands must be registers here.  Emitted as
;; "sub".
1209(define_insn "sub<mode>3"
1210  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1211	(minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1212		    (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
1213  ""
1214  "%.\\tsub%t0\\t%0, %1, %2;")
1215
;; Floating-point multiplication for the SDFM modes.  Operand 1 must be a
;; register; operand 2 may be a register or a floating constant ("RF").
;; Emitted as "mul".
1216(define_insn "mul<mode>3"
1217  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1218	(mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1219		   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1220  ""
1221  "%.\\tmul%t0\\t%0, %1, %2;")
1222
;; Fused multiply-add for the SDFM modes: operand 0 = operand 1 * operand 2
;; + operand 3 as a single fma rtx.  Operand 1 must be a register; operands
;; 2 and 3 may be registers or floating constants.  Emitted as "fma" with
;; the %# modifier in the mnemonic (presumably the rounding-mode suffix --
;; confirm in the target's operand printer).
1223(define_insn "fma<mode>4"
1224  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1225	(fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1226		  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1227		  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1228  ""
1229  "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
1230
;; Reciprocal for the SDFM modes.  Matches a division whose numerator
;; (operand 2) is a CONST_DOUBLE identical to dconst1, i.e. 1.0 / operand 1,
;; and emits "rcp" on operand 1 alone.  Note the operand numbering: the
;; denominator is operand 1 and the constant numerator is operand 2.
1231(define_insn "*recip<mode>2"
1232  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1233	(div:SDFM
1234	  (match_operand:SDFM 2 "const_double_operand" "F")
1235	  (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1236  "CONST_DOUBLE_P (operands[2])
1237   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
1238  "%.\\trcp%#%t0\\t%0, %1;")
1239
;; Floating-point division for the SDFM modes.  Operand 1 must be a
;; register; operand 2 may be a register or a floating constant ("RF").
;; Emitted as "div" with the %# modifier in the mnemonic.
1240(define_insn "div<mode>3"
1241  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1242	(div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1243		  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1244  ""
1245  "%.\\tdiv%#%t0\\t%0, %1, %2;")
1246
;; copysign for the SDFM modes, represented as an UNSPEC_COPYSIGN of
;; operands 1 and 2.  The output template deliberately prints the sources
;; in reverse order (%2 before %1).
;; NOTE(review): presumably because the PTX "copysign" instruction takes
;; the sign source first -- confirm against the PTX ISA.
1247(define_insn "copysign<mode>3"
1248  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1249	(unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
1250		      (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")]
1251		      UNSPEC_COPYSIGN))]
1252  ""
1253  "%.\\tcopysign%t0\\t%0, %2, %1;")
1254
;; Floating-point minimum (smin rtx) for the SDFM modes.  Operand 1 must
;; be a register; operand 2 may be a register or a floating constant.
;; Emitted as "min".
1255(define_insn "smin<mode>3"
1256  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1257	(smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1258		    (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1259  ""
1260  "%.\\tmin%t0\\t%0, %1, %2;")
1261
;; Floating-point maximum (smax rtx) for the SDFM modes.  Operand 1 must
;; be a register; operand 2 may be a register or a floating constant.
;; Emitted as "max".
1262(define_insn "smax<mode>3"
1263  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1264	(smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1265		    (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1266  ""
1267  "%.\\tmax%t0\\t%0, %1, %2;")
1268
;; Floating-point absolute value for the SDFM modes, register to register.
;; Emitted as "abs".
1269(define_insn "abs<mode>2"
1270  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1271	(abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1272  ""
1273  "%.\\tabs%t0\\t%0, %1;")
1274
;; Floating-point negation for the SDFM modes, register to register.
;; Emitted as "neg".
1275(define_insn "neg<mode>2"
1276  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1277	(neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1278  ""
1279  "%.\\tneg%t0\\t%0, %1;")
1280
;; Floating-point square root for the SDFM modes, register to register.
;; Emitted as "sqrt" with the %# modifier in the mnemonic.
1281(define_insn "sqrt<mode>2"
1282  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1283	(sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1284  ""
1285  "%.\\tsqrt%#%t0\\t%0, %1;")
1286
;; Combined sine/cosine expander for SFmode, enabled only under
;; flag_unsafe_math_optimizations.  Emits two sets from the same input
;; (operand 2): operand 0 receives the UNSPEC_COS value and operand 1 the
;; UNSPEC_SIN value.  The input is first passed through make_safe_from
;; against operand 0 -- presumably so the first set cannot clobber the
;; input still needed by the second; confirm against make_safe_from.
1287(define_expand "sincossf3"
1288  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1289	(unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
1290	           UNSPEC_COS))
1291   (set (match_operand:SF 1 "nvptx_register_operand" "=R")
1292	(unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1293  "flag_unsafe_math_optimizations"
1294{
1295  operands[2] = make_safe_from (operands[2], operands[0]);
1296})
1297
;; SFmode sine as an UNSPEC_SIN, enabled only under
;; flag_unsafe_math_optimizations because it is emitted as the
;; approximate instruction "sin.approx".
1298(define_insn "sinsf2"
1299  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1300	(unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1301		   UNSPEC_SIN))]
1302  "flag_unsafe_math_optimizations"
1303  "%.\\tsin.approx%t0\\t%0, %1;")
1304
;; SFmode cosine as an UNSPEC_COS, enabled only under
;; flag_unsafe_math_optimizations because it is emitted as the
;; approximate instruction "cos.approx".
1305(define_insn "cossf2"
1306  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1307	(unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1308		   UNSPEC_COS))]
1309  "flag_unsafe_math_optimizations"
1310  "%.\\tcos.approx%t0\\t%0, %1;")
1311
;; SFmode base-2 logarithm as an UNSPEC_LOG2, enabled only under
;; flag_unsafe_math_optimizations because it is emitted as the
;; approximate instruction "lg2.approx".
1312(define_insn "log2sf2"
1313  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1314	(unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1315		   UNSPEC_LOG2))]
1316  "flag_unsafe_math_optimizations"
1317  "%.\\tlg2.approx%t0\\t%0, %1;")
1318
;; SFmode base-2 exponential as an UNSPEC_EXP2, enabled only under
;; flag_unsafe_math_optimizations because it is emitted as the
;; approximate instruction "ex2.approx".
1319(define_insn "exp2sf2"
1320  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1321	(unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1322		   UNSPEC_EXP2))]
1323  "flag_unsafe_math_optimizations"
1324  "%.\\tex2.approx%t0\\t%0, %1;")
1325
;; Infinity test producing a BImode predicate: operand 0 is set from an
;; UNSPEC_ISINF of the SDFM register in operand 1.  Emitted as
;; "testp.infinite"; note the type suffix is taken from the source
;; operand (%t1), not the predicate destination.
1326(define_insn "setcc_isinf<mode>"
1327  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
1328	(unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1329		   UNSPEC_ISINF))]
1330  ""
1331  "%.\\ttestp.infinite%t1\\t%0, %1;")
1332
;; isinf expander with an SImode result.  Computes the test into a fresh
;; BImode pseudo with gen_setcc_isinf<mode>, then converts that predicate
;; to SImode in operand 0 with gen_setccsi_from_bi.  DONE suppresses the
;; pattern written above.
1333(define_expand "isinf<mode>2"
1334  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1335	(unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1336		   UNSPEC_ISINF))]
1337  ""
1338{
1339  rtx pred = gen_reg_rtx (BImode);
1340  emit_insn (gen_setcc_isinf<mode> (pred, operands[1]));
1341  emit_insn (gen_setccsi_from_bi (operands[0], pred));
1342  DONE;
1343})
1344
1345;; HFmode floating point arithmetic.
1346
;; HFmode addition, available only when TARGET_SM53 holds.  Both sources
;; must be registers.  Emitted as "add.f16".
1347(define_insn "addhf3"
1348  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1349	(plus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1350		 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1351  "TARGET_SM53"
1352  "%.\\tadd.f16\\t%0, %1, %2;")
1353
;; HFmode subtraction, available only when TARGET_SM53 holds.  Both
;; sources must be registers.  Emitted as "sub.f16".
1354(define_insn "subhf3"
1355  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1356	(minus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1357		  (match_operand:HF 2 "nvptx_register_operand" "R")))]
1358  "TARGET_SM53"
1359  "%.\\tsub.f16\\t%0, %1, %2;")
1360
;; HFmode multiplication, available only when TARGET_SM53 holds.  Both
;; sources must be registers.  Emitted as "mul.f16".
1361(define_insn "mulhf3"
1362  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1363	(mult:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1364		 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1365  "TARGET_SM53"
1366  "%.\\tmul.f16\\t%0, %1, %2;")
1367
;; HFmode fused multiply-add, available only when TARGET_SM53 holds.
;; Operand 1 must be a register; operands 2 and 3 may be registers or
;; floating constants ("RF").  Emitted as "fma" with the %# modifier
;; followed by a literal ".f16" suffix.
1368(define_insn "fmahf4"
1369  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1370	(fma:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1371		(match_operand:HF 2 "nvptx_nonmemory_operand" "RF")
1372		(match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))]
1373  "TARGET_SM53"
1374  "%.\\tfma%#.f16\\t%0, %1, %2, %3;")
1375
;; HFmode negation done with integer bit manipulation rather than an f16
;; arithmetic instruction: "xor.b16" with -32768 (0x8000 as a 16-bit
;; value) flips the top bit, which is the sign bit of the half-precision
;; encoding.  The insn condition is empty, so unlike the f16 arithmetic
;; patterns it is not gated on TARGET_SM53.
1376(define_insn "neghf2"
1377  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1378	(neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1379  ""
1380  "%.\\txor.b16\\t%0, %1, -32768;")
1381
;; HFmode absolute value done with integer bit manipulation: "and.b16"
;; with 32767 (0x7fff) clears the top bit, which is the sign bit of the
;; half-precision encoding.  The insn condition is empty, so it is not
;; gated on TARGET_SM53.
1382(define_insn "abshf2"
1383  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1384	(abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1385  ""
1386  "%.\\tand.b16\\t%0, %1, 32767;")
1387
;; HFmode base-2 exponential as an UNSPEC_EXP2.  Requires both TARGET_SM75
;; and flag_unsafe_math_optimizations; emitted as the approximate
;; instruction "ex2.approx.f16".
1388(define_insn "exp2hf2"
1389  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1390	(unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
1391		   UNSPEC_EXP2))]
1392  "TARGET_SM75 && flag_unsafe_math_optimizations"
1393  "%.\\tex2.approx.f16\\t%0, %1;")
1394
;; Hyperbolic tangent for the HSFM modes as an UNSPEC_TANH.  Requires both
;; TARGET_SM75 and flag_unsafe_math_optimizations; emitted as the
;; approximate instruction "tanh.approx" with the type suffix taken from
;; operand 0.
1395(define_insn "tanh<mode>2"
1396  [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R")
1397	(unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")]
1398		     UNSPEC_TANH))]
1399  "TARGET_SM75 && flag_unsafe_math_optimizations"
1400  "%.\\ttanh.approx%t0\\t%0, %1;")
1401
1402;; HFmode floating point minimum and maximum.
1403
;; HFmode minimum (smin rtx), available only when TARGET_SM80 holds.
;; Both sources must be registers.  Emitted as "min.f16".
1404(define_insn "sminhf3"
1405  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1406	(smin:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1407		 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1408  "TARGET_SM80"
1409  "%.\\tmin.f16\\t%0, %1, %2;")
1410
;; HFmode maximum (smax rtx), available only when TARGET_SM80 holds.
;; Both sources must be registers.  Emitted as "max.f16".
1411(define_insn "smaxhf3"
1412  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1413	(smax:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1414		 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1415  "TARGET_SM80"
1416  "%.\\tmax.f16\\t%0, %1, %2;")
1417
1418;; Conversions involving floating point
1419
;; Widen SFmode to DFmode (float_extend).  Emitted as "cvt" followed by
;; the destination and then the source type suffix; no %# modifier is
;; used for this widening conversion.
1420(define_insn "extendsfdf2"
1421  [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1422	(float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1423  ""
1424  "%.\\tcvt%t0%t1\\t%0, %1;")
1425
;; Narrow DFmode to SFmode (float_truncate).  Emitted as "cvt" with the
;; %# modifier, then the destination and source type suffixes.
1426(define_insn "truncdfsf2"
1427  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1428	(float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1429  ""
1430  "%.\\tcvt%#%t0%t1\\t%0, %1;")
1431
;; Convert an unsigned SImode integer to an SDFM float (unsigned_float).
;; Emitted as "cvt" with the %# modifier, the destination type suffix and
;; a source suffix built from the literal ".u" plus %T1.
1432(define_insn "floatunssi<mode>2"
1433  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1434	(unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1435  ""
1436  "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1437
;; Convert a signed SImode integer to an SDFM float (float rtx).  Emitted
;; as "cvt" with the %# modifier, the destination type suffix and a
;; source suffix built from the literal ".s" plus %T1.
1438(define_insn "floatsi<mode>2"
1439  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1440	(float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1441  ""
1442  "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1443
;; Convert an unsigned DImode integer to an SDFM float (unsigned_float).
;; Same template as the SImode variant: "cvt", %#, destination suffix,
;; then ".u" plus %T1 for the source.
1444(define_insn "floatunsdi<mode>2"
1445  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1446	(unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1447  ""
1448  "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1449
;; Convert a signed DImode integer to an SDFM float (float rtx).  Same
;; template as the SImode variant: "cvt", %#, destination suffix, then
;; ".s" plus %T1 for the source.
1450(define_insn "floatdi<mode>2"
1451  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1452	(float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1453  ""
1454  "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1455
;; Convert an SDFM float to an unsigned SImode integer (unsigned_fix).
;; Emitted as "cvt.rzi" with a destination suffix built from the literal
;; ".u" plus %T0, followed by the source type suffix.
1456(define_insn "fixuns_trunc<mode>si2"
1457  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1458	(unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1459  ""
1460  "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1461
;; Convert an SDFM float to a signed SImode integer (fix rtx).  Emitted
;; as "cvt.rzi" with a destination suffix built from the literal ".s"
;; plus %T0, followed by the source type suffix.
1462(define_insn "fix_trunc<mode>si2"
1463  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1464	(fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1465  ""
1466  "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1467
;; Convert an SDFM float to an unsigned DImode integer (unsigned_fix).
;; Same template as the SImode variant: "cvt.rzi", ".u" plus %T0, then
;; the source type suffix.
1468(define_insn "fixuns_trunc<mode>di2"
1469  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1470	(unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1471  ""
1472  "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1473
;; Convert an SDFM float to a signed DImode integer (fix rtx).  Same
;; template as the SImode variant: "cvt.rzi", ".s" plus %T0, then the
;; source type suffix.
1474(define_insn "fix_trunc<mode>di2"
1475  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1476	(fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1477  ""
1478  "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1479
;; Int iterator over the four float-to-integral-float rounding unspecs,
;; with two attributes keyed on it: fpint_name gives the pattern-name
;; prefix (floor, btrunc, ceil, nearbyint) and fpint_roundingmode gives
;; the text inserted after "cvt" in the output template.  The nearbyint
;; entry is "%#i", i.e. it uses the %# modifier instead of a fixed
;; rounding-mode suffix.
1480(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1481			    UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
1482(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1483			     (UNSPEC_FPINT_BTRUNC "btrunc")
1484			     (UNSPEC_FPINT_CEIL "ceil")
1485			     (UNSPEC_FPINT_NEARBYINT "nearbyint")])
1486(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1487				     (UNSPEC_FPINT_BTRUNC ".rzi")
1488				     (UNSPEC_FPINT_CEIL ".rpi")
1489				     (UNSPEC_FPINT_NEARBYINT "%#i")])
1490
;; Round an SDFM value to an integral value in the same floating mode.
;; One insn is generated per FPINT unspec and per SDFM mode, named from
;; fpint_name (e.g. floor<mode>2).  Emitted as "cvt" followed by the
;; fpint_roundingmode text and the destination and source type suffixes.
1491(define_insn "<FPINT:fpint_name><SDFM:mode>2"
1492  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1493	(unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1494		     FPINT))]
1495  ""
1496  "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
1497
;; Int iterator over the floor and ceil unspecs only, used for the
;; float-to-integer-mode conversions.  fpint2_name gives the pattern-name
;; prefix (lfloor, lceil) and fpint2_roundingmode the suffix inserted
;; after "cvt" (".rmi", ".rpi").
1498(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
1499(define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1500			     (UNSPEC_FPINT_CEIL "lceil")])
1501(define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1502				     (UNSPEC_FPINT_CEIL ".rpi")])
1503
;; Round an SDFM value down or up and deliver the result in an integer
;; (SDIM) register.  One insn is generated per FPINT2 unspec and per
;; SDFM/SDIM mode pair.  Emitted as "cvt" with the fpint2_roundingmode
;; suffix, a destination suffix of ".s" plus %T0, and the source type
;; suffix.
1504(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1505  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1506	(unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1507		     FPINT2))]
1508  ""
1509  "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
1510
;; Widen HFmode to an SDFM mode (float_extend), available only when
;; TARGET_SM53 holds.  Emitted as "cvt" with the destination and source
;; type suffixes and no %# modifier.
1511(define_insn "extendhf<mode>2"
1512  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1513	(float_extend:SDFM (match_operand:HF 1 "nvptx_register_operand" "R")))]
1514  "TARGET_SM53"
1515  "%.\\tcvt%t0%t1\\t%0, %1;")
1516
;; Narrow an SDFM mode to HFmode (float_truncate), available only when
;; TARGET_SM53 holds.  Emitted as "cvt" with the %# modifier, then the
;; destination and source type suffixes.
1517(define_insn "trunc<mode>hf2"
1518  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1519	(float_truncate:HF (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1520  "TARGET_SM53"
1521  "%.\\tcvt%#%t0%t1\\t%0, %1;")
1522
1523;; Vector operations
1524
;; Insert scalar operand 1 into element 0 of vector register operand 0,
;; leaving the other elements unchanged (match_dup 0).  Selected by
;; vec_merge mask 1; emitted as a "mov" to the ".x" component.
1525(define_insn "*vec_set<mode>_0"
1526  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1527	(vec_merge:VECIM
1528	  (vec_duplicate:VECIM
1529	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1530	  (match_dup 0)
1531	  (const_int 1)))]
1532  ""
1533  "%.\\tmov%t1\\t%0.x, %1;")
1534
;; Insert scalar operand 1 into element 1 of vector register operand 0,
;; leaving the other elements unchanged (match_dup 0).  Selected by
;; vec_merge mask 2; emitted as a "mov" to the ".y" component.
1535(define_insn "*vec_set<mode>_1"
1536  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1537	(vec_merge:VECIM
1538	  (vec_duplicate:VECIM
1539	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1540	  (match_dup 0)
1541	  (const_int 2)))]
1542  ""
1543  "%.\\tmov%t1\\t%0.y, %1;")
1544
;; Insert scalar operand 1 into element 2 of vector register operand 0,
;; leaving the other elements unchanged (match_dup 0).  Selected by
;; vec_merge mask 4; emitted as a "mov" to the ".z" component.
1545(define_insn "*vec_set<mode>_2"
1546  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1547	(vec_merge:VECIM
1548	  (vec_duplicate:VECIM
1549	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1550	  (match_dup 0)
1551	  (const_int 4)))]
1552  ""
1553  "%.\\tmov%t1\\t%0.z, %1;")
1554
;; Insert scalar operand 1 into element 3 of vector register operand 0,
;; leaving the other elements unchanged (match_dup 0).  Selected by
;; vec_merge mask 8; emitted as a "mov" to the ".w" component.
1555(define_insn "*vec_set<mode>_3"
1556  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1557	(vec_merge:VECIM
1558	  (vec_duplicate:VECIM
1559	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1560	  (match_dup 0)
1561	  (const_int 8)))]
1562  ""
1563  "%.\\tmov%t1\\t%0.w, %1;")
1564
;; Vector element insert expander.  Operand 0 is the vector register,
;; operand 1 the scalar to insert and operand 2 the constant element index
;; (nvptx_vector_index_operand).  The index is turned into a one-hot mask
;; (1 << index) and a (vec_merge (vec_duplicate op1) op0 mask) set is
;; emitted, which is the form the *vec_set<mode>_N insns match.
1565(define_expand "vec_set<mode>"
1566  [(match_operand:VECIM 0 "nvptx_register_operand")
1567   (match_operand:<VECELEM> 1 "nvptx_register_operand")
1568   (match_operand:SI 2 "nvptx_vector_index_operand")]
1569  ""
1570{
1571  enum machine_mode mode = GET_MODE (operands[0]);
1572  int mask = 1 << INTVAL (operands[2]);
1573  rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
1574  tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
1575  emit_insn (gen_rtx_SET (operands[0], tmp));
1576  DONE;
1577})
1578
;; Extract one element of vector register operand 1 into scalar register
;; operand 0.  Operand 2 is the constant element index; its integer value
;; indexes a four-entry table of "mov" templates reading the .x, .y, .z
;; or .w component.  The table lookup is unchecked, so it relies on
;; nvptx_vector_index_operand admitting only indices 0..3 (presumably
;; fewer for narrower vectors -- confirm in the predicate definition).
1579(define_insn "vec_extract<mode><Vecelem>"
1580  [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
1581	(vec_select:<VECELEM>
1582	  (match_operand:VECIM 1 "nvptx_register_operand" "R")
1583	  (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
1584  ""
1585{
1586  static const char *const asms[4] = {
1587    "%.\\tmov%t0\\t%0, %1.x;",
1588    "%.\\tmov%t0\\t%0, %1.y;",
1589    "%.\\tmov%t0\\t%0, %1.z;",
1590    "%.\\tmov%t0\\t%0, %1.w;"
1591  };
1592  return asms[INTVAL (operands[2])];
1593})
1594
1595;; Miscellaneous
1596
;; No-operation insn.  The pattern is the bare (const_int 0) and the
;; output template is empty, so no assembly text is produced for it.
1597(define_insn "nop"
1598  [(const_int 0)]
1599  ""
1600  "")
1601
;; Emits the literal text "exit;".  The pattern is the bare (const_int 1),
;; which distinguishes it from nop and fake_nop.
1602(define_insn "exit"
1603  [(const_int 1)]
1604  ""
1605  "exit;")
1606
;; A no-op that, unlike "nop", does produce assembly text: a braced block
;; declaring two local .u32 registers and moving one into the other.  The
;; pattern is the bare (const_int 2).
;; NOTE(review): presumably used where an actual instruction must be
;; present in the output -- confirm at the call sites.
1607(define_insn "fake_nop"
1608  [(const_int 2)]
1609  ""
1610  "{
1611     .reg .u32 %%nop_src;
1612     .reg .u32 %%nop_dst;
1613     mov.u32 %%nop_dst, %%nop_src;
1614   }")
1615
;; Function return.  The assembly text is whatever nvptx_output_return
;; returns.  Marked non-predicable.
1616(define_insn "return"
1617  [(return)]
1618  ""
1619{
1620  return nvptx_output_return ();
1621}
1622  [(set_attr "predicable" "no")])
1623
;; Function epilogue expander.  When TARGET_SOFT_STACK is set, first emits
;; a set_softstack insn on the Pmode register SOFTSTACK_PREV_REGNUM
;; (presumably restoring the soft-stack pointer saved on entry -- confirm
;; in the prologue code), then always emits the return jump.  DONE
;; suppresses the placeholder clobber pattern.
1624(define_expand "epilogue"
1625  [(clobber (const_int 0))]
1626  ""
1627{
1628  if (TARGET_SOFT_STACK)
1629    emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1630						      SOFTSTACK_PREV_REGNUM)));
1631  emit_jump_insn (gen_return ());
1632  DONE;
1633})
1634
;; Nonlocal goto is not supported on this target: the expander reports
;; that through sorry () and emits a nop so that expansion still yields
;; an insn.  The four operands are ignored.
1635(define_expand "nonlocal_goto"
1636  [(match_operand 0 "" "")
1637   (match_operand 1 "" "")
1638   (match_operand 2 "" "")
1639   (match_operand 3 "" "")]
1640  ""
1641{
1642  sorry ("target cannot support nonlocal goto");
1643  emit_insn (gen_nop ());
1644  DONE;
1645})
1646
;; Receiver side of the unsupported nonlocal goto: reports the same
;; sorry () diagnostic.  There is no DONE, so the (const_int 0) pattern
;; written above is what the expander produces (the same pattern the
;; "nop" insn matches).
1647(define_expand "nonlocal_goto_receiver"
1648  [(const_int 0)]
1649  ""
1650{
1651  sorry ("target cannot support nonlocal goto");
1652})
1653
;; Dynamic stack allocation.  Operand 0 receives the address of the new
;; block and operand 1 is the size.  With TARGET_SOFT_STACK the stack
;; pointer is decremented by the size, the new value is published with a
;; set_softstack insn, and operand 0 is loaded from
;; virtual_stack_dynamic_rtx.  Without soft stacks the target cannot do
;; this: a sorry () diagnostic is issued and a nop emitted instead.
1654(define_expand "allocate_stack"
1655  [(match_operand 0 "nvptx_register_operand")
1656   (match_operand 1 "nvptx_register_operand")]
1657  ""
1658{
1659  if (TARGET_SOFT_STACK)
1660    {
1661      emit_move_insn (stack_pointer_rtx,
1662		      gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1663      emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1664      emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1665      DONE;
1666    }
1667  /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1668     only)  but notes it is not implemented.  The assembler emits a
1669     confused error message.  Issue a blunt one now instead.  */
1670  sorry ("target cannot support alloca");
1671  emit_insn (gen_nop ());
1672  DONE;
1673})
1674
;; Soft-stack pointer update, available only when TARGET_SOFT_STACK is
;; set.  The pattern is an UNSPEC_SET_SOFTSTACK of a P-mode register; the
;; "@" name prefix provides the gen_set_softstack (mode, ...) form used by
;; the expanders in this file.  The assembly text comes from
;; nvptx_output_set_softstack, given the register number of operand 0.
1675(define_insn "@set_softstack_<mode>"
1676  [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1677	   UNSPEC_SET_SOFTSTACK)]
1678  "TARGET_SOFT_STACK"
1679{
1680  return nvptx_output_set_softstack (REGNO (operands[0]));
1681})
1682
;; Restore the stack pointer at the end of a block.  With
;; TARGET_SOFT_STACK, operand 1 is copied into operand 0 and the new value
;; is published with a set_softstack insn; otherwise nothing is emitted.
1683(define_expand "restore_stack_block"
1684  [(match_operand 0 "register_operand" "")
1685   (match_operand 1 "register_operand" "")]
1686  ""
1687{
1688  if (TARGET_SOFT_STACK)
1689    {
1690      emit_move_insn (operands[0], operands[1]);
1691      emit_insn (gen_set_softstack (Pmode, operands[0]));
1692    }
1693  DONE;
1694})
1695
;; Function-level stack restore.  Deliberately a no-op: the body is just
;; DONE, so no insns are emitted and both operands are ignored.
1696(define_expand "restore_stack_function"
1697  [(match_operand 0 "register_operand" "")
1698   (match_operand 1 "register_operand" "")]
1699  ""
1700{
1701  DONE;
1702})
1703
;; Unconditional trap (trap_if with a constant-true condition).  Emits
;; "trap;" immediately followed by "exit;".
1704(define_insn "trap"
1705  [(trap_if (const_int 1) (const_int 0))]
1706  ""
1707  "trap; exit;")
1708
;; Conditional trap taken when the BImode predicate in operand 0 is
;; non-zero.  Emits "trap;" and "exit;", each prefixed by operand 0
;; printed through the %j modifier (presumably the guard predicate --
;; confirm in the target's operand printer).  Marked non-predicable.
1709(define_insn "trap_if_true"
1710  [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1711		(const_int 0))
1712	    (const_int 0))]
1713  ""
1714  "%j0 trap; %j0 exit;"
1715  [(set_attr "predicable" "no")])
1716
;; Conditional trap taken when the BImode predicate in operand 0 equals
;; zero.  Emits "trap;" and "exit;", each prefixed by operand 0 printed
;; through the %J modifier (presumably the negated guard -- confirm in
;; the target's operand printer).  Marked non-predicable.
1717(define_insn "trap_if_false"
1718  [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1719		(const_int 0))
1720	    (const_int 0))]
1721  ""
1722  "%J0 trap; %J0 exit;"
1723  [(set_attr "predicable" "no")])
1724
;; Compare-and-trap expander for the SDIM modes.  Operand 0 is the
;; comparison of operands 1 and 2; operand 3 must be a const0_operand.
;; The comparison is expanded with nvptx_expand_compare and the returned
;; rtx is handed to gen_trap_if_true as its predicate operand.
1725(define_expand "ctrap<mode>4"
1726  [(trap_if (match_operator 0 "nvptx_comparison_operator"
1727			    [(match_operand:SDIM 1 "nvptx_register_operand")
1728			     (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1729	    (match_operand 3 "const0_operand"))]
1730  ""
1731{
1732  rtx t = nvptx_expand_compare (operands[0]);
1733  emit_insn (gen_trap_if_true (t));
1734  DONE;
1735})
1736
;; Read the size of an OpenACC parallelism dimension into SImode
;; operand 0.  Operand 1 is a constant selecting the dimension; it
;; indexes a three-entry table that moves %nctaid.x (gang), %ntid.y
;; (worker) or %ntid.x (vector) into the destination.  The lookup is
;; unchecked, so operand 1 must be 0, 1 or 2.
1737(define_insn "oacc_dim_size"
1738  [(set (match_operand:SI 0 "nvptx_register_operand" "")
1739	(unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1740		   UNSPEC_DIM_SIZE))]
1741  ""
1742{
1743  static const char *const asms[] =
1744{ /* Must match oacc_loop_levels ordering.  */
1745  "%.\\tmov.u32\\t%0, %%nctaid.x;",	/* gang */
1746  "%.\\tmov.u32\\t%0, %%ntid.y;",	/* worker */
1747  "%.\\tmov.u32\\t%0, %%ntid.x;",	/* vector */
1748};
1749  return asms[INTVAL (operands[1])];
1750})
1751
;; Read the position within an OpenACC parallelism dimension into SImode
;; operand 0.  Unlike oacc_dim_size this is an unspec_volatile
;; (UNSPECV_DIM_POS).  Operand 1 is a constant selecting the dimension; it
;; indexes a three-entry table that moves %ctaid.x (gang), %tid.y
;; (worker) or %tid.x (vector) into the destination.  The lookup is
;; unchecked, so operand 1 must be 0, 1 or 2.
1752(define_insn "oacc_dim_pos"
1753  [(set (match_operand:SI 0 "nvptx_register_operand" "")
1754	(unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1755			    UNSPECV_DIM_POS))]
1756  ""
1757{
1758  static const char *const asms[] =
1759{ /* Must match oacc_loop_levels ordering.  */
1760  "%.\\tmov.u32\\t%0, %%ctaid.x;",	/* gang */
1761  "%.\\tmov.u32\\t%0, %%tid.y;",	/* worker */
1762  "%.\\tmov.u32\\t%0, %%tid.x;",	/* vector */
1763};
1764  return asms[INTVAL (operands[1])];
1765})
1766
;; Marker insn (UNSPECV_FORK) carrying a constant in operand 0.  The only
;; text it emits is the "//"-prefixed line "// fork %0;", so it serves as
;; an annotation in the instruction stream rather than an operation.
;; Marked non-predicable.
1767(define_insn "nvptx_fork"
1768  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1769		       UNSPECV_FORK)]
1770  ""
1771  "// fork %0;"
1772  [(set_attr "predicable" "no")])
1773
;; Marker insn (UNSPECV_FORKED) carrying a constant in operand 0.  The
;; only text it emits is the "//"-prefixed line "// forked %0;".
;; Marked non-predicable.
1774(define_insn "nvptx_forked"
1775  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1776		       UNSPECV_FORKED)]
1777  ""
1778  "// forked %0;"
1779  [(set_attr "predicable" "no")])
1780
;; Marker insn (UNSPECV_JOINING) carrying a constant in operand 0.  The
;; only text it emits is the "//"-prefixed line "// joining %0;".
;; Marked non-predicable.
1781(define_insn "nvptx_joining"
1782  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1783		       UNSPECV_JOINING)]
1784  ""
1785  "// joining %0;"
1786  [(set_attr "predicable" "no")])
1787
;; Marker insn (UNSPECV_JOIN) carrying a constant in operand 0.  The only
;; text it emits is the "//"-prefixed line "// join %0;".
;; Marked non-predicable.
1788(define_insn "nvptx_join"
1789  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1790		       UNSPECV_JOIN)]
1791  ""
1792  "// join %0;"
1793  [(set_attr "predicable" "no")])
1794
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; the constant in operand 2 is then passed
;; to nvptx_expand_oacc_fork, which emits the actual fork sequence.  DONE
;; suppresses the pattern written above.
1795(define_expand "oacc_fork"
1796  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1797        (match_operand:SI 1 "general_operand" ""))
1798   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1799		        UNSPECV_FORKED)]
1800  ""
1801{
1802  if (operands[0] != const0_rtx)
1803    emit_move_insn (operands[0], operands[1]);
1804  nvptx_expand_oacc_fork (INTVAL (operands[2]));
1805  DONE;
1806})
1807
;; OpenACC join expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; the constant in operand 2 is then passed
;; to nvptx_expand_oacc_join, which emits the actual join sequence.  DONE
;; suppresses the pattern written above.
1808(define_expand "oacc_join"
1809  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1810        (match_operand:SI 1 "general_operand" ""))
1811   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1812		        UNSPECV_JOIN)]
1813  ""
1814{
1815  if (operands[0] != const0_rtx)
1816    emit_move_insn (operands[0], operands[1]);
1817  nvptx_expand_oacc_join (INTVAL (operands[2]));
1818  DONE;
1819})
1820
1821;; only 32-bit shuffles exist.
;; Warp shuffle of a BITS-mode register.  Operand 1 is the value,
;; operand 2 the lane or offset argument and operand 3 a constant printed
;; through %S3 as part of the mnemonic (presumably the shuffle kind --
;; confirm in the operand printer).  With TARGET_PTX_6_0 the "shfl.sync"
;; form is emitted with an explicit 0xffffffff member mask; otherwise the
;; older "shfl" form is used.  Both pass 31 as the fourth argument.
1822(define_insn "nvptx_shuffle<mode>"
1823  [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1824	(unspec:BITS
1825		[(match_operand:BITS 1 "nvptx_register_operand" "R")
1826		 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1827		 (match_operand:SI 3 "const_int_operand" "n")]
1828		  UNSPEC_SHUFFLE))]
1829  ""
1830  {
1831    if (TARGET_PTX_6_0)
1832      return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;";
1833    else
1834      return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;";
1835  })
1836
;; Warp ballot: SImode operand 0 is set from an UNSPEC_VOTE_BALLOT of the
;; BImode predicate in operand 1.  With TARGET_PTX_6_0 the
;; "vote.sync.ballot.b32" form is emitted with an explicit 0xffffffff
;; member mask; otherwise the older "vote.ballot.b32" form is used.
1837(define_insn "nvptx_vote_ballot"
1838  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1839	(unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1840		   UNSPEC_VOTE_BALLOT))]
1841  ""
1842  {
1843    if (TARGET_PTX_6_0)
1844      return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;";
1845    else
1846      return "%.\\tvote.ballot.b32\\t%0, %1;";
1847  })
1848
1849;; Patterns for OpenMP SIMD-via-SIMT lowering
1850
;; SIMT region entry insn: P-mode operand 0 is set from an
;; UNSPECV_SIMT_ENTER of operands 1 and 2 (each a register or immediate).
;; The "@" name prefix provides the gen_omp_simt_enter (mode, ...) form.
;; The assembly text comes from nvptx_output_simt_enter, given all three
;; operands.
1851(define_insn "@omp_simt_enter_<mode>"
1852  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1853	(unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1854			    (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1855			   UNSPECV_SIMT_ENTER))]
1856  ""
1857{
1858  return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
1859})
1860
;; SIMT region entry expander.  Operand 1 is the size and operand 2 the
;; constant alignment.  Before emitting the Pmode omp_simt_enter insn it
;; updates per-function state in cfun->machine:
;;  - simt_stack_size becomes HOST_WIDE_INT_M1U when the size is not a
;;    compile-time constant, otherwise the maximum of the recorded and
;;    requested sizes;
;;  - simt_stack_align becomes the maximum of the recorded and requested
;;    alignments;
;;  - has_simtreg is set.
1861(define_expand "omp_simt_enter"
1862  [(match_operand 0 "nvptx_register_operand" "=R")
1863   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1864   (match_operand 2 "const_int_operand" "n")]
1865  ""
1866{
1867  if (!CONST_INT_P (operands[1]))
1868    cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1869  else
1870    cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1871					  cfun->machine->simt_stack_size);
1872  cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1873					 cfun->machine->simt_stack_align);
1874  cfun->machine->has_simtreg = true;
1875  emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
1876  DONE;
1877})
1878
;; SIMT region exit expander.  Emits the Pmode omp_simt_exit insn on
;; operand 0, followed by a warp-level check: nvptx_warpsync when
;; TARGET_PTX_6_0 holds, nvptx_uniform_warp_check otherwise.
1879(define_expand "omp_simt_exit"
1880  [(match_operand 0 "nvptx_register_operand" "R")]
1881  ""
1882{
1883  emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1884  if (TARGET_PTX_6_0)
1885    emit_insn (gen_nvptx_warpsync ());
1886  else
1887    emit_insn (gen_nvptx_uniform_warp_check ());
1888  DONE;
1889})
1890
;; SIMT region exit insn: an UNSPECV_SIMT_EXIT of the P-mode register in
;; operand 0, with no destination.  The "@" name prefix provides the
;; gen_omp_simt_exit (mode, ...) form.  The assembly text comes from
;; nvptx_output_simt_exit.
1891(define_insn "@omp_simt_exit_<mode>"
1892  [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1893		    UNSPECV_SIMT_EXIT)]
1894  ""
1895{
1896  return nvptx_output_simt_exit (operands[0]);
1897})
1898
1899;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The value is modelled as an UNSPEC_LANEID of a dummy (const_int 0) and
;; is read by moving the %laneid special register into operand 0.
1900(define_insn "omp_simt_lane"
1901  [(set (match_operand:SI 0 "nvptx_register_operand" "")
1902	(unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1903  ""
1904  "%.\\tmov.u32\\t%0, %%laneid;")
1905
;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
;; place a compiler barrier to disallow unrolling/peeling the containing loop.
;; Operand 0 is the SImode result register, operand 1 the SImode input.
(define_expand "omp_simt_ordered"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  /* The barrier is the nvptx_nounroll insn.  */
  emit_insn (gen_nvptx_nounroll ());
  DONE;
})
1917
;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; across lanes.  Operand 0 receives the exchanged value, operand 1 is the
;; value contributed by this lane and operand 2 is the SImode shuffle
;; argument; the sequence itself comes from nvptx_gen_shuffle.
(define_expand "omp_simt_xchg_bfly"
  [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
   (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  rtx shuffle = nvptx_gen_shuffle (operands[0], operands[1], operands[2],
				   SHUFFLE_BFLY);
  emit_insn (shuffle);
  DONE;
})
1930
;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
;; from lane given by index in operand 2 to operand 0 in all lanes.
;; The sequence itself comes from nvptx_gen_shuffle with SHUFFLE_IDX.
(define_expand "omp_simt_xchg_idx"
  [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
   (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  rtx shuffle = nvptx_gen_shuffle (operands[0], operands[1], operands[2],
				   SHUFFLE_IDX);
  emit_insn (shuffle);
  DONE;
})
1943
;; Implement IFN_GOMP_SIMT_VOTE_ANY:
;; set operand 0 to zero iff all lanes supply zero in operand 1.
;; Operand 0 is the ballot result itself, so any non-zero value means that
;; at least one lane voted.
(define_expand "omp_simt_vote_any"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  /* Turn operand 1 into a predicate that is set where it is non-zero,
     then ballot that predicate.  */
  rtx nonzero_p = gen_reg_rtx (BImode);
  rtx test = gen_rtx_NE (BImode, operands[1], const0_rtx);
  emit_move_insn (nonzero_p, test);
  emit_insn (gen_nvptx_vote_ballot (operands[0], nonzero_p));
  DONE;
})
1956
;; Implement IFN_GOMP_SIMT_LAST_LANE:
;; set operand 0 to the lowest lane index that passed non-zero in operand 1.
;; This is the count of trailing zeros of the ballot mask.
(define_expand "omp_simt_last_lane"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx nonzero_p = gen_reg_rtx (BImode);
  rtx ballot = gen_reg_rtx (SImode);

  /* Ballot the lanes whose operand 1 is non-zero ...  */
  rtx test = gen_rtx_NE (BImode, operands[1], const0_rtx);
  emit_move_insn (nonzero_p, test);
  emit_insn (gen_nvptx_vote_ballot (ballot, nonzero_p));

  /* ... and pick the lowest set bit of the resulting mask.  */
  emit_insn (gen_ctzsi2 (operands[0], ballot));
  DONE;
})
1971
;; Extract the parts of a 64-bit object into two 32-bit ints.
;; Operand 2 is the 64-bit (BITD) source register.  Operands 0 and 1 are the
;; two SImode results, tagged (const_int 0) and (const_int 1) respectively,
;; and are written together by one mov.b64 with a {%0,%1} destination.
;; NOTE(review): presumably operand 0 is the low half and operand 1 the high
;; half, per PTX vector-mov semantics -- confirm against the PTX ISA.
(define_insn "unpack<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
		    (const_int 0)] UNSPEC_BIT_CONV))
   (set (match_operand:SI 1 "nvptx_register_operand" "=R")
        (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
  ""
  "%.\\tmov.b64\\t{%0,%1}, %2;")
1981
;; Pack two 32-bit ints into a 64-bit object.
;; Operands 1 and 2 are the SImode inputs, combined into the 64-bit (BITD)
;; register operand 0 by one mov.b64 with a {%1,%2} source.  This is the
;; inverse of the unpack pattern above.
(define_insn "packsi<mode>2"
  [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
        (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
		      (match_operand:SI 2 "nvptx_register_operand" "R")]
		    UNSPEC_BIT_CONV))]
  ""
  "%.\\tmov.b64\\t%0, {%1,%2};")
1990
1991;; Atomic insns.
1992
;; Compare-and-swap expander.  It emits the local or the generic CAS insn
;; depending on nvptx_mem_local_p, then derives the boolean success result
;; by comparing the returned old value with the expected value.  Only the
;; success memory model (operand 6) is forwarded to the insn; is_weak and
;; the failure model are not used.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "nvptx_register_operand")	;; bool success output
   (match_operand:SDIM 1 "nvptx_register_operand")	;; oldval output
   (match_operand:SDIM 2 "memory_operand")		;; memory
   (match_operand:SDIM 3 "nvptx_register_operand")	;; expected input
   (match_operand:SDIM 4 "nvptx_register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")		;; is_weak
   (match_operand:SI 6 "const_int_operand")		;; success model
   (match_operand:SI 7 "const_int_operand")]		;; failure model
  ""
{
  rtx cas;
  if (nvptx_mem_local_p (operands[2]))
    cas = gen_atomic_compare_and_swap<mode>_1_local (operands[1], operands[2],
						      operands[3], operands[4],
						      operands[6]);
  else
    cas = gen_atomic_compare_and_swap<mode>_1 (operands[1], operands[2],
						operands[3], operands[4],
						operands[6]);
  emit_insn (cas);

  /* The swap succeeded iff the old value equals the expected value.  */
  rtx success_p = gen_reg_rtx (BImode);
  rtx equal = gen_rtx_EQ (BImode, operands[1], operands[3]);
  emit_move_insn (success_p, equal);
  emit_insn (gen_sel_truesi (operands[0], success_p, GEN_INT (1),
			     GEN_INT (0)));
  DONE;
})
2018
;; Compare-and-swap on memory for which the expander found
;; nvptx_mem_local_p to hold.  No atom instruction is used: the sequence is
;; a plain load, a compare against operand 2, a store of operand 3
;; predicated on that compare, and a move of the loaded value to operand 0.
(define_insn "atomic_compare_and_swap<mode>_1_local"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
	(unspec_volatile:SDIM
	  [(match_operand:SDIM 1 "memory_operand" "+m")
	   (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
	   (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CAS_LOCAL))
   (set (match_dup 1)
	(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
  ""
  {
    /* The whole sequence lives in its own PTX scope so that the temporary
       registers it declares stay local to it.  */
    const char *const seq[] = {
      "{",
      "\\t.reg.pred\\t%%eq_p;",
      "\\t.reg%t0\\t%%val;",
      "\\tld%A1%t0\\t%%val,%1;",
      "\\tsetp.eq%t0\\t%%eq_p, %%val, %2;",
      "@%%eq_p\\tst%A1%t0\\t%1,%3;",
      "\\tmov%t0\\t%0,%%val;",
      "}",
      NULL
    };
    for (const char *const *line = seq; *line != NULL; line++)
      output_asm_insn (*line, operands);
    return "";
  }
  [(set_attr "predicable" "no")])
2043
;; Generic compare-and-swap, emitted as a single atom.cas through
;; nvptx_output_atomic_insn.  The trailing arguments of that call are 1, the
;; number of the memory operand, and 4, the number of the const_int operand
;; that the expander fills with its success memory model.
(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
	(unspec_volatile:SDIM
	  [(match_operand:SDIM 1 "memory_operand" "+m")
	   (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
	   (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CAS))
   (set (match_dup 1)
	(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
  ""
  {
    return nvptx_output_atomic_insn ("%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;",
				     operands, 1, 4);
  }
  [(set_attr "atomic" "true")])
2061
;; Atomic exchange: operand 0 receives the old contents of memory operand 1,
;; which is then overwritten with operand 2.  Memory for which
;; nvptx_mem_local_p holds is handled with a plain load/store sequence;
;; everything else uses atom.exch.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")	;; output
	(unspec_volatile:SDIM
	  [(match_operand:SDIM 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))]	;; input
  ""
  {
    if (!nvptx_mem_local_p (operands[1]))
      return nvptx_output_atomic_insn ("%.\tatom%A1.exch.b%T0\t%x0, %1, %2;",
				       operands, 1, 3);

    /* Local memory: load the old value, store the new one, and hand the
       old value back, all inside a private PTX scope.  */
    const char *const seq[] = {
      "{",
      "\\t.reg%t0\\t%%val;",
      "%.\\tld%A1%t0\\t%%val,%1;",
      "%.\\tst%A1%t0\\t%1,%2;",
      "%.\\tmov%t0\\t%0,%%val;",
      "}",
      NULL
    };
    for (const char *const *line = seq; *line != NULL; line++)
      output_asm_insn (*line, operands);
    return "";
  }
  [(set_attr "atomic" "true")])
2087
;; Atomic store expander.  In order of preference it emits: a plain move
;; when the address is based on a frame-related pointer register; the sm_70
;; store insn when TARGET_SM70; the atom.exch based store when the memory
;; may be shared.  Otherwise it FAILs so the generic expansion is used.
(define_expand "atomic_store<mode>"
  [(match_operand:SDIM 0 "memory_operand" "=m")		  ;; memory
   (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri")  ;; input
   (match_operand:SI 2 "const_int_operand")]		  ;; model
  ""
{
  struct address_info info;
  decompose_mem_address (&info, operands[0]);
  const bool frame_base_p = (info.base != NULL
			     && REG_P (*info.base)
			     && REGNO_PTR_FRAME_P (REGNO (*info.base)));
  if (frame_base_p)
    {
      emit_insn (gen_mov<mode> (operands[0], operands[1]));
      DONE;
    }

  rtx store;
  if (TARGET_SM70)
    store = gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1],
					       operands[2]);
  else if (nvptx_mem_maybe_shared_p (operands[0]))
    store = gen_nvptx_atomic_store<mode> (operands[0], operands[1],
					  operands[2]);
  else
    /* Fall back to expand_atomic_store.  */
    FAIL;

  emit_insn (store);
  DONE;
})
2119
;; Atomic store for TARGET_SM70, emitted as a plain st through
;; nvptx_output_atomic_insn.  The trailing arguments of that call are the
;; numbers of the memory operand (0) and of the memory-model operand (2).
(define_insn "nvptx_atomic_store_sm70<mode>"
  [(set (match_operand:SDIM 0 "memory_operand" "+m")	      ;; memory
       (unspec_volatile:SDIM
	 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
	  (match_operand:SI 2 "const_int_operand")]		;; model
	       UNSPECV_ST))]
  "TARGET_SM70"
  {
    return nvptx_output_atomic_insn ("%.\tst%A0.b%T0\t%0, %1;",
				     operands, 0, 2);
  }
  [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn.
2133
;; Atomic store for targets without TARGET_SM70: an atom.exch whose result
;; is discarded (destination "_").  The trailing arguments of
;; nvptx_output_atomic_insn are the numbers of the memory operand (0) and of
;; the memory-model operand (2).
(define_insn "nvptx_atomic_store<mode>"
  [(set (match_operand:SDIM 0 "memory_operand" "+m")	      ;; memory
       (unspec_volatile:SDIM
	 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
	  (match_operand:SI 2 "const_int_operand")]		;; model
	       UNSPECV_ST))]
  "!TARGET_SM70"
  {
    return nvptx_output_atomic_insn ("%.\tatom%A0.exch.b%T0\t_, %0, %1;",
				     operands, 0, 2);
  }
  [(set_attr "atomic" "true")])
2147
;; Integer atomic fetch-and-add: memory operand 1 is incremented by operand 2
;; and operand 0 receives the value the memory held before.  Memory for
;; which nvptx_mem_local_p holds is handled with a plain load/add/store
;; sequence; everything else uses atom.add.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
	(unspec_volatile:SDIM
	  [(plus:SDIM (match_dup 1)
		      (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
	(match_dup 1))]
  ""
  {
    if (!nvptx_mem_local_p (operands[1]))
      return nvptx_output_atomic_insn ("%.\\tatom%A1.add%t0\\t%x0, %1, %2;",
				       operands, 1, 3);

    /* Local memory: compute the update in scratch registers declared in a
       private PTX scope, and return the value loaded before the update.  */
    const char *const seq[] = {
      "{",
      "\\t.reg%t0\\t%%val;",
      "\\t.reg%t0\\t%%update;",
      "%.\\tld%A1%t0\\t%%val,%1;",
      "%.\\tadd%t0\\t%%update,%%val,%2;",
      "%.\\tst%A1%t0\\t%1,%%update;",
      "%.\\tmov%t0\\t%0,%%val;",
      "}",
      NULL
    };
    for (const char *const *line = seq; *line != NULL; line++)
      output_asm_insn (*line, operands);
    return "";
  }
  [(set_attr "atomic" "true")])
2177
;; SFmode counterpart of the integer atomic fetch-and-add: same structure,
;; with an SF register or floating constant ("RF") as the addend.
(define_insn "atomic_fetch_addsf"
  [(set (match_operand:SF 1 "memory_operand" "+m")
	(unspec_volatile:SF
	 [(plus:SF (match_dup 1)
		   (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (set (match_operand:SF 0 "nvptx_register_operand" "=R")
	(match_dup 1))]
  ""
  {
    if (!nvptx_mem_local_p (operands[1]))
      return nvptx_output_atomic_insn ("%.\\tatom%A1.add%t0\\t%x0, %1, %2;",
				       operands, 1, 3);

    /* Local memory: plain load/add/store in a private PTX scope, returning
       the value loaded before the update.  */
    const char *const seq[] = {
      "{",
      "\\t.reg%t0\\t%%val;",
      "\\t.reg%t0\\t%%update;",
      "%.\\tld%A1%t0\\t%%val,%1;",
      "%.\\tadd%t0\\t%%update,%%val,%2;",
      "%.\\tst%A1%t0\\t%1,%%update;",
      "%.\\tmov%t0\\t%0,%%val;",
      "}",
      NULL
    };
    for (const char *const *line = seq; *line != NULL; line++)
      output_asm_insn (*line, operands);
    return "";
  }
  [(set_attr "atomic" "true")])
2207
;; Atomic fetch-and-{and,ior,xor} style operations, one pattern per code in
;; any_logic.  The pattern is enabled for SImode always and for the other
;; SDIM mode only with TARGET_SM35.  Operand 0 receives the value memory
;; operand 1 held before it was combined with operand 2.
(define_insn "atomic_fetch_<logic><mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
	(unspec_volatile:SDIM
	  [(any_logic:SDIM (match_dup 1)
			   (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
	(match_dup 1))]
  "<MODE>mode == SImode || TARGET_SM35"
  {
    if (!nvptx_mem_local_p (operands[1]))
      return nvptx_output_atomic_insn ("%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;",
				       operands, 1, 3);

    /* Local memory: plain load/op/store in a private PTX scope, returning
       the value loaded before the update.  */
    const char *const seq[] = {
      "{",
      "\\t.reg.b%T0\\t%%val;",
      "\\t.reg.b%T0\\t%%update;",
      "%.\\tld%A1%t0\\t%%val,%1;",
      "%.\\t<logic>.b%T0\\t%%update,%%val,%2;",
      "%.\\tst%A1%t0\\t%1,%%update;",
      "%.\\tmov%t0\\t%0,%%val;",
      "}",
      NULL
    };
    for (const char *const *line = seq; *line != NULL; line++)
      output_asm_insn (*line, operands);
    return "";
  }
  [(set_attr "atomic" "true")])
2238
;; Atomic test-and-set is not expanded inline: it becomes a call to the
;; library function __atomic_test_and_set_1, passing the address of the QImode
;; memory operand and the memory model, with the result placed in operand 0.
(define_expand "atomic_test_and_set"
  [(match_operand:SI 0 "nvptx_register_operand")	;; bool success output
   (match_operand:QI 1 "memory_operand")		;; memory
   (match_operand:SI 2 "const_int_operand")]		;; model
  ""
{
  rtx libfunc = init_one_libfunc ("__atomic_test_and_set_1");
  /* The library routine takes the address in ptr_mode.  */
  rtx addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));
  emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode,
			   addr, ptr_mode,
			   operands[2], SImode);
  DONE;
})
2254
;; Barrier synchronization.  Operand 0 is the first barrier argument.
;; Operand 1 is a constant: when it is zero only operand 0 is printed,
;; otherwise it is printed as a second argument.  With PTX 6.0 the
;; barrier.sync forms are used (the .aligned variant for the one-argument
;; case); otherwise bar.sync.
(define_insn "nvptx_barsync"
  [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
		     (match_operand:SI 1 "const_int_operand")]
		    UNSPECV_BARSYNC)]
  ""
  {
    const bool one_arg_p = INTVAL (operands[1]) == 0;

    if (TARGET_PTX_6_0)
      return (one_arg_p
	      ? "\\tbarrier.sync.aligned\\t%0;"
	      : "\\tbarrier.sync\\t%0, %1;");

    return (one_arg_p
	    ? "\\tbar.sync\\t%0;"
	    : "\\tbar.sync\\t%0, %1;");
  }
  [(set_attr "predicable" "no")])
2271
;; Warp-level barrier with the full 0xffffffff member mask.  Only available
;; with TARGET_PTX_6_0; omp_simt_exit uses nvptx_uniform_warp_check instead
;; when it is not.
(define_insn "nvptx_warpsync"
  [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
  "TARGET_PTX_6_0"
  "%.\\tbar.warp.sync\\t0xffffffff;")
2276
;; Check that the warp is executing uniformly: ballot a constant-true
;; predicate into %r_act and, if the resulting mask differs from 0xffffffff,
;; trap and exit.  The scratch registers are declared in a private PTX scope.
(define_insn "nvptx_uniform_warp_check"
  [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
  ""
  {
    output_asm_insn ("{", NULL);

    /* Collect the mask of participating lanes.  */
    output_asm_insn ("\\t.reg.b32\\t%%r_act;", NULL);
    output_asm_insn ("%.\\tvote.ballot.b32\\t%%r_act,1;", NULL);

    /* The abort predicate starts out false, so that it stays false when
       the compare below is itself predicated off.  */
    output_asm_insn ("\\t.reg.pred\\t%%r_do_abort;", NULL);
    output_asm_insn ("\\tmov.pred\\t%%r_do_abort,0;", NULL);
    output_asm_insn ("%.\\tsetp.ne.b32\\t%%r_do_abort,%%r_act,0xffffffff;",
		     NULL);

    output_asm_insn ("@ %%r_do_abort\\ttrap;", NULL);
    output_asm_insn ("@ %%r_do_abort\\texit;", NULL);

    output_asm_insn ("}", NULL);
    return "";
  })
2298
;; Full memory barrier.  The expander only builds operand 0, a volatile
;; BLKmode MEM at a scratch address that the pattern both reads and sets;
;; the pattern itself is then emitted from the template.
(define_expand "memory_barrier"
  [(set (match_dup 0)
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
  ""
{
  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (mem) = 1;
  operands[0] = mem;
})
2307
;; PTX defines the memory barriers membar.cta, membar.gl and membar.sys
;; (corresponding to the CUDA functions threadfence_block, threadfence and
;; threadfence_system).  For the insn memory_barrier we use membar.sys.  This
;; may be overconservative, but before using membar.gl instead we'll need to
;; explain in detail why it's safe to use.  For now, use membar.sys.
;; This insn matches the pattern generated by the memory_barrier expander.
(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0 "" "")
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
  ""
  "\\tmembar.sys;"
  [(set_attr "predicable" "no")])
2319
;; membar.cta barrier.  As for memory_barrier, the expander only builds
;; operand 0, a volatile BLKmode MEM at a scratch address.
(define_expand "nvptx_membar_cta"
  [(set (match_dup 0)
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
  ""
{
  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (mem) = 1;
  operands[0] = mem;
})
2328
;; Matches the pattern generated by the nvptx_membar_cta expander and prints
;; membar.cta.  Not predicable.
(define_insn "*nvptx_membar_cta"
  [(set (match_operand:BLK 0 "" "")
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
  ""
  "\\tmembar.cta;"
  [(set_attr "predicable" "no")])
2335
;; membar.gl barrier.  As for memory_barrier, the expander only builds
;; operand 0, a volatile BLKmode MEM at a scratch address.
(define_expand "nvptx_membar_gl"
  [(set (match_dup 0)
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
  ""
{
  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (mem) = 1;
  operands[0] = mem;
})
2344
;; Matches the pattern generated by the nvptx_membar_gl expander and prints
;; membar.gl.  Not predicable.
(define_insn "*nvptx_membar_gl"
  [(set (match_operand:BLK 0 "" "")
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
  ""
  "\\tmembar.gl;"
  [(set_attr "predicable" "no")])
2351
;; Emit the PTX directive .pragma "nounroll".  Used by omp_simt_ordered as
;; the barrier against unrolling/peeling the containing loop.  Not predicable.
(define_insn "nvptx_nounroll"
  [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
  ""
  "\\t.pragma \\\"nounroll\\\";"
  [(set_attr "predicable" "no")])
2357
;; Set DImode operand 0 from the constant operand 1; the assembly text is
;; produced entirely by nvptx_output_red_partition.  Not predicable.
(define_insn "nvptx_red_partition"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
	(unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
	 UNSPECV_RED_PART))]
  ""
  {
    return nvptx_output_red_partition (operands[0], operands[1]);
  }
  [(set_attr "predicable" "no")])
2367
;; Expand QI mode operations using SI mode instructions.
;; Each code iterator below is paired with a code attribute giving the name
;; stem of the operation; the stem forms both the name of the QImode
;; expander ("<stem>qi3" / "<stem>qi2") and the SImode generator it calls.

;; Binary operations whose QImode inputs are widened as signed values.
(define_code_iterator any_sbinary [plus minus smin smax])
(define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")])

;; Binary operations whose QImode inputs are widened as unsigned values.
(define_code_iterator any_ubinary [and ior xor umin umax])
(define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin")
			   (umax "umax")])

;; Unary operations whose QImode input is widened as a signed value.
(define_code_iterator any_sunary [neg abs])
(define_code_attr sunary [(neg "neg") (abs "abs")])

;; Unary operations whose QImode input is widened as an unsigned value.
(define_code_iterator any_uunary [not])
(define_code_attr uunary [(not "one_cmpl")])
2381
;; QImode add/sub/smin/smax: widen both inputs to SImode as signed values,
;; perform the operation in SImode and truncate the result back to QImode.
(define_expand "<sbinary>qi3"
  [(set (match_operand:QI 0 "nvptx_register_operand")
	(any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
			(match_operand:QI 2 "nvptx_nonmemory_operand")))]
  ""
{
  rtx wide_result = gen_reg_rtx (SImode);
  rtx lhs = convert_modes (SImode, QImode, operands[1], 0);
  rtx rhs = convert_modes (SImode, QImode, operands[2], 0);

  /* For subtraction the first input is forced into a register.  */
  if (<CODE> == MINUS)
    lhs = force_reg (SImode, lhs);

  emit_insn (gen_<sbinary>si3 (wide_result, lhs, rhs));
  emit_insn (gen_truncsiqi2 (operands[0], wide_result));
  DONE;
})
2397
;; QImode and/ior/xor/umin/umax: widen both inputs to SImode as unsigned
;; values, perform the operation in SImode and truncate the result back to
;; QImode.
(define_expand "<ubinary>qi3"
  [(set (match_operand:QI 0 "nvptx_register_operand")
	(any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
			(match_operand:QI 2 "nvptx_nonmemory_operand")))]
  ""
{
  rtx wide_result = gen_reg_rtx (SImode);
  rtx lhs = convert_modes (SImode, QImode, operands[1], 1);
  rtx rhs = convert_modes (SImode, QImode, operands[2], 1);

  emit_insn (gen_<ubinary>si3 (wide_result, lhs, rhs));
  emit_insn (gen_truncsiqi2 (operands[0], wide_result));
  DONE;
})
2411
;; QImode neg/abs: widen the input to SImode as a signed value, perform the
;; operation in SImode and truncate the result back to QImode.
(define_expand "<sunary>qi2"
  [(set (match_operand:QI 0 "nvptx_register_operand")
	(any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
  ""
{
  rtx wide_result = gen_reg_rtx (SImode);
  rtx src = convert_modes (SImode, QImode, operands[1], 0);

  emit_insn (gen_<sunary>si2 (wide_result, src));
  emit_insn (gen_truncsiqi2 (operands[0], wide_result));
  DONE;
})
2423
;; QImode one's complement: widen the input to SImode as an unsigned value,
;; perform the operation in SImode and truncate the result back to QImode.
(define_expand "<uunary>qi2"
  [(set (match_operand:QI 0 "nvptx_register_operand")
	(any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
  ""
{
  rtx wide_result = gen_reg_rtx (SImode);
  rtx src = convert_modes (SImode, QImode, operands[1], 1);

  emit_insn (gen_<uunary>si2 (wide_result, src));
  emit_insn (gen_truncsiqi2 (operands[0], wide_result));
  DONE;
})
2435
;; Store the result of a QImode comparison in SImode operand 0.  Both
;; comparison inputs are widened to SImode, with the signedness chosen to
;; match the comparison code, compared into a BImode predicate, and the
;; predicate is then converted to the SImode result.
(define_expand "cstoreqi4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
	(match_operator:SI 1 "nvptx_comparison_operator"
	  [(match_operand:QI 2 "nvptx_nonmemory_operand")
	   (match_operand:QI 3 "nvptx_nonmemory_operand")]))]
  ""
{
  rtx pred = gen_reg_rtx (BImode);
  enum rtx_code cmp_code = GET_CODE (operands[1]);

  /* Unsigned comparisons need zero-extended inputs, signed ones
     sign-extended inputs.  */
  int zero_extend_p = unsigned_condition_p (cmp_code);
  rtx lhs = convert_modes (SImode, QImode, operands[2], zero_extend_p);
  rtx rhs = convert_modes (SImode, QImode, operands[3], zero_extend_p);

  rtx comparison = gen_rtx_fmt_ee (cmp_code, SImode, lhs, rhs);
  emit_insn (gen_cmpsi (pred, comparison, lhs, rhs));
  emit_insn (gen_setccsi_from_bi (operands[0], pred));
  DONE;
})
2453
;; Sign-extend the low 8 bits of an SImode register back to SImode, i.e. a
;; truncate to QImode followed by a sign extension, as one cvt.s32.s8.
(define_insn "*ext_truncsi2_qi"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(sign_extend:SI
	 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s32.s8\\t%0, %1;")
2460
;; Zero-extend the low 8 bits of an SImode register back to SImode, i.e. a
;; truncate to QImode followed by a zero extension, as one cvt.u32.u8.
(define_insn "*zext_truncsi2_qi"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
	(zero_extend:SI
	 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.u32.u8\\t%0, %1;")
2467