;; Machine description for NVPTX.
;; Copyright (C) 2014-2022 Free Software Foundation, Inc.
;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

;; Codes for non-volatile (unspec ...) expressions.
(define_c_enum "unspec" [
   UNSPEC_ARG_REG

   UNSPEC_COPYSIGN
   UNSPEC_LOG2
   UNSPEC_EXP2
   UNSPEC_SIN
   UNSPEC_COS
   UNSPEC_TANH
   UNSPEC_ISINF

   UNSPEC_FPINT_FLOOR
   UNSPEC_FPINT_BTRUNC
   UNSPEC_FPINT_CEIL
   UNSPEC_FPINT_NEARBYINT

   UNSPEC_BITREV

   UNSPEC_ALLOCA

   UNSPEC_SET_SOFTSTACK

   UNSPEC_DIM_SIZE

   UNSPEC_BIT_CONV

   UNSPEC_VOTE_BALLOT

   UNSPEC_LANEID

   UNSPEC_SHUFFLE
   UNSPEC_BR_UNIFIED
])

;; Codes for (unspec_volatile ...) expressions.
(define_c_enum "unspecv" [
   UNSPECV_LOCK
   UNSPECV_CAS
   UNSPECV_CAS_LOCAL
   UNSPECV_XCHG
   UNSPECV_ST
   UNSPECV_BARSYNC
   UNSPECV_WARPSYNC
   UNSPECV_UNIFORM_WARP_CHECK
   UNSPECV_MEMBAR
   UNSPECV_MEMBAR_CTA
   UNSPECV_MEMBAR_GL
   UNSPECV_DIM_POS

   UNSPECV_FORK
   UNSPECV_FORKED
   UNSPECV_JOINING
   UNSPECV_JOIN

   UNSPECV_NOUNROLL

   UNSPECV_SIMT_ENTER
   UNSPECV_SIMT_EXIT

   UNSPECV_RED_PART
])

;; Per-insn attributes; both default to "false".
(define_attr "subregs_ok" "false,true"
  (const_string "false"))

(define_attr "atomic" "false,true"
  (const_string "false"))

;; The nvptx operand predicates, in general, don't permit subregs and
;; only literal constants, which differ from the generic ones, which
;; permit subregs and symbolic constants (as appropriate)
(define_predicate "nvptx_register_operand"
  (match_code "reg")
{
  return register_operand (op, mode);
})

;; Accept either a plain register, or a CONCAT of two registers whose
;; mode is DCmode or CDImode.
(define_predicate "nvptx_register_or_complex_di_df_register_operand"
  (ior (match_code "reg")
       (match_code "concat"))
{
  if (GET_CODE (op) == CONCAT)
    return ((GET_MODE (op) == DCmode || GET_MODE (op) == CDImode)
            && nvptx_register_operand (XEXP (op, 0), mode)
            && nvptx_register_operand (XEXP (op, 1), mode));

  return nvptx_register_operand (op, mode);
})

(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  return (REG_P (op) ? register_operand (op, mode)
          : memory_operand (op, mode));
})

(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  return (REG_P (op) ? register_operand (op, mode)
          : immediate_operand (op, mode));
})

(define_predicate "const0_operand"
  (and (match_code "const_int")
       (match_test "op == const0_rtx")))

;; True if this operator is valid for predication.
(define_predicate "predicate_operator"
  (match_code "eq,ne"))

(define_predicate "ne_operator"
  (match_code "ne"))

(define_predicate "nvptx_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))

(define_predicate "nvptx_float_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))

(define_predicate "nvptx_vector_index_operand"
  (and (match_code "const_int")
       (match_test "UINTVAL (op) < 4")))

;; Test for a valid operand for a call instruction.
(define_predicate "call_insn_operand"
  (match_code "symbol_ref,reg")
{
  return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
})

;; Return true if OP is a call with parallel USEs of the argument
;; pseudos.
(define_predicate "call_operation"
  (match_code "parallel")
{
  int arg_end = XVECLEN (op, 0);

  /* Element 0 is not inspected here; every later element must be a
     USE of a register.  */
  for (int i = 1; i < arg_end; i++)
    {
      rtx elt = XVECEXP (op, 0, i);

      if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
        return false;
    }
  return true;
})

;; Test for a function symbol ref operand
(define_predicate "symbol_ref_function_operand"
  (match_code "symbol_ref")
{
  return SYMBOL_REF_FUNCTION_P (op);
})

;; Insns are predicable unless they say otherwise.
(define_attr "predicable" "no,yes"
  (const_string "yes"))

(define_cond_exec
  [(match_operator 0 "predicate_operator"
      [(match_operand:BI 1 "nvptx_register_operand" "")
       (match_operand:BI 2 "const0_operand" "")])]
  ""
  ""
  )

;; Constraints.

(define_constraint "P0"
  "An integer with the value 0."
  (and (match_code "const_int")
       (match_test "ival == 0")))

(define_constraint "P1"
  "An integer with the value 1."
  (and (match_code "const_int")
       (match_test "ival == 1")))

(define_constraint "Pn"
  "An integer with the value -1."
  (and (match_code "const_int")
       (match_test "ival == -1")))

(define_constraint "R"
  "A pseudo register."
  (match_code "reg"))

(define_constraint "Ia"
  "Any integer constant."
  (and (match_code "const_int") (match_test "true")))

;; Mode iterators.

(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
(define_mode_iterator QHSDIM [QI HI SI DI])
(define_mode_iterator HSDIM [HI SI DI])
(define_mode_iterator BHSDIM [BI HI SI DI])
(define_mode_iterator SDIM [SI DI])
(define_mode_iterator SDISDFM [SI DI SF DF])
(define_mode_iterator QHIM [QI HI])
(define_mode_iterator QHSIM [QI HI SI])
(define_mode_iterator SDFM [SF DF])
(define_mode_iterator HSFM [HF SF])
(define_mode_iterator SDCM [SC DC])
(define_mode_iterator BITS [SI SF])
(define_mode_iterator BITD [DI DF])
(define_mode_iterator VECIM [V2SI V2DI])

;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities. Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])

;; Define element mode for each vector mode.
(define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
(define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])

;; We should get away with not defining memory alternatives, since we don't
;; get variables in this mode and pseudos are never spilled.
(define_insn "movbi"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
        (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
  ""
  "@
   %.\\tmov%t0\\t%0, %1;
   %.\\tsetp.eq.u32\\t%0, 1, 0;
   %.\\tsetp.eq.u32\\t%0, 1, 1;")

;; Vector moves: alternative 0 is register/immediate to register,
;; alternative 1 is a load and alternative 2 is a store.  The insn
;; condition rejects a store whose source is not a register.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  if (which_alternative == 1)
    return "%.\\tld%A1%u1\\t%0, %1;";
  if (which_alternative == 2)
    return "%.\\tst%A0%u0\\t%0, %1;";

  return nvptx_output_mov_insn (operands[0], operands[1]);
}
  [(set_attr "subregs_ok" "true")])

;; Integer moves, with the same three alternatives as the vector case.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  if (which_alternative == 1)
    return "%.\\tld%A1%u1\\t%0, %1;";
  if (which_alternative == 2)
    return "%.\\tst%A0%u0\\t%0, %1;";

  return nvptx_output_mov_insn (operands[0], operands[1]);
}
  [(set_attr "subregs_ok" "true")])

;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
(define_split
  [(set (match_operand:DI 0 "nvptx_register_operand")
        (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
                           (match_operand 2 "const_int_operand"))))]
  ""
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
  ]
  "")

;; Floating-point moves: register/constant to register, load, store.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  if (which_alternative == 1)
    return "%.\\tld%A1%u0\\t%0, %1;";
  if (which_alternative == 2)
    return "%.\\tst%A0%u1\\t%0, %1;";

  return nvptx_output_mov_insn (operands[0], operands[1]);
}
  [(set_attr "subregs_ok" "true")])

(define_insn "*movhf_insn"
  [(set (match_operand:HF 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:HF 1 "nonimmediate_operand" "R,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   %.\\tmov.b16\\t%0, %1;
   %.\\tld.b16\\t%0, %1;
   %.\\tst.b16\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_expand "movhf"
  [(set (match_operand:HF 0 "nonimmediate_operand" "")
        (match_operand:HF 1 "nonimmediate_operand" ""))]
  ""
{
  /* Load HFmode constants as SFmode with an explicit FLOAT_TRUNCATE.  */
  if (CONST_DOUBLE_P (operands[1]))
    {
      rtx tmp1 = gen_reg_rtx (SFmode);
      REAL_VALUE_TYPE d = *CONST_DOUBLE_REAL_VALUE (operands[1]);
      real_convert (&d, SFmode, &d);
      emit_move_insn (tmp1, const_double_from_real_value (d, SFmode));

      /* Truncate into a fresh register first when the destination is
         not itself a register.  */
      if (!REG_P (operands[0]))
        {
          rtx tmp2 = gen_reg_rtx (HFmode);
          emit_insn (gen_truncsfhf2 (tmp2, tmp1));
          emit_move_insn (operands[0], tmp2);
        }
      else
        emit_insn (gen_truncsfhf2 (operands[0], tmp1));
      DONE;
    }

  /* A store needs a register source; go through a temporary.  */
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    {
      rtx tmp = gen_reg_rtx (HFmode);
      emit_move_insn (tmp, operands[1]);
      emit_move_insn (operands[0], tmp);
      DONE;
    }
})

;; Copy argument register %ar<operand 1> into operand 0; the QI/HI
;; variant converts from .u32, the wider variant is a plain mov.
(define_insn "load_arg_reg<mode>"
  [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
        (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
                     UNSPEC_ARG_REG))]
  ""
  "%.\\tcvt%t0.u32\\t%0, %%ar%1;")

(define_insn "load_arg_reg<mode>"
  [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
                        UNSPEC_ARG_REG))]
  ""
  "%.\\tmov%t0\\t%0, %%ar%1;")

(define_expand "mov<mode>"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
        (match_operand:VECIM 1 "general_operand" ""))]
  ""
{
  /* A store needs a register source; go through a temporary.  */
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_move_insn (tmp, operands[1]);
      emit_move_insn (operands[0], tmp);
      DONE;
    }
})

(define_expand "mov<mode>"
  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
        (match_operand:QHSDISDFM 1 "general_operand" ""))]
  ""
{
  /* A store needs a register source; go through a temporary.  */
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_move_insn (tmp, operands[1]);
      emit_move_insn (operands[0], tmp);
      DONE;
    }

  if (GET_CODE (operands[1]) == LABEL_REF)
    sorry ("target cannot support label values");
})

;; Zero- and sign-extensions.  Where a second alternative exists it is
;; an extending load from memory.

(define_insn "zero_extendqihi2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
        (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.u16.u%T1\\t%0, %1;
   %.\\tld%A1.u8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
        (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.u32.u%T1\\t%0, %1;
   %.\\tld%A1.u%T1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "zero_extend<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
        (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.u64.u%T1\\t%0, %1;
   %.\\tld%A1%u1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "extendqihi2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.s16.s8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "extend<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
        (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.s32.s%T1\\t%0, %1;
   %.\\tld%A1.s%T1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "extend<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
        (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\\tcvt.s64.s%T1\\t%0, %1;
   %.\\tld%A1.s%T1\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Truncations.  The second alternative is a truncating store.

(define_insn "trunchiqi2"
  [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
  ""
  "@
   %.\\tcvt%t0.u16\\t%0, %1;
   %.\\tst%A0.u8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])
(define_insn "truncsi<mode>2"
  [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
  ""
  {
    /* Alternative 1 is the truncating store.  */
    if (which_alternative == 1)
      return "%.\\tst%A0.u%T0\\t%0, %1;";
    /* A register-to-register QImode truncation is emitted as a mov.  */
    if (GET_MODE (operands[0]) == QImode)
      return "%.\\tmov%t0\\t%0, %1;";
    return "%.\\tcvt%t0.u32\\t%0, %1;";
  }
  [(set_attr "subregs_ok" "true")])

(define_insn "truncdi<mode>2"
  [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
  ""
  "@
   %.\\tcvt%t0.u64\\t%0, %1;
   %.\\tst%A0.u%T0\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Sign-extensions of truncations

(define_insn "*extend_trunc_<mode>2_qi"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (sign_extend:HSDIM
         (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s%T0.s8\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "*extend_trunc_<mode>2_hi"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (sign_extend:SDIM
         (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s%T0.s16\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "*extend_trunc_di2_si"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (sign_extend:DI
         (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s64.s32\\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Integer arithmetic

(define_insn "add<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tadd%t0\\t%0, %1, %2;")

(define_insn "*vadd_addsi4"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
                          (match_operand:SI 2 "nvptx_register_operand" "R"))
                 (match_operand:SI 3 "nvptx_register_operand" "R")))]
  ""
  "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")

(define_insn "*vsub_addsi4"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
                           (match_operand:SI 2 "nvptx_register_operand" "R"))
                 (match_operand:SI 3 "nvptx_register_operand" "R")))]
  ""
  "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")

(define_insn "sub<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                     (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
  ""
  {
    if (GET_MODE (operands[0]) == HImode)
      /* Workaround https://developer.nvidia.com/nvidia_bug/3527713.
         See PR97005.  */
      return "%.\\tsub.s16\\t%0, %1, %2;";

    return "%.\\tsub%t0\\t%0, %1, %2;";
  })

(define_insn "mul<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.lo%t0\\t%0, %1, %2;")

(define_insn "*mad<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                                (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
                    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")

(define_insn "div<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tdiv.s%T0\\t%0, %1, %2;")

(define_insn "udiv<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tdiv.u%T0\\t%0, %1, %2;")

(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
                   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.s%T0\\t%0, %1, %2;")

(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.u%T0\\t%0, %1, %2;")

(define_insn "smin<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmin.s%T0\\t%0, %1, %2;")

(define_insn "umin<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmin.u%T0\\t%0, %1, %2;")

(define_insn "smax<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmax.s%T0\\t%0, %1, %2;")

(define_insn "umax<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmax.u%T0\\t%0, %1, %2;")

(define_insn "abs<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tabs.s%T0\\t%0, %1;")

(define_insn "neg<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tneg.s%T0\\t%0, %1;")

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tnot.b%T0\\t%0, %1;")

(define_insn "one_cmplbi2"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tnot.pred\\t%0, %1;")

(define_insn "*cnot<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                  (const_int 0)))]
  ""
  "%.\\tcnot.b%T0\\t%0, %1;")

(define_insn "bitrev<mode>2"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
                     UNSPEC_BITREV))]
  ""
  "%.\\tbrev.b%T0\\t%0, %1;")

(define_insn "clz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tclz.b%T1\\t%0, %1;")

;; Count trailing zeros is expanded as a bit reversal followed by a
;; count of leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
  ""
{
  rtx tmpreg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
  emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
  DONE;
})

(define_insn "popcount<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tpopc.b%T1\\t%0, %1;")

;; Multiplication variants

(define_insn "mulhisi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (mult:SI (sign_extend:SI
                  (match_operand:HI 1 "nvptx_register_operand" "R"))
                 (sign_extend:SI
                  (match_operand:HI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.s16\\t%0, %1, %2;")

(define_insn "mulsidi3"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (mult:DI (sign_extend:DI
                  (match_operand:SI 1 "nvptx_register_operand" "R"))
                 (sign_extend:DI
                  (match_operand:SI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.s32\\t%0, %1, %2;")

(define_insn "umulhisi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (mult:SI (zero_extend:SI
                  (match_operand:HI 1 "nvptx_register_operand" "R"))
                 (zero_extend:SI
                  (match_operand:HI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.u16\\t%0, %1, %2;")

(define_insn "umulsidi3"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (mult:DI (zero_extend:DI
                  (match_operand:SI 1 "nvptx_register_operand" "R"))
                 (zero_extend:DI
                  (match_operand:SI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tmul.wide.u32\\t%0, %1, %2;")

;; DImode x DImode -> TImode widening multiply, built from a highpart
;; multiply and a low-part multiply.  Both extends are written in
;; TImode so that the operands of mult:TI agree in mode; the expander
;; always ends in DONE, so the template only describes the operands.
(define_expand "mulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
        (mult:TI (sign_extend:TI
                  (match_operand:DI 1 "nvptx_register_operand"))
                 (sign_extend:TI
                  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);
  emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})

;; Unsigned counterpart of mulditi3.
(define_expand "umulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
        (mult:TI (zero_extend:TI
                  (match_operand:DI 1 "nvptx_register_operand"))
                 (zero_extend:TI
                  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);
  emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})

(define_insn "smul<mode>3_highpart"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (smul_highpart:HSDIM
          (match_operand:HSDIM 1 "nvptx_register_operand" "R")
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.hi.s%T0\\t%0, %1, %2;")

(define_insn "umul<mode>3_highpart"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umul_highpart:HSDIM
          (match_operand:HSDIM 1 "nvptx_register_operand" "R")
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.hi.u%T0\\t%0, %1, %2;")

(define_insn "*smulhi3_highpart_2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (truncate:HI
         (lshiftrt:SI
          (mult:SI (sign_extend:SI
                    (match_operand:HI 1 "nvptx_register_operand" "R"))
                   (sign_extend:SI
                    (match_operand:HI 2 "nvptx_register_operand" "R")))
          (const_int 16))))]
  ""
  "%.\\tmul.hi.s16\\t%0, %1, %2;")

(define_insn "*smulsi3_highpart_2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (truncate:SI
         (lshiftrt:DI
          (mult:DI (sign_extend:DI
                    (match_operand:SI 1 "nvptx_register_operand" "R"))
                   (sign_extend:DI
                    (match_operand:SI 2 "nvptx_register_operand" "R")))
          (const_int 32))))]
  ""
  "%.\\tmul.hi.s32\\t%0, %1, %2;")

(define_insn "*umulhi3_highpart_2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (truncate:HI
         (lshiftrt:SI
          (mult:SI (zero_extend:SI
                    (match_operand:HI 1 "nvptx_register_operand" "R"))
                   (zero_extend:SI
                    (match_operand:HI 2 "nvptx_register_operand" "R")))
          (const_int 16))))]
  ""
  "%.\\tmul.hi.u16\\t%0, %1, %2;")

(define_insn "*umulsi3_highpart_2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (truncate:SI
         (lshiftrt:DI
          (mult:DI (zero_extend:DI
                    (match_operand:SI 1 "nvptx_register_operand" "R"))
                   (zero_extend:DI
                    (match_operand:SI 2 "nvptx_register_operand" "R")))
          (const_int 32))))]
  ""
  "%.\\tmul.hi.u32\\t%0, %1, %2;")

;; Shifts

(define_insn "ashl<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                      (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tshl.b%T0\\t%0, %1, %2;")

(define_insn "ashr<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                        (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tshr.s%T0\\t%0, %1, %2;")

(define_insn "lshr<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                        (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tshr.u%T0\\t%0, %1, %2;")

(define_insn "rotlsi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R")
                   (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
                           (const_int 31))))]
  "TARGET_SM35"
  "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;")

(define_insn "rotrsi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (rotatert:SI (match_operand:SI 1 "nvptx_register_operand" "R")
                     (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
                             (const_int 31))))]
  "TARGET_SM35"
  "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;")

;; Logical operations

(define_code_iterator any_logic [and ior xor])
;; <logic> is the mnemonic used in the output template, <ilogic> the
;; name used in the pattern name.
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
(define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")])

(define_insn "<ilogic><mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (any_logic:HSDIM
          (match_operand:HSDIM 1 "nvptx_register_operand" "R")
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\t<logic>.b%T0\\t%0, %1, %2;")

(define_insn "<ilogic>bi3"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R")
                      (match_operand:BI 2 "nvptx_register_operand" "R")))]
  ""
  "%.\\t<logic>.pred\\t%0, %1, %2;")

;; A logical operation on two integer values that were each produced
;; from a predicate is done on the predicates instead, and the single
;; result is then converted to the integer mode.
(define_split
  [(set (match_operand:HSDIM 0 "nvptx_register_operand")
        (any_logic:HSDIM
          (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand")
                    (const_int 0))
          (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand")
                    (const_int 0))))]
  "can_create_pseudo_p ()"
  [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2)))
   (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))]
{
  operands[3] = gen_reg_rtx (BImode);
})

;; Comparisons and branches

(define_insn "cmp<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (match_operator:BI 1 "nvptx_comparison_operator"
          [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
           (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
  ""
  "%.\\tsetp%c1\\t%0, %2, %3;")

(define_insn "*cmp<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (match_operator:BI 1 "nvptx_float_comparison_operator"
          [(match_operand:SDFM 2 "nvptx_register_operand" "R")
           (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
  ""
  "%.\\tsetp%c1\\t%0, %2, %3;")

(define_insn "*cmphf"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (match_operator:BI 1 "nvptx_float_comparison_operator"
          [(match_operand:HF 2 "nvptx_register_operand" "R")
           (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
  "TARGET_SM53"
  "%.\\tsetp%c1\\t%0, %2, %3;")

(define_insn "jump"
  [(set (pc)
        (label_ref (match_operand 0 "" "")))]
  ""
  "%.\\tbra\\t%l0;")

(define_insn "br_true"
  [(set (pc)
        (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
                          (const_int 0))
                      (label_ref (match_operand 1 "" ""))
                      (pc)))]
  ""
  "%j0\\tbra\\t%l1;"
  [(set_attr "predicable" "no")])

(define_insn "br_false"
  [(set (pc)
        (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
                          (const_int 0))
                      (label_ref (match_operand 1 "" ""))
                      (pc)))]
  ""
  "%J0\\tbra\\t%l1;"
  [(set_attr "predicable" "no")])

;; unified conditional branch
(define_insn "br_true_uni"
  [(set (pc) (if_then_else
        (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
                       UNSPEC_BR_UNIFIED) (const_int 0))
        (label_ref (match_operand 1 "" "")) (pc)))]
  ""
  "%j0\\tbra.uni\\t%l1;"
  [(set_attr "predicable" "no")])

(define_insn "br_false_uni"
  [(set (pc) (if_then_else
        (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
                       UNSPEC_BR_UNIFIED) (const_int 0))
        (label_ref (match_operand 1 "" "")) (pc)))]
  ""
  "%J0\\tbra.uni\\t%l1;"
  [(set_attr "predicable" "no")])

;; The integer and floating-point cbranch expanders replace the
;; comparison by the rtx returned from nvptx_expand_compare and branch
;; on that.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_comparison_operator"
                       [(match_operand:HSDIM 1 "nvptx_register_operand" "")
                        (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx t = nvptx_expand_compare (operands[0]);
  operands[0] = t;
  operands[1] = XEXP (t, 0);
  operands[2] = XEXP (t, 1);
})

(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
                       [(match_operand:SDFM 1 "nvptx_register_operand" "")
                        (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx t = nvptx_expand_compare (operands[0]);
  operands[0] = t;
  operands[1] = XEXP (t, 0);
  operands[2] = XEXP (t, 1);
})

(define_expand "cbranchbi4"
  [(set (pc)
        (if_then_else (match_operator 0 "predicate_operator"
                       [(match_operand:BI 1 "nvptx_register_operand" "")
                        (match_operand:BI 2 "const0_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
  "")

;; Conditional stores

(define_insn "setcc<mode>_from_bi"
  [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
        (ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
                   (const_int 0)))]
  ""
  "%.\\tselp%t0\\t%0, 1, 0, %1;")

(define_insn "*setcc<mode>_from_not_bi"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
                  (const_int 0)))]
  ""
  "%.\\tselp%t0\\t%0, 0, 1, %1;")

(define_insn "extendbi<mode>2"
  [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
        (sign_extend:QHSDIM
         (match_operand:BI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tselp%t0\\t%0, -1, 0, %1;")

(define_insn "zero_extendbi<mode>2"
  [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
        (zero_extend:QHSDIM
         (match_operand:BI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tselp%t0\\t%0, 1, 0, %1;")

(define_insn "sel_true<mode>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (if_then_else:HSDIM
          (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
          (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tselp%t0\\t%0, %2, %3, %1;")

(define_insn "sel_true<mode>"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (if_then_else:SDFM
          (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
          (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tselp%t0\\t%0, %2, %3, %1;")

;; The sel_false patterns test the predicate for equality with zero,
;; so the two value operands are swapped in the selp template.
(define_insn "sel_false<mode>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (if_then_else:HSDIM
          (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
          (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tselp%t0\\t%0, %3, %2, %1;")

(define_insn "sel_false<mode>"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (if_then_else:SDFM
          (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
          (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tselp%t0\\t%0, %3, %2, %1;")

(define_code_iterator eqne [eq ne])

;; Split negation of a predicate into a conditional move.
(define_insn_and_split "*selp<mode>_neg_<code>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (neg:HSDIM (eqne:HSDIM
                     (match_operand:BI 1 "nvptx_register_operand" "R")
                     (const_int 0))))]
  ""
  "#"
  "&& 1"
  [(set (match_dup 0)
        (if_then_else:HSDIM
          (eqne (match_dup 1) (const_int 0))
          (const_int -1)
          (const_int 0)))])

;; Split bitwise not of a predicate into a conditional move.
;; (not (eq/ne predicate 0)) is never emitted directly ("#"); it is always
;; split ("&& 1") into an if_then_else that yields -2 when the test holds and
;; -1 otherwise (~1 == -2, ~0 == -1).
(define_insn_and_split "*selp<mode>_not_<code>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (not:HSDIM (eqne:HSDIM
                     (match_operand:BI 1 "nvptx_register_operand" "R")
                     (const_int 0))))]
  ""
  "#"
  "&& 1"
  [(set (match_dup 0)
        (if_then_else:HSDIM
          (eqne (match_dup 1) (const_int 0))
          (const_int -2)
          (const_int -1)))])

;; Negated integer comparison with an SImode result, emitted as one "set"
;; instruction.
(define_insn "*setcc_int<mode>"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (neg:SI
          (match_operator:SI 1 "nvptx_comparison_operator"
            [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
             (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
  ""
  "%.\\tset%t0%c1\\t%0, %2, %3;")

;; As above, for a floating-point comparison.
(define_insn "*setcc_int<mode>"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (neg:SI
          (match_operator:SI 1 "nvptx_float_comparison_operator"
            [(match_operand:SDFM 2 "nvptx_register_operand" "R")
             (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
  ""
  "%.\\tset%t0%c1\\t%0, %2, %3;")

;; Integer comparison producing an SFmode result, emitted as "set".
(define_insn "setcc_float<mode>"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_operator:SF 1 "nvptx_comparison_operator"
          [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
           (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
  ""
  "%.\\tset%t0%c1\\t%0, %2, %3;")

;; Floating-point comparison producing an SFmode result, emitted as "set".
(define_insn "setcc_float<mode>"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_operator:SF 1 "nvptx_float_comparison_operator"
          [(match_operand:SDFM 2 "nvptx_register_operand" "R")
           (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
  ""
  "%.\\tset%t0%c1\\t%0, %2, %3;")

;; Store an integer comparison result in an SImode register: the comparison
;; is first computed into a fresh BImode register, which is then converted
;; with gen_setccsi_from_bi.
(define_expand "cstore<mode>4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "nvptx_comparison_operator"
          [(match_operand:HSDIM 2 "nvptx_register_operand")
           (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
  ""
{
  rtx reg = gen_reg_rtx (BImode);
  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
                            operands[2], operands[3]);
  emit_move_insn (reg, cmp);
  emit_insn (gen_setccsi_from_bi (operands[0], reg));
  DONE;
})

;; Same expansion for SF/DF comparisons.
(define_expand "cstore<mode>4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "nvptx_float_comparison_operator"
          [(match_operand:SDFM 2 "nvptx_register_operand")
           (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
  ""
{
  rtx reg = gen_reg_rtx (BImode);
  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
                            operands[2], operands[3]);
  emit_move_insn (reg, cmp);
  emit_insn (gen_setccsi_from_bi (operands[0], reg));
  DONE;
})

;; Same expansion for HFmode comparisons; enabled only under TARGET_SM53.
(define_expand "cstorehf4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "nvptx_float_comparison_operator"
          [(match_operand:HF 2 "nvptx_register_operand")
           (match_operand:HF 3 "nvptx_nonmemory_operand")]))]
  "TARGET_SM53"
{
  rtx reg = gen_reg_rtx (BImode);
  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
                            operands[2], operands[3]);
  emit_move_insn (reg, cmp);
  emit_insn (gen_setccsi_from_bi (operands[0], reg));
  DONE;
})

;; Calls

;; Call with no result.  The assembly text is produced by
;; nvptx_output_call_insn, which is passed NULL_RTX as the result.
(define_insn "call_insn_<mode>"
  [(match_parallel 2 "call_operation"
    [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
           (match_operand 1))])]
  ""
{
  return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
})

;; Call whose result is stored in operand 0.
(define_insn "call_value_insn_<mode>"
  [(match_parallel 3 "call_operation"
    [(set (match_operand 0 "nvptx_register_operand" "=R")
          (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
                (match_operand 2)))])]
  ""
{
  return nvptx_output_call_insn (insn, operands[0], operands[1]);
})

;; The call expanders delegate entirely to nvptx_expand_call.
(define_expand "call"
  [(match_operand 0 "" "")]
  ""
{
  nvptx_expand_call (NULL_RTX, operands[0]);
  DONE;
})

(define_expand "call_value"
  [(match_operand 0 "" "")
   (match_operand 1 "" "")]
  ""
{
  nvptx_expand_call (operands[0], operands[1]);
  DONE;
})

;; Floating point arithmetic.

(define_insn "add<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tadd%t0\\t%0, %1, %2;")

;; Unlike add/mul, both sub operands must be registers.
(define_insn "sub<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                    (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
  ""
  "%.\\tsub%t0\\t%0, %1, %2;")

(define_insn "mul<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tmul%t0\\t%0, %1, %2;")

(define_insn "fma<mode>4"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
                  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tfma%#%t0\\t%0, %1, %2, %3;")

;; Division whose dividend (operand 2) is the constant 1.0: the insn
;; condition requires it to be identical to dconst1, and "rcp" is emitted
;; on the divisor (operand 1).
(define_insn "*recip<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (div:SDFM
          (match_operand:SDFM 2 "const_double_operand" "F")
          (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  "CONST_DOUBLE_P (operands[2])
   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
  "%.\\trcp%#%t0\\t%0, %1;")

(define_insn "div<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tdiv%#%t0\\t%0, %1, %2;")

;; Note the template passes the inputs in swapped order (%2 before %1).
(define_insn "copysign<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
                      (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")]
                     UNSPEC_COPYSIGN))]
  ""
  "%.\\tcopysign%t0\\t%0, %2, %1;")

(define_insn "smin<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tmin%t0\\t%0, %1, %2;")

(define_insn "smax<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\\tmax%t0\\t%0, %1, %2;")

(define_insn "abs<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tabs%t0\\t%0, %1;")

(define_insn "neg<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tneg%t0\\t%0, %1;")

(define_insn "sqrt<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tsqrt%#%t0\\t%0, %1;")

;; Combined sine/cosine: operand 0 receives the cosine and operand 1 the
;; sine of operand 2.  The input is passed through make_safe_from against
;; operand 0 before the two sets are emitted.  Only available with
;; flag_unsafe_math_optimizations.
(define_expand "sincossf3"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
                   UNSPEC_COS))
   (set (match_operand:SF 1 "nvptx_register_operand" "=R")
        (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
  "flag_unsafe_math_optimizations"
{
  operands[2] = make_safe_from (operands[2], operands[0]);
})

;; The following SFmode patterns emit ".approx" instructions and are
;; therefore only enabled with flag_unsafe_math_optimizations.
(define_insn "sinsf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_SIN))]
  "flag_unsafe_math_optimizations"
  "%.\\tsin.approx%t0\\t%0, %1;")

(define_insn "cossf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_COS))]
  "flag_unsafe_math_optimizations"
  "%.\\tcos.approx%t0\\t%0, %1;")

(define_insn "log2sf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_LOG2))]
  "flag_unsafe_math_optimizations"
  "%.\\tlg2.approx%t0\\t%0, %1;")

(define_insn "exp2sf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_EXP2))]
  "flag_unsafe_math_optimizations"
  "%.\\tex2.approx%t0\\t%0, %1;")

;; Infinity test into a BImode predicate via "testp.infinite".
(define_insn "setcc_isinf<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
                   UNSPEC_ISINF))]
  ""
  "%.\\ttestp.infinite%t1\\t%0, %1;")

;; SImode isinf: run setcc_isinf<mode> into a fresh predicate register, then
;; convert the predicate with gen_setccsi_from_bi.
(define_expand "isinf<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
                   UNSPEC_ISINF))]
  ""
{
  rtx pred = gen_reg_rtx (BImode);
  emit_insn (gen_setcc_isinf<mode> (pred, operands[1]));
  emit_insn (gen_setccsi_from_bi (operands[0], pred));
  DONE;
})

;; HFmode floating point arithmetic.

;; HFmode add/sub/mul take register operands only and require TARGET_SM53.
(define_insn "addhf3"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (plus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
                 (match_operand:HF 2 "nvptx_register_operand" "R")))]
  "TARGET_SM53"
  "%.\\tadd.f16\\t%0, %1, %2;")

(define_insn "subhf3"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (minus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
                  (match_operand:HF 2 "nvptx_register_operand" "R")))]
  "TARGET_SM53"
  "%.\\tsub.f16\\t%0, %1, %2;")

(define_insn "mulhf3"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (mult:HF (match_operand:HF 1 "nvptx_register_operand" "R")
                 (match_operand:HF 2 "nvptx_register_operand" "R")))]
  "TARGET_SM53"
  "%.\\tmul.f16\\t%0, %1, %2;")

(define_insn "fmahf4"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R")
                (match_operand:HF 2 "nvptx_nonmemory_operand" "RF")
                (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))]
  "TARGET_SM53"
  "%.\\tfma%#.f16\\t%0, %1, %2, %3;")

;; HFmode negate and abs are done as 16-bit bitwise operations (xor with
;; -32768, and with 32767) and carry no target condition.
(define_insn "neghf2"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\txor.b16\\t%0, %1, -32768;")

(define_insn "abshf2"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tand.b16\\t%0, %1, 32767;")

;; Approximate HFmode exp2; needs TARGET_SM75 and
;; flag_unsafe_math_optimizations.
(define_insn "exp2hf2"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
                   UNSPEC_EXP2))]
  "TARGET_SM75 && flag_unsafe_math_optimizations"
  "%.\\tex2.approx.f16\\t%0, %1;")

;; Approximate tanh over the HSFM modes, under the same conditions.
(define_insn "tanh<mode>2"
  [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R")
        (unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")]
                     UNSPEC_TANH))]
  "TARGET_SM75 && flag_unsafe_math_optimizations"
  "%.\\ttanh.approx%t0\\t%0, %1;")

;; HFmode floating point min/max (TARGET_SM80 only).

(define_insn "sminhf3"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (smin:HF (match_operand:HF 1 "nvptx_register_operand" "R")
                 (match_operand:HF 2 "nvptx_register_operand" "R")))]
  "TARGET_SM80"
  "%.\\tmin.f16\\t%0, %1, %2;")

(define_insn "smaxhf3"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (smax:HF (match_operand:HF 1 "nvptx_register_operand" "R")
                 (match_operand:HF 2 "nvptx_register_operand" "R")))]
  "TARGET_SM80"
  "%.\\tmax.f16\\t%0, %1, %2;")

;; Conversions involving floating point

;; All conversions in this section are emitted as a single "cvt".
(define_insn "extendsfdf2"
  [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
        (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%t0%t1\\t%0, %1;")

(define_insn "truncdfsf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0%t1\\t%0, %1;")

;; Integer to floating point.  The templates differ only in the ".u" / ".s"
;; marker placed before the source operand's type suffix.
(define_insn "floatunssi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.u%T1\\t%0, %1;")

(define_insn "floatsi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.s%T1\\t%0, %1;")

(define_insn "floatunsdi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.u%T1\\t%0, %1;")

(define_insn "floatdi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt%#%t0.s%T1\\t%0, %1;")

;; Floating point to integer; every template hard-codes the ".rzi" modifier.
(define_insn "fixuns_trunc<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")

(define_insn "fix_trunc<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")

(define_insn "fixuns_trunc<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")

(define_insn "fix_trunc<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")

;; Rounding to an integral value in the same floating-point mode.  FPINT
;; iterates over the four rounding unspecs; fpint_name supplies the pattern
;; name and fpint_roundingmode the modifier spliced into the "cvt" template.
(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
                            UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
                             (UNSPEC_FPINT_BTRUNC "btrunc")
                             (UNSPEC_FPINT_CEIL "ceil")
                             (UNSPEC_FPINT_NEARBYINT "nearbyint")])
(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
                                     (UNSPEC_FPINT_BTRUNC ".rzi")
                                     (UNSPEC_FPINT_CEIL ".rpi")
                                     (UNSPEC_FPINT_NEARBYINT "%#i")])

(define_insn "<FPINT:fpint_name><SDFM:mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
                     FPINT))]
  ""
  "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")

;; lfloor / lceil: floor or ceil combined with conversion to a signed
;; integer mode (SDIM).
(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
(define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
                              (UNSPEC_FPINT_CEIL "lceil")])
(define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
                                      (UNSPEC_FPINT_CEIL ".rpi")])

(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
                     FPINT2))]
  ""
  "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")

;; HFmode <-> SF/DF conversions; both require TARGET_SM53.
(define_insn "extendhf<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float_extend:SDFM (match_operand:HF 1 "nvptx_register_operand" "R")))]
  "TARGET_SM53"
  "%.\\tcvt%t0%t1\\t%0, %1;")

(define_insn "trunc<mode>hf2"
  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
        (float_truncate:HF (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  "TARGET_SM53"
  "%.\\tcvt%#%t0%t1\\t%0, %1;")

;; Vector operations

;; Set a single vector component.  The vec_merge mask (1, 2, 4, 8) picks the
;; component, written as .x, .y, .z or .w respectively; the other components
;; keep the value already in operand 0 (match_dup 0).
(define_insn "*vec_set<mode>_0"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 1)))]
  ""
  "%.\\tmov%t1\\t%0.x, %1;")

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 2)))]
  ""
  "%.\\tmov%t1\\t%0.y, %1;")

(define_insn "*vec_set<mode>_2"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 4)))]
  ""
  "%.\\tmov%t1\\t%0.z, %1;")

(define_insn "*vec_set<mode>_3"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 8)))]
  ""
  "%.\\tmov%t1\\t%0.w, %1;")

;; Generic vector element store.  Operand 2 is a constant index accepted by
;; nvptx_vector_index_operand; it is turned into the one-hot vec_merge mask
;; 1 << index and the resulting (set ... (vec_merge (vec_duplicate ...)))
;; is emitted.
(define_expand "vec_set<mode>"
  [(match_operand:VECIM 0 "nvptx_register_operand")
   (match_operand:<VECELEM> 1 "nvptx_register_operand")
   (match_operand:SI 2 "nvptx_vector_index_operand")]
  ""
{
  enum machine_mode mode = GET_MODE (operands[0]);
  int mask = 1 << INTVAL (operands[2]);
  rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
  tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
  emit_insn (gen_rtx_SET (operands[0], tmp));
  DONE;
})

;; Extract one vector component.  The constant index in operand 2 selects
;; one of four "mov" templates reading .x, .y, .z or .w.
(define_insn "vec_extract<mode><Vecelem>"
  [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
        (vec_select:<VECELEM>
          (match_operand:VECIM 1 "nvptx_register_operand" "R")
          (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
  ""
{
  static const char *const asms[4] = {
    "%.\\tmov%t0\\t%0, %1.x;",
    "%.\\tmov%t0\\t%0, %1.y;",
    "%.\\tmov%t0\\t%0, %1.z;",
    "%.\\tmov%t0\\t%0, %1.w;"
  };
  return asms[INTVAL (operands[2])];
})

;; Miscellaneous

;; Emits no assembly text.
(define_insn "nop"
  [(const_int 0)]
  ""
  "")

(define_insn "exit"
  [(const_int 1)]
  ""
  "exit;")

;; A nop that does emit text: a brace-delimited scope declaring two .u32
;; registers and moving one into the other.
(define_insn "fake_nop"
  [(const_int 2)]
  ""
  "{
     .reg .u32 %%nop_src;
     .reg .u32 %%nop_dst;
     mov.u32 %%nop_dst, %%nop_src;
   }")

;; Function return; the text comes from nvptx_output_return.
(define_insn "return"
  [(return)]
  ""
{
  return nvptx_output_return ();
}
  [(set_attr "predicable" "no")])

;; With TARGET_SOFT_STACK, emit set_softstack on the SOFTSTACK_PREV_REGNUM
;; register before the return jump.
(define_expand "epilogue"
  [(clobber (const_int 0))]
  ""
{
  if (TARGET_SOFT_STACK)
    emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
                                                      SOFTSTACK_PREV_REGNUM)));
  emit_jump_insn (gen_return ());
  DONE;
})

;; Nonlocal goto is rejected with sorry (); a nop is emitted in its place.
(define_expand "nonlocal_goto"
  [(match_operand 0 "" "")
   (match_operand 1 "" "")
   (match_operand 2 "" "")
   (match_operand 3 "" "")]
  ""
{
  sorry ("target cannot support nonlocal goto");
  emit_insn (gen_nop ());
  DONE;
})

(define_expand "nonlocal_goto_receiver"
  [(const_int 0)]
  ""
{
  sorry ("target cannot support nonlocal goto");
})

;; Dynamic stack allocation.  With TARGET_SOFT_STACK the stack pointer is
;; decremented by operand 1, set_softstack is emitted for it, and operand 0
;; receives virtual_stack_dynamic_rtx.  Otherwise the request is rejected
;; with sorry () and a nop is emitted.
(define_expand "allocate_stack"
  [(match_operand 0 "nvptx_register_operand")
   (match_operand 1 "nvptx_register_operand")]
  ""
{
  if (TARGET_SOFT_STACK)
    {
      emit_move_insn (stack_pointer_rtx,
                      gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
      emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
      emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
      DONE;
    }
  /* The ptx documentation specifies an alloca intrinsic (for 32 bit
     only) but notes it is not implemented.  The assembler emits a
     confused error message.  Issue a blunt one now instead.  */
  sorry ("target cannot support alloca");
  emit_insn (gen_nop ());
  DONE;
})

;; Only available with TARGET_SOFT_STACK; the text comes from
;; nvptx_output_set_softstack, called with operand 0's register number.
(define_insn "@set_softstack_<mode>"
  [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
           UNSPEC_SET_SOFTSTACK)]
  "TARGET_SOFT_STACK"
{
  return nvptx_output_set_softstack (REGNO (operands[0]));
})

;; With TARGET_SOFT_STACK, copy operand 1 into operand 0 and emit
;; set_softstack for it; otherwise nothing is emitted.
(define_expand "restore_stack_block"
  [(match_operand 0 "register_operand" "")
   (match_operand 1 "register_operand" "")]
  ""
{
  if (TARGET_SOFT_STACK)
    {
      emit_move_insn (operands[0], operands[1]);
      emit_insn (gen_set_softstack (Pmode, operands[0]));
    }
  DONE;
})

;; Expands to nothing.
(define_expand "restore_stack_function"
  [(match_operand 0 "register_operand" "")
   (match_operand 1 "register_operand" "")]
  ""
{
  DONE;
})

;; Unconditional trap: "trap" followed by "exit".
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 0))]
  ""
  "trap; exit;")

;; Conditional traps on a BImode register.  The ne form uses %j0 and the eq
;; form uses %J0 in front of both the trap and the exit.
(define_insn "trap_if_true"
  [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
                (const_int 0))
            (const_int 0))]
  ""
  "%j0 trap; %j0 exit;"
  [(set_attr "predicable" "no")])

(define_insn "trap_if_false"
  [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
                (const_int 0))
            (const_int 0))]
  ""
  "%J0 trap; %J0 exit;"
  [(set_attr "predicable" "no")])

;; Conditional trap on an integer comparison: the comparison is expanded by
;; nvptx_expand_compare and its result is passed to gen_trap_if_true.
(define_expand "ctrap<mode>4"
  [(trap_if (match_operator 0 "nvptx_comparison_operator"
              [(match_operand:SDIM 1 "nvptx_register_operand")
               (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
            (match_operand 3 "const0_operand"))]
  ""
{
  rtx t = nvptx_expand_compare (operands[0]);
  emit_insn (gen_trap_if_true (t));
  DONE;
})

;; Read the size of an OpenACC dimension.  The constant in operand 1 indexes
;; the template table below (gang, worker, vector).
(define_insn "oacc_dim_size"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
                   UNSPEC_DIM_SIZE))]
  ""
{
  static const char *const asms[] =
{ /* Must match oacc_loop_levels ordering. */
  "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
  "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
  "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
};
  return asms[INTVAL (operands[1])];
})

;; Read the position within an OpenACC dimension; same indexing as
;; oacc_dim_size, but an unspec_volatile.
(define_insn "oacc_dim_pos"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
                            UNSPECV_DIM_POS))]
  ""
{
  static const char *const asms[] =
{ /* Must match oacc_loop_levels ordering. */
  "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
  "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
  "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
};
  return asms[INTVAL (operands[1])];
})

;; Fork/join marker insns.  Each one emits only an assembly comment naming
;; the marker and its constant operand.
(define_insn "nvptx_fork"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_FORK)]
  ""
  "// fork %0;"
  [(set_attr "predicable" "no")])

(define_insn "nvptx_forked"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_FORKED)]
  ""
  "// forked %0;"
  [(set_attr "predicable" "no")])

(define_insn "nvptx_joining"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_JOINING)]
  ""
  "// joining %0;"
  [(set_attr "predicable" "no")])

(define_insn "nvptx_join"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_JOIN)]
  ""
  "// join %0;"
  [(set_attr "predicable" "no")])

;; OpenACC fork: unless operand 0 is const0_rtx, copy operand 1 into it;
;; then hand the constant in operand 2 to nvptx_expand_oacc_fork.
(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_FORKED)]
  ""
{
  if (operands[0] != const0_rtx)
    emit_move_insn (operands[0], operands[1]);
  nvptx_expand_oacc_fork (INTVAL (operands[2]));
  DONE;
})

;; OpenACC join: same shape as oacc_fork, calling nvptx_expand_oacc_join.
(define_expand "oacc_join"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_JOIN)]
  ""
{
  if (operands[0] != const0_rtx)
    emit_move_insn (operands[0], operands[1]);
  nvptx_expand_oacc_join (INTVAL (operands[2]));
  DONE;
})

;; only 32-bit shuffles exist.
;; Warp shuffle of operand 1 by the amount in operand 2.  Under
;; TARGET_PTX_6_0 the "shfl.sync" form is emitted with an extra 0xffffffff
;; argument; otherwise the plain "shfl" form is used.
(define_insn "nvptx_shuffle<mode>"
  [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
        (unspec:BITS
          [(match_operand:BITS 1 "nvptx_register_operand" "R")
           (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_SHUFFLE))]
  ""
  {
    if (TARGET_PTX_6_0)
      return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;";
    else
      return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;";
  })

;; Ballot vote over a BImode predicate, with the same TARGET_PTX_6_0 choice
;; between the ".sync" and plain forms.
(define_insn "nvptx_vote_ballot"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
                   UNSPEC_VOTE_BALLOT))]
  ""
  {
    if (TARGET_PTX_6_0)
      return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;";
    else
      return "%.\\tvote.ballot.b32\\t%0, %1;";
  })

;; Patterns for OpenMP SIMD-via-SIMT lowering

;; The text comes from nvptx_output_simt_enter.
(define_insn "@omp_simt_enter_<mode>"
  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
        (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
                            (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
                           UNSPECV_SIMT_ENTER))]
  ""
{
  return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
})

;; Record the SIMT stack requirements in cfun->machine before emitting the
;; Pmode enter insn.  The size becomes HOST_WIDE_INT_M1U when operand 1 is
;; not a CONST_INT, otherwise the maximum seen so far; the alignment is the
;; maximum seen so far; has_simtreg is set.
(define_expand "omp_simt_enter"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
   (match_operand 2 "const_int_operand" "n")]
  ""
{
  if (!CONST_INT_P (operands[1]))
    cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
  else
    cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
                                          cfun->machine->simt_stack_size);
  cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
                                         cfun->machine->simt_stack_align);
  cfun->machine->has_simtreg = true;
  emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
  DONE;
})

;; Emit the Pmode exit insn, followed by nvptx_warpsync under
;; TARGET_PTX_6_0 or nvptx_uniform_warp_check otherwise.
(define_expand "omp_simt_exit"
  [(match_operand 0 "nvptx_register_operand" "R")]
  ""
{
  emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
  if (TARGET_PTX_6_0)
    emit_insn (gen_nvptx_warpsync ());
  else
    emit_insn (gen_nvptx_uniform_warp_check ());
  DONE;
})

;; The text comes from nvptx_output_simt_exit.
(define_insn "@omp_simt_exit_<mode>"
  [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
                    UNSPECV_SIMT_EXIT)]
  ""
{
  return nvptx_output_simt_exit (operands[0]);
})

;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
(define_insn "omp_simt_lane"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
  ""
  "%.\\tmov.u32\\t%0, %%laneid;")

;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
;; place a compiler barrier to disallow unrolling/peeling the containing loop
(define_expand "omp_simt_ordered"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  emit_insn (gen_nvptx_nounroll ());
  DONE;
})

;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; across lanes
(define_expand "omp_simt_xchg_bfly"
  [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
   (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
                                SHUFFLE_BFLY));
  DONE;
})

;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
;; from lane given by index in operand 2 to operand 0 in all lanes
(define_expand "omp_simt_xchg_idx"
  [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
   (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
                                SHUFFLE_IDX));
  DONE;
})

;; Implement IFN_GOMP_SIMT_VOTE_ANY:
;; set operand 0 to zero iff all lanes supply zero in operand 1
;; (operand 1 != 0 is computed into a BImode predicate, which is then
;; ballot-voted directly into operand 0).
(define_expand "omp_simt_vote_any"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx pred = gen_reg_rtx (BImode);
  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
  emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
  DONE;
})

;; Implement IFN_GOMP_SIMT_LAST_LANE:
;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; (the ballot of operand 1 != 0 goes to a temporary, and ctzsi2 of that
;; temporary is stored in operand 0).
(define_expand "omp_simt_last_lane"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx pred = gen_reg_rtx (BImode);
  rtx tmp = gen_reg_rtx (SImode);
  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
  emit_insn (gen_nvptx_vote_ballot (tmp, pred));
  emit_insn (gen_ctzsi2 (operands[0], tmp));
  DONE;
})

;; extract parts of a 64 bit object into 2 32-bit ints
(define_insn "unpack<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
                    (const_int 0)] UNSPEC_BIT_CONV))
   (set (match_operand:SI 1 "nvptx_register_operand" "=R")
        (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
  ""
  "%.\\tmov.b64\\t{%0,%1}, %2;")

;; pack 2 32-bit ints into a 64 bit object
(define_insn "packsi<mode>2"
  [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
        (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
                      (match_operand:SI 2 "nvptx_register_operand" "R")]
                     UNSPEC_BIT_CONV))]
  ""
  "%.\\tmov.b64\\t%0, {%1,%2};")

;; Atomic insns.

;; Expand a compare-and-swap on the memory in operand 2.  The operation
;; itself is delegated to atomic_compare_and_swap<mode>_1_local when
;; nvptx_mem_local_p accepts the memory operand, and to
;; atomic_compare_and_swap<mode>_1 otherwise.  Both are passed the
;; old-value output, the memory, the expected value, the new value and
;; the success model (operand 6).  Afterwards operand 0 is derived from
;; an EQ comparison of the old value (operand 1) with the expected value
;; (operand 3).  Operands 5 (is_weak) and 7 (failure model) are not
;; referenced by this expansion.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "nvptx_register_operand")        ;; bool success output
   (match_operand:SDIM 1 "nvptx_register_operand")      ;; oldval output
   (match_operand:SDIM 2 "memory_operand")              ;; memory
   (match_operand:SDIM 3 "nvptx_register_operand")      ;; expected input
   (match_operand:SDIM 4 "nvptx_register_operand")      ;; newval input
   (match_operand:SI 5 "const_int_operand")             ;; is_weak
   (match_operand:SI 6 "const_int_operand")             ;; success model
   (match_operand:SI 7 "const_int_operand")]            ;; failure model
  ""
{
  /* Pick the insn variant according to nvptx_mem_local_p.  */
  if (nvptx_mem_local_p (operands[2]))
    emit_insn (gen_atomic_compare_and_swap<mode>_1_local
                (operands[1], operands[2], operands[3], operands[4],
                 operands[6]));
  else
    emit_insn (gen_atomic_compare_and_swap<mode>_1
                (operands[1], operands[2], operands[3], operands[4],
                 operands[6]));

  /* Success flag: compare old value with expected value in a BImode
     register, then let sel_truesi choose between the constants 1 and 0
     (presumably 1 when the comparison holds; see sel_truesi).  */
  rtx cond = gen_reg_rtx (BImode);
  emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
  emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
  DONE;
})

;; Compare-and-swap variant chosen by the expander when
;; nvptx_mem_local_p holds for the memory operand.  No atom instruction
;; is emitted; the output is a brace-delimited sequence that declares
;; the scratch registers %%eq_p and %%val, loads the memory into %%val,
;; compares it with operand 2, stores operand 3 under the %%eq_p
;; predicate, and finally moves %%val into operand 0.  Operand 4 (the
;; memory model) does not appear in the output.  The insn is marked
;; non-predicable.
(define_insn "atomic_compare_and_swap<mode>_1_local"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")
           (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 4 "const_int_operand")]
          UNSPECV_CAS_LOCAL))
   (set (match_dup 1)
        (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
  ""
  {
    output_asm_insn ("{", NULL);
    /* Scratch declarations: a predicate and a value register.  */
    output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
    output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
    /* Load, compare against the expected value, conditionally store.  */
    output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
    output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;",
                     operands);
    output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands);
    /* The loaded value is the result.  */
    output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
    output_asm_insn ("}", NULL);
    return "";
  }
  [(set_attr "predicable" "no")])

;; Compare-and-swap variant used when nvptx_mem_local_p does not hold
;; (see the expander).  Emits an atom ... cas instruction through
;; nvptx_output_atomic_insn.  The trailing arguments 1 and 4 are the
;; numbers of the memory operand and of the const_int model operand of
;; this pattern (presumably what the helper expects in those positions;
;; confirm against its definition).
(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")
           (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 4 "const_int_operand")]
          UNSPECV_CAS))
   (set (match_dup 1)
        (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
  ""
  {
    const char *t
      = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;";
    return nvptx_output_atomic_insn (t, operands, 1, 4);
  }
  [(set_attr "atomic" "true")])

;; Atomic exchange: operand 0 receives the result of the exchange and
;; memory operand 1 is set to operand 2.  When nvptx_mem_local_p holds
;; for the memory, a load / store / move sequence using the scratch
;; register %%val is printed instead of an atom instruction; otherwise
;; an atom ... exch instruction is emitted via nvptx_output_atomic_insn.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")    ;; output
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")         ;; memory
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_XCHG))
   (set (match_dup 1)
        (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
  ""
  {
    if (nvptx_mem_local_p (operands[1]))
      {
        /* Old value is loaded first, then the new value is stored, then
           the old value is copied to the output register.  */
        output_asm_insn ("{", NULL);
        output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
        output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
        output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands);
        output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
        output_asm_insn ("}", NULL);
        return "";
      }
    const char *t
      = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;";
    return nvptx_output_atomic_insn (t, operands, 1, 3);
  }
  [(set_attr "atomic" "true")])

;; Expand an atomic store of operand 1 into memory operand 0.  Four
;; outcomes, tried in this order:
;;  - the address has a base register for which REGNO_PTR_FRAME_P holds:
;;    an ordinary mov<mode> is emitted;
;;  - TARGET_SM70: nvptx_atomic_store_sm70<mode> is emitted;
;;  - nvptx_mem_maybe_shared_p is false for the memory: FAIL, leaving the
;;    store to the generic expander;
;;  - otherwise nvptx_atomic_store<mode> is emitted.
(define_expand "atomic_store<mode>"
  [(match_operand:SDIM 0 "memory_operand" "=m")           ;; memory
   (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri")  ;; input
   (match_operand:SI 2 "const_int_operand")]              ;; model
  ""
{
  struct address_info info;
  decompose_mem_address (&info, operands[0]);
  /* Base register satisfying REGNO_PTR_FRAME_P: use a plain move.  */
  if (info.base != NULL && REG_P (*info.base)
      && REGNO_PTR_FRAME_P (REGNO (*info.base)))
    {
      emit_insn (gen_mov<mode> (operands[0], operands[1]));
      DONE;
    }

  if (TARGET_SM70)
    {
      emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1],
                                                    operands[2]));
      DONE;
    }

  bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
  if (!maybe_shared_p)
    /* Fall back to expand_atomic_store.  */
    FAIL;

  emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1],
                                           operands[2]));
  DONE;
})

;; Atomic store for TARGET_SM70: prints a st instruction through
;; nvptx_output_atomic_insn, passing 0 and 2 as the numbers of the
;; memory and model operands.  The "atomic" attribute is deliberately
;; "false" for this pattern (see the note on the attribute line).
(define_insn "nvptx_atomic_store_sm70<mode>"
  [(set (match_operand:SDIM 0 "memory_operand" "+m")              ;; memory
       (unspec_volatile:SDIM
         [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri")   ;; input
          (match_operand:SI 2 "const_int_operand")]               ;; model
               UNSPECV_ST))]
  "TARGET_SM70"
  {
    const char *t
      = "%.\tst%A0.b%T0\t%0, %1;";
    return nvptx_output_atomic_insn (t, operands, 0, 2);
  }
  [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn.

;; Atomic store for targets without TARGET_SM70: prints an
;; atom ... exch instruction with "_" in the destination position, so
;; no output register is named.  0 and 2 are passed to
;; nvptx_output_atomic_insn as the numbers of the memory and model
;; operands.
(define_insn "nvptx_atomic_store<mode>"
  [(set (match_operand:SDIM 0 "memory_operand" "+m")              ;; memory
       (unspec_volatile:SDIM
         [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri")   ;; input
          (match_operand:SI 2 "const_int_operand")]               ;; model
               UNSPECV_ST))]
  "!TARGET_SM70"
  {
    const char *t
      = "%.\tatom%A0.exch.b%T0\t_, %0, %1;";
    return nvptx_output_atomic_insn (t, operands, 0, 2);
  }
  [(set_attr "atomic" "true")])

;; Integer fetch-and-add: memory operand 1 becomes operand 1 + operand 2
;; and operand 0 receives the value the memory held (the local sequence
;; below copies the loaded %%val, not %%update, into operand 0).  When
;; nvptx_mem_local_p holds for the memory, a load / add / store / move
;; sequence with scratch registers %%val and %%update is printed;
;; otherwise an atom ... add instruction is emitted via
;; nvptx_output_atomic_insn.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
        (unspec_volatile:SDIM
          [(plus:SDIM (match_dup 1)
                      (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  ""
  {
    if (nvptx_mem_local_p (operands[1]))
      {
        output_asm_insn ("{", NULL);
        output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
        output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
        output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
        output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
                         operands);
        output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
        output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
        output_asm_insn ("}", NULL);
        return "";
      }
    const char *t
      = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
    return nvptx_output_atomic_insn (t, operands, 1, 3);
  }
  [(set_attr "atomic" "true")])

;; SFmode counterpart of atomic_fetch_add<mode>.  The output code is the
;; same; only the mode and the constraint of operand 2 ("RF" instead of
;; "Ri") differ.
(define_insn "atomic_fetch_addsf"
  [(set (match_operand:SF 1 "memory_operand" "+m")
        (unspec_volatile:SF
         [(plus:SF (match_dup 1)
                   (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_LOCK))
   (set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  ""
  {
    if (nvptx_mem_local_p (operands[1]))
      {
        output_asm_insn ("{", NULL);
        output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
        output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
        output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
        output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
                         operands);
        output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
        output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
        output_asm_insn ("}", NULL);
        return "";
      }
    const char *t
      = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
    return nvptx_output_atomic_insn (t, operands, 1, 3);
  }
  [(set_attr "atomic" "true")])

;; Fetch-and-<logic> for the codes of the any_logic iterator (defined
;; outside this part of the file).  Enabled for SImode unconditionally
;; and for the other SDIM mode only with TARGET_SM35.  The structure
;; matches atomic_fetch_add<mode>, except that the scratch registers are
;; declared with .reg.b%T0 and the operation is printed as <logic>.b%T0.
(define_insn "atomic_fetch_<logic><mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
        (unspec_volatile:SDIM
          [(any_logic:SDIM (match_dup 1)
                           (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  "<MODE>mode == SImode || TARGET_SM35"
  {
    if (nvptx_mem_local_p (operands[1]))
      {
        output_asm_insn ("{", NULL);
        output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
        output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands);
        output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
        output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;",
                         operands);
        output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
        output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
        output_asm_insn ("}", NULL);
        return "";
      }
    const char *t
      = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;";
    return nvptx_output_atomic_insn (t, operands, 1, 3);
  }

  [(set_attr "atomic" "true")])

;; Expand atomic_test_and_set as a call to the library function
;; __atomic_test_and_set_1.  The call receives the address of operand 1
;; (converted to ptr_mode) and the memory model; its SImode result is
;; placed in operand 0.
(define_expand "atomic_test_and_set"
  [(match_operand:SI 0 "nvptx_register_operand")        ;; bool success output
   (match_operand:QI 1 "memory_operand")                ;; memory
   (match_operand:SI 2 "const_int_operand")]            ;; model
  ""
{
  rtx libfunc;
  rtx addr;
  libfunc = init_one_libfunc ("__atomic_test_and_set_1");
  addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));
  emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode,
                          addr, ptr_mode,
                          operands[2], SImode);
  DONE;
})

;; Barrier instruction.  When operand 1 is zero only operand 0 is
;; printed, otherwise both operands are printed.  With TARGET_PTX_6_0 the
;; mnemonic is barrier.sync (with the .aligned suffix in the one-operand
;; form only); without it, bar.sync is used in both forms.
(define_insn "nvptx_barsync"
  [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
                     (match_operand:SI 1 "const_int_operand")]
                    UNSPECV_BARSYNC)]
  ""
  {
    if (INTVAL (operands[1]) == 0)
      return (TARGET_PTX_6_0
              ? "\\tbarrier.sync.aligned\\t%0;"
              : "\\tbar.sync\\t%0;");
    else
      return (TARGET_PTX_6_0
              ? "\\tbarrier.sync\\t%0, %1;"
              : "\\tbar.sync\\t%0, %1;");
  }
  [(set_attr "predicable" "no")])

;; Emit bar.warp.sync with the constant mask 0xffffffff.  Only available
;; with TARGET_PTX_6_0.
(define_insn "nvptx_warpsync"
  [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
  "TARGET_PTX_6_0"
  "%.\\tbar.warp.sync\\t0xffffffff;")

;; Emit a check built around vote.ballot.b32: the ballot result is put
;; in %%r_act, the predicate %%r_do_abort is set when %%r_act differs
;; from 0xffffffff, and trap and exit are emitted under that predicate.
;; %%r_do_abort is first initialised to 0 with an unpredicated mov.pred,
;; while the vote and setp lines carry the %. prefix.
(define_insn "nvptx_uniform_warp_check"
  [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
  ""
  {
    /* NULL-terminated list of the lines to print, in order.  */
    const char *insns[] = {
      "{",
      "\\t" ".reg.b32" "\\t" "%%r_act;",
      "%.\\t" "vote.ballot.b32" "\\t" "%%r_act,1;",
      "\\t" ".reg.pred" "\\t" "%%r_do_abort;",
      "\\t" "mov.pred" "\\t" "%%r_do_abort,0;",
      "%.\\t" "setp.ne.b32" "\\t" "%%r_do_abort,%%r_act,"
                                  "0xffffffff;",
      "@ %%r_do_abort\\t" "trap;",
      "@ %%r_do_abort\\t" "exit;",
      "}",
      NULL
    };
    for (const char **p = &insns[0]; *p != NULL; p++)
      output_asm_insn (*p, NULL);
    return "";
  })

;; Expander for the memory_barrier pattern.  Operand 0 is created here
;; as a volatile BLKmode MEM whose address is a Pmode SCRATCH; the
;; resulting insn is matched by *memory_barrier below.
(define_expand "memory_barrier"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
;; (corresponding to cuda functions threadfence_block, threadfence and
;; threadfence_system).  For the insn memory_barrier we use membar.sys.  This
;; may be overconservative, but before using membar.gl instead we'll need to
;; explain in detail why it's safe to use.  For now, use membar.sys.
(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
  ""
  "\\tmembar.sys;"
  [(set_attr "predicable" "no")])

;; Expander for a membar.cta barrier; builds the same kind of volatile
;; BLKmode scratch MEM as memory_barrier and is matched by
;; *nvptx_membar_cta.
(define_expand "nvptx_membar_cta"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Prints membar.cta; not predicable.
(define_insn "*nvptx_membar_cta"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
  ""
  "\\tmembar.cta;"
  [(set_attr "predicable" "no")])

;; Expander for a membar.gl barrier; builds the same kind of volatile
;; BLKmode scratch MEM as memory_barrier and is matched by
;; *nvptx_membar_gl.
(define_expand "nvptx_membar_gl"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Prints membar.gl; not predicable.
(define_insn "*nvptx_membar_gl"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
  ""
  "\\tmembar.gl;"
  [(set_attr "predicable" "no")])

;; Prints a .pragma "nounroll" directive; not predicable.
(define_insn "nvptx_nounroll"
  [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
  ""
  "\\t.pragma \\\"nounroll\\\";"
  [(set_attr "predicable" "no")])

;; Sets DImode operand 0 from an unspec_volatile of the constant in
;; operand 1.  The text is produced entirely by
;; nvptx_output_red_partition, which is not visible here; not
;; predicable.
(define_insn "nvptx_red_partition"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
        (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
         UNSPECV_RED_PART))]
  ""
  {
    return nvptx_output_red_partition (operands[0], operands[1]);
  }
  [(set_attr "predicable" "no")])

;; Expand QI mode operations using SI mode instructions.
;; The code iterators below group the operations by whether the QImode
;; inputs are widened with unsignedp 0 or 1 in the expanders that
;; follow.  Each code attribute gives the name fragment used to build
;; the corresponding SImode generator name (gen_<attr>si3 or
;; gen_<attr>si2).
(define_code_iterator any_sbinary [plus minus smin smax])
(define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")])

(define_code_iterator any_ubinary [and ior xor umin umax])
(define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin")
                           (umax "umax")])

(define_code_iterator any_sunary [neg abs])
(define_code_attr sunary [(neg "neg") (abs "abs")])

(define_code_iterator any_uunary [not])
(define_code_attr uunary [(not "one_cmpl")])

;; QImode add, sub, smin and smax: both inputs are widened to SImode
;; with convert_modes (final argument 0), the SImode operation is
;; emitted into a fresh SImode register, and the result is truncated
;; back into operand 0 with truncsiqi2.
(define_expand "<sbinary>qi3"
  [(set (match_operand:QI 0 "nvptx_register_operand")
        (any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
                        (match_operand:QI 2 "nvptx_nonmemory_operand")))]
  ""
{
  rtx reg = gen_reg_rtx (SImode);
  rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
  rtx op1 = convert_modes (SImode, QImode, operands[2], 0);
  /* For MINUS the first input is forced into a register (presumably
     because the SImode sub pattern wants a register there; confirm
     against subsi3).  */
  if (<CODE> == MINUS)
    op0 = force_reg (SImode, op0);
  emit_insn (gen_<sbinary>si3 (reg, op0, op1));
  emit_insn (gen_truncsiqi2 (operands[0], reg));
  DONE;
})

;; QImode and, ior, xor, umin and umax: same scheme as <sbinary>qi3, but
;; the inputs are widened with convert_modes final argument 1.
(define_expand "<ubinary>qi3"
  [(set (match_operand:QI 0 "nvptx_register_operand")
        (any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
                        (match_operand:QI 2 "nvptx_nonmemory_operand")))]
  ""
{
  rtx reg = gen_reg_rtx (SImode);
  rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
  rtx op1 = convert_modes (SImode, QImode, operands[2], 1);
  emit_insn (gen_<ubinary>si3 (reg, op0, op1));
  emit_insn (gen_truncsiqi2 (operands[0], reg));
  DONE;
})

;; QImode neg and abs: the input is widened with convert_modes final
;; argument 0, the SImode operation is emitted, and the result is
;; truncated back with truncsiqi2.
(define_expand "<sunary>qi2"
  [(set (match_operand:QI 0 "nvptx_register_operand")
        (any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
  ""
{
  rtx reg = gen_reg_rtx (SImode);
  rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
  emit_insn (gen_<sunary>si2 (reg, op0));
  emit_insn (gen_truncsiqi2 (operands[0], reg));
  DONE;
})

;; QImode not: the input is widened with convert_modes final argument 1,
;; one_cmplsi2 is emitted, and the result is truncated back with
;; truncsiqi2.
(define_expand "<uunary>qi2"
  [(set (match_operand:QI 0 "nvptx_register_operand")
        (any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
  ""
{
  rtx reg = gen_reg_rtx (SImode);
  rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
  emit_insn (gen_<uunary>si2 (reg, op0));
  emit_insn (gen_truncsiqi2 (operands[0], reg));
  DONE;
})

;; Store the result of a QImode comparison in SImode operand 0.  Both
;; inputs are widened to SImode, with the unsignedp flag taken from
;; unsigned_condition_p of the comparison code.  The comparison is then
;; emitted with cmpsi into a BImode register, which setccsi_from_bi
;; turns into the SImode result.
(define_expand "cstoreqi4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "nvptx_comparison_operator"
          [(match_operand:QI 2 "nvptx_nonmemory_operand")
           (match_operand:QI 3 "nvptx_nonmemory_operand")]))]
  ""
{
  rtx reg = gen_reg_rtx (BImode);
  enum rtx_code code = GET_CODE (operands[1]);
  int unsignedp = unsigned_condition_p (code);
  rtx op2 = convert_modes (SImode, QImode, operands[2], unsignedp);
  rtx op3 = convert_modes (SImode, QImode, operands[3], unsignedp);
  /* Rebuild the comparison rtx over the widened operands.  */
  rtx cmp = gen_rtx_fmt_ee (code, SImode, op2, op3);
  emit_insn (gen_cmpsi (reg, cmp, op2, op3));
  emit_insn (gen_setccsi_from_bi (operands[0], reg));
  DONE;
})

;; sign_extend of a QImode truncate of an SImode register, printed as a
;; single cvt.s32.s8.
(define_insn "*ext_truncsi2_qi"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (sign_extend:SI
         (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.s32.s8\\t%0, %1;")

;; zero_extend of a QImode truncate of an SImode register, printed as a
;; single cvt.u32.u8.
(define_insn "*zext_truncsi2_qi"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (zero_extend:SI
         (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
  ""
  "%.\\tcvt.u32.u8\\t%0, %1;")