;; Copyright (C) 2016-2020 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for specific types
; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI
  [V64QI])
(define_mode_iterator V_HI
  [V64HI])
(define_mode_iterator V_HF
  [V64HF])
(define_mode_iterator V_SI
  [V64SI])
(define_mode_iterator V_SF
  [V64SF])
(define_mode_iterator V_DI
  [V64DI])
(define_mode_iterator V_DF
  [V64DF])

; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
  [V64QI V64HI])

; Vector modes for one vector register
(define_mode_iterator V_1REG
  [V64QI V64HI V64SI V64HF V64SF])

; Integer and floating-point subsets of V_1REG.
; V_INT_1REG_ALT lists exactly the same modes as V_INT_1REG.
; NOTE(review): presumably it exists so that one pattern can iterate over two
; integer modes independently -- confirm against its users.
(define_mode_iterator V_INT_1REG
  [V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT
  [V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG
  [V64HF V64SF])

; Vector modes for two vector registers
(define_mode_iterator V_2REG
  [V64DI V64DF])

; Vector modes with native support
(define_mode_iterator V_noQI
  [V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI
  [V64HF V64SI V64SF V64DI V64DF])

(define_mode_iterator V_INT_noQI
  [V64HI V64SI V64DI])

; All of above
; V_ALL_ALT lists exactly the same modes as V_ALL.
(define_mode_iterator V_ALL
  [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT
  [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])

; All integer vector modes and all floating-point vector modes.
(define_mode_iterator V_INT
  [V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP
  [V64HF V64SF V64DF])

; Element mode of each vector mode, in lower case (for building pattern
; names) and in upper case (for use as an RTL mode).
(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

; Every vector mode maps to the SImode vector (v64si/V64SI) ...
(define_mode_attr vnsi
  [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])

(define_mode_attr VnSI
  [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])

; ... and to the DImode vector (v64di/V64DI).
(define_mode_attr vndi
  [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])

(define_mode_attr VnDI
  [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])

; Only defined for the integer one-register modes.
(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])

;; }}}
;; {{{ Substitutions

; Each attribute expands to "" in the pattern as written and to "_exec" in the
; copy generated by the named define_subst, so a pattern whose name contains
; e.g. <exec> yields both "foo" and "foo_exec".
(define_subst_attr "exec" "vec_merge"
  "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
  "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
  "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
  "" "_exec")

; Rewrite a plain vector set as a vec_merge: the original source (operand 1)
; is merged with new operand 3 under the DImode mask in new operand 4, which
; must satisfy gcn_exec_reg_operand.
(define_subst "vec_merge"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

; As "vec_merge", for patterns that also carry a clobber (operand 2); the
; clobber is kept unchanged.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

; As "vec_merge", for patterns that also set a DImode result (operand 2).
; In the generated copy that second set becomes the original value ANDed
; with EXEC_REG.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:V_ALL
             (match_dup 1)
             (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])

; Stores have no destination register to merge into, so the generated copy
; instead appends a DImode exec operand (operand 4) as a fifth element of the
; UNSPEC_SCATTER vector.
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves. Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

; Expand a vector move.  Before register allocation a move to or from memory
; is emitted directly as a scatter or gather of an expanded vector address.
; Once LRA is running, memory moves go through mov<mode>_sgprbase with a
; fresh <VnDI> register as the address temporary.  Register-to-register
; moves fall through to the default expansion of the pattern.
(define_expand "mov<mode>"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (match_operand:V_ALL 1 "general_operand"))]
  ""
  {
    /* Neither LRA nor a completed reload: new insns can still be emitted
       freely.  */
    const bool before_ra = !lra_in_progress && !reload_completed;

    if (MEM_P (operands[0]) && before_ra)
      {
        /* Store: the value must be in a register before it is scattered.  */
        operands[1] = force_reg (<MODE>mode, operands[1]);

        rtx dest_mem = operands[0];
        rtx addr_space = GEN_INT (MEM_ADDR_SPACE (dest_mem));
        rtx is_volatile = GEN_INT (MEM_VOLATILE_P (dest_mem));
        rtx tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx vec_addr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                            dest_mem, tmp);

        emit_insn (gen_scatter<mode>_expr (vec_addr, operands[1],
                                           addr_space, is_volatile));
        DONE;
      }
    else if (MEM_P (operands[1]) && before_ra)
      {
        /* Load: gather straight into the destination.  */
        rtx src_mem = operands[1];
        rtx addr_space = GEN_INT (MEM_ADDR_SPACE (src_mem));
        rtx is_volatile = GEN_INT (MEM_VOLATILE_P (src_mem));
        rtx tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx vec_addr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                            src_mem, tmp);

        emit_insn (gen_gather<mode>_expr (operands[0], vec_addr,
                                          addr_space, is_volatile));
        DONE;
      }
    else if (MEM_P (operands[0]) || MEM_P (operands[1]))
      {
        /* Memory move requested while LRA is in progress.  */
        gcc_assert (!reload_completed);
        rtx addr_tmp = gen_reg_rtx (<VnDI>mode);
        emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1],
                                           addr_tmp));
        DONE;
      }
  })

; A pseudo instruction that helps LRA use the "U0" constraint.

; Produces no assembly at all (empty template, length 0): the destination is a
; vector register and the source must satisfy gcn_unspec_operand.
(define_insn "mov<mode>_unspec"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
        (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

; Unconditional move for modes that fit in one vector register.  The second
; alternative takes a "B" constant and is 8 bytes long instead of 4.
(define_insn "*mov<mode>"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
        (match_operand:V_1REG 1 "general_operand" "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

; Masked move for one-register modes: operand 1 is merged with operand 2
; under the DImode mask in operand 3.
;   alternatives 0,1: mask constraint "e", operand 2 is "U0"; plain v_mov_b32.
;   alternative 2:    mask constraint "cV"; v_cndmask_b32 reading vcc.
;   alternative 3:    mask constraint "Sv"; v_cndmask_b32 naming the mask (%3).
;   alternatives 4,5: load from / store to memory; the template is "#", so the
;                     insn must be split, and the <VnDI> scratch (operand 4)
;                     is an earlyclobbered vector register ("=&v").
; The insn condition rejects a memory destination unless the source is a
; register.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
        (vec_merge:V_1REG
          (match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
          (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
                                  "U0,U0,vA,vA,U0,U0")
          (match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %2, %1, vcc
   v_cndmask_b32\t%0, %2, %1, %3
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
;        (vec_merge:V_1REG
;          (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
;   (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

; Unconditional move for modes that occupy two vector registers.  The two
; halves are copied with separate v_mov_b32 instructions; the order is chosen
; from the register numbers so that an overlapping source is not overwritten
; before it has been read.
(define_insn "*mov<mode>"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
        (match_operand:V_2REG 1 "general_operand" "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

; Masked move for two-register modes.
;   alternative 0:    mask constraint "e"; two v_mov_b32.
;   alternative 1:    mask constraint "cV"; two v_cndmask_b32 reading vcc.
;   alternative 2:    mask constraint "Sv"; two v_cndmask_b32 naming %3.
;   alternatives 3,4: load from / store to memory; output "#" (must be split).
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
        (vec_merge:V_2REG
          (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
          (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
                                  " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    /* The memory alternatives are always split.  Decide this before looking
       at register numbers: in the "m <- v" alternative operands[0] is a MEM,
       and REGNO must only be applied to a REG.  */
    if (which_alternative > 2)
      return "#";

    /* Copy low half first unless that would overwrite a source register
       that is still needed for the high half.  */
    const bool low_first
      = !REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]);

    switch (which_alternative)
      {
      case 0:
        return (low_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
                : "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, vcc"
                : "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, vcc");
      case 2:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, %3"
                : "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, %3");
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
;        (vec_merge:V_2REG
;          (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
;   (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;     else \
;       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;   #
;   #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

; Move wrapped in UNSPEC_SGPRBASE, for one-register modes.  Only valid while
; LRA is in progress or after reload.  Operand 2 is the <VnDI> address
; temporary, earlyclobbered in every alternative.  The memory alternatives
; output "#" and must be split.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

; As above, for two-register modes.  Alternative 0 emits two v_mov_b32 whose
; order depends on the register numbers, so an overlapping source is not
; overwritten before it is read.
;
; The length of alternative 0 is 16, matching *mov<mode> for V_2REG, which
; emits the same two instructions from the same "vDB" source.  A declared
; length must never be smaller than the code that can be emitted.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
        (unspec:V_2REG
          [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "16,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload. It allows a reload with a scratch register.

; Load operand 1 (memory) into operand 0 via mov<mode>_sgprbase, using
; operand 2 as the address temporary.
(define_expand "reload_in<mode>"
  [(set (match_operand:V_ALL 0 "register_operand" "= v")
        (match_operand:V_ALL 1 "memory_operand" " m"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

; Store operand 1 (register) to operand 0 (memory) via mov<mode>_sgprbase,
; using operand 2 as the address temporary.
(define_expand "reload_out<mode>"
  [(set (match_operand:V_ALL 0 "memory_operand" "= m")
        (match_operand:V_ALL 1 "register_operand" " v"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; Expand scalar addresses into gather/scatter patterns

; SGPRBASE store -> scatter.  Operand 5 is the expanded vector address;
; operands 6 and 7 carry the MEM's address space and volatile flag.
(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
                    UNSPEC_SCATTER))]
  {
    rtx dest_mem = operands[0];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       dest_mem, operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (dest_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (dest_mem));
  })

; Masked store -> scatter with the exec mask (operand 3) appended.  The mask
; is also handed to the address expansion.
(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "general_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1)
                     (match_dup 6) (match_dup 7) (match_dup 3)]
                    UNSPEC_SCATTER))]
  {
    rtx dest_mem = operands[0];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       dest_mem, operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (dest_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (dest_mem));
  })

; SGPRBASE load -> gather.
(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (match_dup 0)
        (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
                       (mem:BLK (scratch))]
                      UNSPEC_GATHER))]
  {
    rtx src_mem = operands[1];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       src_mem, operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (src_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (src_mem));
  })

; Masked load -> gather wrapped in the original vec_merge.
(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "memory_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
                         (mem:BLK (scratch))]
                        UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    rtx src_mem = operands[1];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       src_mem, operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (src_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (src_mem));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates

; Write scalar operand 1 into the lane selected by operand 2 (the merge mask
; is 1 << operand 2); the other lanes come from operand 3.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_1REG 0 "register_operand" "= v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
; Two-register form of *vec_set<mode>: one v_writelane_b32 per half.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_2REG 0 "register_operand" "= v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Named entry point: set lane operand 2 of operand 0 to scalar operand 1,
; keeping the other lanes of operand 0.
(define_expand "vec_set<mode>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (vec_merge:V_ALL
          (vec_duplicate:V_ALL
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

; As *vec_set<mode>, but the merge mask is a constant.  The condition accepts
; only a mask whose exact_log2 is a lane number below the number of units in
; the mode; the lane number is printed in place of the mask.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand" " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* Turn the one-bit mask into the lane index the instruction expects.  */
    const int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Two-register form of *vec_set<mode>_1.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_2REG 0 "register_operand" "=v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand" " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* Turn the one-bit mask into the lane index the instruction expects.  */
    const int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Broadcast a scalar to every lane (one-register modes).
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (vec_duplicate:V_1REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Broadcast a scalar to every lane (two-register modes): one move per half.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_2REG 0 "register_operand" "= v")
        (vec_duplicate:V_2REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

; Read lane operand 2 of vector operand 1 into a scalar register.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_1REG 1 "register_operand" " v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Two-register form.  The destination is earlyclobbered ("=&Sg") because the
; lane selector is read again by the second instruction.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_2REG 1 "register_operand" " v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Extract the element of operand 2 selected by the highest set bit of the
; DImode mask in operand 1: lane = 63 - clz (mask).
(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:V_ALL 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx lane = gen_reg_rtx (SImode);

    /* lane = clz (mask); lane = 63 - lane.  */
    emit_insn (gen_clzdi2 (lane, operands[1]));
    emit_insn (gen_subsi3 (lane, GEN_INT (63), lane));
    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], operands[2],
                                                   lane));
    DONE;
  })

; As extract_last_<mode>, but yield operand 1 when the mask (operand 2) is
; zero.
(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_ALL 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx result = operands[0];
    rtx active = operands[2];
    rtx use_default = gen_label_rtx ();
    rtx done = gen_label_rtx ();

    /* if (mask == 0) goto use_default;  */
    rtx mask_is_zero = gen_rtx_EQ (VOIDmode, active, const0_rtx);
    emit_jump_insn (gen_cbranchdi4 (mask_is_zero, active, const0_rtx,
                                    use_default));

    /* result = last active element; goto done;  */
    emit_insn (gen_extract_last_<mode> (result, active, operands[3]));
    emit_jump_insn (gen_jump (done));
    emit_barrier ();

    /* use_default: result = default value.  */
    emit_label (use_default);
    emit_move_insn (result, operands[1]);

    emit_label (done);
    DONE;
  })

; Vector initialisation is delegated entirely to gcn_expand_vector_init.
(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })

;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec. The unspec formats are as follows:
;;
;;     (unspec:V??
;;        [(<address expression>)
;;         (<addr_space_t>)
;;         (<use_glc>)
;;         (mem:BLK (scratch))]
;;        UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;        [(<address expression>)
;;         (<source register>)
;;         (<addr_space_t>)
;;         (<use_glc>)
;;         (<exec>)]
;;        UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;;   unknown memory read is taking place. Stores are expected to use a similar
;;   mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.

; Standard-name gather: operand 1 is the scalar base, operand 2 the vector of
; offsets, operand 3 is passed to gcn_expand_scaled_offsets as an integer and
; operand 4 as-is.  The shape of the insn emitted depends on the mode of the
; address that helper returns.
(define_expand "gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == <VnDI>mode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
                                                const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
                                                 addr, const0_rtx, const0_rtx,
                                                 const0_rtx));
    DONE;
  })

; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (unspec:V_ALL
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})

; Gather from a 64-bit vector address plus a constant offset (operand 2).
; Operand 3 is the address space, operand 4 the glc flag.
;
; For flat addresses the offset is only printed when TARGET_GCN5_PLUS, so on
; other targets the pattern must match only a zero offset; otherwise the
; offset would be silently dropped from the emitted instruction.  This is
; the same condition as scatter<mode>_insn_1offset.
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:V_ALL 0 "register_operand" "=v")
        (unspec:V_ALL
          [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
                        (vec_duplicate:<VnDI>
                          (match_operand 2 "immediate_operand" " n")))
           (match_operand 3 "immediate_operand" " n")
           (match_operand 4 "immediate_operand" " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[2]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
        && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    /* The template is built at output time because the glc suffix and the
       presence of the offset field vary.  */
    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
                   glc);
        else
          sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; Gather from a 32-bit vector address in a DS address space; " gds" is
; appended when AS_GDS_P holds for the address space.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:V_ALL 0 "register_operand" "=v")
        (unspec:V_ALL
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
                        (vec_duplicate:<VnSI>
                          (match_operand 2 "immediate_operand" " n")))
           (match_operand 3 "immediate_operand" " n")
           (match_operand 4 "immediate_operand" " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Global gather from scalar base (operand 1) + sign-extended 32-bit vector
; offsets (operand 2) + constant offset (operand 3).
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:V_ALL 0 "register_operand" "=v")
        (unspec:V_ALL
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 1 "register_operand" "Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 2 "register_operand" " v")))
             (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
           (match_operand 4 "immediate_operand" " n")
           (match_operand 5 "immediate_operand" " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
        /* Work around assembler bug in which a 64-bit register is expected,
           but a 32-bit value would be correct.  */
        int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
        sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
                 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; Standard-name scatter; the store counterpart of gather_load.  Operand 0 is
; the scalar base, operand 1 the vector of offsets, operand 4 the data.
(define_expand "scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == <VnDI>mode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
                                                 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
                                                  const0_rtx, operands[4],
                                                  const0_rtx, const0_rtx));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VnDI> 0 "")
           (match_operand:V_ALL 1 "register_operand")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")]
          UNSPEC_SCATTER))]
  ""
  {})

; Scatter to a 64-bit vector address plus a constant offset (operand 1).
; For flat addresses a non-zero offset is accepted only when
; TARGET_GCN5_PLUS, which is also the only case in which it is printed.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
                        (vec_duplicate:<VnDI>
                          (match_operand 1 "immediate_operand" "n")))
           (match_operand:V_ALL 2 "register_operand" "v")
           (match_operand 3 "immediate_operand" "n")
           (match_operand 4 "immediate_operand" "n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
        || (TARGET_GCN5_PLUS
            && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
        && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
        else
          sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; Scatter to a 32-bit vector address in a DS address space.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
                        (vec_duplicate:<VnSI>
                          (match_operand 1 "immediate_operand" "n")))
           (match_operand:V_ALL 2 "register_operand" "v")
           (match_operand 3 "immediate_operand" "n")
           (match_operand 4 "immediate_operand" "n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
             (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Global scatter to scalar base (operand 0) + sign-extended 32-bit vector
; offsets (operand 1) + constant offset (operand 2).
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 0 "register_operand" "Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 1 "register_operand" " v")))
             (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
           (match_operand:V_ALL 3 "register_operand" " v")
           (match_operand 4 "immediate_operand" " n")
           (match_operand 5 "immediate_operand" " n")]
          UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
        /* Work around assembler bug in which a 64-bit register is expected,
           but a 32-bit value would be correct.  */
        int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
        sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
                 reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

;; }}}
;; {{{ Permutations

; ds_bpermute_b32 for one-register modes.  Note the operand numbering: the
; data vector is operand 2 and the <VnSI> index vector is operand 1.
(define_insn "ds_bpermute<mode>"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (unspec:V_1REG
          [(match_operand:V_1REG 2 "register_operand" " v")
           (match_operand:<VnSI> 1 "register_operand" " v")
           (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
          UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])

; Two-register form: split after reload into two <VnSI> permutes, one per
; half of the destination and source, both using the same index vector.
(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:V_2REG 0 "register_operand" "=&v")
        (unspec:V_2REG
          [(match_operand:V_2REG 2 "register_operand" " v0")
           (match_operand:<VnSI> 1 "register_operand" " v")
           (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
          UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:<VnSI>
                        [(match_dup 6) (match_dup 1) (match_dup 3)]
                        UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:<VnSI>
                        [(match_dup 7) (match_dup 1) (match_dup 3)]
                        UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])

; DPP move; the assembly text is produced by gcn_expand_dpp_shr_insn from the
; constant in operand 2.
(define_insn "@dpp_move<mode>"
  [(set (match_operand:V_noHI 0 "register_operand" "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand" " v")
           (match_operand:SI 2 "const_int_operand" " n")]
          UNSPEC_MOV_DPP_SHR))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
                                    UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])

;; }}}
;; {{{ ALU special case: add/sub

; Vector integer add; VCC is clobbered.
(define_insn "add<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
        (plus:V_INT_1REG
          (match_operand:V_INT_1REG 1 "register_operand" "% v")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand" "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; As above, with operand 2 a scalar that is broadcast to every lane.
(define_insn "add<mode>3_dup<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
        (plus:V_INT_1REG
          (vec_duplicate:V_INT_1REG
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
          (match_operand:V_INT_1REG 1 "register_operand" " v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; Add that also sets operand 3 to the per-lane unsigned carry
; (result < operand 1).
(define_insn "add<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand" "= v, v")
        (plus:V_SI
          (match_operand:V_SI 1 "register_operand" "% v, v")
          (match_operand:V_SI 2 "gcn_alu_operand" "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand" "= cV, Sg")
        (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])

; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.
(define_insn "add<mode>3_vcc_dup<exec_vcc>"
  ; Add of a duplicated scalar (operand 1) and a vector (operand 2) with
  ; the carry-out exposed as operand 3.
  [(set (match_operand:V_SI 0 "register_operand" "= v, v")
        (plus:V_SI
          (vec_duplicate:V_SI
            (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
          (match_operand:V_SI 2 "register_operand" " v, v")))
   (set (match_operand:DI 3 "register_operand" "=cV, Sg")
        (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
                           (match_dup 1))
                (vec_duplicate:V_SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])

; v_addc does not accept an SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.  It does not
; accept "B" immediate constants due to a related bus conflict.

; Add with carry-in (operand 3, one bit per lane selecting 1 or 0) and
; carry-out (operand 4).
(define_insn "addc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand" "=v, v")
        (plus:V_SI
          (plus:V_SI
            (vec_merge:V_SI
              (vec_duplicate:V_SI (const_int 1))
              (vec_duplicate:V_SI (const_int 0))
              (match_operand:DI 3 "register_operand" " cV,cVSv"))
            (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
          (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
   (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
        (ior:DI (ltu:DI (plus:V_SI
                          (plus:V_SI
                            (vec_merge:V_SI
                              (vec_duplicate:V_SI (const_int 1))
                              (vec_duplicate:V_SI (const_int 0))
                              (match_dup 3))
                            (match_dup 1))
                          (match_dup 2))
                        (match_dup 2))
                (ltu:DI (plus:V_SI
                          (vec_merge:V_SI
                            (vec_duplicate:V_SI (const_int 1))
                            (vec_duplicate:V_SI (const_int 0))
                            (match_dup 3))
                          (match_dup 1))
                        (match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

; Vector subtract with VCC clobbered.  The second alternative uses the
; reversed-operand instruction so that the non-VGPR operand may appear on
; either side of the subtraction.
(define_insn "sub<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
        (minus:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

; Vector subtract that exposes the borrow-out as operand 3.  Alternatives
; 0/1 use v_sub, alternatives 2/3 the reversed form; within each pair the
; borrow goes to VCC or to an SGPR pair respectively.
(define_insn "sub<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
        (minus:V_SI
          (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
          (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
        (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; v_subb does not accept an SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.  It does not
; accept "B" immediate constants due to a related bus conflict.
(define_insn "subc<mode>3<exec_vcc>"
  ; Subtract with borrow-in (operand 3, one bit per lane selecting 1 or 0)
  ; and borrow-out (operand 4).  Alternatives 2/3 use the reversed-operand
  ; instruction.
  [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
        (minus:V_SI
          (minus:V_SI
            (vec_merge:V_SI
              (vec_duplicate:V_SI (const_int 1))
              (vec_duplicate:V_SI (const_int 0))
              (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
            (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
          (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
   (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
        (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
                                      (vec_merge:V_SI
                                        (vec_duplicate:V_SI (const_int 1))
                                        (vec_duplicate:V_SI (const_int 0))
                                        (match_dup 3))
                                      (match_dup 1))
                                    (match_dup 2))
                        (match_dup 2))
                (ltu:DI (minus:V_SI (vec_merge:V_SI
                                      (vec_duplicate:V_SI (const_int 1))
                                      (vec_duplicate:V_SI (const_int 0))
                                      (match_dup 3))
                                    (match_dup 1))
                        (match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "4,8,4,8")])

; V_DI add.  Emitted as "#" and split, once every operand can be split,
; into a low-part add that sets VCC and a high-part add-with-carry that
; consumes it.
(define_insn_and_split "add<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (plus:V_DI
          (match_operand:V_DI 1 "register_operand" "%vDb")
          (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<vnsi>3_vcc
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc));
    emit_insn (gen_addc<vnsi>3
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; V_DI add under an EXEC mask (operand 4) merging into operand 3.
; The split condition tests operand 3, the V_DI merge source that the
; split body decomposes with gcn_operand_part, in line with the other
; *_exec splitters in this section.  Operand 4 is the DImode EXEC mask and
; is passed through whole, so it needs no such test.
(define_insn_and_split "add<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (match_operand:V_DI 1 "register_operand" "%vDb")
            (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<vnsi>3_vcc_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; V_DI subtract: low-part subtract setting VCC, then high-part
; subtract-with-borrow.
(define_insn_and_split "sub<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (minus:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
          (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_sub<vnsi>3_vcc
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc));
    emit_insn (gen_subc<vnsi>3
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; V_DI subtract under an EXEC mask (operand 4) merging into operand 3.
; The insn condition requires at least one of the two inputs to be a
; register.
(define_insn_and_split "sub<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (vec_merge:V_DI
          (minus:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
            (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_sub<vnsi>3_vcc_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_subc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; V_DI add of a zero-extended <VnSI> vector (operand 1) and a V_DI value
; (operand 2).  The high part is operand 2's high part plus zero plus the
; carry from the low-part add.
(define_insn_and_split "add<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (plus:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
          (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<vnsi>3_vcc
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 operands[1],
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc));
    emit_insn (gen_addc<vnsi>3
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As add<mode>3_zext, under an EXEC mask (operand 4) merging into operand 3.
(define_insn_and_split "add<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
            (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<vnsi>3_vcc_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 operands[1],
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 const0_rtx, vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; V_DI add of a zero-extended, duplicated SImode scalar (operand 1) and a
; V_DI value (operand 2).  The carry register is an explicit operand (3)
; rather than a fixed VCC clobber.
(define_insn_and_split "add<mode>3_vcc_zext_dup"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (plus:V_DI
          (zero_extend:V_DI
            (vec_duplicate:<VnSI>
              (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
          (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb")))
   (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
                  (match_dup 2))
                (match_dup 1)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    emit_insn (gen_add<vnsi>3_vcc_dup
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 operands[3]));
    emit_insn (gen_addc<vnsi>3
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 const0_rtx, operands[3], operands[3]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; Convenience expander: add<mode>3_vcc_zext_dup with VCC as the carry
; register.
(define_expand "add<mode>3_zext_dup"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
                                            operands[2], vcc));
    DONE;
  })

; As add<mode>3_vcc_zext_dup, under an EXEC mask (operand 5) merging into
; operand 4.
(define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI
              (vec_duplicate:<VnSI>
                (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
            (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA"))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 5 "gcn_exec_reg_operand" " e, e")))
   (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
                    (match_dup 2))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 0),
                 operands[5]));
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 const0_rtx, operands[3], operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 operands[5]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; Convenience expander: add<mode>3_vcc_zext_dup_exec with VCC as the carry
; register.
(define_expand "add<mode>3_zext_dup_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
                                                 operands[2], vcc, operands[3],
                                                 operands[4]));
    DONE;
  })

; V_DI add of a zero-extended <VnSI> vector (operand 1) and a duplicated
; DImode scalar (operand 2).  The high half of the scalar is first
; broadcast into the destination's high part, then the carry is added.
(define_insn_and_split "add<mode>3_vcc_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (plus:V_DI
          (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
   (set (match_operand:DI 3 "register_operand" "=&SgcV")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (match_dup 1))
                  (vec_duplicate:V_DI (match_dup 2)))
                (match_dup 1)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    emit_insn (gen_add<vnsi>3_vcc_dup
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 operands[3]));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
                                operands[3]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; Convenience expander: add<mode>3_vcc_zext_dup2 with VCC as the carry
; register.
(define_expand "add<mode>3_zext_dup2"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
                                             operands[2], vcc));
    DONE;
  })

; As add<mode>3_vcc_zext_dup2, under an EXEC mask (operand 5) merging into
; operand 4.
(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 5 "gcn_exec_reg_operand" " e")))
   (set (match_operand:DI 3 "register_operand" "=&SgcV")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (match_dup 1))
                    (vec_duplicate:V_DI (match_dup 2)))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 0),
                 operands[5]));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 operands[5]));
    emit_insn (gen_addc<vnsi>3_exec
                (dsthi, dsthi, const0_rtx, operands[3], operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 operands[5]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; Convenience expander: add<mode>3_vcc_zext_dup2_exec with VCC as the carry
; register.
(define_expand "add<mode>3_zext_dup2_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
                                                  operands[2], vcc,
                                                  operands[3], operands[4]));
    DONE;
  })

; V_DI add of a sign-extended <VnSI> vector (operand 1) and a duplicated
; DImode scalar (operand 2).  The scratch (operand 3) receives operand 1
; arithmetically shifted right by 31, i.e. its sign extension word, which
; is then added with carry to the broadcast high half of the scalar.
(define_insn_and_split "add<mode>3_sext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (plus:V_DI
          (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
   (clobber (match_scratch:<VnSI> 3 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_add<vnsi>3_vcc_dup
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As add<mode>3_sext_dup2, under an EXEC mask (operand 4) merging into
; operand 3; the scratch is operand 5.
(define_insn_and_split "add<mode>3_sext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
                                     gcn_gen_undef (<VnSI>mode), operands[4]));
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    emit_insn (gen_addc<vnsi>3_exec
                (dsthi, dsthi, operands[5], vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

;; }}}
;; {{{ DS memory ALU: add/sub

(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])

;; FIXME: the vector patterns probably need RD expanded to a vector of
;; addresses.  For now, the only way a vector can get into LDS is
;; if the user puts it there manually.
;;
;; FIXME: the scalar patterns are probably fine in themselves, but need to be
;; checked to see if anything can ever use them.

; In-memory DS add: only matches when the destination and the first source
; are the same memory operand.
(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
        (plus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Scalar-mode form of the in-memory DS add.
(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (plus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                "%RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; In-memory DS subtract (memory minus register).
(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Scalar-mode form of the in-memory DS subtract.
(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                " RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; In-memory DS reverse subtract (register minus memory).
(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Scalar-mode form of the in-memory DS reverse subtract.
(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU special case: mult

; High 32 bits of the (sign- or zero-) extended 32x32 product.
(define_insn "<su>mul<mode>3_highpart<exec>"
  [(set (match_operand:V_SI 0 "register_operand" "= v")
        (truncate:V_SI
          (lshiftrt:<VnDI>
            (mult:<VnDI>
              (any_extend:<VnDI>
                (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
              (any_extend:<VnDI>
                (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
            (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Low part of the product for single-register integer vectors.
(define_insn "mul<mode>3<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
        (mult:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; As above, with operand 2 a scalar duplicated across the vector.
(define_insn "mul<mode>3_dup<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
        (mult:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
          (vec_duplicate:V_INT_1REG
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; V_DI multiply, split after reload into 32-bit multiplies and adds.
; Operand 3 is a <VnSI> scratch for the cross products.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (mult:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand" "% v")
          (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    /* Low 64 bits of (left_hi:left_lo) * (right_hi:right_lo):
         out_lo = lo32 (left_lo * right_lo)
         out_hi = hi32 (left_lo * right_lo)
                  + lo32 (left_hi * right_lo)
                  + lo32 (left_lo * right_hi)
       left_hi * right_hi is weighted by 2^64 and so contributes nothing
       to a 64-bit result; it must not be accumulated into out_hi.  */
    emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
    emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    DONE;
  })

; V_DI multiply under an EXEC mask (operand 4) merging into operand 3.
; Operand 5 is a <VnSI> scratch for the cross products.
(define_insn_and_split "mul<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand" "% v")
            (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Previous-value operands for the masked-off lanes of each half.  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
        old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
      }
    else
      {
        old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (<VnSI>mode);

    /* Same decomposition as the unmasked mul<mode>3: only the two cross
       products are added to the high word; left_hi * right_hi lies
       entirely at bit 64 and above and is dropped.  */
    emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
                                              old_hi, exec));
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })

; V_DI multiply of a zero-extended <VnSI> vector (operand 1) by a V_DI
; value (operand 2).  The left high word is zero, so only one cross product
; is needed.
(define_insn_and_split "mul<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
          (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    DONE;
  })

; As mul<mode>3_zext, under an EXEC mask (operand 4) merging into operand 3.
(define_insn_and_split "mul<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
            (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
        old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
      }
    else
      {
        old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
                                              old_hi, exec));
    emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })

; V_DI multiply of a zero-extended <VnSI> vector (operand 1) by a
; duplicated DImode scalar (operand 2).
; NOTE(review): operand 2 is DImode but is decomposed with <MODE>mode here,
; whereas the add<mode>3_*_dup2 patterns pass DImode for their scalar
; operand; confirm this is intentional before changing it.
(define_insn_and_split "mul<mode>3_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= &v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
          (vec_duplicate:V_DI
            (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
   (clobber (match_scratch:<VnSI> 3 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    DONE;
  })

; As mul<mode>3_zext_dup2, under an EXEC mask (operand 4) merging into
; operand 3.
(define_insn_and_split "mul<mode>3_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= &v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
            (vec_duplicate:V_DI
              (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
        old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
      }
    else
      {
        old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
                                              old_hi, exec));
    emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })

;; }}}
;; {{{ ALU generic case

(define_code_iterator bitop [and ior xor])
(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
(define_code_iterator minmaxop [smin smax umin umax])

; Unary bit operations on single-register integer vectors.
(define_insn "<expander><mode>2<exec>"
  [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
        (bitunop:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Binary bit operations on single-register integer vectors; the second
; alternative is the in-memory DS form.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
        (bitop:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
          (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])

(define_insn_and_split "<expander><mode>3"
  [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
        (bitop:V_DI
          (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
          (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
  [(set (match_dup 3)
        (bitop:<VnSI> (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
        (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
  {
    operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0); 2031 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1); 2032 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0); 2033 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1); 2034 } 2035 [(set_attr "type" "vmult,ds") 2036 (set_attr "length" "16,8")]) 2037 2038(define_insn_and_split "<expander><mode>3_exec" 2039 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") 2040 (vec_merge:V_DI 2041 (bitop:V_DI 2042 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") 2043 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")) 2044 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0") 2045 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))] 2046 "!memory_operand (operands[0], VOIDmode) 2047 || (rtx_equal_p (operands[0], operands[1]) 2048 && register_operand (operands[2], VOIDmode))" 2049 "@ 2050 # 2051 ds_<mnemonic>0\t%A0, %2%O0" 2052 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" 2053 [(set (match_dup 5) 2054 (vec_merge:<VnSI> 2055 (bitop:<VnSI> (match_dup 7) (match_dup 9)) 2056 (match_dup 11) 2057 (match_dup 4))) 2058 (set (match_dup 6) 2059 (vec_merge:<VnSI> 2060 (bitop:<VnSI> (match_dup 8) (match_dup 10)) 2061 (match_dup 12) 2062 (match_dup 4)))] 2063 { 2064 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0); 2065 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1); 2066 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0); 2067 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1); 2068 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0); 2069 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1); 2070 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0); 2071 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1); 2072 } 2073 [(set_attr "type" "vmult,ds") 2074 (set_attr "length" "16,8")]) 2075 2076(define_expand "<expander><mode>3" 2077 [(set (match_operand:V_QIHI 0 
"register_operand" "= v") 2078 (shiftop:V_QIHI 2079 (match_operand:V_QIHI 1 "gcn_alu_operand" " v") 2080 (vec_duplicate:V_QIHI 2081 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] 2082 "" 2083 { 2084 enum {ashift, lshiftrt, ashiftrt}; 2085 bool unsignedp = (<code> == lshiftrt); 2086 rtx insi1 = gen_reg_rtx (<VnSI>mode); 2087 rtx insi2 = gen_reg_rtx (SImode); 2088 rtx outsi = gen_reg_rtx (<VnSI>mode); 2089 2090 convert_move (insi1, operands[1], unsignedp); 2091 convert_move (insi2, operands[2], unsignedp); 2092 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2)); 2093 convert_move (operands[0], outsi, unsignedp); 2094 DONE; 2095 }) 2096 2097(define_insn "<expander><mode>3<exec>" 2098 [(set (match_operand:V_SI 0 "register_operand" "= v") 2099 (shiftop:V_SI 2100 (match_operand:V_SI 1 "gcn_alu_operand" " v") 2101 (vec_duplicate:V_SI 2102 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] 2103 "" 2104 "v_<revmnemonic>0\t%0, %2, %1" 2105 [(set_attr "type" "vop2") 2106 (set_attr "length" "8")]) 2107 2108(define_expand "v<expander><mode>3" 2109 [(set (match_operand:V_QIHI 0 "register_operand" "=v") 2110 (shiftop:V_QIHI 2111 (match_operand:V_QIHI 1 "gcn_alu_operand" " v") 2112 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))] 2113 "" 2114 { 2115 enum {ashift, lshiftrt, ashiftrt}; 2116 bool unsignedp = (<code> == lshiftrt); 2117 rtx insi1 = gen_reg_rtx (<VnSI>mode); 2118 rtx insi2 = gen_reg_rtx (<VnSI>mode); 2119 rtx outsi = gen_reg_rtx (<VnSI>mode); 2120 2121 convert_move (insi1, operands[1], unsignedp); 2122 convert_move (insi2, operands[2], unsignedp); 2123 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2)); 2124 convert_move (operands[0], outsi, unsignedp); 2125 DONE; 2126 }) 2127 2128(define_insn "v<expander><mode>3<exec>" 2129 [(set (match_operand:V_SI 0 "register_operand" "=v") 2130 (shiftop:V_SI 2131 (match_operand:V_SI 1 "gcn_alu_operand" " v") 2132 (match_operand:V_SI 2 "gcn_alu_operand" "vB")))] 2133 "" 2134 "v_<revmnemonic>0\t%0, %2, %1" 2135 [(set_attr 
"type" "vop2") 2136 (set_attr "length" "8")]) 2137 2138(define_expand "<expander><mode>3" 2139 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand") 2140 (minmaxop:V_QIHI 2141 (match_operand:V_QIHI 1 "gcn_valu_src0_operand") 2142 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))] 2143 "" 2144 { 2145 enum {smin, umin, smax, umax}; 2146 bool unsignedp = (<code> == umax || <code> == umin); 2147 rtx insi1 = gen_reg_rtx (<VnSI>mode); 2148 rtx insi2 = gen_reg_rtx (<VnSI>mode); 2149 rtx outsi = gen_reg_rtx (<VnSI>mode); 2150 2151 convert_move (insi1, operands[1], unsignedp); 2152 convert_move (insi2, operands[2], unsignedp); 2153 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2)); 2154 convert_move (operands[0], outsi, unsignedp); 2155 DONE; 2156 }) 2157 2158(define_insn "<expander><vnsi>3<exec>" 2159 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD") 2160 (minmaxop:V_SI 2161 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0") 2162 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))] 2163 "" 2164 "@ 2165 v_<mnemonic>0\t%0, %2, %1 2166 ds_<mnemonic>0\t%A0, %2%O0" 2167 [(set_attr "type" "vop2,ds") 2168 (set_attr "length" "8,8")]) 2169 2170;; }}} 2171;; {{{ FP binops - special cases 2172 2173; GCN does not directly provide a DFmode subtract instruction, so we do it by 2174; adding the negated second operand to the first. 
2175 2176(define_insn "sub<mode>3<exec>" 2177 [(set (match_operand:V_DF 0 "register_operand" "= v, v") 2178 (minus:V_DF 2179 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v") 2180 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))] 2181 "" 2182 "@ 2183 v_add_f64\t%0, %1, -%2 2184 v_add_f64\t%0, -%2, %1" 2185 [(set_attr "type" "vop3a") 2186 (set_attr "length" "8,8")]) 2187 2188(define_insn "subdf" 2189 [(set (match_operand:DF 0 "register_operand" "= v, v") 2190 (minus:DF 2191 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v") 2192 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))] 2193 "" 2194 "@ 2195 v_add_f64\t%0, %1, -%2 2196 v_add_f64\t%0, -%2, %1" 2197 [(set_attr "type" "vop3a") 2198 (set_attr "length" "8,8")]) 2199 2200;; }}} 2201;; {{{ FP binops - generic 2202 2203(define_code_iterator comm_fp [plus mult smin smax]) 2204(define_code_iterator nocomm_fp [minus]) 2205(define_code_iterator all_fp [plus mult minus smin smax]) 2206 2207(define_insn "<expander><mode>3<exec>" 2208 [(set (match_operand:V_FP 0 "register_operand" "= v") 2209 (comm_fp:V_FP 2210 (match_operand:V_FP 1 "gcn_alu_operand" "% v") 2211 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))] 2212 "" 2213 "v_<mnemonic>0\t%0, %2, %1" 2214 [(set_attr "type" "vop2") 2215 (set_attr "length" "8")]) 2216 2217(define_insn "<expander><mode>3" 2218 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL") 2219 (comm_fp:FP 2220 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0") 2221 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))] 2222 "" 2223 "@ 2224 v_<mnemonic>0\t%0, %2, %1 2225 v_<mnemonic>0\t%0, %1%O0" 2226 [(set_attr "type" "vop2,ds") 2227 (set_attr "length" "8")]) 2228 2229(define_insn "<expander><mode>3<exec>" 2230 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v") 2231 (nocomm_fp:V_FP_1REG 2232 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v") 2233 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))] 2234 "" 2235 "@ 2236 v_<mnemonic>0\t%0, %1, %2 2237 
v_<revmnemonic>0\t%0, %2, %1" 2238 [(set_attr "type" "vop2") 2239 (set_attr "length" "8,8")]) 2240 2241(define_insn "<expander><mode>3" 2242 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v") 2243 (nocomm_fp:FP_1REG 2244 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v") 2245 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))] 2246 "" 2247 "@ 2248 v_<mnemonic>0\t%0, %1, %2 2249 v_<revmnemonic>0\t%0, %2, %1" 2250 [(set_attr "type" "vop2") 2251 (set_attr "length" "8,8")]) 2252 2253;; }}} 2254;; {{{ FP unops 2255 2256(define_insn "abs<mode>2" 2257 [(set (match_operand:FP 0 "register_operand" "=v") 2258 (abs:FP (match_operand:FP 1 "register_operand" " v")))] 2259 "" 2260 "v_add%i0\t%0, 0, |%1|" 2261 [(set_attr "type" "vop3a") 2262 (set_attr "length" "8")]) 2263 2264(define_insn "abs<mode>2<exec>" 2265 [(set (match_operand:V_FP 0 "register_operand" "=v") 2266 (abs:V_FP 2267 (match_operand:V_FP 1 "register_operand" " v")))] 2268 "" 2269 "v_add%i0\t%0, 0, |%1|" 2270 [(set_attr "type" "vop3a") 2271 (set_attr "length" "8")]) 2272 2273(define_insn "neg<mode>2<exec>" 2274 [(set (match_operand:V_FP 0 "register_operand" "=v") 2275 (neg:V_FP 2276 (match_operand:V_FP 1 "register_operand" " v")))] 2277 "" 2278 "v_add%i0\t%0, 0, -%1" 2279 [(set_attr "type" "vop3a") 2280 (set_attr "length" "8")]) 2281 2282(define_insn "sqrt<mode>2<exec>" 2283 [(set (match_operand:V_FP 0 "register_operand" "= v") 2284 (sqrt:V_FP 2285 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))] 2286 "flag_unsafe_math_optimizations" 2287 "v_sqrt%i0\t%0, %1" 2288 [(set_attr "type" "vop1") 2289 (set_attr "length" "8")]) 2290 2291(define_insn "sqrt<mode>2" 2292 [(set (match_operand:FP 0 "register_operand" "= v") 2293 (sqrt:FP 2294 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))] 2295 "flag_unsafe_math_optimizations" 2296 "v_sqrt%i0\t%0, %1" 2297 [(set_attr "type" "vop1") 2298 (set_attr "length" "8")]) 2299 2300;; }}} 2301;; {{{ FP fused multiply and add 2302 2303(define_insn "fma<mode>4<exec>" 2304 
[(set (match_operand:V_FP 0 "register_operand" "= v, v") 2305 (fma:V_FP 2306 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA") 2307 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA") 2308 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))] 2309 "" 2310 "v_fma%i0\t%0, %1, %2, %3" 2311 [(set_attr "type" "vop3a") 2312 (set_attr "length" "8")]) 2313 2314(define_insn "fma<mode>4_negop2<exec>" 2315 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v") 2316 (fma:V_FP 2317 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA") 2318 (neg:V_FP 2319 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA")) 2320 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))] 2321 "" 2322 "v_fma%i0\t%0, %1, -%2, %3" 2323 [(set_attr "type" "vop3a") 2324 (set_attr "length" "8")]) 2325 2326(define_insn "fma<mode>4" 2327 [(set (match_operand:FP 0 "register_operand" "= v, v") 2328 (fma:FP 2329 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA") 2330 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA") 2331 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))] 2332 "" 2333 "v_fma%i0\t%0, %1, %2, %3" 2334 [(set_attr "type" "vop3a") 2335 (set_attr "length" "8")]) 2336 2337(define_insn "fma<mode>4_negop2" 2338 [(set (match_operand:FP 0 "register_operand" "= v, v, v") 2339 (fma:FP 2340 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA") 2341 (neg:FP 2342 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA")) 2343 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))] 2344 "" 2345 "v_fma%i0\t%0, %1, -%2, %3" 2346 [(set_attr "type" "vop3a") 2347 (set_attr "length" "8")]) 2348 2349;; }}} 2350;; {{{ FP division 2351 2352(define_insn "recip<mode>2<exec>" 2353 [(set (match_operand:V_FP 0 "register_operand" "= v") 2354 (div:V_FP 2355 (vec_duplicate:V_FP (float:<SCALAR_MODE> (const_int 1))) 2356 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))] 2357 "" 2358 "v_rcp%i0\t%0, %1" 2359 [(set_attr "type" "vop1") 2360 (set_attr "length" "8")]) 2361 2362(define_insn "recip<mode>2" 2363 [(set 
(match_operand:FP 0 "register_operand" "= v") 2364 (div:FP 2365 (float:FP (const_int 1)) 2366 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))] 2367 "" 2368 "v_rcp%i0\t%0, %1" 2369 [(set_attr "type" "vop1") 2370 (set_attr "length" "8")]) 2371 2372;; Do division via a = b * 1/c 2373;; The v_rcp_* instructions are not sufficiently accurate on their own, 2374;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson 2375;; which the ISA manual says is enough to improve the reciprocal accuracy. 2376;; 2377;; FIXME: This does not handle denormals, NaNs, division-by-zero etc. 2378 2379(define_expand "div<mode>3" 2380 [(match_operand:V_FP 0 "gcn_valu_dst_operand") 2381 (match_operand:V_FP 1 "gcn_valu_src0_operand") 2382 (match_operand:V_FP 2 "gcn_valu_src0_operand")] 2383 "flag_reciprocal_math" 2384 { 2385 rtx two = gcn_vec_constant (<MODE>mode, 2386 const_double_from_real_value (dconst2, <SCALAR_MODE>mode)); 2387 rtx initrcp = gen_reg_rtx (<MODE>mode); 2388 rtx fma = gen_reg_rtx (<MODE>mode); 2389 rtx rcp; 2390 2391 /* Fast path: when the dividend is the constant 1.0 the refined reciprocal is already the quotient, so it is written straight to operands[0] and the final multiply is skipped.  The test must be against +1.0 (dconst1); matching -1.0 would return 1/c for -1/c.  Only element 0 of the CONST_VECTOR is inspected.  */ bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR 2392 && real_identical 2393 (CONST_DOUBLE_REAL_VALUE 2394 (CONST_VECTOR_ELT (operands[1], 0)), &dconst1)); 2395 2396 if (is_rcp) 2397 rcp = operands[0]; 2398 else 2399 rcp = gen_reg_rtx (<MODE>mode); 2400 2401 /* One Newton-Raphson step: rcp = x0 * (2 - c * x0), with x0 = v_rcp (c).  */ emit_insn (gen_recip<mode>2 (initrcp, operands[2])); 2402 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two)); 2403 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma)); 2404 2405 if (!is_rcp) 2406 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp)); 2407 2408 DONE; 2409 }) 2410 2411(define_expand "div<mode>3" 2412 [(match_operand:FP 0 "gcn_valu_dst_operand") 2413 (match_operand:FP 1 "gcn_valu_src0_operand") 2414 (match_operand:FP 2 "gcn_valu_src0_operand")] 2415 "flag_reciprocal_math" 2416 { 2417 rtx two = const_double_from_real_value (dconst2, <MODE>mode); 2418 rtx initrcp = gen_reg_rtx (<MODE>mode); 2419 rtx fma = gen_reg_rtx (<MODE>mode); 2420 rtx rcp; 2421 2422 /* Fast path: a dividend of exactly +1.0 (dconst1) means the refined reciprocal is the quotient, so it goes straight to operands[0] and the final multiply is skipped.  */ bool 
is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE 2423 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]), 2424 &dconst1)); 2425 2426 if (is_rcp) 2427 rcp = operands[0]; 2428 else 2429 rcp = gen_reg_rtx (<MODE>mode); 2430 2431 /* One Newton-Raphson step: rcp = x0 * (2 - c * x0), with x0 = v_rcp (c).  */ emit_insn (gen_recip<mode>2 (initrcp, operands[2])); 2432 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two)); 2433 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma)); 2434 2435 if (!is_rcp) 2436 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp)); 2437 2438 DONE; 2439 }) 2440 2441;; }}} 2442;; {{{ Int/FP conversions 2443 2444(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF]) 2445(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF]) 2446 2447(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF]) 2448(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF]) 2449(define_mode_iterator VCVT_IMODE [V64HI V64SI]) 2450 2451(define_code_iterator cvt_op [fix unsigned_fix 2452 float unsigned_float 2453 float_extend float_truncate]) 2454(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc") 2455 (float "float") (unsigned_float "floatuns") 2456 (float_extend "extend") (float_truncate "trunc")]) 2457(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1") 2458 (float "%i0%i1") (unsigned_float "%i0%u1") 2459 (float_extend "%i0%i1") 2460 (float_truncate "%i0%i1")]) 2461 2462(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2" 2463 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v") 2464 (cvt_op:CVT_TO_MODE 2465 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))] 2466 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode, 2467 <cvt_name>_cvt)" 2468 "v_cvt<cvt_operands>\t%0, %1" 2469 [(set_attr "type" "vop1") 2470 (set_attr "length" "8")]) 2471 2472(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>" 2473 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v") 2474 (cvt_op:VCVT_FMODE 2475 (match_operand:VCVT_MODE 1 "gcn_alu_operand" 
"vSvB")))] 2476 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode, 2477 <cvt_name>_cvt)" 2478 "v_cvt<cvt_operands>\t%0, %1" 2479 [(set_attr "type" "vop1") 2480 (set_attr "length" "8")]) 2481 2482(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>" 2483 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v") 2484 (cvt_op:VCVT_IMODE 2485 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))] 2486 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode, 2487 <cvt_name>_cvt)" 2488 "v_cvt<cvt_operands>\t%0, %1" 2489 [(set_attr "type" "vop1") 2490 (set_attr "length" "8")]) 2491 2492;; }}} 2493;; {{{ Int/int conversions 2494 2495(define_code_iterator zero_convert [truncate zero_extend]) 2496(define_code_attr convop [ 2497 (sign_extend "extend") 2498 (zero_extend "zero_extend") 2499 (truncate "trunc")]) 2500 2501(define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>" 2502 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2503 (zero_convert:V_INT_1REG 2504 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] 2505 "" 2506 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>" 2507 [(set_attr "type" "vop_sdwa") 2508 (set_attr "length" "8")]) 2509 2510(define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>" 2511 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2512 (sign_extend:V_INT_1REG 2513 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] 2514 "" 2515 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>" 2516 [(set_attr "type" "vop_sdwa") 2517 (set_attr "length" "8")]) 2518 2519;; GCC can already do these for scalar types, but not for vector types. 2520;; Unfortunately you can't just do SUBREG on a vector to select the low part, 2521;; so there must be a few tricks here. 
2522 2523(define_insn_and_split "trunc<vndi><mode>2" 2524 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2525 (truncate:V_INT_1REG 2526 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))] 2527 "" 2528 "#" 2529 "reload_completed" 2530 [(const_int 0)] 2531 { 2532 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0); 2533 rtx out = operands[0]; 2534 2535 if (<MODE>mode != <VnSI>mode) 2536 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo)); 2537 else 2538 emit_move_insn (out, inlo); 2539 } 2540 [(set_attr "type" "vop2") 2541 (set_attr "length" "4")]) 2542 2543(define_insn_and_split "trunc<vndi><mode>2_exec" 2544 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2545 (vec_merge:V_INT_1REG 2546 (truncate:V_INT_1REG 2547 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")) 2548 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0") 2549 (match_operand:DI 3 "gcn_exec_operand" " e")))] 2550 "" 2551 "#" 2552 "reload_completed" 2553 [(const_int 0)] 2554 { 2555 rtx out = operands[0]; 2556 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0); 2557 rtx merge = operands[2]; 2558 rtx exec = operands[3]; 2559 2560 if (<MODE>mode != <VnSI>mode) 2561 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec)); 2562 else 2563 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec)); 2564 } 2565 [(set_attr "type" "vop2") 2566 (set_attr "length" "4")]) 2567 2568(define_insn_and_split "<convop><mode><vndi>2" 2569 [(set (match_operand:<VnDI> 0 "register_operand" "=v") 2570 (any_extend:<VnDI> 2571 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))] 2572 "" 2573 "#" 2574 "reload_completed" 2575 [(const_int 0)] 2576 { 2577 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0); 2578 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1); 2579 rtx in = operands[1]; 2580 2581 if (<MODE>mode != <VnSI>mode) 2582 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in)); 2583 else 2584 emit_move_insn (outlo, in); 2585 if ('<su>' == 's') 2586 emit_insn 
(gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31))); 2587 else 2588 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx)); 2589 } 2590 [(set_attr "type" "mult") 2591 (set_attr "length" "12")]) 2592 2593(define_insn_and_split "<convop><mode><vndi>2_exec" 2594 [(set (match_operand:<VnDI> 0 "register_operand" "=v") 2595 (vec_merge:<VnDI> 2596 (any_extend:<VnDI> 2597 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")) 2598 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0") 2599 (match_operand:DI 3 "gcn_exec_operand" " e")))] 2600 "" 2601 "#" 2602 "reload_completed" 2603 [(const_int 0)] 2604 { 2605 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0); 2606 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1); 2607 rtx in = operands[1]; 2608 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0); 2609 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1); 2610 rtx exec = operands[3]; 2611 2612 if (<MODE>mode != <VnSI>mode) 2613 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec)); 2614 else 2615 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec)); 2616 if ('<su>' == 's') 2617 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi, 2618 exec)); 2619 else 2620 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi, 2621 exec)); 2622 } 2623 [(set_attr "type" "mult") 2624 (set_attr "length" "12")]) 2625 2626;; }}} 2627;; {{{ Vector comparison/merge 2628 2629(define_insn "vec_cmp<mode>di" 2630 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg") 2631 (match_operator:DI 1 "gcn_fp_compare_operator" 2632 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA") 2633 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])) 2634 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))] 2635 "" 2636 "@ 2637 v_cmp%E1\tvcc, %2, %3 2638 v_cmp%E1\tvcc, %2, %3 2639 v_cmpx%E1\tvcc, %2, %3 2640 v_cmpx%E1\tvcc, %2, %3 2641 v_cmp%E1\t%0, %2, %3 2642 v_cmp%E1\t%0, %2, %3" 2643 [(set_attr 
"type" "vopc,vopc,vopc,vopc,vop3a,vop3a") 2644 (set_attr "length" "4,8,4,8,8,8")]) 2645 2646(define_expand "vec_cmpu<mode>di" 2647 [(match_operand:DI 0 "register_operand") 2648 (match_operator 1 "gcn_compare_operator" 2649 [(match_operand:V_INT_noQI 2 "gcn_alu_operand") 2650 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])] 2651 "" 2652 { 2653 /* Unsigned comparisons use the same patterns as signed comparisons, 2654 except that they use unsigned operators (e.g. LTU vs LT). 2655 The '%E1' directive then does the Right Thing. */ 2656 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2], 2657 operands[3])); 2658 DONE; 2659 }) 2660 2661; There's no instruction for 8-bit vector comparison, so we need to extend. 2662(define_expand "vec_cmp<u><mode>di" 2663 [(match_operand:DI 0 "register_operand") 2664 (match_operator 1 "gcn_compare_operator" 2665 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand")) 2666 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])] 2667 "can_create_pseudo_p ()" 2668 { 2669 rtx sitmp1 = gen_reg_rtx (<VnSI>mode); 2670 rtx sitmp2 = gen_reg_rtx (<VnSI>mode); 2671 2672 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2])); 2673 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3])); 2674 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2)); 2675 DONE; 2676 }) 2677 2678(define_insn "vec_cmp<mode>di_exec" 2679 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg") 2680 (and:DI 2681 (match_operator 1 "gcn_fp_compare_operator" 2682 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA") 2683 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]) 2684 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e"))) 2685 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))] 2686 "" 2687 "@ 2688 v_cmp%E1\tvcc, %2, %3 2689 v_cmp%E1\tvcc, %2, %3 2690 v_cmpx%E1\tvcc, %2, %3 2691 v_cmpx%E1\tvcc, %2, %3 2692 v_cmp%E1\t%0, %2, %3 2693 v_cmp%E1\t%0, %2, %3" 
2694 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a") 2695 (set_attr "length" "4,8,4,8,8,8")]) 2696 2697(define_expand "vec_cmpu<mode>di_exec" 2698 [(match_operand:DI 0 "register_operand") 2699 (match_operator 1 "gcn_compare_operator" 2700 [(match_operand:V_INT_noQI 2 "gcn_alu_operand") 2701 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")]) 2702 (match_operand:DI 4 "gcn_exec_reg_operand")] 2703 "" 2704 { 2705 /* Unsigned comparisons use the same patterns as signed comparisons, 2706 except that they use unsigned operators (e.g. LTU vs LT). 2707 The '%E1' directive then does the Right Thing. */ 2708 /* Forward to the signed insn pattern vec_cmp<mode>di_exec, mirroring how vec_cmpu<mode>di forwards to vec_cmp<mode>di.  Calling gen_vec_cmpu<mode>di_exec here would re-enter this expander and recurse without bound.  */ emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1], 2709 operands[2], operands[3], 2710 operands[4])); 2711 DONE; 2712 }) 2713 2714(define_expand "vec_cmp<u><mode>di_exec" 2715 [(match_operand:DI 0 "register_operand") 2716 (match_operator 1 "gcn_compare_operator" 2717 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand")) 2718 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))]) 2719 (match_operand:DI 4 "gcn_exec_reg_operand")] 2720 "can_create_pseudo_p ()" 2721 { 2722 rtx sitmp1 = gen_reg_rtx (<VnSI>mode); 2723 rtx sitmp2 = gen_reg_rtx (<VnSI>mode); 2724 2725 /* NOTE(review): the third argument of each extend below is the QImode source itself; it looks like it is used as the merge (previous-value) operand of an SImode destination -- confirm the intended mode against the _exec extend pattern.  */ emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2], 2726 operands[2], operands[4])); 2727 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3], 2728 operands[3], operands[4])); 2729 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1, 2730 sitmp2, operands[4])); 2731 DONE; 2732 }) 2733 2734(define_insn "vec_cmp<mode>di_dup" 2735 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg") 2736 (match_operator:DI 1 "gcn_fp_compare_operator" 2737 [(vec_duplicate:V_noQI 2738 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" 2739 " Sv, B,Sv,B, A")) 2740 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])) 2741 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))] 2742 "" 2743 "@ 2744 v_cmp%E1\tvcc, %2, %3 2745 v_cmp%E1\tvcc, %2, %3 2746 
v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])

; Compare a broadcast scalar (operand 2, wrapped in vec_duplicate) against
; vector operand 3 using comparison operator 1, and AND the resulting DImode
; mask with the exec-register operand 4.  Alternatives 0, 1 and 4 emit v_cmp;
; alternatives 2 and 3 place operand 0 in "e", emit v_cmpx and use the
; operand 5 scratch ("cV").

(define_insn "vec_cmp<mode>di_dup_exec"
  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
        (and:DI
          (match_operator 1 "gcn_fp_compare_operator"
            [(vec_duplicate:V_noQI
               (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
                                              " Sv, B,Sv,B, A"))
             (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
   (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])

; Select between vector operands 1 and 2 under the DImode mask in operand 3
; (a vec_merge), with a <VnDI> scratch clobber.  The preparation statements
; are empty, so the pattern is emitted exactly as written.

(define_expand "vcond_mask_<mode>di"
  [(parallel
    [(set (match_operand:V_ALL 0 "register_operand" "")
          (vec_merge:V_ALL
            (match_operand:V_ALL 1 "gcn_vop3_operand" "")
            (match_operand:V_ALL 2 "gcn_alu_operand" "")
            (match_operand:DI 3 "register_operand" "")))
     (clobber (scratch:<VnDI>))])]
  ""
  "")

; Vector conditional select: compare operands 4 and 5 (mode V_ALL_ALT) with
; operator 3 into a fresh DImode mask, then merge operands 1 and 2
; (mode V_ALL) into operand 0 under that mask.

(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
               (tmp, operands[3], operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di
               (operands[0], operands[1], operands[2], tmp));
    DONE;
  })

; As above, but the comparison is additionally given the exec mask in
; operand 6.  The final merge uses only the comparison result.

(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
               (tmp, operands[3], operands[4], operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di
               (operands[0], operands[1], operands[2], tmp));
    DONE;
  })

; Conditional select driven by the vec_cmpu comparison of the V_INT
; operands 4 and 5; otherwise the same two-step expansion as vcond.

(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmpu<V_INT:mode>di
               (tmp, operands[3], operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di
               (operands[0], operands[1], operands[2], tmp));
    DONE;
  })

; vcondu with the comparison additionally given the exec mask in operand 6.

(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "gcn_vop3_operand")
   (match_operand:V_ALL 2 "gcn_alu_operand")
   (match_operator 3 "gcn_fp_compare_operator"
     [(match_operand:V_INT 4 "gcn_alu_operand")
      (match_operand:V_INT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
               (tmp, operands[3], operands[4], operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<V_ALL:mode>di
               (operands[0], operands[1], operands[2], tmp));
    DONE;
  })

;; }}}
;; {{{ Fully masked loop support

; Build the DImode loop mask in operand 0 from the SImode bounds in
; operands 1 and 2.  If either bound is not a CONST_INT the mask is computed
; at run time with a vector compare; otherwise it is folded to a constant.

(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")]
  ""
  {
    if (GET_CODE (operands[1]) != CONST_INT
        || GET_CODE (operands[2]) != CONST_INT)
      {
        /* NOTE(review): the name suggests VGPR 1 holds the per-lane values
           0, 1, 2, 3, ...; that is set up outside this pattern - confirm.  */
        rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
        rtx tmp = _0_1_2_3;

        /* Add operand 1 to every lane unless it is the constant zero.  */
        if (GET_CODE (operands[1]) != CONST_INT
            || INTVAL (operands[1]) != 0)
          {
            tmp = gen_reg_rtx (V64SImode);
            emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
          }

        /* Mask = (broadcast operand 2) GT tmp.  The GT rtx is built with
           null operands; presumably only its code is consulted.  */
        emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
                                           gen_rtx_GT (VOIDmode, 0, 0),
                                           operands[2], tmp));
      }
    else
      {
        HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
        HOST_WIDE_INT mask;

        /* Set the low DIFF bits, saturating at all 64.  A non-positive DIFF
           gives an empty mask: shifting by a negative count is undefined,
           and the run-time GT compare above selects no lanes in that case
           either.  */
        if (diff <= 0)
          mask = 0;
        else if (diff >= 64)
          mask = -1;
        else
          mask = ~((unsigned HOST_WIDE_INT)-1 << diff);

        emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    DONE;
  })

; Masked vector load of memory operand 1 into operand 0 under mask
; operand 2.  The destination is zeroed first and then passed to the gather
; as well, so masked-off lanes read as zero.

(define_expand "maskload<mode>di"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:V_ALL 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
                (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));

    /* Masked lanes are required to hold zero.  */
    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));

    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
                                           operands[0], exec));
    DONE;
  })

; Masked vector store of operand 1 to memory operand 0 under mask
; operand 2, emitted as a scatter with that mask as exec.

(define_expand "maskstore<mode>di"
  [(match_operand:V_ALL 0 "memory_operand")
   (match_operand:V_ALL 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
                (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
    DONE;
  })

; Masked gather load.  Operands 1-4 are handed to gcn_expand_scaled_offsets
; as base, offsets, scale (operand 4) and INTVAL of operand 3; operand 5 is
; the mask.  The mode of the returned address selects the 1-offset or
; 2-offsets gather form.

(define_expand "mask_gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), exec);

    /* Masked lanes are required to hold zero.  */
    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));

    if (GET_MODE (addr) == <VnDI>mode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
                                                     const0_rtx, const0_rtx,
                                                     const0_rtx, operands[0],
                                                     exec));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
                                                      addr, const0_rtx,
                                                      const0_rtx, const0_rtx,
                                                      operands[0], exec));
    DONE;
  })

; Masked scatter store of operand 4; the addressing operands 0-3 and the
; mask in operand 5 are handled as in mask_gather_load.

(define_expand "mask_scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_ALL 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), exec);

    if (GET_MODE (addr) == <VnDI>mode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
                                                      operands[4], const0_rtx,
                                                      const0_rtx,
                                                      exec));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
                                                       const0_rtx, operands[4],
                                                       const0_rtx, const0_rtx,
                                                       exec));
    DONE;
  })

; Conditional arithmetic: operand 1 is the DImode mask, operands 2 and 3 the
; inputs and operand 4 the fallback value; all are forwarded to the
; corresponding <op>3_exec pattern with the mask as exec.

(define_code_iterator cond_op [plus minus mult])

(define_expand "cond_<expander><mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:V_ALL
     (match_operand:V_ALL 2 "gcn_alu_operand")
     (match_operand:V_ALL 3 "gcn_alu_operand"))
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
                                           operands[3], operands[4],
                                           operands[1]));
    DONE;
  })

;; TODO smin umin smax umax
; Bitwise operations with a conditional (masked) form.
(define_code_iterator cond_bitop [and ior xor])

; Conditional bitwise operation on integer vectors.  Operand 1 is the DImode
; mask, operands 2 and 3 are the inputs and operand 4 is the fallback value;
; everything is forwarded to the matching <op>3_exec pattern with the mask
; passed last.

(define_expand "cond_<expander><mode>"
  [(match_operand:V_INT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:V_INT
     (match_operand:V_INT 2 "gcn_alu_operand")
     (match_operand:V_INT 3 "gcn_alu_operand"))
   (match_operand:V_INT 4 "register_operand")]
  ""
  {
    /* Both the mask and the first input must be in registers.  */
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
                                           operands[3], operands[4],
                                           operands[1]));
    DONE;
  })

;; }}}
;; {{{ Vector reductions

; Every reduction unspec.
(define_int_iterator REDUC_UNSPEC
  [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
   UNSPEC_PLUS_DPP_SHR
   UNSPEC_AND_DPP_SHR
   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; The subset used by the V_DI (two-register) reduction split.
(define_int_iterator REDUC_2REG_UNSPEC
  [UNSPEC_PLUS_DPP_SHR
   UNSPEC_AND_DPP_SHR
   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; FIXME: Isn't there a better way of doing this?
; Maps each reduction unspec to its own name, so that <reduc_unspec> can be
; substituted into C conditions and function arguments.
(define_int_attr reduc_unspec
  [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
   (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
   (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
   (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
   (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
   (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
   (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
   (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])

; Operation name used to build the pattern names.
(define_int_attr reduc_op
  [(UNSPEC_SMIN_DPP_SHR "smin")
   (UNSPEC_SMAX_DPP_SHR "smax")
   (UNSPEC_UMIN_DPP_SHR "umin")
   (UNSPEC_UMAX_DPP_SHR "umax")
   (UNSPEC_PLUS_DPP_SHR "plus")
   (UNSPEC_AND_DPP_SHR "and")
   (UNSPEC_IOR_DPP_SHR "ior")
   (UNSPEC_XOR_DPP_SHR "xor")])

; Assembler mnemonic template handed to gcn_expand_dpp_shr_insn.
(define_int_attr reduc_insn
  [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
   (UNSPEC_SMAX_DPP_SHR "v_max%i0")
   (UNSPEC_UMIN_DPP_SHR "v_min%u0")
   (UNSPEC_UMAX_DPP_SHR "v_max%u0")
   (UNSPEC_PLUS_DPP_SHR "v_add%U0")
   (UNSPEC_AND_DPP_SHR "v_and%B0")
   (UNSPEC_IOR_DPP_SHR "v_or%B0")
   (UNSPEC_XOR_DPP_SHR "v_xor%B0")])

; Reduce vector operand 1 to the scalar operand 0.  The reduction itself is
; produced by gcn_expand_reduc_scalar; this expander then extracts the
; scalar from its result.

(define_expand "reduc_<reduc_op>_scal_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_ALL 1 "register_operand")]
          REDUC_UNSPEC))]
  ""
  {
    rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
                                       <reduc_unspec>);

    /* The result of the reduction is in lane 63 of tmp.  */
    emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));

    DONE;
  })


; One reduction step on a single-register vector.  Operand 3 is a constant
; passed to gcn_expand_dpp_shr_insn, which produces the assembler text.

(define_insn "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_1REG 0 "register_operand" "=v")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "register_operand" "v")
           (match_operand:V_1REG 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_UNSPEC))]
  ; GCN3 requires a carry out, GCN5 not
  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
                                    <reduc_unspec>, INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])

; The same step on a V_DI vector.  It is emitted as "#" and split after
; reload into two <VnSI> steps, one on part 0 and one on part 1 of each
; operand (parts obtained with gcn_operand_part).

(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          REDUC_2REG_UNSPEC))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4)
        (unspec:<VnSI>
          [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
   (set (match_dup 5)
        (unspec:<VnSI>
          [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
  {
    /* Operands 4/5: destination parts; 6/7 and 8/9: source parts.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
    operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

; Special cases for addition.

; Addition step that clobbers VCC.
; NOTE(review): <VnSI>mode rather than <MODE>mode is passed to
; gcn_expand_dpp_shr_insn here; this looks deliberate (the mnemonic is the
; 32-bit add) but confirm.

(define_insn "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
        (unspec:V_INT_1REG
          [(match_operand:V_INT_1REG 1 "register_operand" "v")
           (match_operand:V_INT_1REG 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
                                    UNSPEC_PLUS_CARRY_DPP_SHR,
                                    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])

; Addition step that also takes a DImode input in a "cV" register
; (operand 4) and clobbers VCC.

(define_insn "*plus_carry_in_dpp_shr_<mode>"
  [(set (match_operand:V_SI 0 "register_operand" "=v")
        (unspec:V_SI
          [(match_operand:V_SI 1 "register_operand" "v")
           (match_operand:V_SI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")
           (match_operand:DI 4 "register_operand" "cV")]
          UNSPEC_PLUS_CARRY_IN_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
                                    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
                                    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])

; V_DI addition step.  Split after reload into a PLUS_CARRY step on part 0
; followed by a PLUS_CARRY_IN step on part 1 that reads VCC; both halves
; clobber VCC.

(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (unspec:V_DI
          [(match_operand:V_DI 1 "register_operand" "v")
           (match_operand:V_DI 2 "register_operand" "v")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 4)
                (unspec:<VnSI>
                  [(match_dup 6) (match_dup 8) (match_dup 3)]
                  UNSPEC_PLUS_CARRY_DPP_SHR))
              (clobber (reg:DI VCC_REG))])
   (parallel [(set (match_dup 5)
                (unspec:<VnSI>
                  [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
                  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
              (clobber (reg:DI VCC_REG))])]
  {
    /* Operands 4/5: destination parts; 6/7 and 8/9: source parts.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
    operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

; Instructions to move a scalar value from lane 63 of a vector register.
; Alternative 0 writes an "Sg" register with v_readlane_b32; alternative 1
; writes a "v" register with v_mov_b32 and wave_ror:1.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_1REG 1 "register_operand" " v,v")]
          UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%0, %1, 63
   v_mov_b32\t%0, %1 wave_ror:1"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "8")])

; Two-register variant: each alternative emits one instruction for the low
; part and one for the high part.  In alternative 1 the order of the two
; moves depends on how the register numbers of the operands compare.
; The length is 16 because two instructions are emitted, each of the kind
; that the single-register pattern above counts as 8 bytes.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
        (unspec:<SCALAR_MODE>
          [(match_operand:V_2REG 1 "register_operand" " v,v")]
          UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
              \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
              \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "16")])

;; }}}
;; {{{ Miscellaneous

; Build the series operand 1 + i * operand 2 in a V_SI vector: multiply
; VGPR 1 by the broadcast step, then add the broadcast base.
; NOTE(review): this relies on VGPR 1 holding the per-lane index i, which
; is established outside this pattern - confirm.

(define_expand "vec_series<mode>"
  [(match_operand:V_SI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:SI 2 "gcn_alu_operand")]
  ""
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));

    emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
    emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
    DONE;
  })

; V_DI form of the above.  VGPR 1 is read in <VnSI>mode and multiplied
; through the zext_dup2 multiply pattern; the base is broadcast into a
; separate vector before the add.

(define_expand "vec_series<mode>"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
    rtx op1vec = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
    emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
    emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));
    DONE;
  })

;; }}}