;; Copyright (C) 2016-2021 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for specific types
; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI
  [V64QI])
(define_mode_iterator V_HI
  [V64HI])
(define_mode_iterator V_HF
  [V64HF])
(define_mode_iterator V_SI
  [V64SI])
(define_mode_iterator V_SF
  [V64SF])
(define_mode_iterator V_DI
  [V64DI])
(define_mode_iterator V_DF
  [V64DF])

; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
  [V64QI V64HI])

; Vector modes for one vector register
(define_mode_iterator V_1REG
  [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator V_INT_1REG
  [V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT
  [V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG
  [V64HF V64SF])

; Vector modes for two vector registers
(define_mode_iterator V_2REG
  [V64DI V64DF])

; Vector modes with native support
(define_mode_iterator V_noQI
  [V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI
  [V64HF V64SI V64SF V64DI V64DF])

(define_mode_iterator V_INT_noQI
  [V64HI V64SI V64DI])

; All of above
(define_mode_iterator V_ALL
  [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT
  [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])

(define_mode_iterator V_INT
  [V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP
  [V64HF V64SF V64DF])

(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

(define_mode_attr vnsi
  [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])

(define_mode_attr VnSI
  [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])

(define_mode_attr vndi
  [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])

(define_mode_attr VnDI
  [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])

(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])

;; }}}
;; {{{ Substitutions

(define_subst_attr "exec" "vec_merge"
  "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
  "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
  "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
  "" "_exec")

(define_subst "vec_merge"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

(define_subst "vec_merge_with_clobber"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

(define_subst "vec_merge_with_vcc"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:V_ALL
             (match_dup 1)
             (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])

(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

(define_expand "mov<mode>"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (match_operand:V_ALL 1 "general_operand"))]
  ""
  {
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
        operands[1] = force_reg (<MODE>mode, operands[1]);
        rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
        rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
        rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
        rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        operands[0],
                                                        scratch);
        emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
        DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
        rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
        rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
        rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
        rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        operands[1],
                                                        scratch);
        emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
        DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
        gcc_assert (!reload_completed);
        rtx scratch = gen_reg_rtx (<VnDI>mode);
        emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
        DONE;
      }
  })

; A pseudo instruction that helps LRA use the "U0" constraint.
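; (Note: the output template below is empty and the length attribute is zero,
; so this pseudo instruction assembles to nothing.)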

(define_insn "mov<mode>_unspec"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
        (match_operand:V_ALL 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

(define_insn "*mov<mode>"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
        (match_operand:V_1REG 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
        (vec_merge:V_1REG
          (match_operand:V_1REG 1 "general_operand"    "vA, B, v,vA, m, v")
          (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
                                                       "U0,U0,vA,vA,U0,U0")
          (match_operand:DI 3 "register_operand"       " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %2, %1, vcc
   v_cndmask_b32\t%0, %2, %1, %3
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
;        (vec_merge:V_1REG
;          (match_operand:V_1REG 1 "general_operand"    "vA,B, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand"   " e,e, e, e")))
;   (clobber (match_scratch:<VnDI> 3                    "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

(define_insn "*mov<mode>"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
        (match_operand:V_2REG 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v,   v,   v, v, m")
        (vec_merge:V_2REG
          (match_operand:V_2REG 1 "general_operand"    "vDB,  v0,  v0, m, v")
          (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
                                                       " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
        case 1:
          return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
        case 2:
          return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, %3";
        }
    else
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
        case 1:
          return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
        case 2:
          return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                 "v_cndmask_b32\t%L0, %L2, %L1, %3";
        }

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

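; Note: the REGNO comparison in the two-register (V_2REG) move patterns above
; chooses whether to copy the low or the high half first, so that an
; overlapping source register pair is not clobbered before it has been read.
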
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
;        (vec_merge:V_2REG
;          (match_operand:V_2REG 1 "general_operand"    "vDB, m, v")
;          (match_dup 0)
;          (match_operand:DI 2 "gcn_exec_reg_operand"   " e, e, e")))
;   (clobber (match_scratch:<VnDI> 3                    "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;    return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;    else \
;    return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;  #
;  #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "general_operand"   " vA,vB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
        (unspec:V_2REG
          [(match_operand:V_2REG 1 "general_operand"   "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
     return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
     return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:V_ALL 0 "register_operand" "= v")
        (match_operand:V_ALL 1 "memory_operand"   "  m"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:V_ALL 0 "memory_operand"   "= m")
        (match_operand:V_ALL 1 "register_operand" "  v"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; Expand scalar addresses into gather/scatter patterns

(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
                    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       operands[0],
                                                       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "general_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1)
                     (match_dup 6) (match_dup 7) (match_dup 3)]
                    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       operands[0],
                                                       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (match_dup 0)
        (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
                       (mem:BLK (scratch))]
                      UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       operands[1],
                                                       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "memory_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
                         (mem:BLK (scratch))]
                        UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3],
                                                       operands[1],
                                                       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates

(define_insn "*vec_set<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"                  "= v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"       " Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand"  " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"             "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_2REG 0 "register_operand"                  "= v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"       " Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand"  " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"             "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_set<mode>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (vec_merge:V_ALL
          (vec_duplicate:V_ALL
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_1REG 0 "register_operand"                  "=v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"       "Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand"  "U0")
          (match_operand:SI 2 "const_int_operand"                   " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_2REG 0 "register_operand"                  "=v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"       "Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand"  "U0")
          (match_operand:SI 2 "const_int_operand"                   " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_1REG 0 "register_operand"         "=v")
        (vec_duplicate:V_1REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_2REG 0 "register_operand"         "= v")
        (vec_duplicate:V_2REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])

(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
"register_operand" "=Sg") 601 (vec_select:<SCALAR_MODE> 602 (match_operand:V_1REG 1 "register_operand" " v") 603 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))] 604 "" 605 "v_readlane_b32 %0, %1, %2" 606 [(set_attr "type" "vop3a") 607 (set_attr "length" "8") 608 (set_attr "exec" "none") 609 (set_attr "laneselect" "yes")]) 610 611(define_insn "vec_extract<mode><scalar_mode>" 612 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg") 613 (vec_select:<SCALAR_MODE> 614 (match_operand:V_2REG 1 "register_operand" " v") 615 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))] 616 "" 617 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2" 618 [(set_attr "type" "vmult") 619 (set_attr "length" "16") 620 (set_attr "exec" "none") 621 (set_attr "laneselect" "yes")]) 622 623(define_expand "extract_last_<mode>" 624 [(match_operand:<SCALAR_MODE> 0 "register_operand") 625 (match_operand:DI 1 "gcn_alu_operand") 626 (match_operand:V_ALL 2 "register_operand")] 627 "can_create_pseudo_p ()" 628 { 629 rtx dst = operands[0]; 630 rtx mask = operands[1]; 631 rtx vect = operands[2]; 632 rtx tmpreg = gen_reg_rtx (SImode); 633 634 emit_insn (gen_clzdi2 (tmpreg, mask)); 635 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg)); 636 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg)); 637 DONE; 638 }) 639 640(define_expand "fold_extract_last_<mode>" 641 [(match_operand:<SCALAR_MODE> 0 "register_operand") 642 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") 643 (match_operand:DI 2 "gcn_alu_operand") 644 (match_operand:V_ALL 3 "register_operand")] 645 "can_create_pseudo_p ()" 646 { 647 rtx dst = operands[0]; 648 rtx default_value = operands[1]; 649 rtx mask = operands[2]; 650 rtx vect = operands[3]; 651 rtx else_label = gen_label_rtx (); 652 rtx end_label = gen_label_rtx (); 653 654 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx); 655 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label)); 656 emit_insn (gen_extract_last_<mode> (dst, mask, vect)); 657 emit_jump_insn (gen_jump (end_label)); 658 emit_barrier (); 659 emit_label (else_label); 660 emit_move_insn (dst, default_value); 661 emit_label (end_label); 662 DONE; 663 }) 664 665(define_expand "vec_init<mode><scalar_mode>" 666 [(match_operand:V_ALL 0 "register_operand") 667 (match_operand 1)] 668 "" 669 { 670 gcn_expand_vector_init (operands[0], operands[1]); 671 DONE; 672 }) 673 674;; }}} 675;; {{{ Scatter / Gather 676 677;; GCN does not have an instruction for loading a vector from contiguous 678;; memory so *all* loads and stores are eventually converted to scatter 679;; or gather. 680;; 681;; GCC does not permit MEM to hold vectors of addresses, so we must use an 682;; unspec. The unspec formats are as follows: 683;; 684;; (unspec:V?? 685;; [(<address expression>) 686;; (<addr_space_t>) 687;; (<use_glc>) 688;; (mem:BLK (scratch))] 689;; UNSPEC_GATHER) 690;; 691;; (unspec:BLK 692;; [(<address expression>) 693;; (<source register>) 694;; (<addr_space_t>) 695;; (<use_glc>) 696;; (<exec>)] 697;; UNSPEC_SCATTER) 698;; 699;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>. 700;; - The mem:BLK does not contain any real information, but indicates that an 701;; unknown memory read is taking place. Stores are expected to use a similar 702;; mem:BLK outside the unspec. 703;; - The address space and glc (volatile) fields are there to replace the 704;; fields normally found in a MEM. 705;; - Multiple forms of address expression are supported, below. 

(define_expand "gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == <VnDI>mode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
                                                const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
                                                 addr, const0_rtx, const0_rtx,
                                                 const0_rtx));
    DONE;
  })

; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (unspec:V_ALL
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})

(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:V_ALL 0 "register_operand"              "=v")
        (unspec:V_ALL
          [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
                        (vec_duplicate:<VnDI>
                          (match_operand 2 "immediate_operand"     " n")))
           (match_operand 3 "immediate_operand"                    " n")
           (match_operand 4 "immediate_operand"                    " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
        || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
        if (TARGET_GCN5_PLUS)
          sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
                   glc);
        else
          sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:V_ALL 0 "register_operand"              "=v")
        (unspec:V_ALL
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
                        (vec_duplicate:<VnSI>
                          (match_operand 2 "immediate_operand"     " n")))
           (match_operand 3 "immediate_operand"                    " n")
           (match_operand 4 "immediate_operand"                    " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             (AS_GDS_P (as) ? " gds" : ""));
" gds" : "")); 797 return buf; 798 } 799 [(set_attr "type" "ds") 800 (set_attr "length" "12")]) 801 802(define_insn "gather<mode>_insn_2offsets<exec>" 803 [(set (match_operand:V_ALL 0 "register_operand" "=v") 804 (unspec:V_ALL 805 [(plus:<VnDI> 806 (plus:<VnDI> 807 (vec_duplicate:<VnDI> 808 (match_operand:DI 1 "register_operand" "Sv")) 809 (sign_extend:<VnDI> 810 (match_operand:<VnSI> 2 "register_operand" " v"))) 811 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n"))) 812 (match_operand 4 "immediate_operand" " n") 813 (match_operand 5 "immediate_operand" " n") 814 (mem:BLK (scratch))] 815 UNSPEC_GATHER))] 816 "(AS_GLOBAL_P (INTVAL (operands[4])) 817 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))" 818 { 819 addr_space_t as = INTVAL (operands[4]); 820 const char *glc = INTVAL (operands[5]) ? " glc" : ""; 821 822 static char buf[200]; 823 if (AS_GLOBAL_P (as)) 824 { 825 /* Work around assembler bug in which a 64-bit register is expected, 826 but a 32-bit value would be correct. */ 827 int reg = REGNO (operands[2]) - FIRST_VGPR_REG; 828 sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;" 829 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc); 830 } 831 else 832 gcc_unreachable (); 833 834 return buf; 835 } 836 [(set_attr "type" "flat") 837 (set_attr "length" "12")]) 838 839(define_expand "scatter_store<mode><vnsi>" 840 [(match_operand:DI 0 "register_operand") 841 (match_operand:<VnSI> 1 "register_operand") 842 (match_operand 2 "immediate_operand") 843 (match_operand:SI 3 "gcn_alu_operand") 844 (match_operand:V_ALL 4 "register_operand")] 845 "" 846 { 847 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], 848 operands[1], operands[3], 849 INTVAL (operands[2]), NULL); 850 851 if (GET_MODE (addr) == <VnDI>mode) 852 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4], 853 const0_rtx, const0_rtx)); 854 else 855 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr, 856 const0_rtx, operands[4], 857 const0_rtx, const0_rtx)); 858 DONE; 859 }) 860 861; Allow any address expression 862(define_expand "scatter<mode>_expr<exec_scatter>" 863 [(set (mem:BLK (scratch)) 864 (unspec:BLK 865 [(match_operand:<VnDI> 0 "") 866 (match_operand:V_ALL 1 "register_operand") 867 (match_operand 2 "immediate_operand") 868 (match_operand 3 "immediate_operand")] 869 UNSPEC_SCATTER))] 870 "" 871 {}) 872 873(define_insn "scatter<mode>_insn_1offset<exec_scatter>" 874 [(set (mem:BLK (scratch)) 875 (unspec:BLK 876 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v") 877 (vec_duplicate:<VnDI> 878 (match_operand 1 "immediate_operand" "n"))) 879 (match_operand:V_ALL 2 "register_operand" "v") 880 (match_operand 3 "immediate_operand" "n") 881 (match_operand 4 "immediate_operand" "n")] 882 UNSPEC_SCATTER))] 883 "(AS_FLAT_P (INTVAL (operands[3])) 884 && (INTVAL(operands[1]) == 0 885 || (TARGET_GCN5_PLUS 886 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000))) 887 || (AS_GLOBAL_P (INTVAL (operands[3])) 888 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))" 889 { 890 addr_space_t as = INTVAL (operands[3]); 891 const char *glc = INTVAL (operands[4]) ? 
" glc" : ""; 892 893 static char buf[200]; 894 if (AS_FLAT_P (as)) 895 { 896 if (TARGET_GCN5_PLUS) 897 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc); 898 else 899 sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc); 900 } 901 else if (AS_GLOBAL_P (as)) 902 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc); 903 else 904 gcc_unreachable (); 905 906 return buf; 907 } 908 [(set_attr "type" "flat") 909 (set_attr "length" "12")]) 910 911(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>" 912 [(set (mem:BLK (scratch)) 913 (unspec:BLK 914 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v") 915 (vec_duplicate:<VnSI> 916 (match_operand 1 "immediate_operand" "n"))) 917 (match_operand:V_ALL 2 "register_operand" "v") 918 (match_operand 3 "immediate_operand" "n") 919 (match_operand 4 "immediate_operand" "n")] 920 UNSPEC_SCATTER))] 921 "(AS_ANY_DS_P (INTVAL (operands[3])) 922 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))" 923 { 924 addr_space_t as = INTVAL (operands[3]); 925 static char buf[200]; 926 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)", 927 (AS_GDS_P (as) ? " gds" : "")); 928 return buf; 929 } 930 [(set_attr "type" "ds") 931 (set_attr "length" "12")]) 932 933(define_insn "scatter<mode>_insn_2offsets<exec_scatter>" 934 [(set (mem:BLK (scratch)) 935 (unspec:BLK 936 [(plus:<VnDI> 937 (plus:<VnDI> 938 (vec_duplicate:<VnDI> 939 (match_operand:DI 0 "register_operand" "Sv")) 940 (sign_extend:<VnDI> 941 (match_operand:<VnSI> 1 "register_operand" " v"))) 942 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n"))) 943 (match_operand:V_ALL 3 "register_operand" " v") 944 (match_operand 4 "immediate_operand" " n") 945 (match_operand 5 "immediate_operand" " n")] 946 UNSPEC_SCATTER))] 947 "(AS_GLOBAL_P (INTVAL (operands[4])) 948 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" 949 { 950 addr_space_t as = INTVAL (operands[4]); 951 const char *glc = INTVAL (operands[5]) ? " glc" : ""; 952 953 static char buf[200]; 954 if (AS_GLOBAL_P (as)) 955 { 956 /* Work around assembler bug in which a 64-bit register is expected, 957 but a 32-bit value would be correct. 
        int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
        sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
                 reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

;; }}}
;; {{{ Permutations

(define_insn "ds_bpermute<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"    "=v")
        (unspec:V_1REG
          [(match_operand:V_1REG 2 "register_operand" " v")
           (match_operand:<VnSI> 1 "register_operand" " v")
           (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
          UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])

(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:V_2REG 0 "register_operand"    "=&v")
        (unspec:V_2REG
          [(match_operand:V_2REG 2 "register_operand" " v0")
           (match_operand:<VnSI> 1 "register_operand" "  v")
           (match_operand:DI 3 "gcn_exec_reg_operand" "  e")]
          UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:<VnSI>
                        [(match_dup 6) (match_dup 1) (match_dup 3)]
                        UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:<VnSI>
                        [(match_dup 7) (match_dup 1) (match_dup 3)]
                        UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])

(define_insn "@dpp_move<mode>"
  [(set (match_operand:V_noHI 0 "register_operand"    "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand" " v")
           (match_operand:SI 2 "const_int_operand"    " n")]
          UNSPEC_MOV_DPP_SHR))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
                                    UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])

;; }}}
;; {{{ ALU special case: add/sub

(define_insn "add<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"   "=  v")
        (plus:V_INT_1REG
          (match_operand:V_INT_1REG 1 "register_operand" "%  v")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand"  "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "add<mode>3_dup<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"       "= v")
        (plus:V_INT_1REG
          (vec_duplicate:V_INT_1REG
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
          (match_operand:V_INT_1REG 1 "register_operand"     "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

(define_insn "add<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"   "=  v,   v")
        (plus:V_SI
          (match_operand:V_SI 1 "register_operand" "%  v,   v")
          (match_operand:V_SI 2 "gcn_alu_operand"  "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"     "= cV,  Sg")
        (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])

; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.

(define_insn "add<mode>3_vcc_dup<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"   "= v,  v")
        (plus:V_SI
          (vec_duplicate:V_SI
            (match_operand:SI 1 "gcn_alu_operand"  "SvB,SvB"))
          (match_operand:V_SI 2 "register_operand" "  v,  v")))
   (set (match_operand:DI 3 "register_operand"     "=cV, Sg")
        (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
                           (match_dup 1))
                (vec_duplicate:V_SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])

; v_addc does not accept an SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.  It does not
; accept "B" immediate constants due to a related bus conflict.

(define_insn "addc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"         "=v,   v")
        (plus:V_SI
          (plus:V_SI
            (vec_merge:V_SI
              (vec_duplicate:V_SI (const_int 1))
              (vec_duplicate:V_SI (const_int 0))
              (match_operand:DI 3 "register_operand"     " cV,cVSv"))
            (match_operand:V_SI 1 "gcn_alu_operand"      "% v,  vA"))
          (match_operand:V_SI 2 "gcn_alu_operand"        " vA,  vA")))
   (set (match_operand:DI 4 "register_operand"           "=cV,cVSg")
        (ior:DI (ltu:DI (plus:V_SI
                          (plus:V_SI
                            (vec_merge:V_SI
                              (vec_duplicate:V_SI (const_int 1))
                              (vec_duplicate:V_SI (const_int 0))
                              (match_dup 3))
                            (match_dup 1))
                          (match_dup 2))
                        (match_dup 2))
                (ltu:DI (plus:V_SI
                          (vec_merge:V_SI
                            (vec_duplicate:V_SI (const_int 1))
                            (vec_duplicate:V_SI (const_int 0))
                            (match_dup 3))
                          (match_dup 1))
                        (match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

(define_insn "sub<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"  "=  v,   v")
        (minus:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB,   v")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

(define_insn "sub<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"  "=  v,   v,   v,   v")
        (minus:V_SI
          (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
          (match_operand:V_SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"    "= cV,  Sg,  cV,  Sg")
        (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
                (match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; v_subb does not accept an SGPR because the VCC read already counts as an
; SGPR use and the number of SGPR operands is limited to 1.  It does not
; accept "B" immediate constants due to a related bus conflict.
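;
; For reference, the V_DI add/sub splitters later in this section chain these
; patterns pairwise: add<mode>3_vcc/sub<mode>3_vcc on the low halves sets VCC
; from the carry/borrow out, and addc/subc on the high halves consumes it.
; An illustrative (not literal) two-instruction sequence:
;
;   v_add_u32  dst.lo, vcc, a.lo, b.lo
;   v_addc_u32 dst.hi, vcc, a.hi, b.hi, vcc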

(define_insn "subc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"         "= v,   v, v,   v")
        (minus:V_SI
          (minus:V_SI
            (vec_merge:V_SI
              (vec_duplicate:V_SI (const_int 1))
              (vec_duplicate:V_SI (const_int 0))
              (match_operand:DI 3 "gcn_alu_operand"      " cV,cVSv,cV,cVSv"))
            (match_operand:V_SI 1 "gcn_alu_operand"      " vA,  vA, v,  vA"))
          (match_operand:V_SI 2 "gcn_alu_operand"        "  v,  vA,vA,  vA")))
   (set (match_operand:DI 4 "register_operand"           "=cV,cVSg,cV,cVSg")
        (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
                                      (vec_merge:V_SI
                                        (vec_duplicate:V_SI (const_int 1))
                                        (vec_duplicate:V_SI (const_int 0))
                                        (match_dup 3))
                                      (match_dup 1))
                                    (match_dup 2))
                        (match_dup 2))
                (ltu:DI (minus:V_SI (vec_merge:V_SI
                                      (vec_duplicate:V_SI (const_int 1))
                                      (vec_duplicate:V_SI (const_int 0))
                                      (match_dup 3))
                                    (match_dup 1))
                        (match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "4,8,4,8")])

(define_insn_and_split "add<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"   "=  v")
        (plus:V_DI
          (match_operand:V_DI 1 "register_operand" "%vDb")
          (match_operand:V_DI 2 "gcn_alu_operand"  " vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<vnsi>3_vcc
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc));
    emit_insn (gen_addc<vnsi>3
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "add<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"                  "=  v")
        (vec_merge:V_DI
          (plus:V_DI
            (match_operand:V_DI 1 "register_operand"              "%vDb")
            (match_operand:V_DI 2 "gcn_alu_operand"               " vDb"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand"  "  U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"              "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<vnsi>3_vcc_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "sub<mode>3"
"sub<mode>3" 1252 [(set (match_operand:V_DI 0 "register_operand" "= v, v") 1253 (minus:V_DI 1254 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v") 1255 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb"))) 1256 (clobber (reg:DI VCC_REG))] 1257 "" 1258 "#" 1259 "gcn_can_split_p (<MODE>mode, operands[0]) 1260 && gcn_can_split_p (<MODE>mode, operands[1]) 1261 && gcn_can_split_p (<MODE>mode, operands[2])" 1262 [(const_int 0)] 1263 { 1264 rtx vcc = gen_rtx_REG (DImode, VCC_REG); 1265 emit_insn (gen_sub<vnsi>3_vcc 1266 (gcn_operand_part (<MODE>mode, operands[0], 0), 1267 gcn_operand_part (<MODE>mode, operands[1], 0), 1268 gcn_operand_part (<MODE>mode, operands[2], 0), 1269 vcc)); 1270 emit_insn (gen_subc<vnsi>3 1271 (gcn_operand_part (<MODE>mode, operands[0], 1), 1272 gcn_operand_part (<MODE>mode, operands[1], 1), 1273 gcn_operand_part (<MODE>mode, operands[2], 1), 1274 vcc, vcc)); 1275 DONE; 1276 } 1277 [(set_attr "type" "vmult") 1278 (set_attr "length" "8")]) 1279 1280(define_insn_and_split "sub<mode>3_exec" 1281 [(set (match_operand:V_DI 0 "register_operand" "= v, v") 1282 (vec_merge:V_DI 1283 (minus:V_DI 1284 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v") 1285 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB")) 1286 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0") 1287 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) 1288 (clobber (reg:DI VCC_REG))] 1289 "register_operand (operands[1], VOIDmode) 1290 || register_operand (operands[2], VOIDmode)" 1291 "#" 1292 "gcn_can_split_p (<MODE>mode, operands[0]) 1293 && gcn_can_split_p (<MODE>mode, operands[1]) 1294 && gcn_can_split_p (<MODE>mode, operands[2]) 1295 && gcn_can_split_p (<MODE>mode, operands[3])" 1296 [(const_int 0)] 1297 { 1298 rtx vcc = gen_rtx_REG (DImode, VCC_REG); 1299 emit_insn (gen_sub<vnsi>3_vcc_exec 1300 (gcn_operand_part (<MODE>mode, operands[0], 0), 1301 gcn_operand_part (<MODE>mode, operands[1], 0), 1302 gcn_operand_part (<MODE>mode, operands[2], 0), 1303 vcc, 1304 gcn_operand_part (<MODE>mode, operands[3], 0), 1305 operands[4])); 1306 emit_insn (gen_subc<vnsi>3_exec 1307 (gcn_operand_part (<MODE>mode, operands[0], 1), 1308 gcn_operand_part (<MODE>mode, operands[1], 1), 1309 gcn_operand_part (<MODE>mode, operands[2], 1), 1310 vcc, vcc, 1311 gcn_operand_part (<MODE>mode, operands[3], 1), 1312 operands[4])); 1313 DONE; 1314 } 1315 [(set_attr "type" "vmult") 1316 (set_attr "length" "8")]) 1317 1318(define_insn_and_split "add<mode>3_zext" 1319 [(set (match_operand:V_DI 0 "register_operand" "= v, v") 1320 (plus:V_DI 1321 (zero_extend:V_DI 1322 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB")) 1323 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))) 1324 (clobber (reg:DI VCC_REG))] 1325 "" 1326 "#" 1327 "gcn_can_split_p (<MODE>mode, operands[0]) 1328 && gcn_can_split_p (<MODE>mode, operands[2])" 1329 [(const_int 0)] 1330 { 1331 rtx vcc = gen_rtx_REG (DImode, VCC_REG); 1332 emit_insn (gen_add<vnsi>3_vcc 1333 (gcn_operand_part (<MODE>mode, operands[0], 0), 1334 operands[1], 1335 gcn_operand_part (<MODE>mode, operands[2], 0), 1336 vcc)); 1337 emit_insn (gen_addc<vnsi>3 1338 (gcn_operand_part (<MODE>mode, operands[0], 1), 1339 gcn_operand_part (<MODE>mode, operands[2], 1), 1340 const0_rtx, vcc, vcc)); 1341 DONE; 1342 } 1343 [(set_attr "type" "vmult") 1344 (set_attr "length" "8")]) 1345 1346(define_insn_and_split "add<mode>3_zext_exec" 1347 [(set (match_operand:V_DI 0 "register_operand" "= v, v") 1348 (vec_merge:V_DI 1349 (plus:V_DI 1350 (zero_extend:V_DI 1351 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, 
vB")) 1352 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")) 1353 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0") 1354 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) 1355 (clobber (reg:DI VCC_REG))] 1356 "" 1357 "#" 1358 "gcn_can_split_p (<MODE>mode, operands[0]) 1359 && gcn_can_split_p (<MODE>mode, operands[2]) 1360 && gcn_can_split_p (<MODE>mode, operands[3])" 1361 [(const_int 0)] 1362 { 1363 rtx vcc = gen_rtx_REG (DImode, VCC_REG); 1364 emit_insn (gen_add<vnsi>3_vcc_exec 1365 (gcn_operand_part (<MODE>mode, operands[0], 0), 1366 operands[1], 1367 gcn_operand_part (<MODE>mode, operands[2], 0), 1368 vcc, 1369 gcn_operand_part (<MODE>mode, operands[3], 0), 1370 operands[4])); 1371 emit_insn (gen_addc<vnsi>3_exec 1372 (gcn_operand_part (<MODE>mode, operands[0], 1), 1373 gcn_operand_part (<MODE>mode, operands[2], 1), 1374 const0_rtx, vcc, vcc, 1375 gcn_operand_part (<MODE>mode, operands[3], 1), 1376 operands[4])); 1377 DONE; 1378 } 1379 [(set_attr "type" "vmult") 1380 (set_attr "length" "8")]) 1381 1382(define_insn_and_split "add<mode>3_vcc_zext_dup" 1383 [(set (match_operand:V_DI 0 "register_operand" "= v, v") 1384 (plus:V_DI 1385 (zero_extend:V_DI 1386 (vec_duplicate:<VnSI> 1387 (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv"))) 1388 (match_operand:V_DI 2 "gcn_alu_operand" " vDA, vDb"))) 1389 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV") 1390 (ltu:DI (plus:V_DI 1391 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) 1392 (match_dup 2)) 1393 (match_dup 1)))] 1394 "" 1395 "#" 1396 "gcn_can_split_p (<MODE>mode, operands[0]) 1397 && gcn_can_split_p (<MODE>mode, operands[2])" 1398 [(const_int 0)] 1399 { 1400 emit_insn (gen_add<vnsi>3_vcc_dup 1401 (gcn_operand_part (<MODE>mode, operands[0], 0), 1402 gcn_operand_part (DImode, operands[1], 0), 1403 gcn_operand_part (<MODE>mode, operands[2], 0), 1404 operands[3])); 1405 emit_insn (gen_addc<vnsi>3 1406 (gcn_operand_part (<MODE>mode, operands[0], 1), 1407 gcn_operand_part (<MODE>mode, operands[2], 1), 1408 const0_rtx, operands[3], operands[3])); 1409 DONE; 1410 } 1411 [(set_attr "type" "vmult") 1412 (set_attr "length" "8")]) 1413 1414(define_expand "add<mode>3_zext_dup" 1415 [(match_operand:V_DI 0 "register_operand") 1416 (match_operand:SI 1 "gcn_alu_operand") 1417 (match_operand:V_DI 2 "gcn_alu_operand")] 1418 "" 1419 { 1420 rtx vcc = gen_rtx_REG (DImode, VCC_REG); 1421 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1], 1422 operands[2], vcc)); 1423 DONE; 1424 }) 1425 1426(define_insn_and_split "add<mode>3_vcc_zext_dup_exec" 1427 [(set (match_operand:V_DI 0 "register_operand" "= v, v") 1428 (vec_merge:V_DI 1429 (plus:V_DI 1430 (zero_extend:V_DI 1431 (vec_duplicate:<VnSI> 1432 (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv"))) 1433 (match_operand:V_DI 2 "gcn_alu_operand" " vDb, vDA")) 1434 (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0, U0") 1435 (match_operand:DI 5 "gcn_exec_reg_operand" " e, e"))) 1436 (set (match_operand:DI 3 "register_operand" "=&SgcV,&SgcV") 1437 (and:DI 1438 (ltu:DI (plus:V_DI 1439 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) 1440 (match_dup 2)) 1441 (match_dup 1)) 1442 (match_dup 5)))] 1443 "" 1444 "#" 1445 "gcn_can_split_p (<MODE>mode, operands[0]) 1446 && gcn_can_split_p (<MODE>mode, operands[2]) 1447 && gcn_can_split_p (<MODE>mode, operands[4])" 1448 [(const_int 0)] 1449 { 1450 emit_insn (gen_add<vnsi>3_vcc_dup_exec 1451 (gcn_operand_part (<MODE>mode, operands[0], 0), 1452 gcn_operand_part (DImode, operands[1], 0), 1453 gcn_operand_part 
                 operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 0),
                 operands[5]));
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 const0_rtx, operands[3], operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 operands[5]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_expand "add<mode>3_zext_dup_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
                                                 operands[2], vcc, operands[3],
                                                 operands[4]));
    DONE;
  })

(define_insn_and_split "add<mode>3_vcc_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand"                       "= v")
        (plus:V_DI
          (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"  " vA"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"    " DbSv"))))
   (set (match_operand:DI 3 "register_operand"                         "=&SgcV")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (match_dup 1))
                  (vec_duplicate:V_DI (match_dup 2)))
                (match_dup 1)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    emit_insn (gen_add<vnsi>3_vcc_dup
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 operands[3]));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
                                operands[3]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_expand "add<mode>3_zext_dup2"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
                                             operands[2], vcc));
    DONE;
  })

(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand"                       "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"   "BSv")))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand"        " U0")
          (match_operand:DI 5 "gcn_exec_reg_operand"                    "  e")))
   (set (match_operand:DI 3 "register_operand"                          "=&SgcV")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (match_dup 1))
                    (vec_duplicate:V_DI (match_dup 2)))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 0),
                 operands[5]));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 operands[5]));
    emit_insn (gen_addc<vnsi>3_exec
                (dsthi, dsthi, const0_rtx, operands[3], operands[3],
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 operands[5]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_expand "add<mode>3_zext_dup2_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
                                                  operands[2], vcc,
                                                  operands[3], operands[4]));
    DONE;
  })

(define_insn_and_split "add<mode>3_sext_dup2"
  [(set (match_operand:V_DI 0 "register_operand"                       "= v")
        (plus:V_DI
          (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"  " vA"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"    "BSv"))))
   (clobber (match_scratch:<VnSI> 3                                    "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_add<vnsi>3_vcc_dup
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>
                (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

(define_insn_and_split "add<mode>3_sext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand"                        "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"   "BSv")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand"        " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand"                    "  e")))
   (clobber (match_scratch:<VnSI> 5                                     "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
                                     gcn_gen_undef (<VnSI>mode), operands[4]));
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));
    rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec
                (dsthi, gcn_operand_part (DImode, operands[2], 1),
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    emit_insn (gen_addc<vnsi>3_exec
                (dsthi, dsthi, operands[5], vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

;; }}}
;; {{{ DS memory ALU: add/sub

(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])

;; FIXME: the vector patterns probably need RD expanded to a vector of
;;        addresses.  For now, the only way a vector can get into LDS is
;;        if the user puts it there manually.
;;
;; FIXME: the scalar patterns are probably fine in themselves, but need to be
;;        checked to see if anything can ever use them.

(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (plus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (plus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                       "%RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                       " RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"   "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 2 "register_operand"      "  v")
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                                       " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU special case: mult

(define_insn "<su>mul<mode>3_highpart<exec>"
  [(set (match_operand:V_SI 0 "register_operand"         "=  v")
        (truncate:V_SI
          (lshiftrt:<VnDI>
            (mult:<VnDI>
              (any_extend:<VnDI>
                (match_operand:V_SI 1 "gcn_alu_operand"  "  %v"))
              (any_extend:<VnDI>
                (match_operand:V_SI 2 "gcn_alu_operand"  "vSvA")))
            (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

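; The 32-bit multiply patterns below also serve as building blocks for the
; V_DI multiply splitters further down, which assemble a 64-bit product
; roughly as (sketch only; see the mul<mode>3 splitters for the exact
; sequences):
;
;   dst.lo = lo32 (a.lo * b.lo)
;   dst.hi = hi32 (a.lo * b.lo) + lo32 (a.hi * b.lo) + lo32 (a.lo * b.hi) + ...
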
1744(define_insn "mul<mode>3<exec>" 1745 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") 1746 (mult:V_INT_1REG 1747 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") 1748 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))] 1749 "" 1750 "v_mul_lo_u32\t%0, %1, %2" 1751 [(set_attr "type" "vop3a") 1752 (set_attr "length" "8")]) 1753 1754(define_insn "mul<mode>3_dup<exec>" 1755 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") 1756 (mult:V_INT_1REG 1757 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") 1758 (vec_duplicate:V_INT_1REG 1759 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))] 1760 "" 1761 "v_mul_lo_u32\t%0, %1, %2" 1762 [(set_attr "type" "vop3a") 1763 (set_attr "length" "8")]) 1764 1765(define_insn_and_split "mul<mode>3" 1766 [(set (match_operand:V_DI 0 "register_operand" "=&v") 1767 (mult:V_DI 1768 (match_operand:V_DI 1 "gcn_alu_operand" "% v") 1769 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) 1770 (clobber (match_scratch:<VnSI> 3 "=&v"))] 1771 "" 1772 "#" 1773 "reload_completed" 1774 [(const_int 0)] 1775 { 1776 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); 1777 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); 1778 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0); 1779 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1); 1780 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); 1781 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); 1782 rtx tmp = operands[3]; 1783 1784 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo)); 1785 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo)); 1786 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo)); 1787 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); 1788 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi)); 1789 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); 1790 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_hi)); 1791 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); 1792 DONE; 1793 }) 1794 1795(define_insn_and_split "mul<mode>3_exec" 1796 [(set (match_operand:V_DI 0 "register_operand" "=&v") 1797 (vec_merge:V_DI 1798 (mult:V_DI 1799 (match_operand:V_DI 1 "gcn_alu_operand" "% v") 1800 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) 1801 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") 1802 (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) 1803 (clobber (match_scratch:<VnSI> 5 "=&v"))] 1804 "" 1805 "#" 1806 "reload_completed" 1807 [(const_int 0)] 1808 { 1809 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); 1810 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); 1811 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0); 1812 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1); 1813 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); 1814 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); 1815 rtx exec = operands[4]; 1816 rtx tmp = operands[5]; 1817 1818 rtx old_lo, old_hi; 1819 if (GET_CODE (operands[3]) == UNSPEC) 1820 { 1821 old_lo = old_hi = gcn_gen_undef (<VnSI>mode); 1822 } 1823 else 1824 { 1825 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); 1826 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); 1827 } 1828 1829 rtx undef = gcn_gen_undef (<VnSI>mode); 1830 1831 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec)); 1832 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo, 1833 old_hi, exec)); 1834 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec)); 1835 emit_insn 
(gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); 1836 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec)); 1837 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); 1838 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_hi, undef, exec)); 1839 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); 1840 DONE; 1841 }) 1842 1843(define_insn_and_split "mul<mode>3_zext" 1844 [(set (match_operand:V_DI 0 "register_operand" "=&v") 1845 (mult:V_DI 1846 (zero_extend:V_DI 1847 (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) 1848 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) 1849 (clobber (match_scratch:<VnSI> 3 "=&v"))] 1850 "" 1851 "#" 1852 "reload_completed" 1853 [(const_int 0)] 1854 { 1855 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); 1856 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); 1857 rtx left = operands[1]; 1858 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); 1859 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); 1860 rtx tmp = operands[3]; 1861 1862 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo)); 1863 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo)); 1864 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi)); 1865 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); 1866 DONE; 1867 }) 1868 1869(define_insn_and_split "mul<mode>3_zext_exec" 1870 [(set (match_operand:V_DI 0 "register_operand" "=&v") 1871 (vec_merge:V_DI 1872 (mult:V_DI 1873 (zero_extend:V_DI 1874 (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) 1875 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) 1876 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") 1877 (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) 1878 (clobber (match_scratch:<VnSI> 5 "=&v"))] 1879 "" 1880 "#" 1881 "reload_completed" 1882 [(const_int 0)] 1883 { 1884 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); 1885 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); 1886 rtx left = operands[1]; 1887 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); 1888 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); 1889 rtx exec = operands[4]; 1890 rtx tmp = operands[5]; 1891 1892 rtx old_lo, old_hi; 1893 if (GET_CODE (operands[3]) == UNSPEC) 1894 { 1895 old_lo = old_hi = gcn_gen_undef (<VnSI>mode); 1896 } 1897 else 1898 { 1899 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); 1900 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); 1901 } 1902 1903 rtx undef = gcn_gen_undef (<VnSI>mode); 1904 1905 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec)); 1906 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo, 1907 old_hi, exec)); 1908 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec)); 1909 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); 1910 DONE; 1911 }) 1912 1913(define_insn_and_split "mul<mode>3_zext_dup2" 1914 [(set (match_operand:V_DI 0 "register_operand" "= &v") 1915 (mult:V_DI 1916 (zero_extend:V_DI 1917 (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) 1918 (vec_duplicate:V_DI 1919 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))) 1920 (clobber (match_scratch:<VnSI> 3 "= &v"))] 1921 "" 1922 "#" 1923 "reload_completed" 1924 [(const_int 0)] 1925 { 1926 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); 1927 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); 1928 rtx left = operands[1]; 1929 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); 1930 rtx right_hi = gcn_operand_part (<MODE>mode, 
operands[2], 1); 1931 rtx tmp = operands[3]; 1932 1933 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo)); 1934 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo)); 1935 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi)); 1936 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); 1937 DONE; 1938 }) 1939 1940(define_insn_and_split "mul<mode>3_zext_dup2_exec" 1941 [(set (match_operand:V_DI 0 "register_operand" "= &v") 1942 (vec_merge:V_DI 1943 (mult:V_DI 1944 (zero_extend:V_DI 1945 (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) 1946 (vec_duplicate:V_DI 1947 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))) 1948 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") 1949 (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) 1950 (clobber (match_scratch:<VnSI> 5 "= &v"))] 1951 "" 1952 "#" 1953 "reload_completed" 1954 [(const_int 0)] 1955 { 1956 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); 1957 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); 1958 rtx left = operands[1]; 1959 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); 1960 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); 1961 rtx exec = operands[4]; 1962 rtx tmp = operands[5]; 1963 1964 rtx old_lo, old_hi; 1965 if (GET_CODE (operands[3]) == UNSPEC) 1966 { 1967 old_lo = old_hi = gcn_gen_undef (<VnSI>mode); 1968 } 1969 else 1970 { 1971 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); 1972 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); 1973 } 1974 1975 rtx undef = gcn_gen_undef (<VnSI>mode); 1976 1977 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec)); 1978 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo, 1979 old_hi, exec)); 1980 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec)); 1981 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); 1982 DONE; 1983 }) 1984 1985;; }}} 1986;; {{{ ALU generic case 1987 1988(define_code_iterator bitop [and ior xor]) 1989(define_code_iterator shiftop [ashift lshiftrt ashiftrt]) 1990(define_code_iterator minmaxop [smin smax umin umax]) 1991 1992(define_insn "<expander><mode>2<exec>" 1993 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v") 1994 (bitunop:V_INT_1REG 1995 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))] 1996 "" 1997 "v_<mnemonic>0\t%0, %1" 1998 [(set_attr "type" "vop1") 1999 (set_attr "length" "8")]) 2000 2001(define_insn "<expander><mode>3<exec>" 2002 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD") 2003 (bitop:V_INT_1REG 2004 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0") 2005 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))] 2006 "" 2007 "@ 2008 v_<mnemonic>0\t%0, %2, %1 2009 ds_<mnemonic>0\t%A0, %2%O0" 2010 [(set_attr "type" "vop2,ds") 2011 (set_attr "length" "8,8")]) 2012 2013(define_insn_and_split "<expander><mode>3" 2014 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") 2015 (bitop:V_DI 2016 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") 2017 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))] 2018 "" 2019 "@ 2020 # 2021 ds_<mnemonic>0\t%A0, %2%O0" 2022 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" 2023 [(set (match_dup 3) 2024 (bitop:<VnSI> (match_dup 5) (match_dup 7))) 2025 (set (match_dup 4) 2026 (bitop:<VnSI> (match_dup 6) (match_dup 8)))] 2027 { 2028 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0); 2029 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1); 2030 operands[5] = 
gcn_operand_part (<MODE>mode, operands[1], 0); 2031 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1); 2032 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0); 2033 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1); 2034 } 2035 [(set_attr "type" "vmult,ds") 2036 (set_attr "length" "16,8")]) 2037 2038(define_insn_and_split "<expander><mode>3_exec" 2039 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") 2040 (vec_merge:V_DI 2041 (bitop:V_DI 2042 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") 2043 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")) 2044 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0") 2045 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))] 2046 "!memory_operand (operands[0], VOIDmode) 2047 || (rtx_equal_p (operands[0], operands[1]) 2048 && register_operand (operands[2], VOIDmode))" 2049 "@ 2050 # 2051 ds_<mnemonic>0\t%A0, %2%O0" 2052 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" 2053 [(set (match_dup 5) 2054 (vec_merge:<VnSI> 2055 (bitop:<VnSI> (match_dup 7) (match_dup 9)) 2056 (match_dup 11) 2057 (match_dup 4))) 2058 (set (match_dup 6) 2059 (vec_merge:<VnSI> 2060 (bitop:<VnSI> (match_dup 8) (match_dup 10)) 2061 (match_dup 12) 2062 (match_dup 4)))] 2063 { 2064 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0); 2065 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1); 2066 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0); 2067 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1); 2068 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0); 2069 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1); 2070 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0); 2071 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1); 2072 } 2073 [(set_attr "type" "vmult,ds") 2074 (set_attr "length" "16,8")]) 2075 2076(define_expand "<expander><mode>3" 2077 [(set (match_operand:V_QIHI 0 "register_operand" "= v") 2078 (shiftop:V_QIHI 2079 (match_operand:V_QIHI 1 "gcn_alu_operand" " v") 2080 (vec_duplicate:V_QIHI 2081 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] 2082 "" 2083 { 2084 enum {ashift, lshiftrt, ashiftrt}; 2085 bool unsignedp = (<code> == lshiftrt); 2086 rtx insi1 = gen_reg_rtx (<VnSI>mode); 2087 rtx insi2 = gen_reg_rtx (SImode); 2088 rtx outsi = gen_reg_rtx (<VnSI>mode); 2089 2090 convert_move (insi1, operands[1], unsignedp); 2091 convert_move (insi2, operands[2], unsignedp); 2092 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2)); 2093 convert_move (operands[0], outsi, unsignedp); 2094 DONE; 2095 }) 2096 2097(define_insn "<expander><mode>3<exec>" 2098 [(set (match_operand:V_SI 0 "register_operand" "= v") 2099 (shiftop:V_SI 2100 (match_operand:V_SI 1 "gcn_alu_operand" " v") 2101 (vec_duplicate:V_SI 2102 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] 2103 "" 2104 "v_<revmnemonic>0\t%0, %2, %1" 2105 [(set_attr "type" "vop2") 2106 (set_attr "length" "8")]) 2107 2108(define_expand "v<expander><mode>3" 2109 [(set (match_operand:V_QIHI 0 "register_operand" "=v") 2110 (shiftop:V_QIHI 2111 (match_operand:V_QIHI 1 "gcn_alu_operand" " v") 2112 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))] 2113 "" 2114 { 2115 enum {ashift, lshiftrt, ashiftrt}; 2116 bool unsignedp = (<code> == lshiftrt); 2117 rtx insi1 = gen_reg_rtx (<VnSI>mode); 2118 rtx insi2 = gen_reg_rtx (<VnSI>mode); 2119 rtx outsi = gen_reg_rtx (<VnSI>mode); 2120 2121 convert_move (insi1, operands[1], unsignedp); 2122 convert_move (insi2, operands[2], 
unsignedp); 2123 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2)); 2124 convert_move (operands[0], outsi, unsignedp); 2125 DONE; 2126 }) 2127 2128(define_insn "v<expander><mode>3<exec>" 2129 [(set (match_operand:V_SI 0 "register_operand" "=v") 2130 (shiftop:V_SI 2131 (match_operand:V_SI 1 "gcn_alu_operand" " v") 2132 (match_operand:V_SI 2 "gcn_alu_operand" "vB")))] 2133 "" 2134 "v_<revmnemonic>0\t%0, %2, %1" 2135 [(set_attr "type" "vop2") 2136 (set_attr "length" "8")]) 2137 2138(define_expand "<expander><mode>3" 2139 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand") 2140 (minmaxop:V_QIHI 2141 (match_operand:V_QIHI 1 "gcn_valu_src0_operand") 2142 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))] 2143 "" 2144 { 2145 enum {smin, umin, smax, umax}; 2146 bool unsignedp = (<code> == umax || <code> == umin); 2147 rtx insi1 = gen_reg_rtx (<VnSI>mode); 2148 rtx insi2 = gen_reg_rtx (<VnSI>mode); 2149 rtx outsi = gen_reg_rtx (<VnSI>mode); 2150 2151 convert_move (insi1, operands[1], unsignedp); 2152 convert_move (insi2, operands[2], unsignedp); 2153 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2)); 2154 convert_move (operands[0], outsi, unsignedp); 2155 DONE; 2156 }) 2157 2158(define_insn "<expander><vnsi>3<exec>" 2159 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD") 2160 (minmaxop:V_SI 2161 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0") 2162 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))] 2163 "" 2164 "@ 2165 v_<mnemonic>0\t%0, %2, %1 2166 ds_<mnemonic>0\t%A0, %2%O0" 2167 [(set_attr "type" "vop2,ds") 2168 (set_attr "length" "8,8")]) 2169 2170;; }}} 2171;; {{{ FP binops - special cases 2172 2173; GCN does not directly provide a DFmode subtract instruction, so we do it by 2174; adding the negated second operand to the first. 
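; That is, a - b is computed as a + (-b), with the negation folded into the
; "-%2" operand of v_add_f64 as a source modifier rather than emitted as a
; separate instruction.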
2175 2176(define_insn "sub<mode>3<exec>" 2177 [(set (match_operand:V_DF 0 "register_operand" "= v, v") 2178 (minus:V_DF 2179 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v") 2180 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))] 2181 "" 2182 "@ 2183 v_add_f64\t%0, %1, -%2 2184 v_add_f64\t%0, -%2, %1" 2185 [(set_attr "type" "vop3a") 2186 (set_attr "length" "8,8")]) 2187 2188(define_insn "subdf3" 2189 [(set (match_operand:DF 0 "register_operand" "= v, v") 2190 (minus:DF 2191 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v") 2192 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))] 2193 "" 2194 "@ 2195 v_add_f64\t%0, %1, -%2 2196 v_add_f64\t%0, -%2, %1" 2197 [(set_attr "type" "vop3a") 2198 (set_attr "length" "8,8")]) 2199 2200;; }}} 2201;; {{{ FP binops - generic 2202 2203(define_code_iterator comm_fp [plus mult smin smax]) 2204(define_code_iterator nocomm_fp [minus]) 2205(define_code_iterator all_fp [plus mult minus smin smax]) 2206 2207(define_insn "<expander><mode>3<exec>" 2208 [(set (match_operand:V_FP 0 "register_operand" "= v") 2209 (comm_fp:V_FP 2210 (match_operand:V_FP 1 "gcn_alu_operand" "% v") 2211 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))] 2212 "" 2213 "v_<mnemonic>0\t%0, %2, %1" 2214 [(set_attr "type" "vop2") 2215 (set_attr "length" "8")]) 2216 2217(define_insn "<expander><mode>3" 2218 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL") 2219 (comm_fp:FP 2220 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0") 2221 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))] 2222 "" 2223 "@ 2224 v_<mnemonic>0\t%0, %2, %1 2225 v_<mnemonic>0\t%0, %1%O0" 2226 [(set_attr "type" "vop2,ds") 2227 (set_attr "length" "8")]) 2228 2229(define_insn "<expander><mode>3<exec>" 2230 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v") 2231 (nocomm_fp:V_FP_1REG 2232 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v") 2233 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))] 2234 "" 2235 "@ 2236 v_<mnemonic>0\t%0, %1, %2 2237 v_<revmnemonic>0\t%0, %2, %1" 2238 [(set_attr "type" "vop2") 2239 (set_attr "length" "8,8")]) 2240 2241(define_insn "<expander><mode>3" 2242 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v") 2243 (nocomm_fp:FP_1REG 2244 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v") 2245 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))] 2246 "" 2247 "@ 2248 v_<mnemonic>0\t%0, %1, %2 2249 v_<revmnemonic>0\t%0, %2, %1" 2250 [(set_attr "type" "vop2") 2251 (set_attr "length" "8,8")]) 2252 2253;; }}} 2254;; {{{ FP unops 2255 2256(define_insn "abs<mode>2" 2257 [(set (match_operand:FP 0 "register_operand" "=v") 2258 (abs:FP (match_operand:FP 1 "register_operand" " v")))] 2259 "" 2260 "v_add%i0\t%0, 0, |%1|" 2261 [(set_attr "type" "vop3a") 2262 (set_attr "length" "8")]) 2263 2264(define_insn "abs<mode>2<exec>" 2265 [(set (match_operand:V_FP 0 "register_operand" "=v") 2266 (abs:V_FP 2267 (match_operand:V_FP 1 "register_operand" " v")))] 2268 "" 2269 "v_add%i0\t%0, 0, |%1|" 2270 [(set_attr "type" "vop3a") 2271 (set_attr "length" "8")]) 2272 2273(define_insn "neg<mode>2<exec>" 2274 [(set (match_operand:V_FP 0 "register_operand" "=v") 2275 (neg:V_FP 2276 (match_operand:V_FP 1 "register_operand" " v")))] 2277 "" 2278 "v_add%i0\t%0, 0, -%1" 2279 [(set_attr "type" "vop3a") 2280 (set_attr "length" "8")]) 2281 2282(define_insn "sqrt<mode>2<exec>" 2283 [(set (match_operand:V_FP 0 "register_operand" "= v") 2284 (sqrt:V_FP 2285 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))] 2286 "flag_unsafe_math_optimizations" 2287 "v_sqrt%i0\t%0, %1" 2288 
[(set_attr "type" "vop1") 2289 (set_attr "length" "8")]) 2290 2291(define_insn "sqrt<mode>2" 2292 [(set (match_operand:FP 0 "register_operand" "= v") 2293 (sqrt:FP 2294 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))] 2295 "flag_unsafe_math_optimizations" 2296 "v_sqrt%i0\t%0, %1" 2297 [(set_attr "type" "vop1") 2298 (set_attr "length" "8")]) 2299 2300;; }}} 2301;; {{{ FP fused multiply and add 2302 2303(define_insn "fma<mode>4<exec>" 2304 [(set (match_operand:V_FP 0 "register_operand" "= v, v") 2305 (fma:V_FP 2306 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA") 2307 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA") 2308 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))] 2309 "" 2310 "v_fma%i0\t%0, %1, %2, %3" 2311 [(set_attr "type" "vop3a") 2312 (set_attr "length" "8")]) 2313 2314(define_insn "fma<mode>4_negop2<exec>" 2315 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v") 2316 (fma:V_FP 2317 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA") 2318 (neg:V_FP 2319 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA")) 2320 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))] 2321 "" 2322 "v_fma%i0\t%0, %1, -%2, %3" 2323 [(set_attr "type" "vop3a") 2324 (set_attr "length" "8")]) 2325 2326(define_insn "fma<mode>4" 2327 [(set (match_operand:FP 0 "register_operand" "= v, v") 2328 (fma:FP 2329 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA") 2330 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA") 2331 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))] 2332 "" 2333 "v_fma%i0\t%0, %1, %2, %3" 2334 [(set_attr "type" "vop3a") 2335 (set_attr "length" "8")]) 2336 2337(define_insn "fma<mode>4_negop2" 2338 [(set (match_operand:FP 0 "register_operand" "= v, v, v") 2339 (fma:FP 2340 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA") 2341 (neg:FP 2342 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA")) 2343 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))] 2344 "" 2345 "v_fma%i0\t%0, %1, -%2, %3" 2346 [(set_attr "type" "vop3a") 2347 (set_attr "length" "8")]) 2348 2349;; }}} 2350;; {{{ FP division 2351 2352(define_insn "recip<mode>2<exec>" 2353 [(set (match_operand:V_FP 0 "register_operand" "= v") 2354 (unspec:V_FP 2355 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")] 2356 UNSPEC_RCP))] 2357 "" 2358 "v_rcp%i0\t%0, %1" 2359 [(set_attr "type" "vop1") 2360 (set_attr "length" "8")]) 2361 2362(define_insn "recip<mode>2" 2363 [(set (match_operand:FP 0 "register_operand" "= v") 2364 (unspec:FP 2365 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")] 2366 UNSPEC_RCP))] 2367 "" 2368 "v_rcp%i0\t%0, %1" 2369 [(set_attr "type" "vop1") 2370 (set_attr "length" "8")]) 2371 2372;; Do division via a = b * 1/c 2373;; The v_rcp_* instructions are not sufficiently accurate on their own, 2374;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson 2375;; which the ISA manual says is enough to improve the reciprocal accuracy. 2376;; 2377;; FIXME: This does not handle denormals, NaNs, division-by-zero etc. 
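;; As a host-side sketch of what the expanders below emit (illustrative
;; only: the function and parameter names are made up, fmaf from <math.h>
;; stands in for v_fma_*, and r0 stands for the v_rcp_* approximation of
;; 1/d), the refinement is roughly:
;;
;;   #include <math.h>
;;
;;   float
;;   refined_div (float n, float d, float r0)
;;   {
;;     /* One Newton-Raphson step: r = r0 + r0*(1 - d*r0).  */
;;     float r = fmaf (fmaf (-d, r0, 1.0f), r0, r0);
;;     /* Two residual corrections of the quotient: q = q + r*(n - d*q).  */
;;     float q = n * r;
;;     q = fmaf (fmaf (-d, q, n), r, q);
;;     q = fmaf (fmaf (-d, q, n), r, q);
;;     return q;
;;   }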
2378 2379(define_expand "div<mode>3" 2380 [(match_operand:V_FP 0 "gcn_valu_dst_operand") 2381 (match_operand:V_FP 1 "gcn_valu_src0_operand") 2382 (match_operand:V_FP 2 "gcn_valu_src0_operand")] 2383 "flag_reciprocal_math" 2384 { 2385 rtx one = gcn_vec_constant (<MODE>mode, 2386 const_double_from_real_value (dconst1, <SCALAR_MODE>mode)); 2387 rtx initrcp = gen_reg_rtx (<MODE>mode); 2388 rtx fma = gen_reg_rtx (<MODE>mode); 2389 rtx rcp; 2390 rtx num = operands[1], denom = operands[2]; 2391 2392 bool is_rcp = (GET_CODE (num) == CONST_VECTOR 2393 && real_identical 2394 (CONST_DOUBLE_REAL_VALUE 2395 (CONST_VECTOR_ELT (num, 0)), &dconstm1)); 2396 2397 if (is_rcp) 2398 rcp = operands[0]; 2399 else 2400 rcp = gen_reg_rtx (<MODE>mode); 2401 2402 emit_insn (gen_recip<mode>2 (initrcp, denom)); 2403 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one)); 2404 emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp)); 2405 2406 if (!is_rcp) 2407 { 2408 rtx div_est = gen_reg_rtx (<MODE>mode); 2409 rtx fma2 = gen_reg_rtx (<MODE>mode); 2410 rtx fma3 = gen_reg_rtx (<MODE>mode); 2411 rtx fma4 = gen_reg_rtx (<MODE>mode); 2412 emit_insn (gen_mul<mode>3 (div_est, num, rcp)); 2413 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num)); 2414 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est)); 2415 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num)); 2416 emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3)); 2417 } 2418 2419 DONE; 2420 }) 2421 2422(define_expand "div<mode>3" 2423 [(match_operand:FP 0 "gcn_valu_dst_operand") 2424 (match_operand:FP 1 "gcn_valu_src0_operand") 2425 (match_operand:FP 2 "gcn_valu_src0_operand")] 2426 "flag_reciprocal_math" 2427 { 2428 rtx one = const_double_from_real_value (dconst1, <MODE>mode); 2429 rtx initrcp = gen_reg_rtx (<MODE>mode); 2430 rtx fma = gen_reg_rtx (<MODE>mode); 2431 rtx rcp; 2432 rtx num = operands[1], denom = operands[2]; 2433 2434 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE 2435 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]), 2436 &dconstm1)); 2437 2438 if (is_rcp) 2439 rcp = operands[0]; 2440 else 2441 rcp = gen_reg_rtx (<MODE>mode); 2442 2443 emit_insn (gen_recip<mode>2 (initrcp, denom)); 2444 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, denom, one)); 2445 emit_insn (gen_fma<mode>4 (rcp, fma, initrcp, initrcp)); 2446 2447 if (!is_rcp) 2448 { 2449 rtx div_est = gen_reg_rtx (<MODE>mode); 2450 rtx fma2 = gen_reg_rtx (<MODE>mode); 2451 rtx fma3 = gen_reg_rtx (<MODE>mode); 2452 rtx fma4 = gen_reg_rtx (<MODE>mode); 2453 emit_insn (gen_mul<mode>3 (div_est, num, rcp)); 2454 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, denom, num)); 2455 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est)); 2456 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, denom, num)); 2457 emit_insn (gen_fma<mode>4 (operands[0], fma4, rcp, fma3)); 2458 } 2459 2460 DONE; 2461 }) 2462 2463;; }}} 2464;; {{{ Int/FP conversions 2465 2466(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF]) 2467(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF]) 2468 2469(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF]) 2470(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF]) 2471(define_mode_iterator VCVT_IMODE [V64HI V64SI]) 2472 2473(define_code_iterator cvt_op [fix unsigned_fix 2474 float unsigned_float 2475 float_extend float_truncate]) 2476(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc") 2477 (float "float") (unsigned_float "floatuns") 2478 (float_extend "extend") (float_truncate "trunc")]) 2479(define_code_attr 
cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1") 2480 (float "%i0%i1") (unsigned_float "%i0%u1") 2481 (float_extend "%i0%i1") 2482 (float_truncate "%i0%i1")]) 2483 2484(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2" 2485 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v") 2486 (cvt_op:CVT_TO_MODE 2487 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))] 2488 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode, 2489 <cvt_name>_cvt)" 2490 "v_cvt<cvt_operands>\t%0, %1" 2491 [(set_attr "type" "vop1") 2492 (set_attr "length" "8")]) 2493 2494(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>" 2495 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v") 2496 (cvt_op:VCVT_FMODE 2497 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))] 2498 "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode, 2499 <cvt_name>_cvt)" 2500 "v_cvt<cvt_operands>\t%0, %1" 2501 [(set_attr "type" "vop1") 2502 (set_attr "length" "8")]) 2503 2504(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>" 2505 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v") 2506 (cvt_op:VCVT_IMODE 2507 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))] 2508 "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode, 2509 <cvt_name>_cvt)" 2510 "v_cvt<cvt_operands>\t%0, %1" 2511 [(set_attr "type" "vop1") 2512 (set_attr "length" "8")]) 2513 2514;; }}} 2515;; {{{ Int/int conversions 2516 2517(define_code_iterator zero_convert [truncate zero_extend]) 2518(define_code_attr convop [ 2519 (sign_extend "extend") 2520 (zero_extend "zero_extend") 2521 (truncate "trunc")]) 2522 2523(define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>" 2524 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2525 (zero_convert:V_INT_1REG 2526 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] 2527 "" 2528 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>" 2529 [(set_attr "type" "vop_sdwa") 2530 (set_attr "length" "8")]) 2531 2532(define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>" 2533 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2534 (sign_extend:V_INT_1REG 2535 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] 2536 "" 2537 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>" 2538 [(set_attr "type" "vop_sdwa") 2539 (set_attr "length" "8")]) 2540 2541;; GCC can already do these for scalar types, but not for vector types. 2542;; Unfortunately you can't just do SUBREG on a vector to select the low part, 2543;; so there must be a few tricks here. 
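;; The trick used below is to handle the DImode vector as two SImode-vector
;; halves via gcn_operand_part: truncation simply keeps the low half, while
;; extension writes the low half and then fills the high half with either
;; the sign (arithmetic shift right by 31) or zero.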
2544 2545(define_insn_and_split "trunc<vndi><mode>2" 2546 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2547 (truncate:V_INT_1REG 2548 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))] 2549 "" 2550 "#" 2551 "reload_completed" 2552 [(const_int 0)] 2553 { 2554 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0); 2555 rtx out = operands[0]; 2556 2557 if (<MODE>mode != <VnSI>mode) 2558 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo)); 2559 else 2560 emit_move_insn (out, inlo); 2561 } 2562 [(set_attr "type" "vop2") 2563 (set_attr "length" "4")]) 2564 2565(define_insn_and_split "trunc<vndi><mode>2_exec" 2566 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 2567 (vec_merge:V_INT_1REG 2568 (truncate:V_INT_1REG 2569 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")) 2570 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0") 2571 (match_operand:DI 3 "gcn_exec_operand" " e")))] 2572 "" 2573 "#" 2574 "reload_completed" 2575 [(const_int 0)] 2576 { 2577 rtx out = operands[0]; 2578 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0); 2579 rtx merge = operands[2]; 2580 rtx exec = operands[3]; 2581 2582 if (<MODE>mode != <VnSI>mode) 2583 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec)); 2584 else 2585 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec)); 2586 } 2587 [(set_attr "type" "vop2") 2588 (set_attr "length" "4")]) 2589 2590(define_insn_and_split "<convop><mode><vndi>2" 2591 [(set (match_operand:<VnDI> 0 "register_operand" "=v") 2592 (any_extend:<VnDI> 2593 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))] 2594 "" 2595 "#" 2596 "reload_completed" 2597 [(const_int 0)] 2598 { 2599 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0); 2600 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1); 2601 rtx in = operands[1]; 2602 2603 if (<MODE>mode != <VnSI>mode) 2604 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in)); 2605 else 2606 emit_move_insn (outlo, in); 2607 if ('<su>' == 's') 2608 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31))); 2609 else 2610 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx)); 2611 } 2612 [(set_attr "type" "mult") 2613 (set_attr "length" "12")]) 2614 2615(define_insn_and_split "<convop><mode><vndi>2_exec" 2616 [(set (match_operand:<VnDI> 0 "register_operand" "=v") 2617 (vec_merge:<VnDI> 2618 (any_extend:<VnDI> 2619 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")) 2620 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0") 2621 (match_operand:DI 3 "gcn_exec_operand" " e")))] 2622 "" 2623 "#" 2624 "reload_completed" 2625 [(const_int 0)] 2626 { 2627 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0); 2628 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1); 2629 rtx in = operands[1]; 2630 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0); 2631 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1); 2632 rtx exec = operands[3]; 2633 2634 if (<MODE>mode != <VnSI>mode) 2635 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec)); 2636 else 2637 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec)); 2638 if ('<su>' == 's') 2639 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi, 2640 exec)); 2641 else 2642 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi, 2643 exec)); 2644 } 2645 [(set_attr "type" "mult") 2646 (set_attr "length" "12")]) 2647 2648;; }}} 2649;; {{{ Vector comparison/merge 2650 2651(define_insn "vec_cmp<mode>di" 2652 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg") 2653 
(match_operator:DI 1 "gcn_fp_compare_operator" 2654 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA") 2655 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])) 2656 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))] 2657 "" 2658 "@ 2659 v_cmp%E1\tvcc, %2, %3 2660 v_cmp%E1\tvcc, %2, %3 2661 v_cmpx%E1\tvcc, %2, %3 2662 v_cmpx%E1\tvcc, %2, %3 2663 v_cmp%E1\t%0, %2, %3 2664 v_cmp%E1\t%0, %2, %3" 2665 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a") 2666 (set_attr "length" "4,8,4,8,8,8")]) 2667 2668(define_expand "vec_cmpu<mode>di" 2669 [(match_operand:DI 0 "register_operand") 2670 (match_operator 1 "gcn_compare_operator" 2671 [(match_operand:V_INT_noQI 2 "gcn_alu_operand") 2672 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])] 2673 "" 2674 { 2675 /* Unsigned comparisons use the same patterns as signed comparisons, 2676 except that they use unsigned operators (e.g. LTU vs LT). 2677 The '%E1' directive then does the Right Thing. */ 2678 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2], 2679 operands[3])); 2680 DONE; 2681 }) 2682 2683; There's no instruction for 8-bit vector comparison, so we need to extend. 2684(define_expand "vec_cmp<u><mode>di" 2685 [(match_operand:DI 0 "register_operand") 2686 (match_operator 1 "gcn_compare_operator" 2687 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand")) 2688 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])] 2689 "can_create_pseudo_p ()" 2690 { 2691 rtx sitmp1 = gen_reg_rtx (<VnSI>mode); 2692 rtx sitmp2 = gen_reg_rtx (<VnSI>mode); 2693 2694 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2])); 2695 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3])); 2696 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2)); 2697 DONE; 2698 }) 2699 2700(define_insn "vec_cmp<mode>di_exec" 2701 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg") 2702 (and:DI 2703 (match_operator 1 "gcn_fp_compare_operator" 2704 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA") 2705 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]) 2706 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e"))) 2707 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))] 2708 "" 2709 "@ 2710 v_cmp%E1\tvcc, %2, %3 2711 v_cmp%E1\tvcc, %2, %3 2712 v_cmpx%E1\tvcc, %2, %3 2713 v_cmpx%E1\tvcc, %2, %3 2714 v_cmp%E1\t%0, %2, %3 2715 v_cmp%E1\t%0, %2, %3" 2716 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a") 2717 (set_attr "length" "4,8,4,8,8,8")]) 2718 2719(define_expand "vec_cmpu<mode>di_exec" 2720 [(match_operand:DI 0 "register_operand") 2721 (match_operator 1 "gcn_compare_operator" 2722 [(match_operand:V_INT_noQI 2 "gcn_alu_operand") 2723 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")]) 2724 (match_operand:DI 4 "gcn_exec_reg_operand")] 2725 "" 2726 { 2727 /* Unsigned comparisons use the same patterns as signed comparisons, 2728 except that they use unsigned operators (e.g. LTU vs LT). 2729 The '%E1' directive then does the Right Thing. 
*/
2730 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
2731 operands[2], operands[3],
2732 operands[4]));
2733 DONE;
2734 })
2735
2736(define_expand "vec_cmp<u><mode>di_exec"
2737 [(match_operand:DI 0 "register_operand")
2738 (match_operator 1 "gcn_compare_operator"
2739 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
2740 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
2741 (match_operand:DI 4 "gcn_exec_reg_operand")]
2742 "can_create_pseudo_p ()"
2743 {
2744 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
2745 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
2746
2747 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
2748 operands[2], operands[4]));
2749 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
2750 operands[3], operands[4]));
2751 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
2752 sitmp2, operands[4]));
2753 DONE;
2754 })
2755
2756(define_insn "vec_cmp<mode>di_dup"
2757 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2758 (match_operator:DI 1 "gcn_fp_compare_operator"
2759 [(vec_duplicate:V_noQI
2760 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2761 " Sv, B,Sv,B, A"))
2762 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
2763 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2764 ""
2765 "@
2766 v_cmp%E1\tvcc, %2, %3
2767 v_cmp%E1\tvcc, %2, %3
2768 v_cmpx%E1\tvcc, %2, %3
2769 v_cmpx%E1\tvcc, %2, %3
2770 v_cmp%E1\t%0, %2, %3"
2771 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2772 (set_attr "length" "4,8,4,8,8")])
2773
2774(define_insn "vec_cmp<mode>di_dup_exec"
2775 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2776 (and:DI
2777 (match_operator 1 "gcn_fp_compare_operator"
2778 [(vec_duplicate:V_noQI
2779 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2780 " Sv, B,Sv,B, A"))
2781 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
2782 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2783 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2784 ""
2785 "@
2786 v_cmp%E1\tvcc, %2, %3
2787 v_cmp%E1\tvcc, %2, %3
2788 v_cmpx%E1\tvcc, %2, %3
2789 v_cmpx%E1\tvcc, %2, %3
2790 v_cmp%E1\t%0, %2, %3"
2791 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2792 (set_attr "length" "4,8,4,8,8")])
2793
2794(define_expand "vcond_mask_<mode>di"
2795 [(parallel
2796 [(set (match_operand:V_ALL 0 "register_operand" "")
2797 (vec_merge:V_ALL
2798 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
2799 (match_operand:V_ALL 2 "gcn_alu_operand" "")
2800 (match_operand:DI 3 "register_operand" "")))
2801 (clobber (scratch:<VnDI>))])]
2802 ""
2803 "")
2804
2805(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
2806 [(match_operand:V_ALL 0 "register_operand")
2807 (match_operand:V_ALL 1 "gcn_vop3_operand")
2808 (match_operand:V_ALL 2 "gcn_alu_operand")
2809 (match_operator 3 "gcn_fp_compare_operator"
2810 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
2811 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
2812 ""
2813 {
2814 rtx tmp = gen_reg_rtx (DImode);
2815 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
2816 (tmp, operands[3], operands[4], operands[5]));
2817 emit_insn (gen_vcond_mask_<V_ALL:mode>di
2818 (operands[0], operands[1], operands[2], tmp));
2819 DONE;
2820 })
2821
2822(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
2823 [(match_operand:V_ALL 0 "register_operand")
2824 (match_operand:V_ALL 1 "gcn_vop3_operand")
2825 (match_operand:V_ALL 2 "gcn_alu_operand")
2826 (match_operator 3 "gcn_fp_compare_operator"
2827 [(match_operand:V_ALL_ALT 4 
"gcn_alu_operand") 2828 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")]) 2829 (match_operand:DI 6 "gcn_exec_reg_operand" "e")] 2830 "" 2831 { 2832 rtx tmp = gen_reg_rtx (DImode); 2833 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec 2834 (tmp, operands[3], operands[4], operands[5], operands[6])); 2835 emit_insn (gen_vcond_mask_<V_ALL:mode>di 2836 (operands[0], operands[1], operands[2], tmp)); 2837 DONE; 2838 }) 2839 2840(define_expand "vcondu<V_ALL:mode><V_INT:mode>" 2841 [(match_operand:V_ALL 0 "register_operand") 2842 (match_operand:V_ALL 1 "gcn_vop3_operand") 2843 (match_operand:V_ALL 2 "gcn_alu_operand") 2844 (match_operator 3 "gcn_fp_compare_operator" 2845 [(match_operand:V_INT 4 "gcn_alu_operand") 2846 (match_operand:V_INT 5 "gcn_vop3_operand")])] 2847 "" 2848 { 2849 rtx tmp = gen_reg_rtx (DImode); 2850 emit_insn (gen_vec_cmpu<V_INT:mode>di 2851 (tmp, operands[3], operands[4], operands[5])); 2852 emit_insn (gen_vcond_mask_<V_ALL:mode>di 2853 (operands[0], operands[1], operands[2], tmp)); 2854 DONE; 2855 }) 2856 2857(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec" 2858 [(match_operand:V_ALL 0 "register_operand") 2859 (match_operand:V_ALL 1 "gcn_vop3_operand") 2860 (match_operand:V_ALL 2 "gcn_alu_operand") 2861 (match_operator 3 "gcn_fp_compare_operator" 2862 [(match_operand:V_INT 4 "gcn_alu_operand") 2863 (match_operand:V_INT 5 "gcn_vop3_operand")]) 2864 (match_operand:DI 6 "gcn_exec_reg_operand" "e")] 2865 "" 2866 { 2867 rtx tmp = gen_reg_rtx (DImode); 2868 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec 2869 (tmp, operands[3], operands[4], operands[5], operands[6])); 2870 emit_insn (gen_vcond_mask_<V_ALL:mode>di 2871 (operands[0], operands[1], operands[2], tmp)); 2872 DONE; 2873 }) 2874 2875;; }}} 2876;; {{{ Fully masked loop support 2877 2878(define_expand "while_ultsidi" 2879 [(match_operand:DI 0 "register_operand") 2880 (match_operand:SI 1 "") 2881 (match_operand:SI 2 "")] 2882 "" 2883 { 2884 if (GET_CODE (operands[1]) != CONST_INT 2885 || GET_CODE (operands[2]) != CONST_INT) 2886 { 2887 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); 2888 rtx tmp = _0_1_2_3; 2889 if (GET_CODE (operands[1]) != CONST_INT 2890 || INTVAL (operands[1]) != 0) 2891 { 2892 tmp = gen_reg_rtx (V64SImode); 2893 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1])); 2894 } 2895 emit_insn (gen_vec_cmpv64sidi_dup (operands[0], 2896 gen_rtx_GT (VOIDmode, 0, 0), 2897 operands[2], tmp)); 2898 } 2899 else 2900 { 2901 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]); 2902 HOST_WIDE_INT mask = (diff >= 64 ? -1 2903 : ~((unsigned HOST_WIDE_INT)-1 << diff)); 2904 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask)); 2905 } 2906 DONE; 2907 }) 2908 2909(define_expand "maskload<mode>di" 2910 [(match_operand:V_ALL 0 "register_operand") 2911 (match_operand:V_ALL 1 "memory_operand") 2912 (match_operand 2 "")] 2913 "" 2914 { 2915 rtx exec = force_reg (DImode, operands[2]); 2916 rtx addr = gcn_expand_scalar_to_vector_address 2917 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode)); 2918 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); 2919 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); 2920 2921 /* Masked lanes are required to hold zero. 
*/ 2922 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0)); 2923 2924 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, 2925 operands[0], exec)); 2926 DONE; 2927 }) 2928 2929(define_expand "maskstore<mode>di" 2930 [(match_operand:V_ALL 0 "memory_operand") 2931 (match_operand:V_ALL 1 "register_operand") 2932 (match_operand 2 "")] 2933 "" 2934 { 2935 rtx exec = force_reg (DImode, operands[2]); 2936 rtx addr = gcn_expand_scalar_to_vector_address 2937 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode)); 2938 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); 2939 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); 2940 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec)); 2941 DONE; 2942 }) 2943 2944(define_expand "mask_gather_load<mode><vnsi>" 2945 [(match_operand:V_ALL 0 "register_operand") 2946 (match_operand:DI 1 "register_operand") 2947 (match_operand:<VnSI> 2 "register_operand") 2948 (match_operand 3 "immediate_operand") 2949 (match_operand:SI 4 "gcn_alu_operand") 2950 (match_operand:DI 5 "")] 2951 "" 2952 { 2953 rtx exec = force_reg (DImode, operands[5]); 2954 2955 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], 2956 operands[2], operands[4], 2957 INTVAL (operands[3]), exec); 2958 2959 /* Masked lanes are required to hold zero. */ 2960 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0)); 2961 2962 if (GET_MODE (addr) == <VnDI>mode) 2963 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, 2964 const0_rtx, const0_rtx, 2965 const0_rtx, operands[0], 2966 exec)); 2967 else 2968 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1], 2969 addr, const0_rtx, 2970 const0_rtx, const0_rtx, 2971 operands[0], exec)); 2972 DONE; 2973 }) 2974 2975(define_expand "mask_scatter_store<mode><vnsi>" 2976 [(match_operand:DI 0 "register_operand") 2977 (match_operand:<VnSI> 1 "register_operand") 2978 (match_operand 2 "immediate_operand") 2979 (match_operand:SI 3 "gcn_alu_operand") 2980 (match_operand:V_ALL 4 "register_operand") 2981 (match_operand:DI 5 "")] 2982 "" 2983 { 2984 rtx exec = force_reg (DImode, operands[5]); 2985 2986 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], 2987 operands[1], operands[3], 2988 INTVAL (operands[2]), exec); 2989 2990 if (GET_MODE (addr) == <VnDI>mode) 2991 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, 2992 operands[4], const0_rtx, 2993 const0_rtx, 2994 exec)); 2995 else 2996 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr, 2997 const0_rtx, operands[4], 2998 const0_rtx, const0_rtx, 2999 exec)); 3000 DONE; 3001 }) 3002 3003(define_code_iterator cond_op [plus minus mult]) 3004 3005(define_expand "cond_<expander><mode>" 3006 [(match_operand:V_ALL 0 "register_operand") 3007 (match_operand:DI 1 "register_operand") 3008 (cond_op:V_ALL 3009 (match_operand:V_ALL 2 "gcn_alu_operand") 3010 (match_operand:V_ALL 3 "gcn_alu_operand")) 3011 (match_operand:V_ALL 4 "register_operand")] 3012 "" 3013 { 3014 operands[1] = force_reg (DImode, operands[1]); 3015 operands[2] = force_reg (<MODE>mode, operands[2]); 3016 3017 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], 3018 operands[3], operands[4], 3019 operands[1])); 3020 DONE; 3021 }) 3022 3023;; TODO smin umin smax umax 3024(define_code_iterator cond_bitop [and ior xor]) 3025 3026(define_expand "cond_<expander><mode>" 3027 [(match_operand:V_INT 0 "register_operand") 3028 (match_operand:DI 1 "register_operand") 
3029 (cond_bitop:V_INT
3030 (match_operand:V_INT 2 "gcn_alu_operand")
3031 (match_operand:V_INT 3 "gcn_alu_operand"))
3032 (match_operand:V_INT 4 "register_operand")]
3033 ""
3034 {
3035 operands[1] = force_reg (DImode, operands[1]);
3036 operands[2] = force_reg (<MODE>mode, operands[2]);
3037
3038 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3039 operands[3], operands[4],
3040 operands[1]));
3041 DONE;
3042 })
3043
3044;; }}}
3045;; {{{ Vector reductions
3046
3047(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
3048 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
3049 UNSPEC_PLUS_DPP_SHR
3050 UNSPEC_AND_DPP_SHR
3051 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3052
3053(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
3054 UNSPEC_AND_DPP_SHR
3055 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3056
3057; FIXME: Isn't there a better way of doing this?
3058(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
3059 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
3060 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
3061 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
3062 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
3063 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
3064 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
3065 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
3066
3067(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
3068 (UNSPEC_SMAX_DPP_SHR "smax")
3069 (UNSPEC_UMIN_DPP_SHR "umin")
3070 (UNSPEC_UMAX_DPP_SHR "umax")
3071 (UNSPEC_PLUS_DPP_SHR "plus")
3072 (UNSPEC_AND_DPP_SHR "and")
3073 (UNSPEC_IOR_DPP_SHR "ior")
3074 (UNSPEC_XOR_DPP_SHR "xor")])
3075
3076(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
3077 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
3078 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
3079 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
3080 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
3081 (UNSPEC_AND_DPP_SHR "v_and%B0")
3082 (UNSPEC_IOR_DPP_SHR "v_or%B0")
3083 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
3084
3085(define_expand "reduc_<reduc_op>_scal_<mode>"
3086 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
3087 (unspec:<SCALAR_MODE>
3088 [(match_operand:V_ALL 1 "register_operand")]
3089 REDUC_UNSPEC))]
3090 ""
3091 {
3092 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
3093 <reduc_unspec>);
3094
3095 /* The result of the reduction is in lane 63 of tmp. */
3096 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
3097
3098 DONE;
3099 })
3100
3101;; Warning: This "-ffast-math" implementation converts in-order reductions
3102;; into associative reductions. It's also used where OpenMP or
3103;; OpenACC parallelization has already broken the in-order semantics.
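;; Under those conditions fold_left_plus is free to reassociate, so it is
;; implemented below as the tree reduction (reduc_plus_scal) followed by a
;; single scalar add of the initial value.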
3104(define_expand "fold_left_plus_<mode>" 3105 [(match_operand:<SCALAR_MODE> 0 "register_operand") 3106 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") 3107 (match_operand:V_FP 2 "gcn_alu_operand")] 3108 "can_create_pseudo_p () 3109 && (flag_openacc || flag_openmp 3110 || flag_associative_math)" 3111 { 3112 rtx dest = operands[0]; 3113 rtx scalar = operands[1]; 3114 rtx vector = operands[2]; 3115 rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode); 3116 3117 emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector)); 3118 emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp)); 3119 DONE; 3120 }) 3121 3122(define_insn "*<reduc_op>_dpp_shr_<mode>" 3123 [(set (match_operand:V_1REG 0 "register_operand" "=v") 3124 (unspec:V_1REG 3125 [(match_operand:V_1REG 1 "register_operand" "v") 3126 (match_operand:V_1REG 2 "register_operand" "v") 3127 (match_operand:SI 3 "const_int_operand" "n")] 3128 REDUC_UNSPEC))] 3129 ; GCN3 requires a carry out, GCN5 not 3130 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode) 3131 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)" 3132 { 3133 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>", 3134 <reduc_unspec>, INTVAL (operands[3])); 3135 } 3136 [(set_attr "type" "vop_dpp") 3137 (set_attr "length" "8")]) 3138 3139(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>" 3140 [(set (match_operand:V_DI 0 "register_operand" "=v") 3141 (unspec:V_DI 3142 [(match_operand:V_DI 1 "register_operand" "v") 3143 (match_operand:V_DI 2 "register_operand" "v") 3144 (match_operand:SI 3 "const_int_operand" "n")] 3145 REDUC_2REG_UNSPEC))] 3146 "" 3147 "#" 3148 "reload_completed" 3149 [(set (match_dup 4) 3150 (unspec:<VnSI> 3151 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC)) 3152 (set (match_dup 5) 3153 (unspec:<VnSI> 3154 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))] 3155 { 3156 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); 3157 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); 3158 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0); 3159 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1); 3160 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0); 3161 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1); 3162 } 3163 [(set_attr "type" "vmult") 3164 (set_attr "length" "16")]) 3165 3166; Special cases for addition. 
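; For the two-register V_DI modes the DPP addition step is done in two
; halves: the low halves are added with the carry-out left in VCC, then the
; high halves are added with an add-with-carry that consumes it, as the
; patterns and splitter below show.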
3167 3168(define_insn "*plus_carry_dpp_shr_<mode>" 3169 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") 3170 (unspec:V_INT_1REG 3171 [(match_operand:V_INT_1REG 1 "register_operand" "v") 3172 (match_operand:V_INT_1REG 2 "register_operand" "v") 3173 (match_operand:SI 3 "const_int_operand" "n")] 3174 UNSPEC_PLUS_CARRY_DPP_SHR)) 3175 (clobber (reg:DI VCC_REG))] 3176 "" 3177 { 3178 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32", 3179 UNSPEC_PLUS_CARRY_DPP_SHR, 3180 INTVAL (operands[3])); 3181 } 3182 [(set_attr "type" "vop_dpp") 3183 (set_attr "length" "8")]) 3184 3185(define_insn "*plus_carry_in_dpp_shr_<mode>" 3186 [(set (match_operand:V_SI 0 "register_operand" "=v") 3187 (unspec:V_SI 3188 [(match_operand:V_SI 1 "register_operand" "v") 3189 (match_operand:V_SI 2 "register_operand" "v") 3190 (match_operand:SI 3 "const_int_operand" "n") 3191 (match_operand:DI 4 "register_operand" "cV")] 3192 UNSPEC_PLUS_CARRY_IN_DPP_SHR)) 3193 (clobber (reg:DI VCC_REG))] 3194 "" 3195 { 3196 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32", 3197 UNSPEC_PLUS_CARRY_IN_DPP_SHR, 3198 INTVAL (operands[3])); 3199 } 3200 [(set_attr "type" "vop_dpp") 3201 (set_attr "length" "8")]) 3202 3203(define_insn_and_split "*plus_carry_dpp_shr_<mode>" 3204 [(set (match_operand:V_DI 0 "register_operand" "=v") 3205 (unspec:V_DI 3206 [(match_operand:V_DI 1 "register_operand" "v") 3207 (match_operand:V_DI 2 "register_operand" "v") 3208 (match_operand:SI 3 "const_int_operand" "n")] 3209 UNSPEC_PLUS_CARRY_DPP_SHR)) 3210 (clobber (reg:DI VCC_REG))] 3211 "" 3212 "#" 3213 "reload_completed" 3214 [(parallel [(set (match_dup 4) 3215 (unspec:<VnSI> 3216 [(match_dup 6) (match_dup 8) (match_dup 3)] 3217 UNSPEC_PLUS_CARRY_DPP_SHR)) 3218 (clobber (reg:DI VCC_REG))]) 3219 (parallel [(set (match_dup 5) 3220 (unspec:<VnSI> 3221 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)] 3222 UNSPEC_PLUS_CARRY_IN_DPP_SHR)) 3223 (clobber (reg:DI VCC_REG))])] 3224 { 3225 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); 3226 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); 3227 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0); 3228 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1); 3229 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0); 3230 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1); 3231 } 3232 [(set_attr "type" "vmult") 3233 (set_attr "length" "16")]) 3234 3235; Instructions to move a scalar value from lane 63 of a vector register. 
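; The DPP reductions above leave the wavefront-wide result in lane 63, so
; the patterns below read it either directly into a scalar register with
; v_readlane_b32, or via a DPP move when a VGPR destination is wanted.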
3236(define_insn "mov_from_lane63_<mode>" 3237 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") 3238 (unspec:<SCALAR_MODE> 3239 [(match_operand:V_1REG 1 "register_operand" " v,v")] 3240 UNSPEC_MOV_FROM_LANE63))] 3241 "" 3242 "@ 3243 v_readlane_b32\t%0, %1, 63 3244 v_mov_b32\t%0, %1 wave_ror:1" 3245 [(set_attr "type" "vop3a,vop_dpp") 3246 (set_attr "exec" "none,*") 3247 (set_attr "length" "8")]) 3248 3249(define_insn "mov_from_lane63_<mode>" 3250 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") 3251 (unspec:<SCALAR_MODE> 3252 [(match_operand:V_2REG 1 "register_operand" " v,v")] 3253 UNSPEC_MOV_FROM_LANE63))] 3254 "" 3255 "@ 3256 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63 3257 * if (REGNO (operands[0]) <= REGNO (operands[1])) \ 3258 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \ 3259 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \ 3260 else \ 3261 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \ 3262 \"v_mov_b32\t%L0, %L1 wave_ror:1\";" 3263 [(set_attr "type" "vop3a,vop_dpp") 3264 (set_attr "exec" "none,*") 3265 (set_attr "length" "8")]) 3266 3267;; }}} 3268;; {{{ Miscellaneous 3269 3270(define_expand "vec_series<mode>" 3271 [(match_operand:V_SI 0 "register_operand") 3272 (match_operand:SI 1 "gcn_alu_operand") 3273 (match_operand:SI 2 "gcn_alu_operand")] 3274 "" 3275 { 3276 rtx tmp = gen_reg_rtx (<MODE>mode); 3277 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1)); 3278 3279 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2])); 3280 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1])); 3281 DONE; 3282 }) 3283 3284(define_expand "vec_series<mode>" 3285 [(match_operand:V_DI 0 "register_operand") 3286 (match_operand:DI 1 "gcn_alu_operand") 3287 (match_operand:DI 2 "gcn_alu_operand")] 3288 "" 3289 { 3290 rtx tmp = gen_reg_rtx (<MODE>mode); 3291 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1)); 3292 rtx op1vec = gen_reg_rtx (<MODE>mode); 3293 3294 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2])); 3295 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1])); 3296 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec)); 3297 DONE; 3298 }) 3299 3300;; }}} 3301