;; Copyright (C) 2007-2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; For the internal conditional math routines:

;; operand 0 is always the result
;; operand 1 is always the predicate
;; operand 2, 3, and sometimes 4 are the input values.
;; operand 4 or 5 is the value the result takes when the predicate is zero.
;; operand 5 or 6 is the floating point status register to use.
;; operand 6 or 7 is the rounding to do.
;;   (0 = single, 1 = double, 2 = none)
;;
;; addrf3_cond   - F0 = F2 + F3
;; subrf3_cond   - F0 = F2 - F3
;; mulrf3_cond   - F0 = F2 * F3
;; nmulrf3_cond  - F0 = - (F2 * F3)
;; m1addrf4_cond - F0 = (F2 * F3) + F4
;; m1subrf4_cond - F0 = (F2 * F3) - F4
;; m2addrf4_cond - F0 = F2 + (F3 * F4)
;; m2subrf4_cond - F0 = F2 - (F3 * F4)

;; Operand layout of the patterns below, as used by their templates:
;; every pattern is an if_then_else on (operand 1 != 0).  When the
;; predicate is non-zero the arithmetic result is stored, otherwise the
;; "else" operand is.  For the two-input patterns the else value is
;; operand 4, ".s%5" selects the status field and "%R6" prints the
;; rounding completer; the three-input patterns use operands 5, 6 and 7
;; for the same roles.  The else operand is either tied to the
;; destination (constraint "0") or satisfies constraint "H".

;; Basic plus/minus/mult operations

(define_insn "addrf3_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (plus:RF
            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
          (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fadd%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

(define_insn "subrf3_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (minus:RF
            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
          (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fsub%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

(define_insn "mulrf3_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (mult:RF
            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
          (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fmpy%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

;; neg-mult operation

(define_insn "nmulrf3_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (neg:RF (mult:RF
            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")))
          (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fnmpy%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

;; add-mult/sub-mult operations (mult as op1)

(define_insn "m1addrf4_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (plus:RF
            (mult:RF
              (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
            (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
          (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

(define_insn "m1subrf4_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (minus:RF
            (mult:RF
              (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
            (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
          (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

;; add-mult/sub-mult operations (mult as op2)
;; The templates list the multiplicands (%F3, %F4) first and the addend
;; (%F2) last, so the RTL operand order differs from the assembly order.

(define_insn "m2addrf4_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (plus:RF
            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
            (mult:RF
              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
              (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
          (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

(define_insn "m2subrf4_cond"
  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
        (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
                                (const_int 0))
          (minus:RF
            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
            (mult:RF
              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
              (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
          (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])

;; Conversions to/from RF and SF/DF/XF
;; These conversions should not generate any code but make it possible
;; for all the instructions used to implement floating point division
;; to be written for RFmode only and to not have to handle multiple
;; modes or to have to handle a register in more than one mode.

;; The floating point modes that are converted to and from RFmode.
(define_mode_iterator SDX_F [SF DF XF])

;; Widen an SF/DF/XF value to RFmode.  The "#" template produces no
;; assembly of its own; the define_split that follows replaces the insn
;; once reload has completed.
(define_insn "extend<mode>rf2"
  [(set (match_operand:RF 0 "fr_register_operand" "=f")
        (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "#"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "yes")])

;; After reload, rewrite the extension as a plain set from an RFmode
;; register: FR_REG (0) for the constant 0.0, FR_REG (1) for the constant
;; 1.0, otherwise the source's own hard register viewed in RFmode.
(define_split
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "")))]
  "reload_completed"
  [(set (match_dup 0) (match_dup 2))]
{
  if (operands[1] == CONST0_RTX (<MODE>mode))
    operands[2] = gen_rtx_REG (RFmode, FR_REG (0));
  else if (operands[1] == CONST1_RTX (<MODE>mode))
    operands[2] = gen_rtx_REG (RFmode, FR_REG (1));
  else
    operands[2] = gen_rtx_REG (RFmode, REGNO (operands[1]));
})


;; Narrow an RFmode value to SF/DF/XF.  As with the extension, the "#"
;; template defers to the define_split that follows.
(define_insn "truncrf<mode>2"
  [(set (match_operand:SDX_F 0 "fr_register_operand" "=f")
        (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "#"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "yes")])

;; After reload, rewrite the truncation as a plain set from the source
;; register viewed in the destination mode (FR_REG (0) / FR_REG (1) for
;; the constants 0.0 / 1.0).
(define_split
  [(set (match_operand:SDX_F 0 "fr_register_operand" "")
        (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "")))]
  "reload_completed"
  [(set (match_dup 0) (match_dup 2))]
{
  if (operands[1] == CONST0_RTX (RFmode))
    operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (0));
  else if (operands[1] == CONST1_RTX (RFmode))
    operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (1));
  else
    operands[2] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
})

;; Float to integer truncations using an alternative status register.

;; Signed RFmode -> DImode truncating conversion.  Operand 2 is a
;; constant printed after ".s" to select the status field.
(define_insn "fix_truncrfdi2_alts"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
   (use (match_operand:SI 2 "const_int_operand" ""))]
  ""
  "fcvt.fx.trunc.s%2 %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

;; Unsigned counterpart of fix_truncrfdi2_alts.
(define_insn "fixuns_truncrfdi2_alts"
  [(set (match_operand:DI 0 "fr_register_operand" "=f")
        (unsigned_fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
   (use (match_operand:SI 2 "const_int_operand" ""))]
  ""
  "fcvt.fxu.trunc.s%2 %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])

;; Build an RFmode value from a general register with setf.exp.
(define_insn "setf_exp_rf"
  [(set (match_operand:RF 0 "fr_register_operand" "=f")
        (unspec:RF [(match_operand:DI 1 "register_operand" "r")]
                   UNSPEC_SETF_EXP))]
  ""
  "setf.exp %0 = %1"
  [(set_attr "itanium_class" "frfr")])

;; Reciprocal approximation

;; frcpa writes two results: the RFmode value in operand 0 and the
;; predicate in operand 3, both derived from operands 1 and 2.
;; Operand 4 is a constant printed after ".s" to select the status field.
(define_insn "recip_approx_rf"
  [(set (match_operand:RF 0 "fr_register_operand" "=f")
        (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")
                    (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG")]
                   UNSPEC_FR_RECIP_APPROX_RES))
   (set (match_operand:CCI 3 "register_operand" "=c")
        (unspec:CCI [(match_dup 1) (match_dup 2)] UNSPEC_FR_RECIP_APPROX))
   (use (match_operand:SI 4 "const_int_operand" ""))]
  ""
  "frcpa.s%4 %0, %3 = %F1, %F2"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "no")])

;; Single precision floating point division

;; Enabled by TARGET_INLINE_FLOAT_DIV.  Emits the minimum latency
;; sequence when the option equals INL_MIN_LAT and the maximum
;; throughput sequence otherwise.
(define_expand "divsf3"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx insn;
  if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
    insn = gen_divsf3_internal_lat (operands[0], operands[1], operands[2]);
  else
    insn = gen_divsf3_internal_thr (operands[0], operands[1], operands[2]);
  emit_insn (insn);
  DONE;
})

;; Single precision floating point division (maximum throughput algorithm).

;; Both inputs are first moved into RFmode.  frcpa then produces the
;; initial value y and the predicate cond, and every later step is a
;; predicated RFmode operation that only takes effect when cond is
;; non-zero.  The intermediate steps use status field 1 and 0.0 as their
;; else value; the final step uses status field 0 and passes y as its
;; else value, so q_res is frcpa's own result whenever cond is zero.
;; NOTE(review): presumably frcpa clears the predicate exactly when it has
;; already delivered the final quotient -- confirm against the ISA manual.
(define_expand "divsf3_internal_thr"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx y = gen_reg_rtx (RFmode);
  rtx a = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx q_res = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  /* Status field selectors (.s0 / .s1).  */
  rtx status0 = CONST0_RTX (SImode);
  rtx status1 = CONST1_RTX (SImode);
  /* Rounding selectors: 0 = single, 2 = none.  */
  rtx trunc_sgl = CONST0_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Empty conversions to put inputs into RFmode.  */
  emit_insn (gen_extendsfrf2 (a, operands[1]));
  emit_insn (gen_extendsfrf2 (b, operands[2]));
  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* y1 = y + (y * e)  */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1,
                                trunc_off));
  /* y2 = y + (y1 * e)  */
  emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e, zero, status1,
                                trunc_off));
  /* q = single(a * y2)  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y2, zero, status1, trunc_sgl));
  /* r = a - (q * b)  */
  emit_insn (gen_m2subrf4_cond (r, cond, a, q, b, zero, status1,
                                trunc_off));
  /* Q = single (q + (r * y2))  */
  emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y2, y, status0,
                                trunc_sgl));
  /* Conversion back into SFmode.  */
  emit_insn (gen_truncrfsf2 (operands[0], q_res));
  DONE;
})

;; Single precision floating point division (minimum latency algorithm).

;; Same overall structure as the other inline float division expanders:
;; widen the inputs to RFmode, run frcpa to get y and the predicate cond,
;; refine with predicated RFmode operations (status field 1, else value
;; 0.0), and finish with a step that uses status field 0 and y as its
;; else value before narrowing the result back to SFmode.
(define_expand "divsf3_internal_lat"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx y = gen_reg_rtx (RFmode);
  rtx a = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx q_res = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  /* Status field selectors (.s0 / .s1).  */
  rtx status0 = CONST0_RTX (SImode);
  rtx status1 = CONST1_RTX (SImode);
  /* Rounding selectors: 0 = single, 2 = none.  */
  rtx trunc_sgl = CONST0_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Empty conversions to put inputs into RFmode.  */
  emit_insn (gen_extendsfrf2 (a, operands[1]));
  emit_insn (gen_extendsfrf2 (b, operands[2]));
  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
  /* q = a * y  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* e1 = e + (e * e)  */
  emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1,
                                trunc_off));
  /* q1 = single(q + (q * e1))  */
  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1,
                                trunc_sgl));
  /* y1 = y + (y * e1)  */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1,
                                trunc_off));
  /* r = a - (q1 * b)  */
  emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1,
                                trunc_off));
  /* Q = single (q1 + (r * y1))  */
  emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0,
                                trunc_sgl));
  /* Conversion back into SFmode.  */
  emit_insn (gen_truncrfsf2 (operands[0], q_res));
  DONE;
})

;; Double precision floating point division

;; Enabled by TARGET_INLINE_FLOAT_DIV.  Emits the minimum latency
;; sequence when the option equals INL_MIN_LAT and the maximum
;; throughput sequence otherwise.
(define_expand "divdf3"
  [(set (match_operand:DF 0 "fr_register_operand" "")
        (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx insn;
  if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
    insn = gen_divdf3_internal_lat (operands[0], operands[1], operands[2]);
  else
    insn = gen_divdf3_internal_thr (operands[0], operands[1], operands[2]);
  emit_insn (insn);
  DONE;
})

;; Double precision floating point division (maximum throughput algorithm).

;; Intermediate steps run unrounded (trunc_off) in status field 1; the
;; two quotient steps round to double (trunc_dbl), and the last one uses
;; status field 0 with y as its else value.
(define_expand "divdf3_internal_thr"
  [(set (match_operand:DF 0 "fr_register_operand" "")
        (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx q_res = gen_reg_rtx (RFmode);
  rtx a = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx y = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e2 = gen_reg_rtx (RFmode);
  rtx y3 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  /* Status field selectors (.s0 / .s1).  */
  rtx status0 = CONST0_RTX (SImode);
  rtx status1 = CONST1_RTX (SImode);
  /* Rounding selectors: 1 = double, 2 = none.  */
  rtx trunc_dbl = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);
  /* Empty conversions to put inputs into RFmode  */
  emit_insn (gen_extenddfrf2 (a, operands[1]));
  emit_insn (gen_extenddfrf2 (b, operands[2]));
  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* y1 = y + (y * e)  */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1,
                                trunc_off));
  /* e1 = e * e  */
  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
  /* y2 = y1 + (y1 * e1)  */
  emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1,
                                trunc_off));
  /* e2 = e1 * e1  */
  emit_insn (gen_mulrf3_cond (e2, cond, e1, e1, zero, status1, trunc_off));
  /* y3 = y2 + (y2 * e2)  */
  emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e2, zero, status1,
                                trunc_off));
  /* q = double (a * y3)  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y3, zero, status1, trunc_dbl));
  /* r = a - (b * q)  */
  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1,
                                trunc_off));
  /* Q = double (q + (r * y3))  */
  emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y3, y, status0,
                                trunc_dbl));
  /* Conversion back into DFmode  */
  emit_insn (gen_truncrfdf2 (operands[0], q_res));
  DONE;
})

;; Double precision floating point division (minimum latency algorithm).

;; Intermediate steps run unrounded (trunc_off) in status field 1; q2 is
;; rounded to double, and the final step rounds to double in status
;; field 0 with y as its else value.
(define_expand "divdf3_internal_lat"
  [(set (match_operand:DF 0 "fr_register_operand" "")
        (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx q_res = gen_reg_rtx (RFmode);
  rtx a = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx y = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e2 = gen_reg_rtx (RFmode);
  rtx q2 = gen_reg_rtx (RFmode);
  rtx e3 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx r1 = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  /* Status field selectors (.s0 / .s1).  */
  rtx status0 = CONST0_RTX (SImode);
  rtx status1 = CONST1_RTX (SImode);
  /* Rounding selectors: 1 = double, 2 = none.  */
  rtx trunc_dbl = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Empty conversions to put inputs into RFmode  */
  emit_insn (gen_extenddfrf2 (a, operands[1]));
  emit_insn (gen_extenddfrf2 (b, operands[2]));
  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* q = a * y  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
  /* e2 = e + (e * e)  */
  emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1,
                                trunc_off));
  /* e1 = e * e  */
  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
  /* e3 = e + (e1 * e1)  */
  emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1,
                                trunc_off));
  /* q1 = q + (q * e2)  */
  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1,
                                trunc_off));
  /* y1 = y + (y * e2)  */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1,
                                trunc_off));
  /* q2 = double(q + (q1 * e3))  */
  emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1,
                                trunc_dbl));
  /* y2 = y + (y1 * e3)  */
  emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1,
                                trunc_off));
  /* r1 = a - (b * q2)  */
  emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1,
                                trunc_off));
  /* Q = double (q2 + (r1 * y2))  */
  emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0,
                                trunc_dbl));
  /* Conversion back into DFmode  */
  emit_insn (gen_truncrfdf2 (operands[0], q_res));
  DONE;
})

;; Extended precision floating point division.

;; A single sequence is used regardless of the TARGET_INLINE_FLOAT_DIV
;; setting.  Every step runs unrounded (trunc_off); the final step uses
;; status field 0 with y as its else value.
(define_expand "divxf3"
  [(set (match_operand:XF 0 "fr_register_operand" "")
        (div:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "")
                (match_operand:XF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx q_res = gen_reg_rtx (RFmode);
  rtx a = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx y = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e2 = gen_reg_rtx (RFmode);
  rtx y3 = gen_reg_rtx (RFmode);
  rtx e3 = gen_reg_rtx (RFmode);
  rtx e4 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx r1 = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  /* Status field selectors (.s0 / .s1).  */
  rtx status0 = CONST0_RTX (SImode);
  rtx status1 = CONST1_RTX (SImode);
  /* Rounding selector: 2 = none.  */
  rtx trunc_off = CONST2_RTX (SImode);

  /* Empty conversions to put inputs into RFmode  */
  emit_insn (gen_extendxfrf2 (a, operands[1]));
  emit_insn (gen_extendxfrf2 (b, operands[2]));
  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* q = a * y  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
  /* e2 = e + (e * e)  */
  emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1,
                                trunc_off));
  /* e1 = e * e  */
  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
  /* y1 = y + (y * e2)  */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1,
                                trunc_off));
  /* e3 = e + (e1 * e1)  */
  emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1,
                                trunc_off));
  /* y2 = y + (y1 * e3)  */
  emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1,
                                trunc_off));
  /* r = a - (b * q)  */
  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1,
                                trunc_off));
  /* e4 = 1 - (b * y2)  */
  emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1,
                                trunc_off));
  /* q1 = q + (r * y2)  */
  emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1,
                                trunc_off));
  /* y3 = y2 + (y2 * e4)  */
  emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1,
                                trunc_off));
  /* r1 = a - (b * q1)  */
  emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1,
                                trunc_off));
  /* Q = q1 + (r1 * y3)  */
  emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0,
                                trunc_off));
  /* Conversion back into XFmode  */
  emit_insn (gen_truncrfxf2 (operands[0], q_res));
  DONE;
})


;; Integer division operations

;; Signed SImode division, enabled by TARGET_INLINE_INT_DIV.  Both
;; operands are forced into registers and converted to RFmode as signed
;; values, a conditional trap is emitted for a zero divisor, the quotient
;; is computed in RFmode by divsi3_internal, truncated to DImode using
;; status field 1, and its low SImode part is stored in operand 0.
(define_expand "divsi3"
  [(set (match_operand:SI 0 "register_operand" "")
        (div:SI (match_operand:SI 1 "general_operand" "")
                (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op1_rf, op2_rf, op0_rf, op0_di;

  op0_rf = gen_reg_rtx (RFmode);
  op0_di = gen_reg_rtx (DImode);

  if (! register_operand (operands[1], SImode))
    operands[1] = force_reg (SImode, operands[1]);
  op1_rf = gen_reg_rtx (RFmode);
  expand_float (op1_rf, operands[1], 0);

  if (! register_operand (operands[2], SImode))
    operands[2] = force_reg (SImode, operands[2]);
  op2_rf = gen_reg_rtx (RFmode);
  expand_float (op2_rf, operands[2], 0);

  /* Trap (code 1) when the divisor is zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
                            CONST1_RTX (SImode)));

  emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));

  emit_insn (gen_fix_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
  emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
  DONE;
})

;; Signed SImode remainder: divide with divsi3, negate the divisor and
;; combine through gen_maddsi4 (div, -op2, op1).
;; NOTE(review): maddsi4 is defined elsewhere; it is assumed to compute
;; (operand 1 * operand 2) + operand 3, giving op1 - div * op2.
(define_expand "modsi3"
  [(set (match_operand:SI 0 "register_operand" "")
        (mod:SI (match_operand:SI 1 "general_operand" "")
                (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op2_neg, op1_di, div;

  div = gen_reg_rtx (SImode);
  emit_insn (gen_divsi3 (div, operands[1], operands[2]));

  op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);

  /* This is a trick to get us to reuse the value that we're sure to
     have already copied to the FP regs.  */
  op1_di = gen_reg_rtx (DImode);
  convert_move (op1_di, operands[1], 0);

  emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
                          gen_lowpart (SImode, op1_di)));
  DONE;
})

;; Unsigned SImode division.  Same steps as divsi3, but the operands are
;; converted to RFmode as unsigned values and the result is truncated
;; with the unsigned conversion.  The RFmode quotient sequence
;; (divsi3_internal) is shared with the signed case.
(define_expand "udivsi3"
  [(set (match_operand:SI 0 "register_operand" "")
        (udiv:SI (match_operand:SI 1 "general_operand" "")
                 (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op1_rf, op2_rf, op0_rf, op0_di;

  op0_rf = gen_reg_rtx (RFmode);
  op0_di = gen_reg_rtx (DImode);

  if (! register_operand (operands[1], SImode))
    operands[1] = force_reg (SImode, operands[1]);
  op1_rf = gen_reg_rtx (RFmode);
  expand_float (op1_rf, operands[1], 1);

  if (! register_operand (operands[2], SImode))
    operands[2] = force_reg (SImode, operands[2]);
  op2_rf = gen_reg_rtx (RFmode);
  expand_float (op2_rf, operands[2], 1);

  /* Trap (code 1) when the divisor is zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
                            CONST1_RTX (SImode)));

  emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));

  emit_insn (gen_fixuns_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
  emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
  DONE;
})

;; Unsigned SImode remainder: divide with udivsi3, negate the divisor and
;; combine through gen_maddsi4 (div, -op2, op1).
;; NOTE(review): maddsi4 is defined elsewhere; it is assumed to compute
;; (operand 1 * operand 2) + operand 3.
(define_expand "umodsi3"
  [(set (match_operand:SI 0 "register_operand" "")
        (umod:SI (match_operand:SI 1 "general_operand" "")
                 (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op2_neg, op1_di, div;

  div = gen_reg_rtx (SImode);
  emit_insn (gen_udivsi3 (div, operands[1], operands[2]));

  op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);

  /* This is a trick to get us to reuse the value that we're sure to
     have already copied to the FP regs.  */
  op1_di = gen_reg_rtx (DImode);
  convert_move (op1_di, operands[1], 1);

  emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
                          gen_lowpart (SImode, op1_di)));
  DONE;
})

;; RFmode quotient sequence shared by divsi3 and udivsi3.  Operands 1
;; and 2 are already in RFmode.  Every step uses status field 1 and no
;; rounding; the last step writes operand 0 directly and passes y as its
;; else value.
(define_expand "divsi3_internal"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:SI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a = operands[1];
  rtx b = operands[2];
  rtx y = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  rtx status1 = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);
  rtx twon34_exp = gen_reg_rtx (DImode);
  rtx twon34 = gen_reg_rtx (RFmode);

  /* Load constant 2**(-34).  65501 is 65535 - 34; NOTE(review): 65535
     (0xffff) is assumed to be the register-format exponent bias.  */
  emit_move_insn (twon34_exp, GEN_INT (65501));
  emit_insn (gen_setf_exp_rf (twon34, twon34_exp));

  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* q = a * y  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
  /* q1 = q + (q * e)  */
  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1,
                                trunc_off));
  /* e1 = (2**-34) + (e * e)  */
  emit_insn (gen_m2addrf4_cond (e1, cond, twon34, e, e, zero, status1,
                                trunc_off));
  /* q2 = q1 + (e1 * q1)  */
  emit_insn (gen_m2addrf4_cond (operands[0], cond, q1, e1, q1, y, status1,
                                trunc_off));
  DONE;
})

;; Signed DImode division, enabled by TARGET_INLINE_INT_DIV.  Operands
;; are forced into registers and converted to RFmode as signed values, a
;; conditional trap is emitted for a zero divisor, and the RFmode
;; quotient comes from the minimum latency sequence when the option
;; equals INL_MIN_LAT and from the maximum throughput sequence otherwise.
;; The result is truncated straight into operand 0 using status field 1.
(define_expand "divdi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (div:DI (match_operand:DI 1 "general_operand" "")
                (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op1_rf, op2_rf, op0_rf;

  op0_rf = gen_reg_rtx (RFmode);

  if (! register_operand (operands[1], DImode))
    operands[1] = force_reg (DImode, operands[1]);
  op1_rf = gen_reg_rtx (RFmode);
  expand_float (op1_rf, operands[1], 0);

  if (! register_operand (operands[2], DImode))
    operands[2] = force_reg (DImode, operands[2]);
  op2_rf = gen_reg_rtx (RFmode);
  expand_float (op2_rf, operands[2], 0);

  /* Trap (code 1) when the divisor is zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
                            CONST1_RTX (DImode)));

  if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
    emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
  else
    emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));

  emit_insn (gen_fix_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
  DONE;
})

;; Signed DImode remainder: divide with divdi3, negate the divisor and
;; combine through gen_madddi4 (div, -op2, op1).  The pattern is declared
;; as mod:DI so that its mode agrees with the DImode operands and result.
;; NOTE(review): madddi4 is defined elsewhere; it is assumed to compute
;; (operand 1 * operand 2) + operand 3, giving op1 - div * op2.
(define_expand "moddi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (mod:DI (match_operand:DI 1 "general_operand" "")
                (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op2_neg, div;

  div = gen_reg_rtx (DImode);
  emit_insn (gen_divdi3 (div, operands[1], operands[2]));

  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
  DONE;
})

;; Unsigned DImode division.  Same steps as divdi3, but the operands are
;; converted to RFmode as unsigned values and the result is truncated
;; with the unsigned conversion.
(define_expand "udivdi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (udiv:DI (match_operand:DI 1 "general_operand" "")
                 (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op1_rf, op2_rf, op0_rf;

  op0_rf = gen_reg_rtx (RFmode);

  if (! register_operand (operands[1], DImode))
    operands[1] = force_reg (DImode, operands[1]);
  op1_rf = gen_reg_rtx (RFmode);
  expand_float (op1_rf, operands[1], 1);

  if (! register_operand (operands[2], DImode))
    operands[2] = force_reg (DImode, operands[2]);
  op2_rf = gen_reg_rtx (RFmode);
  expand_float (op2_rf, operands[2], 1);

  /* Trap (code 1) when the divisor is zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
                            CONST1_RTX (DImode)));

  if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
    emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
  else
    emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));

  emit_insn (gen_fixuns_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
  DONE;
})

;; Unsigned DImode remainder: divide with udivdi3, negate the divisor and
;; combine through gen_madddi4 (div, -op2, op1).
;; NOTE(review): madddi4 is defined elsewhere; it is assumed to compute
;; (operand 1 * operand 2) + operand 3.
(define_expand "umoddi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (umod:DI (match_operand:DI 1 "general_operand" "")
                 (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op2_neg, div;

  div = gen_reg_rtx (DImode);
  emit_insn (gen_udivdi3 (div, operands[1], operands[2]));

  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
  DONE;
})

;; RFmode quotient sequence for DImode division, minimum latency variant.
;; Operands 1 and 2 are already in RFmode.  Every step uses status
;; field 1 and no rounding; the last step writes operand 0 directly and
;; passes y as its else value.
(define_expand "divdi3_internal_lat"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a = operands[1];
  rtx b = operands[2];
  rtx y = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx q2 = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  rtx status1 = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* y = 1 / b  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
  /* e = 1 - (b * y)  */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1,
                                trunc_off));
  /* q = a * y  */
  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
  /* q1 = q + (q * e)  */
  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1,
                                trunc_off));
  /* e1 = e * e  */
  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
  /* q2 = q1 + (e1 * q1)  */
  emit_insn (gen_m2addrf4_cond (q2, cond, q1, e1, q1, zero, status1,
                                trunc_off));
  /* y1 = y + (y * e)  */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1,
                                trunc_off));
  /* r = a - (b * q2)  */
  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1,
                                trunc_off));
  /* y2 = y1 + (y1 * e1)  */
  emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1,
                                trunc_off));
  /* q3 = q2 + (r * y2)  */
  emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1,
                                trunc_off));
  DONE;
})

(define_expand "divdi3_internal_thr"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a = operands[1];
  rtx b = operands[2];
  rtx y = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q2 = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx cond = gen_reg_rtx (CCImode);
  rtx zero = CONST0_RTX (RFmode);
  rtx one = CONST1_RTX (RFmode);
  rtx status1 = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* y = 1 / b */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
  /* e = 1 - (b * y) */
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
  /* y1 = y + (y * e) */
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
  /* e1 = e * 
e */ 904 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off)); 905 /* y2 = y1 + (y1 * e1) */ 906 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off)); 907 /* q2 = y2 * a */ 908 emit_insn (gen_mulrf3_cond (q2, cond, y2, a, zero, status1, trunc_off)); 909 /* r = a - (b * q2) */ 910 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off)); 911 /* q3 = q2 + (r * y2) */ 912 emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off)); 913 DONE; 914}) 915 916;; SQRT operations 917 918 919(define_insn "sqrt_approx_rf" 920 [(set (match_operand:RF 0 "fr_register_operand" "=f") 921 (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")] 922 UNSPEC_FR_SQRT_RECIP_APPROX_RES)) 923 (set (match_operand:CCI 2 "register_operand" "=c") 924 (unspec:CCI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX)) 925 (use (match_operand:SI 3 "const_int_operand" ""))] 926 "" 927 "frsqrta.s%3 %0, %2 = %F1" 928 [(set_attr "itanium_class" "fmisc") 929 (set_attr "predicable" "no")]) 930 931(define_expand "sqrtsf2" 932 [(set (match_operand:SF 0 "fr_register_operand" "=&f") 933 (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))] 934 "TARGET_INLINE_SQRT" 935{ 936 rtx insn; 937 if (TARGET_INLINE_SQRT == INL_MIN_LAT) 938 insn = gen_sqrtsf2_internal_lat (operands[0], operands[1]); 939 else 940 insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]); 941 emit_insn (insn); 942 DONE; 943}) 944 945(define_expand "sqrtsf2_internal_thr" 946 [(set (match_operand:SF 0 "fr_register_operand" "") 947 (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))] 948 "TARGET_INLINE_SQRT" 949{ 950 rtx y = gen_reg_rtx (RFmode); 951 rtx b = gen_reg_rtx (RFmode); 952 rtx g = gen_reg_rtx (RFmode); 953 rtx e = gen_reg_rtx (RFmode); 954 rtx s = gen_reg_rtx (RFmode); 955 rtx f = gen_reg_rtx (RFmode); 956 rtx y1 = gen_reg_rtx (RFmode); 957 rtx g1 = gen_reg_rtx (RFmode); 958 rtx h = gen_reg_rtx (RFmode); 959 rtx d = gen_reg_rtx 
(RFmode); 960 rtx g2 = gen_reg_rtx (RFmode); 961 rtx cond = gen_reg_rtx (CCImode); 962 rtx zero = CONST0_RTX (RFmode); 963 rtx one = CONST1_RTX (RFmode); 964 rtx c1 = ia64_dconst_0_5(); 965 rtx c2 = ia64_dconst_0_375(); 966 rtx reg_df_c1 = gen_reg_rtx (DFmode); 967 rtx reg_df_c2 = gen_reg_rtx (DFmode); 968 rtx reg_rf_c1 = gen_reg_rtx (RFmode); 969 rtx reg_rf_c2 = gen_reg_rtx (RFmode); 970 rtx status0 = CONST0_RTX (SImode); 971 rtx status1 = CONST1_RTX (SImode); 972 rtx trunc_sgl = CONST0_RTX (SImode); 973 rtx trunc_off = CONST2_RTX (SImode); 974 975 /* Put needed constants into registers. */ 976 emit_insn (gen_movdf (reg_df_c1, c1)); 977 emit_insn (gen_movdf (reg_df_c2, c2)); 978 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); 979 emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2)); 980 /* Empty conversion to put input into RFmode. */ 981 emit_insn (gen_extendsfrf2 (b, operands[1])); 982 /* y = sqrt (1 / b) */ 983 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); 984 /* g = b * y */ 985 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); 986 /* e = 1 - (g * y) */ 987 emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off)); 988 /* s = 0.5 + (0.375 * e) */ 989 emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off)); 990 /* f = y * e */ 991 emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off)); 992 /* y1 = y + (f * s) */ 993 emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off)); 994 /* g1 = single (b * y1) */ 995 emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl)); 996 /* h = 0.5 * y1 */ 997 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off)); 998 /* d = b - g1 * g1 */ 999 emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off)); 1000 /* g2 = single(g1 + (d * h)) */ 1001 emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl)); 1002 /* Conversion back into SFmode. 
*/ 1003 emit_insn (gen_truncrfsf2 (operands[0], g2)); 1004 DONE; 1005}) 1006 1007(define_expand "sqrtsf2_internal_lat" 1008 [(set (match_operand:SF 0 "fr_register_operand" "") 1009 (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))] 1010 "TARGET_INLINE_SQRT" 1011{ 1012 rtx y = gen_reg_rtx (RFmode); 1013 rtx b = gen_reg_rtx (RFmode); 1014 rtx g = gen_reg_rtx (RFmode); 1015 rtx g1 = gen_reg_rtx (RFmode); 1016 rtx g2 = gen_reg_rtx (RFmode); 1017 rtx e = gen_reg_rtx (RFmode); 1018 rtx s = gen_reg_rtx (RFmode); 1019 rtx f = gen_reg_rtx (RFmode); 1020 rtx f1 = gen_reg_rtx (RFmode); 1021 rtx h = gen_reg_rtx (RFmode); 1022 rtx h1 = gen_reg_rtx (RFmode); 1023 rtx d = gen_reg_rtx (RFmode); 1024 rtx cond = gen_reg_rtx (CCImode); 1025 rtx zero = CONST0_RTX (RFmode); 1026 rtx one = CONST1_RTX (RFmode); 1027 rtx c1 = ia64_dconst_0_5(); 1028 rtx c2 = ia64_dconst_0_375(); 1029 rtx reg_df_c1 = gen_reg_rtx (DFmode); 1030 rtx reg_df_c2 = gen_reg_rtx (DFmode); 1031 rtx reg_rf_c1 = gen_reg_rtx (RFmode); 1032 rtx reg_rf_c2 = gen_reg_rtx (RFmode); 1033 rtx status0 = CONST0_RTX (SImode); 1034 rtx status1 = CONST1_RTX (SImode); 1035 rtx trunc_sgl = CONST0_RTX (SImode); 1036 rtx trunc_off = CONST2_RTX (SImode); 1037 1038 /* Put needed constants into registers. */ 1039 emit_insn (gen_movdf (reg_df_c1, c1)); 1040 emit_insn (gen_movdf (reg_df_c2, c2)); 1041 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); 1042 emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2)); 1043 /* Empty conversion to put input into RFmode. 
*/ 1044 emit_insn (gen_extendsfrf2 (b, operands[1])); 1045 /* y = sqrt (1 / b) */ 1046 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); 1047 /* g = b * y */ 1048 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); 1049 /* e = 1 - (g * y) */ 1050 emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off)); 1051 /* h = 0.5 * y */ 1052 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off)); 1053 /* s = 0.5 + (0.375 * e) */ 1054 emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off)); 1055 /* f = e * g */ 1056 emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off)); 1057 /* g1 = single (g + (f * s)) */ 1058 emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl)); 1059 /* f1 = e * h */ 1060 emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off)); 1061 /* d = b - g1 * g1 */ 1062 emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off)); 1063 /* h1 = h + (f1 * s) */ 1064 emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off)); 1065 /* g2 = single(g1 + (d * h1)) */ 1066 emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl)); 1067 /* Conversion back into SFmode. 
*/ 1068 emit_insn (gen_truncrfsf2 (operands[0], g2)); 1069 DONE; 1070}) 1071 1072(define_expand "sqrtdf2" 1073 [(set (match_operand:DF 0 "fr_register_operand" "=&f") 1074 (sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))] 1075 "TARGET_INLINE_SQRT" 1076{ 1077 rtx insn; 1078#if 0 1079 if (TARGET_INLINE_SQRT == INL_MIN_LAT) 1080 insn = gen_sqrtdf2_internal_lat (operands[0], operands[1]); 1081 else 1082#endif 1083 insn = gen_sqrtdf2_internal_thr (operands[0], operands[1]); 1084 emit_insn (insn); 1085 DONE; 1086}) 1087 1088(define_expand "sqrtdf2_internal_thr" 1089 [(set (match_operand:DF 0 "fr_register_operand" "") 1090 (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))] 1091 "TARGET_INLINE_SQRT" 1092{ 1093 rtx y = gen_reg_rtx (RFmode); 1094 rtx b = gen_reg_rtx (RFmode); 1095 rtx g = gen_reg_rtx (RFmode); 1096 rtx g1 = gen_reg_rtx (RFmode); 1097 rtx g2 = gen_reg_rtx (RFmode); 1098 rtx g3 = gen_reg_rtx (RFmode); 1099 rtx g4 = gen_reg_rtx (RFmode); 1100 rtx r = gen_reg_rtx (RFmode); 1101 rtx r1 = gen_reg_rtx (RFmode); 1102 rtx h = gen_reg_rtx (RFmode); 1103 rtx h1 = gen_reg_rtx (RFmode); 1104 rtx h2 = gen_reg_rtx (RFmode); 1105 rtx d = gen_reg_rtx (RFmode); 1106 rtx d1 = gen_reg_rtx (RFmode); 1107 rtx cond = gen_reg_rtx (CCImode); 1108 rtx zero = CONST0_RTX (RFmode); 1109 rtx c1 = ia64_dconst_0_5(); 1110 rtx reg_df_c1 = gen_reg_rtx (DFmode); 1111 rtx reg_rf_c1 = gen_reg_rtx (RFmode); 1112 rtx status0 = CONST0_RTX (SImode); 1113 rtx status1 = CONST1_RTX (SImode); 1114 rtx trunc_dbl = CONST1_RTX (SImode); 1115 rtx trunc_off = CONST2_RTX (SImode); 1116 1117 /* Put needed constants into registers. */ 1118 emit_insn (gen_movdf (reg_df_c1, c1)); 1119 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); 1120 /* Empty conversion to put input into RFmode. 
*/ 1121 emit_insn (gen_extenddfrf2 (b, operands[1])); 1122 /* y = sqrt (1 / b) */ 1123 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); 1124 /* g = b * y */ 1125 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); 1126 /* h = 0.5 * y */ 1127 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off)); 1128 /* r = 0.5 - (g * h) */ 1129 emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off)); 1130 /* g1 = g + (g * r) */ 1131 emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off)); 1132 /* h1 = h + (h * r) */ 1133 emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off)); 1134 /* r1 = 0.5 - (g1 * h1) */ 1135 emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off)); 1136 /* g2 = g1 + (g1 * r1) */ 1137 emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off)); 1138 /* h2 = h1 + (h1 * r1) */ 1139 emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off)); 1140 /* d = b - (g2 * g2) */ 1141 emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off)); 1142 /* g3 = g2 + (d * h2) */ 1143 emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off)); 1144 /* d1 = b - (g3 * g3) */ 1145 emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off)); 1146 /* g4 = g3 + (d1 * h2) */ 1147 emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status1, trunc_dbl)); 1148 /* Conversion back into SFmode. 
*/ 1149 emit_insn (gen_truncrfdf2 (operands[0], g4)); 1150 DONE; 1151}) 1152 1153(define_expand "sqrtxf2" 1154 [(set (match_operand:XF 0 "fr_register_operand" "") 1155 (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))] 1156 "TARGET_INLINE_SQRT" 1157{ 1158 rtx y = gen_reg_rtx (RFmode); 1159 rtx b = gen_reg_rtx (RFmode); 1160 rtx g = gen_reg_rtx (RFmode); 1161 rtx g1 = gen_reg_rtx (RFmode); 1162 rtx g2 = gen_reg_rtx (RFmode); 1163 rtx g3 = gen_reg_rtx (RFmode); 1164 rtx g4 = gen_reg_rtx (RFmode); 1165 rtx e = gen_reg_rtx (RFmode); 1166 rtx e1 = gen_reg_rtx (RFmode); 1167 rtx e2 = gen_reg_rtx (RFmode); 1168 rtx h = gen_reg_rtx (RFmode); 1169 rtx h1 = gen_reg_rtx (RFmode); 1170 rtx h2 = gen_reg_rtx (RFmode); 1171 rtx h3 = gen_reg_rtx (RFmode); 1172 rtx d = gen_reg_rtx (RFmode); 1173 rtx d1 = gen_reg_rtx (RFmode); 1174 rtx cond = gen_reg_rtx (CCImode); 1175 rtx zero = CONST0_RTX (RFmode); 1176 rtx c1 = ia64_dconst_0_5(); 1177 rtx reg_df_c1 = gen_reg_rtx (DFmode); 1178 rtx reg_rf_c1 = gen_reg_rtx (RFmode); 1179 rtx status0 = CONST0_RTX (SImode); 1180 rtx status1 = CONST1_RTX (SImode); 1181 rtx trunc_off = CONST2_RTX (SImode); 1182 1183 /* Put needed constants into registers. */ 1184 emit_insn (gen_movdf (reg_df_c1, c1)); 1185 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1)); 1186 /* Empty conversion to put input into RFmode. 
*/ 1187 emit_insn (gen_extendxfrf2 (b, operands[1])); 1188 /* y = sqrt (1 / b) */ 1189 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0)); 1190 /* g = b * y */ 1191 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off)); 1192 /* h = 0.5 * y */ 1193 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off)); 1194 /* e = 0.5 - (g * h) */ 1195 emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off)); 1196 /* g1 = g + (g * e) */ 1197 emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off)); 1198 /* h1 = h + (h * e) */ 1199 emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off)); 1200 /* e1 = 0.5 - (g1 * h1) */ 1201 emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off)); 1202 /* g2 = g1 + (g1 * e1) */ 1203 emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off)); 1204 /* h2 = h1 + (h1 * e1) */ 1205 emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off)); 1206 /* d = b - (g2 * g2) */ 1207 emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off)); 1208 /* e2 = 0.5 - (g2 * h2) */ 1209 emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off)); 1210 /* g3 = g2 + (d * h2) */ 1211 emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off)); 1212 /* h3 = h2 + (e2 * h2) */ 1213 emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off)); 1214 /* d1 = b - (g3 * g3) */ 1215 emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off)); 1216 /* g4 = g3 + (d1 * h3) */ 1217 emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status1, trunc_off)); 1218 /* Conversion back into SFmode. */ 1219 emit_insn (gen_truncrfxf2 (operands[0], g4)); 1220 DONE; 1221}) 1222