1;; Copyright (C) 2007-2021 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18
19;; For the internal conditional math routines:
20
;; operand 0 is always the result
;; operand 1 is always the predicate
;; operand 2, 3, and sometimes 4 are the input values.
;; operand 4 or 5 is the value the result takes when the predicate is false.
;; operand 5 or 6 is the floating point status register to use.
;; operand 6 or 7 is the rounding to do. (0 = single, 1 = double, 2 = none)
26;;
27;; addrf3_cond   - F0 = F2 + F3
28;; subrf3_cond   - F0 = F2 - F3
29;; mulrf3_cond   - F0 = F2 * F3
30;; nmulrf3_cond  - F0 = - (F2 * F3)
31;; m1addrf4_cond - F0 = (F2 * F3) + F4
32;; m1subrf4_cond - F0 = (F2 * F3) - F4
33;; m2addrf4_cond - F0 = F2 + (F3 * F4)
34;; m2subrf4_cond - F0 = F2 - (F3 * F4)
35
36;; Basic plus/minus/mult operations
37
;; Predicated RFmode add.  When predicate operand 1 is non-zero the
;; result is operand 2 + operand 3; otherwise the result is operand 4
;; (tied to the destination in the first alternative, constraint "H" in
;; the second).  Operand 5 selects the ".s" status field and operand 6
;; the %R rounding completer of the emitted fadd.
(define_insn "addrf3_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (plus:RF (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
                (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
       (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fadd%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
52
;; Predicated RFmode subtract.  When predicate operand 1 is non-zero the
;; result is operand 2 - operand 3; otherwise the result is operand 4.
;; Operand 5 selects the ".s" status field and operand 6 the %R rounding
;; completer of the emitted fsub.
(define_insn "subrf3_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (minus:RF (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
                 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
       (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fsub%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
67
;; Predicated RFmode multiply.  When predicate operand 1 is non-zero the
;; result is operand 2 * operand 3; otherwise the result is operand 4.
;; Operand 5 selects the ".s" status field and operand 6 the %R rounding
;; completer of the emitted fmpy.
(define_insn "mulrf3_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (mult:RF (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
                (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
       (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fmpy%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
82
83;; neg-mult operation
84
;; Predicated RFmode negated multiply.  When predicate operand 1 is
;; non-zero the result is -(operand 2 * operand 3); otherwise the result
;; is operand 4.  Operand 5 selects the ".s" status field and operand 6
;; the %R rounding completer of the emitted fnmpy.
(define_insn "nmulrf3_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (neg:RF
         (mult:RF (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
                  (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")))
       (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 5 "const_int_operand" ""))
   (use (match_operand:SI 6 "const_int_operand" ""))]
  ""
  "(%1) fnmpy%R6.s%5 %0 = %F2, %F3"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
99
100;; add-mult/sub-mult operations (mult as op1)
101
;; Predicated multiply-add with the product as the first addend.  When
;; predicate operand 1 is non-zero the result is
;; (operand 2 * operand 3) + operand 4; otherwise it is operand 5.
;; Operand 6 selects the ".s" status field and operand 7 the %R rounding
;; completer of the emitted fma.
(define_insn "m1addrf4_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (plus:RF
         (mult:RF (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
                  (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
         (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
       (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
118
;; Predicated multiply-subtract with the product as the minuend.  When
;; predicate operand 1 is non-zero the result is
;; (operand 2 * operand 3) - operand 4; otherwise it is operand 5.
;; Operand 6 selects the ".s" status field and operand 7 the %R rounding
;; completer of the emitted fms.
(define_insn "m1subrf4_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (minus:RF
         (mult:RF (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
                  (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
         (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
       (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
135
136;; add-mult/sub-mult operations (mult as op2)
137
;; Predicated multiply-add with the product as the second addend.  When
;; predicate operand 1 is non-zero the result is
;; operand 2 + (operand 3 * operand 4); otherwise it is operand 5.
;; The fma template therefore lists the operands as %F3, %F4, %F2.
;; Operand 6 selects the ".s" status field and operand 7 the %R rounding
;; completer.
(define_insn "m2addrf4_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (plus:RF
         (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
         (mult:RF (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
                  (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
       (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
154
;; Predicated multiply-subtract with the product as the subtrahend.
;; When predicate operand 1 is non-zero the result is
;; operand 2 - (operand 3 * operand 4); otherwise it is operand 5.
;; The fnma template lists the operands as %F3, %F4, %F2.
;; Operand 6 selects the ".s" status field and operand 7 the %R rounding
;; completer.
(define_insn "m2subrf4_cond"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f,f")
     (if_then_else:RF
       (ne:RF (match_operand:CCI 1 "register_operand" "c,c") (const_int 0))
       (minus:RF
         (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
         (mult:RF (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
                  (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
       (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
   (use (match_operand:SI 6 "const_int_operand" ""))
   (use (match_operand:SI 7 "const_int_operand" ""))]
  ""
  "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2"
  [(set_attr "itanium_class" "fmac")
   (set_attr "predicable" "no")])
171
172;; Conversions to/from RF and SF/DF/XF
173;; These conversions should not generate any code but make it possible
174;; for all the instructions used to implement floating point division
175;; to be written for RFmode only and to not have to handle multiple
176;; modes or to have to handle a register in more than one mode.
177
;; Floating-point modes for which the RFmode extend/truncate patterns
;; below are instantiated.
(define_mode_iterator SDX_F [SF DF XF])
179
;; Placeholder for widening an SF/DF/XF value to RFmode.  The "#"
;; template means the insn is never output directly; it must be split.
(define_insn "extend<mode>rf2"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f")
     (float_extend:RF
       (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "#"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "yes")])
187
;; After reload, replace a float_extend to RFmode by a plain set from an
;; RFmode view of the source: FR_REG (0) for constant zero, FR_REG (1)
;; for constant one, otherwise the source operand's own register number.
(define_split
  [(set
     (match_operand:RF 0 "fr_register_operand" "")
     (float_extend:RF
       (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "")))]
  "reload_completed"
  [(set (match_dup 0) (match_dup 2))]
{
  unsigned int src_regno;

  if (operands[1] == CONST0_RTX (<MODE>mode))
    src_regno = FR_REG (0);
  else if (operands[1] == CONST1_RTX (<MODE>mode))
    src_regno = FR_REG (1);
  else
    src_regno = REGNO (operands[1]);

  operands[2] = gen_rtx_REG (RFmode, src_regno);
})
201
202
;; Placeholder for narrowing an RFmode value to SF/DF/XF.  The "#"
;; template means the insn is never output directly; it must be split.
(define_insn "truncrf<mode>2"
  [(set
     (match_operand:SDX_F 0 "fr_register_operand" "=f")
     (float_truncate:SDX_F
       (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
  ""
  "#"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "yes")])
210
;; After reload, replace a float_truncate from RFmode by a plain set
;; from a <MODE>mode view of the source: FR_REG (0) for constant zero,
;; FR_REG (1) for constant one, otherwise the source operand's own
;; register number.
(define_split
  [(set
     (match_operand:SDX_F 0 "fr_register_operand" "")
     (float_truncate:SDX_F
       (match_operand:RF 1 "fr_reg_or_fp01_operand" "")))]
  "reload_completed"
  [(set (match_dup 0) (match_dup 2))]
{
  unsigned int src_regno;

  if (operands[1] == CONST0_RTX (RFmode))
    src_regno = FR_REG (0);
  else if (operands[1] == CONST1_RTX (RFmode))
    src_regno = FR_REG (1);
  else
    src_regno = REGNO (operands[1]);

  operands[2] = gen_rtx_REG (<MODE>mode, src_regno);
})
224
225;; Float to integer truncations using an alternative status register.
226
;; Signed RFmode -> DImode conversion (RTL "fix") in an FP register.
;; Operand 2 is substituted into the ".s" status-field completer of
;; fcvt.fx.trunc.
(define_insn "fix_truncrfdi2_alts"
  [(set
     (match_operand:DI 0 "fr_register_operand" "=f")
     (fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
   (use (match_operand:SI 2 "const_int_operand" ""))]
  ""
  "fcvt.fx.trunc.s%2 %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])
234
;; Unsigned RFmode -> DImode conversion (RTL "unsigned_fix") in an FP
;; register.  Operand 2 is substituted into the ".s" status-field
;; completer of fcvt.fxu.trunc.
(define_insn "fixuns_truncrfdi2_alts"
  [(set
     (match_operand:DI 0 "fr_register_operand" "=f")
     (unsigned_fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
   (use (match_operand:SI 2 "const_int_operand" ""))]
  ""
  "fcvt.fxu.trunc.s%2 %0 = %1"
  [(set_attr "itanium_class" "fcvtfx")])
242
;; Emit setf.exp: build an RFmode value in an FP register from the
;; DImode general register operand 1 (modelled as UNSPEC_SETF_EXP).
(define_insn "setf_exp_rf"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f")
     (unspec:RF
       [(match_operand:DI 1 "register_operand" "r")]
       UNSPEC_SETF_EXP))]
  ""
  "setf.exp %0 = %1"
  [(set_attr "itanium_class" "frfr")])
250
251;; Reciprocal approximation
252
;; Emit frcpa.  From inputs operand 1 and operand 2 it produces two
;; results: an RFmode value in operand 0 and a predicate in operand 3.
;; Operand 4 is substituted into the ".s" status-field completer.
(define_insn "recip_approx_rf"
  [(set
     (match_operand:RF 0 "fr_register_operand" "=f")
     (unspec:RF
       [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")
        (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG")]
       UNSPEC_FR_RECIP_APPROX_RES))
   (set
     (match_operand:CCI 3 "register_operand" "=c")
     (unspec:CCI
       [(match_dup 1) (match_dup 2)]
       UNSPEC_FR_RECIP_APPROX))
   (use (match_operand:SI 4 "const_int_operand" ""))]
  ""
  "frcpa.s%4 %0, %3 = %F1, %F2"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "no")])
265
266;; Single precision floating point division
267
;; SFmode division entry point.  Dispatches to the minimum-latency or
;; maximum-throughput inline sequence according to
;; TARGET_INLINE_FLOAT_DIV.
(define_expand "divsf3"
  [(set
     (match_operand:SF 0 "fr_register_operand" "")
     (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx seq
    = (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT
       ? gen_divsf3_internal_lat (operands[0], operands[1], operands[2])
       : gen_divsf3_internal_thr (operands[0], operands[1], operands[2]));

  emit_insn (seq);
  DONE;
})
282
283;; Single precision floating point division (maximum throughput algorithm).
284
;; SFmode division, maximum-throughput sequence.  Works entirely in
;; RFmode: frcpa yields an initial reciprocal and a predicate, and every
;; refinement step is predicated on it.  The intermediate steps use
;; status field 1; the last step uses status field 0 and takes the frcpa
;; result as its "predicate false" value.
(define_expand "divsf3_internal_thr"
  [(set
     (match_operand:SF 0 "fr_register_operand" "")
     (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  /* Temporaries; allocation order is kept as in the original.  */
  rtx rcp      = gen_reg_rtx (RFmode);
  rtx num      = gen_reg_rtx (RFmode);
  rtx den      = gen_reg_rtx (RFmode);
  rtx err      = gen_reg_rtx (RFmode);
  rtx rcp1     = gen_reg_rtx (RFmode);
  rtx rcp2     = gen_reg_rtx (RFmode);
  rtx quo      = gen_reg_rtx (RFmode);
  rtx rmd      = gen_reg_rtx (RFmode);
  rtx quot_out = gen_reg_rtx (RFmode);
  rtx pred     = gen_reg_rtx (CCImode);

  /* Constants: FP 0.0/1.0, status-field selectors, rounding selectors.  */
  rtx fzero   = CONST0_RTX (RFmode);
  rtx fone    = CONST1_RTX (RFmode);
  rtx sf0     = CONST0_RTX (SImode);
  rtx sf1     = CONST1_RTX (SImode);
  rtx rnd_sgl = CONST0_RTX (SImode);
  rtx rnd_off = CONST2_RTX (SImode);

  /* Bring both inputs into RFmode (no code is generated for this).  */
  emit_insn (gen_extendsfrf2 (num, operands[1]));
  emit_insn (gen_extendsfrf2 (den, operands[2]));

  /* rcp = frcpa (num, den), also sets pred.  */
  emit_insn (gen_recip_approx_rf (rcp, num, den, pred, sf0));

  /* err = 1 - (den * rcp).  */
  emit_insn (gen_m2subrf4_cond (err, pred, fone, den, rcp,
                                fzero, sf1, rnd_off));

  /* rcp1 = rcp + (rcp * err).  */
  emit_insn (gen_m2addrf4_cond (rcp1, pred, rcp, rcp, err,
                                fzero, sf1, rnd_off));

  /* rcp2 = rcp + (rcp1 * err).  */
  emit_insn (gen_m2addrf4_cond (rcp2, pred, rcp, rcp1, err,
                                fzero, sf1, rnd_off));

  /* quo = single (num * rcp2).  */
  emit_insn (gen_mulrf3_cond (quo, pred, num, rcp2,
                              fzero, sf1, rnd_sgl));

  /* rmd = num - (quo * den).  */
  emit_insn (gen_m2subrf4_cond (rmd, pred, num, quo, den,
                                fzero, sf1, rnd_off));

  /* quot_out = single (quo + (rmd * rcp2)).  */
  emit_insn (gen_m2addrf4_cond (quot_out, pred, quo, rmd, rcp2,
                                rcp, sf0, rnd_sgl));

  /* Return the result to SFmode.  */
  emit_insn (gen_truncrfsf2 (operands[0], quot_out));
  DONE;
})
329
330;; Single precision floating point division (minimum latency algorithm).
331
;; SFmode division, minimum-latency sequence.  Works entirely in RFmode:
;; frcpa yields an initial reciprocal and a predicate, and every
;; refinement step is predicated on it.  The intermediate steps use
;; status field 1; the last step uses status field 0 and takes the frcpa
;; result as its "predicate false" value.
(define_expand "divsf3_internal_lat"
  [(set
     (match_operand:SF 0 "fr_register_operand" "")
     (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  /* Temporaries; allocation order is kept as in the original.  */
  rtx rcp      = gen_reg_rtx (RFmode);
  rtx num      = gen_reg_rtx (RFmode);
  rtx den      = gen_reg_rtx (RFmode);
  rtx err      = gen_reg_rtx (RFmode);
  rtx quo      = gen_reg_rtx (RFmode);
  rtx err1     = gen_reg_rtx (RFmode);
  rtx rcp1     = gen_reg_rtx (RFmode);
  rtx quo1     = gen_reg_rtx (RFmode);
  rtx rmd      = gen_reg_rtx (RFmode);
  rtx quot_out = gen_reg_rtx (RFmode);
  rtx pred     = gen_reg_rtx (CCImode);

  /* Constants: FP 0.0/1.0, status-field selectors, rounding selectors.  */
  rtx fzero   = CONST0_RTX (RFmode);
  rtx fone    = CONST1_RTX (RFmode);
  rtx sf0     = CONST0_RTX (SImode);
  rtx sf1     = CONST1_RTX (SImode);
  rtx rnd_sgl = CONST0_RTX (SImode);
  rtx rnd_off = CONST2_RTX (SImode);

  /* Bring both inputs into RFmode (no code is generated for this).  */
  emit_insn (gen_extendsfrf2 (num, operands[1]));
  emit_insn (gen_extendsfrf2 (den, operands[2]));

  /* rcp = frcpa (num, den), also sets pred.  */
  emit_insn (gen_recip_approx_rf (rcp, num, den, pred, sf0));

  /* quo = num * rcp.  */
  emit_insn (gen_mulrf3_cond (quo, pred, num, rcp,
                              fzero, sf1, rnd_off));

  /* err = 1 - (den * rcp).  */
  emit_insn (gen_m2subrf4_cond (err, pred, fone, den, rcp,
                                fzero, sf1, rnd_off));

  /* err1 = err + (err * err).  */
  emit_insn (gen_m2addrf4_cond (err1, pred, err, err, err,
                                fzero, sf1, rnd_off));

  /* quo1 = single (quo + (quo * err1)).  */
  emit_insn (gen_m2addrf4_cond (quo1, pred, quo, quo, err1,
                                fzero, sf1, rnd_sgl));

  /* rcp1 = rcp + (rcp * err1).  */
  emit_insn (gen_m2addrf4_cond (rcp1, pred, rcp, rcp, err1,
                                fzero, sf1, rnd_off));

  /* rmd = num - (quo1 * den).  */
  emit_insn (gen_m2subrf4_cond (rmd, pred, num, quo1, den,
                                fzero, sf1, rnd_off));

  /* quot_out = single (quo1 + (rmd * rcp1)).  */
  emit_insn (gen_m2addrf4_cond (quot_out, pred, quo1, rmd, rcp1,
                                rcp, sf0, rnd_sgl));

  /* Return the result to SFmode.  */
  emit_insn (gen_truncrfsf2 (operands[0], quot_out));
  DONE;
})
379
380;; Double precision floating point division
381
;; DFmode division entry point.  Dispatches to the minimum-latency or
;; maximum-throughput inline sequence according to
;; TARGET_INLINE_FLOAT_DIV.
(define_expand "divdf3"
  [(set
     (match_operand:DF 0 "fr_register_operand" "")
     (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  rtx seq
    = (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT
       ? gen_divdf3_internal_lat (operands[0], operands[1], operands[2])
       : gen_divdf3_internal_thr (operands[0], operands[1], operands[2]));

  emit_insn (seq);
  DONE;
})
396
397;; Double precision floating point division (maximum throughput algorithm).
398
;; DFmode division, maximum-throughput sequence.  Works entirely in
;; RFmode: frcpa yields an initial reciprocal and a predicate, and every
;; refinement step is predicated on it.  The intermediate steps use
;; status field 1; the last step uses status field 0 and takes the frcpa
;; result as its "predicate false" value.
(define_expand "divdf3_internal_thr"
  [(set
     (match_operand:DF 0 "fr_register_operand" "")
     (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  /* Temporaries; allocation order is kept as in the original.  */
  rtx quot_out = gen_reg_rtx (RFmode);
  rtx num      = gen_reg_rtx (RFmode);
  rtx den      = gen_reg_rtx (RFmode);
  rtx rcp      = gen_reg_rtx (RFmode);
  rtx err      = gen_reg_rtx (RFmode);
  rtx rcp1     = gen_reg_rtx (RFmode);
  rtx err1     = gen_reg_rtx (RFmode);
  rtx rcp2     = gen_reg_rtx (RFmode);
  rtx err2     = gen_reg_rtx (RFmode);
  rtx rcp3     = gen_reg_rtx (RFmode);
  rtx quo      = gen_reg_rtx (RFmode);
  rtx rmd      = gen_reg_rtx (RFmode);
  rtx pred     = gen_reg_rtx (CCImode);

  /* Constants: FP 0.0/1.0, status-field selectors, rounding selectors.  */
  rtx fzero   = CONST0_RTX (RFmode);
  rtx fone    = CONST1_RTX (RFmode);
  rtx sf0     = CONST0_RTX (SImode);
  rtx sf1     = CONST1_RTX (SImode);
  rtx rnd_dbl = CONST1_RTX (SImode);
  rtx rnd_off = CONST2_RTX (SImode);

  /* Bring both inputs into RFmode (no code is generated for this).  */
  emit_insn (gen_extenddfrf2 (num, operands[1]));
  emit_insn (gen_extenddfrf2 (den, operands[2]));

  /* rcp = frcpa (num, den), also sets pred.  */
  emit_insn (gen_recip_approx_rf (rcp, num, den, pred, sf0));

  /* err = 1 - (den * rcp).  */
  emit_insn (gen_m2subrf4_cond (err, pred, fone, den, rcp,
                                fzero, sf1, rnd_off));

  /* rcp1 = rcp + (rcp * err).  */
  emit_insn (gen_m2addrf4_cond (rcp1, pred, rcp, rcp, err,
                                fzero, sf1, rnd_off));

  /* err1 = err * err.  */
  emit_insn (gen_mulrf3_cond (err1, pred, err, err,
                              fzero, sf1, rnd_off));

  /* rcp2 = rcp1 + (rcp1 * err1).  */
  emit_insn (gen_m2addrf4_cond (rcp2, pred, rcp1, rcp1, err1,
                                fzero, sf1, rnd_off));

  /* err2 = err1 * err1.  */
  emit_insn (gen_mulrf3_cond (err2, pred, err1, err1,
                              fzero, sf1, rnd_off));

  /* rcp3 = rcp2 + (rcp2 * err2).  */
  emit_insn (gen_m2addrf4_cond (rcp3, pred, rcp2, rcp2, err2,
                                fzero, sf1, rnd_off));

  /* quo = double (num * rcp3).  */
  emit_insn (gen_mulrf3_cond (quo, pred, num, rcp3,
                              fzero, sf1, rnd_dbl));

  /* rmd = num - (den * quo).  */
  emit_insn (gen_m2subrf4_cond (rmd, pred, num, den, quo,
                                fzero, sf1, rnd_off));

  /* quot_out = double (quo + (rmd * rcp3)).  */
  emit_insn (gen_m2addrf4_cond (quot_out, pred, quo, rmd, rcp3,
                                rcp, sf0, rnd_dbl));

  /* Return the result to DFmode.  */
  emit_insn (gen_truncrfdf2 (operands[0], quot_out));
  DONE;
})
451
452;; Double precision floating point division (minimum latency algorithm).
453
;; DFmode division, minimum-latency sequence.  Works entirely in RFmode:
;; frcpa yields an initial reciprocal and a predicate, and every
;; refinement step is predicated on it.  The intermediate steps use
;; status field 1; the last step uses status field 0 and takes the frcpa
;; result as its "predicate false" value.
(define_expand "divdf3_internal_lat"
  [(set
     (match_operand:DF 0 "fr_register_operand" "")
     (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  /* Temporaries; allocation order is kept as in the original.  */
  rtx quot_out = gen_reg_rtx (RFmode);
  rtx num      = gen_reg_rtx (RFmode);
  rtx den      = gen_reg_rtx (RFmode);
  rtx rcp      = gen_reg_rtx (RFmode);
  rtx err      = gen_reg_rtx (RFmode);
  rtx rcp1     = gen_reg_rtx (RFmode);
  rtx err1     = gen_reg_rtx (RFmode);
  rtx quo1     = gen_reg_rtx (RFmode);
  rtx rcp2     = gen_reg_rtx (RFmode);
  rtx err2     = gen_reg_rtx (RFmode);
  rtx quo2     = gen_reg_rtx (RFmode);
  rtx err3     = gen_reg_rtx (RFmode);
  rtx quo      = gen_reg_rtx (RFmode);
  rtx rmd1     = gen_reg_rtx (RFmode);
  rtx pred     = gen_reg_rtx (CCImode);

  /* Constants: FP 0.0/1.0, status-field selectors, rounding selectors.  */
  rtx fzero   = CONST0_RTX (RFmode);
  rtx fone    = CONST1_RTX (RFmode);
  rtx sf0     = CONST0_RTX (SImode);
  rtx sf1     = CONST1_RTX (SImode);
  rtx rnd_dbl = CONST1_RTX (SImode);
  rtx rnd_off = CONST2_RTX (SImode);

  /* Bring both inputs into RFmode (no code is generated for this).  */
  emit_insn (gen_extenddfrf2 (num, operands[1]));
  emit_insn (gen_extenddfrf2 (den, operands[2]));

  /* rcp = frcpa (num, den), also sets pred.  */
  emit_insn (gen_recip_approx_rf (rcp, num, den, pred, sf0));

  /* err = 1 - (den * rcp).  */
  emit_insn (gen_m2subrf4_cond (err, pred, fone, den, rcp,
                                fzero, sf1, rnd_off));

  /* quo = num * rcp.  */
  emit_insn (gen_mulrf3_cond (quo, pred, num, rcp,
                              fzero, sf1, rnd_off));

  /* err2 = err + (err * err).  */
  emit_insn (gen_m2addrf4_cond (err2, pred, err, err, err,
                                fzero, sf1, rnd_off));

  /* err1 = err * err.  */
  emit_insn (gen_mulrf3_cond (err1, pred, err, err,
                              fzero, sf1, rnd_off));

  /* err3 = err + (err1 * err1).  */
  emit_insn (gen_m2addrf4_cond (err3, pred, err, err1, err1,
                                fzero, sf1, rnd_off));

  /* quo1 = quo + (quo * err2).  */
  emit_insn (gen_m2addrf4_cond (quo1, pred, quo, quo, err2,
                                fzero, sf1, rnd_off));

  /* rcp1 = rcp + (rcp * err2).  */
  emit_insn (gen_m2addrf4_cond (rcp1, pred, rcp, rcp, err2,
                                fzero, sf1, rnd_off));

  /* quo2 = double (quo + (quo1 * err3)).  */
  emit_insn (gen_m2addrf4_cond (quo2, pred, quo, quo1, err3,
                                fzero, sf1, rnd_dbl));

  /* rcp2 = rcp + (rcp1 * err3).  */
  emit_insn (gen_m2addrf4_cond (rcp2, pred, rcp, rcp1, err3,
                                fzero, sf1, rnd_off));

  /* rmd1 = num - (den * quo2).  */
  emit_insn (gen_m2subrf4_cond (rmd1, pred, num, den, quo2,
                                fzero, sf1, rnd_off));

  /* quot_out = double (quo2 + (rmd1 * rcp2)).  */
  emit_insn (gen_m2addrf4_cond (quot_out, pred, quo2, rmd1, rcp2,
                                rcp, sf0, rnd_dbl));

  /* Return the result to DFmode.  */
  emit_insn (gen_truncrfdf2 (operands[0], quot_out));
  DONE;
})
513
514;; Extended precision floating point division.
515
;; XFmode division.  Works entirely in RFmode: frcpa yields an initial
;; reciprocal and a predicate, and every refinement step is predicated
;; on it with rounding selector 2.  The intermediate steps use status
;; field 1; the last step uses status field 0 and takes the frcpa result
;; as its "predicate false" value.
(define_expand "divxf3"
  [(set
     (match_operand:XF 0 "fr_register_operand" "")
     (div:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "")
             (match_operand:XF 2 "fr_reg_or_fp01_operand" "")))]
  "TARGET_INLINE_FLOAT_DIV"
{
  /* Temporaries; allocation order is kept as in the original.  */
  rtx quot_out = gen_reg_rtx (RFmode);
  rtx num      = gen_reg_rtx (RFmode);
  rtx den      = gen_reg_rtx (RFmode);
  rtx rcp      = gen_reg_rtx (RFmode);
  rtx err      = gen_reg_rtx (RFmode);
  rtx rcp1     = gen_reg_rtx (RFmode);
  rtx err1     = gen_reg_rtx (RFmode);
  rtx quo1     = gen_reg_rtx (RFmode);
  rtx rcp2     = gen_reg_rtx (RFmode);
  rtx err2     = gen_reg_rtx (RFmode);
  rtx rcp3     = gen_reg_rtx (RFmode);
  rtx err3     = gen_reg_rtx (RFmode);
  rtx err4     = gen_reg_rtx (RFmode);
  rtx quo      = gen_reg_rtx (RFmode);
  rtx rmd      = gen_reg_rtx (RFmode);
  rtx rmd1     = gen_reg_rtx (RFmode);
  rtx pred     = gen_reg_rtx (CCImode);

  /* Constants: FP 0.0/1.0, status-field selectors, rounding selector.  */
  rtx fzero   = CONST0_RTX (RFmode);
  rtx fone    = CONST1_RTX (RFmode);
  rtx sf0     = CONST0_RTX (SImode);
  rtx sf1     = CONST1_RTX (SImode);
  rtx rnd_off = CONST2_RTX (SImode);

  /* Bring both inputs into RFmode (no code is generated for this).  */
  emit_insn (gen_extendxfrf2 (num, operands[1]));
  emit_insn (gen_extendxfrf2 (den, operands[2]));

  /* rcp = frcpa (num, den), also sets pred.  */
  emit_insn (gen_recip_approx_rf (rcp, num, den, pred, sf0));

  /* err = 1 - (den * rcp).  */
  emit_insn (gen_m2subrf4_cond (err, pred, fone, den, rcp,
                                fzero, sf1, rnd_off));

  /* quo = num * rcp.  */
  emit_insn (gen_mulrf3_cond (quo, pred, num, rcp,
                              fzero, sf1, rnd_off));

  /* err2 = err + (err * err).  */
  emit_insn (gen_m2addrf4_cond (err2, pred, err, err, err,
                                fzero, sf1, rnd_off));

  /* err1 = err * err.  */
  emit_insn (gen_mulrf3_cond (err1, pred, err, err,
                              fzero, sf1, rnd_off));

  /* rcp1 = rcp + (rcp * err2).  */
  emit_insn (gen_m2addrf4_cond (rcp1, pred, rcp, rcp, err2,
                                fzero, sf1, rnd_off));

  /* err3 = err + (err1 * err1).  */
  emit_insn (gen_m2addrf4_cond (err3, pred, err, err1, err1,
                                fzero, sf1, rnd_off));

  /* rcp2 = rcp + (rcp1 * err3).  */
  emit_insn (gen_m2addrf4_cond (rcp2, pred, rcp, rcp1, err3,
                                fzero, sf1, rnd_off));

  /* rmd = num - (den * quo).  */
  emit_insn (gen_m2subrf4_cond (rmd, pred, num, den, quo,
                                fzero, sf1, rnd_off));

  /* err4 = 1 - (den * rcp2).  */
  emit_insn (gen_m2subrf4_cond (err4, pred, fone, den, rcp2,
                                fzero, sf1, rnd_off));

  /* quo1 = quo + (rmd * rcp2).  */
  emit_insn (gen_m2addrf4_cond (quo1, pred, quo, rmd, rcp2,
                                fzero, sf1, rnd_off));

  /* rcp3 = rcp2 + (rcp2 * err4).  */
  emit_insn (gen_m2addrf4_cond (rcp3, pred, rcp2, rcp2, err4,
                                fzero, sf1, rnd_off));

  /* rmd1 = num - (den * quo1).  */
  emit_insn (gen_m2subrf4_cond (rmd1, pred, num, den, quo1,
                                fzero, sf1, rnd_off));

  /* quot_out = quo1 + (rmd1 * rcp3).  */
  emit_insn (gen_m2addrf4_cond (quot_out, pred, quo1, rmd1, rcp3,
                                rcp, sf0, rnd_off));

  /* Return the result to XFmode.  */
  emit_insn (gen_truncrfxf2 (operands[0], quot_out));
  DONE;
})
580
581
582;; Integer division operations
583
;; Signed SImode division done in floating point: convert both operands
;; to RFmode as signed integers, emit a conditional trap for a zero
;; divisor, run divsi3_internal, convert the RFmode quotient to DImode
;; (status field 1) and copy its SImode low part to the destination.
(define_expand "divsi3"
  [(set
     (match_operand:SI 0 "register_operand" "")
     (div:SI (match_operand:SI 1 "general_operand" "")
             (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quot_rf = gen_reg_rtx (RFmode);
  rtx quot_di = gen_reg_rtx (DImode);

  /* Dividend: force into a register, then convert (signed).  */
  if (!register_operand (operands[1], SImode))
    operands[1] = force_reg (SImode, operands[1]);
  rtx num_rf = gen_reg_rtx (RFmode);
  expand_float (num_rf, operands[1], 0);

  /* Divisor: force into a register, then convert (signed).  */
  if (!register_operand (operands[2], SImode))
    operands[2] = force_reg (SImode, operands[2]);
  rtx den_rf = gen_reg_rtx (RFmode);
  expand_float (den_rf, operands[2], 0);

  /* Conditional trap when the divisor equals zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
                            CONST1_RTX (SImode)));

  emit_insn (gen_divsi3_internal (quot_rf, num_rf, den_rf));

  emit_insn (gen_fix_truncrfdi2_alts (quot_di, quot_rf, const1_rtx));
  emit_move_insn (operands[0], gen_lowpart (SImode, quot_di));
  DONE;
})
614
;; Signed SImode remainder.  Expands divsi3 for the quotient, negates
;; the divisor, and hands quotient, negated divisor and dividend to
;; maddsi4 (defined elsewhere; presumably quotient * -divisor + dividend).
(define_expand "modsi3"
  [(set
     (match_operand:SI 0 "register_operand" "")
     (mod:SI (match_operand:SI 1 "general_operand" "")
             (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quotient = gen_reg_rtx (SImode);
  emit_insn (gen_divsi3 (quotient, operands[1], operands[2]));

  rtx neg_divisor
    = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);

  /* Sign-extend the dividend into a DImode temporary and use its low
     part: a trick to reuse the value that is sure to have already been
     copied to the FP regs.  */
  rtx dividend_di = gen_reg_rtx (DImode);
  convert_move (dividend_di, operands[1], 0);

  emit_insn (gen_maddsi4 (operands[0], quotient, neg_divisor,
                          gen_lowpart (SImode, dividend_di)));
  DONE;
})
637
;; Unsigned SImode division done in floating point: convert both
;; operands to RFmode as unsigned integers, emit a conditional trap for
;; a zero divisor, run divsi3_internal, convert the RFmode quotient to
;; DImode with the unsigned conversion (status field 1) and copy its
;; SImode low part to the destination.
(define_expand "udivsi3"
  [(set
     (match_operand:SI 0 "register_operand" "")
     (udiv:SI (match_operand:SI 1 "general_operand" "")
              (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quot_rf = gen_reg_rtx (RFmode);
  rtx quot_di = gen_reg_rtx (DImode);

  /* Dividend: force into a register, then convert (unsigned).  */
  if (!register_operand (operands[1], SImode))
    operands[1] = force_reg (SImode, operands[1]);
  rtx num_rf = gen_reg_rtx (RFmode);
  expand_float (num_rf, operands[1], 1);

  /* Divisor: force into a register, then convert (unsigned).  */
  if (!register_operand (operands[2], SImode))
    operands[2] = force_reg (SImode, operands[2]);
  rtx den_rf = gen_reg_rtx (RFmode);
  expand_float (den_rf, operands[2], 1);

  /* Conditional trap when the divisor equals zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
                            CONST1_RTX (SImode)));

  emit_insn (gen_divsi3_internal (quot_rf, num_rf, den_rf));

  emit_insn (gen_fixuns_truncrfdi2_alts (quot_di, quot_rf, const1_rtx));
  emit_move_insn (operands[0], gen_lowpart (SImode, quot_di));
  DONE;
})
668
;; Unsigned SImode remainder.  Expands udivsi3 for the quotient, negates
;; the divisor, and hands quotient, negated divisor and dividend to
;; maddsi4 (defined elsewhere; presumably quotient * -divisor + dividend).
(define_expand "umodsi3"
  [(set
     (match_operand:SI 0 "register_operand" "")
     (umod:SI (match_operand:SI 1 "general_operand" "")
              (match_operand:SI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quotient = gen_reg_rtx (SImode);
  emit_insn (gen_udivsi3 (quotient, operands[1], operands[2]));

  rtx neg_divisor
    = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);

  /* Zero-extend the dividend into a DImode temporary and use its low
     part: a trick to reuse the value that is sure to have already been
     copied to the FP regs.  */
  rtx dividend_di = gen_reg_rtx (DImode);
  convert_move (dividend_di, operands[1], 1);

  emit_insn (gen_maddsi4 (operands[0], quotient, neg_divisor,
                          gen_lowpart (SImode, dividend_di)));
  DONE;
})
691
;; Core of the SImode division: operands 1 and 2 are the dividend and
;; divisor already converted to RFmode; operand 0 receives the RFmode
;; quotient.  Every step, frcpa included, uses status field 1 and
;; rounding selector 2; the last step takes the frcpa result as its
;; "predicate false" value.
(define_expand "divsi3_internal"
  [(set
     (match_operand:RF 0 "fr_register_operand" "")
     (float:RF (div:SI (match_operand:RF 1 "fr_register_operand" "")
                       (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx num = operands[1];
  rtx den = operands[2];

  /* Temporaries; allocation order is kept as in the original.  */
  rtx rcp  = gen_reg_rtx (RFmode);
  rtx err  = gen_reg_rtx (RFmode);
  rtx err1 = gen_reg_rtx (RFmode);
  rtx quo  = gen_reg_rtx (RFmode);
  rtx quo1 = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);

  rtx fzero   = CONST0_RTX (RFmode);
  rtx fone    = CONST1_RTX (RFmode);
  rtx sf1     = CONST1_RTX (SImode);
  rtx rnd_off = CONST2_RTX (SImode);

  rtx tiny_exp = gen_reg_rtx (DImode);
  rtx tiny     = gen_reg_rtx (RFmode);

  /* Load constant 2**(-34): 65501 is the value handed to setf.exp
     (65501 == 0xffff - 34).  */
  emit_move_insn (tiny_exp, GEN_INT (65501));
  emit_insn (gen_setf_exp_rf (tiny, tiny_exp));

  /* rcp = frcpa (num, den), also sets pred.  */
  emit_insn (gen_recip_approx_rf (rcp, num, den, pred, sf1));

  /* err = 1 - (den * rcp).  */
  emit_insn (gen_m2subrf4_cond (err, pred, fone, den, rcp,
                                fzero, sf1, rnd_off));

  /* quo = num * rcp.  */
  emit_insn (gen_mulrf3_cond (quo, pred, num, rcp,
                              fzero, sf1, rnd_off));

  /* quo1 = quo + (quo * err).  */
  emit_insn (gen_m2addrf4_cond (quo1, pred, quo, quo, err,
                                fzero, sf1, rnd_off));

  /* err1 = (2**-34) + (err * err).  */
  emit_insn (gen_m2addrf4_cond (err1, pred, tiny, err, err,
                                fzero, sf1, rnd_off));

  /* result = quo1 + (err1 * quo1).  */
  emit_insn (gen_m2addrf4_cond (operands[0], pred, quo1, err1, quo1,
                                rcp, sf1, rnd_off));
  DONE;
})
731
;; Signed DImode division done in floating point: convert both operands
;; to RFmode as signed integers, emit a conditional trap for a zero
;; divisor, run the latency- or throughput-oriented internal sequence
;; chosen by TARGET_INLINE_INT_DIV, then convert the RFmode quotient
;; straight into the destination (status field 1).
(define_expand "divdi3"
  [(set
     (match_operand:DI 0 "register_operand" "")
     (div:DI (match_operand:DI 1 "general_operand" "")
             (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quot_rf = gen_reg_rtx (RFmode);

  /* Dividend: force into a register, then convert (signed).  */
  if (!register_operand (operands[1], DImode))
    operands[1] = force_reg (DImode, operands[1]);
  rtx num_rf = gen_reg_rtx (RFmode);
  expand_float (num_rf, operands[1], 0);

  /* Divisor: force into a register, then convert (signed).  */
  if (!register_operand (operands[2], DImode))
    operands[2] = force_reg (DImode, operands[2]);
  rtx den_rf = gen_reg_rtx (RFmode);
  expand_float (den_rf, operands[2], 0);

  /* Conditional trap when the divisor equals zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
                            CONST1_RTX (DImode)));

  rtx seq
    = (TARGET_INLINE_INT_DIV == INL_MIN_LAT
       ? gen_divdi3_internal_lat (quot_rf, num_rf, den_rf)
       : gen_divdi3_internal_thr (quot_rf, num_rf, den_rf));
  emit_insn (seq);

  emit_insn (gen_fix_truncrfdi2_alts (operands[0], quot_rf, const1_rtx));
  DONE;
})
763
;; Signed DImode remainder.  Expands divdi3 for the quotient, negates
;; the divisor, and hands quotient, negated divisor and dividend to
;; madddi4 (defined elsewhere; presumably quotient * -divisor + dividend).
;;
;; The pattern uses mod:DI: the destination and both sources are DImode,
;; so the operation code must carry the same mode (it previously read
;; mod:SI, unlike the matching umoddi3/divdi3 patterns).
(define_expand "moddi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(mod:DI (match_operand:DI 1 "general_operand" "")
		(match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op2_neg, div;

  /* Quotient of the signed division.  */
  div = gen_reg_rtx (DImode);
  emit_insn (gen_divdi3 (div, operands[1], operands[2]));

  /* Negated divisor, so the multiply-add yields the remainder.  */
  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
  DONE;
})
780
;; Unsigned DImode division done in floating point: convert both
;; operands to RFmode as unsigned integers, emit a conditional trap for
;; a zero divisor, run the latency- or throughput-oriented internal
;; sequence chosen by TARGET_INLINE_INT_DIV, then convert the RFmode
;; quotient straight into the destination with the unsigned conversion
;; (status field 1).
(define_expand "udivdi3"
  [(set
     (match_operand:DI 0 "register_operand" "")
     (udiv:DI (match_operand:DI 1 "general_operand" "")
              (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quot_rf = gen_reg_rtx (RFmode);

  /* Dividend: force into a register, then convert (unsigned).  */
  if (!register_operand (operands[1], DImode))
    operands[1] = force_reg (DImode, operands[1]);
  rtx num_rf = gen_reg_rtx (RFmode);
  expand_float (num_rf, operands[1], 1);

  /* Divisor: force into a register, then convert (unsigned).  */
  if (!register_operand (operands[2], DImode))
    operands[2] = force_reg (DImode, operands[2]);
  rtx den_rf = gen_reg_rtx (RFmode);
  expand_float (den_rf, operands[2], 1);

  /* Conditional trap when the divisor equals zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
                            CONST1_RTX (DImode)));

  rtx seq
    = (TARGET_INLINE_INT_DIV == INL_MIN_LAT
       ? gen_divdi3_internal_lat (quot_rf, num_rf, den_rf)
       : gen_divdi3_internal_thr (quot_rf, num_rf, den_rf));
  emit_insn (seq);

  emit_insn (gen_fixuns_truncrfdi2_alts (operands[0], quot_rf, const1_rtx));
  DONE;
})
812
;; Unsigned DImode remainder expanded inline on top of udivdi3.
;; Operand 0 is the result, operand 1 the dividend, operand 2 the divisor.
(define_expand "umoddi3"
  [(set (match_operand:DI 0 "register_operand" "")
	(umod:DI (match_operand:DI 1 "general_operand" "")
		 (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quotient = gen_reg_rtx (DImode);
  rtx neg_divisor;

  /* quotient = operands[1] / operands[2] (unsigned).  */
  emit_insn (gen_udivdi3 (quotient, operands[1], operands[2]));

  neg_divisor = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  /* Feed quotient, the negated divisor and the dividend to madddi4.  */
  emit_insn (gen_madddi4 (operands[0], quotient, neg_divisor, operands[1]));
  DONE;
})
829
;; RFmode quotient for the inline DImode divide, selected by the callers
;; when TARGET_INLINE_INT_DIV == INL_MIN_LAT.  Operand 1 is the dividend
;; and operand 2 the divisor, both already in RFmode; operand 0 receives
;; the RFmode quotient.  The sequence is a reciprocal approximation
;; followed by predicated refinement steps, each emitted with status
;; operand 1 and rounding operand 2 (none).
(define_expand "divdi3_internal_lat"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a         = operands[1];
  rtx b         = operands[2];
  rtx y         = gen_reg_rtx (RFmode);
  rtx y1        = gen_reg_rtx (RFmode);
  rtx y2        = gen_reg_rtx (RFmode);
  rtx e         = gen_reg_rtx (RFmode);
  rtx e1        = gen_reg_rtx (RFmode);
  rtx q         = gen_reg_rtx (RFmode);
  rtx q1        = gen_reg_rtx (RFmode);
  rtx q2        = gen_reg_rtx (RFmode);
  rtx r         = gen_reg_rtx (RFmode);
  rtx cond      = gen_reg_rtx (CCImode);
  rtx zero      = CONST0_RTX (RFmode);
  rtx one       = CONST1_RTX (RFmode);
  rtx status1   = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* y  = 1 / b			*/
  /* NOTE(review): recip_approx_rf presumably also sets cond, the predicate
     passed to every following step -- confirm against its pattern.  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
  /* e  = 1 - (b * y)		*/
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
  /* q  = a * y                 */
  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
  /* q1 = q + (q * e)		*/
  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
  /* e1 = e * e			*/
  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
  /* q2 = q1 + (e1 * q1)	*/
  emit_insn (gen_m2addrf4_cond (q2, cond, q1, e1, q1, zero, status1, trunc_off));
  /* y1 = y + (y * e)		*/
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
  /* r  = a - (b * q2)		*/
  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
  /* y2 = y1 + (y1 * e1)	*/
  emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
  /* q3 = q2 + (r * y2)		*/
  /* Unlike the earlier steps, y rather than zero is passed after the
     inputs; by analogy with addrf3_cond this is presumably the value
     selected when cond is false -- confirm against m2addrf4_cond.  */
  emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
  DONE;
})
875
;; RFmode quotient for the inline DImode divide, selected by the callers
;; when TARGET_INLINE_INT_DIV is not INL_MIN_LAT.  Operand 1 is the
;; dividend and operand 2 the divisor, both already in RFmode; operand 0
;; receives the RFmode quotient.  This variant refines the reciprocal
;; first and forms the quotient from it afterwards, using fewer
;; instructions and temporaries than divdi3_internal_lat.
(define_expand "divdi3_internal_thr"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a         = operands[1];
  rtx b         = operands[2];
  rtx y         = gen_reg_rtx (RFmode);
  rtx y1        = gen_reg_rtx (RFmode);
  rtx y2        = gen_reg_rtx (RFmode);
  rtx e         = gen_reg_rtx (RFmode);
  rtx e1        = gen_reg_rtx (RFmode);
  rtx q2        = gen_reg_rtx (RFmode);
  rtx r         = gen_reg_rtx (RFmode);
  rtx cond      = gen_reg_rtx (CCImode);
  rtx zero      = CONST0_RTX (RFmode);
  rtx one       = CONST1_RTX (RFmode);
  rtx status1   = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* y  = 1 / b			*/
  /* NOTE(review): recip_approx_rf presumably also sets cond, the predicate
     passed to every following step -- confirm against its pattern.  */
  emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
  /* e  = 1 - (b * y)		*/
  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
  /* y1 = y + (y * e)		*/
  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
  /* e1 = e * e			*/
  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
  /* y2 = y1 + (y1 * e1)	*/
  emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
  /* q2 = y2 * a		*/
  emit_insn (gen_mulrf3_cond (q2, cond, y2, a, zero, status1, trunc_off));
  /* r  = a - (b * q2)		*/
  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
  /* q3 = q2 + (r * y2)		*/
  /* Unlike the earlier steps, y rather than zero is passed after the
     inputs; by analogy with addrf3_cond this is presumably the value
     selected when cond is false -- confirm against m2addrf4_cond.  */
  emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
  DONE;
})
915
916;; SQRT operations
917
918
;; Reciprocal square root approximation (frsqrta).
;; operand 0 is the RFmode result register.
;; operand 1 is the RFmode input.
;; operand 2 is the CCImode predicate register written by the instruction.
;; operand 3 is a constant integer printed as the .sN suffix of the mnemonic.
;; The insn is marked as not predicable.
(define_insn "sqrt_approx_rf"
  [(set (match_operand:RF 0 "fr_register_operand" "=f")
                (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")]
			   UNSPEC_FR_SQRT_RECIP_APPROX_RES))
   (set (match_operand:CCI 2 "register_operand" "=c")
        (unspec:CCI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX))
   (use (match_operand:SI 3 "const_int_operand" ""))]
  ""
  "frsqrta.s%3 %0, %2 = %F1"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "no")])
930
;; SFmode square root expanded inline; dispatches to the _lat or _thr
;; expansion according to TARGET_INLINE_SQRT.
(define_expand "sqrtsf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
	(sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
  "TARGET_INLINE_SQRT"
{
  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
    emit_insn (gen_sqrtsf2_internal_lat (operands[0], operands[1]));
  else
    emit_insn (gen_sqrtsf2_internal_thr (operands[0], operands[1]));
  DONE;
})
944
;; SFmode square root, used by sqrtsf2 when TARGET_INLINE_SQRT is not
;; INL_MIN_LAT.  The input is widened to RFmode, refined from the
;; sqrt_approx_rf approximation with predicated multiply/add steps, and
;; the result is truncated back to SFmode.
(define_expand "sqrtsf2_internal_thr"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y         = gen_reg_rtx (RFmode);
  rtx b         = gen_reg_rtx (RFmode);
  rtx g         = gen_reg_rtx (RFmode);
  rtx e         = gen_reg_rtx (RFmode);
  rtx s         = gen_reg_rtx (RFmode);
  rtx f         = gen_reg_rtx (RFmode);
  rtx y1        = gen_reg_rtx (RFmode);
  rtx g1        = gen_reg_rtx (RFmode);
  rtx h         = gen_reg_rtx (RFmode);
  rtx d         = gen_reg_rtx (RFmode);
  rtx g2        = gen_reg_rtx (RFmode);
  rtx cond      = gen_reg_rtx (CCImode);
  rtx zero      = CONST0_RTX (RFmode);
  rtx one       = CONST1_RTX (RFmode);
  rtx c1        = ia64_dconst_0_5();
  rtx c2        = ia64_dconst_0_375();
  rtx reg_df_c1	= gen_reg_rtx (DFmode);
  rtx reg_df_c2	= gen_reg_rtx (DFmode);
  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
  rtx reg_rf_c2 = gen_reg_rtx (RFmode);
  rtx status0   = CONST0_RTX (SImode);
  rtx status1   = CONST1_RTX (SImode);
  rtx trunc_sgl = CONST0_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Put needed constants into registers.	 */
  /* The constants are loaded as DFmode values and then extended to RFmode.  */
  emit_insn (gen_movdf (reg_df_c1, c1));
  emit_insn (gen_movdf (reg_df_c2, c2));
  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
  emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
  /* Empty conversion to put input into RFmode.  */
  emit_insn (gen_extendsfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)			*/
  /* This insn also writes cond, the predicate of every following step.  */
  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
  /* g = b * y				*/
  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
  /* e = 1 - (g * y)			*/
  emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
  /* s = 0.5 + (0.375 * e)		*/
  emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
  /* f = y * e				*/
  emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off));
  /* y1 = y + (f * s)			*/
  emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off));
  /* g1 = single (b * y1)		*/
  emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl));
  /* h = 0.5 * y1			*/
  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off));
  /* d = b - g1 * g1			*/
  emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
  /* g2 = single(g1 + (d * h))		*/
  /* The final step uses status0 and passes y rather than zero after the
     inputs; presumably y is the value selected when cond is false --
     confirm against m2addrf4_cond.  */
  emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl));
  /* Conversion back into SFmode.       */
  emit_insn (gen_truncrfsf2 (operands[0], g2));
  DONE;
})
1006
;; SFmode square root, used by sqrtsf2 when TARGET_INLINE_SQRT is
;; INL_MIN_LAT.  The input is widened to RFmode, refined from the
;; sqrt_approx_rf approximation with predicated multiply/add steps, and
;; the result is truncated back to SFmode.
(define_expand "sqrtsf2_internal_lat"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y         = gen_reg_rtx (RFmode);
  rtx b         = gen_reg_rtx (RFmode);
  rtx g         = gen_reg_rtx (RFmode);
  rtx g1        = gen_reg_rtx (RFmode);
  rtx g2        = gen_reg_rtx (RFmode);
  rtx e         = gen_reg_rtx (RFmode);
  rtx s         = gen_reg_rtx (RFmode);
  rtx f         = gen_reg_rtx (RFmode);
  rtx f1        = gen_reg_rtx (RFmode);
  rtx h         = gen_reg_rtx (RFmode);
  rtx h1        = gen_reg_rtx (RFmode);
  rtx d         = gen_reg_rtx (RFmode);
  rtx cond      = gen_reg_rtx (CCImode);
  rtx zero      = CONST0_RTX (RFmode);
  rtx one       = CONST1_RTX (RFmode);
  rtx c1        = ia64_dconst_0_5();
  rtx c2        = ia64_dconst_0_375();
  rtx reg_df_c1	= gen_reg_rtx (DFmode);
  rtx reg_df_c2	= gen_reg_rtx (DFmode);
  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
  rtx reg_rf_c2 = gen_reg_rtx (RFmode);
  rtx status0   = CONST0_RTX (SImode);
  rtx status1   = CONST1_RTX (SImode);
  rtx trunc_sgl = CONST0_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Put needed constants into registers.	 */
  /* The constants are loaded as DFmode values and then extended to RFmode.  */
  emit_insn (gen_movdf (reg_df_c1, c1));
  emit_insn (gen_movdf (reg_df_c2, c2));
  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
  emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
  /* Empty conversion to put input into RFmode.  */
  emit_insn (gen_extendsfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)			*/
  /* This insn also writes cond, the predicate of every following step.  */
  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
  /* g = b * y				*/
  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
  /* e = 1 - (g * y)			*/
  emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
  /* h = 0.5 * y			*/
  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
  /* s = 0.5 + (0.375 * e)		*/
  emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
  /* f = e * g				*/
  emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off));
  /* g1 = single (g + (f * s))		*/
  emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl));
  /* f1 = e * h				*/
  emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off));
  /* d = b - g1 * g1			*/
  emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
  /* h1 = h + (f1 * s)			*/
  emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off));
  /* g2 = single(g1 + (d * h1))		*/
  /* The final step uses status0 and passes y rather than zero after the
     inputs; presumably y is the value selected when cond is false --
     confirm against m2addrf4_cond.  */
  emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl));
  /* Conversion back into SFmode.       */
  emit_insn (gen_truncrfsf2 (operands[0], g2));
  DONE;
})
1071
;; DFmode square root expanded inline through sqrtdf2_internal_thr.
(define_expand "sqrtdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
	(sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
  "TARGET_INLINE_SQRT"
{
  /* Only the _thr expansion is emitted for DFmode; no selection on
     INL_MIN_LAT is made here, unlike the SFmode expander.  */
  emit_insn (gen_sqrtdf2_internal_thr (operands[0], operands[1]));
  DONE;
})
1087
;; DFmode square root used by sqrtdf2.  The input is widened to RFmode,
;; refined from the sqrt_approx_rf approximation with predicated
;; multiply/add steps, and the result is truncated back to DFmode.
(define_expand "sqrtdf2_internal_thr"
  [(set (match_operand:DF 0 "fr_register_operand" "")
        (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y         = gen_reg_rtx (RFmode);
  rtx b         = gen_reg_rtx (RFmode);
  rtx g         = gen_reg_rtx (RFmode);
  rtx g1        = gen_reg_rtx (RFmode);
  rtx g2        = gen_reg_rtx (RFmode);
  rtx g3        = gen_reg_rtx (RFmode);
  rtx g4        = gen_reg_rtx (RFmode);
  rtx r         = gen_reg_rtx (RFmode);
  rtx r1        = gen_reg_rtx (RFmode);
  rtx h         = gen_reg_rtx (RFmode);
  rtx h1        = gen_reg_rtx (RFmode);
  rtx h2        = gen_reg_rtx (RFmode);
  rtx d         = gen_reg_rtx (RFmode);
  rtx d1        = gen_reg_rtx (RFmode);
  rtx cond      = gen_reg_rtx (CCImode);
  rtx zero      = CONST0_RTX (RFmode);
  rtx c1        = ia64_dconst_0_5();
  rtx reg_df_c1	= gen_reg_rtx (DFmode);
  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
  rtx status0   = CONST0_RTX (SImode);
  rtx status1   = CONST1_RTX (SImode);
  rtx trunc_dbl = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Put needed constants into registers.	 */
  /* The constant is loaded as a DFmode value and then extended to RFmode.  */
  emit_insn (gen_movdf (reg_df_c1, c1));
  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
  /* Empty conversion to put input into RFmode.  */
  emit_insn (gen_extenddfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)			*/
  /* This insn also writes cond, the predicate of every following step.  */
  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
  /* g = b * y				*/
  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
  /* h = 0.5 * y			*/
  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
  /* r = 0.5 - (g * h)			*/
  emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
  /* g1 = g + (g * r)			*/
  emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off));
  /* h1 = h + (h * r)			*/
  emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off));
  /* r1 = 0.5 - (g1 * h1)		*/
  emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
  /* g2 = g1 + (g1 * r1)		*/
  emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off));
  /* h2 = h1 + (h1 * r1)		*/
  emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off));
  /* d = b - (g2 * g2)			*/
  emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
  /* g3 = g2 + (d * h2)			*/
  emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
  /* d1 = b - (g3 * g3)			*/
  emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
  /* g4 = double (g3 + (d1 * h2))	*/
  /* The final step passes y rather than zero after the inputs; presumably
     y is the value selected when cond is false -- confirm against
     m2addrf4_cond.  */
  emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status1, trunc_dbl));
  /* Conversion back into DFmode.       */
  emit_insn (gen_truncrfdf2 (operands[0], g4));
  DONE;
})
1152
;; XFmode square root expanded inline.  The input is converted to RFmode,
;; refined from the sqrt_approx_rf approximation with predicated
;; multiply/add steps (none of them rounded to a narrower precision), and
;; the result is converted back to XFmode.
(define_expand "sqrtxf2"
  [(set (match_operand:XF 0 "fr_register_operand" "")
        (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y         = gen_reg_rtx (RFmode);
  rtx b         = gen_reg_rtx (RFmode);
  rtx g         = gen_reg_rtx (RFmode);
  rtx g1        = gen_reg_rtx (RFmode);
  rtx g2        = gen_reg_rtx (RFmode);
  rtx g3        = gen_reg_rtx (RFmode);
  rtx g4        = gen_reg_rtx (RFmode);
  rtx e         = gen_reg_rtx (RFmode);
  rtx e1        = gen_reg_rtx (RFmode);
  rtx e2        = gen_reg_rtx (RFmode);
  rtx h         = gen_reg_rtx (RFmode);
  rtx h1        = gen_reg_rtx (RFmode);
  rtx h2        = gen_reg_rtx (RFmode);
  rtx h3        = gen_reg_rtx (RFmode);
  rtx d         = gen_reg_rtx (RFmode);
  rtx d1        = gen_reg_rtx (RFmode);
  rtx cond      = gen_reg_rtx (CCImode);
  rtx zero      = CONST0_RTX (RFmode);
  rtx c1        = ia64_dconst_0_5();
  rtx reg_df_c1	= gen_reg_rtx (DFmode);
  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
  rtx status0   = CONST0_RTX (SImode);
  rtx status1   = CONST1_RTX (SImode);
  rtx trunc_off = CONST2_RTX (SImode);

  /* Put needed constants into registers.	 */
  /* The constant is loaded as a DFmode value and then extended to RFmode.  */
  emit_insn (gen_movdf (reg_df_c1, c1));
  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
  /* Empty conversion to put input into RFmode.  */
  emit_insn (gen_extendxfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)			*/
  /* This insn also writes cond, the predicate of every following step.  */
  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
  /* g = b * y				*/
  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
  /* h = 0.5 * y			*/
  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
  /* e = 0.5 - (g * h)			*/
  emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
  /* g1 = g + (g * e)			*/
  emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off));
  /* h1 = h + (h * e)			*/
  emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off));
  /* e1 = 0.5 - (g1 * h1)		*/
  emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
  /* g2 = g1 + (g1 * e1)		*/
  emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off));
  /* h2 = h1 + (h1 * e1)		*/
  emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off));
  /* d = b - (g2 * g2)			*/
  emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
  /* e2 = 0.5 - (g2 * h2)		*/
  emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off));
  /* g3 = g2 + (d * h2)			*/
  emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
  /* h3 = h2 + (e2 * h2)		*/
  emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off));
  /* d1 = b - (g3 * g3)			*/
  emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
  /* g4 = g3 + (d1 * h3)		*/
  /* The final step passes y rather than zero after the inputs; presumably
     y is the value selected when cond is false -- confirm against
     m2addrf4_cond.  */
  emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status1, trunc_off));
  /* Conversion back into XFmode.       */
  emit_insn (gen_truncrfxf2 (operands[0], g4));
  DONE;
})
1222