1;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify it
6;; under the terms of the GNU General Public License as published
7;; by the Free Software Foundation; either version 3, or (at your
8;; option) any later version.
9;;
10;; GCC is distributed in the hope that it will be useful, but WITHOUT
11;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
13;; License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; This file contains ARM instructions that support fixed-point operations.
20
;; Non-saturating addition for each mode of the FIXED iterator, emitted as a
;; single ADD.  Two alternatives are offered: "l" registers (the only one
;; flagged predicable_short_it) and general "r" registers.
(define_insn "add<mode>3"
  [(set
     (match_operand:FIXED 0 "s_register_operand" "=l,r")
     (plus:FIXED
       (match_operand:FIXED 1 "s_register_operand" "l,r")
       (match_operand:FIXED 2 "s_register_operand" "l,r")))]
  "TARGET_32BIT"
  "add%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "yes,no")
   (set_attr "type" "alu_sreg")]
)
30
;; Non-saturating addition for each mode of the ADDSUB iterator, emitted as
;; SADD with the mode-dependent <qaddsub_suf> suffix.
(define_insn "add<mode>3"
  [(set
     (match_operand:ADDSUB 0 "s_register_operand" "=r")
     (plus:ADDSUB
       (match_operand:ADDSUB 1 "s_register_operand" "r")
       (match_operand:ADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "sadd<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_dsp_reg")]
)
39
;; Unsigned saturating addition (us_plus) for each mode of the UQADDSUB
;; iterator, emitted as UQADD with the mode-dependent suffix.
(define_insn "usadd<mode>3"
  [(set
     (match_operand:UQADDSUB 0 "s_register_operand" "=r")
     (us_plus:UQADDSUB
       (match_operand:UQADDSUB 1 "s_register_operand" "r")
       (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "uqadd<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_dsp_reg")]
)
48
;; Signed saturating addition (ss_plus) for each mode of the QADDSUB
;; iterator, emitted as QADD with the mode-dependent suffix.
(define_insn "ssadd<mode>3"
  [(set
     (match_operand:QADDSUB 0 "s_register_operand" "=r")
     (ss_plus:QADDSUB
       (match_operand:QADDSUB 1 "s_register_operand" "r")
       (match_operand:QADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "qadd<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_dsp_reg")]
)
57
;; Non-saturating subtraction for each mode of the FIXED iterator, emitted as
;; a single SUB.  Two alternatives are offered: "l" registers (the only one
;; flagged predicable_short_it) and general "r" registers.
(define_insn "sub<mode>3"
  [(set
     (match_operand:FIXED 0 "s_register_operand" "=l,r")
     (minus:FIXED
       (match_operand:FIXED 1 "s_register_operand" "l,r")
       (match_operand:FIXED 2 "s_register_operand" "l,r")))]
  "TARGET_32BIT"
  "sub%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "predicable_short_it" "yes,no")
   (set_attr "type" "alu_sreg")]
)
67
;; Non-saturating subtraction for each mode of the ADDSUB iterator, emitted
;; as SSUB with the mode-dependent <qaddsub_suf> suffix.
(define_insn "sub<mode>3"
  [(set
     (match_operand:ADDSUB 0 "s_register_operand" "=r")
     (minus:ADDSUB
       (match_operand:ADDSUB 1 "s_register_operand" "r")
       (match_operand:ADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "ssub<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_dsp_reg")]
)
76
;; Unsigned saturating subtraction (us_minus) for each mode of the UQADDSUB
;; iterator, emitted as UQSUB with the mode-dependent suffix.
(define_insn "ussub<mode>3"
  [(set
     (match_operand:UQADDSUB 0 "s_register_operand" "=r")
     (us_minus:UQADDSUB
       (match_operand:UQADDSUB 1 "s_register_operand" "r")
       (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "uqsub<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_dsp_reg")]
)
86
;; Signed saturating subtraction (ss_minus) for each mode of the QADDSUB
;; iterator, emitted as QSUB with the mode-dependent suffix.
(define_insn "sssub<mode>3"
  [(set
     (match_operand:QADDSUB 0 "s_register_operand" "=r")
     (ss_minus:QADDSUB
       (match_operand:QADDSUB 1 "s_register_operand" "r")
       (match_operand:QADDSUB 2 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "qsub<qaddsub_suf>%?\\t%0, %1, %2"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_dsp_reg")]
)
95
96;; Fractional multiplies.
97
;; Note: none of these patterns perform any rounding.
99
;; QQ multiply: sign-extend both 8-bit inputs to HImode, form the 32-bit
;; product with mulhisi3, then sign-extract an 8-bit field starting at bit 7
;; of that product into the result.
(define_expand "mulqq3"
  [(set (match_operand:QQ 0 "s_register_operand" "")
        (mult:QQ (match_operand:QQ 1 "s_register_operand" "")
                 (match_operand:QQ 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
{
  rtx lhs_hi = gen_reg_rtx (HImode);
  rtx rhs_hi = gen_reg_rtx (HImode);
  rtx product = gen_reg_rtx (SImode);

  /* Widen each operand from its QImode low part to a signed HImode value.  */
  emit_insn (gen_extendqihi2 (lhs_hi, gen_lowpart (QImode, operands[1])));
  emit_insn (gen_extendqihi2 (rhs_hi, gen_lowpart (QImode, operands[2])));

  /* 16 x 16 -> 32-bit signed multiply.  */
  emit_insn (gen_mulhisi3 (product, lhs_hi, rhs_hi));

  /* Width 8, starting at bit 7 of the product.  */
  emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), product,
                       GEN_INT (8), GEN_INT (7)));
  DONE;
})
117
;; HQ multiply: one mulhisi3 on the HImode low parts of the inputs, followed
;; by a sign-extract of a 16-bit field starting at bit 15 of the product.
(define_expand "mulhq3"
  [(set (match_operand:HQ 0 "s_register_operand" "")
        (mult:HQ (match_operand:HQ 1 "s_register_operand" "")
                 (match_operand:HQ 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
{
  rtx product = gen_reg_rtx (SImode);

  emit_insn (gen_mulhisi3 (product,
                           gen_lowpart (HImode, operands[1]),
                           gen_lowpart (HImode, operands[2])));

  /* An s.15 * s.15 multiplication yields an s.30 result; pull an s.15 value
     back out of it.  For _Fract values this cannot overflow or saturate.  */
  emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), product,
                       GEN_INT (16), GEN_INT (15)));
  DONE;
})
135
;; SQ multiply: form the full 64-bit signed product and take bits [62:31] of
;; it, assembled from the two 32-bit halves with a pair of shifts and an OR.
(define_expand "mulsq3"
  [(set (match_operand:SQ 0 "s_register_operand" "")
        (mult:SQ (match_operand:SQ 1 "s_register_operand" "")
                 (match_operand:SQ 2 "s_register_operand" "")))]
  "TARGET_32BIT && arm_arch3m"
{
  rtx product = gen_reg_rtx (DImode);
  rtx from_low = gen_reg_rtx (SImode);
  rtx from_high = gen_reg_rtx (SImode);

  /* s.31 * s.31 -> s.62 multiplication.  */
  emit_insn (gen_mulsidi3 (product,
                           gen_lowpart (SImode, operands[1]),
                           gen_lowpart (SImode, operands[2])));

  /* Low word contributes its top bit; high word is moved up by one.  */
  emit_insn (gen_lshrsi3 (from_low, gen_lowpart (SImode, product),
                          GEN_INT (31)));
  emit_insn (gen_ashlsi3 (from_high, gen_highpart (SImode, product),
                          GEN_INT (1)));
  emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]),
                         from_low, from_high));

  DONE;
})
155
156;; Accumulator multiplies.
157
;; SA multiply: form the full 64-bit signed product and take bits [46:15] of
;; it, assembled from the two 32-bit halves with a pair of shifts and an OR.
(define_expand "mulsa3"
  [(set (match_operand:SA 0 "s_register_operand" "")
        (mult:SA (match_operand:SA 1 "s_register_operand" "")
                 (match_operand:SA 2 "s_register_operand" "")))]
  "TARGET_32BIT && arm_arch3m"
{
  rtx product = gen_reg_rtx (DImode);
  rtx from_low = gen_reg_rtx (SImode);
  rtx from_high = gen_reg_rtx (SImode);

  emit_insn (gen_mulsidi3 (product,
                           gen_lowpart (SImode, operands[1]),
                           gen_lowpart (SImode, operands[2])));

  /* Drop the low 15 bits of the product: low word >> 15, high word << 17.  */
  emit_insn (gen_lshrsi3 (from_low, gen_lowpart (SImode, product),
                          GEN_INT (15)));
  emit_insn (gen_ashlsi3 (from_high, gen_highpart (SImode, product),
                          GEN_INT (17)));
  emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]),
                         from_low, from_high));

  DONE;
})
176
;; USA multiply: form the full 64-bit unsigned product and take bits [47:16]
;; of it, assembled from the two 32-bit halves with a pair of shifts and an
;; OR.
(define_expand "mulusa3"
  [(set (match_operand:USA 0 "s_register_operand" "")
        (mult:USA (match_operand:USA 1 "s_register_operand" "")
                  (match_operand:USA 2 "s_register_operand" "")))]
  "TARGET_32BIT && arm_arch3m"
{
  rtx product = gen_reg_rtx (DImode);
  rtx from_low = gen_reg_rtx (SImode);
  rtx from_high = gen_reg_rtx (SImode);

  emit_insn (gen_umulsidi3 (product,
                            gen_lowpart (SImode, operands[1]),
                            gen_lowpart (SImode, operands[2])));

  /* Drop the low 16 bits of the product: low word >> 16, high word << 16.  */
  emit_insn (gen_lshrsi3 (from_low, gen_lowpart (SImode, product),
                          GEN_INT (16)));
  emit_insn (gen_ashlsi3 (from_high, gen_highpart (SImode, product),
                          GEN_INT (16)));
  emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]),
                         from_low, from_high));

  DONE;
})
195
196;; The code sequence emitted by this insn pattern uses the Q flag, which GCC
197;; doesn't generally know about, so we don't bother expanding to individual
198;; instructions.  It may be better to just use an out-of-line asm libcall for
199;; this.
200
;; Signed saturating SA multiply, output as one fixed multi-instruction
;; sequence.  Operand 3 is a DImode scratch holding the 64-bit product and
;; operand 4 an SImode scratch; the condition codes are clobbered.
(define_insn "ssmulsa3"
  [(set (match_operand:SA 0 "s_register_operand" "=r")
        (ss_mult:SA (match_operand:SA 1 "s_register_operand" "r")
                    (match_operand:SA 2 "s_register_operand" "r")))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:SI 4 "=r"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && arm_arch6"
{
  /* s16.15 * s16.15 -> s32.30.  */
  output_asm_insn ("smull\\t%Q3, %R3, %1, %2", operands);

  /* Write zero to APSR_nzcvq so that a Q flag observed later can only come
     from the SSAT below.  Outside ARM state the zero is first materialised
     in scratch operand 4.  */
  if (!TARGET_ARM)
    {
      output_asm_insn ("mov\\t%4, #0", operands);
      output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands);
    }
  else
    output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands);

  /* Layout of the 64-bit product:

       31  high word  0     31  low word  0
     [ S i i .... i i i ] [ i f f f ... f f ]

     The 32-bit result is the window starting at bit 15 of the low word:

              [ S i ... i f ... f f ]

     Need 16 integral bits, so saturate at 15th bit of high word.  */
  output_asm_insn ("ssat\\t%R3, #15, %R3", operands);

  /* Read APSR back and test bit 27 to see whether the SSAT saturated.  */
  output_asm_insn ("mrs\\t%4, APSR", operands);
  output_asm_insn ("tst\\t%4, #1<<27", operands);

  /* On saturation, replace the low word with the complement of the high
     word's sign fill (high word "asr #32", inverted).  */
  if (!arm_restrict_it)
    {
      if (TARGET_THUMB2)
        output_asm_insn ("it\\tne", operands);
      output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands);
    }
  else
    {
      /* Compute the value unconditionally into the scratch so that only a
         plain register move sits inside the IT block.  */
      output_asm_insn ("mvn\\t%4, %R3, asr #32", operands);
      output_asm_insn ("it\\tne", operands);
      output_asm_insn ("movne\\t%Q3, %4", operands);
    }

  /* Result = (low >> 15) | (high << 17).  */
  output_asm_insn ("mov\\t%0, %Q3, lsr #15", operands);
  output_asm_insn ("orr\\t%0, %0, %R3, asl #17", operands);
  return "";
}
  [(set_attr "conds" "clob")
   (set_attr "type" "multiple")
   (set (attr "length")
        (if_then_else (eq_attr "is_thumb" "yes")
                      (if_then_else (match_test "arm_restrict_it")
                                    (const_int 40)
                                    (const_int 38))
                      (const_int 32)))])
258
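;; As with ssmulsa3, the sequence below relies on the Q flag, so it is likewise
;; emitted as a single multi-instruction pattern.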
260
;; Unsigned saturating USA multiply, output as one fixed multi-instruction
;; sequence.  Operand 3 is a DImode scratch holding the 64-bit product and
;; operand 4 an SImode scratch; the condition codes are clobbered.
;;
;; The sequence always emits SBFX (conditional or not), which is only
;; available from ARMv6T2 onwards, so the pattern is additionally gated on
;; arm_arch_thumb2 rather than on arm_arch6 alone.
(define_insn "usmulusa3"
  [(set (match_operand:USA 0 "s_register_operand" "=r")
        (us_mult:USA (match_operand:USA 1 "s_register_operand" "r")
                     (match_operand:USA 2 "s_register_operand" "r")))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:SI 4 "=r"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && arm_arch6 && arm_arch_thumb2"
{
  /* 16.16 * 16.16 -> 32.32.  */
  output_asm_insn ("umull\\t%Q3, %R3, %1, %2", operands);

  /* Write zero to APSR_nzcvq so that a Q flag observed later can only come
     from the USAT below.  Outside ARM state the zero is first materialised
     in scratch operand 4.  */
  if (TARGET_ARM)
    output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands);
  else
    {
      output_asm_insn ("mov\\t%4, #0", operands);
      output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands);
    }

  /* We have:
      31  high word  0     31  low word  0

    [ i i i .... i i i ] [ f f f f ... f f ]
                        |
                        v
             [ i i ... i f ... f f ]

    Need 16 integral bits, so saturate at 16th bit of high word.  */

  output_asm_insn ("usat\\t%R3, #16, %R3", operands);

  /* Read APSR back and test bit 27 to see whether the USAT saturated.  */
  output_asm_insn ("mrs\\t%4, APSR", operands);
  output_asm_insn ("tst\\t%4, #1<<27", operands);

  /* On saturation, overwrite the low word with bit 15 of the saturated high
     word replicated across the register (SBFX of a 1-bit field).  */
  if (arm_restrict_it)
    {
      /* Compute the value unconditionally into the scratch so that only a
         plain register move sits inside the IT block.  */
      output_asm_insn ("sbfx\\t%4, %R3, #15, #1", operands);
      output_asm_insn ("it\\tne", operands);
      output_asm_insn ("movne\\t%Q3, %4", operands);
    }
  else
    {
      if (TARGET_THUMB2)
        output_asm_insn ("it\\tne", operands);
      output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands);
    }

  /* Result = (low >> 16) | (high << 16).  */
  output_asm_insn ("lsr\\t%0, %Q3, #16", operands);
  output_asm_insn ("orr\\t%0, %0, %R3, asl #16", operands);
  return "";
}
  [(set_attr "conds" "clob")
   (set_attr "type" "multiple")
   (set (attr "length")
        (if_then_else (eq_attr "is_thumb" "yes")
                      (if_then_else (match_test "arm_restrict_it")
                                    (const_int 40)
                                    (const_int 38))
                      (const_int 32)))])
318
;; HA multiply: one mulhisi3 on the HImode low parts of the inputs, followed
;; by a sign-extract of a 16-bit field starting at bit 7 of the product.
(define_expand "mulha3"
  [(set (match_operand:HA 0 "s_register_operand" "")
        (mult:HA (match_operand:HA 1 "s_register_operand" "")
                 (match_operand:HA 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY && arm_arch_thumb2"
{
  rtx product = gen_reg_rtx (SImode);

  emit_insn (gen_mulhisi3 (product,
                           gen_lowpart (HImode, operands[1]),
                           gen_lowpart (HImode, operands[2])));

  /* Width 16, starting at bit 7 of the product.  */
  emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), product,
                       GEN_INT (16), GEN_INT (7)));

  DONE;
})
334
;; UHA multiply: zero-extend both inputs to SImode, multiply them with a
;; plain mulsi3, then zero-extract a 16-bit field starting at bit 8 of the
;; product.
(define_expand "muluha3"
  [(set (match_operand:UHA 0 "s_register_operand" "")
        (mult:UHA (match_operand:UHA 1 "s_register_operand" "")
                  (match_operand:UHA 2 "s_register_operand" "")))]
  "TARGET_DSP_MULTIPLY"
{
  rtx lhs_si = gen_reg_rtx (SImode);
  rtx rhs_si = gen_reg_rtx (SImode);
  rtx product = gen_reg_rtx (SImode);

  /* 8.8 * 8.8 -> 16.16 multiply.  */
  emit_insn (gen_zero_extendhisi2 (lhs_si,
                                   gen_lowpart (HImode, operands[1])));
  emit_insn (gen_zero_extendhisi2 (rhs_si,
                                   gen_lowpart (HImode, operands[2])));
  emit_insn (gen_mulsi3 (product, lhs_si, rhs_si));

  /* Width 16, starting at bit 8 of the product.  */
  emit_insn (gen_extzv (gen_lowpart (SImode, operands[0]), product,
                        GEN_INT (16), GEN_INT (8)));

  DONE;
})
354
;; Signed saturating HA multiply: mulhisi3 on the HImode low parts of the
;; inputs, then a SET of the HImode result to the ss_truncate of the product
;; arithmetically shifted right by 7.
(define_expand "ssmulha3"
  [(set (match_operand:HA 0 "s_register_operand" "")
        (ss_mult:HA (match_operand:HA 1 "s_register_operand" "")
                    (match_operand:HA 2 "s_register_operand" "")))]
  "TARGET_32BIT && TARGET_DSP_MULTIPLY && arm_arch6"
{
  rtx product = gen_reg_rtx (SImode);

  emit_insn (gen_mulhisi3 (product,
                           gen_lowpart (HImode, operands[1]),
                           gen_lowpart (HImode, operands[2])));

  /* Build (ss_truncate:HI (ashiftrt:SI product 7)) by hand and emit it as
     a raw SET.  */
  rtx shifted = gen_rtx_ASHIFTRT (SImode, product, GEN_INT (7));
  rtx saturated = gen_rtx_SS_TRUNCATE (HImode, shifted);

  emit_insn (gen_rtx_SET (gen_lowpart (HImode, operands[0]), saturated));

  DONE;
})
374
;; Unsigned saturating UHA multiply: zero-extend both inputs, multiply with
;; mulsi3, logically shift the product right by 8, and saturate the result
;; to 16 bits through arm_usatsihi.
(define_expand "usmuluha3"
  [(set (match_operand:UHA 0 "s_register_operand" "")
        (us_mult:UHA (match_operand:UHA 1 "s_register_operand" "")
                     (match_operand:UHA 2 "s_register_operand" "")))]
  "TARGET_INT_SIMD"
{
  rtx lhs_si = gen_reg_rtx (SImode);
  rtx rhs_si = gen_reg_rtx (SImode);
  rtx product = gen_reg_rtx (SImode);
  rtx shifted = gen_reg_rtx (SImode);

  /* There is no unsigned counterpart of smul[bt][bt], so widen the inputs
     and fall back on an ordinary 32x32->32-bit multiply.  */
  emit_insn (gen_zero_extendhisi2 (lhs_si,
                                   gen_lowpart (HImode, operands[1])));
  emit_insn (gen_zero_extendhisi2 (rhs_si,
                                   gen_lowpart (HImode, operands[2])));
  emit_insn (gen_mulsi3 (product, lhs_si, rhs_si));

  /* "usat" treats its input as signed.  PRODUCT may have its top bit set,
     in which case the "..., asr #8" form of the instruction would see a
     negative value and saturate to zero.  Do the right shift separately,
     as a logical shift, before saturating.  */
  emit_insn (gen_lshrsi3 (shifted, product, GEN_INT (8)));
  emit_insn (gen_arm_usatsihi (gen_lowpart (HImode, operands[0]), shifted));

  DONE;
})
402
;; Signed saturation to 16 bits of a shifted SImode register.  Operator 1
;; must satisfy sat_shift_operator; it is applied to register operand 2 with
;; the immediate shift count in operand 3, and is printed via %S1.
(define_insn "arm_ssatsihi_shift"
  [(set
     (match_operand:HI 0 "s_register_operand" "=r")
     (ss_truncate:HI
       (match_operator:SI 1 "sat_shift_operator"
         [(match_operand:SI 2 "s_register_operand" "r")
          (match_operand:SI 3 "immediate_operand" "I")])))]
  "TARGET_32BIT && arm_arch6"
  "ssat%?\\t%0, #16, %2%S1"
  [(set_attr "predicable" "yes")
   (set_attr "shift" "1")
   (set_attr "type" "alu_shift_imm")]
)
413
;; Unsigned saturation of an SImode register to 16 bits, emitted as USAT.
;; The input operand carries an explicit "r" constraint, like every other
;; register operand in this file, so that register allocation is told to
;; place it in a core register.
(define_insn "arm_usatsihi"
  [(set (match_operand:HI 0 "s_register_operand" "=r")
        (us_truncate:HI (match_operand:SI 1 "s_register_operand" "r")))]
  "TARGET_INT_SIMD"
  "usat%?\\t%0, #16, %1"
  [(set_attr "predicable" "yes")
   (set_attr "type" "alu_imm")]
)
422