1;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
2
3;; This file is free software; you can redistribute it and/or modify it under
4;; the terms of the GNU General Public License as published by the Free
5;; Software Foundation; either version 3 of the License, or (at your option)
6;; any later version.
7
8;; This file is distributed in the hope that it will be useful, but WITHOUT
9;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11;; for more details.
12
13;; You should have received a copy of the GNU General Public License
14;; along with GCC; see the file COPYING3.  If not see
15;; <http://www.gnu.org/licenses/>.
16
17;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
18
19
20;; Define an insn type attribute.  This is used in function unit delay
21;; computations.
22;; multi0 is a multiple insn rtl whose first insn is in pipe0
23;; multi1 is a multiple insn rtl whose first insn is in pipe1
24(define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert"
25  (const_string "fx2"))
26
27;; Length (in bytes).
28(define_attr "length" ""
29		(const_int 4))
30
31(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
32;; Processor type -- this attribute must exactly match the processor_type
33;; enumeration in spu.h.
34
35(define_attr "cpu" "spu"
36  (const (symbol_ref "spu_cpu_attr")))
37
38; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
39;			TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
40
41(define_cpu_unit "pipe0,pipe1,fp,ls")
42
43(define_insn_reservation "NOP" 1 (eq_attr "type" "nop")
44    "pipe0")
45
46(define_insn_reservation "FX2" 2 (eq_attr "type" "fx2")
47    "pipe0, nothing")
48
49(define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb")
50    "pipe0, nothing*3")
51
52(define_insn_reservation "FP6" 6 (eq_attr "type" "fp6")
53    "pipe0 + fp, nothing*5")
54
55(define_insn_reservation "FP7" 7 (eq_attr "type" "fp7")
56    "pipe0, fp, nothing*5")
57
58;; The behavior of the double precision is that both pipes stall
59;; for 6 cycles and the rest of the operation pipelines for
60;; 7 cycles.  The simplest way to model this is to simply ignore
61;; the 6 cyle stall.
62(define_insn_reservation "FPD" 7
63  (and (eq_attr "tune" "cell")
64       (eq_attr "type" "fpd"))
65    "pipe0 + pipe1, fp, nothing*5")
66
67;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
68(define_insn_reservation "FPD_CELLEDP" 9
69  (and (eq_attr "tune" "celledp")
70       (eq_attr "type" "fpd"))
71  "pipe0 + fp, nothing*8")
72
73(define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
74    "pipe1")
75
76(define_insn_reservation "STORE" 1 (eq_attr "type" "store")
77    "pipe1 + ls")
78
79(define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch")
80    "pipe1 + ls")
81
82(define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr")
83    "pipe1, nothing*3")
84
85(define_insn_reservation "LOAD" 6 (eq_attr "type" "load")
86    "pipe1 + ls, nothing*5")
87
88(define_insn_reservation "HBR" 18 (eq_attr "type" "hbr")
89    "pipe1, nothing*15")
90
91(define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0")
92    "pipe0+pipe1, nothing*3")
93
94(define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1")
95    "pipe1, nothing*3")
96
97(define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert")
98    "nothing")
99
100;; Force pipe0 to occur before pipe 1 in a cycle.
101(absence_set "pipe0" "pipe1")
102
103
104(define_c_enum "unspec" [
105  UNSPEC_IPREFETCH
106  UNSPEC_FREST
107  UNSPEC_FRSQEST
108  UNSPEC_FI
109  UNSPEC_EXTEND_CMP
110  UNSPEC_CG
111  UNSPEC_CGX
112  UNSPEC_ADDX
113  UNSPEC_BG
114  UNSPEC_BGX
115  UNSPEC_SFX
116  UNSPEC_FSM
117  UNSPEC_HBR
118  UNSPEC_NOP
119  UNSPEC_CONVERT
120  UNSPEC_SELB
121  UNSPEC_SHUFB
122  UNSPEC_CPAT
123  UNSPEC_CNTB
124  UNSPEC_SUMB
125  UNSPEC_FSMB
126  UNSPEC_FSMH
127  UNSPEC_GBB
128  UNSPEC_GBH
129  UNSPEC_GB
130  UNSPEC_AVGB
131  UNSPEC_ABSDB
132  UNSPEC_ORX
133  UNSPEC_HEQ
134  UNSPEC_HGT
135  UNSPEC_HLGT
136  UNSPEC_STOP
137  UNSPEC_STOPD
138  UNSPEC_SET_INTR
139  UNSPEC_FSCRRD
140  UNSPEC_FSCRWR
141  UNSPEC_MFSPR
142  UNSPEC_MTSPR
143  UNSPEC_RDCH
144  UNSPEC_RCHCNT
145  UNSPEC_WRCH
146  UNSPEC_SPU_REALIGN_LOAD
147  UNSPEC_SPU_MASK_FOR_LOAD
148  UNSPEC_DFTSV
149  UNSPEC_FLOAT_EXTEND
150  UNSPEC_FLOAT_TRUNCATE
151  UNSPEC_SP_SET
152  UNSPEC_SP_TEST
153])
154
155(define_c_enum "unspecv" [
156  UNSPECV_BLOCKAGE
157  UNSPECV_LNOP
158  UNSPECV_NOP
159  UNSPECV_SYNC
160])
161
162(include "predicates.md")
163(include "constraints.md")
164
165
166;; Mode iterators
167
168(define_mode_iterator ALL [QI V16QI
169			HI V8HI
170			SI V4SI
171			DI V2DI
172			TI
173                        SF V4SF
174                        DF V2DF])
175
176; Everything except DI and TI which are handled separately because
177; they need different constraints to correctly test VOIDmode constants
178(define_mode_iterator MOV [QI V16QI
179			HI V8HI
180			SI V4SI
181			V2DI
182                        SF V4SF
183                        DF V2DF])
184
185(define_mode_iterator QHSI  [QI HI SI])
186(define_mode_iterator QHSDI  [QI HI SI DI])
187(define_mode_iterator DTI  [DI TI])
188
189(define_mode_iterator VINT [QI V16QI
190			 HI V8HI
191			 SI V4SI
192			 DI V2DI
193			 TI])
194
195(define_mode_iterator VQHSI [QI V16QI
196			  HI V8HI
197			  SI V4SI])
198
199(define_mode_iterator VHSI [HI V8HI
200			 SI V4SI])
201
202(define_mode_iterator VSDF [SF V4SF
203                         DF V2DF])
204
205(define_mode_iterator VSI [SI V4SI])
206(define_mode_iterator VDI [DI V2DI])
207(define_mode_iterator VSF [SF V4SF])
208(define_mode_iterator VDF [DF V2DF])
209
210(define_mode_iterator VCMP [V16QI
211			 V8HI
212			 V4SI
213                         V4SF
214                         V2DF])
215
216(define_mode_iterator VCMPU [V16QI
217			  V8HI
218			  V4SI])
219
220(define_mode_attr v	 [(V8HI  "v") (V4SI  "v")
221			  (HI    "") (SI    "")])
222
223(define_mode_attr bh  [(QI "b")  (V16QI "b")
224		       (HI "h")  (V8HI "h")
225		       (SI "")   (V4SI "")])
226
227(define_mode_attr d   [(SF "")   (V4SF "")
228                       (DF "d")  (V2DF "d")])
229(define_mode_attr d6  [(SF "6")  (V4SF "6")
230                       (DF "d")  (V2DF "d")])
231
232(define_mode_attr f2i [(SF "si") (V4SF "v4si")
233                       (DF "di") (V2DF "v2di")])
234(define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
235                       (DF "DI") (V2DF "V2DI")])
236(define_mode_attr i2f [(SI "sf") (V4SI "v4sf")
237                       (DI "df") (V2DI "v2df")])
238(define_mode_attr I2F [(SI "SF") (V4SI "V4SF")
239                       (DI "DF") (V2DI "V2DF")])
240
241(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
242
243(define_mode_attr umask  [(HI "f")  (V8HI "f")
244		          (SI "g")  (V4SI "g")])
245(define_mode_attr nmask  [(HI "F")  (V8HI "F")
246		          (SI "G")  (V4SI "G")])
247
248;; Used for carry and borrow instructions.
249(define_mode_iterator CBOP  [SI DI V4SI V2DI])
250
251;; Used in vec_set and vec_extract
252(define_mode_iterator V [V2DI V4SI V8HI V16QI V2DF V4SF])
253(define_mode_attr inner  [(V16QI "QI")
254			  (V8HI  "HI")
255			  (V4SI  "SI")
256			  (V2DI  "DI")
257			  (V4SF  "SF")
258			  (V2DF  "DF")])
259;; Like above, but in lower case
260(define_mode_attr inner_l [(V16QI "qi")
261			   (V8HI  "hi")
262			   (V4SI  "si")
263			   (V2DI  "di")
264			   (V4SF  "sf")
265			   (V2DF  "df")])
266(define_mode_attr vmult  [(V16QI "1")
267			  (V8HI  "2")
268			  (V4SI  "4")
269			  (V2DI  "8")
270			  (V4SF  "4")
271			  (V2DF  "8")])
272(define_mode_attr voff   [(V16QI "13")
273			  (V8HI  "14")
274			  (V4SI  "0")
275			  (V2DI  "0")
276			  (V4SF  "0")
277			  (V2DF  "0")])
278
279
280;; mov
281
282(define_expand "mov<mode>"
283  [(set (match_operand:ALL 0 "nonimmediate_operand" "")
284	(match_operand:ALL 1 "general_operand" ""))]
285  ""
286  {
287    if (spu_expand_mov(operands, <MODE>mode))
288      DONE;
289  })
290
291(define_split
292  [(set (match_operand 0 "spu_reg_operand")
293	(match_operand 1 "immediate_operand"))]
294
295  ""
296  [(set (match_dup 0)
297	(high (match_dup 1)))
298   (set (match_dup 0)
299	(lo_sum (match_dup 0)
300	        (match_dup 1)))]
301  {
302    if (spu_split_immediate (operands))
303      DONE;
304    FAIL;
305  })
306
307(define_insn "pic"
308  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
309	(match_operand:SI 1 "immediate_operand" "s"))
310   (use (const_int 0))]
311  "flag_pic"
312  "ila\t%0,%%pic(%1)")
313
314;; Whenever a function generates the 'pic' pattern above we need to
315;; load the pic_offset_table register.
316;; GCC doesn't deal well with labels in the middle of a block so we
317;; hardcode the offsets in the asm here.
318(define_insn "load_pic_offset"
319  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
320	(unspec:SI [(const_int 0)] 0))
321   (set (match_operand:SI 1 "spu_reg_operand" "=r")
322	(unspec:SI [(const_int 0)] 0))]
323  "flag_pic"
324  "ila\t%1,.+8\;brsl\t%0,4"
325  [(set_attr "length" "8")
326   (set_attr "type" "multi0")])
327
328
329;; move internal
330
331(define_insn "_mov<mode>"
332  [(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m")
333	(match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
334  "register_operand(operands[0], <MODE>mode)
335   || register_operand(operands[1], <MODE>mode)"
336  "@
337   ori\t%0,%1,0
338   il%s1\t%0,%S1
339   fsmbi\t%0,%S1
340   c%s1d\t%0,%S1($sp)
341   lq%p1\t%0,%1
342   stq%p0\t%1,%0"
343  [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
344
345(define_insn "low_<mode>"
346  [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
347	(lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0")
348		    (match_operand:VSI 2 "immediate_operand" "i")))]
349  ""
350  "iohl\t%0,%2@l")
351
352(define_insn "_movdi"
353  [(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m")
354	(match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
355  "register_operand(operands[0], DImode)
356   || register_operand(operands[1], DImode)"
357  "@
358   ori\t%0,%1,0
359   il%d1\t%0,%D1
360   fsmbi\t%0,%D1
361   c%d1d\t%0,%D1($sp)
362   lq%p1\t%0,%1
363   stq%p0\t%1,%0"
364  [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
365
366(define_insn "_movti"
367  [(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m")
368	(match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
369  "register_operand(operands[0], TImode)
370   || register_operand(operands[1], TImode)"
371  "@
372   ori\t%0,%1,0
373   il%t1\t%0,%T1
374   fsmbi\t%0,%T1
375   c%t1d\t%0,%T1($sp)
376   lq%p1\t%0,%1
377   stq%p0\t%1,%0"
378  [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
379
380(define_split
381  [(set (match_operand 0 "spu_reg_operand")
382	(match_operand 1 "memory_operand"))]
383  "GET_MODE_SIZE (GET_MODE (operands[0])) < 16
384   && GET_MODE(operands[0]) == GET_MODE(operands[1])
385   && !reload_in_progress && !reload_completed"
386  [(set (match_dup 0)
387	(match_dup 1))]
388  { if (spu_split_load(operands))
389      DONE;
390  })
391
392(define_split
393  [(set (match_operand 0 "memory_operand")
394	(match_operand 1 "spu_reg_operand"))]
395  "GET_MODE_SIZE (GET_MODE (operands[0])) < 16
396   && GET_MODE(operands[0]) == GET_MODE(operands[1])
397   && !reload_in_progress && !reload_completed"
398  [(set (match_dup 0)
399	(match_dup 1))]
400  { if (spu_split_store(operands))
401      DONE;
402  })
403;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d
404
405(define_expand "cpat"
406  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
407	(unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
408		    (match_operand:SI 2 "spu_nonmem_operand" "r,n")
409		    (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
410  ""
411  {
412    rtx x = gen_cpat_const (operands);
413    if (x)
414      {
415        emit_move_insn (operands[0], x);
416        DONE;
417      }
418  })
419
420(define_insn "_cpat"
421  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
422	(unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
423		    (match_operand:SI 2 "spu_nonmem_operand" "r,n")
424		    (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
425  ""
426  "@
427   c%M3x\t%0,%1,%2
428   c%M3d\t%0,%C2(%1)"
429  [(set_attr "type" "shuf")])
430
431(define_split
432  [(set (match_operand:TI 0 "spu_reg_operand")
433	(unspec:TI [(match_operand:SI 1 "spu_nonmem_operand")
434		    (match_operand:SI 2 "immediate_operand")
435		    (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))]
436  ""
437  [(set (match_dup:TI 0)
438        (match_dup:TI 4))]
439  {
440    operands[4] = gen_cpat_const (operands);
441    if (!operands[4])
442      FAIL;
443  })
444
445;; extend
446
447(define_insn "extendqihi2"
448  [(set (match_operand:HI 0 "spu_reg_operand" "=r")
449	(sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
450  ""
451  "xsbh\t%0,%1")
452
453(define_insn "extendhisi2"
454  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
455	(sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))]
456  ""
457  "xshw\t%0,%1")
458
459(define_expand "extendsidi2"
460  [(set (match_dup:DI 2)
461	(zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "")))
462   (set (match_operand:DI 0 "spu_reg_operand" "")
463	(sign_extend:DI (vec_select:SI (match_dup:V2SI 3)
464				       (parallel [(const_int 1)]))))]
465  ""
466  {
467    operands[2] = gen_reg_rtx (DImode);
468    operands[3] = spu_gen_subreg (V2SImode, operands[2]);
469  })
470
471(define_insn "xswd"
472  [(set (match_operand:DI 0 "spu_reg_operand" "=r")
473	(sign_extend:DI
474	  (vec_select:SI
475	    (match_operand:V2SI 1 "spu_reg_operand" "r")
476	    (parallel [(const_int 1) ]))))]
477  ""
478  "xswd\t%0,%1");
479
480;; By splitting this late we don't allow much opportunity for sharing of
481;; constants.  That's ok because this should really be optimized away.
482(define_insn_and_split "extend<mode>ti2"
483  [(set (match_operand:TI 0 "register_operand" "")
484	(sign_extend:TI (match_operand:QHSDI 1 "register_operand" "")))]
485  ""
486  "#"
487  ""
488  [(set (match_dup:TI 0)
489	(sign_extend:TI (match_dup:QHSDI 1)))]
490  {
491    spu_expand_sign_extend(operands);
492    DONE;
493  })
494
495
496;; zero_extend
497
498(define_insn "zero_extendqihi2"
499  [(set (match_operand:HI 0 "spu_reg_operand" "=r")
500	(zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
501  ""
502  "andi\t%0,%1,0x00ff")
503
504(define_insn "zero_extendqisi2"
505  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
506	(zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))]
507  ""
508  "andi\t%0,%1,0x00ff")
509
510(define_expand "zero_extendhisi2"
511  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
512	(zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))
513   (clobber (match_scratch:SI 2 "=&r"))]
514  ""
515  {
516    rtx mask = gen_reg_rtx (SImode);
517    rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0);
518    emit_move_insn (mask, GEN_INT (0xffff));
519    emit_insn (gen_andsi3(operands[0], op1, mask));
520    DONE;
521  })
522
523(define_insn "zero_extendsidi2"
524  [(set (match_operand:DI 0 "spu_reg_operand" "=r")
525	(zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))]
526  ""
527  "rotqmbyi\t%0,%1,-4"
528  [(set_attr "type" "shuf")])
529
530(define_insn "zero_extendqiti2"
531  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
532	(zero_extend:TI (match_operand:QI 1 "spu_reg_operand" "r")))]
533  ""
534  "andi\t%0,%1,0x00ff\;rotqmbyi\t%0,%0,-12"
535  [(set_attr "type" "multi0")
536   (set_attr "length" "8")])
537
538(define_insn "zero_extendhiti2"
539  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
540	(zero_extend:TI (match_operand:HI 1 "spu_reg_operand" "r")))]
541  ""
542  "shli\t%0,%1,16\;rotqmbyi\t%0,%0,-14"
543  [(set_attr "type" "multi1")
544   (set_attr "length" "8")])
545
546(define_insn "zero_extendsiti2"
547  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
548	(zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))]
549  ""
550  "rotqmbyi\t%0,%1,-12"
551  [(set_attr "type" "shuf")])
552
553(define_insn "zero_extendditi2"
554  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
555	(zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))]
556  ""
557  "rotqmbyi\t%0,%1,-8"
558  [(set_attr "type" "shuf")])
559
560
561;; trunc
562
563(define_insn "truncdiqi2"
564  [(set (match_operand:QI 0 "spu_reg_operand" "=r")
565	(truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))]
566  ""
567  "shlqbyi\t%0,%1,4"
568  [(set_attr "type" "shuf")])
569
570(define_insn "truncdihi2"
571  [(set (match_operand:HI 0 "spu_reg_operand" "=r")
572	(truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))]
573  ""
574  "shlqbyi\t%0,%1,4"
575  [(set_attr "type" "shuf")])
576
577(define_insn "truncdisi2"
578  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
579	(truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))]
580  ""
581  "shlqbyi\t%0,%1,4"
582  [(set_attr "type" "shuf")])
583
584(define_insn "trunctiqi2"
585  [(set (match_operand:QI 0 "spu_reg_operand" "=r")
586	(truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))]
587  ""
588  "shlqbyi\t%0,%1,12"
589  [(set_attr "type" "shuf")])
590
591(define_insn "trunctihi2"
592  [(set (match_operand:HI 0 "spu_reg_operand" "=r")
593	(truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))]
594  ""
595  "shlqbyi\t%0,%1,12"
596  [(set_attr "type" "shuf")])
597
598(define_insn "trunctisi2"
599  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
600	(truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))]
601  ""
602  "shlqbyi\t%0,%1,12"
603  [(set_attr "type" "shuf")])
604
605(define_insn "trunctidi2"
606  [(set (match_operand:DI 0 "spu_reg_operand" "=r")
607	(truncate:DI (match_operand:TI 1 "spu_reg_operand" "r")))]
608  ""
609  "shlqbyi\t%0,%1,8"
610  [(set_attr "type" "shuf")])
611
612
613;; float conversions
614
615(define_insn "float<mode><i2f>2"
616  [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
617	(float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r")))]
618  ""
619  "csflt\t%0,%1,0"
620  [(set_attr "type" "fp7")])
621
622(define_insn "fix_trunc<mode><f2i>2"
623  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
624	(fix:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")))]
625  ""
626  "cflts\t%0,%1,0"
627  [(set_attr "type" "fp7")])
628
629(define_insn "floatuns<mode><i2f>2"
630  [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
631	(unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r")))]
632  ""
633  "cuflt\t%0,%1,0"
634  [(set_attr "type" "fp7")])
635
636(define_insn "fixuns_trunc<mode><f2i>2"
637  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
638	(unsigned_fix:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")))]
639  ""
640  "cfltu\t%0,%1,0"
641  [(set_attr "type" "fp7")])
642
643(define_insn "float<mode><i2f>2_mul"
644  [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
645	(mult:<I2F> (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
646		    (match_operand:<I2F> 2 "spu_inv_exp2_operand" "w")))]
647  ""
648  "csflt\t%0,%1,%w2"
649  [(set_attr "type" "fp7")])
650
651(define_insn "float<mode><i2f>2_div"
652  [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
653	(div:<I2F> (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
654		   (match_operand:<I2F> 2 "spu_exp2_operand" "v")))]
655  ""
656  "csflt\t%0,%1,%v2"
657  [(set_attr "type" "fp7")])
658
659
660(define_insn "fix_trunc<mode><f2i>2_mul"
661  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
662	(fix:<F2I> (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
663			     (match_operand:VSF 2 "spu_exp2_operand" "v"))))]
664  ""
665  "cflts\t%0,%1,%v2"
666  [(set_attr "type" "fp7")])
667
668(define_insn "floatuns<mode><i2f>2_mul"
669  [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
670	(mult:<I2F> (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
671		    (match_operand:<I2F> 2 "spu_inv_exp2_operand" "w")))]
672  ""
673  "cuflt\t%0,%1,%w2"
674  [(set_attr "type" "fp7")])
675
676(define_insn "floatuns<mode><i2f>2_div"
677  [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
678	(div:<I2F> (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
679		   (match_operand:<I2F> 2 "spu_exp2_operand" "v")))]
680  ""
681  "cuflt\t%0,%1,%v2"
682  [(set_attr "type" "fp7")])
683
684(define_insn "fixuns_trunc<mode><f2i>2_mul"
685  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
686	(unsigned_fix:<F2I> (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
687				      (match_operand:VSF 2 "spu_exp2_operand" "v"))))]
688  ""
689  "cfltu\t%0,%1,%v2"
690  [(set_attr "type" "fp7")])
691
692(define_insn "extendsfdf2"
693  [(set (match_operand:DF 0 "spu_reg_operand" "=r")
694	(unspec:DF [(match_operand:SF 1 "spu_reg_operand" "r")]
695                   UNSPEC_FLOAT_EXTEND))]
696  ""
697  "fesd\t%0,%1"
698  [(set_attr "type" "fpd")])
699
700(define_insn "truncdfsf2"
701  [(set (match_operand:SF 0 "spu_reg_operand" "=r")
702	(unspec:SF [(match_operand:DF 1 "spu_reg_operand" "r")]
703                   UNSPEC_FLOAT_TRUNCATE))]
704  ""
705  "frds\t%0,%1"
706  [(set_attr "type" "fpd")])
707
708(define_expand "floatdisf2"
709  [(set (match_operand:SF 0 "register_operand" "")
710	(float:SF (match_operand:DI 1 "register_operand" "")))]
711  ""
712  {
713    rtx c0 = gen_reg_rtx (SImode);
714    rtx r0 = gen_reg_rtx (DImode);
715    rtx r1 = gen_reg_rtx (SFmode);
716    rtx r2 = gen_reg_rtx (SImode);
717    rtx setneg = gen_reg_rtx (SImode);
718    rtx isneg = gen_reg_rtx (SImode);
719    rtx neg = gen_reg_rtx (DImode);
720    rtx mask = gen_reg_rtx (DImode);
721
722    emit_move_insn (c0, GEN_INT (-0x80000000ll));
723
724    emit_insn (gen_negdi2 (neg, operands[1]));
725    emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
726    emit_insn (gen_extend_compare (mask, isneg));
727    emit_insn (gen_selb (r0, neg, operands[1], mask));
728    emit_insn (gen_andc_si (setneg, c0, isneg));
729
730    emit_insn (gen_floatunsdisf2 (r1, r0));
731
732    emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg));
733    emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0));
734    DONE;
735  })
736
737(define_insn_and_split "floatunsdisf2"
738  [(set (match_operand:SF 0 "register_operand" "=r")
739        (unsigned_float:SF (match_operand:DI 1 "register_operand" "r")))
740   (clobber (match_scratch:SF 2 "=r"))
741   (clobber (match_scratch:SF 3 "=r"))
742   (clobber (match_scratch:SF 4 "=r"))]
743  ""
744  "#"
745  "reload_completed"
746  [(set (match_dup:SF 0)
747        (unsigned_float:SF (match_dup:DI 1)))]
748  {
749    rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
750    rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2]));
751    rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2]));
752    rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3]));
753
754    REAL_VALUE_TYPE scale;
755    real_2expN (&scale, 32, SFmode);
756
757    emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si));
758    emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4)));
759
760    emit_move_insn (operands[4],
761		    const_double_from_real_value (scale, SFmode));
762    emit_insn (gen_fmasf4 (operands[0],
763			   operands[2], operands[4], operands[3]));
764    DONE;
765  })
766
767(define_expand "floattisf2"
768  [(set (match_operand:SF 0 "register_operand" "")
769	(float:SF (match_operand:TI 1 "register_operand" "")))]
770  ""
771  {
772    rtx c0 = gen_reg_rtx (SImode);
773    rtx r0 = gen_reg_rtx (TImode);
774    rtx r1 = gen_reg_rtx (SFmode);
775    rtx r2 = gen_reg_rtx (SImode);
776    rtx setneg = gen_reg_rtx (SImode);
777    rtx isneg = gen_reg_rtx (SImode);
778    rtx neg = gen_reg_rtx (TImode);
779    rtx mask = gen_reg_rtx (TImode);
780
781    emit_move_insn (c0, GEN_INT (-0x80000000ll));
782
783    emit_insn (gen_negti2 (neg, operands[1]));
784    emit_insn (gen_cgt_ti_m1 (isneg, operands[1]));
785    emit_insn (gen_extend_compare (mask, isneg));
786    emit_insn (gen_selb (r0, neg, operands[1], mask));
787    emit_insn (gen_andc_si (setneg, c0, isneg));
788
789    emit_insn (gen_floatunstisf2 (r1, r0));
790
791    emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg));
792    emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0));
793    DONE;
794  })
795
796(define_insn_and_split "floatunstisf2"
797  [(set (match_operand:SF 0 "register_operand" "=r")
798        (unsigned_float:SF (match_operand:TI 1 "register_operand" "r")))
799   (clobber (match_scratch:SF 2 "=r"))
800   (clobber (match_scratch:SF 3 "=r"))
801   (clobber (match_scratch:SF 4 "=r"))]
802  ""
803  "#"
804  "reload_completed"
805  [(set (match_dup:SF 0)
806        (unsigned_float:SF (match_dup:TI 1)))]
807  {
808    rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
809    rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2]));
810    rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2]));
811    rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3]));
812
813    REAL_VALUE_TYPE scale;
814    real_2expN (&scale, 32, SFmode);
815
816    emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si));
817    emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4)));
818
819    emit_move_insn (operands[4],
820		    const_double_from_real_value (scale, SFmode));
821    emit_insn (gen_fmasf4 (operands[2],
822			   operands[2], operands[4], operands[3]));
823
824    emit_insn (gen_shlqby_ti (op3_ti, op3_ti, GEN_INT (4)));
825    emit_insn (gen_fmasf4 (operands[2],
826			   operands[2], operands[4], operands[3]));
827
828    emit_insn (gen_shlqby_ti (op3_ti, op3_ti, GEN_INT (4)));
829    emit_insn (gen_fmasf4 (operands[0],
830			   operands[2], operands[4], operands[3]));
831    DONE;
832  })
833
834;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000
835(define_expand "floatsidf2"
836  [(set (match_operand:DF 0 "register_operand" "")
837	(float:DF (match_operand:SI 1 "register_operand" "")))]
838  ""
839  {
840    rtx c0 = gen_reg_rtx (SImode);
841    rtx c1 = gen_reg_rtx (DFmode);
842    rtx r0 = gen_reg_rtx (SImode);
843    rtx r1 = gen_reg_rtx (DFmode);
844
845    emit_move_insn (c0, GEN_INT (-0x80000000ll));
846    emit_move_insn (c1, spu_float_const ("2147483648", DFmode));
847    emit_insn (gen_xorsi3 (r0, operands[1], c0));
848    emit_insn (gen_floatunssidf2 (r1, r0));
849    emit_insn (gen_subdf3 (operands[0], r1, c1));
850    DONE;
851  })
852
853(define_expand "floatunssidf2"
854  [(set (match_operand:DF 0 "register_operand"  "=r")
855        (unsigned_float:DF (match_operand:SI 1 "register_operand"   "r")))]
856  ""
857  "{
858    rtx value;
859    rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080,
860                                             0x06071415, 0x16178080);
861    rtx r0 = gen_reg_rtx (V16QImode);
862
863    if (optimize_size)
864    {
865       start_sequence ();
866       value =
867	 emit_library_call_value (convert_optab_libfunc (ufloat_optab,
868							 DFmode, SImode),
869				  NULL_RTX, LCT_NORMAL, DFmode,
870				  operands[1], SImode);
871       rtx_insn *insns = get_insns ();
872       end_sequence ();
873       emit_libcall_block (insns, operands[0], value,
874                           gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1]));
875     }
876     else
877     {
878      emit_move_insn (r0, c0);
879      emit_insn (gen_floatunssidf2_internal (operands[0], operands[1], r0));
880     }
881    DONE;
882  }")
883
884(define_insn_and_split "floatunssidf2_internal"
885  [(set (match_operand:DF 0 "register_operand"  "=r")
886        (unsigned_float:DF (match_operand:SI 1 "register_operand"   "r")))
887   (use (match_operand:V16QI 2 "register_operand" "r"))
888   (clobber (match_scratch:V4SI 3 "=&r"))
889   (clobber (match_scratch:V4SI 4 "=&r"))
890   (clobber (match_scratch:V4SI 5 "=&r"))
891   (clobber (match_scratch:V4SI 6 "=&r"))]
892  ""
893  "clz\t%3,%1\;il\t%6,1023+31\;shl\t%4,%1,%3\;ceqi\t%5,%3,32\;sf\t%6,%3,%6\;a\t%4,%4,%4\;andc\t%6,%6,%5\;shufb\t%6,%6,%4,%2\;shlqbii\t%0,%6,4"
894  "reload_completed"
895  [(set (match_dup:DF 0)
896        (unsigned_float:DF (match_dup:SI 1)))]
897 "{
898    rtx *ops = operands;
899    rtx op1_v4si = gen_rtx_REG(V4SImode, REGNO(ops[1]));
900    rtx op0_ti = gen_rtx_REG (TImode, REGNO (ops[0]));
901    rtx op2_ti = gen_rtx_REG (TImode, REGNO (ops[2]));
902    rtx op6_ti = gen_rtx_REG (TImode, REGNO (ops[6]));
903    emit_insn (gen_clzv4si2 (ops[3],op1_v4si));
904    emit_move_insn (ops[6], spu_const (V4SImode, 1023+31));
905    emit_insn (gen_vashlv4si3 (ops[4],op1_v4si,ops[3]));
906    emit_insn (gen_ceq_v4si (ops[5],ops[3],spu_const (V4SImode, 32)));
907    emit_insn (gen_subv4si3 (ops[6],ops[6],ops[3]));
908    emit_insn (gen_addv4si3 (ops[4],ops[4],ops[4]));
909    emit_insn (gen_andc_v4si  (ops[6],ops[6],ops[5]));
910    emit_insn (gen_shufb (ops[6],ops[6],ops[4],op2_ti));
911    emit_insn (gen_shlqbi_ti (op0_ti,op6_ti,GEN_INT(4)));
912    DONE;
913  }"
914 [(set_attr "length" "32")])
915
916(define_expand "floatdidf2"
917  [(set (match_operand:DF 0 "register_operand" "")
918	(float:DF (match_operand:DI 1 "register_operand" "")))]
919  ""
920  {
921    rtx c0 = gen_reg_rtx (DImode);
922    rtx r0 = gen_reg_rtx (DImode);
923    rtx r1 = gen_reg_rtx (DFmode);
924    rtx r2 = gen_reg_rtx (DImode);
925    rtx setneg = gen_reg_rtx (DImode);
926    rtx isneg = gen_reg_rtx (SImode);
927    rtx neg = gen_reg_rtx (DImode);
928    rtx mask = gen_reg_rtx (DImode);
929
930    emit_move_insn (c0, GEN_INT (0x8000000000000000ull));
931
932    emit_insn (gen_negdi2 (neg, operands[1]));
933    emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
934    emit_insn (gen_extend_compare (mask, isneg));
935    emit_insn (gen_selb (r0, neg, operands[1], mask));
936    emit_insn (gen_andc_di (setneg, c0, mask));
937
938    emit_insn (gen_floatunsdidf2 (r1, r0));
939
940    emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg));
941    emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0));
942    DONE;
943  })
944
945(define_expand "floatunsdidf2"
946  [(set (match_operand:DF 0 "register_operand"  "=r")
947        (unsigned_float:DF (match_operand:DI 1 "register_operand"   "r")))]
948  ""
949  "{
950    rtx value;
951    rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080,
952                                             0x06071415, 0x16178080);
953    rtx c1 = spu_const_from_ints (V4SImode, 1023+63, 1023+31, 0, 0);
954    rtx r0 = gen_reg_rtx (V16QImode);
955    rtx r1 = gen_reg_rtx (V4SImode);
956
957    if (optimize_size)
958    {
959      start_sequence ();
960      value =
961         emit_library_call_value (convert_optab_libfunc (ufloat_optab,
962                                                         DFmode, DImode),
963				  NULL_RTX, LCT_NORMAL, DFmode,
964				  operands[1], DImode);
965      rtx_insn *insns = get_insns ();
966      end_sequence ();
967      emit_libcall_block (insns, operands[0], value,
968                          gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1]));
969    }
970    else
971    {
972      emit_move_insn (r1, c1);
973      emit_move_insn (r0, c0);
974      emit_insn (gen_floatunsdidf2_internal (operands[0], operands[1], r0, r1));
975    }
976    DONE;
977  }")
978
979(define_insn_and_split "floatunsdidf2_internal"
980  [(set (match_operand:DF 0 "register_operand"  "=r")
981        (unsigned_float:DF (match_operand:DI 1 "register_operand"   "r")))
982   (use (match_operand:V16QI 2 "register_operand" "r"))
983   (use (match_operand:V4SI 3 "register_operand" "r"))
984   (clobber (match_scratch:V4SI 4 "=&r"))
985   (clobber (match_scratch:V4SI 5 "=&r"))
986   (clobber (match_scratch:V4SI 6 "=&r"))]
987  ""
988  "clz\t%4,%1\;shl\t%5,%1,%4\;ceqi\t%6,%4,32\;sf\t%4,%4,%3\;a\t%5,%5,%5\;andc\t%4,%4,%6\;shufb\t%4,%4,%5,%2\;shlqbii\t%4,%4,4\;shlqbyi\t%5,%4,8\;dfa\t%0,%4,%5"
989  "reload_completed"
990  [(set (match_operand:DF 0 "register_operand"  "=r")
991        (unsigned_float:DF (match_operand:DI 1 "register_operand"   "r")))]
992  "{
993    rtx *ops = operands;
994    rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO(ops[1]));
995    rtx op2_ti = gen_rtx_REG (TImode, REGNO(ops[2]));
996    rtx op4_ti = gen_rtx_REG (TImode, REGNO(ops[4]));
997    rtx op5_ti = gen_rtx_REG (TImode, REGNO(ops[5]));
998    rtx op4_df = gen_rtx_REG (DFmode, REGNO(ops[4]));
999    rtx op5_df = gen_rtx_REG (DFmode, REGNO(ops[5]));
1000    emit_insn (gen_clzv4si2 (ops[4],op1_v4si));
1001    emit_insn (gen_vashlv4si3 (ops[5],op1_v4si,ops[4]));
1002    emit_insn (gen_ceq_v4si (ops[6],ops[4],spu_const (V4SImode, 32)));
1003    emit_insn (gen_subv4si3 (ops[4],ops[3],ops[4]));
1004    emit_insn (gen_addv4si3 (ops[5],ops[5],ops[5]));
1005    emit_insn (gen_andc_v4si (ops[4],ops[4],ops[6]));
1006    emit_insn (gen_shufb (ops[4],ops[4],ops[5],op2_ti));
1007    emit_insn (gen_shlqbi_ti (op4_ti,op4_ti,GEN_INT(4)));
1008    emit_insn (gen_shlqby_ti (op5_ti,op4_ti,GEN_INT(8)));
1009    emit_insn (gen_adddf3 (ops[0],op4_df,op5_df));
1010    DONE;
1011  }"
1012  [(set_attr "length" "40")])
1013
1014
1015;; add
1016
1017(define_expand "addv16qi3"
1018  [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1019	(plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
1020		    (match_operand:V16QI 2 "spu_reg_operand" "r")))]
1021  ""
1022  "{
1023    rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
1024    rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
1025    rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
1026    rtx rhs_and = gen_reg_rtx (V8HImode);
1027    rtx hi_char = gen_reg_rtx (V8HImode);
1028    rtx lo_char = gen_reg_rtx (V8HImode);
1029    rtx mask = gen_reg_rtx (V8HImode);
1030
1031    emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
1032    emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
1033    emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and));
1034    emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short));
1035    emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
1036    DONE;
1037   }")
1038
1039(define_insn "add<mode>3"
1040  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
1041	(plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
1042		   (match_operand:VHSI 2 "spu_arith_operand" "r,B")))]
1043  ""
1044  "@
1045  a<bh>\t%0,%1,%2
1046  a<bh>i\t%0,%1,%2")
1047
1048(define_expand "add<mode>3"
1049  [(set (match_dup:VDI 3)
1050	(unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
1051		     (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG))
1052   (set (match_dup:VDI 5)
1053	(unspec:VDI [(match_dup 3)
1054		     (match_dup 3)
1055		     (match_dup:TI 4)] UNSPEC_SHUFB))
1056   (set (match_operand:VDI 0 "spu_reg_operand" "")
1057	(unspec:VDI [(match_dup 1)
1058		     (match_dup 2)
1059		     (match_dup 5)] UNSPEC_ADDX))]
1060  ""
1061  {
1062    unsigned char pat[16] = {
1063      0x04, 0x05, 0x06, 0x07,
1064      0x80, 0x80, 0x80, 0x80,
1065      0x0c, 0x0d, 0x0e, 0x0f,
1066      0x80, 0x80, 0x80, 0x80
1067    };
1068    operands[3] = gen_reg_rtx (<MODE>mode);
1069    operands[4] = gen_reg_rtx (TImode);
1070    operands[5] = gen_reg_rtx (<MODE>mode);
1071    emit_move_insn (operands[4], array_to_constant (TImode, pat));
1072  })
1073
1074(define_insn "cg_<mode>"
1075  [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1076	(unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1077		      (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))]
1078  "operands != NULL"
1079  "cg\t%0,%1,%2")
1080
1081(define_insn "cgx_<mode>"
1082  [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1083	(unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1084		      (match_operand 2 "spu_reg_operand" "r")
1085		      (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))]
1086  "operands != NULL"
1087  "cgx\t%0,%1,%2")
1088
1089(define_insn "addx_<mode>"
1090  [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1091	(unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1092		      (match_operand 2 "spu_reg_operand" "r")
1093		      (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))]
1094  "operands != NULL"
1095  "addx\t%0,%1,%2")
1096
1097
1098;; This is not the most efficient implementation of addti3.
1099;; We include this here because 1) the compiler needs it to be
1100;; defined as the word size is 128-bit and 2) sometimes gcc
1101;; substitutes an add for a constant left-shift. 2) is unlikely
1102;; because we also give addti3 a high cost. In case gcc does
1103;; generate TImode add, here is the code to do it.
1104;; operand 2 is a nonmemory because the compiler requires it.
1105(define_insn "addti3"
1106  [(set (match_operand:TI 0 "spu_reg_operand" "=&r")
1107	(plus:TI (match_operand:TI 1 "spu_reg_operand" "r")
1108		 (match_operand:TI 2 "spu_nonmem_operand" "r")))
1109   (clobber (match_scratch:TI 3 "=&r"))]
1110  ""
1111  "cg\t%3,%1,%2\n\\
1112   shlqbyi\t%3,%3,4\n\\
1113   cgx\t%3,%1,%2\n\\
1114   shlqbyi\t%3,%3,4\n\\
1115   cgx\t%3,%1,%2\n\\
1116   shlqbyi\t%0,%3,4\n\\
1117   addx\t%0,%1,%2"
1118  [(set_attr "type" "multi0")
1119   (set_attr "length" "28")])
1120
1121(define_insn "add<mode>3"
1122  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1123	(plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1124		  (match_operand:VSF 2 "spu_reg_operand" "r")))]
1125  ""
1126  "fa\t%0,%1,%2"
1127  [(set_attr "type" "fp6")])
1128
1129(define_insn "add<mode>3"
1130  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1131	(plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1132		  (match_operand:VDF 2 "spu_reg_operand" "r")))]
1133  ""
1134  "dfa\t%0,%1,%2"
1135  [(set_attr "type" "fpd")])
1136
1137
1138;; sub
1139
1140(define_expand "subv16qi3"
1141  [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1142	(minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
1143		     (match_operand:V16QI 2 "spu_reg_operand" "r")))]
1144  ""
1145  "{
1146    rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
1147    rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
1148    rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
1149    rtx rhs_and = gen_reg_rtx (V8HImode);
1150    rtx hi_char = gen_reg_rtx (V8HImode);
1151    rtx lo_char = gen_reg_rtx (V8HImode);
1152    rtx mask = gen_reg_rtx (V8HImode);
1153
1154    emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
1155    emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
1156    emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and));
1157    emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short));
1158    emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
1159    DONE;
1160   }")
1161
1162(define_insn "sub<mode>3"
1163  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
1164	(minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B")
1165		    (match_operand:VHSI 2 "spu_reg_operand" "r,r")))]
1166  ""
1167  "@
1168  sf<bh>\t%0,%2,%1
1169  sf<bh>i\t%0,%2,%1")
1170
1171(define_expand "sub<mode>3"
1172  [(set (match_dup:VDI 3)
1173	(unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
1174		     (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG))
1175   (set (match_dup:VDI 5)
1176	(unspec:VDI [(match_dup 3)
1177		     (match_dup 3)
1178		     (match_dup:TI 4)] UNSPEC_SHUFB))
1179   (set (match_operand:VDI 0 "spu_reg_operand" "")
1180	(unspec:VDI [(match_dup 1)
1181		     (match_dup 2)
1182		     (match_dup 5)] UNSPEC_SFX))]
1183  ""
1184  {
1185    unsigned char pat[16] = {
1186      0x04, 0x05, 0x06, 0x07,
1187      0xc0, 0xc0, 0xc0, 0xc0,
1188      0x0c, 0x0d, 0x0e, 0x0f,
1189      0xc0, 0xc0, 0xc0, 0xc0
1190    };
1191    operands[3] = gen_reg_rtx (<MODE>mode);
1192    operands[4] = gen_reg_rtx (TImode);
1193    operands[5] = gen_reg_rtx (<MODE>mode);
1194    emit_move_insn (operands[4], array_to_constant (TImode, pat));
1195  })
1196
1197(define_insn "bg_<mode>"
1198  [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1199	(unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1200		      (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))]
1201  "operands != NULL"
1202  "bg\t%0,%2,%1")
1203
1204(define_insn "bgx_<mode>"
1205  [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1206	(unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1207		      (match_operand 2 "spu_reg_operand" "r")
1208		      (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))]
1209  "operands != NULL"
1210  "bgx\t%0,%2,%1")
1211
1212(define_insn "sfx_<mode>"
1213  [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
1214	(unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
1215		      (match_operand 2 "spu_reg_operand" "r")
1216		      (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))]
1217  "operands != NULL"
1218  "sfx\t%0,%2,%1")
1219
1220(define_insn "subti3"
1221  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
1222	(minus:TI (match_operand:TI 1 "spu_reg_operand" "r")
1223		  (match_operand:TI 2 "spu_reg_operand" "r")))
1224   (clobber (match_scratch:TI 3 "=&r"))
1225   (clobber (match_scratch:TI 4 "=&r"))
1226   (clobber (match_scratch:TI 5 "=&r"))
1227   (clobber (match_scratch:TI 6 "=&r"))]
1228  ""
1229  "il\t%6,1\n\\
1230   bg\t%3,%2,%1\n\\
1231   xor\t%3,%3,%6\n\\
1232   sf\t%4,%2,%1\n\\
1233   shlqbyi\t%5,%3,4\n\\
1234   bg\t%3,%5,%4\n\\
1235   xor\t%3,%3,%6\n\\
1236   sf\t%4,%5,%4\n\\
1237   shlqbyi\t%5,%3,4\n\\
1238   bg\t%3,%5,%4\n\\
1239   xor\t%3,%3,%6\n\\
1240   sf\t%4,%5,%4\n\\
1241   shlqbyi\t%5,%3,4\n\\
1242   sf\t%0,%5,%4"
1243  [(set_attr "type" "multi0")
1244   (set_attr "length" "56")])
1245
1246(define_insn "sub<mode>3"
1247  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1248	(minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1249		   (match_operand:VSF 2 "spu_reg_operand" "r")))]
1250  ""
1251  "fs\t%0,%1,%2"
1252  [(set_attr "type" "fp6")])
1253
1254(define_insn "sub<mode>3"
1255  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1256	(minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1257		   (match_operand:VDF 2 "spu_reg_operand" "r")))]
1258  ""
1259  "dfs\t%0,%1,%2"
1260  [(set_attr "type" "fpd")])
1261
1262
1263;; neg
1264
1265(define_expand "negv16qi2"
1266  [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1267	(neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))]
1268  ""
1269  "{
1270    rtx zero = gen_reg_rtx (V16QImode);
1271    emit_move_insn (zero, CONST0_RTX (V16QImode));
1272    emit_insn (gen_subv16qi3 (operands[0], zero, operands[1]));
1273    DONE;
1274   }")
1275
1276(define_insn "neg<mode>2"
1277  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
1278	(neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))]
1279  ""
1280  "sf<bh>i\t%0,%1,0")
1281
1282(define_expand "negdi2"
1283  [(set (match_operand:DI 0 "spu_reg_operand" "")
1284	(neg:DI (match_operand:DI 1 "spu_reg_operand" "")))]
1285  ""
1286  {
1287    rtx zero = gen_reg_rtx(DImode);
1288    emit_move_insn(zero, GEN_INT(0));
1289    emit_insn (gen_subdi3(operands[0], zero, operands[1]));
1290    DONE;
1291  })
1292
1293(define_expand "negti2"
1294  [(set (match_operand:TI 0 "spu_reg_operand" "")
1295	(neg:TI (match_operand:TI 1 "spu_reg_operand" "")))]
1296  ""
1297  {
1298    rtx zero = gen_reg_rtx(TImode);
1299    emit_move_insn(zero, GEN_INT(0));
1300    emit_insn (gen_subti3(operands[0], zero, operands[1]));
1301    DONE;
1302  })
1303
1304(define_expand "neg<mode>2"
1305  [(parallel
1306    [(set (match_operand:VSF 0 "spu_reg_operand" "")
1307	  (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1308     (use (match_dup 2))])]
1309  ""
1310  "operands[2] = gen_reg_rtx (<F2I>mode);
1311   emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
1312
1313(define_expand "neg<mode>2"
1314  [(parallel
1315    [(set (match_operand:VDF 0 "spu_reg_operand" "")
1316	  (neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1317     (use (match_dup 2))])]
1318  ""
1319  "operands[2] = gen_reg_rtx (<F2I>mode);
1320   emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
1321
1322(define_insn_and_split "_neg<mode>2"
1323  [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1324	(neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1325   (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1326  ""
1327  "#"
1328  ""
1329  [(set (match_dup:<F2I> 3)
1330	(xor:<F2I> (match_dup:<F2I> 4)
1331		   (match_dup:<F2I> 2)))]
1332  {
1333    operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1334    operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1335  })
1336
1337
1338;; abs
1339
1340(define_expand "abs<mode>2"
1341  [(parallel
1342    [(set (match_operand:VSF 0 "spu_reg_operand" "")
1343	  (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1344     (use (match_dup 2))])]
1345  ""
1346  "operands[2] = gen_reg_rtx (<F2I>mode);
1347   emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
1348
1349(define_expand "abs<mode>2"
1350  [(parallel
1351    [(set (match_operand:VDF 0 "spu_reg_operand" "")
1352	  (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1353     (use (match_dup 2))])]
1354  ""
1355  "operands[2] = gen_reg_rtx (<F2I>mode);
1356   emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
1357
1358(define_insn_and_split "_abs<mode>2"
1359  [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1360	(abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1361   (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1362  ""
1363  "#"
1364  ""
1365  [(set (match_dup:<F2I> 3)
1366	(and:<F2I> (match_dup:<F2I> 4)
1367		   (match_dup:<F2I> 2)))]
1368  {
1369    operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1370    operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1371  })
1372
1373
1374;; mul
1375
1376(define_insn "mulhi3"
1377  [(set (match_operand:HI 0 "spu_reg_operand" "=r,r")
1378	(mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r")
1379		 (match_operand:HI 2 "spu_arith_operand" "r,B")))]
1380  ""
1381  "@
1382  mpy\t%0,%1,%2
1383  mpyi\t%0,%1,%2"
1384  [(set_attr "type" "fp7")])
1385
1386(define_expand "mulv8hi3"
1387  [(set (match_operand:V8HI 0 "spu_reg_operand" "")
1388	(mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "")
1389		   (match_operand:V8HI 2 "spu_reg_operand" "")))]
1390  ""
1391  "{
1392    rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1393    rtx low = gen_reg_rtx (V4SImode);
1394    rtx high = gen_reg_rtx (V4SImode);
1395    rtx shift = gen_reg_rtx (V4SImode);
1396    rtx mask = gen_reg_rtx (V4SImode);
1397
1398    emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
1399    emit_insn (gen_vec_widen_smult_even_v8hi (high, operands[1], operands[2]));
1400    emit_insn (gen_vec_widen_smult_odd_v8hi (low, operands[1], operands[2]));
1401    emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16)));
1402    emit_insn (gen_selb (result, shift, low, mask));
1403    DONE;
1404   }")
1405
1406(define_expand "mul<mode>3"
1407  [(parallel
1408    [(set (match_operand:VSI 0 "spu_reg_operand" "")
1409	  (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "")
1410		    (match_operand:VSI 2 "spu_reg_operand" "")))
1411     (clobber (match_dup:VSI 3))
1412     (clobber (match_dup:VSI 4))
1413     (clobber (match_dup:VSI 5))
1414     (clobber (match_dup:VSI 6))])]
1415  ""
1416  {
1417    operands[3] = gen_reg_rtx(<MODE>mode);
1418    operands[4] = gen_reg_rtx(<MODE>mode);
1419    operands[5] = gen_reg_rtx(<MODE>mode);
1420    operands[6] = gen_reg_rtx(<MODE>mode);
1421  })
1422
1423(define_insn_and_split "_mulsi3"
1424  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1425	(mult:SI (match_operand:SI 1 "spu_reg_operand" "r")
1426		 (match_operand:SI 2 "spu_arith_operand" "rK")))
1427   (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))
1428   (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
1429   (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))
1430   (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))]
1431  ""
1432  "#"
1433  ""
1434  [(set (match_dup:SI 0)
1435	(mult:SI (match_dup:SI 1)
1436		 (match_dup:SI 2)))]
1437  {
1438    HOST_WIDE_INT val = 0;
1439    rtx a = operands[3];
1440    rtx b = operands[4];
1441    rtx c = operands[5];
1442    rtx d = operands[6];
1443    if (GET_CODE(operands[2]) == CONST_INT)
1444      {
1445	val = INTVAL(operands[2]);
1446	emit_move_insn(d, operands[2]);
1447	operands[2] = d;
1448      }
1449    if (val && (val & 0xffff) == 0)
1450      {
1451	emit_insn (gen_mpyh_si(operands[0], operands[2], operands[1]));
1452      }
1453    else if (val > 0 && val < 0x10000)
1454      {
1455	rtx cst = satisfies_constraint_K (GEN_INT (val)) ? GEN_INT(val) : d;
1456	emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
1457	emit_insn (gen_mpyu_si(c, operands[1], cst));
1458	emit_insn (gen_addsi3(operands[0], a, c));
1459      }
1460    else
1461      {
1462	emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
1463	emit_insn (gen_mpyh_si(b, operands[2], operands[1]));
1464	emit_insn (gen_mpyu_si(c, operands[1], operands[2]));
1465	emit_insn (gen_addsi3(d, a, b));
1466	emit_insn (gen_addsi3(operands[0], d, c));
1467      }
1468    DONE;
1469   })
1470
1471(define_insn_and_split "_mulv4si3"
1472  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
1473	(mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r")
1474		   (match_operand:V4SI 2 "spu_reg_operand" "r")))
1475   (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r"))
1476   (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r"))
1477   (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r"))
1478   (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))]
1479  ""
1480  "#"
1481  ""
1482  [(set (match_dup:V4SI 0)
1483	(mult:V4SI (match_dup:V4SI 1)
1484		   (match_dup:V4SI 2)))]
1485  {
1486    rtx a = operands[3];
1487    rtx b = operands[4];
1488    rtx c = operands[5];
1489    rtx d = operands[6];
1490    rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0);
1491    rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0);
1492    emit_insn (gen_spu_mpyh(a, op1, op2));
1493    emit_insn (gen_spu_mpyh(b, op2, op1));
1494    emit_insn (gen_vec_widen_umult_odd_v8hi (c, op1, op2));
1495    emit_insn (gen_addv4si3(d, a, b));
1496    emit_insn (gen_addv4si3(operands[0], d, c));
1497    DONE;
1498   })
1499
1500(define_insn "mulhisi3"
1501  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1502	(mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1503		 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1504  ""
1505  "mpy\t%0,%1,%2"
1506  [(set_attr "type" "fp7")])
1507
1508(define_insn "mulhisi3_imm"
1509  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1510	(mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1511		 (match_operand:SI 2 "imm_K_operand" "K")))]
1512  ""
1513  "mpyi\t%0,%1,%2"
1514  [(set_attr "type" "fp7")])
1515
1516(define_insn "umulhisi3"
1517  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1518	(mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1519		 (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1520  ""
1521  "mpyu\t%0,%1,%2"
1522  [(set_attr "type" "fp7")])
1523
1524(define_insn "umulhisi3_imm"
1525  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1526	(mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1527		 (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))]
1528  ""
1529  "mpyui\t%0,%1,%2"
1530  [(set_attr "type" "fp7")])
1531
1532(define_insn "mpyu_si"
1533  [(set (match_operand:SI 0 "spu_reg_operand" "=r,r")
1534	(mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r")
1535			 (const_int 65535))
1536		 (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K")
1537			 (const_int 65535))))]
1538  ""
1539  "@
1540   mpyu\t%0,%1,%2
1541   mpyui\t%0,%1,%2"
1542  [(set_attr "type" "fp7")])
1543
1544;; This isn't always profitable to use.  Consider r = a * b + c * d.
1545;; It's faster to do the multiplies in parallel then add them.  If we
1546;; merge a multiply and add it prevents the multiplies from happening in
1547;; parallel.
1548(define_insn "mpya_si"
1549  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1550	(plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1551			  (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1552		 (match_operand:SI 3 "spu_reg_operand" "r")))]
1553  "0"
1554  "mpya\t%0,%1,%2,%3"
1555  [(set_attr "type" "fp7")])
1556
1557(define_insn "mpyh_si"
1558  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1559	(mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r")
1560			 (const_int -65536))
1561	         (and:SI (match_operand:SI 2 "spu_reg_operand" "r")
1562			 (const_int 65535))))]
1563  ""
1564  "mpyh\t%0,%1,%2"
1565  [(set_attr "type" "fp7")])
1566
1567(define_insn "mpys_si"
1568  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1569	(ashiftrt:SI
1570	    (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1571		     (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1572	    (const_int 16)))]
1573  ""
1574  "mpys\t%0,%1,%2"
1575  [(set_attr "type" "fp7")])
1576
1577(define_insn "mpyhh_si"
1578  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1579	(mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1580			      (const_int 16))
1581		 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1582			      (const_int 16))))]
1583  ""
1584  "mpyhh\t%0,%1,%2"
1585  [(set_attr "type" "fp7")])
1586
1587(define_insn "mpyhhu_si"
1588  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1589	(mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1590			      (const_int 16))
1591		 (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1592			      (const_int 16))))]
1593  ""
1594  "mpyhhu\t%0,%1,%2"
1595  [(set_attr "type" "fp7")])
1596
1597(define_insn "mpyhha_si"
1598  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1599	(plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1600				       (const_int 16))
1601			  (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1602				       (const_int 16)))
1603		 (match_operand:SI 3 "spu_reg_operand" "0")))]
1604  "0"
1605  "mpyhha\t%0,%1,%2"
1606  [(set_attr "type" "fp7")])
1607
1608(define_insn "mul<mode>3"
1609  [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1610	(mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")
1611		   (match_operand:VSDF 2 "spu_reg_operand" "r")))]
1612  ""
1613  "<d>fm\t%0,%1,%2"
1614  [(set_attr "type" "fp<d6>")])
1615
1616(define_insn "fma<mode>4"
1617  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1618	(fma:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1619		 (match_operand:VSF 2 "spu_reg_operand" "r")
1620		 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1621  ""
1622  "fma\t%0,%1,%2,%3"
1623  [(set_attr "type"	"fp6")])
1624
1625;; ??? The official description is (c - a*b), which is exactly (-a*b + c).
1626;; Note that this doesn't match the dfnms description.  Incorrect?
1627(define_insn "fnma<mode>4"
1628  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1629	(fma:VSF
1630	  (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
1631	  (match_operand:VSF 2 "spu_reg_operand" "r")
1632	  (match_operand:VSF 3 "spu_reg_operand" "r")))]
1633  ""
1634  "fnms\t%0,%1,%2,%3"
1635  [(set_attr "type" "fp6")])
1636
1637(define_insn "fms<mode>4"
1638  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1639	(fma:VSF
1640	  (match_operand:VSF 1 "spu_reg_operand" "r")
1641	  (match_operand:VSF 2 "spu_reg_operand" "r")
1642	  (neg:VSF (match_operand:VSF 3 "spu_reg_operand" "r"))))]
1643  ""
1644  "fms\t%0,%1,%2,%3"
1645  [(set_attr "type" "fp6")])
1646
1647(define_insn "fma<mode>4"
1648  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1649	(fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1650		 (match_operand:VDF 2 "spu_reg_operand" "r")
1651		 (match_operand:VDF 3 "spu_reg_operand" "0")))]
1652  ""
1653  "dfma\t%0,%1,%2"
1654  [(set_attr "type"	"fpd")])
1655
1656(define_insn "fms<mode>4"
1657  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1658	(fma:VDF
1659	  (match_operand:VDF 1 "spu_reg_operand" "r")
1660	  (match_operand:VDF 2 "spu_reg_operand" "r")
1661	  (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0"))))]
1662  ""
1663  "dfms\t%0,%1,%2"
1664  [(set_attr "type" "fpd")])
1665
1666(define_insn "nfma<mode>4"
1667  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1668	(neg:VDF
1669	  (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1670		   (match_operand:VDF 2 "spu_reg_operand" "r")
1671		   (match_operand:VDF 3 "spu_reg_operand" "0"))))]
1672  ""
1673  "dfnma\t%0,%1,%2"
1674  [(set_attr "type"	"fpd")])
1675
1676(define_insn "nfms<mode>4"
1677  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1678	(neg:VDF
1679	  (fma:VDF
1680	    (match_operand:VDF 1 "spu_reg_operand" "r")
1681	    (match_operand:VDF 2 "spu_reg_operand" "r")
1682	    (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0")))))]
1683  ""
1684  "dfnms\t%0,%1,%2"
1685  [(set_attr "type" "fpd")])
1686
1687;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
1688(define_expand "fnma<mode>4"
1689  [(set (match_operand:VDF 0 "spu_reg_operand" "")
1690	(neg:VDF
1691	  (fma:VDF
1692	    (match_operand:VDF 1 "spu_reg_operand" "")
1693	    (match_operand:VDF 2 "spu_reg_operand" "")
1694	    (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "")))))]
1695  "!HONOR_SIGNED_ZEROS (<MODE>mode)"
1696  "")
1697
1698;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
1699(define_expand "fnms<mode>4"
1700  [(set (match_operand:VDF 0 "register_operand" "")
1701	(neg:VDF
1702	  (fma:VDF
1703	    (match_operand:VDF 1 "register_operand" "")
1704	    (match_operand:VDF 2 "register_operand" "")
1705	    (match_operand:VDF 3 "register_operand" ""))))]
1706  "!HONOR_SIGNED_ZEROS (<MODE>mode)"
1707  "")
1708
1709;; mul highpart, used for divide by constant optimizations.
1710
1711(define_expand "smulsi3_highpart"
1712  [(set (match_operand:SI 0 "register_operand" "")
1713	(truncate:SI
1714	  (ashiftrt:DI
1715	    (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
1716	             (sign_extend:DI (match_operand:SI 2 "register_operand" "")))
1717	    (const_int 32))))]
1718  ""
1719  {
1720    rtx t0 = gen_reg_rtx (SImode);
1721    rtx t1 = gen_reg_rtx (SImode);
1722    rtx t2 = gen_reg_rtx (SImode);
1723    rtx t3 = gen_reg_rtx (SImode);
1724    rtx t4 = gen_reg_rtx (SImode);
1725    rtx t5 = gen_reg_rtx (SImode);
1726    rtx t6 = gen_reg_rtx (SImode);
1727    rtx t7 = gen_reg_rtx (SImode);
1728    rtx t8 = gen_reg_rtx (SImode);
1729    rtx t9 = gen_reg_rtx (SImode);
1730    rtx t11 = gen_reg_rtx (SImode);
1731    rtx t12 = gen_reg_rtx (SImode);
1732    rtx t14 = gen_reg_rtx (SImode);
1733    rtx t15 = gen_reg_rtx (HImode);
1734    rtx t16 = gen_reg_rtx (HImode);
1735    rtx t17 = gen_reg_rtx (HImode);
1736    rtx t18 = gen_reg_rtx (HImode);
1737    rtx t19 = gen_reg_rtx (SImode);
1738    rtx t20 = gen_reg_rtx (SImode);
1739    rtx t21 = gen_reg_rtx (SImode);
1740    rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1741    rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1742    rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1743    rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2);
1744
1745    rtx_insn *insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16)));
1746    emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16)));
1747    emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi));
1748    emit_insn (gen_mpyh_si (t3, operands[1], operands[2]));
1749    emit_insn (gen_mpyh_si (t4, operands[2], operands[1]));
1750    emit_insn (gen_mpyhh_si (t5, operands[1], operands[2]));
1751    emit_insn (gen_mpys_si (t6, t0_hi, op2_hi));
1752    emit_insn (gen_mpys_si (t7, t1_hi, op1_hi));
1753
1754    /* Gen carry bits (in t9 and t11). */
1755    emit_insn (gen_addsi3 (t8, t2, t3));
1756    emit_insn (gen_cg_si (t9, t2, t3));
1757    emit_insn (gen_cg_si (t11, t8, t4));
1758
1759    /* Gen high 32 bits in operand[0].  Correct for mpys. */
1760    emit_insn (gen_addx_si (t12, t5, t6, t9));
1761    emit_insn (gen_addx_si (t14, t12, t7, t11));
1762
1763    /* mpys treats both operands as signed when we really want it to treat
1764       the first operand as signed and the second operand as unsigned.
1765       The code below corrects for that difference.  */
1766    emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1)));
1767    emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1)));
1768    emit_insn (gen_andc_hi (t17, t1_hi, t15));
1769    emit_insn (gen_andc_hi (t18, t0_hi, t16));
1770    emit_insn (gen_extendhisi2 (t19, t17));
1771    emit_insn (gen_extendhisi2 (t20, t18));
1772    emit_insn (gen_addsi3 (t21, t19, t20));
1773    emit_insn (gen_addsi3 (operands[0], t14, t21));
1774    unshare_all_rtl_in_chain (insn);
1775    DONE;
1776  })
1777
1778(define_expand "umulsi3_highpart"
1779  [(set (match_operand:SI 0 "register_operand" "")
1780	(truncate:SI
1781	  (ashiftrt:DI
1782	    (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
1783	             (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
1784	    (const_int 32))))]
1785  ""
1786
1787  {
1788    rtx t0 = gen_reg_rtx (SImode);
1789    rtx t1 = gen_reg_rtx (SImode);
1790    rtx t2 = gen_reg_rtx (SImode);
1791    rtx t3 = gen_reg_rtx (SImode);
1792    rtx t4 = gen_reg_rtx (SImode);
1793    rtx t5 = gen_reg_rtx (SImode);
1794    rtx t6 = gen_reg_rtx (SImode);
1795    rtx t7 = gen_reg_rtx (SImode);
1796    rtx t8 = gen_reg_rtx (SImode);
1797    rtx t9 = gen_reg_rtx (SImode);
1798    rtx t10 = gen_reg_rtx (SImode);
1799    rtx t12 = gen_reg_rtx (SImode);
1800    rtx t13 = gen_reg_rtx (SImode);
1801    rtx t14 = gen_reg_rtx (SImode);
1802    rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1803    rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1804    rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1805
1806    rtx_insn *insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16)));
1807    emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi));
1808    emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi));
1809    emit_insn (gen_mpyhhu_si (t3, operands[1], t0));
1810    emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2]));
1811    emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16)));
1812    emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16)));
1813    emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16)));
1814    emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16)));
1815
1816    /* Gen carry bits (in t10 and t12). */
1817    emit_insn (gen_addsi3 (t9, t1, t5));
1818    emit_insn (gen_cg_si (t10, t1, t5));
1819    emit_insn (gen_cg_si (t12, t9, t6));
1820
1821    /* Gen high 32 bits in operand[0]. */
1822    emit_insn (gen_addx_si (t13, t4, t7, t10));
1823    emit_insn (gen_addx_si (t14, t13, t8, t12));
1824    emit_insn (gen_movsi (operands[0], t14));
1825    unshare_all_rtl_in_chain (insn);
1826
1827    DONE;
1828  })
1829
1830;; div
1831
1832;; Not necessarily the best implementation of divide but faster then
1833;; the default that gcc provides because this is inlined and it uses
1834;; clz.
1835(define_insn "divmodsi4"
1836      [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1837	    (div:SI (match_operand:SI 1 "spu_reg_operand" "r")
1838		    (match_operand:SI 2 "spu_reg_operand" "r")))
1839       (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1840	    (mod:SI (match_dup 1)
1841		    (match_dup 2)))
1842       (clobber (match_scratch:SI 4 "=&r"))
1843       (clobber (match_scratch:SI 5 "=&r"))
1844       (clobber (match_scratch:SI 6 "=&r"))
1845       (clobber (match_scratch:SI 7 "=&r"))
1846       (clobber (match_scratch:SI 8 "=&r"))
1847       (clobber (match_scratch:SI 9 "=&r"))
1848       (clobber (match_scratch:SI 10 "=&r"))
1849       (clobber (match_scratch:SI 11 "=&r"))
1850       (clobber (match_scratch:SI 12 "=&r"))
1851       (clobber (reg:SI 130))]
1852  ""
1853  "heqi	%2,0\\n\\
1854	hbrr	3f,1f\\n\\
1855	sfi	%8,%1,0\\n\\
1856	sfi	%9,%2,0\\n\\
1857	cgti	%10,%1,-1\\n\\
1858	cgti	%11,%2,-1\\n\\
1859	selb	%8,%8,%1,%10\\n\\
1860	selb	%9,%9,%2,%11\\n\\
1861	clz	%4,%8\\n\\
1862	clz	%7,%9\\n\\
1863	il	%5,1\\n\\
1864	fsmbi	%0,0\\n\\
1865	sf	%7,%4,%7\\n\\
1866	shlqbyi	%3,%8,0\\n\\
1867	xor	%11,%10,%11\\n\\
1868	shl	%5,%5,%7\\n\\
1869	shl	%4,%9,%7\\n\\
1870	lnop	\\n\\
18711:	or	%12,%0,%5\\n\\
1872	rotqmbii	%5,%5,-1\\n\\
1873	clgt	%6,%4,%3\\n\\
1874	lnop	\\n\\
1875	sf	%7,%4,%3\\n\\
1876	rotqmbii	%4,%4,-1\\n\\
1877	selb	%0,%12,%0,%6\\n\\
1878	lnop	\\n\\
1879	selb	%3,%7,%3,%6\\n\\
18803:	brnz	%5,1b\\n\\
18812:	sfi	%8,%3,0\\n\\
1882	sfi	%9,%0,0\\n\\
1883	selb	%3,%8,%3,%10\\n\\
1884	selb	%0,%0,%9,%11"
1885  [(set_attr "type" "multi0")
1886   (set_attr "length" "128")])
1887
1888(define_insn "udivmodsi4"
1889      [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1890	    (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r")
1891		     (match_operand:SI 2 "spu_reg_operand" "r")))
1892       (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1893	    (umod:SI (match_dup 1)
1894		     (match_dup 2)))
1895       (clobber (match_scratch:SI 4 "=&r"))
1896       (clobber (match_scratch:SI 5 "=&r"))
1897       (clobber (match_scratch:SI 6 "=&r"))
1898       (clobber (match_scratch:SI 7 "=&r"))
1899       (clobber (match_scratch:SI 8 "=&r"))
1900       (clobber (reg:SI 130))]
1901  ""
1902  "heqi	%2,0\\n\\
1903	hbrr	3f,1f\\n\\
1904	clz	%7,%2\\n\\
1905	clz	%4,%1\\n\\
1906	il	%5,1\\n\\
1907	fsmbi	%0,0\\n\\
1908	sf	%7,%4,%7\\n\\
1909	ori	%3,%1,0\\n\\
1910	shl	%5,%5,%7\\n\\
1911	shl	%4,%2,%7\\n\\
19121:	or	%8,%0,%5\\n\\
1913	rotqmbii	%5,%5,-1\\n\\
1914	clgt	%6,%4,%3\\n\\
1915	lnop	\\n\\
1916	sf	%7,%4,%3\\n\\
1917	rotqmbii	%4,%4,-1\\n\\
1918	selb	%0,%8,%0,%6\\n\\
1919	lnop	\\n\\
1920	selb	%3,%7,%3,%6\\n\\
19213:	brnz	%5,1b\\n\\
19222:"
1923  [(set_attr "type" "multi0")
1924   (set_attr "length" "80")])
1925
1926(define_expand "div<mode>3"
1927  [(parallel
1928    [(set (match_operand:VSF 0 "spu_reg_operand" "")
1929	  (div:VSF (match_operand:VSF 1 "spu_reg_operand" "")
1930		   (match_operand:VSF 2 "spu_reg_operand" "")))
1931     (clobber (match_scratch:VSF 3 ""))
1932     (clobber (match_scratch:VSF 4 ""))
1933     (clobber (match_scratch:VSF 5 ""))])]
1934  ""
1935  "")
1936
1937(define_insn_and_split "*div<mode>3_fast"
1938  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1939	(div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1940		 (match_operand:VSF 2 "spu_reg_operand" "r")))
1941   (clobber (match_scratch:VSF 3 "=&r"))
1942   (clobber (match_scratch:VSF 4 "=&r"))
1943   (clobber (scratch:VSF))]
1944  "flag_unsafe_math_optimizations"
1945  "#"
1946  "reload_completed"
1947  [(set (match_dup:VSF 0)
1948	(div:VSF (match_dup:VSF 1)
1949		 (match_dup:VSF 2)))
1950   (clobber (match_dup:VSF 3))
1951   (clobber (match_dup:VSF 4))
1952   (clobber (scratch:VSF))]
1953  {
1954    emit_insn (gen_frest_<mode>(operands[3], operands[2]));
1955    emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3]));
1956    emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3]));
1957    emit_insn (gen_fnma<mode>4(operands[0], operands[4], operands[2], operands[1]));
1958    emit_insn (gen_fma<mode>4(operands[0], operands[0], operands[3], operands[4]));
1959    DONE;
1960  })
1961
1962(define_insn_and_split "*div<mode>3_adjusted"
1963  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1964	(div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1965		 (match_operand:VSF 2 "spu_reg_operand" "r")))
1966   (clobber (match_scratch:VSF 3 "=&r"))
1967   (clobber (match_scratch:VSF 4 "=&r"))
1968   (clobber (match_scratch:VSF 5 "=&r"))]
1969  "!flag_unsafe_math_optimizations"
1970  "#"
1971  "reload_completed"
1972  [(set (match_dup:VSF 0)
1973	(div:VSF (match_dup:VSF 1)
1974		 (match_dup:VSF 2)))
1975   (clobber (match_dup:VSF 3))
1976   (clobber (match_dup:VSF 4))
1977   (clobber (match_dup:VSF 5))]
1978  {
1979    emit_insn (gen_frest_<mode> (operands[3], operands[2]));
1980    emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3]));
1981    emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3]));
1982    emit_insn (gen_fnma<mode>4 (operands[5], operands[4], operands[2], operands[1]));
1983    emit_insn (gen_fma<mode>4 (operands[3], operands[5], operands[3], operands[4]));
1984
1985   /* Due to truncation error, the quotient result may be low by 1 ulp.
1986      Conditionally add one if the estimate is too small in magnitude.  */
1987
1988    emit_move_insn (gen_lowpart (<F2I>mode, operands[4]),
1989		    spu_const (<F2I>mode, 0x80000000ULL));
1990    emit_move_insn (gen_lowpart (<F2I>mode, operands[5]),
1991		    spu_const (<F2I>mode, 0x3f800000ULL));
1992    emit_insn (gen_selb (operands[5], operands[5], operands[1], operands[4]));
1993
1994    emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]),
1995			      gen_lowpart (<F2I>mode, operands[3]),
1996			      spu_const (<F2I>mode, 1)));
1997    emit_insn (gen_fnma<mode>4 (operands[0], operands[2], operands[4], operands[1]));
1998    emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5]));
1999    emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]),
2000			      gen_lowpart (<F2I>mode, operands[0]),
2001			      spu_const (<F2I>mode, -1)));
2002    emit_insn (gen_selb (operands[0], operands[3], operands[4], operands[0]));
2003    DONE;
2004  })
2005
2006
2007;; sqrt
2008
2009(define_insn_and_split "sqrtsf2"
2010  [(set (match_operand:SF 0 "spu_reg_operand" "=r")
2011	(sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r")))
2012   (clobber (match_scratch:SF 2 "=&r"))
2013   (clobber (match_scratch:SF 3 "=&r"))
2014   (clobber (match_scratch:SF 4 "=&r"))
2015   (clobber (match_scratch:SF 5 "=&r"))]
2016  ""
2017  "#"
2018  "reload_completed"
2019  [(set (match_dup:SF 0)
2020	(sqrt:SF (match_dup:SF 1)))
2021   (clobber (match_dup:SF 2))
2022   (clobber (match_dup:SF 3))
2023   (clobber (match_dup:SF 4))
2024   (clobber (match_dup:SF 5))]
2025  {
2026    emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode));
2027    emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode));
2028    emit_insn (gen_frsqest_sf(operands[2],operands[1]));
2029    emit_insn (gen_fi_sf(operands[2],operands[1],operands[2]));
2030    emit_insn (gen_mulsf3(operands[5],operands[2],operands[1]));
2031    emit_insn (gen_mulsf3(operands[3],operands[5],operands[3]));
2032    emit_insn (gen_fnmasf4(operands[4],operands[2],operands[5],operands[4]));
2033    emit_insn (gen_fmasf4(operands[0],operands[4],operands[3],operands[5]));
2034    DONE;
2035  })
2036
2037(define_insn "frest_<mode>"
2038  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
2039	(unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))]
2040  ""
2041  "frest\t%0,%1"
2042  [(set_attr "type" "shuf")])
2043
2044(define_insn "frsqest_<mode>"
2045  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
2046	(unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))]
2047  ""
2048  "frsqest\t%0,%1"
2049  [(set_attr "type" "shuf")])
2050
2051(define_insn "fi_<mode>"
2052  [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
2053	(unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")
2054		    (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))]
2055  ""
2056  "fi\t%0,%1,%2"
2057  [(set_attr "type" "fp7")])
2058
2059
2060;; and
2061
2062(define_insn "and<mode>3"
2063  [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
2064	(and:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
2065		 (match_operand:MOV 2 "spu_logical_operand" "r,C")))]
2066  ""
2067  "@
2068  and\t%0,%1,%2
2069  and%j2i\t%0,%1,%J2")
2070
2071(define_insn "anddi3"
2072  [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2073	(and:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2074		(match_operand:DI 2 "spu_logical_operand" "r,c")))]
2075  ""
2076  "@
2077  and\t%0,%1,%2
2078  and%k2i\t%0,%1,%K2")
2079
2080(define_insn "andti3"
2081  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2082	(and:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2083		(match_operand:TI 2 "spu_logical_operand" "r,Y")))]
2084  ""
2085  "@
2086  and\t%0,%1,%2
2087  and%m2i\t%0,%1,%L2")
2088
2089(define_insn "andc_<mode>"
2090  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2091	(and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
2092	         (match_operand:ALL 1 "spu_reg_operand" "r")))]
2093  ""
2094  "andc\t%0,%1,%2")
2095
2096(define_insn "nand_<mode>"
2097  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2098	(not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r")
2099			  (match_operand:ALL 1 "spu_reg_operand" "r"))))]
2100  ""
2101  "nand\t%0,%1,%2")
2102
2103
2104;; ior
2105
2106(define_insn "ior<mode>3"
2107  [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r")
2108	(ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0")
2109		 (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))]
2110  ""
2111  "@
2112  or\t%0,%1,%2
2113  or%j2i\t%0,%1,%J2
2114  iohl\t%0,%J2")
2115
2116(define_insn "iordi3"
2117  [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r")
2118	(ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0")
2119		(match_operand:DI 2 "spu_ior_operand" "r,c,d")))]
2120  ""
2121  "@
2122  or\t%0,%1,%2
2123  or%k2i\t%0,%1,%K2
2124  iohl\t%0,%K2")
2125
2126(define_insn "iorti3"
2127  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r")
2128	(ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0")
2129		(match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))]
2130  ""
2131  "@
2132  or\t%0,%1,%2
2133  or%m2i\t%0,%1,%L2
2134  iohl\t%0,%L2")
2135
2136(define_insn "orc_<mode>"
2137  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2138	(ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
2139	         (match_operand:ALL 1 "spu_reg_operand" "r")))]
2140  ""
2141  "orc\t%0,%1,%2")
2142
2143(define_insn "nor_<mode>"
2144  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2145	(not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
2146			  (match_operand:ALL 2 "spu_reg_operand" "r"))))]
2147  ""
2148  "nor\t%0,%1,%2")
2149
2150;; xor
2151
2152(define_insn "xor<mode>3"
2153  [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
2154	(xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
2155		 (match_operand:MOV 2 "spu_logical_operand" "r,B")))]
2156  ""
2157  "@
2158  xor\t%0,%1,%2
2159  xor%j2i\t%0,%1,%J2")
2160
2161(define_insn "xordi3"
2162  [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2163	(xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2164		(match_operand:DI 2 "spu_logical_operand" "r,c")))]
2165  ""
2166  "@
2167  xor\t%0,%1,%2
2168  xor%k2i\t%0,%1,%K2")
2169
2170(define_insn "xorti3"
2171  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2172	(xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2173		(match_operand:TI 2 "spu_logical_operand" "r,Y")))]
2174  ""
2175  "@
2176  xor\t%0,%1,%2
2177  xor%m2i\t%0,%1,%L2")
2178
2179(define_insn "eqv_<mode>"
2180  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2181	(not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
2182			  (match_operand:ALL 2 "spu_reg_operand" "r"))))]
2183  ""
2184  "eqv\t%0,%1,%2")
2185
2186;; one_cmpl
2187
2188(define_insn "one_cmpl<mode>2"
2189  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
2190	(not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))]
2191  ""
2192  "nor\t%0,%1,%1")
2193
2194
2195;; selb
2196
2197(define_expand "selb"
2198  [(set (match_operand 0 "spu_reg_operand" "")
2199  	(unspec [(match_operand 1 "spu_reg_operand" "")
2200		 (match_operand 2 "spu_reg_operand" "")
2201		 (match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))]
2202  ""
2203  {
2204    rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]);
2205    PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
2206    emit_insn (s);
2207    DONE;
2208  })
2209
2210;; This could be defined as a combination of logical operations, but at
2211;; one time it caused a crash due to recursive expansion of rtl during CSE.
2212(define_insn "_selb"
2213  [(set (match_operand 0 "spu_reg_operand" "=r")
2214  	(unspec [(match_operand 1 "spu_reg_operand" "r")
2215		 (match_operand 2 "spu_reg_operand" "r")
2216		 (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))]
2217  "GET_MODE(operands[0]) == GET_MODE(operands[1])
2218   && GET_MODE(operands[1]) == GET_MODE(operands[2])"
2219  "selb\t%0,%1,%2,%3")
2220
2221
2222;; Misc. byte/bit operations
2223;; clz/ctz/ffs/popcount/parity
2224;; cntb/sumb
2225
2226(define_insn "clz<mode>2"
2227  [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
2228	(clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))]
2229  ""
2230  "clz\t%0,%1")
2231
2232(define_expand "ctz<mode>2"
2233  [(set (match_dup 2)
2234	(neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
2235   (set (match_dup 3) (and:VSI (match_dup 1)
2236			       (match_dup 2)))
2237   (set (match_dup 4) (clz:VSI (match_dup 3)))
2238   (set (match_operand:VSI 0 "spu_reg_operand" "")
2239	(minus:VSI (match_dup 5) (match_dup 4)))]
2240  ""
2241  {
2242     operands[2] = gen_reg_rtx (<MODE>mode);
2243     operands[3] = gen_reg_rtx (<MODE>mode);
2244     operands[4] = gen_reg_rtx (<MODE>mode);
2245     operands[5] = spu_const(<MODE>mode, 31);
2246  })
2247
2248(define_expand "clrsb<mode>2"
2249  [(set (match_dup 2)
2250        (gt:VSI (match_operand:VSI 1 "spu_reg_operand" "") (match_dup 5)))
2251   (set (match_dup 3) (not:VSI (xor:VSI (match_dup 1) (match_dup 2))))
2252   (set (match_dup 4) (clz:VSI (match_dup 3)))
2253   (set (match_operand:VSI 0 "spu_reg_operand")
2254        (plus:VSI (match_dup 4) (match_dup 5)))]
2255  ""
2256  {
2257     operands[2] = gen_reg_rtx (<MODE>mode);
2258     operands[3] = gen_reg_rtx (<MODE>mode);
2259     operands[4] = gen_reg_rtx (<MODE>mode);
2260     operands[5] = spu_const(<MODE>mode, -1);
2261  })
2262
2263(define_expand "ffs<mode>2"
2264  [(set (match_dup 2)
2265	(neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
2266   (set (match_dup 3) (and:VSI (match_dup 1)
2267			       (match_dup 2)))
2268   (set (match_dup 4) (clz:VSI (match_dup 3)))
2269   (set (match_operand:VSI 0 "spu_reg_operand" "")
2270	(minus:VSI (match_dup 5) (match_dup 4)))]
2271  ""
2272  {
2273     operands[2] = gen_reg_rtx (<MODE>mode);
2274     operands[3] = gen_reg_rtx (<MODE>mode);
2275     operands[4] = gen_reg_rtx (<MODE>mode);
2276     operands[5] = spu_const(<MODE>mode, 32);
2277  })
2278
2279(define_expand "popcountsi2"
2280  [(set (match_dup 2)
2281	(unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")]
2282		     UNSPEC_CNTB))
2283   (set (match_dup 3)
2284	(unspec:HI [(match_dup 2)] UNSPEC_SUMB))
2285   (set (match_operand:SI 0 "spu_reg_operand" "")
2286	(sign_extend:SI (match_dup 3)))]
2287  ""
2288  {
2289    operands[2] = gen_reg_rtx (SImode);
2290    operands[3] = gen_reg_rtx (HImode);
2291  })
2292
2293(define_expand "paritysi2"
2294  [(set (match_operand:SI 0 "spu_reg_operand" "")
2295	(parity:SI (match_operand:SI 1 "spu_reg_operand" "")))]
2296  ""
2297  {
2298    operands[2] = gen_reg_rtx (SImode);
2299    emit_insn (gen_popcountsi2(operands[2], operands[1]));
2300    emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1)));
2301    DONE;
2302  })
2303
2304(define_insn "cntb_si"
2305  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2306        (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")]
2307                   UNSPEC_CNTB))]
2308  ""
2309  "cntb\t%0,%1"
2310  [(set_attr "type" "fxb")])
2311
2312(define_insn "cntb_v16qi"
2313  [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
2314        (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")]
2315                      UNSPEC_CNTB))]
2316  ""
2317  "cntb\t%0,%1"
2318  [(set_attr "type" "fxb")])
2319
2320(define_insn "sumb_si"
2321  [(set (match_operand:HI 0 "spu_reg_operand" "=r")
2322        (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))]
2323  ""
2324  "sumb\t%0,%1,%1"
2325  [(set_attr "type" "fxb")])
2326
2327
2328;; ashl, vashl
2329
2330(define_insn "<v>ashl<mode>3"
2331  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2332	(ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2333		     (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
2334  ""
2335  "@
2336  shl<bh>\t%0,%1,%2
2337  shl<bh>i\t%0,%1,%<umask>2"
2338  [(set_attr "type" "fx3")])
2339
2340(define_insn_and_split "ashldi3"
2341  [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2342	(ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2343	           (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
2344   (clobber (match_scratch:SI 3 "=&r,X"))]
2345  ""
2346  "#"
2347  "reload_completed"
2348  [(set (match_dup:DI 0)
2349	(ashift:DI (match_dup:DI 1)
2350	           (match_dup:SI 2)))]
2351  {
2352    rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
2353    rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
2354    rtx op2 = operands[2];
2355    rtx op3 = operands[3];
2356
2357    if (GET_CODE (operands[2]) == REG)
2358      {
2359	emit_insn (gen_addsi3 (op3, op2, GEN_INT (64)));
2360	emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
2361	emit_insn (gen_shlqbybi_ti (op0, op0, op3));
2362	emit_insn (gen_shlqbi_ti (op0, op0, op3));
2363      }
2364    else
2365      {
2366	HOST_WIDE_INT val = INTVAL (operands[2]);
2367	emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
2368	emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8)));
2369	if (val % 8)
2370	  emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8)));
2371      }
2372    DONE;
2373  })
2374
2375(define_expand "ashlti3"
2376  [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "")
2377		   (ashift:TI (match_operand:TI 1 "spu_reg_operand" "")
2378			      (match_operand:SI 2 "spu_nonmem_operand" "")))
2379	      (clobber (match_dup:TI 3))])]
2380  ""
2381  "if (GET_CODE (operands[2]) == CONST_INT)
2382    {
2383      emit_insn (gen_ashlti3_imm(operands[0], operands[1], operands[2]));
2384      DONE;
2385    }
2386   operands[3] = gen_reg_rtx (TImode);")
2387
2388(define_insn_and_split "ashlti3_imm"
2389  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2390	(ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2391		   (match_operand:SI 2 "immediate_operand" "O,P")))]
2392  ""
2393  "@
2394   shlqbyi\t%0,%1,%h2
2395   shlqbii\t%0,%1,%e2"
2396  "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
2397  [(set (match_dup:TI 0)
2398	(ashift:TI (match_dup:TI 1)
2399		   (match_dup:SI 3)))
2400   (set (match_dup:TI 0)
2401	(ashift:TI (match_dup:TI 0)
2402		   (match_dup:SI 4)))]
2403  {
2404    HOST_WIDE_INT val = INTVAL(operands[2]);
2405    operands[3] = GEN_INT (val&7);
2406    operands[4] = GEN_INT (val&-8);
2407  }
2408  [(set_attr "type" "shuf,shuf")])
2409
2410(define_insn_and_split "ashlti3_reg"
2411  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2412	(ashift:TI (match_operand:TI 1 "spu_reg_operand" "r")
2413		   (match_operand:SI 2 "spu_reg_operand" "r")))
2414   (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
2415  ""
2416  "#"
2417  ""
2418  [(set (match_dup:TI 3)
2419	(ashift:TI (match_dup:TI 1)
2420		   (and:SI (match_dup:SI 2)
2421			   (const_int 7))))
2422   (set (match_dup:TI 0)
2423	(ashift:TI (match_dup:TI 3)
2424		   (and:SI (match_dup:SI 2)
2425			   (const_int -8))))]
2426  "")
2427
2428(define_insn "shlqbybi_ti"
2429  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2430	(ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2431		   (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2432			   (const_int -8))))]
2433  ""
2434  "@
2435   shlqbybi\t%0,%1,%2
2436   shlqbyi\t%0,%1,%h2"
2437  [(set_attr "type" "shuf,shuf")])
2438
2439(define_insn "shlqbi_ti"
2440  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2441	(ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2442		   (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2443			   (const_int 7))))]
2444  ""
2445  "@
2446   shlqbi\t%0,%1,%2
2447   shlqbii\t%0,%1,%e2"
2448  [(set_attr "type" "shuf,shuf")])
2449
2450(define_insn "shlqby_ti"
2451  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2452	(ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2453		   (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2454			    (const_int 8))))]
2455  ""
2456  "@
2457   shlqby\t%0,%1,%2
2458   shlqbyi\t%0,%1,%f2"
2459  [(set_attr "type" "shuf,shuf")])
2460
2461
2462;; lshr, vlshr
2463
2464(define_insn_and_split "<v>lshr<mode>3"
2465  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2466	(lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2467		       (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2468   (clobber (match_scratch:VHSI 3 "=&r,X"))]
2469  ""
2470  "@
2471   #
2472   rot<bh>mi\t%0,%1,-%<umask>2"
2473  "reload_completed && GET_CODE (operands[2]) == REG"
2474  [(set (match_dup:VHSI 3)
2475	(neg:VHSI (match_dup:VHSI 2)))
2476   (set (match_dup:VHSI 0)
2477	(lshiftrt:VHSI (match_dup:VHSI 1)
2478		       (neg:VHSI (match_dup:VHSI 3))))]
2479  ""
2480  [(set_attr "type" "*,fx3")])
2481
2482(define_insn "<v>lshr<mode>3_imm"
2483  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
2484	(lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
2485		       (match_operand:VHSI 2 "immediate_operand" "W")))]
2486  ""
2487  "rot<bh>mi\t%0,%1,-%<umask>2"
2488  [(set_attr "type" "fx3")])
2489
2490(define_insn "rotm_<mode>"
2491  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2492	(lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2493		       (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2494  ""
2495  "@
2496   rot<bh>m\t%0,%1,%2
2497   rot<bh>mi\t%0,%1,-%<nmask>2"
2498  [(set_attr "type" "fx3")])
2499
2500(define_insn_and_split "lshr<mode>3"
2501  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r,r")
2502	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r,r")
2503		      (match_operand:SI 2 "spu_nonmem_operand" "r,O,P")))]
2504  ""
2505  "@
2506   #
2507   rotqmbyi\t%0,%1,-%h2
2508   rotqmbii\t%0,%1,-%e2"
2509  "REG_P (operands[2]) || (!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2]))"
2510  [(set (match_dup:DTI 3)
2511	(lshiftrt:DTI (match_dup:DTI 1)
2512		      (match_dup:SI 4)))
2513   (set (match_dup:DTI 0)
2514	(lshiftrt:DTI (match_dup:DTI 3)
2515		      (match_dup:SI 5)))]
2516  {
2517    operands[3] = gen_reg_rtx (<MODE>mode);
2518    if (GET_CODE (operands[2]) == CONST_INT)
2519      {
2520	HOST_WIDE_INT val = INTVAL(operands[2]);
2521	operands[4] = GEN_INT (val & 7);
2522	operands[5] = GEN_INT (val & -8);
2523      }
2524    else
2525      {
2526        rtx t0 = gen_reg_rtx (SImode);
2527        rtx t1 = gen_reg_rtx (SImode);
2528	emit_insn (gen_subsi3(t0, GEN_INT(0), operands[2]));
2529	emit_insn (gen_subsi3(t1, GEN_INT(7), operands[2]));
2530        operands[4] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, t0), GEN_INT (7));
2531        operands[5] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, gen_rtx_AND (SImode, t1, GEN_INT (-8))), GEN_INT (-8));
2532      }
2533  }
2534  [(set_attr "type" "*,shuf,shuf")])
2535
2536(define_expand "shrqbybi_<mode>"
2537  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2538	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2539		      (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2540					      (const_int -8)))
2541			      (const_int -8))))]
2542  ""
2543  {
2544    if (GET_CODE (operands[2]) == CONST_INT)
2545      operands[2] = GEN_INT (7 - INTVAL (operands[2]));
2546    else
2547      {
2548        rtx t0 = gen_reg_rtx (SImode);
2549	emit_insn (gen_subsi3 (t0, GEN_INT (7), operands[2]));
2550        operands[2] = t0;
2551      }
2552  })
2553
2554(define_insn "rotqmbybi_<mode>"
2555  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2556	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2557		      (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2558					      (const_int -8)))
2559			      (const_int -8))))]
2560  ""
2561  "@
2562   rotqmbybi\t%0,%1,%2
2563   rotqmbyi\t%0,%1,-%H2"
2564  [(set_attr "type" "shuf")])
2565
2566(define_insn_and_split "shrqbi_<mode>"
2567  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2568	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2569		      (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2570			      (const_int 7))))
2571   (clobber (match_scratch:SI 3 "=&r,X"))]
2572  ""
2573  "#"
2574  "reload_completed"
2575  [(set (match_dup:DTI 0)
2576	(lshiftrt:DTI (match_dup:DTI 1)
2577		      (and:SI (neg:SI (match_dup:SI 3)) (const_int 7))))]
2578  {
2579    if (GET_CODE (operands[2]) == CONST_INT)
2580      operands[3] = GEN_INT (-INTVAL (operands[2]));
2581    else
2582      emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2]));
2583  }
2584  [(set_attr "type" "shuf")])
2585
2586(define_insn "rotqmbi_<mode>"
2587  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2588	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2589		      (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2590			      (const_int 7))))]
2591  ""
2592  "@
2593   rotqmbi\t%0,%1,%2
2594   rotqmbii\t%0,%1,-%E2"
2595  [(set_attr "type" "shuf")])
2596
2597(define_expand "shrqby_<mode>"
2598  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2599	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2600		      (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2601			       (const_int 8))))]
2602  ""
2603  {
2604    if (GET_CODE (operands[2]) == CONST_INT)
2605      operands[2] = GEN_INT (-INTVAL (operands[2]));
2606    else
2607      {
2608        rtx t0 = gen_reg_rtx (SImode);
2609	emit_insn (gen_subsi3 (t0, GEN_INT (0), operands[2]));
2610        operands[2] = t0;
2611      }
2612  })
2613
2614(define_insn "rotqmby_<mode>"
2615  [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2616	(lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2617		      (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2618			       (const_int 8))))]
2619  ""
2620  "@
2621   rotqmby\t%0,%1,%2
2622   rotqmbyi\t%0,%1,-%F2"
2623  [(set_attr "type" "shuf")])
2624
2625
2626;; ashr, vashr
2627
2628(define_insn_and_split "<v>ashr<mode>3"
2629  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2630	(ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2631		       (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2632   (clobber (match_scratch:VHSI 3 "=&r,X"))]
2633  ""
2634  "@
2635   #
2636   rotma<bh>i\t%0,%1,-%<umask>2"
2637  "reload_completed && GET_CODE (operands[2]) == REG"
2638  [(set (match_dup:VHSI 3)
2639	(neg:VHSI (match_dup:VHSI 2)))
2640   (set (match_dup:VHSI 0)
2641	(ashiftrt:VHSI (match_dup:VHSI 1)
2642		       (neg:VHSI (match_dup:VHSI 3))))]
2643  ""
2644  [(set_attr "type" "*,fx3")])
2645
2646(define_insn "<v>ashr<mode>3_imm"
2647  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
2648	(ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
2649		       (match_operand:VHSI 2 "immediate_operand" "W")))]
2650  ""
2651  "rotma<bh>i\t%0,%1,-%<umask>2"
2652  [(set_attr "type" "fx3")])
2653
2654
2655(define_insn "rotma_<mode>"
2656  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2657	(ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2658		       (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2659  ""
2660  "@
2661   rotma<bh>\t%0,%1,%2
2662   rotma<bh>i\t%0,%1,-%<nmask>2"
2663  [(set_attr "type" "fx3")])
2664
2665(define_insn_and_split "ashrdi3"
2666  [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2667        (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2668                     (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
2669   (clobber (match_scratch:TI 3 "=&r,&r"))
2670   (clobber (match_scratch:TI 4 "=&r,&r"))
2671   (clobber (match_scratch:SI 5 "=&r,&r"))]
2672  ""
2673  "#"
2674  "reload_completed"
2675  [(set (match_dup:DI 0)
2676        (ashiftrt:DI (match_dup:DI 1)
2677                     (match_dup:SI 2)))]
2678  {
2679    rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
2680    rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0));
2681    rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
2682    rtx op1s = gen_rtx_REG (SImode, REGNO (op1));
2683    rtx op2 = operands[2];
2684    rtx op3 = operands[3];
2685    rtx op4 = operands[4];
2686    rtx op5 = operands[5];
2687
2688    if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63)
2689      {
2690	rtx op0s = gen_rtx_REG (SImode, REGNO (op0));
2691	emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32)));
2692	emit_insn (gen_spu_fsm (op0v, op0s));
2693      }
2694    else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32)
2695      {
2696	rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0));
2697	HOST_WIDE_INT val = INTVAL (op2);
2698	emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32)));
2699	emit_insn (gen_spu_xswd (op0d, op0v));
2700        if (val > 32)
2701	  emit_insn (gen_vashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
2702      }
2703    else
2704      {
2705	rtx op3v = gen_rtx_REG (V4SImode, REGNO (op3));
2706	unsigned char arr[16] = {
2707	  0xff, 0xff, 0xff, 0xff,
2708	  0xff, 0xff, 0xff, 0xff,
2709	  0x00, 0x00, 0x00, 0x00,
2710	  0x00, 0x00, 0x00, 0x00
2711	};
2712
2713	emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31)));
2714	emit_move_insn (op4, array_to_constant (TImode, arr));
2715	emit_insn (gen_spu_fsm (op3v, op5));
2716
2717	if (GET_CODE (operands[2]) == REG)
2718	  {
2719	    emit_insn (gen_selb (op4, op3, op1, op4));
2720	    emit_insn (gen_negsi2 (op5, op2));
2721	    emit_insn (gen_rotqbybi_ti (op0, op4, op5));
2722	    emit_insn (gen_rotqbi_ti (op0, op0, op5));
2723	  }
2724	else
2725	  {
2726	    HOST_WIDE_INT val = -INTVAL (op2);
2727	    emit_insn (gen_selb (op0, op3, op1, op4));
2728	    if ((val - 7) / 8)
2729	      emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8)));
2730	    if (val % 8)
2731	      emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8)));
2732	  }
2733      }
2734    DONE;
2735  })
2736
2737
2738(define_insn_and_split "ashrti3"
2739  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2740	(ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2741		     (match_operand:SI 2 "spu_nonmem_operand" "r,i")))]
2742  ""
2743  "#"
2744  ""
2745  [(set (match_dup:TI 0)
2746	(ashiftrt:TI (match_dup:TI 1)
2747		     (match_dup:SI 2)))]
2748  {
2749    rtx sign_shift = gen_reg_rtx (SImode);
2750    rtx sign_mask = gen_reg_rtx (TImode);
2751    rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0);
2752    rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]);
2753    rtx t = gen_reg_rtx (TImode);
2754    emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2])));
2755    emit_insn (gen_vashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
2756    emit_insn (gen_fsm_ti (sign_mask, sign_mask));
2757    emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift));
2758    emit_insn (gen_lshrti3 (t, operands[1], operands[2]));
2759    emit_insn (gen_iorti3 (operands[0], t, sign_mask));
2760    DONE;
2761  })
2762
2763;; fsm is used after rotam to replicate the sign across the whole register.
2764(define_insn "fsm_ti"
2765  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2766	(unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))]
2767  ""
2768  "fsm\t%0,%1"
2769  [(set_attr "type" "shuf")])
2770
2771
2772;; vrotl, rotl
2773
2774(define_insn "<v>rotl<mode>3"
2775  [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2776	(rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2777		     (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
2778  ""
2779  "@
2780  rot<bh>\t%0,%1,%2
2781  rot<bh>i\t%0,%1,%<umask>2"
2782  [(set_attr "type" "fx3")])
2783
2784(define_insn "rotlti3"
2785  [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r")
2786	(rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r")
2787		   (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))]
2788  ""
2789  "@
2790  rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2
2791  rotqbyi\t%0,%1,%h2
2792  rotqbii\t%0,%1,%e2
2793  rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2"
2794  [(set_attr "length" "8,4,4,8")
2795   (set_attr "type" "multi1,shuf,shuf,multi1")])
2796
2797(define_insn "rotqbybi_ti"
2798  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2799	(rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2800		   (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2801			   (const_int -8))))]
2802  ""
2803  "@
2804  rotqbybi\t%0,%1,%2
2805  rotqbyi\t%0,%1,%h2"
2806  [(set_attr "type" "shuf,shuf")])
2807
2808(define_insn "rotqby_ti"
2809  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2810	(rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2811		   (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2812			    (const_int 8))))]
2813  ""
2814  "@
2815  rotqby\t%0,%1,%2
2816  rotqbyi\t%0,%1,%f2"
2817  [(set_attr "type" "shuf,shuf")])
2818
2819(define_insn "rotqbi_ti"
2820  [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2821	(rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2822		   (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2823			   (const_int 7))))]
2824  ""
2825  "@
2826  rotqbi\t%0,%1,%2
2827  rotqbii\t%0,%1,%e2"
2828  [(set_attr "type" "shuf,shuf")])
2829
2830
2831;; struct extract/insert
2832;; We handle mem's because GCC will generate invalid SUBREG's
2833;; and inefficient code.
2834
2835(define_expand "extv"
2836  [(set (match_operand:TI 0 "register_operand" "")
2837	(sign_extract:TI (match_operand 1 "nonimmediate_operand" "")
2838			 (match_operand:SI 2 "const_int_operand" "")
2839			 (match_operand:SI 3 "const_int_operand" "")))]
2840  ""
2841  {
2842    spu_expand_extv (operands, 0);
2843    DONE;
2844  })
2845
2846(define_expand "extzv"
2847  [(set (match_operand:TI 0 "register_operand" "")
2848	(zero_extract:TI (match_operand 1 "nonimmediate_operand" "")
2849			 (match_operand:SI 2 "const_int_operand" "")
2850			 (match_operand:SI 3 "const_int_operand" "")))]
2851  ""
2852  {
2853    spu_expand_extv (operands, 1);
2854    DONE;
2855  })
2856
2857(define_expand "insv"
2858  [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
2859		      (match_operand:SI 1 "const_int_operand" "")
2860		      (match_operand:SI 2 "const_int_operand" ""))
2861	(match_operand 3 "nonmemory_operand" ""))]
2862  ""
2863  {
2864    if (INTVAL (operands[1]) + INTVAL (operands[2])
2865        > GET_MODE_BITSIZE (GET_MODE (operands[0])))
2866      FAIL;
2867    spu_expand_insv(operands);
2868    DONE;
2869  })
2870
2871;; Simplify a number of patterns that get generated by extv, extzv,
2872;; insv, and loads.
2873(define_insn_and_split "trunc_shr_ti<mode>"
2874  [(set (match_operand:QHSI 0 "spu_reg_operand" "=r")
2875        (truncate:QHSI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
2876								(const_int 96)])))]
2877  ""
2878  "#"
2879  "reload_completed"
2880  [(const_int 0)]
2881  {
2882    spu_split_convert (operands);
2883    DONE;
2884  }
2885  [(set_attr "type" "convert")
2886   (set_attr "length" "0")])
2887
2888(define_insn_and_split "trunc_shr_tidi"
2889  [(set (match_operand:DI 0 "spu_reg_operand" "=r")
2890        (truncate:DI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
2891							      (const_int 64)])))]
2892  ""
2893  "#"
2894  "reload_completed"
2895  [(const_int 0)]
2896  {
2897    spu_split_convert (operands);
2898    DONE;
2899  }
2900  [(set_attr "type" "convert")
2901   (set_attr "length" "0")])
2902
2903(define_insn_and_split "shl_ext_<mode>ti"
2904  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2905        (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:QHSI 1 "spu_reg_operand" "0")])
2906		   (const_int 96)))]
2907  ""
2908  "#"
2909  "reload_completed"
2910  [(const_int 0)]
2911  {
2912    spu_split_convert (operands);
2913    DONE;
2914  }
2915  [(set_attr "type" "convert")
2916   (set_attr "length" "0")])
2917
2918(define_insn_and_split "shl_ext_diti"
2919  [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2920        (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:DI 1 "spu_reg_operand" "0")])
2921		   (const_int 64)))]
2922  ""
2923  "#"
2924  "reload_completed"
2925  [(const_int 0)]
2926  {
2927    spu_split_convert (operands);
2928    DONE;
2929  }
2930  [(set_attr "type" "convert")
2931   (set_attr "length" "0")])
2932
2933(define_insn "sext_trunc_lshr_tiqisi"
2934  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2935        (sign_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2936									      (const_int 120)]))))]
2937  ""
2938  "rotmai\t%0,%1,-24"
2939  [(set_attr "type" "fx3")])
2940
2941(define_insn "zext_trunc_lshr_tiqisi"
2942  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2943        (zero_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2944									      (const_int 120)]))))]
2945  ""
2946  "rotmi\t%0,%1,-24"
2947  [(set_attr "type" "fx3")])
2948
2949(define_insn "sext_trunc_lshr_tihisi"
2950  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2951        (sign_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2952									      (const_int 112)]))))]
2953  ""
2954  "rotmai\t%0,%1,-16"
2955  [(set_attr "type" "fx3")])
2956
2957(define_insn "zext_trunc_lshr_tihisi"
2958  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2959        (zero_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
2960									      (const_int 112)]))))]
2961  ""
2962  "rotmi\t%0,%1,-16"
2963  [(set_attr "type" "fx3")])
2964
2965
2966;; String/block move insn.
2967;; Argument 0 is the destination
2968;; Argument 1 is the source
2969;; Argument 2 is the length
2970;; Argument 3 is the alignment
2971
2972(define_expand "movstrsi"
2973  [(parallel [(set (match_operand:BLK 0 "" "")
2974		   (match_operand:BLK 1 "" ""))
2975	      (use (match_operand:SI 2 "" ""))
2976	      (use (match_operand:SI 3 "" ""))])]
2977  ""
2978  "
2979  {
2980    if (spu_expand_block_move (operands))
2981      DONE;
2982    else
2983      FAIL;
2984  }")
2985
2986
2987;; jump
2988
2989(define_insn "indirect_jump"
2990  [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))]
2991  ""
2992  "bi\t%0"
2993  [(set_attr "type" "br")])
2994
2995(define_insn "jump"
2996  [(set (pc)
2997	(label_ref (match_operand 0 "" "")))]
2998  ""
2999  "br\t%0"
3000  [(set_attr "type" "br")])
3001
3002
3003;; return
3004
3005;; This will be used for leaf functions, that don't save any regs and
3006;; don't have locals on stack, maybe... that is for functions that
3007;; don't change $sp and don't need to save $lr.
3008(define_expand "return"
3009    [(return)]
3010  "direct_return()"
3011  "")
3012
3013;; used in spu_expand_epilogue to generate return from a function and
3014;; explicitly set use of $lr.
3015
3016(define_insn "_return"
3017  [(return)]
3018  ""
3019  "bi\t$lr"
3020  [(set_attr "type" "br")])
3021
3022
3023
3024;; ceq
3025
3026(define_insn "ceq_<mode>"
3027  [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
3028	(eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
3029	         (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
3030  ""
3031  "@
3032  ceq<bh>\t%0,%1,%2
3033  ceq<bh>i\t%0,%1,%2")
3034
3035(define_insn_and_split "ceq_di"
3036  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3037        (eq:SI (match_operand:DI 1 "spu_reg_operand" "r")
3038	       (match_operand:DI 2 "spu_reg_operand" "r")))]
3039  ""
3040  "#"
3041  "reload_completed"
3042  [(set (match_dup:SI 0)
3043        (eq:SI (match_dup:DI 1)
3044	       (match_dup:DI 2)))]
3045  {
3046    rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
3047    rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3048    rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
3049    emit_insn (gen_ceq_v4si (op0, op1, op2));
3050    emit_insn (gen_spu_gb (op0, op0));
3051    emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11)));
3052    DONE;
3053  })
3054
3055
3056;; We provide the TI compares for completeness and because some parts of
3057;; gcc/libgcc use them, even though user code might never see it.
3058(define_insn "ceq_ti"
3059  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3060	(eq:SI (match_operand:TI 1 "spu_reg_operand" "r")
3061	       (match_operand:TI 2 "spu_reg_operand" "r")))]
3062  ""
3063  "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
3064  [(set_attr "type" "multi0")
3065   (set_attr "length" "12")])
3066
3067(define_insn "ceq_<mode>"
3068  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3069	(eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
3070		  (match_operand:VSF 2 "spu_reg_operand" "r")))]
3071  ""
3072  "fceq\t%0,%1,%2")
3073
3074(define_insn "cmeq_<mode>"
3075  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3076	(eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
3077	          (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
3078  ""
3079  "fcmeq\t%0,%1,%2")
3080
3081;; These implementations will ignore checking of NaN or INF if
3082;; compiled with option -ffinite-math-only.
3083(define_expand "ceq_df"
3084  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3085        (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
3086               (match_operand:DF 2 "const_zero_operand" "i")))]
3087  ""
3088{
3089  if (spu_arch == PROCESSOR_CELL)
3090      {
3091        rtx ra = gen_reg_rtx (V4SImode);
3092        rtx rb = gen_reg_rtx (V4SImode);
3093        rtx temp = gen_reg_rtx (TImode);
3094        rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3095        rtx temp2 = gen_reg_rtx (V4SImode);
3096        rtx biteq = gen_reg_rtx (V4SImode);
3097        rtx ahi_inf = gen_reg_rtx (V4SImode);
3098        rtx a_nan = gen_reg_rtx (V4SImode);
3099        rtx a_abs = gen_reg_rtx (V4SImode);
3100        rtx b_abs = gen_reg_rtx (V4SImode);
3101        rtx iszero = gen_reg_rtx (V4SImode);
3102        rtx sign_mask = gen_reg_rtx (V4SImode);
3103        rtx nan_mask = gen_reg_rtx (V4SImode);
3104        rtx hihi_promote = gen_reg_rtx (TImode);
3105        rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3106                                                 0x7FFFFFFF, 0xFFFFFFFF);
3107
3108        emit_move_insn (sign_mask, pat);
3109        pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3110                                             0x7FF00000, 0x0);
3111        emit_move_insn (nan_mask, pat);
3112        pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
3113                                           0x08090A0B, 0x18191A1B);
3114        emit_move_insn (hihi_promote, pat);
3115
3116        emit_insn (gen_spu_convert (ra, operands[1]));
3117        emit_insn (gen_spu_convert (rb, operands[2]));
3118        emit_insn (gen_ceq_v4si (biteq, ra, rb));
3119        emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
3120				GEN_INT (4 * 8)));
3121        emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
3122
3123        emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3124        emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3125	if (!flag_finite_math_only)
3126          {
3127            emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3128            emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
3129            emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3130                                   GEN_INT (4 * 8)));
3131            emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
3132            emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3133	  }
3134        emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
3135        emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
3136        emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3137				GEN_INT (4 * 8)));
3138        emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3139        emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
3140	if (!flag_finite_math_only)
3141          {
3142            emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3143	  }
3144        emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
3145        DONE;
3146      }
3147})
3148
3149(define_insn "ceq_<mode>_celledp"
3150  [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3151        (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
3152                   (match_operand:VDF 2 "spu_reg_operand" "r")))]
3153  "spu_arch == PROCESSOR_CELLEDP"
3154  "dfceq\t%0,%1,%2"
3155  [(set_attr "type" "fpd")])
3156
3157(define_insn "cmeq_<mode>_celledp"
3158  [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3159        (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
3160                   (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
3161  "spu_arch == PROCESSOR_CELLEDP"
3162  "dfcmeq\t%0,%1,%2"
3163  [(set_attr "type" "fpd")])
3164
3165(define_expand "ceq_v2df"
3166  [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3167        (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
3168                 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
3169  ""
3170{
3171  if (spu_arch == PROCESSOR_CELL)
3172    {
3173      rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3174      rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3175      rtx temp = gen_reg_rtx (TImode);
3176      rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3177      rtx temp2 = gen_reg_rtx (V4SImode);
3178      rtx biteq = gen_reg_rtx (V4SImode);
3179      rtx ahi_inf = gen_reg_rtx (V4SImode);
3180      rtx a_nan = gen_reg_rtx (V4SImode);
3181      rtx a_abs = gen_reg_rtx (V4SImode);
3182      rtx b_abs = gen_reg_rtx (V4SImode);
3183      rtx iszero = gen_reg_rtx (V4SImode);
3184      rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3185                                               0x7FFFFFFF, 0xFFFFFFFF);
3186      rtx sign_mask = gen_reg_rtx (V4SImode);
3187      rtx nan_mask = gen_reg_rtx (V4SImode);
3188      rtx hihi_promote = gen_reg_rtx (TImode);
3189
3190      emit_move_insn (sign_mask, pat);
3191      pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3192					     0x7FF00000, 0x0);
3193      emit_move_insn (nan_mask, pat);
3194      pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
3195					   0x08090A0B, 0x18191A1B);
3196      emit_move_insn (hihi_promote, pat);
3197
3198      emit_insn (gen_ceq_v4si (biteq, ra, rb));
3199      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
3200                              GEN_INT (4 * 8)));
3201      emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
3202      emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3203      emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3204      emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3205      emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
3206      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3207                              GEN_INT (4 * 8)));
3208      emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
3209      emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3210      emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
3211      emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
3212      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3213                              GEN_INT (4 * 8)));
3214      emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3215      emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
3216      emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3217      emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
3218      DONE;
3219  }
3220})
3221
3222(define_expand "cmeq_v2df"
3223  [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3224        (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
3225                 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
3226  ""
3227{
3228  if (spu_arch == PROCESSOR_CELL)
3229    {
3230      rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3231      rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3232      rtx temp = gen_reg_rtx (TImode);
3233      rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3234      rtx temp2 = gen_reg_rtx (V4SImode);
3235      rtx biteq = gen_reg_rtx (V4SImode);
3236      rtx ahi_inf = gen_reg_rtx (V4SImode);
3237      rtx a_nan = gen_reg_rtx (V4SImode);
3238      rtx a_abs = gen_reg_rtx (V4SImode);
3239      rtx b_abs = gen_reg_rtx (V4SImode);
3240
3241      rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3242                                               0x7FFFFFFF, 0xFFFFFFFF);
3243      rtx sign_mask = gen_reg_rtx (V4SImode);
3244      rtx nan_mask = gen_reg_rtx (V4SImode);
3245      rtx hihi_promote = gen_reg_rtx (TImode);
3246
3247      emit_move_insn (sign_mask, pat);
3248
3249      pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3250                                           0x7FF00000, 0x0);
3251      emit_move_insn (nan_mask, pat);
3252      pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
3253                                         0x08090A0B, 0x18191A1B);
3254      emit_move_insn (hihi_promote, pat);
3255
3256      emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3257      emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3258      emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
3259      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
3260                                                    GEN_INT (4 * 8)));
3261      emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
3262      emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3263      emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
3264      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3265                                                    GEN_INT (4 * 8)));
3266      emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
3267      emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3268      emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
3269      emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
3270      DONE;
3271  }
3272})
3273
3274
3275;; cgt
3276
3277(define_insn "cgt_<mode>"
3278  [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
3279	(gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
3280	          (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
3281  ""
3282  "@
3283  cgt<bh>\t%0,%1,%2
3284  cgt<bh>i\t%0,%1,%2")
3285
3286(define_insn "cgt_di_m1"
3287  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3288	(gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
3289	       (const_int -1)))]
3290  ""
3291  "cgti\t%0,%1,-1")
3292
3293(define_insn_and_split "cgt_di"
3294  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3295	(gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
3296	       (match_operand:DI 2 "spu_reg_operand" "r")))
3297   (clobber (match_scratch:V4SI 3 "=&r"))
3298   (clobber (match_scratch:V4SI 4 "=&r"))
3299   (clobber (match_scratch:V4SI 5 "=&r"))]
3300  ""
3301  "#"
3302  "reload_completed"
3303  [(set (match_dup:SI 0)
3304        (gt:SI (match_dup:DI 1)
3305	       (match_dup:DI 2)))]
3306  {
3307    rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
3308    rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3309    rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
3310    rtx op3 = operands[3];
3311    rtx op4 = operands[4];
3312    rtx op5 = operands[5];
3313    rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3]));
3314    emit_insn (gen_clgt_v4si (op3, op1, op2));
3315    emit_insn (gen_ceq_v4si (op4, op1, op2));
3316    emit_insn (gen_cgt_v4si (op5, op1, op2));
3317    emit_insn (gen_spu_xswd (op3d, op3));
3318    emit_insn (gen_selb (op0, op5, op3, op4));
3319    DONE;
3320  })
3321
3322(define_insn "cgt_ti_m1"
3323  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3324	(gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
3325	       (const_int -1)))]
3326  ""
3327  "cgti\t%0,%1,-1")
3328
3329(define_insn "cgt_ti"
3330  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3331	(gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
3332	       (match_operand:TI 2 "spu_reg_operand" "r")))
3333   (clobber (match_scratch:V4SI 3 "=&r"))
3334   (clobber (match_scratch:V4SI 4 "=&r"))
3335   (clobber (match_scratch:V4SI 5 "=&r"))]
3336  ""
3337  "clgt\t%4,%1,%2\;\
3338ceq\t%3,%1,%2\;\
3339cgt\t%5,%1,%2\;\
3340shlqbyi\t%0,%4,4\;\
3341selb\t%0,%4,%0,%3\;\
3342shlqbyi\t%0,%0,4\;\
3343selb\t%0,%4,%0,%3\;\
3344shlqbyi\t%0,%0,4\;\
3345selb\t%0,%5,%0,%3"
3346  [(set_attr "type" "multi0")
3347   (set_attr "length" "36")])
3348
3349(define_insn "cgt_<mode>"
3350  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3351	(gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
3352		  (match_operand:VSF 2 "spu_reg_operand" "r")))]
3353  ""
3354  "fcgt\t%0,%1,%2")
3355
3356(define_insn "cmgt_<mode>"
3357  [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
3358	(gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
3359		  (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
3360  ""
3361  "fcmgt\t%0,%1,%2")
3362
3363(define_expand "cgt_df"
3364  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3365        (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
3366               (match_operand:DF 2 "const_zero_operand" "i")))]
3367  ""
3368{
3369  if (spu_arch == PROCESSOR_CELL)
3370    {
3371      rtx ra = gen_reg_rtx (V4SImode);
3372      rtx rb = gen_reg_rtx (V4SImode);
3373      rtx zero = gen_reg_rtx (V4SImode);
3374      rtx temp = gen_reg_rtx (TImode);
3375      rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3376      rtx temp2 = gen_reg_rtx (V4SImode);
3377      rtx hi_inf = gen_reg_rtx (V4SImode);
3378      rtx a_nan = gen_reg_rtx (V4SImode);
3379      rtx b_nan = gen_reg_rtx (V4SImode);
3380      rtx a_abs = gen_reg_rtx (V4SImode);
3381      rtx b_abs = gen_reg_rtx (V4SImode);
3382      rtx asel = gen_reg_rtx (V4SImode);
3383      rtx bsel = gen_reg_rtx (V4SImode);
3384      rtx abor = gen_reg_rtx (V4SImode);
3385      rtx bbor = gen_reg_rtx (V4SImode);
3386      rtx gt_hi = gen_reg_rtx (V4SImode);
3387      rtx gt_lo = gen_reg_rtx (V4SImode);
3388      rtx sign_mask = gen_reg_rtx (V4SImode);
3389      rtx nan_mask = gen_reg_rtx (V4SImode);
3390      rtx hi_promote = gen_reg_rtx (TImode);
3391      rtx borrow_shuffle = gen_reg_rtx (TImode);
3392
3393      rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3394                                               0x7FFFFFFF, 0xFFFFFFFF);
3395      emit_move_insn (sign_mask, pat);
3396      pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3397                                             0x7FF00000, 0x0);
3398      emit_move_insn (nan_mask, pat);
3399      pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3400                                         0x08090A0B, 0x08090A0B);
3401      emit_move_insn (hi_promote, pat);
3402      pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
3403                                         0x0C0D0E0F, 0xC0C0C0C0);
3404      emit_move_insn (borrow_shuffle, pat);
3405
3406      emit_insn (gen_spu_convert (ra, operands[1]));
3407      emit_insn (gen_spu_convert (rb, operands[2]));
3408      emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3409      emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3410
3411      if (!flag_finite_math_only)
3412	{
3413	  /* check if ra is NaN  */
3414          emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
3415          emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3416          emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3417                                  GEN_INT (4 * 8)));
3418          emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3419          emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3420          emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
3421
3422	  /* check if rb is NaN  */
3423          emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
3424          emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
3425          emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
3426                                  GEN_INT (4 * 8)));
3427          emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3428          emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
3429          emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
3430
3431	  /* check if ra or rb is NaN  */
3432          emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
3433	}
3434      emit_move_insn (zero, CONST0_RTX (V4SImode));
3435      emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
3436      emit_insn (gen_shufb (asel, asel, asel, hi_promote));
3437      emit_insn (gen_bg_v4si (abor, zero, a_abs));
3438      emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
3439      emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
3440      emit_insn (gen_selb (abor, a_abs, abor, asel));
3441
3442      emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
3443      emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
3444      emit_insn (gen_bg_v4si (bbor, zero, b_abs));
3445      emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
3446      emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
3447      emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
3448
3449      emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
3450      emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
3451      emit_insn (gen_ceq_v4si (temp2, abor, bbor));
3452      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
3453                                GEN_INT (4 * 8)));
3454      emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
3455      emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
3456      emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
3457      if (!flag_finite_math_only)
3458        {
3459	  /* correct for NaNs  */
3460          emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3461	}
3462      emit_insn (gen_spu_convert (operands[0], temp2));
3463      DONE;
3464    }
3465})
3466
3467(define_insn "cgt_<mode>_celledp"
3468  [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3469        (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
3470                   (match_operand:VDF 2 "spu_reg_operand" "r")))]
3471  "spu_arch == PROCESSOR_CELLEDP"
3472  "dfcgt\t%0,%1,%2"
3473  [(set_attr "type" "fpd")])
3474
3475(define_insn "cmgt_<mode>_celledp"
3476  [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
3477        (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
3478                   (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
3479  "spu_arch == PROCESSOR_CELLEDP"
3480  "dfcmgt\t%0,%1,%2"
3481  [(set_attr "type" "fpd")])
3482
3483(define_expand "cgt_v2df"
3484  [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3485        (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
3486                 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
3487  ""
3488{
3489  if (spu_arch == PROCESSOR_CELL)
3490    {
3491      rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3492      rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3493      rtx zero = gen_reg_rtx (V4SImode);
3494      rtx temp = gen_reg_rtx (TImode);
3495      rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3496      rtx temp2 = gen_reg_rtx (V4SImode);
3497      rtx hi_inf = gen_reg_rtx (V4SImode);
3498      rtx a_nan = gen_reg_rtx (V4SImode);
3499      rtx b_nan = gen_reg_rtx (V4SImode);
3500      rtx a_abs = gen_reg_rtx (V4SImode);
3501      rtx b_abs = gen_reg_rtx (V4SImode);
3502      rtx asel = gen_reg_rtx (V4SImode);
3503      rtx bsel = gen_reg_rtx (V4SImode);
3504      rtx abor = gen_reg_rtx (V4SImode);
3505      rtx bbor = gen_reg_rtx (V4SImode);
3506      rtx gt_hi = gen_reg_rtx (V4SImode);
3507      rtx gt_lo = gen_reg_rtx (V4SImode);
3508      rtx sign_mask = gen_reg_rtx (V4SImode);
3509      rtx nan_mask = gen_reg_rtx (V4SImode);
3510      rtx hi_promote = gen_reg_rtx (TImode);
3511      rtx borrow_shuffle = gen_reg_rtx (TImode);
3512      rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3513                                               0x7FFFFFFF, 0xFFFFFFFF);
3514      emit_move_insn (sign_mask, pat);
3515      pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3516                                           0x7FF00000, 0x0);
3517      emit_move_insn (nan_mask, pat);
3518      pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3519                                         0x08090A0B, 0x08090A0B);
3520      emit_move_insn (hi_promote, pat);
3521      pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
3522                                         0x0C0D0E0F, 0xC0C0C0C0);
3523      emit_move_insn (borrow_shuffle, pat);
3524
3525      emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3526      emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
3527      emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3528      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3529                                                    GEN_INT (4 * 8)));
3530      emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3531      emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3532      emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
3533      emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3534      emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
3535      emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
3536      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
3537                                                    GEN_INT (4 * 8)));
3538      emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3539      emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
3540      emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
3541      emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
3542      emit_move_insn (zero, CONST0_RTX (V4SImode));
3543      emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
3544      emit_insn (gen_shufb (asel, asel, asel, hi_promote));
3545      emit_insn (gen_bg_v4si (abor, zero, a_abs));
3546      emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
3547      emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
3548      emit_insn (gen_selb (abor, a_abs, abor, asel));
3549      emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
3550      emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
3551      emit_insn (gen_bg_v4si (bbor, zero, b_abs));
3552      emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
3553      emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
3554      emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
3555      emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
3556      emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
3557      emit_insn (gen_ceq_v4si (temp2, abor, bbor));
3558      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
3559                                                    GEN_INT (4 * 8)));
3560      emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
3561      emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
3562
3563      emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
3564      emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3565      emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
3566      DONE;
3567    }
3568})
3569
3570(define_expand "cmgt_v2df"
3571  [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3572        (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
3573                 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
3574  ""
3575{
3576  if (spu_arch == PROCESSOR_CELL)
3577    {
3578      rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3579      rtx rb = spu_gen_subreg (V4SImode, operands[2]);
3580      rtx temp = gen_reg_rtx (TImode);
3581      rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3582      rtx temp2 = gen_reg_rtx (V4SImode);
3583      rtx hi_inf = gen_reg_rtx (V4SImode);
3584      rtx a_nan = gen_reg_rtx (V4SImode);
3585      rtx b_nan = gen_reg_rtx (V4SImode);
3586      rtx a_abs = gen_reg_rtx (V4SImode);
3587      rtx b_abs = gen_reg_rtx (V4SImode);
3588      rtx gt_hi = gen_reg_rtx (V4SImode);
3589      rtx gt_lo = gen_reg_rtx (V4SImode);
3590      rtx sign_mask = gen_reg_rtx (V4SImode);
3591      rtx nan_mask = gen_reg_rtx (V4SImode);
3592      rtx hi_promote = gen_reg_rtx (TImode);
3593      rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3594                                               0x7FFFFFFF, 0xFFFFFFFF);
3595      emit_move_insn (sign_mask, pat);
3596      pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3597                                           0x7FF00000, 0x0);
3598      emit_move_insn (nan_mask, pat);
3599      pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3600                                         0x08090A0B, 0x08090A0B);
3601      emit_move_insn (hi_promote, pat);
3602
3603      emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
3604      emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
3605      emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
3606      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
3607                                                    GEN_INT (4 * 8)));
3608      emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3609      emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
3610      emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
3611      emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
3612      emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
3613      emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
3614      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
3615                                                    GEN_INT (4 * 8)));
3616      emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
3617      emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
3618      emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
3619      emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
3620
3621      emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
3622      emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
3623      emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
3624      emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
3625                                                    GEN_INT (4 * 8)));
3626      emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
3627      emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
3628      emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
3629      emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
3630      emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
3631      DONE;
3632    }
3633})
3634
3635
3636;; clgt
3637
3638(define_insn "clgt_<mode>"
3639  [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
3640	(gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
3641		   (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
3642  ""
3643  "@
3644  clgt<bh>\t%0,%1,%2
3645  clgt<bh>i\t%0,%1,%2")
3646
3647(define_insn_and_split "clgt_di"
3648  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3649	(gtu:SI (match_operand:DI 1 "spu_reg_operand" "r")
3650	        (match_operand:DI 2 "spu_reg_operand" "r")))
3651   (clobber (match_scratch:V4SI 3 "=&r"))
3652   (clobber (match_scratch:V4SI 4 "=&r"))
3653   (clobber (match_scratch:V4SI 5 "=&r"))]
3654  ""
3655  "#"
3656  "reload_completed"
3657  [(set (match_dup:SI 0)
3658        (gtu:SI (match_dup:DI 1)
3659	        (match_dup:DI 2)))]
3660  {
3661    rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
3662    rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
3663    rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
3664    rtx op3 = operands[3];
3665    rtx op4 = operands[4];
3666    rtx op5 = operands[5];
3667    rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5]));
3668    emit_insn (gen_clgt_v4si (op3, op1, op2));
3669    emit_insn (gen_ceq_v4si (op4, op1, op2));
3670    emit_insn (gen_spu_xswd (op5d, op3));
3671    emit_insn (gen_selb (op0, op3, op5, op4));
3672    DONE;
3673  })
3674
3675(define_insn "clgt_ti"
3676  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3677	(gtu:SI (match_operand:TI 1 "spu_reg_operand" "r")
3678	       (match_operand:TI 2 "spu_reg_operand" "r")))
3679   (clobber (match_scratch:V4SI 3 "=&r"))
3680   (clobber (match_scratch:V4SI 4 "=&r"))]
3681  ""
3682  "ceq\t%3,%1,%2\;\
3683clgt\t%4,%1,%2\;\
3684shlqbyi\t%0,%4,4\;\
3685selb\t%0,%4,%0,%3\;\
3686shlqbyi\t%0,%0,4\;\
3687selb\t%0,%4,%0,%3\;\
3688shlqbyi\t%0,%0,4\;\
3689selb\t%0,%4,%0,%3"
3690  [(set_attr "type" "multi0")
3691   (set_attr "length" "32")])
3692
3693
3694;; dftsv
3695(define_insn "dftsv_celledp"
3696  [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3697        (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand"  "r")
3698		      (match_operand:SI   2 "const_int_operand" "i")]
3699		      UNSPEC_DFTSV))]
3700  "spu_arch == PROCESSOR_CELLEDP"
3701  "dftsv\t%0,%1,%2"
3702  [(set_attr "type" "fpd")])
3703
3704(define_expand "dftsv"
3705  [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3706        (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
3707		      (match_operand:SI   2 "const_int_operand" "i")]
3708		      UNSPEC_DFTSV))]
3709  ""
3710{
3711  if (spu_arch == PROCESSOR_CELL)
3712    {
3713      rtx result = gen_reg_rtx (V4SImode);
3714      emit_move_insn (result, CONST0_RTX (V4SImode));
3715
3716      if (INTVAL (operands[2]))
3717        {
3718          rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3719          rtx abs = gen_reg_rtx (V4SImode);
3720          rtx sign = gen_reg_rtx (V4SImode);
3721          rtx temp = gen_reg_rtx (TImode);
3722          rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3723          rtx temp2 = gen_reg_rtx (V4SImode);
3724          rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3725                                                   0x7FFFFFFF, 0xFFFFFFFF);
3726          rtx sign_mask = gen_reg_rtx (V4SImode);
3727          rtx hi_promote = gen_reg_rtx (TImode);
3728          emit_move_insn (sign_mask, pat);
3729          pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3730                                             0x08090A0B, 0x08090A0B);
3731          emit_move_insn (hi_promote, pat);
3732
3733          emit_insn (gen_vashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
3734          emit_insn (gen_shufb (sign, sign, sign, hi_promote));
3735          emit_insn (gen_andv4si3 (abs, ra, sign_mask));
3736
3737          /* NaN  or +inf or -inf */
3738          if (INTVAL (operands[2]) & 0x70)
3739            {
3740              rtx nan_mask = gen_reg_rtx (V4SImode);
3741              rtx isinf = gen_reg_rtx (V4SImode);
3742              pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3743		   			           0x7FF00000, 0x0);
3744              emit_move_insn (nan_mask, pat);
3745              emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
3746
3747              /* NaN  */
3748              if (INTVAL (operands[2]) & 0x40)
3749                {
3750                  rtx isnan = gen_reg_rtx (V4SImode);
3751                  emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
3752                  emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
3753                                                             GEN_INT (4 * 8)));
3754                  emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
3755                  emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
3756                  emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
3757                  emit_insn (gen_iorv4si3 (result, result, isnan));
3758                }
3759              /* +inf or -inf  */
3760              if (INTVAL (operands[2]) & 0x30)
3761                {
3762                  emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
3763                                                             GEN_INT (4 * 8)));
3764                  emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
3765                  emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
3766
3767                  /* +inf  */
3768                  if (INTVAL (operands[2]) & 0x20)
3769                    {
3770                      emit_insn (gen_andc_v4si (temp2, isinf, sign));
3771                      emit_insn (gen_iorv4si3 (result, result, temp2));
3772                    }
3773                  /* -inf  */
3774                  if (INTVAL (operands[2]) & 0x10)
3775                    {
3776                      emit_insn (gen_andv4si3 (temp2, isinf, sign));
3777                      emit_insn (gen_iorv4si3 (result, result, temp2));
3778                    }
3779                }
3780            }
3781
3782          /* 0 or denorm  */
3783          if (INTVAL (operands[2]) & 0xF)
3784            {
3785              rtx iszero = gen_reg_rtx (V4SImode);
3786              emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
3787              emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3788                                                          GEN_INT (4 * 8)));
3789              emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3790
3791              /* denorm  */
3792              if (INTVAL (operands[2]) & 0x3)
3793                {
3794                  rtx isdenorm = gen_reg_rtx (V4SImode);
3795                  rtx denorm_mask = gen_reg_rtx (V4SImode);
3796                  emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
3797                  emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
3798                  emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
3799                  emit_insn (gen_shufb (isdenorm, isdenorm,
3800                                        isdenorm, hi_promote));
3801                  /* +denorm  */
3802                  if (INTVAL (operands[2]) & 0x2)
3803                    {
3804                      emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
3805                      emit_insn (gen_iorv4si3 (result, result, temp2));
3806                    }
3807                  /* -denorm  */
3808                  if (INTVAL (operands[2]) & 0x1)
3809                    {
3810                      emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
3811                      emit_insn (gen_iorv4si3 (result, result, temp2));
3812                    }
3813                }
3814
3815              /* 0  */
3816              if (INTVAL (operands[2]) & 0xC)
3817                {
3818                  emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
3819                  /* +0  */
3820                  if (INTVAL (operands[2]) & 0x8)
3821                    {
3822                      emit_insn (gen_andc_v4si (temp2, iszero, sign));
3823                      emit_insn (gen_iorv4si3 (result, result, temp2));
3824                    }
3825                  /* -0  */
3826                  if (INTVAL (operands[2]) & 0x4)
3827                    {
3828                      emit_insn (gen_andv4si3 (temp2, iszero, sign));
3829                      emit_insn (gen_iorv4si3 (result, result, temp2));
3830                    }
3831                }
3832             }
3833          }
3834      emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
3835      DONE;
3836    }
3837})
3838
3839
3840;; branches
3841
3842(define_insn ""
3843  [(set (pc)
3844	(if_then_else (match_operator 1 "branch_comparison_operator"
3845				      [(match_operand 2
3846						      "spu_reg_operand" "r")
3847				       (const_int 0)])
3848		      (label_ref (match_operand 0 "" ""))
3849		      (pc)))]
3850  ""
3851  "br%b2%b1z\t%2,%0"
3852  [(set_attr "type" "br")])
3853
3854(define_insn ""
3855  [(set (pc)
3856	(if_then_else (match_operator 0 "branch_comparison_operator"
3857				      [(match_operand 1
3858						      "spu_reg_operand" "r")
3859				       (const_int 0)])
3860		      (return)
3861		      (pc)))]
3862  "direct_return ()"
3863  "bi%b1%b0z\t%1,$lr"
3864  [(set_attr "type" "br")])
3865
3866(define_insn ""
3867  [(set (pc)
3868	(if_then_else (match_operator 1 "branch_comparison_operator"
3869				      [(match_operand 2
3870						      "spu_reg_operand" "r")
3871				       (const_int 0)])
3872		      (pc)
3873		      (label_ref (match_operand 0 "" ""))))]
3874  ""
3875  "br%b2%b1z\t%2,%0"
3876  [(set_attr "type" "br")])
3877
3878(define_insn ""
3879  [(set (pc)
3880	(if_then_else (match_operator 0 "branch_comparison_operator"
3881				      [(match_operand 1
3882						      "spu_reg_operand" "r")
3883				       (const_int 0)])
3884		      (pc)
3885		      (return)))]
3886  "direct_return ()"
3887  "bi%b1%b0z\t%1,$lr"
3888  [(set_attr "type" "br")])
3889
3890
3891;; vector conditional compare patterns
3892(define_expand "vcond<mode><mode>"
3893  [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
3894        (if_then_else:VCMP
3895          (match_operator 3 "comparison_operator"
3896            [(match_operand:VCMP 4 "spu_reg_operand" "r")
3897             (match_operand:VCMP 5 "spu_reg_operand" "r")])
3898          (match_operand:VCMP 1 "spu_reg_operand" "r")
3899          (match_operand:VCMP 2 "spu_reg_operand" "r")))]
3900  ""
3901  {
3902    if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3903                                   operands[3], operands[4], operands[5]))
3904    DONE;
3905    else
3906    FAIL;
3907  })
3908
3909(define_expand "vcondu<mode><mode>"
3910  [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
3911        (if_then_else:VCMPU
3912          (match_operator 3 "comparison_operator"
3913            [(match_operand:VCMPU 4 "spu_reg_operand" "r")
3914             (match_operand:VCMPU 5 "spu_reg_operand" "r")])
3915          (match_operand:VCMPU 1 "spu_reg_operand" "r")
3916          (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
3917  ""
3918  {
3919    if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3920                                   operands[3], operands[4], operands[5]))
3921    DONE;
3922    else
3923    FAIL;
3924  })
3925
3926
3927;; branch on condition
3928
3929(define_expand "cbranch<mode>4"
3930  [(use (match_operator 0 "ordered_comparison_operator"
3931	 [(match_operand:VQHSI 1 "spu_reg_operand" "")
3932	  (match_operand:VQHSI 2 "spu_nonmem_operand" "")]))
3933   (use (match_operand 3 ""))]
3934  ""
3935  { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3936
3937(define_expand "cbranch<mode>4"
3938  [(use (match_operator 0 "ordered_comparison_operator"
3939	 [(match_operand:DTI 1 "spu_reg_operand" "")
3940	  (match_operand:DTI 2 "spu_reg_operand" "")]))
3941   (use (match_operand 3 ""))]
3942  ""
3943  { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3944
3945(define_expand "cbranch<mode>4"
3946  [(use (match_operator 0 "ordered_comparison_operator"
3947	 [(match_operand:VSF 1 "spu_reg_operand" "")
3948	  (match_operand:VSF 2 "spu_reg_operand" "")]))
3949   (use (match_operand 3 ""))]
3950  ""
3951  { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3952
3953(define_expand "cbranchdf4"
3954  [(use (match_operator 0 "ordered_comparison_operator"
3955	 [(match_operand:DF 1 "spu_reg_operand" "")
3956	  (match_operand:DF 2 "spu_reg_operand" "")]))
3957   (use (match_operand 3 ""))]
3958  ""
3959  { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
3960
3961
3962;; set on condition
3963
3964(define_expand "cstore<mode>4"
3965  [(use (match_operator 1 "ordered_comparison_operator"
3966	 [(match_operand:VQHSI 2 "spu_reg_operand" "")
3967	  (match_operand:VQHSI 3 "spu_nonmem_operand" "")]))
3968   (clobber (match_operand:SI 0 "spu_reg_operand"))]
3969  ""
3970  { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3971
3972(define_expand "cstore<mode>4"
3973  [(use (match_operator 1 "ordered_comparison_operator"
3974	 [(match_operand:DTI 2 "spu_reg_operand" "")
3975	  (match_operand:DTI 3 "spu_reg_operand" "")]))
3976   (clobber (match_operand:SI 0 "spu_reg_operand"))]
3977  ""
3978  { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3979
3980(define_expand "cstore<mode>4"
3981  [(use (match_operator 1 "ordered_comparison_operator"
3982	 [(match_operand:VSF 2 "spu_reg_operand" "")
3983	  (match_operand:VSF 3 "spu_reg_operand" "")]))
3984   (clobber (match_operand:SI 0 "spu_reg_operand"))]
3985  ""
3986  { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3987
3988(define_expand "cstoredf4"
3989  [(use (match_operator 1 "ordered_comparison_operator"
3990	 [(match_operand:DF 2 "spu_reg_operand" "")
3991	  (match_operand:DF 3 "spu_reg_operand" "")]))
3992   (clobber (match_operand:SI 0 "spu_reg_operand"))]
3993  ""
3994  { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
3995
3996
3997;; conditional move
3998
3999;; Define this first one so HAVE_conditional_move is defined.
4000(define_insn "movcc_dummy"
4001  [(set (match_operand 0 "" "")
4002       (if_then_else (match_operand 1 "" "")
4003		     (match_operand 2 "" "")
4004		     (match_operand 3 "" "")))]
4005  "!operands[0]"
4006  "")
4007
4008(define_expand "mov<mode>cc"
4009  [(set (match_operand:ALL 0 "spu_reg_operand" "")
4010	(if_then_else:ALL (match_operand 1 "ordered_comparison_operator" "")
4011		      (match_operand:ALL 2 "spu_reg_operand" "")
4012		      (match_operand:ALL 3 "spu_reg_operand" "")))]
4013  ""
4014  {
4015    spu_emit_branch_or_set(2, operands[1], operands);
4016    DONE;
4017  })
4018
4019;; This pattern is used when the result of a compare is not large
4020;; enough to use in a selb when expanding conditional moves.
4021(define_expand "extend_compare"
4022  [(set (match_operand 0 "spu_reg_operand" "=r")
4023	(unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
4024  ""
4025  {
4026    emit_insn (gen_rtx_SET (operands[0],
4027			    gen_rtx_UNSPEC (GET_MODE (operands[0]),
4028			                    gen_rtvec (1, operands[1]),
4029					    UNSPEC_EXTEND_CMP)));
4030    DONE;
4031  })
4032
4033(define_insn "extend_compare<mode>"
4034  [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
4035	(unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
4036  "operands != NULL"
4037  "fsm\t%0,%1"
4038  [(set_attr "type" "shuf")])
4039
4040
4041;; case
4042
4043;; operand 0 is index
4044;; operand 1 is the minimum bound
4045;; operand 2 is the maximum bound - minimum bound + 1
4046;; operand 3 is CODE_LABEL for the table;
4047;; operand 4 is the CODE_LABEL to go to if index out of range.
4048(define_expand "casesi"
4049  [(match_operand:SI 0 "spu_reg_operand" "")
4050   (match_operand:SI 1 "immediate_operand" "")
4051   (match_operand:SI 2 "immediate_operand" "")
4052   (match_operand 3 "" "")
4053   (match_operand 4 "" "")]
4054  ""
4055  {
4056    rtx table = gen_reg_rtx (SImode);
4057    rtx index = gen_reg_rtx (SImode);
4058    rtx sindex = gen_reg_rtx (SImode);
4059    rtx addr = gen_reg_rtx (Pmode);
4060
4061    emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3]));
4062
4063    emit_insn (gen_subsi3(index, operands[0], force_reg(SImode, operands[1])));
4064    emit_insn (gen_ashlsi3(sindex, index, GEN_INT (2)));
4065    emit_move_insn (addr, gen_rtx_MEM (SImode,
4066				       gen_rtx_PLUS (SImode, table, sindex)));
4067    if (flag_pic)
4068      emit_insn (gen_addsi3 (addr, addr, table));
4069
4070    emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4]);
4071    emit_jump_insn (gen_tablejump (addr, operands[3]));
4072    DONE;
4073  })
4074
4075(define_insn "tablejump"
4076  [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))
4077   (use (label_ref (match_operand 1 "" "")))]
4078  ""
4079  "bi\t%0"
4080  [(set_attr "type" "br")])
4081
4082
4083;; call
4084
4085;; Note that operand 1 is total size of args, in bytes,
4086;; and what the call insn wants is the number of words.
4087(define_expand "sibcall"
4088  [(parallel
4089    [(call (match_operand:QI 0 "call_operand" "")
4090	   (match_operand:QI 1 "" ""))
4091     (use (reg:SI 0))])]
4092  ""
4093  {
4094    if (! call_operand (operands[0], QImode))
4095      XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
4096  })
4097
4098(define_insn "_sibcall"
4099  [(parallel
4100    [(call (match_operand:QI 0 "call_operand" "R,S")
4101	   (match_operand:QI 1 "" "i,i"))
4102     (use (reg:SI 0))])]
4103  "SIBLING_CALL_P(insn)"
4104  "@
4105   bi\t%i0
4106   br\t%0"
4107   [(set_attr "type" "br,br")])
4108
4109(define_expand "sibcall_value"
4110  [(parallel
4111    [(set (match_operand 0 "" "")
4112	  (call (match_operand:QI 1 "call_operand" "")
4113		(match_operand:QI 2 "" "")))
4114     (use (reg:SI 0))])]
4115  ""
4116  {
4117    if (! call_operand (operands[1], QImode))
4118      XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
4119  })
4120
4121(define_insn "_sibcall_value"
4122  [(parallel
4123    [(set (match_operand 0 "" "")
4124	  (call (match_operand:QI 1 "call_operand" "R,S")
4125		(match_operand:QI 2 "" "i,i")))
4126     (use (reg:SI 0))])]
4127  "SIBLING_CALL_P(insn)"
4128  "@
4129   bi\t%i1
4130   br\t%1"
4131   [(set_attr "type" "br,br")])
4132
4133;; Note that operand 1 is total size of args, in bytes,
4134;; and what the call insn wants is the number of words.
4135(define_expand "call"
4136  [(parallel
4137    [(call (match_operand:QI 0 "call_operand" "")
4138	   (match_operand:QI 1 "" ""))
4139     (clobber (reg:SI 0))
4140     (clobber (reg:SI 130))])]
4141  ""
4142  {
4143    if (! call_operand (operands[0], QImode))
4144      XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
4145  })
4146
4147(define_insn "_call"
4148  [(parallel
4149    [(call (match_operand:QI 0 "call_operand" "R,S,T")
4150	   (match_operand:QI 1 "" "i,i,i"))
4151     (clobber (reg:SI 0))
4152     (clobber (reg:SI 130))])]
4153  ""
4154  "@
4155   bisl\t$lr,%i0
4156   brsl\t$lr,%0
4157   brasl\t$lr,%0"
4158   [(set_attr "type" "br")])
4159
4160(define_expand "call_value"
4161  [(parallel
4162    [(set (match_operand 0 "" "")
4163	  (call (match_operand:QI 1 "call_operand" "")
4164		(match_operand:QI 2 "" "")))
4165     (clobber (reg:SI 0))
4166     (clobber (reg:SI 130))])]
4167  ""
4168  {
4169    if (! call_operand (operands[1], QImode))
4170      XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
4171  })
4172
4173(define_insn "_call_value"
4174  [(parallel
4175    [(set (match_operand 0 "" "")
4176	  (call (match_operand:QI 1 "call_operand" "R,S,T")
4177		(match_operand:QI 2 "" "i,i,i")))
4178     (clobber (reg:SI 0))
4179     (clobber (reg:SI 130))])]
4180  ""
4181  "@
4182   bisl\t$lr,%i1
4183   brsl\t$lr,%1
4184   brasl\t$lr,%1"
4185   [(set_attr "type" "br")])
4186
4187(define_expand "untyped_call"
4188  [(parallel [(call (match_operand 0 "" "")
4189		    (const_int 0))
4190	      (match_operand 1 "" "")
4191	      (match_operand 2 "" "")])]
4192  ""
4193  {
4194    int i;
4195    rtx reg = gen_rtx_REG (TImode, 3);
4196
4197    /* We need to use call_value so the return value registers don't get
4198     * clobbered. */
4199    emit_call_insn (gen_call_value (reg, operands[0], const0_rtx));
4200
4201    for (i = 0; i < XVECLEN (operands[2], 0); i++)
4202      {
4203	rtx set = XVECEXP (operands[2], 0, i);
4204	emit_move_insn (SET_DEST (set), SET_SRC (set));
4205      }
4206
4207    /* The optimizer does not know that the call sets the function value
4208       registers we stored in the result block.  We avoid problems by
4209       claiming that all hard registers are used and clobbered at this
4210       point.  */
4211    emit_insn (gen_blockage ());
4212
4213    DONE;
4214  })
4215
4216
4217;; Patterns used for splitting and combining.
4218
4219
4220;; Function prologue and epilogue.
4221
4222(define_expand "prologue"
4223  [(const_int 1)]
4224  ""
4225  { spu_expand_prologue (); DONE; })
4226
4227;; "blockage" is only emitted in epilogue.  This is what it took to
4228;; make "basic block reordering" work with the insns sequence
4229;; generated by the spu_expand_epilogue (taken from mips.md)
4230
4231(define_insn "blockage"
4232  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
4233  ""
4234  ""
4235  [(set_attr "type" "convert")
4236   (set_attr "length" "0")])
4237
4238(define_expand "epilogue"
4239  [(const_int 2)]
4240  ""
4241  { spu_expand_epilogue (false); DONE; })
4242
4243(define_expand "sibcall_epilogue"
4244  [(const_int 2)]
4245  ""
4246  { spu_expand_epilogue (true); DONE; })
4247
4248
4249;; stack manipulations
4250
4251;; An insn to allocate new stack space for dynamic use (e.g., alloca).
4252;; We move the back-chain and decrement the stack pointer.
4253(define_expand "allocate_stack"
4254  [(set (match_operand 0 "spu_reg_operand" "")
4255	(minus (reg 1) (match_operand 1 "spu_nonmem_operand" "")))
4256   (set (reg 1)
4257	(minus (reg 1) (match_dup 1)))]
4258  ""
4259  "spu_allocate_stack (operands[0], operands[1]); DONE;")
4260
4261;; These patterns say how to save and restore the stack pointer.  We need not
4262;; save the stack pointer at function level since we are careful to preserve
4263;; the backchain.
4264;;
4265
4266;; At block level the stack pointer is saved and restored, so that the
4267;; stack space allocated within a block is deallocated when leaving
4268;; block scope.  By default, according to the SPU ABI, the stack
4269;; pointer and available stack size are saved in a register. Upon
4270;; restoration, the stack pointer is simply copied back, and the
4271;; current available stack size is calculated against the restored
4272;; stack pointer.
4273;;
4274;; For nonlocal gotos, we must save the stack pointer and its
4275;; backchain and restore both.  Note that in the nonlocal case, the
4276;; save area is a memory location.
4277
4278(define_expand "save_stack_function"
4279  [(match_operand 0 "general_operand" "")
4280   (match_operand 1 "general_operand" "")]
4281  ""
4282  "DONE;")
4283
4284(define_expand "restore_stack_function"
4285  [(match_operand 0 "general_operand" "")
4286   (match_operand 1 "general_operand" "")]
4287  ""
4288  "DONE;")
4289
4290(define_expand "restore_stack_block"
4291  [(match_operand 0 "spu_reg_operand" "")
4292   (match_operand 1 "memory_operand" "")]
4293  ""
4294  "
4295  {
4296    spu_restore_stack_block (operands[0], operands[1]);
4297    DONE;
4298  }")
4299
4300(define_expand "save_stack_nonlocal"
4301  [(match_operand 0 "memory_operand" "")
4302   (match_operand 1 "spu_reg_operand" "")]
4303  ""
4304  "
4305  {
4306    rtx temp = gen_reg_rtx (Pmode);
4307
4308    /* Copy the backchain to the first word, sp to the second.  We need to
4309       save the back chain because __builtin_apply appears to clobber it. */
4310    emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1]));
4311    emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp);
4312    emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]);
4313    DONE;
4314  }")
4315
4316(define_expand "restore_stack_nonlocal"
4317  [(match_operand 0 "spu_reg_operand" "")
4318   (match_operand 1 "memory_operand" "")]
4319  ""
4320  "
4321  {
4322    spu_restore_stack_nonlocal(operands[0], operands[1]);
4323    DONE;
4324  }")
4325
4326
4327;; vector patterns
4328
4329;; Vector initialization
4330(define_expand "vec_init<mode><inner_l>"
4331  [(match_operand:V 0 "register_operand" "")
4332   (match_operand 1 "" "")]
4333  ""
4334  {
4335    spu_expand_vector_init (operands[0], operands[1]);
4336    DONE;
4337  })
4338
4339(define_expand "vec_set<mode>"
4340  [(use (match_operand:SI 2 "spu_nonmem_operand" ""))
4341   (set (match_dup:TI 3)
4342        (unspec:TI [(match_dup:SI 4)
4343		    (match_dup:SI 5)
4344		    (match_dup:SI 6)] UNSPEC_CPAT))
4345   (set (match_operand:V 0 "spu_reg_operand" "")
4346	(unspec:V [(match_operand:<inner> 1 "spu_reg_operand" "")
4347		   (match_dup:V 0)
4348		   (match_dup:TI 3)] UNSPEC_SHUFB))]
4349  ""
4350  {
4351    HOST_WIDE_INT size = GET_MODE_SIZE (<inner>mode);
4352    rtx offset = GEN_INT (INTVAL (operands[2]) * size);
4353    operands[3] = gen_reg_rtx (TImode);
4354    operands[4] = stack_pointer_rtx;
4355    operands[5] = offset;
4356    operands[6] = GEN_INT (size);
4357  })
4358
4359(define_expand "vec_extract<mode><inner_l>"
4360  [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
4361	(vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
4362			    (parallel [(match_operand 2 "const_int_operand" "i")])))]
4363  ""
4364  {
4365    if ((INTVAL (operands[2]) * <vmult> + <voff>) % 16 == 0)
4366      {
4367	emit_insn (gen_spu_convert (operands[0], operands[1]));
4368	DONE;
4369      }
4370  })
4371
4372(define_insn "_vec_extract<mode>"
4373  [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
4374	(vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
4375			    (parallel [(match_operand 2 "const_int_operand" "i")])))]
4376  ""
4377  "rotqbyi\t%0,%1,(%2*<vmult>+<voff>)%%16"
4378  [(set_attr "type" "shuf")])
4379
4380(define_insn "_vec_extractv8hi_ze"
4381  [(set (match_operand:SI 0 "spu_reg_operand" "=r")
4382	(zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r")
4383				       (parallel [(const_int 0)]))))]
4384  ""
4385  "rotqmbyi\t%0,%1,-2"
4386  [(set_attr "type" "shuf")])
4387
4388
4389;; misc
4390
4391(define_expand "shufb"
4392  [(set (match_operand 0 "spu_reg_operand" "")
4393	(unspec [(match_operand 1 "spu_reg_operand" "")
4394		 (match_operand 2 "spu_reg_operand" "")
4395		 (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))]
4396  ""
4397  {
4398    rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]);
4399    PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
4400    emit_insn (s);
4401    DONE;
4402  })
4403
4404(define_insn "_shufb"
4405  [(set (match_operand 0 "spu_reg_operand" "=r")
4406	(unspec [(match_operand 1 "spu_reg_operand" "r")
4407		 (match_operand 2 "spu_reg_operand" "r")
4408		 (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))]
4409  "operands != NULL"
4410  "shufb\t%0,%1,%2,%3"
4411  [(set_attr "type" "shuf")])
4412
4413; The semantics of vec_permv16qi are nearly identical to those of the SPU
4414; shufb instruction, except that we need to reduce the selector modulo 32.
4415(define_expand "vec_permv16qi"
4416  [(set (match_dup 4) (and:V16QI (match_operand:V16QI 3 "spu_reg_operand" "")
4417                                 (match_dup 6)))
4418   (set (match_operand:V16QI 0 "spu_reg_operand" "")
4419	(unspec:V16QI
4420	  [(match_operand:V16QI 1 "spu_reg_operand" "")
4421	   (match_operand:V16QI 2 "spu_reg_operand" "")
4422	   (match_dup 5)]
4423	  UNSPEC_SHUFB))]
4424  ""
4425  {
4426    operands[4] = gen_reg_rtx (V16QImode);
4427    operands[5] = gen_lowpart (TImode, operands[4]);
4428    operands[6] = spu_const (V16QImode, 31);
4429  })
4430
4431(define_insn "nop"
4432  [(unspec_volatile [(const_int 0)] UNSPECV_NOP)]
4433  ""
4434  "nop"
4435  [(set_attr "type" "nop")])
4436
4437(define_insn "nopn"
4438  [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPECV_NOP)]
4439  ""
4440  "nop\t%0"
4441  [(set_attr "type" "nop")])
4442
4443(define_insn "lnop"
4444  [(unspec_volatile [(const_int 0)] UNSPECV_LNOP)]
4445  ""
4446  "lnop"
4447  [(set_attr "type" "lnop")])
4448
4449;; The operand is so we know why we generated this hbrp.
4450;; We clobber mem to make sure it isn't moved over any
4451;; loads, stores or calls while scheduling.
4452(define_insn "iprefetch"
4453  [(unspec [(match_operand:SI 0 "const_int_operand" "n")] UNSPEC_IPREFETCH)
4454   (clobber (mem:BLK (scratch)))]
4455  ""
4456  "hbrp\t# %0"
4457  [(set_attr "type" "iprefetch")])
4458
4459;; A non-volatile version so it gets scheduled
4460(define_insn "nopn_nv"
4461  [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_NOP)]
4462  ""
4463  "nop\t%0"
4464  [(set_attr "type" "nop")])
4465
4466(define_insn "hbr"
4467  [(set (reg:SI 130)
4468	(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i")
4469		    (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR))
4470   (unspec [(const_int 0)] UNSPEC_HBR)]
4471  ""
4472  "@
4473   hbr\t%0,%1
4474   hbrr\t%0,%1
4475   hbra\t%0,%1"
4476  [(set_attr "type" "hbr")])
4477
4478(define_insn "sync"
4479  [(unspec_volatile [(const_int 0)] UNSPECV_SYNC)
4480   (clobber (mem:BLK (scratch)))]
4481  ""
4482  "sync"
4483  [(set_attr "type" "br")])
4484
4485(define_insn "syncc"
4486  [(unspec_volatile [(const_int 1)] UNSPECV_SYNC)
4487   (clobber (mem:BLK (scratch)))]
4488  ""
4489  "syncc"
4490  [(set_attr "type" "br")])
4491
4492(define_insn "dsync"
4493  [(unspec_volatile [(const_int 2)] UNSPECV_SYNC)
4494   (clobber (mem:BLK (scratch)))]
4495  ""
4496  "dsync"
4497  [(set_attr "type" "br")])
4498
4499
4500
4501 ;; Define the subtract-one-and-jump insns so loop.c
4502 ;; knows what to generate.
4503 (define_expand "doloop_end"
4504   [(use (match_operand 0 "" ""))      ; loop pseudo
4505    (use (match_operand 1 "" ""))]     ; label
4506   ""
4507   "
4508 {
4509   /* Currently SMS relies on the do-loop pattern to recognize loops
4510      where (1) the control part comprises of all insns defining and/or
4511      using a certain 'count' register and (2) the loop count can be
4512      adjusted by modifying this register prior to the loop.
4513.     ??? The possible introduction of a new block to initialize the
4514      new IV can potentially effects branch optimizations.  */
4515   if (optimize > 0 && flag_modulo_sched)
4516   {
4517     rtx s0;
4518     rtx bcomp;
4519     rtx loc_ref;
4520
4521     if (GET_MODE (operands[0]) != SImode)
4522       FAIL;
4523
4524     s0 = operands [0];
4525     emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1)));
4526     bcomp = gen_rtx_NE(SImode, s0, const0_rtx);
4527     loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]);
4528     emit_jump_insn (gen_rtx_SET (pc_rtx,
4529                                  gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4530                                                        loc_ref, pc_rtx)));
4531
4532     DONE;
4533   }else
4534      FAIL;
4535 }")
4536
4537;; convert between any two modes, avoiding any GCC assumptions
4538(define_expand "spu_convert"
4539  [(set (match_operand 0 "spu_reg_operand" "")
4540	(unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))]
4541  ""
4542  {
4543    rtx c = gen__spu_convert (operands[0], operands[1]);
4544    PUT_MODE (SET_SRC (c), GET_MODE (operands[0]));
4545    emit_insn (c);
4546    DONE;
4547  })
4548
4549(define_insn_and_split "_spu_convert"
4550  [(set (match_operand 0 "spu_reg_operand" "=r")
4551	(unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))]
4552  ""
4553  "#"
4554  "reload_completed"
4555  [(const_int 0)]
4556  {
4557    spu_split_convert (operands);
4558    DONE;
4559  }
4560  [(set_attr "type" "convert")
4561   (set_attr "length" "0")])
4562
4563
4564;;
4565(include "spu-builtins.md")
4566
4567
4568(define_expand "smaxv4sf3"
4569  [(set (match_operand:V4SF 0 "register_operand" "=r")
4570        (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
4571                 (match_operand:V4SF 2 "register_operand" "r")))]
4572  ""
4573  "
4574{
4575  rtx mask = gen_reg_rtx (V4SImode);
4576
4577  emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
4578  emit_insn (gen_selb (operands[0], operands[2], operands[1], mask));
4579  DONE;
4580}")
4581
4582(define_expand "sminv4sf3"
4583  [(set (match_operand:V4SF 0 "register_operand" "=r")
4584        (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
4585                 (match_operand:V4SF 2 "register_operand" "r")))]
4586  ""
4587  "
4588{
4589  rtx mask = gen_reg_rtx (V4SImode);
4590
4591  emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
4592  emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
4593  DONE;
4594}")
4595
4596(define_expand "smaxv2df3"
4597  [(set (match_operand:V2DF 0 "register_operand" "=r")
4598        (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
4599                 (match_operand:V2DF 2 "register_operand" "r")))]
4600  ""
4601  "
4602{
4603  rtx mask = gen_reg_rtx (V2DImode);
4604  emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
4605  emit_insn (gen_selb (operands[0], operands[2], operands[1],
4606		       spu_gen_subreg (V4SImode, mask)));
4607  DONE;
4608}")
4609
4610(define_expand "sminv2df3"
4611  [(set (match_operand:V2DF 0 "register_operand" "=r")
4612        (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
4613                 (match_operand:V2DF 2 "register_operand" "r")))]
4614  ""
4615  "
4616{
4617  rtx mask = gen_reg_rtx (V2DImode);
4618  emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
4619  emit_insn (gen_selb (operands[0], operands[1], operands[2],
4620		       spu_gen_subreg (V4SImode, mask)));
4621  DONE;
4622}")
4623
4624(define_insn "vec_widen_smult_odd_v8hi"
4625  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
4626        (mult:V4SI
4627	  (sign_extend:V4SI
4628	    (vec_select:V4HI
4629	      (match_operand:V8HI 1 "spu_reg_operand" "r,r")
4630	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
4631          (sign_extend:V4SI
4632	    (vec_select:V4HI
4633	      (match_operand:V8HI 2 "spu_arith_operand" "r,B")
4634	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
4635  ""
4636  "@
4637   mpy\t%0,%1,%2
4638   mpyi\t%0,%1,%2"
4639  [(set_attr "type" "fp7")])
4640
4641(define_insn "vec_widen_umult_odd_v8hi"
4642  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
4643        (mult:V4SI
4644	  (zero_extend:V4SI
4645	    (vec_select:V4HI
4646	      (match_operand:V8HI 1 "spu_reg_operand" "r,r")
4647	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
4648          (zero_extend:V4SI
4649	    (vec_select:V4HI
4650	      (match_operand:V8HI 2 "spu_arith_operand" "r,B")
4651	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
4652  ""
4653  "@
4654   mpyu\t%0,%1,%2
4655   mpyui\t%0,%1,%2"
4656  [(set_attr "type" "fp7")])
4657
4658(define_insn "vec_widen_smult_even_v8hi"
4659  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4660	(mult:V4SI
4661	  (sign_extend:V4SI
4662	    (vec_select:V4HI
4663	      (match_operand:V8HI 1 "spu_reg_operand" "r")
4664	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
4665	  (sign_extend:V4SI
4666	    (vec_select:V4HI
4667	      (match_operand:V8HI 2 "spu_reg_operand" "r")
4668	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
4669  ""
4670  "mpyhh\t%0,%1,%2"
4671  [(set_attr "type" "fp7")])
4672
4673(define_insn "vec_widen_umult_even_v8hi"
4674  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4675	(mult:V4SI
4676	  (zero_extend:V4SI
4677	    (vec_select:V4HI
4678	      (match_operand:V8HI 1 "spu_reg_operand" "r")
4679	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
4680	  (zero_extend:V4SI
4681	    (vec_select:V4HI
4682	      (match_operand:V8HI 2 "spu_reg_operand" "r")
4683	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
4684  ""
4685  "mpyhhu\t%0,%1,%2"
4686  [(set_attr "type" "fp7")])
4687
4688(define_expand "vec_widen_umult_hi_v8hi"
4689  [(set (match_operand:V4SI 0 "register_operand"   "=r")
4690        (mult:V4SI
4691          (zero_extend:V4SI
4692            (vec_select:V4HI
4693              (match_operand:V8HI 1 "register_operand" "r")
4694              (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4695          (zero_extend:V4SI
4696            (vec_select:V4HI
4697              (match_operand:V8HI 2 "register_operand" "r")
4698              (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4699  ""
4700  "
4701{
4702  rtx ve = gen_reg_rtx (V4SImode);
4703  rtx vo = gen_reg_rtx (V4SImode);
4704  rtx mask = gen_reg_rtx (TImode);
4705  unsigned char arr[16] = {
4706    0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4707    0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4708
4709  emit_move_insn (mask, array_to_constant (TImode, arr));
4710  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
4711  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
4712  emit_insn (gen_shufb (operands[0], ve, vo, mask));
4713  DONE;
4714}")
4715
4716(define_expand "vec_widen_umult_lo_v8hi"
4717  [(set (match_operand:V4SI 0 "register_operand"   "=r")
4718        (mult:V4SI
4719          (zero_extend:V4SI
4720            (vec_select:V4HI
4721              (match_operand:V8HI 1 "register_operand" "r")
4722              (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4723          (zero_extend:V4SI
4724            (vec_select:V4HI
4725              (match_operand:V8HI 2 "register_operand" "r")
4726              (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4727  ""
4728  "
4729{
4730  rtx ve = gen_reg_rtx (V4SImode);
4731  rtx vo = gen_reg_rtx (V4SImode);
4732  rtx mask = gen_reg_rtx (TImode);
4733  unsigned char arr[16] = {
4734    0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4735    0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4736
4737  emit_move_insn (mask, array_to_constant (TImode, arr));
4738  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
4739  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
4740  emit_insn (gen_shufb (operands[0], ve, vo, mask));
4741  DONE;
4742}")
4743
4744(define_expand "vec_widen_smult_hi_v8hi"
4745  [(set (match_operand:V4SI 0 "register_operand"   "=r")
4746        (mult:V4SI
4747          (sign_extend:V4SI
4748            (vec_select:V4HI
4749              (match_operand:V8HI 1 "register_operand" "r")
4750              (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4751          (sign_extend:V4SI
4752            (vec_select:V4HI
4753              (match_operand:V8HI 2 "register_operand" "r")
4754              (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4755  ""
4756  "
4757{
4758  rtx ve = gen_reg_rtx (V4SImode);
4759  rtx vo = gen_reg_rtx (V4SImode);
4760  rtx mask = gen_reg_rtx (TImode);
4761  unsigned char arr[16] = {
4762    0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4763    0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4764
4765  emit_move_insn (mask, array_to_constant (TImode, arr));
4766  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
4767  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
4768  emit_insn (gen_shufb (operands[0], ve, vo, mask));
4769  DONE;
4770}")
4771
4772(define_expand "vec_widen_smult_lo_v8hi"
4773  [(set (match_operand:V4SI 0 "register_operand"   "=r")
4774        (mult:V4SI
4775          (sign_extend:V4SI
4776            (vec_select:V4HI
4777              (match_operand:V8HI 1 "register_operand" "r")
4778              (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4779          (sign_extend:V4SI
4780            (vec_select:V4HI
4781              (match_operand:V8HI 2 "register_operand" "r")
4782              (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4783  ""
4784  "
4785{
4786  rtx ve = gen_reg_rtx (V4SImode);
4787  rtx vo = gen_reg_rtx (V4SImode);
4788  rtx mask = gen_reg_rtx (TImode);
4789  unsigned char arr[16] = {
4790    0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4791    0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4792
4793  emit_move_insn (mask, array_to_constant (TImode, arr));
4794  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
4795  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
4796  emit_insn (gen_shufb (operands[0], ve, vo, mask));
4797  DONE;
4798}")
4799
4800(define_expand "vec_realign_load_<mode>"
4801  [(set (match_operand:ALL 0 "register_operand" "=r")
4802	(unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
4803		     (match_operand:ALL 2 "register_operand" "r")
4804		     (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
4805  ""
4806  "
4807{
4808  emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
4809  DONE;
4810}")
4811
4812(define_expand "spu_lvsr"
4813  [(set (match_operand:V16QI 0 "register_operand" "")
4814        (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
4815  ""
4816  "
4817{
4818  rtx addr;
4819  rtx offset = gen_reg_rtx (V8HImode);
4820  rtx addr_bits = gen_reg_rtx (SImode);
4821  rtx addr_bits_vec = gen_reg_rtx (V8HImode);
4822  rtx splatqi = gen_reg_rtx (TImode);
4823  rtx result = gen_reg_rtx (V8HImode);
4824  unsigned char arr[16] = {
4825    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
4826    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
4827  unsigned char arr2[16] = {
4828    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
4829    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
4830
4831  emit_move_insn (offset, array_to_constant (V8HImode, arr));
4832  emit_move_insn (splatqi, array_to_constant (TImode, arr2));
4833
4834  gcc_assert (GET_CODE (operands[1]) == MEM);
4835  addr = force_reg (Pmode, XEXP (operands[1], 0));
4836  emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
4837  emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
4838
4839  /* offset - (addr & 0xF)
4840     It is safe to use a single sfh, because each byte of offset is > 15 and
4841     each byte of addr is <= 15. */
4842  emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
4843
4844  result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
4845  emit_move_insn (operands[0], result);
4846
4847  DONE;
4848}")
4849
4850(define_expand "vec_unpacku_hi_v8hi"
4851  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4852        (zero_extend:V4SI
4853          (vec_select:V4HI
4854            (match_operand:V8HI 1 "spu_reg_operand" "r")
4855            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
4856  ""
4857{
4858  rtx mask = gen_reg_rtx (TImode);
4859  unsigned char arr[16] = {
4860    0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
4861    0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
4862
4863  emit_move_insn (mask, array_to_constant (TImode, arr));
4864  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4865
4866  DONE;
4867})
4868
4869(define_expand "vec_unpacku_lo_v8hi"
4870  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4871         (zero_extend:V4SI
4872          (vec_select:V4HI
4873            (match_operand:V8HI 1 "spu_reg_operand" "r")
4874            (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4875""
4876{
4877  rtx mask = gen_reg_rtx (TImode);
4878  unsigned char arr[16] = {
4879    0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
4880    0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
4881
4882  emit_move_insn (mask, array_to_constant (TImode, arr));
4883  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4884
4885  DONE;
4886})
4887
4888(define_expand "vec_unpacks_hi_v8hi"
4889  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4890         (sign_extend:V4SI
4891          (vec_select:V4HI
4892            (match_operand:V8HI 1 "spu_reg_operand" "r")
4893            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
4894  ""
4895{
4896  rtx tmp1 = gen_reg_rtx (V8HImode);
4897  rtx tmp2 = gen_reg_rtx (V4SImode);
4898  rtx mask = gen_reg_rtx (TImode);
4899  unsigned char arr[16] = {
4900    0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
4901    0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
4902
4903  emit_move_insn (mask, array_to_constant (TImode, arr));
4904  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
4905  emit_insn (gen_spu_xshw (tmp2, tmp1));
4906  emit_move_insn (operands[0], tmp2);
4907
4908  DONE;
4909})
4910
4911(define_expand "vec_unpacks_lo_v8hi"
4912  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
4913         (sign_extend:V4SI
4914          (vec_select:V4HI
4915            (match_operand:V8HI 1 "spu_reg_operand" "r")
4916            (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4917""
4918{
4919  rtx tmp1 = gen_reg_rtx (V8HImode);
4920  rtx tmp2 = gen_reg_rtx (V4SImode);
4921  rtx mask = gen_reg_rtx (TImode);
4922  unsigned char arr[16] = {
4923    0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
4924    0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
4925
4926  emit_move_insn (mask, array_to_constant (TImode, arr));
4927  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
4928  emit_insn (gen_spu_xshw (tmp2, tmp1));
4929  emit_move_insn (operands[0], tmp2);
4930
4931DONE;
4932})
4933
4934(define_expand "vec_unpacku_hi_v16qi"
4935  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4936        (zero_extend:V8HI
4937          (vec_select:V8QI
4938            (match_operand:V16QI 1 "spu_reg_operand" "r")
4939            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
4940                       (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4941  ""
4942{
4943  rtx mask = gen_reg_rtx (TImode);
4944  unsigned char arr[16] = {
4945    0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
4946    0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
4947
4948  emit_move_insn (mask, array_to_constant (TImode, arr));
4949  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4950
4951  DONE;
4952})
4953
4954(define_expand "vec_unpacku_lo_v16qi"
4955  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4956          (zero_extend:V8HI
4957          (vec_select:V8QI
4958            (match_operand:V16QI 1 "spu_reg_operand" "r")
4959            (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
4960                       (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
4961""
4962{
4963  rtx mask = gen_reg_rtx (TImode);
4964  unsigned char arr[16] = {
4965    0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
4966    0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
4967
4968  emit_move_insn (mask, array_to_constant (TImode, arr));
4969  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
4970
4971  DONE;
4972})
4973
4974(define_expand "vec_unpacks_hi_v16qi"
4975  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
4976         (sign_extend:V8HI
4977          (vec_select:V8QI
4978            (match_operand:V16QI 1 "spu_reg_operand" "r")
4979            (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
4980                       (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
4981""
4982{
4983  rtx tmp1 = gen_reg_rtx (V16QImode);
4984  rtx tmp2 = gen_reg_rtx (V8HImode);
4985  rtx mask = gen_reg_rtx (TImode);
4986  unsigned char arr[16] = {
4987    0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
4988    0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
4989
4990  emit_move_insn (mask, array_to_constant (TImode, arr));
4991  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
4992  emit_insn (gen_spu_xsbh (tmp2, tmp1));
4993  emit_move_insn (operands[0], tmp2);
4994
4995  DONE;
4996})
4997
4998(define_expand "vec_unpacks_lo_v16qi"
4999  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
5000         (sign_extend:V8HI
5001          (vec_select:V8QI
5002            (match_operand:V16QI 1 "spu_reg_operand" "r")
5003            (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
5004                       (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
5005""
5006{
5007  rtx tmp1 = gen_reg_rtx (V16QImode);
5008  rtx tmp2 = gen_reg_rtx (V8HImode);
5009  rtx mask = gen_reg_rtx (TImode);
5010  unsigned char arr[16] = {
5011    0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
5012    0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
5013
5014  emit_move_insn (mask, array_to_constant (TImode, arr));
5015  emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
5016  emit_insn (gen_spu_xsbh (tmp2, tmp1));
5017  emit_move_insn (operands[0], tmp2);
5018
5019DONE;
5020})
5021
5022
5023(define_expand "vec_pack_trunc_v8hi"
5024  [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
5025	(vec_concat:V16QI
5026          (truncate:V8QI (match_operand:V8HI 1 "spu_reg_operand" "r"))
5027          (truncate:V8QI (match_operand:V8HI 2 "spu_reg_operand" "r"))))]
5028  ""
5029  "
5030{
5031  rtx mask = gen_reg_rtx (TImode);
5032  unsigned char arr[16] = {
5033    0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
5034    0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F};
5035
5036  emit_move_insn (mask, array_to_constant (TImode, arr));
5037  emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
5038
5039  DONE;
5040}")
5041
5042(define_expand "vec_pack_trunc_v4si"
5043  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
5044	(vec_concat:V8HI
5045          (truncate:V4HI (match_operand:V4SI 1 "spu_reg_operand" "r"))
5046          (truncate:V4HI (match_operand:V4SI 2 "spu_reg_operand" "r"))))]
5047  ""
5048  "
5049{
5050  rtx mask = gen_reg_rtx (TImode);
5051  unsigned char arr[16] = {
5052    0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
5053    0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F};
5054
5055  emit_move_insn (mask, array_to_constant (TImode, arr));
5056  emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
5057
5058  DONE;
5059}")
5060
5061(define_insn "stack_protect_set"
5062  [(set (match_operand:SI 0 "memory_operand" "=m")
5063        (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
5064   (set (match_scratch:SI 2 "=&r") (const_int 0))]
5065  ""
5066  "lq%p1\t%2,%1\;stq%p0\t%2,%0\;xor\t%2,%2,%2"
5067  [(set_attr "length" "12")
5068   (set_attr "type" "multi1")]
5069)
5070
5071(define_expand "stack_protect_test"
5072  [(match_operand 0 "memory_operand" "")
5073   (match_operand 1 "memory_operand" "")
5074   (match_operand 2 "" "")]
5075  ""
5076{
5077  rtx compare_result;
5078  rtx bcomp, loc_ref;
5079
5080  compare_result = gen_reg_rtx (SImode);
5081
5082  emit_insn (gen_stack_protect_test_si (compare_result,
5083                                        operands[0],
5084                                        operands[1]));
5085
5086  bcomp = gen_rtx_NE (SImode, compare_result, const0_rtx);
5087
5088  loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[2]);
5089
5090  emit_jump_insn (gen_rtx_SET (pc_rtx,
5091                               gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
5092                                                     loc_ref, pc_rtx)));
5093
5094  DONE;
5095})
5096
5097(define_insn "stack_protect_test_si"
5098  [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
5099        (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
5100                    (match_operand:SI 2 "memory_operand" "m")]
5101                   UNSPEC_SP_TEST))
5102   (set (match_scratch:SI 3 "=&r") (const_int 0))]
5103  ""
5104  "lq%p1\t%0,%1\;lq%p2\t%3,%2\;ceq\t%0,%0,%3\;xor\t%3,%3,%3"
5105  [(set_attr "length" "16")
5106   (set_attr "type" "multi1")]
5107)
5108
5109; Atomic operations
5110;
5111; SPU execution is always single-threaded, so there is no need for real
5112; atomic operations.  We provide the atomic primitives anyway so that
5113; code expecting the builtins to be present (like libgfortran) will work.
5114
5115;; Types that we should provide atomic instructions for.
5116(define_mode_iterator AINT [QI HI SI DI TI])
5117
5118(define_code_iterator ATOMIC [plus minus ior xor and mult])
5119(define_code_attr atomic_name
5120  [(plus "add") (minus "sub")
5121   (ior "or") (xor "xor") (and "and") (mult "nand")])
5122(define_code_attr atomic_pred
5123  [(plus "spu_arith_operand") (minus "spu_reg_operand")
5124   (ior "spu_logical_operand") (xor "spu_logical_operand")
5125   (and "spu_logical_operand") (mult "spu_logical_operand")])
5126
5127(define_expand "atomic_load<mode>"
5128  [(set (match_operand:AINT 0 "spu_reg_operand" "")		;; output
5129	(match_operand:AINT 1 "memory_operand" ""))		;; memory
5130   (use (match_operand:SI 2 "const_int_operand" ""))]		;; model
5131  ""
5132{
5133  if (MEM_ADDR_SPACE (operands[1]))
5134    FAIL;
5135
5136  emit_move_insn (operands[0], operands[1]);
5137  DONE;
5138})
5139
5140(define_expand "atomic_store<mode>"
5141  [(set (match_operand:AINT 0 "memory_operand" "")		;; memory
5142	(match_operand:AINT 1 "spu_reg_operand" ""))		;; input
5143   (use (match_operand:SI 2 "const_int_operand" ""))]		;; model
5144  ""
5145{
5146  if (MEM_ADDR_SPACE (operands[0]))
5147    FAIL;
5148
5149  emit_move_insn (operands[0], operands[1]);
5150  DONE;
5151})
5152
5153(define_expand "atomic_compare_and_swap<mode>"
5154  [(match_operand:SI 0 "spu_reg_operand" "")		;; bool out
5155   (match_operand:AINT 1 "spu_reg_operand" "")		;; val out
5156   (match_operand:AINT 2 "memory_operand" "")		;; memory
5157   (match_operand:AINT 3 "spu_nonmem_operand" "")	;; expected
5158   (match_operand:AINT 4 "spu_nonmem_operand" "")	;; desired
5159   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
5160   (match_operand:SI 6 "const_int_operand" "")		;; model succ
5161   (match_operand:SI 7 "const_int_operand" "")]		;; model fail
5162  ""
5163{
5164  rtx boolval, retval, label;
5165
5166  if (MEM_ADDR_SPACE (operands[2]))
5167    FAIL;
5168
5169  boolval = gen_reg_rtx (SImode);
5170  retval = gen_reg_rtx (<MODE>mode);
5171  label = gen_label_rtx ();
5172
5173  emit_move_insn (retval, operands[2]);
5174  emit_move_insn (boolval, const0_rtx);
5175
5176  emit_cmp_and_jump_insns (retval, operands[3], NE, NULL_RTX,
5177                           <MODE>mode, 1, label);
5178
5179  emit_move_insn (operands[2], operands[4]);
5180  emit_move_insn (boolval, const1_rtx);
5181
5182  emit_label (label);
5183
5184  emit_move_insn (operands[0], boolval);
5185  emit_move_insn (operands[1], retval);
5186  DONE;
5187})
5188
5189(define_expand "atomic_exchange<mode>"
5190  [(match_operand:AINT 0 "spu_reg_operand" "")		;; output
5191   (match_operand:AINT 1 "memory_operand" "")		;; memory
5192   (match_operand:AINT 2 "spu_nonmem_operand" "")	;; input
5193   (match_operand:SI 3 "const_int_operand" "")]		;; model
5194  ""
5195{
5196  rtx retval;
5197
5198  if (MEM_ADDR_SPACE (operands[1]))
5199    FAIL;
5200
5201  retval = gen_reg_rtx (<MODE>mode);
5202
5203  emit_move_insn (retval, operands[1]);
5204  emit_move_insn (operands[1], operands[2]);
5205  emit_move_insn (operands[0], retval);
5206  DONE;
5207})
5208
5209(define_expand "atomic_<atomic_name><mode>"
5210  [(ATOMIC:AINT
5211     (match_operand:AINT 0 "memory_operand" "")		;; memory
5212     (match_operand:AINT 1 "<atomic_pred>" ""))		;; operand
5213   (match_operand:SI 2 "const_int_operand" "")]		;; model
5214  ""
5215{
5216  if (MEM_ADDR_SPACE (operands[0]))
5217    FAIL;
5218
5219  spu_expand_atomic_op (<CODE>, operands[0], operands[1],
5220			NULL_RTX, NULL_RTX);
5221  DONE;
5222})
5223
5224(define_expand "atomic_fetch_<atomic_name><mode>"
5225  [(match_operand:AINT 0 "spu_reg_operand" "")		;; output
5226   (ATOMIC:AINT
5227     (match_operand:AINT 1 "memory_operand" "")		;; memory
5228     (match_operand:AINT 2 "<atomic_pred>" ""))		;; operand
5229   (match_operand:SI 3 "const_int_operand" "")]		;; model
5230  ""
5231{
5232  if (MEM_ADDR_SPACE (operands[1]))
5233    FAIL;
5234
5235  spu_expand_atomic_op (<CODE>, operands[1], operands[2],
5236			operands[0], NULL_RTX);
5237  DONE;
5238})
5239
5240(define_expand "atomic_<atomic_name>_fetch<mode>"
5241  [(match_operand:AINT 0 "spu_reg_operand" "")		;; output
5242   (ATOMIC:AINT
5243     (match_operand:AINT 1 "memory_operand" "")		;; memory
5244     (match_operand:AINT 2 "<atomic_pred>" ""))		;; operand
5245   (match_operand:SI 3 "const_int_operand" "")]		;; model
5246  ""
5247{
5248  if (MEM_ADDR_SPACE (operands[1]))
5249    FAIL;
5250
5251  spu_expand_atomic_op (<CODE>, operands[1], operands[2],
5252			NULL_RTX, operands[0]);
5253  DONE;
5254})
5255
5256