1//===- HexagonPatternsHVX.td - Selection Patterns for HVX --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9
// Type profile: one result, one operand; both are constrained to be vectors.
def SDTVecUnaryOp
  : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
12
// Type profile: one vector result and two operands. The first operand is a
// vector and the second operand has the same type as the first.
def SDTVecBinOp
  : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>;
15
// VEXTRACTW: i32 result, taking a vector operand and an i32 operand.
def SDTHexagonVEXTRACTW
  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
def HexagonVEXTRACTW: SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
19
// VINSERTW0: vector result, taking a vector operand of the same type as the
// result and an i32 operand.
def SDTHexagonVINSERTW0
  : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
23
// Immediate transform that yields half of the subtarget's vector length as
// an i32 target constant. The value of the incoming immediate is not used;
// the node only supplies the debug location.
def HwLen2: SDNodeXForm<imm, [{
  const HexagonSubtarget &HST =
      static_cast<const HexagonSubtarget &>(CurDAG->getSubtarget());
  auto HalfLen = HST.getVectorLength() / 2;
  return CurDAG->getTargetConstant(HalfLen, SDLoc(N), MVT::i32);
}]>;
28
// Output fragment: V6_vandqrt of the predicate $Qs with a scalar register
// holding -1.
def Q2V
  : OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;
30
// Output fragment: build an HvxWR register pair with $Vs as the high
// subregister (vsub_hi) and $Vt as the low subregister (vsub_lo).
def Combinev
  : OutPatFrag<(ops node:$Vs, node:$Vt),
               (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;
33
// Output fragment combining two predicates into one. Each predicate is
// turned into a vector (Q2V) and packed with a zero vector; the $Qs half is
// additionally rotated by half the vector length. The two halves are OR-ed
// together and the result is converted back with V6_vandvrt.
def Combineq
  : OutPatFrag<(ops node:$Qs, node:$Qt),
      (V6_vandvrt
        (V6_vor
          (V6_vror
            (V6_vpackeb (V6_vd0), (Q2V $Qs)),
            (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
          (V6_vpackeb (V6_vd0), (Q2V $Qt))),
        (A2_tfrsi -1))>;
41
// Output fragments extracting the low (vsub_lo) or high (vsub_hi)
// subregister of $Vs.
def LoVec
  : OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
def HiVec
  : OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;
44
// SelectionDAG node definitions for the HexagonISD opcodes matched below.
// Binary vector node:
def HexagonQCAT:     SDNode<"HexagonISD::QCAT",     SDTVecBinOp>;
// Operand-less vector nodes:
def HexagonQTRUE:    SDNode<"HexagonISD::QTRUE",    SDTVecLeaf>;
def HexagonQFALSE:   SDNode<"HexagonISD::QFALSE",   SDTVecLeaf>;
// Unary vector nodes:
def HexagonVPACKL:   SDNode<"HexagonISD::VPACKL",   SDTVecUnaryOp>;
def HexagonVUNPACK:  SDNode<"HexagonISD::VUNPACK",  SDTVecUnaryOp>;
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
51
// Input fragments: splat of the i32 constant 0, the QTRUE/QFALSE nodes, and
// the QCAT node applied to two operands.
def vzero
  : PatFrag<(ops), (splat_vector (i32 0))>;
def qtrue
  : PatFrag<(ops), (HexagonQTRUE)>;
def qfalse
  : PatFrag<(ops), (HexagonQFALSE)>;
def qcat
  : PatFrag<(ops node:$Qs, node:$Qt), (HexagonQCAT node:$Qs, node:$Qt)>;
57
// qnot matches an xor of $Qs with qtrue; the remaining fragments are thin
// wrappers around the VPACKL/VUNPACK/VUNPACKU nodes.
def qnot
  : PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
def vpackl
  : PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
def vunpack
  : PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
def vunpacku
  : PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;
62
// Output fragments wrapping the unpack instructions. The S/Z in the name
// selects the signed (vunpackb/h) or unsigned (vunpackub/uh) instruction,
// and the b/h suffix selects the byte or halfword form.
def VSxtb
  : OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
def VSxth
  : OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
def VZxtb
  : OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth
  : OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
67
// Matches an i32 immediate that is a multiple of the HvxVR spill size and
// whose quotient by that size fits in a signed 4-bit field.
def IsVecOff : PatLeaf<(i32 imm), [{
  const int32_t Off = N->getSExtValue();
  const int32_t Size = HRI->getSpillSize(Hexagon::HvxVRRegClass);
  assert(isPowerOf2_32(Size));
  // Reject offsets that are not a whole number of vectors.
  const uint32_t LowBits = uint32_t(Off) & (uint32_t(Size) - 1);
  if (LowBits != 0)
    return false;
  // Express the offset in units of vectors and check the encodable range.
  const int32_t Shift = Log2_32(Size);
  return isInt<4>(Off >> Shift);
}]>;
77
78
// Load/store fragments split by the result of isAlignedMemNode on the
// matched memory node.
//
// The matched node comes from a `load`/`store` pattern, so it is expected to
// always be a MemSDNode. cast<> is used instead of dyn_cast<> because the
// result is passed on without a null check: cast<> asserts on a type
// mismatch instead of silently forwarding a null pointer.
def alignedload: PatFrag<(ops node:$a), (load $a), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedload: PatFrag<(ops node:$a), (load $a), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;
94
95
96// HVX loads
97
// Load patterns for instruction MI producing ResType through the Load
// fragment. Covers a plain register address, register plus an immediate
// accepted by ImmPred, and two constant-pool address forms.
multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                     PatFrag ImmPred> {
  // Base register only: zero offset.
  def: Pat<(ResType (Load I32:$Rt)), (MI I32:$Rt, 0)>;
  // Base register plus immediate offset.
  def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$Off))),
           (MI I32:$Rt, imm:$Off)>;

  // The HVX selection code for shuffles can generate vector constants.
  // Calling "Select" on the resulting loads from CP fails without these
  // patterns.
  def: Pat<(ResType (Load (HexagonCP tconstpool:$CP))),
           (MI (A2_tfrsi imm:$CP), 0)>;
  def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$CP))),
           (MI (C4_addipc imm:$CP), 0)>;
}
112
// Extends HvxLd_pat with higher-complexity patterns for addresses wrapped
// in valignaddr (with or without an added immediate offset). In both cases
// the register inside valignaddr is used directly as the base.
multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
                      PatFrag ImmPred> {
  let AddedComplexity = 50 in {
    def: Pat<(ResType (Load (valignaddr I32:$Rt))), (MI I32:$Rt, 0)>;
    def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))),
             (MI I32:$Rt, imm:$Off)>;
  }
  // All of the regular load patterns apply as well.
  defm: HvxLd_pat<MI, Load, ResType, ImmPred>;
}
123
// Instantiate the load patterns for every single-vector element type.
let Predicates = [UseHVX] in {
  // Aligned non-temporal loads.
  foreach VT = [VecI8, VecI16, VecI32] in
    defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VT, IsVecOff>;

  // Aligned loads.
  foreach VT = [VecI8, VecI16, VecI32] in
    defm: HvxLda_pat<V6_vL32b_ai, alignedload, VT, IsVecOff>;

  // Unaligned loads (no valignaddr forms).
  foreach VT = [VecI8, VecI16, VecI32] in
    defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VT, IsVecOff>;
}
137
138// HVX stores
139
// Store patterns for instruction MI storing a Value through the Store
// fragment, addressed either by a plain register or by a register plus an
// immediate accepted by ImmPred.
multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred,
                     PatFrag Value> {
  // Base register only: zero offset.
  def: Pat<(Store Value:$Vs, I32:$Rt), (MI I32:$Rt, 0, Value:$Vs)>;
  // Base register plus immediate offset.
  def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$Off)),
           (MI I32:$Rt, imm:$Off, Value:$Vs)>;
}
147
// Instantiate the store patterns for every single-vector operand fragment.
let Predicates = [UseHVX] in {
  // Aligned non-temporal stores.
  foreach Val = [HVI8, HVI16, HVI32] in
    defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, Val>;

  // Aligned stores.
  foreach Val = [HVI8, HVI16, HVI32] in
    defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, Val>;

  // Unaligned stores.
  foreach Val = [HVI8, HVI16, HVI32] in
    defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, Val>;
}
161
// A bitcast between two vector types of the same size changes only the
// type; no instruction is needed.
let Predicates = [UseHVX] in {
  // Single vectors.
  defm: NopCast_pat<VecI8,  VecI16, HvxVR>;
  defm: NopCast_pat<VecI8,  VecI32, HvxVR>;
  defm: NopCast_pat<VecI16, VecI32, HvxVR>;

  // Vector pairs.
  defm: NopCast_pat<VecPI8,  VecPI16, HvxWR>;
  defm: NopCast_pat<VecPI8,  VecPI32, HvxWR>;
  defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
173
let Predicates = [UseHVX] in {
  let AddedComplexity = 100 in {
    // These should be preferred over a vsplat of 0.
    foreach VT = [VecI8, VecI16, VecI32] in
      def: Pat<(VT vzero), (V6_vd0)>;
    foreach VT = [VecPI8, VecPI16, VecPI32] in
      def: Pat<(VT vzero), (PS_vdd0)>;

    // Concatenating two zero vectors gives a zero pair.
    foreach VT = [VecI8, VecI16, VecI32] in
      def: Pat<(concat_vectors (VT vzero), (VT vzero)), (PS_vdd0)>;
  }

  // concat_vectors of two single vectors: the first operand becomes the low
  // half of the pair and the second the high half (see Combinev).
  def: Pat<(VecPI8  (concat_vectors HVI8:$Vs,  HVI8:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

  // Concatenation of predicates, with the operands swapped for Combineq.
  def: Pat<(VecQ8  (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>;
  def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>;

  // Word extract: the same instruction for every element type.
  foreach Vec = [HVI8, HVI16, HVI32] in
    def: Pat<(HexagonVEXTRACTW Vec:$Vu, I32:$Rs),
             (V6_extractw HvxVR:$Vu, I32:$Rs)>;

  // Word insert: the same instruction for every element type.
  foreach Vec = [HVI8, HVI16, HVI32] in
    def: Pat<(HexagonVINSERTW0 Vec:$Vu, I32:$Rt),
             (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}
213
// Splats for HvxV60
// Every fragment ends in V6_lvsplatw. Byte and halfword values are first
// replicated into a full 32-bit word: SplatB/SplatH for immediates,
// S2_vsplatrb/A2_combine_ll for registers.
// Immediate operands:
def V60splatib
  : OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
def V60splatih
  : OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
def V60splatiw
  : OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
// Register operands:
def V60splatrb
  : OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
def V60splatrh
  : OutPatFrag<(ops node:$Rs), (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
def V60splatrw
  : OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
222
// Splats for HvxV62+
// Byte, halfword and word splats map directly to V6_lvsplatb,
// V6_lvsplath and V6_lvsplatw.
// Immediate operands:
def V62splatib
  : OutPatFrag<(ops node:$V), (V6_lvsplatb (ToI32 $V))>;
def V62splatih
  : OutPatFrag<(ops node:$V), (V6_lvsplath (ToI32 $V))>;
def V62splatiw
  : OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
// Register operands:
def V62splatrb
  : OutPatFrag<(ops node:$Rs), (V6_lvsplatb $Rs)>;
def V62splatrh
  : OutPatFrag<(ops node:$Rs), (V6_lvsplath $Rs)>;
def V62splatrw
  : OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
230
// Output fragment: a vector pair whose high and low halves are both $N.
def Rep
  : OutPatFrag<(ops node:$N), (Combinev $N, $N)>;
232
// splat_vector selection using the V60 splat fragments.
let Predicates = [UseHVX,UseHVXV60] in {
  // Immediate splats carry a higher complexity than the register splats.
  let AddedComplexity = 10 in {
    // Single vectors.
    def: Pat<(VecI8  (splat_vector u8_0ImmPred:$V)),  (V60splatib $V)>;
    def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (V60splatih $V)>;
    def: Pat<(VecI32 (splat_vector anyimm:$V)),       (V60splatiw $V)>;
    // Vector pairs: the single-vector splat duplicated into both halves.
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),
             (Rep (V60splatib $V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)),
             (Rep (V60splatih $V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),
             (Rep (V60splatiw $V))>;
  }

  // Register splats, single vectors.
  def: Pat<(VecI8  (splat_vector I32:$Rs)), (V60splatrb $Rs)>;
  def: Pat<(VecI16 (splat_vector I32:$Rs)), (V60splatrh $Rs)>;
  def: Pat<(VecI32 (splat_vector I32:$Rs)), (V60splatrw $Rs)>;
  // Register splats, vector pairs.
  def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V60splatrb $Rs))>;
  def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V60splatrh $Rs))>;
  def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V60splatrw $Rs))>;
}
// splat_vector selection using the V62 splat fragments. The complexities
// here (30 and 20) are higher than those on the V60 patterns (10 and the
// default).
let Predicates = [UseHVX,UseHVXV62] in {
  // Immediate splats.
  let AddedComplexity = 30 in {
    // Single vectors.
    def: Pat<(VecI8  (splat_vector u8_0ImmPred:$V)),  (V62splatib imm:$V)>;
    def: Pat<(VecI16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
    def: Pat<(VecI32 (splat_vector anyimm:$V)),       (V62splatiw imm:$V)>;
    // Vector pairs.
    def: Pat<(VecPI8  (splat_vector u8_0ImmPred:$V)),
             (Rep (V62splatib imm:$V))>;
    def: Pat<(VecPI16 (splat_vector u16_0ImmPred:$V)),
             (Rep (V62splatih imm:$V))>;
    def: Pat<(VecPI32 (splat_vector anyimm:$V)),
             (Rep (V62splatiw imm:$V))>;
  }

  // Register splats.
  let AddedComplexity = 20 in {
    // Single vectors.
    def: Pat<(VecI8  (splat_vector I32:$Rs)), (V62splatrb $Rs)>;
    def: Pat<(VecI16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
    def: Pat<(VecI32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
    // Vector pairs.
    def: Pat<(VecPI8  (splat_vector I32:$Rs)), (Rep (V62splatrb $Rs))>;
    def: Pat<(VecPI16 (splat_vector I32:$Rs)), (Rep (V62splatrh $Rs))>;
    def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
  }
}
270
// A vector of type VecTy formed by splatting the i32 constant -1.
class Vneg1<ValueType VecTy>
  : PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;

// Vector complement, matched as an xor of $Vs with Vneg1<VecTy>.
class Vnot<ValueType VecTy>
  : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
276
// Element-wise arithmetic, bitwise logic, min/max and vector select.
let Predicates = [UseHVX] in {
  // Vector complement; carries a high complexity relative to the plain xor
  // patterns below.
  let AddedComplexity = 200 in {
    def: Pat<(Vnot<VecI8>  HVI8:$Vs),  (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
    def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>;
  }

  // Addition: single vectors, then vector pairs (_dv).
  def: OpR_RR_pat<V6_vaddb,    Add, VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vaddh,    Add, VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vaddw,    Add, VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vaddb_dv, Add, VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>;
  // Subtraction: single vectors, then vector pairs (_dv).
  def: OpR_RR_pat<V6_vsubb,    Sub, VecI8,   HVI8>;
  def: OpR_RR_pat<V6_vsubh,    Sub, VecI16,  HVI16>;
  def: OpR_RR_pat<V6_vsubw,    Sub, VecI32,  HVI32>;
  def: OpR_RR_pat<V6_vsubb_dv, Sub, VecPI8,  HWI8>;
  def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>;
  def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>;
  // Bitwise operations use one instruction for every element type.
  def: OpR_RR_pat<V6_vand, And, VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vand, And, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vand, And, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vor,  Or,  VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vor,  Or,  VecI16, HVI16>;
  def: OpR_RR_pat<V6_vor,  Or,  VecI32, HVI32>;
  def: OpR_RR_pat<V6_vxor, Xor, VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vxor, Xor, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vxor, Xor, VecI32, HVI32>;

  // Minimum/maximum. Only the signed forms are provided for words.
  def: OpR_RR_pat<V6_vminb,  Smin, VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vmaxb,  Smax, VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vminub, Umin, VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vmaxub, Umax, VecI8,  HVI8>;
  def: OpR_RR_pat<V6_vminh,  Smin, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxh,  Smax, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminuh, Umin, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vmaxuh, Umax, VecI16, HVI16>;
  def: OpR_RR_pat<V6_vminw,  Smin, VecI32, HVI32>;
  def: OpR_RR_pat<V6_vmaxw,  Smax, VecI32, HVI32>;

  // Vector select on a predicate.
  def: Pat<(vselect HQ8:$Qu,  HVI8:$Vs,  HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

  // Select on a negated predicate: use the predicate as is and swap the
  // two value operands.
  def: Pat<(vselect (qnot HQ8:$Qu),  HVI8:$Vs,  HVI8:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
  def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt),
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
331
// Element-wise multiplication.
let Predicates = [UseHVX] in {
  // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
  // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
  // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
  // V6_vshuffeb then combines the two halves into one i8 vector.
  def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
           (V6_vshuffeb
             (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
             (LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;

  // Halfword multiply maps to a single instruction.
  def: Pat<(mul HVI16:$Vs, HVI16:$Vt), (V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;

  // Word multiply: V6_vmpyieoh followed by an accumulating
  // V6_vmpyiewuh_acc on the same operands.
  def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
           (V6_vmpyiewuh_acc
             (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
             HvxVR:$Vs, HvxVR:$Vt)>;
}
345
// Sign/zero extension and truncation.
let Predicates = [UseHVX] in {
  // Full extension of a single vector into a vector pair.
  def: Pat<(VecPI16 (sext HVI8:$Vs)),  (VSxtb $Vs)>;
  def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
  def: Pat<(VecPI16 (zext HVI8:$Vs)),  (VZxtb $Vs)>;
  def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;

  // Sign extension in-vector: the low half of each unpack result is used,
  // and byte-to-word is done as two unpack steps.
  def: Pat<(VecI16 (sext_invec HVI8:$Vs)),  (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
           (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecPI16 (sext_invec HWI8:$Vss)),  (VSxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
           (VSxth (LoVec (VSxtb (LoVec $Vss))))>;

  // Zero extension in-vector: same structure with the unsigned unpacks.
  def: Pat<(VecI16 (zext_invec HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
           (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecPI16 (zext_invec HWI8:$Vss)),  (VZxtb (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
  def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
           (VZxth (LoVec (VZxtb (LoVec $Vss))))>;

  // Truncation of a vector pair to a single vector with the pack-even
  // instructions.
  def: Pat<(VecI8  (trunc HWI16:$Vss)),
           (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
  def: Pat<(VecI16 (trunc HWI32:$Vss)),
           (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>;

  // Truncation of a vector to a predicate: V6_vandvrt with the constant
  // 0x01010101.
  def: Pat<(VecQ8  (trunc HVI8:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ16 (trunc HVI16:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
  def: Pat<(VecQ32 (trunc HVI32:$Vs)),
           (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
}
382
// sext_inreg, valign, shifts, vpackl/vunpack pseudo-ops, bswap, ctpop, ctlz.
let Predicates = [UseHVX] in {
  // The "source" types are not legal, and there are no parameterized
  // definitions for them, but they are length-specific.
  // sext_inreg is done as a left shift followed by an arithmetic right
  // shift by the same amount.
  let Predicates = [UseHVX,UseHVX64B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }
  let Predicates = [UseHVX,UseHVX128B] in {
    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }

  // Take a pair of vectors Vt:Vs and shift them towards LSB by
  // (Rt & (HwLen-1)) bytes.
  def: Pat<(VecI8 (valign HVI8:$Vt, HVI8:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI16 (valign HVI16:$Vt, HVI16:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;
  def: Pat<(VecI32 (valign HVI32:$Vt, HVI32:$Vs, I32:$Rt)),
           (LoVec (V6_valignb HvxVR:$Vt, HvxVR:$Vs, I32:$Rt))>;

  // Byte shifts by a scalar amount: widen to halfwords, shift both halves
  // of the resulting pair, and pack the even bytes back together.
  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;

  // Halfword and word shifts by a scalar amount.
  def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>;

  // Word shift followed by an add, folded into the accumulating forms.
  def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)),
           (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
  def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
           (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;

  // Shifts by a per-element (vector) amount.
  def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;

  // Vpackl is a pseudo-op that is used when legalizing widened truncates.
  // It should never be produced with a register pair in the output, but
  // it can happen to have a pair as an input.
  def: Pat<(VecI8  (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>;
  def: Pat<(VecI8  (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
  def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
  def: Pat<(VecI8  (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
  def: Pat<(VecI8  (vpackl HWI32:$Vs)),
           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
  def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;

  // Signed unpack. VSxth unpacks halfwords into words, so the source of
  // the single-step VecPI32 pattern must be an i16 vector (HVI16), in line
  // with the VecI32 pattern above it.
  def: Pat<(VecI16  (vunpack   HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
  def: Pat<(VecI32  (vunpack   HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
  def: Pat<(VecI32  (vunpack  HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecPI16 (vunpack   HVI8:$Vs)), (VSxtb $Vs)>;
  def: Pat<(VecPI32 (vunpack   HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpack  HVI16:$Vs)), (VSxth $Vs)>;

  // Unsigned unpack; same structure as the signed case.
  def: Pat<(VecI16  (vunpacku  HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32  (vunpacku  HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
  def: Pat<(VecI32  (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecPI16 (vunpacku  HVI8:$Vs)), (VZxtb $Vs)>;
  def: Pat<(VecPI32 (vunpacku  HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
  def: Pat<(VecPI32 (vunpacku HVI16:$Vs)), (VZxth $Vs)>;

  // Byte swap via V6_vdelta with a splatted byte control value
  // (0x01 for halfwords, 0x03 for words).
  let Predicates = [UseHVX,UseHVXV60] in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V60splatib (i32 0x03)))>;
  }
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in {
    def: Pat<(VecI16 (bswap HVI16:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x01)))>;
    def: Pat<(VecI32 (bswap HVI32:$Vs)),
             (V6_vdelta HvxVR:$Vs, (V62splatib (i32 0x03)))>;
  }

  // Population count. Only the halfword form maps to one instruction:
  // bytes are widened to halfwords first, and words add up the counts of
  // their two halfwords.
  def: Pat<(VecI8 (ctpop HVI8:$Vs)),
           (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
                       (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
  def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctpop HVI32:$Vs)),
           (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
                     (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;

  // Count leading zeros of bytes: count on the zero-extended halfwords,
  // then subtract the 8 extra leading zero bits added by the extension.
  let Predicates = [UseHVX,UseHVXV60] in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V60splatib (i32 0x08)))>;
  let Predicates = [UseHVX,UseHVXV62], AddedComplexity = 10 in
  def: Pat<(VecI8 (ctlz HVI8:$Vs)),
           (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
                                 (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
                     (V62splatib (i32 0x08)))>;

  def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
  def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}
500
// Scalar-predicate select between two operands matched by RegPred. The
// predicate and both values are passed to MI in the same order.
class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
  : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
        (MI   I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;
504
// Instantiate the select pattern for single vectors and for vector pairs.
let Predicates = [UseHVX] in {
  foreach RP = [HVI8, HVI16, HVI32] in
    def: HvxSel_pat<PS_vselect, RP>;
  foreach RP = [HWI8, HWI16, HWI32] in
    def: HvxSel_pat<PS_wselect, RP>;
}
513
// Patterns producing or combining vector predicates.
let Predicates = [UseHVX] in {
  // Constant all-true / all-false predicates.
  foreach QT = [VecQ8, VecQ16, VecQ32] in
    def: Pat<(QT (qtrue)), (PS_qtrue)>;
  foreach QT = [VecQ8, VecQ16, VecQ32] in
    def: Pat<(QT (qfalse)), (PS_qfalse)>;

  // Predicate negation, written either as vnot or as qnot.
  foreach Q = [HQ8, HQ16, HQ32] in
    def: Pat<(vnot Q:$Qs), (V6_pred_not HvxQR:$Qs)>;
  foreach Q = [HQ8, HQ16, HQ32] in
    def: Pat<(qnot Q:$Qs), (V6_pred_not HvxQR:$Qs)>;

  // Predicate logic.
  def: OpR_RR_pat<V6_pred_and, And, VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_and, And, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_and, And, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_or,  Or,  VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_or,  Or,  VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_or,  Or,  VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_xor, Xor, VecQ32, HQ32>;

  // Predicate logic through the Not2 operator wrapper.
  def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ32, HQ32>;
  def: OpR_RR_pat<V6_pred_or_n,  Not2<Or>,  VecQ8,  HQ8>;
  def: OpR_RR_pat<V6_pred_or_n,  Not2<Or>,  VecQ16, HQ16>;
  def: OpR_RR_pat<V6_pred_or_n,  Not2<Or>,  VecQ32, HQ32>;

  // Vector comparisons producing a predicate.
  def: OpR_RR_pat<V6_veqb,  seteq,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_veqh,  seteq,  VecQ16, HVI16>;
  def: OpR_RR_pat<V6_veqw,  seteq,  VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtb,  setgt,  VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgth,  setgt,  VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtw,  setgt,  VecQ32, HVI32>;
  def: OpR_RR_pat<V6_vgtub, setugt, VecQ8,  HVI8>;
  def: OpR_RR_pat<V6_vgtuh, setugt, VecQ16, HVI16>;
  def: OpR_RR_pat<V6_vgtuw, setugt, VecQ32, HVI32>;

  // Comparisons whose result is combined (and/or/xor) with an existing
  // predicate: equality.
  def: AccRRR_pat<V6_veqb_and, And, seteq, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_or,  Or,  seteq, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqb_xor, Xor, seteq, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_or,  Or,  seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_or,  Or,  seteq, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVI32, HVI32>;

  // Signed greater-than.
  def: AccRRR_pat<V6_vgtb_and, And, setgt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_or,  Or,  setgt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtb_xor, Xor, setgt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgth_and, And, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_or,  Or,  setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgth_xor, Xor, setgt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtw_and, And, setgt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_or,  Or,  setgt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtw_xor, Xor, setgt, HQ32, HVI32, HVI32>;

  // Unsigned greater-than.
  def: AccRRR_pat<V6_vgtub_and, And, setugt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_or,  Or,  setugt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtub_xor, Xor, setugt, HQ8,  HVI8,  HVI8>;
  def: AccRRR_pat<V6_vgtuh_and, And, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_or,  Or,  setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuh_xor, Xor, setugt, HQ16, HVI16, HVI16>;
  def: AccRRR_pat<V6_vgtuw_and, And, setugt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_or,  Or,  setugt, HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>;
}
586