1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HexagonISelLowering.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/Analysis/MemoryLocation.h"
13 #include "llvm/IR/IntrinsicsHexagon.h"
14 #include "llvm/Support/CommandLine.h"
15 
16 using namespace llvm;
17 
// Hidden command-line option "hexagon-hvx-widen". The value is a size in
// bytes. getPreferredHvxVectorAction() only consults it when the option was
// explicitly given on the command line (getNumOccurrences() > 0), so the
// initial value of 16 is not applied there by default.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::Hidden, cl::init(16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// Integer vector types used by initializeHVXLowering(). The "V" lists are the
// types registered in HvxVRRegClass and the "W" lists are the types registered
// in HvxWRRegClass; the 64/128 suffix matches the useHVX64BOps() /
// useHVX128BOps() configuration the list is selected for.
static const MVT LegalV64[] =  { MVT::v64i8,  MVT::v32i16,  MVT::v16i32 };
static const MVT LegalW64[] =  { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
26 
27 void
28 HexagonTargetLowering::initializeHVXLowering() {
29   if (Subtarget.useHVX64BOps()) {
30     addRegisterClass(MVT::v64i8,  &Hexagon::HvxVRRegClass);
31     addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
32     addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
33     addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
34     addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
35     addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
36     // These "short" boolean vector types should be legal because
37     // they will appear as results of vector compares. If they were
38     // not legal, type legalization would try to make them legal
39     // and that would require using operations that do not use or
40     // produce such types. That, in turn, would imply using custom
41     // nodes, which would be unoptimizable by the DAG combiner.
42     // The idea is to rely on target-independent operations as much
43     // as possible.
44     addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
45     addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
46     addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
47   } else if (Subtarget.useHVX128BOps()) {
48     addRegisterClass(MVT::v128i8,  &Hexagon::HvxVRRegClass);
49     addRegisterClass(MVT::v64i16,  &Hexagon::HvxVRRegClass);
50     addRegisterClass(MVT::v32i32,  &Hexagon::HvxVRRegClass);
51     addRegisterClass(MVT::v256i8,  &Hexagon::HvxWRRegClass);
52     addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
53     addRegisterClass(MVT::v64i32,  &Hexagon::HvxWRRegClass);
54     addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
55     addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
56     addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
57     if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
58       addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
59       addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
60       addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
61       addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
62     }
63   }
64 
65   // Set up operation actions.
66 
67   bool Use64b = Subtarget.useHVX64BOps();
68   ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
69   ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
70   MVT ByteV = Use64b ?  MVT::v64i8 : MVT::v128i8;
71   MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
72 
73   auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
74     setOperationAction(Opc, FromTy, Promote);
75     AddPromotedToType(Opc, FromTy, ToTy);
76   };
77 
78   // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
79   // Note: v16i1 -> i16 is handled in type legalization instead of op
80   // legalization.
81   setOperationAction(ISD::BITCAST,              MVT::i16, Custom);
82   setOperationAction(ISD::BITCAST,              MVT::i32, Custom);
83   setOperationAction(ISD::BITCAST,              MVT::i64, Custom);
84   setOperationAction(ISD::BITCAST,            MVT::v16i1, Custom);
85   setOperationAction(ISD::BITCAST,           MVT::v128i1, Custom);
86   setOperationAction(ISD::BITCAST,             MVT::i128, Custom);
87   setOperationAction(ISD::VECTOR_SHUFFLE,          ByteV, Legal);
88   setOperationAction(ISD::VECTOR_SHUFFLE,          ByteW, Legal);
89   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
90 
91   if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
92       Subtarget.useHVXFloatingPoint()) {
93 
94     static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
95     static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
96 
97     for (MVT T : FloatV) {
98       setOperationAction(ISD::FADD,              T, Legal);
99       setOperationAction(ISD::FSUB,              T, Legal);
100       setOperationAction(ISD::FMUL,              T, Legal);
101       setOperationAction(ISD::FMINNUM,           T, Legal);
102       setOperationAction(ISD::FMAXNUM,           T, Legal);
103 
104       setOperationAction(ISD::INSERT_SUBVECTOR,  T, Custom);
105       setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
106 
107       setOperationAction(ISD::SPLAT_VECTOR,      T, Legal);
108       setOperationAction(ISD::SPLAT_VECTOR,      T, Legal);
109 
110       setOperationAction(ISD::MLOAD,             T, Custom);
111       setOperationAction(ISD::MSTORE,            T, Custom);
112       // Custom-lower BUILD_VECTOR. The standard (target-independent)
113       // handling of it would convert it to a load, which is not always
114       // the optimal choice.
115       setOperationAction(ISD::BUILD_VECTOR,      T, Custom);
116     }
117 
118 
119     // BUILD_VECTOR with f16 operands cannot be promoted without
120     // promoting the result, so lower the node to vsplat or constant pool
121     setOperationAction(ISD::BUILD_VECTOR,      MVT::f16, Custom);
122     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
123     setOperationAction(ISD::SPLAT_VECTOR,      MVT::f16, Custom);
124 
125     // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
126     // generated.
127     setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
128     setPromoteTo(ISD::VECTOR_SHUFFLE,  MVT::v64f16, ByteV);
129     setPromoteTo(ISD::VECTOR_SHUFFLE,  MVT::v64f32, ByteW);
130     setPromoteTo(ISD::VECTOR_SHUFFLE,  MVT::v32f32, ByteV);
131 
132     for (MVT P : FloatW) {
133       setOperationAction(ISD::LOAD,           P, Custom);
134       setOperationAction(ISD::STORE,          P, Custom);
135       setOperationAction(ISD::FADD,           P, Custom);
136       setOperationAction(ISD::FSUB,           P, Custom);
137       setOperationAction(ISD::FMUL,           P, Custom);
138       setOperationAction(ISD::FMINNUM,        P, Custom);
139       setOperationAction(ISD::FMAXNUM,        P, Custom);
140       setOperationAction(ISD::VSELECT,        P, Custom);
141 
142       // Custom-lower BUILD_VECTOR. The standard (target-independent)
143       // handling of it would convert it to a load, which is not always
144       // the optimal choice.
145       setOperationAction(ISD::BUILD_VECTOR,   P, Custom);
146       // Make concat-vectors custom to handle concats of more than 2 vectors.
147       setOperationAction(ISD::CONCAT_VECTORS, P, Custom);
148 
149       setOperationAction(ISD::MLOAD,          P, Custom);
150       setOperationAction(ISD::MSTORE,         P, Custom);
151     }
152 
153     if (Subtarget.useHVXQFloatOps()) {
154       setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
155       setOperationAction(ISD::FP_ROUND,  MVT::v64f16, Legal);
156     } else if (Subtarget.useHVXIEEEFPOps()) {
157       setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
158       setOperationAction(ISD::FP_ROUND,  MVT::v64f16, Legal);
159     }
160   }
161 
162   for (MVT T : LegalV) {
163     setIndexedLoadAction(ISD::POST_INC,  T, Legal);
164     setIndexedStoreAction(ISD::POST_INC, T, Legal);
165 
166     setOperationAction(ISD::AND,            T, Legal);
167     setOperationAction(ISD::OR,             T, Legal);
168     setOperationAction(ISD::XOR,            T, Legal);
169     setOperationAction(ISD::ADD,            T, Legal);
170     setOperationAction(ISD::SUB,            T, Legal);
171     setOperationAction(ISD::MUL,            T, Legal);
172     setOperationAction(ISD::CTPOP,          T, Legal);
173     setOperationAction(ISD::CTLZ,           T, Legal);
174     setOperationAction(ISD::SELECT,         T, Legal);
175     setOperationAction(ISD::SPLAT_VECTOR,   T, Legal);
176     if (T != ByteV) {
177       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
178       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
179       setOperationAction(ISD::BSWAP,                    T, Legal);
180     }
181 
182     setOperationAction(ISD::SMIN,           T, Legal);
183     setOperationAction(ISD::SMAX,           T, Legal);
184     if (T.getScalarType() != MVT::i32) {
185       setOperationAction(ISD::UMIN,         T, Legal);
186       setOperationAction(ISD::UMAX,         T, Legal);
187     }
188 
189     setOperationAction(ISD::CTTZ,               T, Custom);
190     setOperationAction(ISD::LOAD,               T, Custom);
191     setOperationAction(ISD::MLOAD,              T, Custom);
192     setOperationAction(ISD::MSTORE,             T, Custom);
193     setOperationAction(ISD::MULHS,              T, Custom);
194     setOperationAction(ISD::MULHU,              T, Custom);
195     setOperationAction(ISD::BUILD_VECTOR,       T, Custom);
196     // Make concat-vectors custom to handle concats of more than 2 vectors.
197     setOperationAction(ISD::CONCAT_VECTORS,     T, Custom);
198     setOperationAction(ISD::INSERT_SUBVECTOR,   T, Custom);
199     setOperationAction(ISD::INSERT_VECTOR_ELT,  T, Custom);
200     setOperationAction(ISD::EXTRACT_SUBVECTOR,  T, Custom);
201     setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
202     setOperationAction(ISD::ANY_EXTEND,         T, Custom);
203     setOperationAction(ISD::SIGN_EXTEND,        T, Custom);
204     setOperationAction(ISD::ZERO_EXTEND,        T, Custom);
205     if (T != ByteV) {
206       setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
207       // HVX only has shifts of words and halfwords.
208       setOperationAction(ISD::SRA,                     T, Custom);
209       setOperationAction(ISD::SHL,                     T, Custom);
210       setOperationAction(ISD::SRL,                     T, Custom);
211 
212       // Promote all shuffles to operate on vectors of bytes.
213       setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
214     }
215 
216     if (Subtarget.useHVXQFloatOps()) {
217       setOperationAction(ISD::SINT_TO_FP, T, Expand);
218       setOperationAction(ISD::UINT_TO_FP, T, Expand);
219       setOperationAction(ISD::FP_TO_SINT, T, Expand);
220       setOperationAction(ISD::FP_TO_UINT, T, Expand);
221     } else if (Subtarget.useHVXIEEEFPOps()) {
222       setOperationAction(ISD::SINT_TO_FP, T, Custom);
223       setOperationAction(ISD::UINT_TO_FP, T, Custom);
224       setOperationAction(ISD::FP_TO_SINT, T, Custom);
225       setOperationAction(ISD::FP_TO_UINT, T, Custom);
226     }
227 
228     setCondCodeAction(ISD::SETNE,  T, Expand);
229     setCondCodeAction(ISD::SETLE,  T, Expand);
230     setCondCodeAction(ISD::SETGE,  T, Expand);
231     setCondCodeAction(ISD::SETLT,  T, Expand);
232     setCondCodeAction(ISD::SETULE, T, Expand);
233     setCondCodeAction(ISD::SETUGE, T, Expand);
234     setCondCodeAction(ISD::SETULT, T, Expand);
235   }
236 
237   for (MVT T : LegalW) {
238     // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
239     // independent) handling of it would convert it to a load, which is
240     // not always the optimal choice.
241     setOperationAction(ISD::BUILD_VECTOR,   T, Custom);
242     // Make concat-vectors custom to handle concats of more than 2 vectors.
243     setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
244 
245     // Custom-lower these operations for pairs. Expand them into a concat
246     // of the corresponding operations on individual vectors.
247     setOperationAction(ISD::ANY_EXTEND,               T, Custom);
248     setOperationAction(ISD::SIGN_EXTEND,              T, Custom);
249     setOperationAction(ISD::ZERO_EXTEND,              T, Custom);
250     setOperationAction(ISD::SIGN_EXTEND_INREG,        T, Custom);
251     setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  T, Custom);
252     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
253     setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
254     setOperationAction(ISD::SPLAT_VECTOR,             T, Custom);
255 
256     setOperationAction(ISD::LOAD,     T, Custom);
257     setOperationAction(ISD::STORE,    T, Custom);
258     setOperationAction(ISD::MLOAD,    T, Custom);
259     setOperationAction(ISD::MSTORE,   T, Custom);
260     setOperationAction(ISD::CTLZ,     T, Custom);
261     setOperationAction(ISD::CTTZ,     T, Custom);
262     setOperationAction(ISD::CTPOP,    T, Custom);
263 
264     setOperationAction(ISD::ADD,      T, Legal);
265     setOperationAction(ISD::SUB,      T, Legal);
266     setOperationAction(ISD::MUL,      T, Custom);
267     setOperationAction(ISD::MULHS,    T, Custom);
268     setOperationAction(ISD::MULHU,    T, Custom);
269     setOperationAction(ISD::AND,      T, Custom);
270     setOperationAction(ISD::OR,       T, Custom);
271     setOperationAction(ISD::XOR,      T, Custom);
272     setOperationAction(ISD::SETCC,    T, Custom);
273     setOperationAction(ISD::VSELECT,  T, Custom);
274     if (T != ByteW) {
275       setOperationAction(ISD::SRA,      T, Custom);
276       setOperationAction(ISD::SHL,      T, Custom);
277       setOperationAction(ISD::SRL,      T, Custom);
278 
279       // Promote all shuffles to operate on vectors of bytes.
280       setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
281     }
282 
283     setOperationAction(ISD::SMIN,     T, Custom);
284     setOperationAction(ISD::SMAX,     T, Custom);
285     if (T.getScalarType() != MVT::i32) {
286       setOperationAction(ISD::UMIN,   T, Custom);
287       setOperationAction(ISD::UMAX,   T, Custom);
288     }
289 
290     setOperationAction(ISD::SINT_TO_FP, T, Custom);
291     setOperationAction(ISD::UINT_TO_FP, T, Custom);
292     setOperationAction(ISD::FP_TO_SINT, T, Custom);
293     setOperationAction(ISD::FP_TO_UINT, T, Custom);
294   }
295 
296   setCondCodeAction(ISD::SETNE,  MVT::v64f16, Expand);
297   setCondCodeAction(ISD::SETLE,  MVT::v64f16, Expand);
298   setCondCodeAction(ISD::SETGE,  MVT::v64f16, Expand);
299   setCondCodeAction(ISD::SETLT,  MVT::v64f16, Expand);
300   setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
301   setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
302   setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
303   setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
304   setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
305   setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
306   setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
307   setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
308 
309   setCondCodeAction(ISD::SETNE,  MVT::v32f32, Expand);
310   setCondCodeAction(ISD::SETLE,  MVT::v32f32, Expand);
311   setCondCodeAction(ISD::SETGE,  MVT::v32f32, Expand);
312   setCondCodeAction(ISD::SETLT,  MVT::v32f32, Expand);
313   setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
314   setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
315   setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
316   setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
317   setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
318   setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
319   setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
320   setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
321 
322   // Boolean vectors.
323 
324   for (MVT T : LegalW) {
325     // Boolean types for vector pairs will overlap with the boolean
326     // types for single vectors, e.g.
327     //   v64i8  -> v64i1 (single)
328     //   v64i16 -> v64i1 (pair)
329     // Set these actions first, and allow the single actions to overwrite
330     // any duplicates.
331     MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
332     setOperationAction(ISD::SETCC,              BoolW, Custom);
333     setOperationAction(ISD::AND,                BoolW, Custom);
334     setOperationAction(ISD::OR,                 BoolW, Custom);
335     setOperationAction(ISD::XOR,                BoolW, Custom);
336     // Masked load/store takes a mask that may need splitting.
337     setOperationAction(ISD::MLOAD,              BoolW, Custom);
338     setOperationAction(ISD::MSTORE,             BoolW, Custom);
339   }
340 
341   for (MVT T : LegalV) {
342     MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
343     setOperationAction(ISD::BUILD_VECTOR,       BoolV, Custom);
344     setOperationAction(ISD::CONCAT_VECTORS,     BoolV, Custom);
345     setOperationAction(ISD::INSERT_SUBVECTOR,   BoolV, Custom);
346     setOperationAction(ISD::INSERT_VECTOR_ELT,  BoolV, Custom);
347     setOperationAction(ISD::EXTRACT_SUBVECTOR,  BoolV, Custom);
348     setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
349     setOperationAction(ISD::SELECT,             BoolV, Custom);
350     setOperationAction(ISD::AND,                BoolV, Legal);
351     setOperationAction(ISD::OR,                 BoolV, Legal);
352     setOperationAction(ISD::XOR,                BoolV, Legal);
353   }
354 
355   if (Use64b) {
356     for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
357       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
358   } else {
359     for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
360       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
361   }
362 
363   // Handle store widening for short vectors.
364   unsigned HwLen = Subtarget.getVectorLength();
365   for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
366     if (ElemTy == MVT::i1)
367       continue;
368     int ElemWidth = ElemTy.getFixedSizeInBits();
369     int MaxElems = (8*HwLen) / ElemWidth;
370     for (int N = 2; N < MaxElems; N *= 2) {
371       MVT VecTy = MVT::getVectorVT(ElemTy, N);
372       auto Action = getPreferredVectorAction(VecTy);
373       if (Action == TargetLoweringBase::TypeWidenVector) {
374         setOperationAction(ISD::LOAD,         VecTy, Custom);
375         setOperationAction(ISD::STORE,        VecTy, Custom);
376         setOperationAction(ISD::SETCC,        VecTy, Custom);
377         setOperationAction(ISD::TRUNCATE,     VecTy, Custom);
378         setOperationAction(ISD::ANY_EXTEND,   VecTy, Custom);
379         setOperationAction(ISD::SIGN_EXTEND,  VecTy, Custom);
380         setOperationAction(ISD::ZERO_EXTEND,  VecTy, Custom);
381 
382         MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
383         if (!isTypeLegal(BoolTy))
384           setOperationAction(ISD::SETCC, BoolTy, Custom);
385       }
386     }
387   }
388 
389   setTargetDAGCombine({ISD::SPLAT_VECTOR, ISD::VSELECT});
390 }
391 
392 unsigned
393 HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
394   MVT ElemTy = VecTy.getVectorElementType();
395   unsigned VecLen = VecTy.getVectorNumElements();
396   unsigned HwLen = Subtarget.getVectorLength();
397 
398   // Split vectors of i1 that exceed byte vector length.
399   if (ElemTy == MVT::i1 && VecLen > HwLen)
400     return TargetLoweringBase::TypeSplitVector;
401 
402   ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
403   // For shorter vectors of i1, widen them if any of the corresponding
404   // vectors of integers needs to be widened.
405   if (ElemTy == MVT::i1) {
406     for (MVT T : Tys) {
407       assert(T != MVT::i1);
408       auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
409       if (A != ~0u)
410         return A;
411     }
412     return ~0u;
413   }
414 
415   // If the size of VecTy is at least half of the vector length,
416   // widen the vector. Note: the threshold was not selected in
417   // any scientific way.
418   if (llvm::is_contained(Tys, ElemTy)) {
419     unsigned VecWidth = VecTy.getSizeInBits();
420     bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
421     if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
422       return TargetLoweringBase::TypeWidenVector;
423     unsigned HwWidth = 8*HwLen;
424     if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
425       return TargetLoweringBase::TypeWidenVector;
426   }
427 
428   // Defer to default.
429   return ~0u;
430 }
431 
432 SDValue
433 HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
434                               const SDLoc &dl, SelectionDAG &DAG) const {
435   SmallVector<SDValue,4> IntOps;
436   IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
437   append_range(IntOps, Ops);
438   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
439 }
440 
441 MVT
442 HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
443   assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
444 
445   MVT ElemTy = Tys.first.getVectorElementType();
446   return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
447                                   Tys.second.getVectorNumElements());
448 }
449 
450 HexagonTargetLowering::TypePair
451 HexagonTargetLowering::typeSplit(MVT VecTy) const {
452   assert(VecTy.isVector());
453   unsigned NumElem = VecTy.getVectorNumElements();
454   assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
455   MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
456   return { HalfTy, HalfTy };
457 }
458 
459 MVT
460 HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
461   MVT ElemTy = VecTy.getVectorElementType();
462   MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
463   return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
464 }
465 
466 MVT
467 HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
468   MVT ElemTy = VecTy.getVectorElementType();
469   MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
470   return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
471 }
472 
473 SDValue
474 HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
475                                   SelectionDAG &DAG) const {
476   if (ty(Vec).getVectorElementType() == ElemTy)
477     return Vec;
478   MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
479   return DAG.getBitcast(CastTy, Vec);
480 }
481 
482 SDValue
483 HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
484                               SelectionDAG &DAG) const {
485   return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
486                      Ops.second, Ops.first);
487 }
488 
489 HexagonTargetLowering::VectorPair
490 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
491                                SelectionDAG &DAG) const {
492   TypePair Tys = typeSplit(ty(Vec));
493   if (Vec.getOpcode() == HexagonISD::QCAT)
494     return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
495   return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
496 }
497 
498 bool
499 HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
500   return Subtarget.isHVXVectorType(Ty) &&
501          Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
502 }
503 
504 bool
505 HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
506   return Subtarget.isHVXVectorType(Ty) &&
507          Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
508 }
509 
510 bool
511 HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
512   return Subtarget.isHVXVectorType(Ty, true) &&
513          Ty.getVectorElementType() == MVT::i1;
514 }
515 
516 bool HexagonTargetLowering::allowsHvxMemoryAccess(
517     MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
518   // Bool vectors are excluded by default, but make it explicit to
519   // emphasize that bool vectors cannot be loaded or stored.
520   // Also, disallow double vector stores (to prevent unnecessary
521   // store widening in DAG combiner).
522   if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
523     return false;
524   if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
525     return false;
526   if (Fast)
527     *Fast = true;
528   return true;
529 }
530 
531 bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
532     MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
533   if (!Subtarget.isHVXVectorType(VecTy))
534     return false;
535   // XXX Should this be false?  vmemu are a bit slower than vmem.
536   if (Fast)
537     *Fast = true;
538   return true;
539 }
540 
541 SDValue
542 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
543                                           SelectionDAG &DAG) const {
544   if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
545     ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
546 
547   unsigned ElemWidth = ElemTy.getSizeInBits();
548   if (ElemWidth == 8)
549     return ElemIdx;
550 
551   unsigned L = Log2_32(ElemWidth/8);
552   const SDLoc &dl(ElemIdx);
553   return DAG.getNode(ISD::SHL, dl, MVT::i32,
554                      {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
555 }
556 
557 SDValue
558 HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
559                                         SelectionDAG &DAG) const {
560   unsigned ElemWidth = ElemTy.getSizeInBits();
561   assert(ElemWidth >= 8 && ElemWidth <= 32);
562   if (ElemWidth == 32)
563     return Idx;
564 
565   if (ty(Idx) != MVT::i32)
566     Idx = DAG.getBitcast(MVT::i32, Idx);
567   const SDLoc &dl(Idx);
568   SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
569   SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
570   return SubIdx;
571 }
572 
573 SDValue
574 HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
575                                       SDValue Op1, ArrayRef<int> Mask,
576                                       SelectionDAG &DAG) const {
577   MVT OpTy = ty(Op0);
578   assert(OpTy == ty(Op1));
579 
580   MVT ElemTy = OpTy.getVectorElementType();
581   if (ElemTy == MVT::i8)
582     return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
583   assert(ElemTy.getSizeInBits() >= 8);
584 
585   MVT ResTy = tyVector(OpTy, MVT::i8);
586   unsigned ElemSize = ElemTy.getSizeInBits() / 8;
587 
588   SmallVector<int,128> ByteMask;
589   for (int M : Mask) {
590     if (M < 0) {
591       for (unsigned I = 0; I != ElemSize; ++I)
592         ByteMask.push_back(-1);
593     } else {
594       int NewM = M*ElemSize;
595       for (unsigned I = 0; I != ElemSize; ++I)
596         ByteMask.push_back(NewM+I);
597     }
598   }
599   assert(ResTy.getVectorNumElements() == ByteMask.size());
600   return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
601                               opCastElem(Op1, MVT::i8, DAG), ByteMask);
602 }
603 
604 SDValue
605 HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
606                                          const SDLoc &dl, MVT VecTy,
607                                          SelectionDAG &DAG) const {
608   unsigned VecLen = Values.size();
609   MachineFunction &MF = DAG.getMachineFunction();
610   MVT ElemTy = VecTy.getVectorElementType();
611   unsigned ElemWidth = ElemTy.getSizeInBits();
612   unsigned HwLen = Subtarget.getVectorLength();
613 
614   unsigned ElemSize = ElemWidth / 8;
615   assert(ElemSize*VecLen == HwLen);
616   SmallVector<SDValue,32> Words;
617 
618   if (VecTy.getVectorElementType() != MVT::i32 &&
619       !(Subtarget.useHVXFloatingPoint() &&
620       VecTy.getVectorElementType() == MVT::f32)) {
621     assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
622     unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
623     MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
624     for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
625       SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
626       Words.push_back(DAG.getBitcast(MVT::i32, W));
627     }
628   } else {
629     for (SDValue V : Values)
630       Words.push_back(DAG.getBitcast(MVT::i32, V));
631   }
632   auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
633     unsigned NumValues = Values.size();
634     assert(NumValues > 0);
635     bool IsUndef = true;
636     for (unsigned i = 0; i != NumValues; ++i) {
637       if (Values[i].isUndef())
638         continue;
639       IsUndef = false;
640       if (!SplatV.getNode())
641         SplatV = Values[i];
642       else if (SplatV != Values[i])
643         return false;
644     }
645     if (IsUndef)
646       SplatV = Values[0];
647     return true;
648   };
649 
650   unsigned NumWords = Words.size();
651   SDValue SplatV;
652   bool IsSplat = isSplat(Words, SplatV);
653   if (IsSplat && isUndef(SplatV))
654     return DAG.getUNDEF(VecTy);
655   if (IsSplat) {
656     assert(SplatV.getNode());
657     auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
658     if (IdxN && IdxN->isZero())
659       return getZero(dl, VecTy, DAG);
660     MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
661     SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
662     return DAG.getBitcast(VecTy, S);
663   }
664 
665   // Delay recognizing constant vectors until here, so that we can generate
666   // a vsplat.
667   SmallVector<ConstantInt*, 128> Consts(VecLen);
668   bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
669   if (AllConst) {
670     ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
671                             (Constant**)Consts.end());
672     Constant *CV = ConstantVector::get(Tmp);
673     Align Alignment(HwLen);
674     SDValue CP =
675         LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
676     return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
677                        MachinePointerInfo::getConstantPool(MF), Alignment);
678   }
679 
680   // A special case is a situation where the vector is built entirely from
681   // elements extracted from another vector. This could be done via a shuffle
682   // more efficiently, but typically, the size of the source vector will not
683   // match the size of the vector being built (which precludes the use of a
684   // shuffle directly).
685   // This only handles a single source vector, and the vector being built
686   // should be of a sub-vector type of the source vector type.
687   auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
688                                              SmallVectorImpl<int> &SrcIdx) {
689     SDValue Vec;
690     for (SDValue V : Values) {
691       if (isUndef(V)) {
692         SrcIdx.push_back(-1);
693         continue;
694       }
695       if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
696         return false;
697       // All extracts should come from the same vector.
698       SDValue T = V.getOperand(0);
699       if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
700         return false;
701       Vec = T;
702       ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
703       if (C == nullptr)
704         return false;
705       int I = C->getSExtValue();
706       assert(I >= 0 && "Negative element index");
707       SrcIdx.push_back(I);
708     }
709     SrcVec = Vec;
710     return true;
711   };
712 
713   SmallVector<int,128> ExtIdx;
714   SDValue ExtVec;
715   if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
716     MVT ExtTy = ty(ExtVec);
717     unsigned ExtLen = ExtTy.getVectorNumElements();
718     if (ExtLen == VecLen || ExtLen == 2*VecLen) {
719       // Construct a new shuffle mask that will produce a vector with the same
720       // number of elements as the input vector, and such that the vector we
721       // want will be the initial subvector of it.
722       SmallVector<int,128> Mask;
723       BitVector Used(ExtLen);
724 
725       for (int M : ExtIdx) {
726         Mask.push_back(M);
727         if (M >= 0)
728           Used.set(M);
729       }
730       // Fill the rest of the mask with the unused elements of ExtVec in hopes
731       // that it will result in a permutation of ExtVec's elements. It's still
732       // fine if it doesn't (e.g. if undefs are present, or elements are
733       // repeated), but permutations can always be done efficiently via vdelta
734       // and vrdelta.
735       for (unsigned I = 0; I != ExtLen; ++I) {
736         if (Mask.size() == ExtLen)
737           break;
738         if (!Used.test(I))
739           Mask.push_back(I);
740       }
741 
742       SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
743                                        DAG.getUNDEF(ExtTy), Mask);
744       if (ExtLen == VecLen)
745         return S;
746       return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
747     }
748   }
749 
750   // Find most common element to initialize vector with. This is to avoid
751   // unnecessary vinsert/valign for cases where the same value is present
752   // many times. Creates a histogram of the vector's elements to find the
753   // most common element n.
754   assert(4*Words.size() == Subtarget.getVectorLength());
755   int VecHist[32];
756   int n = 0;
757   for (unsigned i = 0; i != NumWords; ++i) {
758     VecHist[i] = 0;
759     if (Words[i].isUndef())
760       continue;
761     for (unsigned j = i; j != NumWords; ++j)
762       if (Words[i] == Words[j])
763         VecHist[i]++;
764 
765     if (VecHist[i] > VecHist[n])
766       n = i;
767   }
768 
769   SDValue HalfV = getZero(dl, VecTy, DAG);
770   if (VecHist[n] > 1) {
771     SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
772     HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
773                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
774   }
775   SDValue HalfV0 = HalfV;
776   SDValue HalfV1 = HalfV;
777 
778   // Construct two halves in parallel, then or them together. Rn and Rm count
779   // number of rotations needed before the next element. One last rotation is
780   // performed post-loop to position the last element.
781   int Rn = 0, Rm = 0;
782   SDValue Sn, Sm;
783   SDValue N = HalfV0;
784   SDValue M = HalfV1;
785   for (unsigned i = 0; i != NumWords/2; ++i) {
786     // Rotate by element count since last insertion.
787     if (Words[i] != Words[n] || VecHist[n] <= 1) {
788       Sn = DAG.getConstant(Rn, dl, MVT::i32);
789       HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
790       N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
791                       {HalfV0, Words[i]});
792       Rn = 0;
793     }
794     if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
795       Sm = DAG.getConstant(Rm, dl, MVT::i32);
796       HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
797       M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
798                       {HalfV1, Words[i+NumWords/2]});
799       Rm = 0;
800     }
801     Rn += 4;
802     Rm += 4;
803   }
804   // Perform last rotation.
805   Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
806   Sm = DAG.getConstant(Rm, dl, MVT::i32);
807   HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
808   HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
809 
810   SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
811   SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
812 
813   SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
814 
815   SDValue OutV =
816       DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
817   return OutV;
818 }
819 
820 SDValue
821 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
822       unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
823   MVT PredTy = ty(PredV);
824   unsigned HwLen = Subtarget.getVectorLength();
825   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
826 
827   if (Subtarget.isHVXVectorType(PredTy, true)) {
828     // Move the vector predicate SubV to a vector register, and scale it
829     // down to match the representation (bytes per type element) that VecV
830     // uses. The scaling down will pick every 2nd or 4th (every Scale-th
831     // in general) element and put them at the front of the resulting
832     // vector. This subvector will then be inserted into the Q2V of VecV.
833     // To avoid having an operation that generates an illegal type (short
834     // vector), generate a full size vector.
835     //
836     SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
837     SmallVector<int,128> Mask(HwLen);
838     // Scale = BitBytes(PredV) / Given BitBytes.
839     unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
840     unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
841 
842     for (unsigned i = 0; i != HwLen; ++i) {
843       unsigned Num = i % Scale;
844       unsigned Off = i / Scale;
845       Mask[BlockLen*Num + Off] = i;
846     }
847     SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
848     if (!ZeroFill)
849       return S;
850     // Fill the bytes beyond BlockLen with 0s.
851     // V6_pred_scalar2 cannot fill the entire predicate, so it only works
852     // when BlockLen < HwLen.
853     assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
854     MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
855     SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
856                          {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
857     SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
858     return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
859   }
860 
861   // Make sure that this is a valid scalar predicate.
862   assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
863 
864   unsigned Bytes = 8 / PredTy.getVectorNumElements();
865   SmallVector<SDValue,4> Words[2];
866   unsigned IdxW = 0;
867 
868   auto Lo32 = [&DAG, &dl] (SDValue P) {
869     return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
870   };
871   auto Hi32 = [&DAG, &dl] (SDValue P) {
872     return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
873   };
874 
875   SDValue W0 = isUndef(PredV)
876                   ? DAG.getUNDEF(MVT::i64)
877                   : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
878   Words[IdxW].push_back(Hi32(W0));
879   Words[IdxW].push_back(Lo32(W0));
880 
881   while (Bytes < BitBytes) {
882     IdxW ^= 1;
883     Words[IdxW].clear();
884 
885     if (Bytes < 4) {
886       for (const SDValue &W : Words[IdxW ^ 1]) {
887         SDValue T = expandPredicate(W, dl, DAG);
888         Words[IdxW].push_back(Hi32(T));
889         Words[IdxW].push_back(Lo32(T));
890       }
891     } else {
892       for (const SDValue &W : Words[IdxW ^ 1]) {
893         Words[IdxW].push_back(W);
894         Words[IdxW].push_back(W);
895       }
896     }
897     Bytes *= 2;
898   }
899 
900   assert(Bytes == BitBytes);
901 
902   SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
903   SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
904   for (const SDValue &W : Words[IdxW]) {
905     Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
906     Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
907   }
908 
909   return Vec;
910 }
911 
912 SDValue
913 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
914                                           const SDLoc &dl, MVT VecTy,
915                                           SelectionDAG &DAG) const {
916   // Construct a vector V of bytes, such that a comparison V >u 0 would
917   // produce the required vector predicate.
918   unsigned VecLen = Values.size();
919   unsigned HwLen = Subtarget.getVectorLength();
920   assert(VecLen <= HwLen || VecLen == 8*HwLen);
921   SmallVector<SDValue,128> Bytes;
922   bool AllT = true, AllF = true;
923 
924   auto IsTrue = [] (SDValue V) {
925     if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
926       return !N->isZero();
927     return false;
928   };
929   auto IsFalse = [] (SDValue V) {
930     if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
931       return N->isZero();
932     return false;
933   };
934 
935   if (VecLen <= HwLen) {
936     // In the hardware, each bit of a vector predicate corresponds to a byte
937     // of a vector register. Calculate how many bytes does a bit of VecTy
938     // correspond to.
939     assert(HwLen % VecLen == 0);
940     unsigned BitBytes = HwLen / VecLen;
941     for (SDValue V : Values) {
942       AllT &= IsTrue(V);
943       AllF &= IsFalse(V);
944 
945       SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
946                                  : DAG.getUNDEF(MVT::i8);
947       for (unsigned B = 0; B != BitBytes; ++B)
948         Bytes.push_back(Ext);
949     }
950   } else {
951     // There are as many i1 values, as there are bits in a vector register.
952     // Divide the values into groups of 8 and check that each group consists
953     // of the same value (ignoring undefs).
954     for (unsigned I = 0; I != VecLen; I += 8) {
955       unsigned B = 0;
956       // Find the first non-undef value in this group.
957       for (; B != 8; ++B) {
958         if (!Values[I+B].isUndef())
959           break;
960       }
961       SDValue F = Values[I+B];
962       AllT &= IsTrue(F);
963       AllF &= IsFalse(F);
964 
965       SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
966                             : DAG.getUNDEF(MVT::i8);
967       Bytes.push_back(Ext);
968       // Verify that the rest of values in the group are the same as the
969       // first.
970       for (; B != 8; ++B)
971         assert(Values[I+B].isUndef() || Values[I+B] == F);
972     }
973   }
974 
975   if (AllT)
976     return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
977   if (AllF)
978     return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
979 
980   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
981   SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
982   return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
983 }
984 
985 SDValue
986 HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
987       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
988   MVT ElemTy = ty(VecV).getVectorElementType();
989 
990   unsigned ElemWidth = ElemTy.getSizeInBits();
991   assert(ElemWidth >= 8 && ElemWidth <= 32);
992   (void)ElemWidth;
993 
994   SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
995   SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
996                                {VecV, ByteIdx});
997   if (ElemTy == MVT::i32)
998     return ExWord;
999 
1000   // Have an extracted word, need to extract the smaller element out of it.
1001   // 1. Extract the bits of (the original) IdxV that correspond to the index
1002   //    of the desired element in the 32-bit word.
1003   SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1004   // 2. Extract the element from the word.
1005   SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1006   return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1007 }
1008 
1009 SDValue
1010 HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1011       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1012   // Implement other return types if necessary.
1013   assert(ResTy == MVT::i1);
1014 
1015   unsigned HwLen = Subtarget.getVectorLength();
1016   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1017   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1018 
1019   unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1020   SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1021   IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1022 
1023   SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1024   SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1025   return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1026 }
1027 
1028 SDValue
1029 HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1030       SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1031   MVT ElemTy = ty(VecV).getVectorElementType();
1032 
1033   unsigned ElemWidth = ElemTy.getSizeInBits();
1034   assert(ElemWidth >= 8 && ElemWidth <= 32);
1035   (void)ElemWidth;
1036 
1037   auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1038                                      SDValue ByteIdxV) {
1039     MVT VecTy = ty(VecV);
1040     unsigned HwLen = Subtarget.getVectorLength();
1041     SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
1042                                 {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
1043     SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1044     SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1045     SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1046                                {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1047     SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1048     return TorV;
1049   };
1050 
1051   SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1052   if (ElemTy == MVT::i32)
1053     return InsertWord(VecV, ValV, ByteIdx);
1054 
1055   // If this is not inserting a 32-bit word, convert it into such a thing.
1056   // 1. Extract the existing word from the target vector.
1057   SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1058                                 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1059   SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1060                                      dl, MVT::i32, DAG);
1061 
1062   // 2. Treating the extracted word as a 32-bit vector, insert the given
1063   //    value into it.
1064   SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1065   MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1066   SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1067                              ValV, SubIdx, dl, ElemTy, DAG);
1068 
1069   // 3. Insert the 32-bit word back into the original vector.
1070   return InsertWord(VecV, Ins, ByteIdx);
1071 }
1072 
1073 SDValue
1074 HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1075       SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1076   unsigned HwLen = Subtarget.getVectorLength();
1077   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1078   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1079 
1080   unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1081   SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1082   IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1083   ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1084 
1085   SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1086   return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1087 }
1088 
1089 SDValue
1090 HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
1091       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1092   MVT VecTy = ty(VecV);
1093   unsigned HwLen = Subtarget.getVectorLength();
1094   unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1095   MVT ElemTy = VecTy.getVectorElementType();
1096   unsigned ElemWidth = ElemTy.getSizeInBits();
1097 
1098   // If the source vector is a vector pair, get the single vector containing
1099   // the subvector of interest. The subvector will never overlap two single
1100   // vectors.
1101   if (isHvxPairTy(VecTy)) {
1102     unsigned SubIdx;
1103     if (Idx * ElemWidth >= 8*HwLen) {
1104       SubIdx = Hexagon::vsub_hi;
1105       Idx -= VecTy.getVectorNumElements() / 2;
1106     } else {
1107       SubIdx = Hexagon::vsub_lo;
1108     }
1109     VecTy = typeSplit(VecTy).first;
1110     VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1111     if (VecTy == ResTy)
1112       return VecV;
1113   }
1114 
1115   // The only meaningful subvectors of a single HVX vector are those that
1116   // fit in a scalar register.
1117   assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1118 
1119   MVT WordTy = tyVector(VecTy, MVT::i32);
1120   SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1121   unsigned WordIdx = (Idx*ElemWidth) / 32;
1122 
1123   SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1124   SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1125   if (ResTy.getSizeInBits() == 32)
1126     return DAG.getBitcast(ResTy, W0);
1127 
1128   SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1129   SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1130   SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0});
1131   return DAG.getBitcast(ResTy, WW);
1132 }
1133 
1134 SDValue
1135 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1136       const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1137   MVT VecTy = ty(VecV);
1138   unsigned HwLen = Subtarget.getVectorLength();
1139   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1140   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1141   // IdxV is required to be a constant.
1142   unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1143 
1144   unsigned ResLen = ResTy.getVectorNumElements();
1145   unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1146   unsigned Offset = Idx * BitBytes;
1147   SDValue Undef = DAG.getUNDEF(ByteTy);
1148   SmallVector<int,128> Mask;
1149 
1150   if (Subtarget.isHVXVectorType(ResTy, true)) {
1151     // Converting between two vector predicates. Since the result is shorter
1152     // than the source, it will correspond to a vector predicate with the
1153     // relevant bits replicated. The replication count is the ratio of the
1154     // source and target vector lengths.
1155     unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1156     assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1157     for (unsigned i = 0; i != HwLen/Rep; ++i) {
1158       for (unsigned j = 0; j != Rep; ++j)
1159         Mask.push_back(i + Offset);
1160     }
1161     SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1162     return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1163   }
1164 
1165   // Converting between a vector predicate and a scalar predicate. In the
1166   // vector predicate, a group of BitBytes bits will correspond to a single
1167   // i1 element of the source vector type. Those bits will all have the same
1168   // value. The same will be true for ByteVec, where each byte corresponds
1169   // to a bit in the vector predicate.
1170   // The algorithm is to traverse the ByteVec, going over the i1 values from
1171   // the source vector, and generate the corresponding representation in an
1172   // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1173   // elements so that the interesting 8 bytes will be in the low end of the
1174   // vector.
1175   unsigned Rep = 8 / ResLen;
1176   // Make sure the output fill the entire vector register, so repeat the
1177   // 8-byte groups as many times as necessary.
1178   for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1179     // This will generate the indexes of the 8 interesting bytes.
1180     for (unsigned i = 0; i != ResLen; ++i) {
1181       for (unsigned j = 0; j != Rep; ++j)
1182         Mask.push_back(Offset + i*BitBytes);
1183     }
1184   }
1185 
1186   SDValue Zero = getZero(dl, MVT::i32, DAG);
1187   SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1188   // Combine the two low words from ShuffV into a v8i8, and byte-compare
1189   // them against 0.
1190   SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1191   SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1192                            {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1193   SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0});
1194   return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1195                   {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1196 }
1197 
1198 SDValue
1199 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1200       SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1201   MVT VecTy = ty(VecV);
1202   MVT SubTy = ty(SubV);
1203   unsigned HwLen = Subtarget.getVectorLength();
1204   MVT ElemTy = VecTy.getVectorElementType();
1205   unsigned ElemWidth = ElemTy.getSizeInBits();
1206 
1207   bool IsPair = isHvxPairTy(VecTy);
1208   MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1209   // The two single vectors that VecV consists of, if it's a pair.
1210   SDValue V0, V1;
1211   SDValue SingleV = VecV;
1212   SDValue PickHi;
1213 
1214   if (IsPair) {
1215     V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV);
1216     V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV);
1217 
1218     SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1219                                     dl, MVT::i32);
1220     PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1221     if (isHvxSingleTy(SubTy)) {
1222       if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1223         unsigned Idx = CN->getZExtValue();
1224         assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1225         unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1226         return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1227       }
1228       // If IdxV is not a constant, generate the two variants: with the
1229       // SubV as the high and as the low subregister, and select the right
1230       // pair based on the IdxV.
1231       SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1232       SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1233       return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1234     }
1235     // The subvector being inserted must be entirely contained in one of
1236     // the vectors V0 or V1. Set SingleV to the correct one, and update
1237     // IdxV to be the index relative to the beginning of that vector.
1238     SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1239     IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1240     SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1241   }
1242 
1243   // The only meaningful subvectors of a single HVX vector are those that
1244   // fit in a scalar register.
1245   assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1246   // Convert IdxV to be index in bytes.
1247   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1248   if (!IdxN || !IdxN->isZero()) {
1249     IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1250                        DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1251     SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1252   }
1253   // When inserting a single word, the rotation back to the original position
1254   // would be by HwLen-Idx, but if two words are inserted, it will need to be
1255   // by (HwLen-4)-Idx.
1256   unsigned RolBase = HwLen;
1257   if (VecTy.getSizeInBits() == 32) {
1258     SDValue V = DAG.getBitcast(MVT::i32, SubV);
1259     SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V);
1260   } else {
1261     SDValue V = DAG.getBitcast(MVT::i64, SubV);
1262     SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V);
1263     SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V);
1264     SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1265     SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1266                           DAG.getConstant(4, dl, MVT::i32));
1267     SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1268     RolBase = HwLen-4;
1269   }
1270   // If the vector wasn't ror'ed, don't ror it back.
1271   if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1272     SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1273                                DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1274     SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1275   }
1276 
1277   if (IsPair) {
1278     SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1279     SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1280     return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1281   }
1282   return SingleV;
1283 }
1284 
1285 SDValue
1286 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1287       SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1288   MVT VecTy = ty(VecV);
1289   MVT SubTy = ty(SubV);
1290   assert(Subtarget.isHVXVectorType(VecTy, true));
1291   // VecV is an HVX vector predicate. SubV may be either an HVX vector
1292   // predicate as well, or it can be a scalar predicate.
1293 
1294   unsigned VecLen = VecTy.getVectorNumElements();
1295   unsigned HwLen = Subtarget.getVectorLength();
1296   assert(HwLen % VecLen == 0 && "Unexpected vector type");
1297 
1298   unsigned Scale = VecLen / SubTy.getVectorNumElements();
1299   unsigned BitBytes = HwLen / VecLen;
1300   unsigned BlockLen = HwLen / Scale;
1301 
1302   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1303   SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1304   SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1305   SDValue ByteIdx;
1306 
1307   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1308   if (!IdxN || !IdxN->isZero()) {
1309     ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1310                           DAG.getConstant(BitBytes, dl, MVT::i32));
1311     ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1312   }
1313 
1314   // ByteVec is the target vector VecV rotated in such a way that the
1315   // subvector should be inserted at index 0. Generate a predicate mask
1316   // and use vmux to do the insertion.
1317   assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1318   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1319   SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1320                        {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1321   ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1322   // Rotate ByteVec back, and convert to a vector predicate.
1323   if (!IdxN || !IdxN->isZero()) {
1324     SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1325     SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1326     ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1327   }
1328   return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1329 }
1330 
1331 SDValue
1332 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1333       MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1334   // Sign- and any-extending of a vector predicate to a vector register is
1335   // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1336   // a vector of 1s (where the 1s are of type matching the vector type).
1337   assert(Subtarget.isHVXVectorType(ResTy));
1338   if (!ZeroExt)
1339     return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1340 
1341   assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1342   SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1343                              DAG.getConstant(1, dl, MVT::i32));
1344   SDValue False = getZero(dl, ResTy, DAG);
1345   return DAG.getSelect(dl, ResTy, VecV, True, False);
1346 }
1347 
1348 SDValue
1349 HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1350       MVT ResTy, SelectionDAG &DAG) const {
1351   // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1352   // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1353   // vector register. The remaining bits of the vector register are
1354   // unspecified.
1355 
1356   MachineFunction &MF = DAG.getMachineFunction();
1357   unsigned HwLen = Subtarget.getVectorLength();
1358   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1359   MVT PredTy = ty(VecQ);
1360   unsigned PredLen = PredTy.getVectorNumElements();
1361   assert(HwLen % PredLen == 0);
1362   MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1363 
1364   Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1365   SmallVector<Constant*, 128> Tmp;
1366   // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1367   // These are bytes with the LSB rotated left with respect to their index.
1368   for (unsigned i = 0; i != HwLen/8; ++i) {
1369     for (unsigned j = 0; j != 8; ++j)
1370       Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1371   }
1372   Constant *CV = ConstantVector::get(Tmp);
1373   Align Alignment(HwLen);
1374   SDValue CP =
1375       LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1376   SDValue Bytes =
1377       DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1378                   MachinePointerInfo::getConstantPool(MF), Alignment);
1379 
1380   // Select the bytes that correspond to true bits in the vector predicate.
1381   SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1382       getZero(dl, VecTy, DAG));
1383   // Calculate the OR of all bytes in each group of 8. That will compress
1384   // all the individual bits into a single byte.
1385   // First, OR groups of 4, via vrmpy with 0x01010101.
1386   SDValue All1 =
1387       DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1388   SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1389   // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1390   SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1391       {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1392   SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1393 
1394   // Pick every 8th byte and coalesce them at the beginning of the output.
1395   // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1396   // byte and so on.
1397   SmallVector<int,128> Mask;
1398   for (unsigned i = 0; i != HwLen; ++i)
1399     Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1400   SDValue Collect =
1401       DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1402   return DAG.getBitcast(ResTy, Collect);
1403 }
1404 
1405 SDValue
1406 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1407       const {
1408   const SDLoc &dl(Op);
1409   MVT VecTy = ty(Op);
1410 
1411   unsigned Size = Op.getNumOperands();
1412   SmallVector<SDValue,128> Ops;
1413   for (unsigned i = 0; i != Size; ++i)
1414     Ops.push_back(Op.getOperand(i));
1415 
1416   // First, split the BUILD_VECTOR for vector pairs. We could generate
1417   // some pairs directly (via splat), but splats should be generated
1418   // by the combiner prior to getting here.
1419   if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
1420     ArrayRef<SDValue> A(Ops);
1421     MVT SingleTy = typeSplit(VecTy).first;
1422     SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
1423     SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
1424     return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1425   }
1426 
1427   if (VecTy.getVectorElementType() == MVT::i1)
1428     return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1429 
1430   // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1431   // not a legal type, just bitcast the node to use i16
1432   // types and bitcast the result back to f16
1433   if (VecTy.getVectorElementType() == MVT::f16) {
1434     SmallVector<SDValue,64> NewOps;
1435     for (unsigned i = 0; i != Size; i++)
1436       NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1437 
1438     SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1439         tyVector(VecTy, MVT::i16), NewOps);
1440     return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1441   }
1442 
1443   return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1444 }
1445 
1446 SDValue
1447 HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1448       const {
1449   const SDLoc &dl(Op);
1450   MVT VecTy = ty(Op);
1451   MVT ArgTy = ty(Op.getOperand(0));
1452 
1453   if (ArgTy == MVT::f16) {
1454     MVT SplatTy =  MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1455     SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1456     SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1457     SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1458     return DAG.getBitcast(VecTy, Splat);
1459   }
1460 
1461   return SDValue();
1462 }
1463 
// Custom lowering of CONCAT_VECTORS for HVX result types.
//
// Non-bool results:
//  - a concatenation of exactly two operands is returned unchanged,
//  - a concatenation of more operands is expanded into a BUILD_VECTOR of
//    all the individual elements.
// Bool (i1) results:
//  - if the operands are themselves HVX vector types, the concatenation is
//    expressed with HexagonISD::QCAT (recursing through CONCAT_VECTORS of
//    each half when there are more than two operands),
//  - otherwise each operand is converted with createHvxPrefixPred and the
//    pieces are merged in a byte vector that is finally turned into a
//    predicate with HexagonISD::V2Q.
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    // Elems accumulates the elements of all operands, in operand order.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(V, Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
      SDValue V = Elems[i];
      MVT Ty = ty(V);
      if (!isTypeLegal(Ty)) {
        // NTy is the type that the illegal element type Ty is transformed to.
        EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Redo the extraction with result type NTy, then sign-extend
          // in-register from the original element type Ty.
          Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
                                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
                                             V.getOperand(0), V.getOperand(1)),
                                 DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
            break;
          case ISD::UNDEF:
            Elems[i] = DAG.getUNDEF(NTy);
            break;
          case ISD::TRUNCATE:
            // Use the value that was being truncated.
            // NOTE(review): this presumably relies on that value already
            // having a legal type -- confirm.
            Elems[i] = V.getOperand(0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VecTy, dl, Elems);
  }

  // From here on the result is a vector of i1.
  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
    if (NumOp == 2)
      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));

    // More than two operands: concatenate the first and the second half
    // of the operand list separately, then QCAT the two halves.
    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue,4> SV(U.begin(), U.end());
    ArrayRef<SDValue> Ops(SV);

    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_front(NumOp/2));
    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
                             Ops.take_back(NumOp/2));
    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Convert every operand with createHvxPrefixPred. The results are OR-ed
  // into a vector of HwLen bytes below.
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
    Prefixes.push_back(P);
  }

  // InpLen*BitBytes is the number of bytes that one input operand
  // corresponds to in the byte vector.
  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
  SDValue Res = getZero(dl, ByteTy, DAG);
  // Walk the operands from the last to the first: rotate what has been
  // accumulated so far by the size of one input, then OR in the next piece.
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
    Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
  }
  // Convert the byte vector into the resulting predicate.
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
}
1560 
1561 SDValue
1562 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1563       const {
1564   // Change the type of the extracted element to i32.
1565   SDValue VecV = Op.getOperand(0);
1566   MVT ElemTy = ty(VecV).getVectorElementType();
1567   const SDLoc &dl(Op);
1568   SDValue IdxV = Op.getOperand(1);
1569   if (ElemTy == MVT::i1)
1570     return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1571 
1572   return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1573 }
1574 
1575 SDValue
1576 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1577       const {
1578   const SDLoc &dl(Op);
1579   MVT VecTy = ty(Op);
1580   SDValue VecV = Op.getOperand(0);
1581   SDValue ValV = Op.getOperand(1);
1582   SDValue IdxV = Op.getOperand(2);
1583   MVT ElemTy = ty(VecV).getVectorElementType();
1584   if (ElemTy == MVT::i1)
1585     return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1586 
1587   if (ElemTy == MVT::f16) {
1588     SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1589         tyVector(VecTy, MVT::i16),
1590         DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1591         DAG.getBitcast(MVT::i16, ValV), IdxV);
1592     return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1593   }
1594 
1595   return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1596 }
1597 
1598 SDValue
1599 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1600       const {
1601   SDValue SrcV = Op.getOperand(0);
1602   MVT SrcTy = ty(SrcV);
1603   MVT DstTy = ty(Op);
1604   SDValue IdxV = Op.getOperand(1);
1605   unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
1606   assert(Idx % DstTy.getVectorNumElements() == 0);
1607   (void)Idx;
1608   const SDLoc &dl(Op);
1609 
1610   MVT ElemTy = SrcTy.getVectorElementType();
1611   if (ElemTy == MVT::i1)
1612     return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1613 
1614   return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG);
1615 }
1616 
1617 SDValue
1618 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1619       const {
1620   // Idx does not need to be a constant.
1621   SDValue VecV = Op.getOperand(0);
1622   SDValue ValV = Op.getOperand(1);
1623   SDValue IdxV = Op.getOperand(2);
1624 
1625   const SDLoc &dl(Op);
1626   MVT VecTy = ty(VecV);
1627   MVT ElemTy = VecTy.getVectorElementType();
1628   if (ElemTy == MVT::i1)
1629     return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1630 
1631   return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1632 }
1633 
1634 SDValue
1635 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1636   // Lower any-extends of boolean vectors to sign-extends, since they
1637   // translate directly to Q2V. Zero-extending could also be done equally
1638   // fast, but Q2V is used/recognized in more places.
1639   // For all other vectors, use zero-extend.
1640   MVT ResTy = ty(Op);
1641   SDValue InpV = Op.getOperand(0);
1642   MVT ElemTy = ty(InpV).getVectorElementType();
1643   if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1644     return LowerHvxSignExt(Op, DAG);
1645   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1646 }
1647 
1648 SDValue
1649 HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1650   MVT ResTy = ty(Op);
1651   SDValue InpV = Op.getOperand(0);
1652   MVT ElemTy = ty(InpV).getVectorElementType();
1653   if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1654     return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1655   return Op;
1656 }
1657 
1658 SDValue
1659 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1660   MVT ResTy = ty(Op);
1661   SDValue InpV = Op.getOperand(0);
1662   MVT ElemTy = ty(InpV).getVectorElementType();
1663   if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1664     return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1665   return Op;
1666 }
1667 
1668 SDValue
1669 HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1670   // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1671   // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1672   const SDLoc &dl(Op);
1673   MVT ResTy = ty(Op);
1674   SDValue InpV = Op.getOperand(0);
1675   assert(ResTy == ty(InpV));
1676 
1677   // Calculate the vectors of 1 and bitwidth(x).
1678   MVT ElemTy = ty(InpV).getVectorElementType();
1679   unsigned ElemWidth = ElemTy.getSizeInBits();
1680 
1681   SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1682                              DAG.getConstant(1, dl, MVT::i32));
1683   SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1684                              DAG.getConstant(ElemWidth, dl, MVT::i32));
1685   SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1686                               DAG.getConstant(-1, dl, MVT::i32));
1687 
1688   // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1689   // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1690   // it separately in custom combine or selection).
1691   SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1692                           {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1693                            DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1694   return DAG.getNode(ISD::SUB, dl, ResTy,
1695                      {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1696 }
1697 
// Lower MULHS/MULHU on HVX vectors.
//
// The node is treated as signed when its opcode is ISD::MULHS, and as
// unsigned otherwise. The strategy depends on the element type:
//  - i8, i16: one widening multiply, followed by a shuffle that keeps the
//    high half of every product,
//  - i32, signed: the MulHS_V62 sequence when Subtarget.useHVXV62Ops() is
//    true, the MulHS_V60 sequence otherwise,
//  - i32, unsigned: a sequence assembled from unsigned halfword multiplies.
// Any other element type trips the assertion below.
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector());
  const SDLoc &dl(Op);
  SmallVector<int,256> ShuffMask;

  MVT ElemTy = ResTy.getVectorElementType();
  unsigned VecLen = ResTy.getVectorNumElements();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);
  bool IsSigned = Op.getOpcode() == ISD::MULHS;

  if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
    // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
    // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
    // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
    // For i16, use V6_vmpyhv, which behaves in an analogous way to
    // V6_vmpybv: results Lo and Hi are products of even/odd elements
    // respectively.
    MVT ExtTy = typeExtElem(ResTy, 2);
    unsigned MpyOpc = ElemTy == MVT::i8
        ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
        : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
    SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

    // Discard low halves of the resulting values, collect the high halves.
    // The mask indexes elements of type ElemTy: indices below VecLen refer
    // to the first vector passed to getByteShuffle, the remaining ones to
    // the second. The odd indices selected here are the high halves of
    // the double-width products.
    for (unsigned I = 0; I < VecLen; I += 2) {
      ShuffMask.push_back(I+1);         // Pick even element.
      ShuffMask.push_back(I+VecLen+1);  // Pick odd element.
    }
    // Reinterpret the products as elements of the original width and
    // split the pair into its two halves before shuffling.
    VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
    SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
    return DAG.getBitcast(ResTy, BS);
  }

  assert(ElemTy == MVT::i32);
  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // Signed 32-bit multiply-high, used when V62 operations are not
  // available.
  auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
    // mulhs(Vs,Vt) =
    //   = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us Vt] >> 32
    // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
    // anything, so it cannot produce any carry over to higher bits),
    // so everything in [] can be shifted by 16 without loss of precision.
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
    // Denote Hi(Vs) = Vs':
    //   = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
    //   = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
    // Get Vs':
    SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                          {T0, S0, Vt}, DAG);
    // Shift by 16:
    SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
    // Get Vs'*Hi(Vt):
    SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
    // Add:
    SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
    return T3;
  };

  // Signed 32-bit multiply-high, used when V62 operations are available:
  // two instructions produce a vector pair, and the second half of that
  // pair (as returned by opSplit) is the result.
  auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
    MVT PairTy = typeJoin({ResTy, ResTy});
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                          {T0, Vs, Vt}, DAG);
    return opSplit(T1, dl, DAG).second;
  };

  if (IsSigned) {
    if (Subtarget.useHVXV62Ops())
      return MulHS_V62(Vs, Vt);
    return MulHS_V60(Vs, Vt);
  }

  // Unsigned mulhw. (Would expansion using signed mulhw be better?)

  // Helpers extracting the vsub_lo/vsub_hi subregister of a vector pair.
  auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
  };
  auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
  };

  MVT PairTy = typeJoin({ResTy, ResTy});
  // P is the control vector for the V6_vdelta below, which swaps the low
  // and high halfwords of every word in Vt.
  SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
                       {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  // Multiply-unsigned halfwords:
  //   LoVec = Vs.uh[2i] * Vt.uh[2i],
  //   HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
  SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
  // The low halves in the LoVec of the pair can be discarded. They are
  // not added to anything (in the full-precision product), so they cannot
  // produce a carry into the higher bits.
  SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
  // Swap low and high halves in Vt, and do the halfword multiplication
  // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
  SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
  SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
  // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
  // These products are words, but cannot be added directly because the
  // sums could overflow. Add these products, by halfwords, where each sum
  // of a pair of halfwords gives a word.
  SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {LoVec(T2), HiVec(T2)}, DAG);
  // Add the high halfwords from the products of the low halfwords.
  SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
  // Shift the partial sum right by 16, then add the remaining terms: the
  // products of the high halfwords and the HiVec part of the
  // mixed-product sums.
  SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
  SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
  SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
  return T7;
}
1817 
// Custom lowering of BITCAST between HVX boolean vectors and scalar
// integers.
//  - HVX bool vector -> scalar integer: the predicate is compressed with
//    compressHvxPred, and the required number of 32-bit words is extracted
//    from the result and assembled into the scalar.
//  - scalar integer -> HVX bool vector: every bit of the scalar is expanded
//    into one byte of a vector, which is then converted to a predicate
//    with HexagonISD::V2Q.
// Any other bitcast is returned unchanged.
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  SDValue Val = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Val);
  const SDLoc &dl(Op);

  if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    // View the compressed predicate as a vector of 32-bit words.
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      // Only word 0 is needed: return it directly for a 32-bit result,
      // otherwise convert it to the narrower result type.
      SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
          dl, MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(W0, dl, ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    // Extract the first BitWidth/32 words.
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
      Words.push_back(W);
    }
    // Join consecutive words into i64 values. The word with the higher
    // index is passed to COMBINE as the first operand.
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = DAG.getNode(
          HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
      Combines.push_back(C);
    }

    if (BitWidth == 64)
      return Combines[0];

    // 128 bits: pair up the two i64 halves.
    return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
  }
  if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
    unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    assert(BitWidth == HwLen);

    // Reinterpret the scalar as a vector of its bytes.
    MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of Val
    // (there are HwLen/8 such bytes).
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
      SDValue Byte =
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Byte);
        Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
      }
    }

    // Both vectors have HwLen byte-sized elements.
    MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
    SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
    SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
  }

  return Op;
}
1897 
1898 SDValue
1899 HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
1900   // Sign- and zero-extends are legal.
1901   assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
1902   return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
1903                      Op.getOperand(0));
1904 }
1905 
1906 SDValue
1907 HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
1908   MVT ResTy = ty(Op);
1909   if (ResTy.getVectorElementType() != MVT::i1)
1910     return Op;
1911 
1912   const SDLoc &dl(Op);
1913   unsigned HwLen = Subtarget.getVectorLength();
1914   unsigned VecLen = ResTy.getVectorNumElements();
1915   assert(HwLen % VecLen == 0);
1916   unsigned ElemSize = HwLen / VecLen;
1917 
1918   MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
1919   SDValue S =
1920       DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
1921                   DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
1922                   DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
1923   return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
1924 }
1925 
1926 SDValue
1927 HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
1928   if (SDValue S = getVectorShiftByInt(Op, DAG))
1929     return S;
1930   return Op;
1931 }
1932 
1933 SDValue
1934 HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
1935       const SDLoc &dl(Op);
1936   MVT ResTy = ty(Op);
1937 
1938   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1939   bool Use64b = Subtarget.useHVX64BOps();
1940   unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
1941                                 : Intrinsic::hexagon_V6_pred_typecast_128B;
1942   if (IntNo == IntPredCast) {
1943     SDValue Vs = Op.getOperand(1);
1944     MVT OpTy = ty(Vs);
1945     if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
1946       if (ResTy == OpTy)
1947         return Vs;
1948       return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
1949     }
1950   }
1951 
1952   return Op;
1953 }
1954 
// Custom lowering of masked loads and stores (MLOAD/MSTORE) of HVX vectors.
//
// MLOAD: emitted as an ordinary load of the whole vector. When the
//   pass-through value is not undef, a VSELECT on the mask chooses between
//   the loaded value and the pass-through value.
// MSTORE: emitted as the predicated store V6_vS32b_qpred_ai. When the
//   alignment of the access is a multiple of the vector length a single
//   store is generated. Otherwise the mask and the value are each
//   realigned into two vectors, and two predicated stores are generated,
//   at offsets 0 and HwLen from the base address.
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand derived from the original one, with offset 0 and
  // size HwLen.
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    // Load the whole vector; the mask is only applied via the VSELECT.
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    // Return the selected value together with the chain of the load.
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  // Aligned case: a single predicated store is enough.
  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }

  // Unaligned case.
  // StoreAlign(V, A) returns the pair of vectors obtained by realigning V
  // (padded with zeros) according to A.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  // The mask is realigned as a byte vector: convert it with Q2V, realign
  // it, and convert both parts back to predicates with V2Q.
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  // Emit two predicated stores, at offsets 0 and HwLen from Base. Both are
  // chained to the incoming chain and are given the same memory operand.
  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
2024 
2025 SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2026                                                 SelectionDAG &DAG) const {
2027   // This conversion only applies to QFloat.
2028   assert(Subtarget.useHVXQFloatOps());
2029 
2030   assert(Op->getOpcode() == ISD::FP_EXTEND);
2031 
2032   MVT VecTy = ty(Op);
2033   MVT ArgTy = ty(Op.getOperand(0));
2034   const SDLoc &dl(Op);
2035   assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2036 
2037   SDValue F16Vec = Op.getOperand(0);
2038 
2039   APFloat FloatVal = APFloat(1.0f);
2040   bool Ignored;
2041   FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
2042   SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2043   SDValue VmpyVec =
2044       getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2045 
2046   MVT HalfTy = typeSplit(VecTy).first;
2047   VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2048   SDValue LoVec =
2049       getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2050   SDValue HiVec =
2051       getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2052 
2053   SDValue ShuffVec =
2054       getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2055                {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);
2056 
2057   return ShuffVec;
2058 }
2059 
2060 SDValue
2061 HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG)
2062     const {
2063   // This conversion only applies to IEEE.
2064   assert(Subtarget.useHVXIEEEFPOps());
2065 
2066   unsigned Opc = Op.getOpcode();
2067   // Catch invalid conversion ops (just in case).
2068   assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2069          Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2070   MVT ResTy = ty(Op);
2071 
2072   if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) {
2073     MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2074     // There are only conversions of f16.
2075     if (FpTy != MVT::f16)
2076       return SDValue();
2077 
2078     MVT IntTy = ResTy.getVectorElementType();
2079     // Other int types aren't legal in HVX, so we shouldn't see them here.
2080     assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2081     // Conversions to i8 and i16 are legal.
2082     if (IntTy == MVT::i8 || IntTy == MVT::i16)
2083       return Op;
2084   } else {
2085     // Converting int -> fp.
2086     if (ResTy.getVectorElementType() != MVT::f16)
2087       return SDValue();
2088     MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2089     // Other int types aren't legal in HVX, so we shouldn't see them here.
2090     assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2091     // i8, i16 -> f16 is legal.
2092     if (IntTy == MVT::i8 || IntTy == MVT::i16)
2093       return Op;
2094   }
2095 
2096   return SDValue();
2097 }
2098 
2099 SDValue
2100 HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
2101   assert(!Op.isMachineOpcode());
2102   SmallVector<SDValue,2> OpsL, OpsH;
2103   const SDLoc &dl(Op);
2104 
2105   auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
2106     MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2107     SDValue TV = DAG.getValueType(Ty);
2108     return std::make_pair(TV, TV);
2109   };
2110 
2111   for (SDValue A : Op.getNode()->ops()) {
2112     VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
2113                     ? opSplit(A, dl, DAG)
2114                     : std::make_pair(A, A);
2115     // Special case for type operand.
2116     if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2117       if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2118         P = SplitVTNode(N);
2119     }
2120     OpsL.push_back(P.first);
2121     OpsH.push_back(P.second);
2122   }
2123 
2124   MVT ResTy = ty(Op);
2125   MVT HalfTy = typeSplit(ResTy).first;
2126   SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2127   SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2128   SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
2129   return S;
2130 }
2131 
2132 SDValue
2133 HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2134   auto *MemN = cast<MemSDNode>(Op.getNode());
2135 
2136   MVT MemTy = MemN->getMemoryVT().getSimpleVT();
2137   if (!isHvxPairTy(MemTy))
2138     return Op;
2139 
2140   const SDLoc &dl(Op);
2141   unsigned HwLen = Subtarget.getVectorLength();
2142   MVT SingleTy = typeSplit(MemTy).first;
2143   SDValue Chain = MemN->getChain();
2144   SDValue Base0 = MemN->getBasePtr();
2145   SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
2146   unsigned MemOpc = MemN->getOpcode();
2147 
2148   MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
2149   if (MachineMemOperand *MMO = MemN->getMemOperand()) {
2150     MachineFunction &MF = DAG.getMachineFunction();
2151     uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
2152                            ? (uint64_t)MemoryLocation::UnknownSize
2153                            : HwLen;
2154     MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
2155     MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
2156   }
2157 
2158   if (MemOpc == ISD::LOAD) {
2159     assert(cast<LoadSDNode>(Op)->isUnindexed());
2160     SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
2161     SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
2162     return DAG.getMergeValues(
2163         { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
2164           DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2165                       Load0.getValue(1), Load1.getValue(1)) }, dl);
2166   }
2167   if (MemOpc == ISD::STORE) {
2168     assert(cast<StoreSDNode>(Op)->isUnindexed());
2169     VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
2170     SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
2171     SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
2172     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
2173   }
2174 
2175   assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
2176 
2177   auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
2178   assert(MaskN->isUnindexed());
2179   VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
2180   SDValue Offset = DAG.getUNDEF(MVT::i32);
2181 
2182   if (MemOpc == ISD::MLOAD) {
2183     VectorPair Thru =
2184         opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
2185     SDValue MLoad0 =
2186         DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
2187                           Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
2188                           ISD::NON_EXTLOAD, false);
2189     SDValue MLoad1 =
2190         DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
2191                           Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
2192                           ISD::NON_EXTLOAD, false);
2193     return DAG.getMergeValues(
2194         { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
2195           DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2196                       MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
2197   }
2198   if (MemOpc == ISD::MSTORE) {
2199     VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
2200     SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
2201                                          Masks.first, SingleTy, MOp0,
2202                                          ISD::UNINDEXED, false, false);
2203     SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
2204                                          Masks.second, SingleTy, MOp1,
2205                                          ISD::UNINDEXED, false, false);
2206     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
2207   }
2208 
2209   std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
2210   llvm_unreachable(Name.c_str());
2211 }
2212 
2213 SDValue
2214 HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
2215   const SDLoc &dl(Op);
2216   auto *LoadN = cast<LoadSDNode>(Op.getNode());
2217   assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
2218   assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
2219          "Not widening loads of i1 yet");
2220 
2221   SDValue Chain = LoadN->getChain();
2222   SDValue Base = LoadN->getBasePtr();
2223   SDValue Offset = DAG.getUNDEF(MVT::i32);
2224 
2225   MVT ResTy = ty(Op);
2226   unsigned HwLen = Subtarget.getVectorLength();
2227   unsigned ResLen = ResTy.getStoreSize();
2228   assert(ResLen < HwLen && "vsetq(v1) prerequisite");
2229 
2230   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2231   SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
2232                           {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
2233 
2234   MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
2235   MachineFunction &MF = DAG.getMachineFunction();
2236   auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
2237 
2238   SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
2239                                    DAG.getUNDEF(LoadTy), LoadTy, MemOp,
2240                                    ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
2241   SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
2242   return DAG.getMergeValues({Value, Chain}, dl);
2243 }
2244 
2245 SDValue
2246 HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
2247   const SDLoc &dl(Op);
2248   auto *StoreN = cast<StoreSDNode>(Op.getNode());
2249   assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
2250   assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
2251          "Not widening stores of i1 yet");
2252 
2253   SDValue Chain = StoreN->getChain();
2254   SDValue Base = StoreN->getBasePtr();
2255   SDValue Offset = DAG.getUNDEF(MVT::i32);
2256 
2257   SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
2258   MVT ValueTy = ty(Value);
2259   unsigned ValueLen = ValueTy.getVectorNumElements();
2260   unsigned HwLen = Subtarget.getVectorLength();
2261   assert(isPowerOf2_32(ValueLen));
2262 
2263   for (unsigned Len = ValueLen; Len < HwLen; ) {
2264     Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG);
2265     Len = ty(Value).getVectorNumElements(); // This is Len *= 2
2266   }
2267   assert(ty(Value).getVectorNumElements() == HwLen);  // Paranoia
2268 
2269   assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
2270   MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2271   SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
2272                           {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
2273   MachineFunction &MF = DAG.getMachineFunction();
2274   auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
2275   return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
2276                             MemOp, ISD::UNINDEXED, false, false);
2277 }
2278 
2279 SDValue
2280 HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
2281   const SDLoc &dl(Op);
2282   SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2283   MVT ElemTy = ty(Op0).getVectorElementType();
2284   unsigned HwLen = Subtarget.getVectorLength();
2285 
2286   unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
2287   assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
2288   MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
2289   if (!Subtarget.isHVXVectorType(WideOpTy, true))
2290     return SDValue();
2291 
2292   SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
2293   SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
2294   EVT ResTy =
2295       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
2296   SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
2297                               {WideOp0, WideOp1, Op.getOperand(2)});
2298 
2299   EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
2300   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
2301                      {SetCC, getZero(dl, MVT::i32, DAG)});
2302 }
2303 
2304 SDValue
2305 HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2306   const SDLoc &dl(Op);
2307   unsigned HwWidth = 8*Subtarget.getVectorLength();
2308 
2309   SDValue Op0 = Op.getOperand(0);
2310   MVT ResTy = ty(Op);
2311   MVT OpTy = ty(Op0);
2312   if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
2313     return SDValue();
2314 
2315   // .-res, op->      ScalarVec  Illegal      HVX
2316   // Scalar                  ok        -        -
2317   // Illegal      widen(insert)    widen        -
2318   // HVX                      -    widen       ok
2319 
2320   auto getFactor = [HwWidth](MVT Ty) {
2321     unsigned Width = Ty.getSizeInBits();
2322     return HwWidth > Width ? HwWidth / Width : 1;
2323   };
2324 
2325   auto getWideTy = [getFactor](MVT Ty) {
2326     unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
2327     return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
2328   };
2329 
2330   unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
2331                                                        : HexagonISD::VUNPACKU;
2332   SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
2333   SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
2334   return WideRes;
2335 }
2336 
2337 SDValue
2338 HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
2339   const SDLoc &dl(Op);
2340   unsigned HwWidth = 8*Subtarget.getVectorLength();
2341 
2342   SDValue Op0 = Op.getOperand(0);
2343   MVT ResTy = ty(Op);
2344   MVT OpTy = ty(Op0);
2345   if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
2346     return SDValue();
2347 
2348   // .-res, op->  ScalarVec         Illegal      HVX
2349   // Scalar              ok  extract(widen)        -
2350   // Illegal              -           widen    widen
2351   // HVX                  -               -       ok
2352 
2353   auto getFactor = [HwWidth](MVT Ty) {
2354     unsigned Width = Ty.getSizeInBits();
2355     assert(HwWidth % Width == 0);
2356     return HwWidth / Width;
2357   };
2358 
2359   auto getWideTy = [getFactor](MVT Ty) {
2360     unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
2361     return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
2362   };
2363 
2364   if (Subtarget.isHVXVectorType(OpTy))
2365     return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);
2366 
2367   assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
2368 
2369   SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
2370   SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
2371                                 WideOp);
2372   // If the original result wasn't legal and was supposed to be widened,
2373   // we're done.
2374   if (shouldWidenToHvx(ResTy, DAG))
2375     return WideRes;
2376 
2377   // The original result type wasn't meant to be widened to HVX, so
2378   // leave it as it is. Standard legalization should be able to deal
2379   // with it (since now it's a result of a target-idendependent ISD
2380   // node).
2381   assert(ResTy.isVector());
2382   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
2383                      {WideRes, getZero(dl, MVT::i32, DAG)});
2384 }
2385 
2386 SDValue
2387 HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
2388   unsigned Opc = Op.getOpcode();
2389   bool IsPairOp = isHvxPairTy(ty(Op)) ||
2390                   llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
2391                     return isHvxPairTy(ty(V));
2392                   });
2393 
2394   if (IsPairOp) {
2395     switch (Opc) {
2396       default:
2397         break;
2398       case ISD::LOAD:
2399       case ISD::STORE:
2400       case ISD::MLOAD:
2401       case ISD::MSTORE:
2402         return SplitHvxMemOp(Op, DAG);
2403       case ISD::SINT_TO_FP:
2404       case ISD::UINT_TO_FP:
2405       case ISD::FP_TO_SINT:
2406       case ISD::FP_TO_UINT:
2407         if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
2408           return SplitHvxPairOp(Op, DAG);
2409         break;
2410       case ISD::CTPOP:
2411       case ISD::CTLZ:
2412       case ISD::CTTZ:
2413       case ISD::MUL:
2414       case ISD::FADD:
2415       case ISD::FSUB:
2416       case ISD::FMUL:
2417       case ISD::FMINNUM:
2418       case ISD::FMAXNUM:
2419       case ISD::MULHS:
2420       case ISD::MULHU:
2421       case ISD::AND:
2422       case ISD::OR:
2423       case ISD::XOR:
2424       case ISD::SRA:
2425       case ISD::SHL:
2426       case ISD::SRL:
2427       case ISD::SMIN:
2428       case ISD::SMAX:
2429       case ISD::UMIN:
2430       case ISD::UMAX:
2431       case ISD::SETCC:
2432       case ISD::VSELECT:
2433       case ISD::SIGN_EXTEND:
2434       case ISD::ZERO_EXTEND:
2435       case ISD::SIGN_EXTEND_INREG:
2436       case ISD::SPLAT_VECTOR:
2437         return SplitHvxPairOp(Op, DAG);
2438     }
2439   }
2440 
2441   switch (Opc) {
2442     default:
2443       break;
2444     case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
2445     case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
2446     case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
2447     case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
2448     case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
2449     case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
2450     case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
2451     case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
2452     case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
2453     case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
2454     case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
2455     case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
2456     case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
2457     case ISD::SRA:
2458     case ISD::SHL:
2459     case ISD::SRL:                     return LowerHvxShift(Op, DAG);
2460     case ISD::MULHS:
2461     case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
2462     case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
2463     case ISD::SETCC:
2464     case ISD::INTRINSIC_VOID:          return Op;
2465     case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
2466     case ISD::MLOAD:
2467     case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
2468     // Unaligned loads will be handled by the default lowering.
2469     case ISD::LOAD:                    return SDValue();
2470     case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
2471     case ISD::FP_TO_SINT:
2472     case ISD::FP_TO_UINT:
2473     case ISD::SINT_TO_FP:
2474     case ISD::UINT_TO_FP:              return LowerHvxConvertFpInt(Op, DAG);
2475   }
2476 #ifndef NDEBUG
2477   Op.dumpr(&DAG);
2478 #endif
2479   llvm_unreachable("Unhandled HVX operation");
2480 }
2481 
2482 void
2483 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
2484       SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2485   unsigned Opc = N->getOpcode();
2486   SDValue Op(N, 0);
2487 
2488   switch (Opc) {
2489     case ISD::ANY_EXTEND:
2490     case ISD::SIGN_EXTEND:
2491     case ISD::ZERO_EXTEND:
2492       if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2493         if (SDValue T = WidenHvxExtend(Op, DAG))
2494           Results.push_back(T);
2495       }
2496       break;
2497     case ISD::SETCC:
2498       if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2499         if (SDValue T = WidenHvxSetCC(Op, DAG))
2500           Results.push_back(T);
2501       }
2502       break;
2503     case ISD::TRUNCATE:
2504       if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
2505         if (SDValue T = WidenHvxTruncate(Op, DAG))
2506           Results.push_back(T);
2507       }
2508       break;
2509     case ISD::STORE: {
2510       if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
2511         SDValue Store = WidenHvxStore(Op, DAG);
2512         Results.push_back(Store);
2513       }
2514       break;
2515     }
2516     case ISD::MLOAD:
2517       if (isHvxPairTy(ty(Op))) {
2518         SDValue S = SplitHvxMemOp(Op, DAG);
2519         assert(S->getOpcode() == ISD::MERGE_VALUES);
2520         Results.push_back(S.getOperand(0));
2521         Results.push_back(S.getOperand(1));
2522       }
2523       break;
2524     case ISD::MSTORE:
2525       if (isHvxPairTy(ty(Op->getOperand(1)))) {    // Stored value
2526         SDValue S = SplitHvxMemOp(Op, DAG);
2527         Results.push_back(S);
2528       }
2529       break;
2530     default:
2531       break;
2532   }
2533 }
2534 
2535 void
2536 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
2537       SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2538   unsigned Opc = N->getOpcode();
2539   SDValue Op(N, 0);
2540   switch (Opc) {
2541     case ISD::ANY_EXTEND:
2542     case ISD::SIGN_EXTEND:
2543     case ISD::ZERO_EXTEND:
2544       if (shouldWidenToHvx(ty(Op), DAG)) {
2545         if (SDValue T = WidenHvxExtend(Op, DAG))
2546           Results.push_back(T);
2547       }
2548       break;
2549     case ISD::SETCC:
2550       if (shouldWidenToHvx(ty(Op), DAG)) {
2551         if (SDValue T = WidenHvxSetCC(Op, DAG))
2552           Results.push_back(T);
2553       }
2554       break;
2555     case ISD::TRUNCATE:
2556       if (shouldWidenToHvx(ty(Op), DAG)) {
2557         if (SDValue T = WidenHvxTruncate(Op, DAG))
2558           Results.push_back(T);
2559       }
2560       break;
2561     case ISD::LOAD: {
2562       if (shouldWidenToHvx(ty(Op), DAG)) {
2563         SDValue Load = WidenHvxLoad(Op, DAG);
2564         assert(Load->getOpcode() == ISD::MERGE_VALUES);
2565         Results.push_back(Load.getOperand(0));
2566         Results.push_back(Load.getOperand(1));
2567       }
2568       break;
2569     }
2570     case ISD::BITCAST:
2571       if (isHvxBoolTy(ty(N->getOperand(0)))) {
2572         SDValue Op(N, 0);
2573         SDValue C = LowerHvxBitcast(Op, DAG);
2574         Results.push_back(C);
2575       }
2576       break;
2577     default:
2578       break;
2579   }
2580 }
2581 
2582 SDValue
2583 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
2584       const {
2585   const SDLoc &dl(N);
2586   SelectionDAG &DAG = DCI.DAG;
2587   SDValue Op(N, 0);
2588   unsigned Opc = Op.getOpcode();
2589   if (DCI.isBeforeLegalizeOps())
2590     return SDValue();
2591 
2592   SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
2593 
2594   switch (Opc) {
2595     case ISD::VSELECT: {
2596       // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
2597       SDValue Cond = Ops[0];
2598       if (Cond->getOpcode() == ISD::XOR) {
2599         SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
2600         if (C1->getOpcode() == HexagonISD::QTRUE)
2601           return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
2602       }
2603       break;
2604     }
2605     case HexagonISD::V2Q:
2606       if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
2607         if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
2608           return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
2609                              : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
2610       }
2611       break;
2612     case HexagonISD::Q2V:
2613       if (Ops[0].getOpcode() == HexagonISD::QTRUE)
2614         return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
2615                            DAG.getConstant(-1, dl, MVT::i32));
2616       if (Ops[0].getOpcode() == HexagonISD::QFALSE)
2617         return getZero(dl, ty(Op), DAG);
2618       break;
2619     case HexagonISD::VINSERTW0:
2620       if (isUndef(Ops[1]))
2621         return Ops[0];;
2622       break;
2623     case HexagonISD::VROR: {
2624       if (Ops[0].getOpcode() == HexagonISD::VROR) {
2625         SDValue Vec = Ops[0].getOperand(0);
2626         SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
2627         SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
2628         return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
2629       }
2630       break;
2631     }
2632   }
2633 
2634   return SDValue();
2635 }
2636 
2637 bool
2638 HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
2639   auto Action = getPreferredHvxVectorAction(Ty);
2640   if (Action == TargetLoweringBase::TypeWidenVector) {
2641     EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2642     assert(WideTy.isSimple());
2643     return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);
2644   }
2645   return false;
2646 }
2647 
2648 bool
2649 HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
2650   if (!Subtarget.useHVXOps())
2651     return false;
2652   // If the type of any result, or any operand type are HVX vector types,
2653   // this is an HVX operation.
2654   auto IsHvxTy = [this](EVT Ty) {
2655     return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
2656   };
2657   auto IsHvxOp = [this](SDValue Op) {
2658     return Op.getValueType().isSimple() &&
2659            Subtarget.isHVXVectorType(ty(Op), true);
2660   };
2661   if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
2662     return true;
2663 
2664   // Check if this could be an HVX operation after type widening.
2665   auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
2666     if (!Op.getValueType().isSimple())
2667       return false;
2668     MVT ValTy = ty(Op);
2669     return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
2670   };
2671 
2672   for (int i = 0, e = N->getNumValues(); i != e; ++i) {
2673     if (IsWidenedToHvx(SDValue(N, i)))
2674       return true;
2675   }
2676   return llvm::any_of(N->ops(), IsWidenedToHvx);
2677 }
2678