1//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10//-----------------------------------
11// Vector Specific
12//-----------------------------------
13
14//
15// All vector instructions derive from NVPTXVecInst
16//
17
// Base class of the vector instructions in this file.  The operand lists,
// assembly string and selection pattern are forwarded to NVPTXInst
// unchanged; in addition the instruction named by `sInst` (NOP unless the
// caller supplies one) is stored in the `scalarInst` field.
class NVPTXVecInst<dag outs,
                   dag ins,
                   string asmstr,
                   list<dag> pattern,
                   NVPTXInst sInst = NOP>
    : NVPTXInst<outs, ins, asmstr, pattern> {
  NVPTXInst scalarInst = sInst;
}
23
// Shape shared by every element-extract instruction below: one PTX mov whose
// source is $src with the element chosen by the immediate $c, matched against
// a vector_extract node.
//   elemRC - register class of the scalar result
//   vecRC  - register class of the vector source
//   vecVT  - value type used to type $src inside the selection pattern
//   asmty  - type suffix of the emitted mov ("u16", "f32", ...)
//   sInst  - instruction stored in NVPTXVecInst's scalarInst field
// elemRC is typed with the generic RegisterClass because only the vector
// register classes are visibly passed as NVPTXRegClass in this file.
class VecExtractInst<RegisterClass elemRC, NVPTXRegClass vecRC,
                     ValueType vecVT, string asmty, NVPTXInst sInst>
  : NVPTXVecInst<(outs elemRC:$dst),
                 (ins vecRC:$src, i8imm:$c),
                 !strconcat("mov.", asmty, " \t$dst, $src${c:vecelem};"),
                 [(set elemRC:$dst,
                       (vector_extract (vecVT vecRC:$src), imm:$c))],
                 sInst>;

let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
// 16-bit integer elements.
def V2i16Extract : VecExtractInst<Int16Regs, V2I16Regs, v2i16, "u16",
                                  IMOV16rr>;
def V4i16Extract : VecExtractInst<Int16Regs, V4I16Regs, v4i16, "u16",
                                  IMOV16rr>;

// 8-bit integer elements; note the emitted mov uses the u16 type.
def V2i8Extract  : VecExtractInst<Int8Regs, V2I8Regs, v2i8, "u16",
                                  IMOV8rr>;
def V4i8Extract  : VecExtractInst<Int8Regs, V4I8Regs, v4i8, "u16",
                                  IMOV8rr>;

// Two-element vectors of 32- and 64-bit integer / floating-point values.
def V2i32Extract : VecExtractInst<Int32Regs, V2I32Regs, v2i32, "u32",
                                  IMOV32rr>;
def V2f32Extract : VecExtractInst<Float32Regs, V2F32Regs, v2f32, "f32",
                                  FMOV32rr>;
def V2i64Extract : VecExtractInst<Int64Regs, V2I64Regs, v2i64, "u64",
                                  IMOV64rr>;
def V2f64Extract : VecExtractInst<Float64Regs, V2F64Regs, v2f64, "f64",
                                  FMOV64rr>;

// Four-element vectors of 32-bit values.
def V4i32Extract : VecExtractInst<Int32Regs, V4I32Regs, v4i32, "u32",
                                  IMOV32rr>;
def V4f32Extract : VecExtractInst<Float32Regs, V4F32Regs, v4f32, "f32",
                                  FMOV32rr>;
}
105
// Shape shared by every element-insert instruction below.  Two PTX movs are
// emitted: a whole-vector mov from $src to $dst, then a scalar mov of $val
// into the element of $dst chosen by the immediate $c.  The instruction is
// matched against a vector_insert node.
//   vecRC  - register class of the vector source and result
//   elemRC - register class of the inserted scalar
//   vecsz  - vector size token of the first mov ("v2" or "v4")
//   asmty  - type suffix used by both movs ("u16", "f32", ...)
//   sInst  - instruction stored in NVPTXVecInst's scalarInst field
// elemRC is typed with the generic RegisterClass because only the vector
// register classes are visibly passed as NVPTXRegClass in this file.
class VecInsertInst<NVPTXRegClass vecRC, RegisterClass elemRC,
                    string vecsz, string asmty, NVPTXInst sInst>
  : NVPTXVecInst<(outs vecRC:$dst),
                 (ins vecRC:$src, elemRC:$val, i8imm:$c),
                 !strconcat("mov.", vecsz, ".", asmty,
                            " \t${dst:vecfull}, ${src:vecfull};",
                            "\n\tmov.", asmty,
                            " \t$dst${c:vecelem}, $val;"),
                 [(set vecRC:$dst,
                       (vector_insert vecRC:$src, elemRC:$val, imm:$c))],
                 sInst>;

let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
// 8-bit integer elements; note both movs use the u16 type.
def V2i8Insert  : VecInsertInst<V2I8Regs, Int8Regs, "v2", "u16", IMOV8rr>;
def V4i8Insert  : VecInsertInst<V4I8Regs, Int8Regs, "v4", "u16", IMOV8rr>;

// 16-bit integer elements.
def V2i16Insert : VecInsertInst<V2I16Regs, Int16Regs, "v2", "u16", IMOV16rr>;
def V4i16Insert : VecInsertInst<V4I16Regs, Int16Regs, "v4", "u16", IMOV16rr>;

// Two-element vectors of 32- and 64-bit integer / floating-point values.
def V2i32Insert : VecInsertInst<V2I32Regs, Int32Regs, "v2", "u32", IMOV32rr>;
def V2f32Insert : VecInsertInst<V2F32Regs, Float32Regs, "v2", "f32",
                                FMOV32rr>;
def V2i64Insert : VecInsertInst<V2I64Regs, Int64Regs, "v2", "u64", IMOV64rr>;
def V2f64Insert : VecInsertInst<V2F64Regs, Float64Regs, "v2", "f64",
                                FMOV64rr>;

// Four-element vectors of 32-bit values.
def V4i32Insert : VecInsertInst<V4I32Regs, Int32Regs, "v4", "u32", IMOV32rr>;
def V4f32Insert : VecInsertInst<V4F32Regs, Float32Regs, "v4", "f32",
                                FMOV32rr>;
}
197
// Carrier for an assembly string: the template argument is exposed,
// unchanged, as field `s`.  The *AsmStr / *MADStr / *UnaryStr classes derive
// from it so that instruction classes can accept any of them.
class BinOpAsmString<string c>
{
  string s = c;
}
201
// Assembly text for a 4-element, two-source operation: `opcode` is emitted
// four times, once for each of the _0 .. _3 suffixed forms of $dst, $a, $b.
class V4AsmStr<string opcode>
  : BinOpAsmString<!strconcat(
      opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
      opcode, " \t${dst}_1, ${a}_1, ${b}_1;\n\t",
      opcode, " \t${dst}_2, ${a}_2, ${b}_2;\n\t",
      opcode, " \t${dst}_3, ${a}_3, ${b}_3;")>;
209
// Assembly text for a 2-element, two-source operation: `opcode` is emitted
// twice, for the _0 and _1 suffixed forms of $dst, $a and $b.
class V2AsmStr<string opcode>
  : BinOpAsmString<!strconcat(
      opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
      opcode, " \t${dst}_1, ${a}_1, ${b}_1;")>;
214
// Assembly text for a 4-element, three-source operation: `opcode` is emitted
// four times over the _0 .. _3 suffixed forms of $dst, $a, $b and $c.
class V4MADStr<string opcode>
  : BinOpAsmString<!strconcat(
      opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
      opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t",
      opcode, " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t",
      opcode, " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
222
// Assembly text for a 2-element, three-source operation: `opcode` is emitted
// twice, for the _0 and _1 suffixed forms of $dst, $a, $b and $c.
class V2MADStr<string opcode>
  : BinOpAsmString<!strconcat(
      opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
      opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
227
// Assembly text for a 4-element, single-source operation: `opcode` is
// emitted four times over the _0 .. _3 suffixed forms of $dst and $a.
class V4UnaryStr<string opcode>
  : BinOpAsmString<!strconcat(
      opcode, " \t${dst}_0, ${a}_0;\n\t",
      opcode, " \t${dst}_1, ${a}_1;\n\t",
      opcode, " \t${dst}_2, ${a}_2;\n\t",
      opcode, " \t${dst}_3, ${a}_3;")>;
235
// Assembly text for a 2-element, single-source operation: `opcode` is
// emitted twice, for the _0 and _1 suffixed forms of $dst and $a.
class V2UnaryStr<string opcode>
  : BinOpAsmString<!strconcat(
      opcode, " \t${dst}_0, ${a}_0;\n\t",
      opcode, " \t${dst}_1, ${a}_1;")>;
240
// Two-source vector instruction.  $dst, $a and $b all use `regclass`, the
// selection pattern is (OpNode $a, $b), and the assembly text is taken from
// the `s` field of `asmstr`.  `sInst` is passed through to NVPTXVecInst.
class VecBinaryOp<BinOpAsmString asmstr,
                  SDNode OpNode,
                  NVPTXRegClass regclass,
                  NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins regclass:$a, regclass:$b),
                 asmstr.s,
                 [(set regclass:$dst,
                       (OpNode regclass:$a, regclass:$b))],
                 sInst>;
247
// Two-source vector instruction whose second source uses a different
// register class from the first: $dst and $a use `regclass1`, $b uses
// `regclass2`.  The selection pattern is (OpNode $a, $b).
class VecShiftOp<BinOpAsmString asmstr,
                 SDNode OpNode,
                 NVPTXRegClass regclass1,
                 NVPTXRegClass regclass2,
                 NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass1:$dst),
                 (ins regclass1:$a, regclass2:$b),
                 asmstr.s,
                 [(set regclass1:$dst,
                       (OpNode regclass1:$a, regclass2:$b))],
                 sInst>;
254
// Single-source vector instruction.  $dst and $a both use `regclass`; the
// selection pattern applies the pattern fragment `OpNode` to $a.
class VecUnaryOp<BinOpAsmString asmstr,
                 PatFrag OpNode,
                 NVPTXRegClass regclass,
                 NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins regclass:$a),
                 asmstr.s,
                 [(set regclass:$dst, (OpNode regclass:$a))],
                 sInst>;
260
// Instantiates one VecBinaryOp per integer vector type for a single
// operation.  `asmstr` is the opcode text up to the element width (for
// example "add.s"); the width digits are appended here.  The i8 vector forms
// are emitted with the "16" suffix, exactly like the i16 forms.  Each
// i<N>op argument is handed to the defs of the matching element width.
multiclass IntBinVOp<string asmstr, SDNode OpNode,
                     NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                     NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>,
                          OpNode, V2I64Regs, i64op>;

  def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>,
                          OpNode, V4I32Regs, i32op>;
  def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>,
                          OpNode, V2I32Regs, i32op>;

  def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V4I16Regs, i16op>;
  def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V2I16Regs, i16op>;

  def V4I8  : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V4I8Regs, i8op>;
  def V2I8  : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V2I8Regs, i8op>;
}
279
// Instantiates one VecBinaryOp per floating-point vector type for a single
// operation.  `asmstr` is the opcode text including its trailing dot (for
// example "add."); the type suffix is appended here.  The *_ftz variants
// use the "ftz.f32" suffix and are only available under doF32FTZ.
multiclass FloatBinVOp<string asmstr, SDNode OpNode,
                       NVPTXInst f64 = NOP,
                       NVPTXInst f32 = NOP,
                       NVPTXInst f32_ftz = NOP> {
  def V2F64     : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>,
                              OpNode, V2F64Regs, f64>;

  def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>,
                              OpNode, V4F32Regs, f32_ftz>,
                  Requires<[doF32FTZ]>;
  def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>,
                              OpNode, V2F32Regs, f32_ftz>,
                  Requires<[doF32FTZ]>;

  def V4F32     : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>,
                              OpNode, V4F32Regs, f32>;
  def V2F32     : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>,
                              OpNode, V2F32Regs, f32>;
}
294
// Instantiates one VecUnaryOp per integer vector type for a single
// operation.  `asmstr` is the opcode text up to the element width; the
// width digits are appended here.  As in IntBinVOp, the i8 vector forms are
// emitted with the "16" suffix.
multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
                       NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                       NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>,
                         OpNode, V2I64Regs, i64op>;

  def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>,
                         OpNode, V4I32Regs, i32op>;
  def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>,
                         OpNode, V2I32Regs, i32op>;

  def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V4I16Regs, i16op>;
  def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V2I16Regs, i16op>;

  def V4I8  : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V4I8Regs, i8op>;
  def V2I8  : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V2I8Regs, i8op>;
}
313
314
315// Integer Arithmetic
let VecInstType=isVecOther.Value in {
// add / sub for every integer vector type.
defm VAdd : IntBinVOp<"add.s", add,
                      ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
defm VSub : IntBinVOp<"sub.s", sub,
                      SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;

// Carry-producing (addc / subc) and carry-consuming (adde / sube) forms;
// only the 32-bit vector types are provided.
def AddCCV4I32  : VecBinaryOp<V4AsmStr<"add.cc.s32">,  addc, V4I32Regs,
                              ADDCCi32rr>;
def AddCCV2I32  : VecBinaryOp<V2AsmStr<"add.cc.s32">,  addc, V2I32Regs,
                              ADDCCi32rr>;
def SubCCV4I32  : VecBinaryOp<V4AsmStr<"sub.cc.s32">,  subc, V4I32Regs,
                              SUBCCi32rr>;
def SubCCV2I32  : VecBinaryOp<V2AsmStr<"sub.cc.s32">,  subc, V2I32Regs,
                              SUBCCi32rr>;
def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
                              ADDCCCi32rr>;
def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
                              ADDCCCi32rr>;
def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
                              SUBCCCi32rr>;
def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
                              SUBCCCi32rr>;

// Shift left.  The shift amount operand is always a vector of i32 with the
// same element count as the shifted value.
def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl,
                             V2I64Regs, V2I32Regs, SHLi64rr>;
def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl,
                             V2I32Regs, V2I32Regs, SHLi32rr>;
def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl,
                             V4I32Regs, V4I32Regs, SHLi32rr>;
def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl,
                             V2I16Regs, V2I32Regs, SHLi16rr>;
def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl,
                             V4I16Regs, V4I32Regs, SHLi16rr>;
def ShiftLV2I8  : VecShiftOp<V2AsmStr<"shl.b16">, shl,
                             V2I8Regs, V2I32Regs, SHLi8rr>;
def ShiftLV4I8  : VecShiftOp<V4AsmStr<"shl.b16">, shl,
                             V4I8Regs, V4I32Regs, SHLi8rr>;
}
352
353// cvt to v*i32, helpers for shift
// Conversion instruction from `inclass` ($s) to `outclass` ($d).  It carries
// no selection pattern of its own (the pattern list is empty), so it is only
// reachable when another pattern names it explicitly.
class CVTtoVeci32<NVPTXRegClass inclass,
                  NVPTXRegClass outclass,
                  string asmstr,
                  NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs outclass:$d),
                 (ins inclass:$s),
                 asmstr,
                 [],
                 sInst>;
357
// Builds one assembly line of the form "<op>\t<dest>, <src>;" in field `s`.
class VecCVTStrHelper<string op, string dest, string src>
{
  string s = !strconcat(op, "\t", dest, ", ", src, ";");
}
362
// Two VecCVTStrHelper lines, for the _0 and _1 suffixed forms of $d and $s,
// separated by "\n\t".
class Vec2CVTStr<string op>
{
  string s = !strconcat(
      VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, "\n\t",
      VecCVTStrHelper<op, "${d}_1", "${s}_1">.s);
}
367
// Four VecCVTStrHelper lines, for the _0 .. _3 suffixed forms of $d and $s,
// separated by "\n\t".
class Vec4CVTStr<string op>
{
  string s = !strconcat(
      VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, "\n\t",
      VecCVTStrHelper<op, "${d}_1", "${s}_1">.s, "\n\t",
      VecCVTStrHelper<op, "${d}_2", "${s}_2">.s, "\n\t",
      VecCVTStrHelper<op, "${d}_3", "${s}_3">.s);
}
376
let VecInstType=isVecOther.Value in {
// i8 and i16 sources are both converted with cvt.u32.u16.
def CVTv2i8tov2i32  : CVTtoVeci32<V2I8Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u16">.s,
                                  Zint_extendext8to32>;
def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u16">.s,
                                  Zint_extendext16to32>;
def CVTv4i8tov4i32  : CVTtoVeci32<V4I8Regs, V4I32Regs,
                                  Vec4CVTStr<"cvt.u32.u16">.s,
                                  Zint_extendext8to32>;
def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
                                  Vec4CVTStr<"cvt.u32.u16">.s,
                                  Zint_extendext16to32>;

// i64 sources are converted with cvt.u32.u64.
def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u64">.s,
                                  TRUNC_64to32>;
}
389
// shl where the shift amount has the same vector type as the shifted value:
// the amount is first converted to the matching i32 vector, then fed to the
// ShiftL* instruction (which takes an i32-vector amount).

// Two-element vectors.
def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftLV2I16 V2I16Regs:$src1,
                       (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftLV2I8 V2I8Regs:$src1,
                      (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftLV2I64 V2I64Regs:$src1,
                       (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Four-element vectors.
def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftLV4I16 V4I16Regs:$src1,
                       (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftLV4I8 V4I8Regs:$src1,
                      (CVTv4i8tov4i32 V4I8Regs:$src2))>;
401
let VecInstType=isVecOther.Value in {
// Shift right, sra node (shr.s*).  The shift amount operand is a vector of
// i32 with the same element count as the shifted value.
def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra,
                              V2I64Regs, V2I32Regs, SRAi64rr>;
def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra,
                              V2I32Regs, V2I32Regs, SRAi32rr>;
def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra,
                              V4I32Regs, V4I32Regs, SRAi32rr>;
def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra,
                              V2I16Regs, V2I32Regs, SRAi16rr>;
def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra,
                              V4I16Regs, V4I32Regs, SRAi16rr>;
def ShiftRAV2I8  : VecShiftOp<V2AsmStr<"shr.s16">, sra,
                              V2I8Regs, V2I32Regs, SRAi8rr>;
def ShiftRAV4I8  : VecShiftOp<V4AsmStr<"shr.s16">, sra,
                              V4I8Regs, V4I32Regs, SRAi8rr>;

// Shift right, srl node (shr.u*).
def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl,
                              V2I64Regs, V2I32Regs, SRLi64rr>;
def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl,
                              V2I32Regs, V2I32Regs, SRLi32rr>;
def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl,
                              V4I32Regs, V4I32Regs, SRLi32rr>;
def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl,
                              V2I16Regs, V2I32Regs, SRLi16rr>;
def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl,
                              V4I16Regs, V4I32Regs, SRLi16rr>;
def ShiftRLV2I8  : VecShiftOp<V2AsmStr<"shr.u16">, srl,
                              V2I8Regs, V2I32Regs, SRLi8rr>;
def ShiftRLV4I8  : VecShiftOp<V4AsmStr<"shr.u16">, srl,
                              V4I8Regs, V4I32Regs, SRLi8rr>;

// Multiply: low half, and high half for the mulhs / mulhu nodes.
defm VMult   : IntBinVOp<"mul.lo.s", mul,
                         MULTi64rr, MULTi32rr, MULTi16rr, MULTi8rr>;
defm VMultHS : IntBinVOp<"mul.hi.s", mulhs,
                         MULTHSi64rr, MULTHSi32rr, MULTHSi16rr, MULTHSi8rr>;
defm VMultHU : IntBinVOp<"mul.hi.u", mulhu,
                         MULTHUi64rr, MULTHUi32rr, MULTHUi16rr, MULTHUi8rr>;

// Divide and remainder, one family each for the s* and u* nodes.
defm VSDiv   : IntBinVOp<"div.s", sdiv,
                         SDIVi64rr, SDIVi32rr, SDIVi16rr, SDIVi8rr>;
defm VUDiv   : IntBinVOp<"div.u", udiv,
                         UDIVi64rr, UDIVi32rr, UDIVi16rr, UDIVi8rr>;
defm VSRem   : IntBinVOp<"rem.s", srem,
                         SREMi64rr, SREMi32rr, SREMi16rr, SREMi8rr>;
defm VURem   : IntBinVOp<"rem.u", urem,
                         UREMi64rr, UREMi32rr, UREMi16rr, UREMi8rr>;
}
450
// sra where the shift amount has the same vector type as the shifted value:
// the amount is converted to the matching i32 vector before being handed to
// the ShiftRA* instruction.

// Two-element vectors.
def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRAV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRAV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRAV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Four-element vectors.
def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRAV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRAV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;
462
// srl where the shift amount has the same vector type as the shifted value:
// the amount is converted to the matching i32 vector before being handed to
// the ShiftRL* instruction.

// Two-element vectors.
def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRLV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRLV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRLV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Four-element vectors.
def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRLV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRLV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;
474
// Multiply-add in 4- and 2-element flavours.  Both defs match
// (an (mn $a, $b), $c), i.e. `mn` is applied to $a and $b and `an` combines
// that result with $c.  `asmstr` is the complete opcode text, `sop` is
// passed through as the scalar instruction, and both defs are guarded by
// `Pred`.
multiclass VMAD<string asmstr,
                NVPTXRegClass regclassv4,
                NVPTXRegClass regclassv2,
                SDNode an = add, SDNode mn = mul,
                NVPTXInst sop = NOP,
                Predicate Pred> {
  def V4 : NVPTXVecInst<
             (outs regclassv4:$dst),
             (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
             V4MADStr<asmstr>.s,
             [(set regclassv4:$dst,
                   (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
             sop>,
           Requires<[Pred]>;

  def V2 : NVPTXVecInst<
             (outs regclassv2:$dst),
             (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
             V2MADStr<asmstr>.s,
             [(set regclassv2:$dst,
                   (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
             sop>,
           Requires<[Pred]>;
}
494
// Integer multiply-add with only a 2-element form: matches
// (add (mul $a, $b), $c) over `regclass`, guarded by `Pred`.
multiclass VMADV2Only<string asmstr,
                      NVPTXRegClass regclass,
                      NVPTXInst sop = NOP,
                      Predicate Pred> {
  def V2 : NVPTXVecInst<
             (outs regclass:$dst),
             (ins regclass:$a, regclass:$b, regclass:$c),
             V2MADStr<asmstr>.s,
             [(set regclass:$dst,
                   (add (mul regclass:$a, regclass:$b), regclass:$c))],
             sop>,
           Requires<[Pred]>;
}
// Floating-point multiply-add with only a 2-element form: matches
// (fadd (fmul $a, $b), $c) over `regclass`, guarded by `Pred`.
multiclass VFMADV2Only<string asmstr,
                       NVPTXRegClass regclass,
                       NVPTXInst sop = NOP,
                       Predicate Pred> {
  def V2 : NVPTXVecInst<
             (outs regclass:$dst),
             (ins regclass:$a, regclass:$b, regclass:$c),
             V2MADStr<asmstr>.s,
             [(set regclass:$dst,
                   (fadd (fmul regclass:$a, regclass:$b), regclass:$c))],
             sop>,
           Requires<[Pred]>;
}
513
let VecInstType=isVecOther.Value in {
// Integer multiply-add.  The i8 forms use the same mad.lo.s16 opcode as the
// i16 forms; i64 only has a 2-element form.  All are guarded by `true`.
defm I8MAD  : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs,
                   add, mul, MAD8rrr, true>;
defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs,
                   add, mul, MAD16rrr, true>;
defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs,
                   add, mul, MAD32rrr, true>;
defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;

// Integer negate.
defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;

// Floating-point add / subtract / multiply.
defm VAddf : FloatBinVOp<"add.", fadd,
                         FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
defm VSubf : FloatBinVOp<"sub.", fsub,
                         FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
defm VMulf : FloatBinVOp<"mul.", fmul,
                         FMULf64rr, FMULf32rr, FMULf32rr_ftz>;

// Floating-point multiply-add: mad and fma.rn opcodes, each with an ftz
// variant, selected by the corresponding do* predicate.
defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs,
                       fadd, fmul, FMAD32_ftzrrr, doFMADF32_ftz>;
defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs,
                       fadd, fmul, FMA32_ftzrrr, doFMAF32_ftz>;
defm F32MAD     : VMAD<"mad.f32", V4F32Regs, V2F32Regs,
                       fadd, fmul, FMAD32rrr, doFMADF32>;
defm F32FMA     : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs,
                       fadd, fmul, FMA32rrr, doFMAF32>;
defm F64FMA     : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
}
538
let VecInstType=isVecOther.Value in {
// f32 divide emitted as div.rn; requires reqPTX20 (and doF32FTZ for the
// ftz forms).
def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv,
                                    V4F32Regs, FDIV32rr_prec_ftz>,
                        Requires<[doF32FTZ, reqPTX20]>;
def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv,
                                    V2F32Regs, FDIV32rr_prec_ftz>,
                        Requires<[doF32FTZ, reqPTX20]>;
def V4F32Div_prec     : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv,
                                    V4F32Regs, FDIV32rr_prec>,
                        Requires<[reqPTX20]>;
def V2F32Div_prec     : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv,
                                    V2F32Regs, FDIV32rr_prec>,
                        Requires<[reqPTX20]>;

// f32 divide emitted as div.full; the ftz forms require doF32FTZ.
def V2F32Div_ftz      : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv,
                                    V2F32Regs, FDIV32rr_ftz>,
                        Requires<[doF32FTZ]>;
def V4F32Div_ftz      : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv,
                                    V4F32Regs, FDIV32rr_ftz>,
                        Requires<[doF32FTZ]>;
def V2F32Div          : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv,
                                    V2F32Regs, FDIV32rr>;
def V4F32Div          : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv,
                                    V4F32Regs, FDIV32rr>;

// f64 divide emitted as div.rn.
def V2F64Div          : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv,
                                    V2F64Regs, FDIV64rr>;
}
556
// Wraps the fneg node in a PatFrag so that it can be passed to VecUnaryOp,
// whose OpNode parameter is typed PatFrag.
def fnegpat : PatFrag<(ops node:$in),
                      (fneg node:$in)>;
558
let VecInstType=isVecOther.Value in {
// Floating-point negate; the ftz forms require doF32FTZ.
def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat,
                               V2F32Regs, FNEGf32_ftz>,
                    Requires<[doF32FTZ]>;
def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat,
                               V4F32Regs, FNEGf32_ftz>,
                    Requires<[doF32FTZ]>;
def VNegv2f32     : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat,
                               V2F32Regs, FNEGf32>;
def VNegv4f32     : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat,
                               V4F32Regs, FNEGf32>;
def VNegv2f64     : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat,
                               V2F64Regs, FNEGf64>;

// Logical Arithmetic
defm VAnd : IntBinVOp<"and.b", and,
                      ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
defm VOr  : IntBinVOp<"or.b", or,
                      ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
defm VXor : IntBinVOp<"xor.b", xor,
                      XORb64rr, XORb32rr, XORb16rr, XORb8rr>;

defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
}
575
576
// v2f32 patterns that fold an fsub of an fmul into the three-operand
// instruction `Inst`, guarded by `Pred`.  The subtraction is expressed by
// negating one operand with VNegv2f32 before it is passed to Inst.
multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  // a - (b * c)  ==>  Inst(-b, c, a)
  def : Pat<(fsub V2F32Regs:$a,
                  (fmul V2F32Regs:$b, V2F32Regs:$c)),
            (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
        Requires<[Pred]>;

  // (a * b) - c  ==>  Inst(a, b, -c)
  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b),
                  V2F32Regs:$c),
            (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
        Requires<[Pred]>;
}
586
// fsub-of-fmul folding for each v2f32 fma / mad instruction, with and
// without ftz.
defm V2FMAF32ext_ftz
  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
defm V2FMADF32ext_ftz
  : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
defm V2FMAF32ext
  : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
defm V2FMADF32ext
  : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
591
// v4f32 patterns that fold an fsub of an fmul into the three-operand
// instruction `Inst`, guarded by `Pred`.  The subtraction is expressed by
// negating one operand with VNegv4f32 before it is passed to Inst.
multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  // a - (b * c)  ==>  Inst(-b, c, a)
  def : Pat<(fsub V4F32Regs:$a,
                  (fmul V4F32Regs:$b, V4F32Regs:$c)),
            (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
        Requires<[Pred]>;

  // (a * b) - c  ==>  Inst(a, b, -c)
  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b),
                  V4F32Regs:$c),
            (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
        Requires<[Pred]>;
}
601
// fsub-of-fmul folding for each v4f32 fma / mad instruction, with and
// without ftz.
defm V4FMAF32ext_ftz
  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
defm V4FMADF32ext_ftz
  : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
defm V4FMAF32ext
  : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
defm V4FMADF32ext
  : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
606
// v2f64 patterns that fold an fsub of an fmul into the three-operand
// instruction `Inst`, guarded by `Pred`.  The subtraction is expressed by
// negating one operand with VNegv2f64 before it is passed to Inst.
multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  // a - (b * c)  ==>  Inst(-b, c, a)
  def : Pat<(fsub V2F64Regs:$a,
                  (fmul V2F64Regs:$b, V2F64Regs:$c)),
            (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
        Requires<[Pred]>;

  // (a * b) - c  ==>  Inst(a, b, -c)
  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b),
                  V2F64Regs:$c),
            (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
        Requires<[Pred]>;
}
616
// fsub-of-fmul folding for the v2f64 fma instruction.
defm V2FMAF64ext
  : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
618
// Builds, in field `s`, an operand reference of the form
//   ${c<elem>:vecv<vecsize><extra><l>}
// i.e. operand $c<elem> printed with the modifier "vecv" followed by the
// vector size, the `extra` tag and the optional trailing text `l`.
class VecModStr<string vecsize, string elem, string extra, string l="">
{
  string s = !strconcat("${c", elem, ":vecv", vecsize, extra, l, "}");
}
// Assembly text, in field `s`, for destination element `elem` of a shuffle.
// Two mov lines are produced, one reading $src1 and one reading $src2.  Each
// line is prefixed by a "comm" VecModStr reference (tagged "1" or "2"
// respectively) and its source is suffixed by a "pos" VecModStr reference.
class ShuffleOneLine<string vecsize, string elem, string type>
{
  string s = !strconcat(
      // Line for $src1.
      VecModStr<vecsize, elem, "comm", "1">.s,
      "mov.", type, " \t${dst}_", elem, ", $src1",
      VecModStr<vecsize, elem, "pos">.s,
      ";\n\t",
      // Line for $src2.
      VecModStr<vecsize, elem, "comm", "2">.s,
      "mov.", type, " \t${dst}_", elem, ", $src2",
      VecModStr<vecsize, elem, "pos">.s,
      ";");
}
// Shuffle assembly text for a 2-element vector: the ShuffleOneLine text for
// elements 0 and 1, separated by "\n\t".
class ShuffleAsmStr2<string type>
{
  string s = !strconcat(ShuffleOneLine<"2", "0", type>.s, "\n\t",
                        ShuffleOneLine<"2", "1", type>.s);
}
// Shuffle assembly text for a 4-element vector: the ShuffleOneLine text for
// elements 0 through 3, separated by "\n\t".
class ShuffleAsmStr4<string type>
{
  string s = !strconcat(ShuffleOneLine<"4", "0", type>.s, "\n\t",
                        ShuffleOneLine<"4", "1", type>.s, "\n\t",
                        ShuffleOneLine<"4", "2", type>.s, "\n\t",
                        ShuffleOneLine<"4", "3", type>.s);
}
663
664let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
// v4f32 shuffle of $src1 / $src2 controlled by the immediates $c0 .. $c3.
// The assembly starts with a "//Mov ..." line listing all operands, followed
// by the per-element movs.  No selection pattern is attached.
def VecShuffle_v4f32
  : NVPTXVecInst<
      (outs V4F32Regs:$dst),
      (ins V4F32Regs:$src1, V4F32Regs:$src2,
           i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
      !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                 ShuffleAsmStr4<"f32">.s),
      [],
      FMOV32rr>;
671
// v4i32 shuffle of $src1 / $src2 controlled by the immediates $c0 .. $c3.
// The assembly starts with a "//Mov ..." line listing all operands, followed
// by the per-element movs.  No selection pattern is attached.
def VecShuffle_v4i32
  : NVPTXVecInst<
      (outs V4I32Regs:$dst),
      (ins V4I32Regs:$src1, V4I32Regs:$src2,
           i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
      !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                 ShuffleAsmStr4<"u32">.s),
      [],
      IMOV32rr>;
678
// v4i16 shuffle of $src1 / $src2 controlled by the immediates $c0 .. $c3.
// The assembly starts with a "//Mov ..." line listing all operands, followed
// by the per-element movs.  No selection pattern is attached.
def VecShuffle_v4i16
  : NVPTXVecInst<
      (outs V4I16Regs:$dst),
      (ins V4I16Regs:$src1, V4I16Regs:$src2,
           i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
      !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                 ShuffleAsmStr4<"u16">.s),
      [],
      IMOV16rr>;
685
// v4i8 shuffle of $src1 / $src2 controlled by the immediates $c0 .. $c3.
// The per-element movs use the u16 type.  No selection pattern is attached.
def VecShuffle_v4i8
  : NVPTXVecInst<
      (outs V4I8Regs:$dst),
      (ins V4I8Regs:$src1, V4I8Regs:$src2,
           i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
      !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                 ShuffleAsmStr4<"u16">.s),
      [],
      IMOV8rr>;
692
693def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
694                       (ins  V2F32Regs:$src1, V2F32Regs:$src2,
695                             i8imm:$c0, i8imm:$c1),
696                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
697                                 ShuffleAsmStr2<"f32">.s),
698                       [], FMOV32rr>;
699
700def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
701                       (ins  V2I32Regs:$src1, V2I32Regs:$src2,
702                             i8imm:$c0, i8imm:$c1),
703                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
704                                 ShuffleAsmStr2<"u32">.s),
705                       [], IMOV32rr>;
706
707def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
708                       (ins  V2I8Regs:$src1, V2I8Regs:$src2,
709                             i8imm:$c0, i8imm:$c1),
710                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
711                                 ShuffleAsmStr2<"u16">.s),
712                       [], IMOV8rr>;
713
714def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
715                       (ins  V2I16Regs:$src1, V2I16Regs:$src2,
716                             i8imm:$c0, i8imm:$c1),
717                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
718                                 ShuffleAsmStr2<"u16">.s),
719                       [], IMOV16rr>;
720
721def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
722                       (ins  V2F64Regs:$src1, V2F64Regs:$src2,
723                             i8imm:$c0, i8imm:$c1),
724                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
725                                 ShuffleAsmStr2<"f64">.s),
726                       [], FMOV64rr>;
727
728def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
729                       (ins  V2I64Regs:$src1, V2I64Regs:$src2,
730                             i8imm:$c0, i8imm:$c1),
731                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
732                                 ShuffleAsmStr2<"u64">.s),
733                       [], IMOV64rr>;
734}
735
// Node transforms over vector_shuffle: ShuffleMaskK reads element K of the
// shuffle mask from the ShuffleVectorSDNode and returns it as an i32 target
// constant.
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(0), MVT::i32);
}]>;
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(1), MVT::i32);
}]>;
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(2), MVT::i32);
}]>;
def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(3), MVT::i32);
}]>;
752
// vector_shuffle fragment whose predicate always returns true.  The
// N->getGluedNode() call has no purpose other than making N count as used,
// which silences the compiler's unused-variable warning.
def vec_shuf
  : PatFrag<(ops node:$lhs, node:$rhs),
            (vector_shuffle node:$lhs, node:$rhs),
            [{ N->getGluedNode(); return true; }]>;
758
// Selection patterns for vector shuffles.  Each one maps a vec_shuf of a given
// vector type onto the matching VecShuffle_* instruction and fills the lane
// immediates by applying ShuffleMask0..N to the matched shuffle node ($op).

def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
          (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op))>;

def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
          (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op),
            (ShuffleMask2 node:$op),
            (ShuffleMask3 node:$op))>;

def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
          (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op))>;

def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
          (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op))>;

def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
          (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op),
            (ShuffleMask2 node:$op),
            (ShuffleMask3 node:$op))>;

def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
          (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op))>;

def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
          (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op),
            (ShuffleMask2 node:$op),
            (ShuffleMask3 node:$op))>;

def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
          (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op))>;

def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
          (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op),
            (ShuffleMask2 node:$op),
            (ShuffleMask3 node:$op))>;

def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
          (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
            (ShuffleMask0 node:$op),
            (ShuffleMask1 node:$op))>;
802
// Two-element build_vector: assembles a vector register from two scalars.
//   asmstr - opcode text emitted ahead of the operand list
//   vclass - register class of the vector result
//   sclass - register class of both scalar inputs
//   si     - instruction stored as NVPTXVecInst::scalarInst
class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins sclass:$a1, sclass:$a2),
                 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
                 [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
                 si>;
// Four-element build_vector: assembles a vector register from four scalars.
//   asmstr - opcode text emitted ahead of the operand list
//   vclass - register class of the vector result
//   sclass - register class of all four scalar inputs
//   si     - instruction stored as NVPTXVecInst::scalarInst
class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
                 !strconcat(asmstr,
                            "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
                 [(set vclass:$dst,
                       (build_vector sclass:$a1, sclass:$a2,
                                     sclass:$a3, sclass:$a4))],
                 si>;
818
// Concrete build_vector instructions.  The i8 vector variants use the same
// ".u16" opcode text as the i16 ones.
let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {

// Two-element floating point.
def Build_Vector2_f32
  : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, FMOV32rr>;
def Build_Vector2_f64
  : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, FMOV64rr>;

// Two-element integer.
def Build_Vector2_i32
  : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, IMOV32rr>;
def Build_Vector2_i64
  : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, IMOV64rr>;
def Build_Vector2_i16
  : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, IMOV16rr>;
def Build_Vector2_i8
  : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, IMOV8rr>;

// Four-element floating point.
def Build_Vector4_f32
  : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, FMOV32rr>;

// Four-element integer.
def Build_Vector4_i32
  : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, IMOV32rr>;
def Build_Vector4_i16
  : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, IMOV16rr>;
def Build_Vector4_i8
  : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, IMOV8rr>;
}
844
// Whole-vector register-to-register move; carries no selection pattern.
//   asmstr - opcode text emitted ahead of the operands
//   vclass - register class of both source and destination
//   sop    - instruction stored as NVPTXVecInst::scalarInst (defaults to NOP)
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src),
                 !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
                 [],
                 sop>;
849
// Vector move instructions, one per vector register class.  The i8 vector
// moves use the ".u16" opcode text.
let isAsCheapAsAMove=1,
    hasSideEffects=0,
    IsSimpleMove=1,
    VecInstType=isVecOther.Value in {

// 32-bit float lanes.
def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

// 32-bit integer lanes.
def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

// 16-bit integer lanes.
def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

// 8-bit integer lanes.
def V4i8Mov  : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
def V2i8Mov  : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

// 64-bit lanes.
def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}
867
// Extract-subvector support.
// SDNode wrapper for ISD::EXTRACT_SUBVECTOR: one result, two operands, with
// operand 2 of the type profile constrained to the pointer type.
def extract_subvec
  : SDNode<"ISD::EXTRACT_SUBVECTOR",
           SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
871
// A two-element subvector taken from a four-element vector at index 0 or 2 is
// lowered to two element extracts (lanes 0,1 or lanes 2,3) that feed a
// two-element build_vector instruction.

// v4f32 -> v2f32
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
                             (V4f32Extract V4F32Regs:$src, 1))>;
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
                             (V4f32Extract V4F32Regs:$src, 3))>;

// v4i32 -> v2i32
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
                             (V4i32Extract V4I32Regs:$src, 1))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
                             (V4i32Extract V4I32Regs:$src, 3))>;

// v4i16 -> v2i16
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
                             (V4i16Extract V4I16Regs:$src, 1))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
                             (V4i16Extract V4I16Regs:$src, 3))>;

// v4i8 -> v2i8
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
                            (V4i8Extract V4I8Regs:$src, 1))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
                            (V4i8Extract V4I8Regs:$src, 3))>;
896
// Select instructions

// Text of a single per-lane selp line.  Field `s` has the form
//   selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;
// where <pos> is the lane suffix appended to every register operand.
class Select_OneLine<string type, string pos> {
  string s = !strconcat("selp.", type,
                        " \t${dst}_", pos,
                        ", ${src1}_", pos,
                        ", ${src2}_", pos,
                        ", $p;");
}
908
// Asm text for a two-lane select: the Select_OneLine strings for lanes "0"
// and "1", separated by "\n\t".
class Select_Str2<string type> {
  string s = !strconcat(Select_OneLine<type, "0">.s, "\n\t",
                        Select_OneLine<type, "1">.s);
}
914
// Asm text for a four-lane select: the Select_OneLine strings for lanes "0"
// through "3", separated by "\n\t".
class Select_Str4<string type> {
  string s = !strconcat(Select_OneLine<type, "0">.s, "\n\t",
                        Select_OneLine<type, "1">.s, "\n\t",
                        Select_OneLine<type, "2">.s, "\n\t",
                        Select_OneLine<type, "3">.s);
}
925
// Vector select driven by a single Int1Regs predicate $p; matches
// (select $p, $src1, $src2) on the given vector register class.
//   vclass - register class of the result and of both value operands
//   asmstr - complete asm text (see Select_Str2 / Select_Str4)
//   sop    - instruction stored as NVPTXVecInst::scalarInst
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
                 asmstr,
                 [(set vclass:$dst,
                       (select Int1Regs:$p, vclass:$src1, vclass:$src2))],
                 sop>;
933
// Vector select instructions.  Integer vectors use the untyped "bNN" type
// strings (the i8 vectors use "b16"); floating-point vectors use "fNN".
let VecInstType=isVecOther.Value in {

// Integer vectors.
def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
def V4I8_Select  : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
def V2I8_Select  : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;

// Floating-point vectors.
def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}
947
// Comparison instructions

// setcc convenience fragments: each one is (setcc lhs, rhs, <condition code>)
// with the condition code fixed.

// SETO* condition codes.
def vsetoeq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>;
def vsetogt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGT)>;
def vsetoge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGE)>;
def vsetolt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLT)>;
def vsetole
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLE)>;
def vsetone
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETONE)>;
def vseto
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETO)>;

// SETU* condition codes.
def vsetuo
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUO)>;
def vsetueq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUEQ)>;
def vsetugt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGT)>;
def vsetuge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGE)>;
def vsetult
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULT)>;
def vsetule
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULE)>;
def vsetune
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUNE)>;

// Unprefixed condition codes.
def vseteq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETEQ)>;
def vsetgt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGT)>;
def vsetge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGE)>;
def vsetlt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLT)>;
def vsetle
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLE)>;
def vsetne
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETNE)>;
991
// Vector comparison: matches (op $a, $b) and writes the result to outrclass.
// The asm string is the literal "Unsupported".
// NOTE(review): presumably these are rewritten via scalarInst (sop) before
// anything is printed - confirm against the pass that consumes scalarInst.
//   op        - setcc fragment to match
//   outrclass - register class of the result
//   inrclass  - register class of both inputs
//   sop       - instruction stored as NVPTXVecInst::scalarInst
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<(outs outrclass:$dst),
                 (ins inrclass:$a, inrclass:$b),
                 "Unsupported",
                 [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
                 sop>;
999
// Instantiates Vec_Compare for every integer vector type.  Result and operand
// register classes are identical; instN is the scalarInst used for vectors
// with N-bit lanes.
multiclass Vec_Compare_All<PatFrag op,
                           NVPTXInst inst8, NVPTXInst inst16,
                           NVPTXInst inst32, NVPTXInst inst64> {
  // 8-bit lanes
  def V2I8  : Vec_Compare<op, V2I8Regs, V2I8Regs, inst8>;
  def V4I8  : Vec_Compare<op, V4I8Regs, V4I8Regs, inst8>;
  // 16-bit lanes
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  // 32-bit lanes
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  // 64-bit lanes
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}
1014
// Integer vector comparisons: one Vec_Compare_All expansion per setcc
// fragment, paired with the scalar ISet* instructions for 8/16/32/64-bit
// lanes.
let VecInstType=isVecOther.Value in {
  defm VecSGT : Vec_Compare_All<vsetgt,
                                ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
                                ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
  defm VecUGT : Vec_Compare_All<vsetugt,
                                ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
                                ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
  defm VecSLT : Vec_Compare_All<vsetlt,
                                ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
                                ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
  defm VecULT : Vec_Compare_All<vsetult,
                                ISetULTi8rr_toi8, ISetULTi16rr_toi16,
                                ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
  defm VecSGE : Vec_Compare_All<vsetge,
                                ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
                                ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
  defm VecUGE : Vec_Compare_All<vsetuge,
                                ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
                                ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
  defm VecSLE : Vec_Compare_All<vsetle,
                                ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
                                ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
  defm VecULE : Vec_Compare_All<vsetule,
                                ISetULEi8rr_toi8, ISetULEi16rr_toi16,
                                ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
  defm VecSEQ : Vec_Compare_All<vseteq,
                                ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
                                ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
  defm VecUEQ : Vec_Compare_All<vsetueq,
                                ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
                                ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
  defm VecSNE : Vec_Compare_All<vsetne,
                                ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
                                ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
  defm VecUNE : Vec_Compare_All<vsetune,
                                ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
                                ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}
1041
// Instantiates Vec_Compare for every floating-point vector type.  The result
// is written to the integer vector class with the same lane count and width
// (V2F32 -> V2I32, V4F32 -> V4I32, V2F64 -> V2I64).
multiclass FVec_Compare_All<PatFrag op, NVPTXInst instf32, NVPTXInst instf64> {
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}
1050
// Floating-point vector comparisons, grouped by the kind of setcc fragment
// they match.
let VecInstType=isVecOther.Value in {
  // vseto?? fragments (SETO* condition codes).
  defm FVecGT : FVec_Compare_All<vsetogt,
                                 FSetGTf32rr_toi32, FSetGTf64rr_toi64>;
  defm FVecLT : FVec_Compare_All<vsetolt,
                                 FSetLTf32rr_toi32, FSetLTf64rr_toi64>;
  defm FVecGE : FVec_Compare_All<vsetoge,
                                 FSetGEf32rr_toi32, FSetGEf64rr_toi64>;
  defm FVecLE : FVec_Compare_All<vsetole,
                                 FSetLEf32rr_toi32, FSetLEf64rr_toi64>;
  defm FVecEQ : FVec_Compare_All<vsetoeq,
                                 FSetEQf32rr_toi32, FSetEQf64rr_toi64>;
  defm FVecNE : FVec_Compare_All<vsetone,
                                 FSetNEf32rr_toi32, FSetNEf64rr_toi64>;

  // vsetu?? fragments (SETU* condition codes).
  defm FVecUGT : FVec_Compare_All<vsetugt,
                                  FSetUGTf32rr_toi32, FSetUGTf64rr_toi64>;
  defm FVecULT : FVec_Compare_All<vsetult,
                                  FSetULTf32rr_toi32, FSetULTf64rr_toi64>;
  defm FVecUGE : FVec_Compare_All<vsetuge,
                                  FSetUGEf32rr_toi32, FSetUGEf64rr_toi64>;
  defm FVecULE : FVec_Compare_All<vsetule,
                                  FSetULEf32rr_toi32, FSetULEf64rr_toi64>;
  defm FVecUEQ : FVec_Compare_All<vsetueq,
                                  FSetUEQf32rr_toi32, FSetUEQf64rr_toi64>;
  defm FVecUNE : FVec_Compare_All<vsetune,
                                  FSetUNEf32rr_toi32, FSetUNEf64rr_toi64>;

  // SETO / SETUO.
  defm FVecNUM : FVec_Compare_All<vseto,
                                  FSetNUMf32rr_toi32, FSetNUMf64rr_toi64>;
  defm FVecNAN : FVec_Compare_All<vsetuo,
                                  FSetNANf32rr_toi32, FSetNANf64rr_toi64>;
}
1083
// Four-register ld.param from the return-value area.  Emits
//   ld.param<opstr> {$d1, $d2, $d3, $d4}, [retval0+$b];
// Operand $a is part of the operand list but does not appear in the asm text.
// No selection pattern is attached.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
              (ins i32imm:$a, i32imm:$b),
              !strconcat("ld.param", opstr,
                         "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"),
              []>;
1089
// Two-register ld.param from the return-value area.  Emits
//   ld.param<opstr> {$d1, $d2}, [retval0+$b];
// Operand $a is part of the operand list but does not appear in the asm text.
// No selection pattern is attached.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$d1, regclass:$d2),
              (ins i32imm:$a, i32imm:$b),
              !strconcat("ld.param", opstr,
                         "\t{{$d1, $d2}}, [retval0+$b];"),
              []>;
1095
1096
// Four-register st.param to an outgoing parameter.  Emits
//   st.param<opstr> [param$a+$b], {$s1, $s2, $s3, $s4};
// so $a is pasted onto the "param" symbol name and $b is the offset added to
// it.  No selection pattern is attached.
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                   i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr,
                         "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"),
              []>;
1103
// Two-register st.param to an outgoing parameter.  Emits
//   st.param<opstr> [param$a+$b], {$s1, $s2};
// so $a is pasted onto the "param" symbol name and $b is the offset added to
// it.  No selection pattern is attached.
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr,
                         "\t[param$a+$b], {{$s1, $s2}};"),
              []>;
1109
// Four-register st.param to the function return value.  Emits
//   st.param<opstr> [func_retval+$a], {$s1, $s2, $s3, $s4};
// No selection pattern is attached.
// NOTE(review): the symbol is spelled "func_retval" here; verify that this
// matches the name under which the return parameter is declared.
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                   i32imm:$a),
              !strconcat("st.param", opstr,
                         "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"),
              []>;
1116
// Two-register st.param to the function return value.  Emits
//   st.param<opstr> [func_retval+$a], {$s1, $s2};
// No selection pattern is attached.
// NOTE(review): the symbol is spelled "func_retval" here; verify that this
// matches the name under which the return parameter is declared.
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, i32imm:$a),
              !strconcat("st.param", opstr,
                         "\t[func_retval+$a], {{$s1, $s2}};"),
              []>;
1122
// Scalarized ld.param instructions: each loads 2 or 4 scalar registers with a
// single vector ld.param.  The register class of every def matches the
// element width named by its opcode suffix, mirroring the StoreParamScalar*
// and StoreRetvalScalar* defs (Int8Regs for .b8, Int16Regs for .b16,
// Int32Regs for .b32, Int64Regs for .b64).

// Four-element integer loads.
def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;

// Two-element integer loads.  The I64, I16 and I8 variants previously all
// named Int32Regs, which disagreed with their .b64/.b16/.b8 element widths.
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;

// Floating-point loads.
def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
1135
// Scalarized st.param instructions for outgoing parameters; the register
// class of each def matches the element width in its opcode suffix.

// Four-element integer stores.
def StoreParamScalar4I32
  : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16
  : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8
  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;

// Two-element integer stores.
def StoreParamScalar2I64
  : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32
  : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16
  : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8
  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;

// Floating-point stores.
def StoreParamScalar4F32
  : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32
  : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64
  : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
1148
// Scalarized st.param instructions for the function return value; the
// register class of each def matches the element width in its opcode suffix.

// Four-element integer stores.
def StoreRetvalScalar4I32
  : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16
  : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8
  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;

// Two-element integer stores.
def StoreRetvalScalar2I64
  : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32
  : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16
  : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8
  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;

// Floating-point stores.
def StoreRetvalScalar4F32
  : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32
  : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64
  : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
1161
// Vector-register form of LoadParam: matches (LoadParam imm:$a, imm:$b) into a
// vector register.  The asm string is a fixed descriptive text; `opstr` is
// accepted but not referenced in this class.  `sop` is stored as
// NVPTXVecInst::scalarInst (defaults to NOP).
class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins i32imm:$a, i32imm:$b),
                 "loadparam : $dst <- [$a, $b]",
                 [(set regclass:$dst,
                       (LoadParam (i32 imm:$a), (i32 imm:$b)))],
                 sop>;
1167
// Vector-register form of StoreParam: matches
// (StoreParam imm:$a, imm:$b, $val) with $val in a vector register.  The asm
// string is a fixed descriptive text; `opstr` is accepted but not referenced
// in this class.  `sop` is stored as NVPTXVecInst::scalarInst (defaults to
// NOP).
class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs),
                 (ins regclass:$val, i32imm:$a, i32imm:$b),
                 "storeparam : [$a, $b] <- $val",
                 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)],
                 sop>;
1172
// Vector-register form of StoreRetval: matches (StoreRetval imm:$a, $val) with
// $val in a vector register.  The asm string is a fixed descriptive text;
// `opstr` is accepted but not referenced in this class.  `sop` is stored as
// NVPTXVecInst::scalarInst (defaults to NOP).
class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop=NOP>
  : NVPTXVecInst<(outs),
                 (ins regclass:$val, i32imm:$a),
                 "storeretval : retval[$a] <- $val",
                 [(StoreRetval (i32 imm:$a), regclass:$val)],
                 sop>;
1178
// Vector LoadParam instructions, each paired with the LoadParamScalar*
// instruction of the same shape as its scalarInst.
let VecInstType=isVecLD.Value in {

// Four-element integer vectors.
def LoadParamV4I32
  : LoadParamVecInst<V4I32Regs, ".v4.b32", LoadParamScalar4I32>;
def LoadParamV4I16
  : LoadParamVecInst<V4I16Regs, ".v4.b16", LoadParamScalar4I16>;
def LoadParamV4I8
  : LoadParamVecInst<V4I8Regs, ".v4.b8", LoadParamScalar4I8>;

// Two-element integer vectors.
def LoadParamV2I64
  : LoadParamVecInst<V2I64Regs, ".v2.b64", LoadParamScalar2I64>;
def LoadParamV2I32
  : LoadParamVecInst<V2I32Regs, ".v2.b32", LoadParamScalar2I32>;
def LoadParamV2I16
  : LoadParamVecInst<V2I16Regs, ".v2.b16", LoadParamScalar2I16>;
def LoadParamV2I8
  : LoadParamVecInst<V2I8Regs, ".v2.b8", LoadParamScalar2I8>;

// Floating-point vectors.
def LoadParamV4F32
  : LoadParamVecInst<V4F32Regs, ".v4.f32", LoadParamScalar4F32>;
def LoadParamV2F32
  : LoadParamVecInst<V2F32Regs, ".v2.f32", LoadParamScalar2F32>;
def LoadParamV2F64
  : LoadParamVecInst<V2F64Regs, ".v2.f64", LoadParamScalar2F64>;
}
1203
// Vector StoreParam and StoreRetval instructions, each paired with the
// scalarized store of the same shape as its scalarInst.
let VecInstType=isVecST.Value in {

// ---- StoreParam: four-element integer vectors ----
def StoreParamV4I32
  : StoreParamVecInst<V4I32Regs, ".v4.b32", StoreParamScalar4I32>;
def StoreParamV4I16
  : StoreParamVecInst<V4I16Regs, ".v4.b16", StoreParamScalar4I16>;
def StoreParamV4I8
  : StoreParamVecInst<V4I8Regs, ".v4.b8", StoreParamScalar4I8>;

// ---- StoreParam: two-element integer vectors ----
def StoreParamV2I64
  : StoreParamVecInst<V2I64Regs, ".v2.b64", StoreParamScalar2I64>;
def StoreParamV2I32
  : StoreParamVecInst<V2I32Regs, ".v2.b32", StoreParamScalar2I32>;
def StoreParamV2I16
  : StoreParamVecInst<V2I16Regs, ".v2.b16", StoreParamScalar2I16>;
def StoreParamV2I8
  : StoreParamVecInst<V2I8Regs, ".v2.b8", StoreParamScalar2I8>;

// ---- StoreParam: floating-point vectors ----
def StoreParamV4F32
  : StoreParamVecInst<V4F32Regs, ".v4.f32", StoreParamScalar4F32>;
def StoreParamV2F32
  : StoreParamVecInst<V2F32Regs, ".v2.f32", StoreParamScalar2F32>;
def StoreParamV2F64
  : StoreParamVecInst<V2F64Regs, ".v2.f64", StoreParamScalar2F64>;

// ---- StoreRetval: four-element integer vectors ----
def StoreRetvalV4I32
  : StoreRetvalVecInst<V4I32Regs, ".v4.b32", StoreRetvalScalar4I32>;
def StoreRetvalV4I16
  : StoreRetvalVecInst<V4I16Regs, ".v4.b16", StoreRetvalScalar4I16>;
def StoreRetvalV4I8
  : StoreRetvalVecInst<V4I8Regs, ".v4.b8", StoreRetvalScalar4I8>;

// ---- StoreRetval: two-element integer vectors ----
def StoreRetvalV2I64
  : StoreRetvalVecInst<V2I64Regs, ".v2.b64", StoreRetvalScalar2I64>;
def StoreRetvalV2I32
  : StoreRetvalVecInst<V2I32Regs, ".v2.b32", StoreRetvalScalar2I32>;
def StoreRetvalV2I16
  : StoreRetvalVecInst<V2I16Regs, ".v2.b16", StoreRetvalScalar2I16>;
def StoreRetvalV2I8
  : StoreRetvalVecInst<V2I8Regs, ".v2.b8", StoreRetvalScalar2I8>;

// ---- StoreRetval: floating-point vectors ----
def StoreRetvalV4F32
  : StoreRetvalVecInst<V4F32Regs, ".v4.f32", StoreRetvalScalar4F32>;
def StoreRetvalV2F32
  : StoreRetvalVecInst<V2F32Regs, ".v2.f32", StoreRetvalScalar2F32>;
def StoreRetvalV2F64
  : StoreRetvalVecInst<V2F64Regs, ".v2.f64", StoreRetvalScalar2F64>;
}
1252
1253
// Bitconvert: integer vector -> integer scalar.
// Every lane is extracted individually and the lanes are handed, in order, to
// the matching V*to* packing instruction.

// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                     (V4i8Extract V4I8Regs:$s, 1),
                     (V4i8Extract V4I8Regs:$s, 2),
                     (V4i8Extract V4I8Regs:$s, 3))>;

// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                      (V4i16Extract V4I16Regs:$s, 1),
                      (V4i16Extract V4I16Regs:$s, 2),
                      (V4i16Extract V4I16Regs:$s, 3))>;

// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16 (V2i8Extract V2I8Regs:$s, 0),
                     (V2i8Extract V2I8Regs:$s, 1))>;

// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                      (V2i16Extract V2I16Regs:$s, 1))>;

// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                      (V2i32Extract V2I32Regs:$s, 1))>;
1276
// Bitconvert: integer scalar -> integer vector.
// Each def matches the bitconvert directly, uses the literal asm string
// "Error!", and stores the corresponding I*toV* instruction as its scalarInst.
let VecInstType=isVecDest.Value in {

// i32 -> v4i8
def VecI32toV4I8
  : NVPTXVecInst<(outs V4I8Regs:$d),
                 (ins Int32Regs:$s),
                 "Error!",
                 [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
                 I32toV4I8>;

// i64 -> v4i16
def VecI64toV4I16
  : NVPTXVecInst<(outs V4I16Regs:$d),
                 (ins Int64Regs:$s),
                 "Error!",
                 [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
                 I64toV4I16>;

// i16 -> v2i8
def VecI16toV2I8
  : NVPTXVecInst<(outs V2I8Regs:$d),
                 (ins Int16Regs:$s),
                 "Error!",
                 [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
                 I16toV2I8>;

// i32 -> v2i16
def VecI32toV2I16
  : NVPTXVecInst<(outs V2I16Regs:$d),
                 (ins Int32Regs:$s),
                 "Error!",
                 [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
                 I32toV2I16>;

// i64 -> v2i32
def VecI64toV2I32
  : NVPTXVecInst<(outs V2I32Regs:$d),
                 (ins Int64Regs:$s),
                 "Error!",
                 [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
                 I64toV2I32>;
}
1305
// Bitconvert: integer vector -> integer vector.
// Most conversions pack the source lanes into one integer scalar and then
// split that scalar into the destination vector type.  The v2i64 <-> v4i32
// conversions are assembled with a build_vector instruction instead.

// v4i8 -> v2i16
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> v2i32
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> v4i8
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> v4i16
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;

// v2i64 -> v4i32: each 64-bit lane is split into a v2i32 whose two lanes
// become consecutive lanes of the result.
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;

// v4i32 -> v2i64: lanes 0,1 are packed into the first 64-bit lane and lanes
// 2,3 into the second.
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                        (V4i32Extract V4I32Regs:$s, 1)),
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                        (V4i32Extract V4I32Regs:$s, 3)))>;
1337
// Fp scalar to fp vector bit convert
// f64 -> v2f32
// Matches a bitconvert of a Float64Regs value into V2F32Regs.  The record is
// defined with VecInstType set to isVecDest.Value, carries the literal asm
// string "Error!", and passes F64toV2F32 to NVPTXVecInst as the scalar
// instruction (stored in the scalarInst field).
// NOTE(review): presumably replaced by its scalar form before printing, so
// "Error!" is never emitted -- confirm.
let VecInstType = isVecDest.Value in {
  def VecF64toV2F32
    : NVPTXVecInst<(outs V2F32Regs:$d),
                   (ins Float64Regs:$s),
                   "Error!",
                   [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
                   F64toV2F32>;
}
1346
// Fp vector to fp scalar bit convert
// v2f32 -> f64: both f32 lanes are extracted and passed to V2F32toF64.
def : Pat<(f64 (bitconvert V2F32Regs:$src)),
          (V2F32toF64 (V2f32Extract V2F32Regs:$src, 0),
                      (V2f32Extract V2F32Regs:$src, 1))>;
1351
// Fp scalar to int vector bit convert.
// The fp register first goes through BITCONVERT_{32,64}_F2I; that result is
// the operand of the scalar-int-to-vector instruction.

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$src)),
          (VecI32toV4I8
            (BITCONVERT_32_F2I Float32Regs:$src))>;

// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$src)),
          (VecI32toV2I16
            (BITCONVERT_32_F2I Float32Regs:$src))>;

// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$src)),
          (VecI64toV4I16
            (BITCONVERT_64_F2I Float64Regs:$src))>;

// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$src)),
          (VecI64toV2I32
            (BITCONVERT_64_F2I Float64Regs:$src))>;
1365
// Int vector to fp scalar bit convert.
// The lanes are extracted and passed to the vector-to-scalar-int
// instruction, whose result is the operand of BITCONVERT_{32,64}_I2F.

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$src)),
          (BITCONVERT_32_I2F
            (V4I8toI32 (V4i8Extract V4I8Regs:$src, 0),
                       (V4i8Extract V4I8Regs:$src, 1),
                       (V4i8Extract V4I8Regs:$src, 2),
                       (V4i8Extract V4I8Regs:$src, 3)))>;

// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$src)),
          (BITCONVERT_64_I2F
            (V4I16toI64 (V4i16Extract V4I16Regs:$src, 0),
                        (V4i16Extract V4I16Regs:$src, 1),
                        (V4i16Extract V4I16Regs:$src, 2),
                        (V4i16Extract V4I16Regs:$src, 3)))>;

// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$src)),
          (BITCONVERT_32_I2F
            (V2I16toI32 (V2i16Extract V2I16Regs:$src, 0),
                        (V2i16Extract V2I16Regs:$src, 1)))>;

// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$src)),
          (BITCONVERT_64_I2F
            (V2I32toI64 (V2i32Extract V2I32Regs:$src, 0),
                        (V2i32Extract V2I32Regs:$src, 1)))>;
1385
// Int scalar to fp vector bit convert
// i64 -> v2f32: the i64 goes through BITCONVERT_64_I2F, and that result is
// the operand of VecF64toV2F32.
def : Pat<(v2f32 (bitconvert Int64Regs:$src)),
          (VecF64toV2F32
            (BITCONVERT_64_I2F Int64Regs:$src))>;
1390
// Fp vector to int scalar bit convert
// v2f32 -> i64: both f32 lanes are passed to V2F32toF64, and that result is
// the operand of BITCONVERT_64_F2I.
def : Pat<(i64 (bitconvert V2F32Regs:$src)),
          (BITCONVERT_64_F2I
            (V2F32toF64 (V2f32Extract V2F32Regs:$src, 0),
                        (V2f32Extract V2F32Regs:$src, 1)))>;
1396
// Int vector to fp vector bit convert.
// Results are assembled with Build_Vector{2,4}_f{32,64} (or VecF64toV2F32
// for the v4i16 source) from per-lane BITCONVERT_*_I2F results.

// v2i64 -> v4f32: each i64 lane goes through VecI64toV2I32; both elements of
// that result are extracted and passed through BITCONVERT_32_I2F.
def : Pat<(v4f32 (bitconvert V2I64Regs:$src)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 0)), 0)),
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 0)), 1)),
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 1)), 0)),
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 1)), 1)))>;

// v2i64 -> v2f64: lane-by-lane BITCONVERT_64_I2F
def : Pat<(v2f64 (bitconvert V2I64Regs:$src)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$src, 0)),
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$src, 1)))>;

// v2i32 -> v2f32: lane-by-lane BITCONVERT_32_I2F
def : Pat<(v2f32 (bitconvert V2I32Regs:$src)),
          (Build_Vector2_f32
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$src, 0)),
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$src, 1)))>;

// v4i32 -> v2f64: lanes (0,1) and (2,3) each go through V2I32toI64 and then
// BITCONVERT_64_I2F.
def : Pat<(v2f64 (bitconvert V4I32Regs:$src)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$src, 0),
                          (V4i32Extract V4I32Regs:$src, 1))),
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$src, 2),
                          (V4i32Extract V4I32Regs:$src, 3))))>;

// v4i32 -> v4f32: lane-by-lane BITCONVERT_32_I2F
def : Pat<(v4f32 (bitconvert V4I32Regs:$src)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 0)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 1)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 2)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 3)))>;

// v4i16 -> v2f32: lanes 0..3 -> V4I16toI64 -> BITCONVERT_64_I2F
// -> VecF64toV2F32
def : Pat<(v2f32 (bitconvert V4I16Regs:$src)),
          (VecF64toV2F32
            (BITCONVERT_64_I2F
              (V4I16toI64 (V4i16Extract V4I16Regs:$src, 0),
                          (V4i16Extract V4I16Regs:$src, 1),
                          (V4i16Extract V4I16Regs:$src, 2),
                          (V4i16Extract V4I16Regs:$src, 3))))>;
1440
// Fp vector to int vector bit convert.
// Results are assembled with Build_Vector{2,4}_i{32,64} (or VecI64toV4I16
// for the v4i16 destination) from per-lane BITCONVERT_*_F2I results.

// v4f32 -> v2i64: lanes (0,1) and (2,3) each go through V2F32toF64 and then
// BITCONVERT_64_F2I.
def : Pat<(v2i64 (bitconvert V4F32Regs:$src)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$src, 0),
                          (V4f32Extract V4F32Regs:$src, 1))),
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$src, 2),
                          (V4f32Extract V4F32Regs:$src, 3))))>;

// v2f64 -> v2i64: lane-by-lane BITCONVERT_64_F2I
def : Pat<(v2i64 (bitconvert V2F64Regs:$src)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$src, 0)),
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$src, 1)))>;

// v2f32 -> v2i32: lane-by-lane BITCONVERT_32_F2I
def : Pat<(v2i32 (bitconvert V2F32Regs:$src)),
          (Build_Vector2_i32
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$src, 0)),
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$src, 1)))>;

// v2f64 -> v4i32: each f64 lane goes through VecF64toV2F32; both elements of
// that result are extracted and passed through BITCONVERT_32_F2I.
def : Pat<(v4i32 (bitconvert V2F64Regs:$src)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 0)), 0)),
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 0)), 1)),
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 1)), 0)),
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 1)), 1)))>;

// v4f32 -> v4i32: lane-by-lane BITCONVERT_32_F2I
def : Pat<(v4i32 (bitconvert V4F32Regs:$src)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 0)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 1)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 2)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 3)))>;

// v2f32 -> v4i16: lanes 0..1 -> V2F32toF64 -> BITCONVERT_64_F2I
// -> VecI64toV4I16
def : Pat<(v4i16 (bitconvert V2F32Regs:$src)),
          (VecI64toV4I16
            (BITCONVERT_64_F2I
              (V2F32toF64 (V2f32Extract V2F32Regs:$src, 0),
                          (V2f32Extract V2F32Regs:$src, 1))))>;
1482