1 /*
2  * Copyright 2020 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 #include "codegen/nv50_ir.h"
23 #include "codegen/nv50_ir_build_util.h"
24 
25 #include "codegen/nv50_ir_target_nvc0.h"
26 #include "codegen/nv50_ir_lowering_gv100.h"
27 
28 #include <limits>
29 
30 namespace nv50_ir {
31 
32 bool
handleCMP(Instruction * i)33 GV100LegalizeSSA::handleCMP(Instruction *i)
34 {
35    Value *pred = bld.getSSA(1, FILE_PREDICATE);
36 
37    bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred,
38              i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz;
39    bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
40    return true;
41 }
42 
43 // NIR deals with most of these for us, but codegen generates more in pointer
44 // calculations from other lowering passes.
45 bool
handleIADD64(Instruction * i)46 GV100LegalizeSSA::handleIADD64(Instruction *i)
47 {
48    Value *carry = bld.getSSA(1, FILE_PREDICATE);
49    Value *def[2] = { bld.getSSA(), bld.getSSA() };
50    Value *src[2][2];
51 
52    for (int s = 0; s < 2; s++) {
53       if (i->getSrc(s)->reg.size == 8) {
54          bld.mkSplit(src[s], 4, i->getSrc(s));
55       } else {
56          src[s][0] = i->getSrc(s);
57          src[s][1] = bld.mkImm(0);
58       }
59    }
60 
61    bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])->
62       setFlagsDef(1, carry);
63    bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])->
64       setFlagsSrc(2, carry);
65    bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
66    return true;
67 }
68 
69 bool
handleIMAD_HIGH(Instruction * i)70 GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i)
71 {
72    Value *def = bld.getSSA(8), *defs[2];
73    Value *src2;
74 
75    if (i->srcExists(2) &&
76        (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) {
77       Value *src2s[2] = { bld.getSSA(), bld.getSSA() };
78       bld.mkMov(src2s[0], bld.mkImm(0));
79       bld.mkMov(src2s[1], i->getSrc(2));
80       src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0);
81    } else {
82       src2 = bld.mkImm(0);
83    }
84 
85    bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def,
86              i->getSrc(0), i->getSrc(1), src2);
87 
88    bld.mkSplit(defs, 4, def);
89    i->def(0).replace(defs[1], false);
90    return true;
91 }
92 
93 // XXX: We should be able to do this in GV100LoweringPass, but codegen messes
94 //      up somehow and swaps the condcode without swapping the sources.
95 //      - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
96 bool
handleIMNMX(Instruction * i)97 GV100LegalizeSSA::handleIMNMX(Instruction *i)
98 {
99    Value *pred = bld.getSSA(1, FILE_PREDICATE);
100 
101    bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred,
102              i->sType, i->getSrc(0), i->getSrc(1));
103    bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
104    return true;
105 }
106 
107 bool
handleIMUL(Instruction * i)108 GV100LegalizeSSA::handleIMUL(Instruction *i)
109 {
110    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
111       return handleIMAD_HIGH(i);
112 
113    bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
114              bld.mkImm(0));
115    return true;
116 }
117 
118 bool
handleLOP2(Instruction * i)119 GV100LegalizeSSA::handleLOP2(Instruction *i)
120 {
121    uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
122    uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
123    uint8_t subOp;
124 
125    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
126       src0 = ~src0;
127    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
128       src1 = ~src1;
129 
130    switch (i->op) {
131    case OP_AND: subOp = src0 & src1; break;
132    case OP_OR : subOp = src0 | src1; break;
133    case OP_XOR: subOp = src0 ^ src1; break;
134    default:
135       unreachable("invalid LOP2 opcode");
136    }
137 
138    bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1),
139              bld.mkImm(0))->subOp = subOp;
140    return true;
141 }
142 
143 bool
handleNOT(Instruction * i)144 GV100LegalizeSSA::handleNOT(Instruction *i)
145 {
146    bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0),
147              bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1;
148    return true;
149 }
150 
151 bool
handlePREEX2(Instruction * i)152 GV100LegalizeSSA::handlePREEX2(Instruction *i)
153 {
154    i->def(0).replace(i->src(0), false);
155    return true;
156 }
157 
158 bool
handleQUADON(Instruction * i)159 GV100LegalizeSSA::handleQUADON(Instruction *i)
160 {
161    bld.mkBMov(i->getDef(0), bld.mkTSVal(TS_MACTIVE));
162    Instruction *b = bld.mkBMov(bld.mkTSVal(TS_PQUAD_MACTIVE), i->getDef(0));
163    b->fixed = 1;
164    return true;
165 }
166 
167 bool
handleQUADPOP(Instruction * i)168 GV100LegalizeSSA::handleQUADPOP(Instruction *i)
169 {
170    Instruction *b = bld.mkBMov(bld.mkTSVal(TS_MACTIVE), i->getSrc(0));
171    b->fixed = 1;
172    return true;
173 }
174 
175 bool
handleSET(Instruction * i)176 GV100LegalizeSSA::handleSET(Instruction *i)
177 {
178    Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
179    Value *pred = bld.getSSA(1, FILE_PREDICATE), *met;
180    Instruction *xsetp;
181 
182    if (isFloatType(i->dType)) {
183       if (i->sType == TYPE_F32)
184          return false; // HW has FSET.BF
185       met = bld.mkImm(0x3f800000);
186    } else {
187       met = bld.mkImm(0xffffffff);
188    }
189 
190    xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType,
191                      i->getSrc(0), i->getSrc(1));
192    xsetp->src(0).mod = i->src(0).mod;
193    xsetp->src(1).mod = i->src(1).mod;
194    xsetp->setSrc(2, src2);
195    xsetp->ftz = i->ftz;
196 
197    i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred);
198    i->src(2).mod = Modifier(NV50_IR_MOD_NOT);
199    return true;
200 }
201 
202 bool
handleSHFL(Instruction * i)203 GV100LegalizeSSA::handleSHFL(Instruction *i)
204 {
205    Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
206    sync->fixed = 1;
207    sync->setSrc(0, bld.mkImm(0xffffffff));
208    i->bb->insertBefore(i, sync);
209    return false;
210 }
211 
212 bool
handleShift(Instruction * i)213 GV100LegalizeSSA::handleShift(Instruction *i)
214 {
215    Value *zero = bld.mkImm(0);
216    Value *src1 = i->getSrc(1);
217    Value *src0, *src2;
218    uint8_t subOp = i->op == OP_SHL ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R;
219 
220    if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) {
221       src0 = i->getSrc(0);
222       src2 = zero;
223    } else {
224       src0 = zero;
225       src2 = i->getSrc(0);
226       subOp |= NV50_IR_SUBOP_SHF_HI;
227    }
228    if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
229       subOp |= NV50_IR_SUBOP_SHF_W;
230 
231    bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp;
232    return true;
233 }
234 
235 bool
handleSUB(Instruction * i)236 GV100LegalizeSSA::handleSUB(Instruction *i)
237 {
238    Instruction *xadd =
239       bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
240    xadd->src(0).mod = i->src(0).mod;
241    xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
242    xadd->ftz = i->ftz;
243    return true;
244 }
245 
246 bool
visit(Instruction * i)247 GV100LegalizeSSA::visit(Instruction *i)
248 {
249    bool lowered = false;
250 
251    bld.setPosition(i, false);
252    if (i->sType == TYPE_F32 && i->dType != TYPE_F16 &&
253        prog->getType() != Program::TYPE_COMPUTE)
254       handleFTZ(i);
255 
256    switch (i->op) {
257    case OP_AND:
258    case OP_OR:
259    case OP_XOR:
260       if (i->def(0).getFile() != FILE_PREDICATE)
261          lowered = handleLOP2(i);
262       break;
263    case OP_NOT:
264       lowered = handleNOT(i);
265       break;
266    case OP_SHL:
267    case OP_SHR:
268       lowered = handleShift(i);
269       break;
270    case OP_SET:
271    case OP_SET_AND:
272    case OP_SET_OR:
273    case OP_SET_XOR:
274       if (i->def(0).getFile() != FILE_PREDICATE)
275          lowered = handleSET(i);
276       break;
277    case OP_SLCT:
278       lowered = handleCMP(i);
279       break;
280    case OP_PREEX2:
281       lowered = handlePREEX2(i);
282       break;
283    case OP_MUL:
284       if (!isFloatType(i->dType))
285          lowered = handleIMUL(i);
286       break;
287    case OP_MAD:
288       if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH)
289          lowered = handleIMAD_HIGH(i);
290       break;
291    case OP_SHFL:
292       lowered = handleSHFL(i);
293       break;
294    case OP_QUADON:
295       lowered = handleQUADON(i);
296       break;
297    case OP_QUADPOP:
298       lowered = handleQUADPOP(i);
299       break;
300    case OP_SUB:
301       lowered = handleSUB(i);
302       break;
303    case OP_MAX:
304    case OP_MIN:
305       if (!isFloatType(i->dType))
306          lowered = handleIMNMX(i);
307       break;
308    case OP_ADD:
309       if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8)
310          lowered = handleIADD64(i);
311       break;
312    case OP_PFETCH:
313       handlePFETCH(i);
314       break;
315    case OP_LOAD:
316       handleLOAD(i);
317       break;
318    default:
319       break;
320    }
321 
322    if (lowered)
323       delete_Instruction(prog, i);
324 
325    return true;
326 }
327 
328 bool
handleDMNMX(Instruction * i)329 GV100LoweringPass::handleDMNMX(Instruction *i)
330 {
331    Value *pred = bld.getSSA(1, FILE_PREDICATE);
332    Value *src0[2], *src1[2], *dest[2];
333 
334    bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred,
335              i->sType, i->getSrc(0), i->getSrc(1));
336    bld.mkSplit(src0, 4, i->getSrc(0));
337    bld.mkSplit(src1, 4, i->getSrc(1));
338    bld.mkSplit(dest, 4, i->getDef(0));
339    bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred);
340    bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred);
341    bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]);
342    return true;
343 }
344 
345 bool
handleEXTBF(Instruction * i)346 GV100LoweringPass::handleEXTBF(Instruction *i)
347 {
348    Value *bit = bld.getScratch();
349    Value *cnt = bld.getScratch();
350    Value *mask = bld.getScratch();
351    Value *zero = bld.mkImm(0);
352 
353    bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
354    bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
355    bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt);
356    bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask);
357    bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit);
358    if (isSignedType(i->dType))
359       bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt);
360 
361    return true;
362 }
363 
364 bool
handleFLOW(Instruction * i)365 GV100LoweringPass::handleFLOW(Instruction *i)
366 {
367    i->op = OP_BRA;
368    return false;
369 }
370 
371 bool
handleI2I(Instruction * i)372 GV100LoweringPass::handleI2I(Instruction *i)
373 {
374    bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))->
375       subOp = i->subOp;
376    bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0));
377    return true;
378 }
379 
380 bool
handleINSBF(Instruction * i)381 GV100LoweringPass::handleINSBF(Instruction *i)
382 {
383    Value *bit = bld.getScratch();
384    Value *cnt = bld.getScratch();
385    Value *mask = bld.getScratch();
386    Value *src0 = bld.getScratch();
387    Value *zero = bld.mkImm(0);
388 
389    bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
390    bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
391    bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt);
392 
393    bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask);
394    bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit);
395 
396    bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit);
397    bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)->
398       subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
399 
400    return true;
401 }
402 
403 bool
handlePINTERP(Instruction * i)404 GV100LoweringPass::handlePINTERP(Instruction *i)
405 {
406    Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
407    Instruction *ipa, *mul;
408 
409    ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2);
410    ipa->ipa = i->ipa;
411    mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1));
412 
413    if (i->getInterpMode() == NV50_IR_INTERP_SC) {
414       ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE));
415       mul->setPredicate(CC_NOT_P, ipa->getDef(1));
416    }
417 
418    return true;
419 }
420 
421 bool
handlePREFLOW(Instruction * i)422 GV100LoweringPass::handlePREFLOW(Instruction *i)
423 {
424    return true;
425 }
426 
427 bool
handlePRESIN(Instruction * i)428 GV100LoweringPass::handlePRESIN(Instruction *i)
429 {
430    const float f = 1.0 / (2.0 * 3.14159265);
431    bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f));
432    return true;
433 }
434 
435 bool
visit(Instruction * i)436 GV100LoweringPass::visit(Instruction *i)
437 {
438    bool lowered = false;
439 
440    bld.setPosition(i, false);
441 
442    switch (i->op) {
443    case OP_BREAK:
444    case OP_CONT:
445       lowered = handleFLOW(i);
446       break;
447    case OP_PREBREAK:
448    case OP_PRECONT:
449       lowered = handlePREFLOW(i);
450       break;
451    case OP_CVT:
452       if (i->src(0).getFile() != FILE_PREDICATE &&
453           i->def(0).getFile() != FILE_PREDICATE &&
454           !isFloatType(i->dType) && !isFloatType(i->sType))
455          lowered = handleI2I(i);
456       break;
457    case OP_EXTBF:
458       lowered = handleEXTBF(i);
459       break;
460    case OP_INSBF:
461       lowered = handleINSBF(i);
462       break;
463    case OP_MAX:
464    case OP_MIN:
465       if (i->dType == TYPE_F64)
466          lowered = handleDMNMX(i);
467       break;
468    case OP_PINTERP:
469       lowered = handlePINTERP(i);
470       break;
471    case OP_PRESIN:
472       lowered = handlePRESIN(i);
473       break;
474    default:
475       break;
476    }
477 
478    if (lowered)
479       delete_Instruction(prog, i);
480 
481    return true;
482 }
483 
484 } // namespace nv50_ir
485