1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir_target_nvc0.h"
24 
25 namespace nv50_ir {
26 
27 // Argh, all these assertions ...
28 
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32    CodeEmitterNVC0(const TargetNVC0 *, Program::Type);
33 
34    virtual bool emitInstruction(Instruction *);
35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
36    virtual void prepareEmission(Function *);
37 
38 private:
39    const TargetNVC0 *targNVC0;
40 
41    Program::Type progType;
42 
43    const bool writeIssueDelays;
44 
45 private:
46    void emitForm_A(const Instruction *, uint64_t);
47    void emitForm_B(const Instruction *, uint64_t);
48    void emitForm_S(const Instruction *, uint32_t, bool pred);
49 
50    void emitPredicate(const Instruction *);
51 
52    void setAddress16(const ValueRef&);
53    void setAddress24(const ValueRef&);
54    void setAddressByFile(const ValueRef&);
55    void setImmediate(const Instruction *, const int s); // needs op already set
56    void setImmediateS8(const ValueRef&);
57    void setSUConst16(const Instruction *, const int s);
58    void setSUPred(const Instruction *, const int s);
59    void setPDSTL(const Instruction *, const int d);
60 
61    void emitCondCode(CondCode cc, int pos);
62    void emitInterpMode(const Instruction *);
63    void emitLoadStoreType(DataType ty);
64    void emitSUGType(DataType);
65    void emitSUAddr(const TexInstruction *);
66    void emitSUDim(const TexInstruction *);
67    void emitCachingMode(CacheMode c);
68 
69    void emitShortSrc2(const ValueRef&);
70 
71    inline uint8_t getSRegEncoding(const ValueRef&);
72 
73    void roundMode_A(const Instruction *);
74    void roundMode_C(const Instruction *);
75    void roundMode_CS(const Instruction *);
76 
77    void emitNegAbs12(const Instruction *);
78 
79    void emitNOP(const Instruction *);
80 
81    void emitLOAD(const Instruction *);
82    void emitSTORE(const Instruction *);
83    void emitMOV(const Instruction *);
84    void emitATOM(const Instruction *);
85    void emitMEMBAR(const Instruction *);
86    void emitCCTL(const Instruction *);
87 
88    void emitINTERP(const Instruction *);
89    void emitAFETCH(const Instruction *);
90    void emitPFETCH(const Instruction *);
91    void emitVFETCH(const Instruction *);
92    void emitEXPORT(const Instruction *);
93    void emitOUT(const Instruction *);
94 
95    void emitUADD(const Instruction *);
96    void emitFADD(const Instruction *);
97    void emitDADD(const Instruction *);
98    void emitUMUL(const Instruction *);
99    void emitFMUL(const Instruction *);
100    void emitDMUL(const Instruction *);
101    void emitIMAD(const Instruction *);
102    void emitISAD(const Instruction *);
103    void emitSHLADD(const Instruction *a);
104    void emitFMAD(const Instruction *);
105    void emitDMAD(const Instruction *);
106    void emitMADSP(const Instruction *);
107 
108    void emitNOT(Instruction *);
109    void emitLogicOp(const Instruction *, uint8_t subOp);
110    void emitPOPC(const Instruction *);
111    void emitINSBF(const Instruction *);
112    void emitEXTBF(const Instruction *);
113    void emitBFIND(const Instruction *);
114    void emitPERMT(const Instruction *);
115    void emitShift(const Instruction *);
116 
117    void emitSFnOp(const Instruction *, uint8_t subOp);
118 
119    void emitCVT(Instruction *);
120    void emitMINMAX(const Instruction *);
121    void emitPreOp(const Instruction *);
122 
123    void emitSET(const CmpInstruction *);
124    void emitSLCT(const CmpInstruction *);
125    void emitSELP(const Instruction *);
126 
127    void emitTEXBAR(const Instruction *);
128    void emitTEX(const TexInstruction *);
129    void emitTEXCSAA(const TexInstruction *);
130    void emitTXQ(const TexInstruction *);
131 
132    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
133 
134    void emitFlow(const Instruction *);
135    void emitBAR(const Instruction *);
136 
137    void emitSUCLAMPMode(uint16_t);
138    void emitSUCalc(Instruction *);
139    void emitSULDGB(const TexInstruction *);
140    void emitSUSTGx(const TexInstruction *);
141 
142    void emitSULDB(const TexInstruction *);
143    void emitSUSTx(const TexInstruction *);
144    void emitSULEA(const TexInstruction *);
145 
146    void emitVSHL(const Instruction *);
147    void emitVectorSubOp(const Instruction *);
148 
149    void emitPIXLD(const Instruction *);
150 
151    void emitSHFL(const Instruction *);
152 
153    void emitVOTE(const Instruction *);
154 
155    inline void defId(const ValueDef&, const int pos);
156    inline void defId(const Instruction *, int d, const int pos);
157    inline void srcId(const ValueRef&, const int pos);
158    inline void srcId(const ValueRef *, const int pos);
159    inline void srcId(const Instruction *, int s, const int pos);
160    inline void srcAddr32(const ValueRef&, int pos, int shr);
161 
162    inline bool isLIMM(const ValueRef&, DataType ty);
163 };
164 
// for better visibility
// HEX64(h, l) token-pastes its two arguments into a single literal
// 0x<h><l>ULL; in this file it is used to write a 64-bit opcode as two groups
// of 8 hex digits (high word, low word).
#define HEX64(h, l) 0x##h##l##ULL

// Both macros expand to the same expression: the reg.data member of whatever
// rep() returns for the given operand. In this file SDATA is applied to
// sources (ValueRef) and DDATA to definitions (ValueDef).
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
170 
srcId(const ValueRef & src,const int pos)171 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
172 {
173    code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
174 }
175 
srcId(const ValueRef * src,const int pos)176 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
177 {
178    code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
179 }
180 
srcId(const Instruction * insn,int s,int pos)181 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
182 {
183    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
184    code[pos / 32] |= r << (pos % 32);
185 }
186 
187 void
srcAddr32(const ValueRef & src,int pos,int shr)188 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
189 {
190    const uint32_t offset = SDATA(src).offset >> shr;
191 
192    code[pos / 32] |= offset << (pos % 32);
193    if (pos && (pos < 32))
194       code[1] |= offset >> (32 - pos);
195 }
196 
defId(const ValueDef & def,const int pos)197 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
198 {
199    code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : 63) << (pos % 32);
200 }
201 
defId(const Instruction * insn,int d,const int pos)202 void CodeEmitterNVC0::defId(const Instruction *insn, int d, const int pos)
203 {
204    if (insn->defExists(d))
205       defId(insn->def(d), pos);
206    else
207       code[pos / 32] |= 63 << (pos % 32);
208 }
209 
isLIMM(const ValueRef & ref,DataType ty)210 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
211 {
212    const ImmediateValue *imm = ref.get()->asImm();
213 
214    if (ty == TYPE_F32)
215       return imm && imm->reg.data.u32 & 0xfff;
216    else
217       return imm && (imm->reg.data.s32 > 0x7ffff ||
218                      imm->reg.data.s32 < -0x80000);
219 }
220 
221 void
roundMode_A(const Instruction * insn)222 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
223 {
224    switch (insn->rnd) {
225    case ROUND_M: code[1] |= 1 << 23; break;
226    case ROUND_P: code[1] |= 2 << 23; break;
227    case ROUND_Z: code[1] |= 3 << 23; break;
228    default:
229       assert(insn->rnd == ROUND_N);
230       break;
231    }
232 }
233 
234 void
emitNegAbs12(const Instruction * i)235 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
236 {
237    if (i->src(1).mod.abs()) code[0] |= 1 << 6;
238    if (i->src(0).mod.abs()) code[0] |= 1 << 7;
239    if (i->src(1).mod.neg()) code[0] |= 1 << 8;
240    if (i->src(0).mod.neg()) code[0] |= 1 << 9;
241 }
242 
emitCondCode(CondCode cc,int pos)243 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
244 {
245    uint8_t val;
246 
247    switch (cc) {
248    case CC_LT:  val = 0x1; break;
249    case CC_LTU: val = 0x9; break;
250    case CC_EQ:  val = 0x2; break;
251    case CC_EQU: val = 0xa; break;
252    case CC_LE:  val = 0x3; break;
253    case CC_LEU: val = 0xb; break;
254    case CC_GT:  val = 0x4; break;
255    case CC_GTU: val = 0xc; break;
256    case CC_NE:  val = 0x5; break;
257    case CC_NEU: val = 0xd; break;
258    case CC_GE:  val = 0x6; break;
259    case CC_GEU: val = 0xe; break;
260    case CC_TR:  val = 0xf; break;
261    case CC_FL:  val = 0x0; break;
262 
263    case CC_A:  val = 0x14; break;
264    case CC_NA: val = 0x13; break;
265    case CC_S:  val = 0x15; break;
266    case CC_NS: val = 0x12; break;
267    case CC_C:  val = 0x16; break;
268    case CC_NC: val = 0x11; break;
269    case CC_O:  val = 0x17; break;
270    case CC_NO: val = 0x10; break;
271 
272    default:
273       val = 0;
274       assert(!"invalid condition code");
275       break;
276    }
277    code[pos / 32] |= val << (pos % 32);
278 }
279 
280 void
emitPredicate(const Instruction * i)281 CodeEmitterNVC0::emitPredicate(const Instruction *i)
282 {
283    if (i->predSrc >= 0) {
284       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
285       srcId(i->src(i->predSrc), 10);
286       if (i->cc == CC_NOT_P)
287          code[0] |= 0x2000; // negate
288    } else {
289       code[0] |= 0x1c00;
290    }
291 }
292 
293 void
setAddressByFile(const ValueRef & src)294 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
295 {
296    switch (src.getFile()) {
297    case FILE_MEMORY_GLOBAL:
298       srcAddr32(src, 26, 0);
299       break;
300    case FILE_MEMORY_LOCAL:
301    case FILE_MEMORY_SHARED:
302       setAddress24(src);
303       break;
304    default:
305       assert(src.getFile() == FILE_MEMORY_CONST);
306       setAddress16(src);
307       break;
308    }
309 }
310 
311 void
setAddress16(const ValueRef & src)312 CodeEmitterNVC0::setAddress16(const ValueRef& src)
313 {
314    Symbol *sym = src.get()->asSym();
315 
316    assert(sym);
317 
318    code[0] |= (sym->reg.data.offset & 0x003f) << 26;
319    code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
320 }
321 
322 void
setAddress24(const ValueRef & src)323 CodeEmitterNVC0::setAddress24(const ValueRef& src)
324 {
325    Symbol *sym = src.get()->asSym();
326 
327    assert(sym);
328 
329    code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
330    code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
331 }
332 
333 void
setImmediate(const Instruction * i,const int s)334 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
335 {
336    const ImmediateValue *imm = i->src(s).get()->asImm();
337    uint32_t u32;
338 
339    assert(imm);
340    u32 = imm->reg.data.u32;
341 
342    if ((code[0] & 0xf) == 0x1) {
343       // double immediate
344       uint64_t u64 = imm->reg.data.u64;
345       assert(!(u64 & 0x00000fffffffffffULL));
346       assert(!(code[1] & 0xc000));
347       code[0] |= ((u64 >> 44) & 0x3f) << 26;
348       code[1] |= 0xc000 | (u64 >> 50);
349    } else
350    if ((code[0] & 0xf) == 0x2) {
351       // LIMM
352       code[0] |= (u32 & 0x3f) << 26;
353       code[1] |= u32 >> 6;
354    } else
355    if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
356       // integer immediate
357       assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);
358       assert(!(code[1] & 0xc000));
359       u32 &= 0xfffff;
360       code[0] |= (u32 & 0x3f) << 26;
361       code[1] |= 0xc000 | (u32 >> 6);
362    } else {
363       // float immediate
364       assert(!(u32 & 0x00000fff));
365       assert(!(code[1] & 0xc000));
366       code[0] |= ((u32 >> 12) & 0x3f) << 26;
367       code[1] |= 0xc000 | (u32 >> 18);
368    }
369 }
370 
setImmediateS8(const ValueRef & ref)371 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
372 {
373    const ImmediateValue *imm = ref.get()->asImm();
374 
375    int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
376 
377    assert(s8 == imm->reg.data.s32);
378 
379    code[0] |= (s8 & 0x3f) << 26;
380    code[0] |= (s8 >> 6) << 8;
381 }
382 
setPDSTL(const Instruction * i,const int d)383 void CodeEmitterNVC0::setPDSTL(const Instruction *i, const int d)
384 {
385    assert(d < 0 || (i->defExists(d) && i->def(d).getFile() == FILE_PREDICATE));
386 
387    uint32_t pred = d >= 0 ? DDATA(i->def(d)).id : 7;
388 
389    code[0] |= (pred & 3) << 8;
390    code[1] |= (pred & 4) << (26 - 2);
391 }
392 
393 void
emitForm_A(const Instruction * i,uint64_t opc)394 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
395 {
396    code[0] = opc;
397    code[1] = opc >> 32;
398 
399    emitPredicate(i);
400 
401    defId(i->def(0), 14);
402 
403    int s1 = 26;
404    if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
405       s1 = 49;
406 
407    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
408       switch (i->getSrc(s)->reg.file) {
409       case FILE_MEMORY_CONST:
410          assert(!(code[1] & 0xc000));
411          code[1] |= (s == 2) ? 0x8000 : 0x4000;
412          code[1] |= i->getSrc(s)->reg.fileIndex << 10;
413          setAddress16(i->src(s));
414          break;
415       case FILE_IMMEDIATE:
416          assert(s == 1 ||
417                 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
418          assert(!(code[1] & 0xc000));
419          setImmediate(i, s);
420          break;
421       case FILE_GPR:
422          if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
423             break;
424          srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
425          break;
426       default:
427          if (i->op == OP_SELP) {
428             // OP_SELP is used to implement shared+atomics on Fermi.
429             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
430             srcId(i->src(s), 49);
431          }
432          // ignore here, can be predicate or flags, but must not be address
433          break;
434       }
435    }
436 }
437 
438 void
emitForm_B(const Instruction * i,uint64_t opc)439 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
440 {
441    code[0] = opc;
442    code[1] = opc >> 32;
443 
444    emitPredicate(i);
445 
446    defId(i->def(0), 14);
447 
448    switch (i->src(0).getFile()) {
449    case FILE_MEMORY_CONST:
450       assert(!(code[1] & 0xc000));
451       code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
452       setAddress16(i->src(0));
453       break;
454    case FILE_IMMEDIATE:
455       assert(!(code[1] & 0xc000));
456       setImmediate(i, 0);
457       break;
458    case FILE_GPR:
459       srcId(i->src(0), 26);
460       break;
461    default:
462       // ignore here, can be predicate or flags, but must not be address
463       break;
464    }
465 }
466 
467 void
emitForm_S(const Instruction * i,uint32_t opc,bool pred)468 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
469 {
470    code[0] = opc;
471 
472    int ss2a = 0;
473    if (opc == 0x0d || opc == 0x0e)
474       ss2a = 2;
475 
476    defId(i->def(0), 14);
477    srcId(i->src(0), 20);
478 
479    assert(pred || (i->predSrc < 0));
480    if (pred)
481       emitPredicate(i);
482 
483    for (int s = 1; s < 3 && i->srcExists(s); ++s) {
484       if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
485          assert(!(code[0] & (0x300 >> ss2a)));
486          switch (i->src(s).get()->reg.fileIndex) {
487          case 0:  code[0] |= 0x100 >> ss2a; break;
488          case 1:  code[0] |= 0x200 >> ss2a; break;
489          case 16: code[0] |= 0x300 >> ss2a; break;
490          default:
491             ERROR("invalid c[] space for short form\n");
492             break;
493          }
494          if (s == 1)
495             code[0] |= i->getSrc(s)->reg.data.offset << 24;
496          else
497             code[0] |= i->getSrc(s)->reg.data.offset << 6;
498       } else
499       if (i->src(s).getFile() == FILE_IMMEDIATE) {
500          assert(s == 1);
501          setImmediateS8(i->src(s));
502       } else
503       if (i->src(s).getFile() == FILE_GPR) {
504          srcId(i->src(s), (s == 1) ? 26 : 8);
505       }
506    }
507 }
508 
509 void
emitShortSrc2(const ValueRef & src)510 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
511 {
512    if (src.getFile() == FILE_MEMORY_CONST) {
513       switch (src.get()->reg.fileIndex) {
514       case 0:  code[0] |= 0x100; break;
515       case 1:  code[0] |= 0x200; break;
516       case 16: code[0] |= 0x300; break;
517       default:
518          assert(!"unsupported file index for short op");
519          break;
520       }
521       srcAddr32(src, 20, 2);
522    } else {
523       srcId(src, 20);
524       assert(src.getFile() == FILE_GPR);
525    }
526 }
527 
528 void
emitNOP(const Instruction * i)529 CodeEmitterNVC0::emitNOP(const Instruction *i)
530 {
531    code[0] = 0x000001e4;
532    code[1] = 0x40000000;
533    emitPredicate(i);
534 }
535 
536 void
emitFMAD(const Instruction * i)537 CodeEmitterNVC0::emitFMAD(const Instruction *i)
538 {
539    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
540 
541    if (i->encSize == 8) {
542       if (isLIMM(i->src(1), TYPE_F32)) {
543          emitForm_A(i, HEX64(20000000, 00000002));
544       } else {
545          emitForm_A(i, HEX64(30000000, 00000000));
546 
547          if (i->src(2).mod.neg())
548             code[0] |= 1 << 8;
549       }
550       roundMode_A(i);
551 
552       if (neg1)
553          code[0] |= 1 << 9;
554 
555       if (i->saturate)
556          code[0] |= 1 << 5;
557 
558       if (i->dnz)
559          code[0] |= 1 << 7;
560       else
561       if (i->ftz)
562          code[0] |= 1 << 6;
563    } else {
564       assert(!i->saturate && !i->src(2).mod.neg());
565       emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
566                  false);
567       if (neg1)
568          code[0] |= 1 << 4;
569    }
570 }
571 
572 void
emitDMAD(const Instruction * i)573 CodeEmitterNVC0::emitDMAD(const Instruction *i)
574 {
575    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
576 
577    emitForm_A(i, HEX64(20000000, 00000001));
578 
579    if (i->src(2).mod.neg())
580       code[0] |= 1 << 8;
581 
582    roundMode_A(i);
583 
584    if (neg1)
585       code[0] |= 1 << 9;
586 
587    assert(!i->saturate);
588    assert(!i->ftz);
589 }
590 
591 void
emitFMUL(const Instruction * i)592 CodeEmitterNVC0::emitFMUL(const Instruction *i)
593 {
594    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
595 
596    assert(i->postFactor >= -3 && i->postFactor <= 3);
597 
598    if (i->encSize == 8) {
599       if (isLIMM(i->src(1), TYPE_F32)) {
600          assert(i->postFactor == 0); // constant folded, hopefully
601          emitForm_A(i, HEX64(30000000, 00000002));
602       } else {
603          emitForm_A(i, HEX64(58000000, 00000000));
604          roundMode_A(i);
605          code[1] |= ((i->postFactor > 0) ?
606                      (7 - i->postFactor) : (0 - i->postFactor)) << 17;
607       }
608       if (neg)
609          code[1] ^= 1 << 25; // aliases with LIMM sign bit
610 
611       if (i->saturate)
612          code[0] |= 1 << 5;
613 
614       if (i->dnz)
615          code[0] |= 1 << 7;
616       else
617       if (i->ftz)
618          code[0] |= 1 << 6;
619    } else {
620       assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
621       emitForm_S(i, 0xa8, true);
622    }
623 }
624 
625 void
emitDMUL(const Instruction * i)626 CodeEmitterNVC0::emitDMUL(const Instruction *i)
627 {
628    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
629 
630    emitForm_A(i, HEX64(50000000, 00000001));
631    roundMode_A(i);
632 
633    if (neg)
634       code[0] |= 1 << 9;
635 
636    assert(!i->saturate);
637    assert(!i->ftz);
638    assert(!i->dnz);
639    assert(!i->postFactor);
640 }
641 
642 void
emitUMUL(const Instruction * i)643 CodeEmitterNVC0::emitUMUL(const Instruction *i)
644 {
645    if (i->encSize == 8) {
646       if (isLIMM(i->src(1), TYPE_U32)) {
647          emitForm_A(i, HEX64(10000000, 00000002));
648       } else {
649          emitForm_A(i, HEX64(50000000, 00000003));
650       }
651       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
652          code[0] |= 1 << 6;
653       if (i->sType == TYPE_S32)
654          code[0] |= 1 << 5;
655       if (i->dType == TYPE_S32)
656          code[0] |= 1 << 7;
657    } else {
658       emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
659 
660       if (i->sType == TYPE_S32)
661          code[0] |= 1 << 6;
662    }
663 }
664 
665 void
emitFADD(const Instruction * i)666 CodeEmitterNVC0::emitFADD(const Instruction *i)
667 {
668    if (i->encSize == 8) {
669       if (isLIMM(i->src(1), TYPE_F32)) {
670          assert(!i->saturate);
671          emitForm_A(i, HEX64(28000000, 00000002));
672 
673          code[0] |= i->src(0).mod.abs() << 7;
674          code[0] |= i->src(0).mod.neg() << 9;
675 
676          if (i->src(1).mod.abs())
677             code[1] &= 0xfdffffff;
678          if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
679             code[1] ^= 0x02000000;
680       } else {
681          emitForm_A(i, HEX64(50000000, 00000000));
682 
683          roundMode_A(i);
684          if (i->saturate)
685             code[1] |= 1 << 17;
686 
687          emitNegAbs12(i);
688          if (i->op == OP_SUB) code[0] ^= 1 << 8;
689       }
690       if (i->ftz)
691          code[0] |= 1 << 5;
692    } else {
693       assert(!i->saturate && i->op != OP_SUB &&
694              !i->src(0).mod.abs() &&
695              !i->src(1).mod.neg() && !i->src(1).mod.abs());
696 
697       emitForm_S(i, 0x49, true);
698 
699       if (i->src(0).mod.neg())
700          code[0] |= 1 << 7;
701    }
702 }
703 
704 void
emitDADD(const Instruction * i)705 CodeEmitterNVC0::emitDADD(const Instruction *i)
706 {
707    assert(i->encSize == 8);
708    emitForm_A(i, HEX64(48000000, 00000001));
709    roundMode_A(i);
710    assert(!i->saturate);
711    assert(!i->ftz);
712    emitNegAbs12(i);
713    if (i->op == OP_SUB)
714       code[0] ^= 1 << 8;
715 }
716 
717 void
emitUADD(const Instruction * i)718 CodeEmitterNVC0::emitUADD(const Instruction *i)
719 {
720    uint32_t addOp = 0;
721 
722    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
723 
724    if (i->src(0).mod.neg())
725       addOp |= 0x200;
726    if (i->src(1).mod.neg())
727       addOp |= 0x100;
728    if (i->op == OP_SUB)
729       addOp ^= 0x100;
730 
731    assert(addOp != 0x300); // would be add-plus-one
732 
733    if (i->encSize == 8) {
734       if (isLIMM(i->src(1), TYPE_U32)) {
735          emitForm_A(i, HEX64(08000000, 00000002));
736          if (i->flagsDef >= 0)
737             code[1] |= 1 << 26; // write carry
738       } else {
739          emitForm_A(i, HEX64(48000000, 00000003));
740          if (i->flagsDef >= 0)
741             code[1] |= 1 << 16; // write carry
742       }
743       code[0] |= addOp;
744 
745       if (i->saturate)
746          code[0] |= 1 << 5;
747       if (i->flagsSrc >= 0) // add carry
748          code[0] |= 1 << 6;
749    } else {
750       assert(!(addOp & 0x100));
751       emitForm_S(i, (addOp >> 3) |
752                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
753    }
754 }
755 
756 void
emitIMAD(const Instruction * i)757 CodeEmitterNVC0::emitIMAD(const Instruction *i)
758 {
759    uint8_t addOp =
760       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
761 
762    assert(i->encSize == 8);
763    emitForm_A(i, HEX64(20000000, 00000003));
764 
765    assert(addOp != 3);
766    code[0] |= addOp << 8;
767 
768    if (isSignedType(i->dType))
769       code[0] |= 1 << 7;
770    if (isSignedType(i->sType))
771       code[0] |= 1 << 5;
772 
773    code[1] |= i->saturate << 24;
774 
775    if (i->flagsDef >= 0) code[1] |= 1 << 16;
776    if (i->flagsSrc >= 0) code[1] |= 1 << 23;
777 
778    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
779       code[0] |= 1 << 6;
780 }
781 
782 void
emitSHLADD(const Instruction * i)783 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
784 {
785    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
786    const ImmediateValue *imm = i->src(1).get()->asImm();
787    assert(imm);
788 
789    code[0] = 0x00000003;
790    code[1] = 0x40000000 | addOp << 23;
791 
792    emitPredicate(i);
793 
794    defId(i->def(0), 14);
795    srcId(i->src(0), 20);
796 
797    if (i->flagsDef >= 0)
798       code[1] |= 1 << 16;
799 
800    assert(!(imm->reg.data.u32 & 0xffffffe0));
801    code[0] |= imm->reg.data.u32 << 5;
802 
803    switch (i->src(2).getFile()) {
804    case FILE_GPR:
805       srcId(i->src(2), 26);
806       break;
807    case FILE_MEMORY_CONST:
808       code[1] |= 0x4000;
809       code[1] |= i->getSrc(2)->reg.fileIndex << 10;
810       setAddress16(i->src(2));
811       break;
812    case FILE_IMMEDIATE:
813       setImmediate(i, 2);
814       break;
815    default:
816       assert(!"bad src2 file");
817       break;
818    }
819 }
820 
821 void
emitMADSP(const Instruction * i)822 CodeEmitterNVC0::emitMADSP(const Instruction *i)
823 {
824    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
825 
826    emitForm_A(i, HEX64(00000000, 00000003));
827 
828    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
829       code[1] |= 0x01800000;
830    } else {
831       code[0] |= (i->subOp & 0x00f) << 7;
832       code[0] |= (i->subOp & 0x0f0) << 1;
833       code[0] |= (i->subOp & 0x100) >> 3;
834       code[0] |= (i->subOp & 0x200) >> 2;
835       code[1] |= (i->subOp & 0xc00) << 13;
836    }
837 
838    if (i->flagsDef >= 0)
839       code[1] |= 1 << 16;
840 }
841 
842 void
emitISAD(const Instruction * i)843 CodeEmitterNVC0::emitISAD(const Instruction *i)
844 {
845    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
846    assert(i->encSize == 8);
847 
848    emitForm_A(i, HEX64(38000000, 00000003));
849 
850    if (i->dType == TYPE_S32)
851       code[0] |= 1 << 5;
852 }
853 
854 void
emitNOT(Instruction * i)855 CodeEmitterNVC0::emitNOT(Instruction *i)
856 {
857    assert(i->encSize == 8);
858    if (i->getPredicate())
859       i->moveSources(1, 1);
860    i->setSrc(1, i->src(0));
861    emitForm_A(i, HEX64(68000000, 000001c3));
862 }
863 
864 void
emitLogicOp(const Instruction * i,uint8_t subOp)865 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
866 {
867    if (i->def(0).getFile() == FILE_PREDICATE) {
868       code[0] = 0x00000004 | (subOp << 30);
869       code[1] = 0x0c000000;
870 
871       emitPredicate(i);
872 
873       defId(i->def(0), 17);
874       srcId(i->src(0), 20);
875       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
876       srcId(i->src(1), 26);
877       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
878 
879       if (i->defExists(1)) {
880          defId(i->def(1), 14);
881       } else {
882          code[0] |= 7 << 14;
883       }
884       // (a OP b) OP c
885       if (i->predSrc != 2 && i->srcExists(2)) {
886          code[1] |= subOp << 21;
887          srcId(i->src(2), 49);
888          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
889       } else {
890          code[1] |= 0x000e0000;
891       }
892    } else
893    if (i->encSize == 8) {
894       if (isLIMM(i->src(1), TYPE_U32)) {
895          emitForm_A(i, HEX64(38000000, 00000002));
896 
897          if (i->flagsDef >= 0)
898             code[1] |= 1 << 26;
899       } else {
900          emitForm_A(i, HEX64(68000000, 00000003));
901 
902          if (i->flagsDef >= 0)
903             code[1] |= 1 << 16;
904       }
905       code[0] |= subOp << 6;
906 
907       if (i->flagsSrc >= 0) // carry
908          code[0] |= 1 << 5;
909 
910       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
911       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
912    } else {
913       emitForm_S(i, (subOp << 5) |
914                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
915    }
916 }
917 
918 void
emitPOPC(const Instruction * i)919 CodeEmitterNVC0::emitPOPC(const Instruction *i)
920 {
921    emitForm_A(i, HEX64(54000000, 00000004));
922 
923    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
924    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
925 }
926 
927 void
emitINSBF(const Instruction * i)928 CodeEmitterNVC0::emitINSBF(const Instruction *i)
929 {
930    emitForm_A(i, HEX64(28000000, 00000003));
931 }
932 
933 void
emitEXTBF(const Instruction * i)934 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
935 {
936    emitForm_A(i, HEX64(70000000, 00000003));
937 
938    if (i->dType == TYPE_S32)
939       code[0] |= 1 << 5;
940    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
941       code[0] |= 1 << 8;
942 }
943 
944 void
emitBFIND(const Instruction * i)945 CodeEmitterNVC0::emitBFIND(const Instruction *i)
946 {
947    emitForm_B(i, HEX64(78000000, 00000003));
948 
949    if (i->dType == TYPE_S32)
950       code[0] |= 1 << 5;
951    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
952       code[0] |= 1 << 8;
953    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
954       code[0] |= 1 << 6;
955 }
956 
957 void
emitPERMT(const Instruction * i)958 CodeEmitterNVC0::emitPERMT(const Instruction *i)
959 {
960    emitForm_A(i, HEX64(24000000, 00000004));
961 
962    code[0] |= i->subOp << 5;
963 }
964 
965 void
emitShift(const Instruction * i)966 CodeEmitterNVC0::emitShift(const Instruction *i)
967 {
968    if (i->op == OP_SHR) {
969       emitForm_A(i, HEX64(58000000, 00000003)
970                  | (isSignedType(i->dType) ? 0x20 : 0x00));
971    } else {
972       emitForm_A(i, HEX64(60000000, 00000003));
973    }
974 
975    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
976       code[0] |= 1 << 9;
977 }
978 
979 void
emitPreOp(const Instruction * i)980 CodeEmitterNVC0::emitPreOp(const Instruction *i)
981 {
982    if (i->encSize == 8) {
983       emitForm_B(i, HEX64(60000000, 00000000));
984 
985       if (i->op == OP_PREEX2)
986          code[0] |= 0x20;
987 
988       if (i->src(0).mod.abs()) code[0] |= 1 << 6;
989       if (i->src(0).mod.neg()) code[0] |= 1 << 8;
990    } else {
991       emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
992    }
993 }
994 
995 void
emitSFnOp(const Instruction * i,uint8_t subOp)996 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
997 {
998    if (i->encSize == 8) {
999       code[0] = 0x00000000 | (subOp << 26);
1000       code[1] = 0xc8000000;
1001 
1002       emitPredicate(i);
1003 
1004       defId(i->def(0), 14);
1005       srcId(i->src(0), 20);
1006 
1007       assert(i->src(0).getFile() == FILE_GPR);
1008 
1009       if (i->saturate) code[0] |= 1 << 5;
1010 
1011       if (i->src(0).mod.abs()) code[0] |= 1 << 7;
1012       if (i->src(0).mod.neg()) code[0] |= 1 << 9;
1013    } else {
1014       emitForm_S(i, 0x80000008 | (subOp << 26), true);
1015 
1016       assert(!i->src(0).mod.neg());
1017       if (i->src(0).mod.abs()) code[0] |= 1 << 30;
1018    }
1019 }
1020 
1021 void
emitMINMAX(const Instruction * i)1022 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
1023 {
1024    uint64_t op;
1025 
1026    assert(i->encSize == 8);
1027 
1028    op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
1029 
1030    if (i->ftz)
1031       op |= 1 << 5;
1032    else
1033    if (!isFloatType(i->dType)) {
1034       op |= isSignedType(i->dType) ? 0x23 : 0x03;
1035       op |= i->subOp << 6;
1036    }
1037    if (i->dType == TYPE_F64)
1038       op |= 0x01;
1039 
1040    emitForm_A(i, op);
1041    emitNegAbs12(i);
1042 
1043    if (i->flagsDef >= 0)
1044       code[1] |= 1 << 16;
1045 }
1046 
1047 void
roundMode_C(const Instruction * i)1048 CodeEmitterNVC0::roundMode_C(const Instruction *i)
1049 {
1050    switch (i->rnd) {
1051    case ROUND_M:  code[1] |= 1 << 17; break;
1052    case ROUND_P:  code[1] |= 2 << 17; break;
1053    case ROUND_Z:  code[1] |= 3 << 17; break;
1054    case ROUND_NI: code[0] |= 1 << 7; break;
1055    case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
1056    case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
1057    case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
1058    case ROUND_N: break;
1059    default:
1060       assert(!"invalid round mode");
1061       break;
1062    }
1063 }
1064 
1065 void
roundMode_CS(const Instruction * i)1066 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
1067 {
1068    switch (i->rnd) {
1069    case ROUND_M:
1070    case ROUND_MI: code[0] |= 1 << 16; break;
1071    case ROUND_P:
1072    case ROUND_PI: code[0] |= 2 << 16; break;
1073    case ROUND_Z:
1074    case ROUND_ZI: code[0] |= 3 << 16; break;
1075    default:
1076       break;
1077    }
1078 }
1079 
1080 void
emitCVT(Instruction * i)1081 CodeEmitterNVC0::emitCVT(Instruction *i)
1082 {
1083    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1084    DataType dType;
1085 
1086    switch (i->op) {
1087    case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
1088    case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1089    case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1090    default:
1091       break;
1092    }
1093 
1094    const bool sat = (i->op == OP_SAT) || i->saturate;
1095    const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1096    const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1097 
1098    if (i->op == OP_NEG && i->dType == TYPE_U32)
1099       dType = TYPE_S32;
1100    else
1101       dType = i->dType;
1102 
1103    if (i->encSize == 8) {
1104       emitForm_B(i, HEX64(10000000, 00000004));
1105 
1106       roundMode_C(i);
1107 
1108       // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1109       code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1110       code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1111 
1112       // for 8/16 source types, the byte/word is in subOp. word 1 is
1113       // represented as 2.
1114       if (!isFloatType(i->sType))
1115          code[1] |= i->subOp << 0x17;
1116       else
1117          code[1] |= i->subOp << 0x18;
1118 
1119       if (sat)
1120          code[0] |= 0x20;
1121       if (abs)
1122          code[0] |= 1 << 6;
1123       if (neg && i->op != OP_ABS)
1124          code[0] |= 1 << 8;
1125 
1126       if (i->ftz)
1127          code[1] |= 1 << 23;
1128 
1129       if (isSignedIntType(dType))
1130          code[0] |= 0x080;
1131       if (isSignedIntType(i->sType))
1132          code[0] |= 0x200;
1133 
1134       if (isFloatType(dType)) {
1135          if (!isFloatType(i->sType))
1136             code[1] |= 0x08000000;
1137       } else {
1138          if (isFloatType(i->sType))
1139             code[1] |= 0x04000000;
1140          else
1141             code[1] |= 0x0c000000;
1142       }
1143    } else {
1144       if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1145          code[0] = 0x298;
1146       } else
1147       if (isFloatType(dType)) {
1148          if (isFloatType(i->sType))
1149             code[0] = 0x098;
1150          else
1151             code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1152       } else {
1153          assert(isFloatType(i->sType));
1154 
1155          code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1156       }
1157 
1158       if (neg) code[0] |= 1 << 16;
1159       if (sat) code[0] |= 1 << 18;
1160       if (abs) code[0] |= 1 << 19;
1161 
1162       roundMode_CS(i);
1163    }
1164 }
1165 
1166 void
emitSET(const CmpInstruction * i)1167 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1168 {
1169    uint32_t hi;
1170    uint32_t lo = 0;
1171 
1172    if (i->sType == TYPE_F64)
1173       lo = 0x1;
1174    else
1175    if (!isFloatType(i->sType))
1176       lo = 0x3;
1177 
1178    if (isSignedIntType(i->sType))
1179       lo |= 0x20;
1180    if (isFloatType(i->dType)) {
1181       if (isFloatType(i->sType))
1182          lo |= 0x20;
1183       else
1184          lo |= 0x80;
1185    }
1186 
1187    switch (i->op) {
1188    case OP_SET_AND: hi = 0x10000000; break;
1189    case OP_SET_OR:  hi = 0x10200000; break;
1190    case OP_SET_XOR: hi = 0x10400000; break;
1191    default:
1192       hi = 0x100e0000;
1193       break;
1194    }
1195    emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1196 
1197    if (i->op != OP_SET)
1198       srcId(i->src(2), 32 + 17);
1199 
1200    if (i->def(0).getFile() == FILE_PREDICATE) {
1201       if (i->sType == TYPE_F32)
1202          code[1] += 0x10000000;
1203       else
1204          code[1] += 0x08000000;
1205 
1206       code[0] &= ~0xfc000;
1207       defId(i->def(0), 17);
1208       if (i->defExists(1))
1209          defId(i->def(1), 14);
1210       else
1211          code[0] |= 0x1c000;
1212    }
1213 
1214    if (i->ftz)
1215       code[1] |= 1 << 27;
1216    if (i->flagsSrc >= 0)
1217       code[0] |= 1 << 6;
1218 
1219    emitCondCode(i->setCond, 32 + 23);
1220    emitNegAbs12(i);
1221 }
1222 
1223 void
emitSLCT(const CmpInstruction * i)1224 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1225 {
1226    uint64_t op;
1227 
1228    switch (i->dType) {
1229    case TYPE_S32:
1230       op = HEX64(30000000, 00000023);
1231       break;
1232    case TYPE_U32:
1233       op = HEX64(30000000, 00000003);
1234       break;
1235    case TYPE_F32:
1236       op = HEX64(38000000, 00000000);
1237       break;
1238    default:
1239       assert(!"invalid type for SLCT");
1240       op = 0;
1241       break;
1242    }
1243    emitForm_A(i, op);
1244 
1245    CondCode cc = i->setCond;
1246 
1247    if (i->src(2).mod.neg())
1248       cc = reverseCondCode(cc);
1249 
1250    emitCondCode(cc, 32 + 23);
1251 
1252    if (i->ftz)
1253       code[0] |= 1 << 5;
1254 }
1255 
1256 void
nvc0_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1257 nvc0_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1258 {
1259    int loc = entry->loc;
1260    bool val = false;
1261    switch (entry->ipa) {
1262    case 0:
1263       val = data.force_persample_interp;
1264       break;
1265    case 1:
1266       val = data.msaa;
1267       break;
1268    }
1269    if (val)
1270       code[loc + 1] |= 1 << 20;
1271    else
1272       code[loc + 1] &= ~(1 << 20);
1273 }
1274 
emitSELP(const Instruction * i)1275 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1276 {
1277    emitForm_A(i, HEX64(20000000, 00000004));
1278 
1279    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1280       code[1] |= 1 << 20;
1281 
1282    if (i->subOp >= 1) {
1283       addInterp(i->subOp - 1, 0, nvc0_selpFlip);
1284    }
1285 }
1286 
emitTEXBAR(const Instruction * i)1287 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1288 {
1289    code[0] = 0x00000006 | (i->subOp << 26);
1290    code[1] = 0xf0000000;
1291    emitPredicate(i);
1292    emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1293 }
1294 
emitTEXCSAA(const TexInstruction * i)1295 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1296 {
1297    code[0] = 0x00000086;
1298    code[1] = 0xd0000000;
1299 
1300    code[1] |= i->tex.r;
1301    code[1] |= i->tex.s << 8;
1302 
1303    if (i->tex.liveOnly)
1304       code[0] |= 1 << 9;
1305 
1306    defId(i->def(0), 14);
1307    srcId(i->src(0), 20);
1308 }
1309 
1310 static inline bool
isNextIndependentTex(const TexInstruction * i)1311 isNextIndependentTex(const TexInstruction *i)
1312 {
1313    if (!i->next || !isTextureOp(i->next->op))
1314       return false;
1315    if (i->getDef(0)->interfers(i->next->getSrc(0)))
1316       return false;
1317    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1318 }
1319 
1320 void
emitTEX(const TexInstruction * i)1321 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1322 {
1323    code[0] = 0x00000006;
1324 
1325    if (isNextIndependentTex(i))
1326       code[0] |= 0x080; // t mode
1327    else
1328       code[0] |= 0x100; // p mode
1329 
1330    if (i->tex.liveOnly)
1331       code[0] |= 1 << 9;
1332 
1333    switch (i->op) {
1334    case OP_TEX: code[1] = 0x80000000; break;
1335    case OP_TXB: code[1] = 0x84000000; break;
1336    case OP_TXL: code[1] = 0x86000000; break;
1337    case OP_TXF: code[1] = 0x90000000; break;
1338    case OP_TXG: code[1] = 0xa0000000; break;
1339    case OP_TXLQ: code[1] = 0xb0000000; break;
1340    case OP_TXD: code[1] = 0xe0000000; break;
1341    default:
1342       assert(!"invalid texture op");
1343       break;
1344    }
1345    if (i->op == OP_TXF) {
1346       if (!i->tex.levelZero)
1347          code[1] |= 0x02000000;
1348    } else
1349    if (i->tex.levelZero) {
1350       code[1] |= 0x02000000;
1351    }
1352 
1353    if (i->op != OP_TXD && i->tex.derivAll)
1354       code[1] |= 1 << 13;
1355 
1356    defId(i->def(0), 14);
1357    srcId(i->src(0), 20);
1358 
1359    emitPredicate(i);
1360 
1361    if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1362 
1363    code[1] |= i->tex.mask << 14;
1364 
1365    code[1] |= i->tex.r;
1366    code[1] |= i->tex.s << 8;
1367    if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1368       code[1] |= 1 << 18; // in 1st source (with array index)
1369 
1370    // texture target:
1371    code[1] |= (i->tex.target.getDim() - 1) << 20;
1372    if (i->tex.target.isCube())
1373       code[1] += 2 << 20;
1374    if (i->tex.target.isArray())
1375       code[1] |= 1 << 19;
1376    if (i->tex.target.isShadow())
1377       code[1] |= 1 << 24;
1378 
1379    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1380 
1381    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1382       // lzero
1383       if (i->op == OP_TXL)
1384          code[1] &= ~(1 << 26);
1385       else
1386       if (i->op == OP_TXF)
1387          code[1] &= ~(1 << 25);
1388    }
1389    if (i->tex.target == TEX_TARGET_2D_MS ||
1390        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1391       code[1] |= 1 << 23;
1392 
1393    if (i->tex.useOffsets == 1)
1394       code[1] |= 1 << 22;
1395    if (i->tex.useOffsets == 4)
1396       code[1] |= 1 << 23;
1397 
1398    srcId(i, src1, 26);
1399 }
1400 
1401 void
emitTXQ(const TexInstruction * i)1402 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1403 {
1404    code[0] = 0x00000086;
1405    code[1] = 0xc0000000;
1406 
1407    switch (i->tex.query) {
1408    case TXQ_DIMS:            code[1] |= 0 << 22; break;
1409    case TXQ_TYPE:            code[1] |= 1 << 22; break;
1410    case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1411    case TXQ_FILTER:          code[1] |= 3 << 22; break;
1412    case TXQ_LOD:             code[1] |= 4 << 22; break;
1413    case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1414    default:
1415       assert(!"invalid texture query");
1416       break;
1417    }
1418 
1419    code[1] |= i->tex.mask << 14;
1420 
1421    code[1] |= i->tex.r;
1422    code[1] |= i->tex.s << 8;
1423    if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1424       code[1] |= 1 << 18;
1425 
1426    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1427 
1428    defId(i->def(0), 14);
1429    srcId(i->src(0), 20);
1430    srcId(i, src1, 26);
1431 
1432    emitPredicate(i);
1433 }
1434 
1435 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1436 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1437 {
1438    code[0] = 0x00000200 | (laneMask << 6); // dall
1439    code[1] = 0x48000000 | qOp;
1440 
1441    defId(i->def(0), 14);
1442    srcId(i->src(0), 20);
1443    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
1444 
1445    emitPredicate(i);
1446 }
1447 
1448 void
emitFlow(const Instruction * i)1449 CodeEmitterNVC0::emitFlow(const Instruction *i)
1450 {
1451    const FlowInstruction *f = i->asFlow();
1452 
1453    unsigned mask; // bit 0: predicate, bit 1: target
1454 
1455    code[0] = 0x00000007;
1456 
1457    switch (i->op) {
1458    case OP_BRA:
1459       code[1] = f->absolute ? 0x00000000 : 0x40000000;
1460       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1461          code[0] |= 0x4000;
1462       mask = 3;
1463       break;
1464    case OP_CALL:
1465       code[1] = f->absolute ? 0x10000000 : 0x50000000;
1466       if (f->indirect)
1467          code[0] |= 0x4000; // indirect calls always use c[] source
1468       mask = 2;
1469       break;
1470 
1471    case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
1472    case OP_RET:     code[1] = 0x90000000; mask = 1; break;
1473    case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1474    case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
1475    case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
1476 
1477    case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
1478    case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1479    case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
1480    case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
1481 
1482    case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
1483    case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1484    case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
1485    default:
1486       assert(!"invalid flow operation");
1487       return;
1488    }
1489 
1490    if (mask & 1) {
1491       emitPredicate(i);
1492       if (i->flagsSrc < 0)
1493          code[0] |= 0x1e0;
1494    }
1495 
1496    if (!f)
1497       return;
1498 
1499    if (f->allWarp)
1500       code[0] |= 1 << 15;
1501    if (f->limit)
1502       code[0] |= 1 << 16;
1503 
1504    if (f->indirect) {
1505       if (code[0] & 0x4000) {
1506          assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1507          setAddress16(i->src(0));
1508          code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1509          if (f->op == OP_BRA)
1510             srcId(f->src(0).getIndirect(0), 20);
1511       } else {
1512          srcId(f, 0, 20);
1513       }
1514    }
1515 
1516    if (f->op == OP_CALL) {
1517       if (f->indirect) {
1518          // nothing
1519       } else
1520       if (f->builtin) {
1521          assert(f->absolute);
1522          uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1523          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1524          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1525       } else {
1526          assert(!f->absolute);
1527          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1528          code[0] |= (pcRel & 0x3f) << 26;
1529          code[1] |= (pcRel >> 6) & 0x3ffff;
1530       }
1531    } else
1532    if (mask & 2) {
1533       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1534       if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1535          pcRel += 8;
1536       // currently we don't want absolute branches
1537       assert(!f->absolute);
1538       code[0] |= (pcRel & 0x3f) << 26;
1539       code[1] |= (pcRel >> 6) & 0x3ffff;
1540    }
1541 }
1542 
1543 void
emitBAR(const Instruction * i)1544 CodeEmitterNVC0::emitBAR(const Instruction *i)
1545 {
1546    Value *rDef = NULL, *pDef = NULL;
1547 
1548    switch (i->subOp) {
1549    case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
1550    case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
1551    case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
1552    case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1553    default:
1554       code[0] = 0x04;
1555       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1556       break;
1557    }
1558    code[1] = 0x50000000;
1559 
1560    code[0] |= 63 << 14;
1561    code[1] |= 7 << 21;
1562 
1563    emitPredicate(i);
1564 
1565    // barrier id
1566    if (i->src(0).getFile() == FILE_GPR) {
1567       srcId(i->src(0), 20);
1568    } else {
1569       ImmediateValue *imm = i->getSrc(0)->asImm();
1570       assert(imm);
1571       code[0] |= imm->reg.data.u32 << 20;
1572       code[1] |= 0x8000;
1573    }
1574 
1575    // thread count
1576    if (i->src(1).getFile() == FILE_GPR) {
1577       srcId(i->src(1), 26);
1578    } else {
1579       ImmediateValue *imm = i->getSrc(1)->asImm();
1580       assert(imm);
1581       assert(imm->reg.data.u32 <= 0xfff);
1582       code[0] |= imm->reg.data.u32 << 26;
1583       code[1] |= imm->reg.data.u32 >> 6;
1584       code[1] |= 0x4000;
1585    }
1586 
1587    if (i->srcExists(2) && (i->predSrc != 2)) {
1588       srcId(i->src(2), 32 + 17);
1589       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1590          code[1] |= 1 << 20;
1591    } else {
1592       code[1] |= 7 << 17;
1593    }
1594 
1595    if (i->defExists(0)) {
1596       if (i->def(0).getFile() == FILE_GPR)
1597          rDef = i->getDef(0);
1598       else
1599          pDef = i->getDef(0);
1600 
1601       if (i->defExists(1)) {
1602          if (i->def(1).getFile() == FILE_GPR)
1603             rDef = i->getDef(1);
1604          else
1605             pDef = i->getDef(1);
1606       }
1607    }
1608    if (rDef) {
1609       code[0] &= ~(63 << 14);
1610       defId(rDef, 14);
1611    }
1612    if (pDef) {
1613       code[1] &= ~(7 << 21);
1614       defId(pDef, 32 + 21);
1615    }
1616 }
1617 
1618 void
emitAFETCH(const Instruction * i)1619 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1620 {
1621    code[0] = 0x00000006;
1622    code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1623 
1624    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1625       code[0] |= 0x200;
1626 
1627    emitPredicate(i);
1628 
1629    defId(i->def(0), 14);
1630    srcId(i->src(0).getIndirect(0), 20);
1631 }
1632 
1633 void
emitPFETCH(const Instruction * i)1634 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1635 {
1636    uint32_t prim = i->src(0).get()->reg.data.u32;
1637 
1638    code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1639    code[1] = 0x00000000 | (prim >> 6);
1640 
1641    emitPredicate(i);
1642 
1643    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1644 
1645    defId(i->def(0), 14);
1646    srcId(i, src1, 20);
1647 }
1648 
1649 void
emitVFETCH(const Instruction * i)1650 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1651 {
1652    code[0] = 0x00000006;
1653    code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1654 
1655    if (i->perPatch)
1656       code[0] |= 0x100;
1657    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1658       code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1659 
1660    emitPredicate(i);
1661 
1662    code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1663 
1664    defId(i->def(0), 14);
1665    srcId(i->src(0).getIndirect(0), 20);
1666    srcId(i->src(0).getIndirect(1), 26); // vertex address
1667 }
1668 
1669 void
emitEXPORT(const Instruction * i)1670 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1671 {
1672    unsigned int size = typeSizeof(i->dType);
1673 
1674    code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1675    code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1676 
1677    assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1678 
1679    if (i->perPatch)
1680       code[0] |= 0x100;
1681 
1682    emitPredicate(i);
1683 
1684    assert(i->src(1).getFile() == FILE_GPR);
1685 
1686    srcId(i->src(0).getIndirect(0), 20);
1687    srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1688    srcId(i->src(1), 26);
1689 }
1690 
1691 void
emitOUT(const Instruction * i)1692 CodeEmitterNVC0::emitOUT(const Instruction *i)
1693 {
1694    code[0] = 0x00000006;
1695    code[1] = 0x1c000000;
1696 
1697    emitPredicate(i);
1698 
1699    defId(i->def(0), 14); // new secret address
1700    srcId(i->src(0), 20); // old secret address, should be 0 initially
1701 
1702    assert(i->src(0).getFile() == FILE_GPR);
1703 
1704    if (i->op == OP_EMIT)
1705       code[0] |= 1 << 5;
1706    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1707       code[0] |= 1 << 6;
1708 
1709    // vertex stream
1710    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1711       unsigned int stream = SDATA(i->src(1)).u32;
1712       assert(stream < 4);
1713       if (stream) {
1714          code[1] |= 0xc000;
1715          code[0] |= stream << 26;
1716       } else {
1717          srcId(NULL, 26);
1718       }
1719    } else {
1720       srcId(i->src(1), 26);
1721    }
1722 }
1723 
1724 void
emitInterpMode(const Instruction * i)1725 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1726 {
1727    if (i->encSize == 8) {
1728       code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1729    } else {
1730       if (i->getInterpMode() == NV50_IR_INTERP_SC)
1731          code[0] |= 0x80;
1732       assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1733    }
1734 }
1735 
1736 void
nvc0_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)1737 nvc0_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1738 {
1739    int ipa = entry->ipa;
1740    int reg = entry->reg;
1741    int loc = entry->loc;
1742 
1743    if (data.flatshade &&
1744        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1745       ipa = NV50_IR_INTERP_FLAT;
1746       reg = 0x3f;
1747    } else if (data.force_persample_interp &&
1748               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1749               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1750       ipa |= NV50_IR_INTERP_CENTROID;
1751    }
1752    code[loc + 0] &= ~(0xf << 6);
1753    code[loc + 0] |= ipa << 6;
1754    code[loc + 0] &= ~(0x3f << 26);
1755    code[loc + 0] |= reg << 26;
1756 }
1757 
1758 void
emitINTERP(const Instruction * i)1759 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1760 {
1761    const uint32_t base = i->getSrc(0)->reg.data.offset;
1762 
1763    if (i->encSize == 8) {
1764       code[0] = 0x00000000;
1765       code[1] = 0xc0000000 | (base & 0xffff);
1766 
1767       if (i->saturate)
1768          code[0] |= 1 << 5;
1769 
1770       if (i->op == OP_PINTERP) {
1771          srcId(i->src(1), 26);
1772          addInterp(i->ipa, SDATA(i->src(1)).id, nvc0_interpApply);
1773       } else {
1774          code[0] |= 0x3f << 26;
1775          addInterp(i->ipa, 0x3f, nvc0_interpApply);
1776       }
1777 
1778       srcId(i->src(0).getIndirect(0), 20);
1779    } else {
1780       assert(i->op == OP_PINTERP);
1781       code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1782       srcId(i->src(1), 20);
1783    }
1784    emitInterpMode(i);
1785 
1786    emitPredicate(i);
1787    defId(i->def(0), 14);
1788 
1789    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1790       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1791    else
1792       code[1] |= 0x3f << 17;
1793 }
1794 
1795 void
emitLoadStoreType(DataType ty)1796 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1797 {
1798    uint8_t val;
1799 
1800    switch (ty) {
1801    case TYPE_U8:
1802       val = 0x00;
1803       break;
1804    case TYPE_S8:
1805       val = 0x20;
1806       break;
1807    case TYPE_F16:
1808    case TYPE_U16:
1809       val = 0x40;
1810       break;
1811    case TYPE_S16:
1812       val = 0x60;
1813       break;
1814    case TYPE_F32:
1815    case TYPE_U32:
1816    case TYPE_S32:
1817       val = 0x80;
1818       break;
1819    case TYPE_F64:
1820    case TYPE_U64:
1821    case TYPE_S64:
1822       val = 0xa0;
1823       break;
1824    case TYPE_B128:
1825       val = 0xc0;
1826       break;
1827    default:
1828       val = 0x80;
1829       assert(!"invalid type");
1830       break;
1831    }
1832    code[0] |= val;
1833 }
1834 
1835 void
emitCachingMode(CacheMode c)1836 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1837 {
1838    uint32_t val;
1839 
1840    switch (c) {
1841    case CACHE_CA:
1842 // case CACHE_WB:
1843       val = 0x000;
1844       break;
1845    case CACHE_CG:
1846       val = 0x100;
1847       break;
1848    case CACHE_CS:
1849       val = 0x200;
1850       break;
1851    case CACHE_CV:
1852 // case CACHE_WT:
1853       val = 0x300;
1854       break;
1855    default:
1856       val = 0;
1857       assert(!"invalid caching mode");
1858       break;
1859    }
1860    code[0] |= val;
1861 }
1862 
1863 static inline bool
uses64bitAddress(const Instruction * ldst)1864 uses64bitAddress(const Instruction *ldst)
1865 {
1866    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1867       ldst->src(0).isIndirect(0) &&
1868       ldst->getIndirect(0, 0)->reg.size == 8;
1869 }
1870 
1871 void
emitSTORE(const Instruction * i)1872 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1873 {
1874    uint32_t opc;
1875 
1876    switch (i->src(0).getFile()) {
1877    case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1878    case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1879    case FILE_MEMORY_SHARED:
1880       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1881          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1882             opc = 0xb8000000;
1883          else
1884             opc = 0xcc000000;
1885       } else {
1886          opc = 0xc9000000;
1887       }
1888       break;
1889    default:
1890       assert(!"invalid memory file");
1891       opc = 0;
1892       break;
1893    }
1894    code[0] = 0x00000005;
1895    code[1] = opc;
1896 
1897    if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
1898       // Unlocked store on shared memory can fail.
1899       if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
1900           i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1901          assert(i->defExists(0));
1902          setPDSTL(i, 0);
1903       }
1904    }
1905 
1906    setAddressByFile(i->src(0));
1907    srcId(i->src(1), 14);
1908    srcId(i->src(0).getIndirect(0), 20);
1909    if (uses64bitAddress(i))
1910       code[1] |= 1 << 26;
1911 
1912    emitPredicate(i);
1913 
1914    emitLoadStoreType(i->dType);
1915    emitCachingMode(i->cache);
1916 }
1917 
1918 void
emitLOAD(const Instruction * i)1919 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1920 {
1921    uint32_t opc;
1922 
1923    code[0] = 0x00000005;
1924 
1925    switch (i->src(0).getFile()) {
1926    case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1927    case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
1928    case FILE_MEMORY_SHARED:
1929       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1930          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1931             opc = 0xa8000000;
1932          else
1933             opc = 0xc4000000;
1934       } else {
1935          opc = 0xc1000000;
1936       }
1937       break;
1938    case FILE_MEMORY_CONST:
1939       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1940          emitMOV(i); // not sure if this is any better
1941          return;
1942       }
1943       opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1944       code[0] = 0x00000006 | (i->subOp << 8);
1945       break;
1946    default:
1947       assert(!"invalid memory file");
1948       opc = 0;
1949       break;
1950    }
1951    code[1] = opc;
1952 
1953    int r = 0, p = -1;
1954    if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
1955       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1956          if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1957             r = -1;
1958             p = 0;
1959          } else if (i->defExists(1)) { // r, p
1960             p = 1;
1961          } else {
1962             assert(!"Expected predicate dest for load locked");
1963          }
1964       }
1965    }
1966 
1967    if (r >= 0)
1968       defId(i->def(r), 14);
1969    else
1970       code[0] |= 63 << 14;
1971 
1972    if (p >= 0) {
1973       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1974          setPDSTL(i, p);
1975       else
1976          defId(i->def(p), 32 + 18);
1977    }
1978 
1979    setAddressByFile(i->src(0));
1980    srcId(i->src(0).getIndirect(0), 20);
1981    if (uses64bitAddress(i))
1982       code[1] |= 1 << 26;
1983 
1984    emitPredicate(i);
1985 
1986    emitLoadStoreType(i->dType);
1987    emitCachingMode(i->cache);
1988 }
1989 
1990 uint8_t
getSRegEncoding(const ValueRef & ref)1991 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1992 {
1993    switch (SDATA(ref).sv.sv) {
1994    case SV_LANEID:        return 0x00;
1995    case SV_PHYSID:        return 0x03;
1996    case SV_VERTEX_COUNT:  return 0x10;
1997    case SV_INVOCATION_ID: return 0x11;
1998    case SV_YDIR:          return 0x12;
1999    case SV_THREAD_KILL:   return 0x13;
2000    case SV_COMBINED_TID:  return 0x20;
2001    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
2002    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
2003    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
2004    case SV_GRIDID:        return 0x2c;
2005    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
2006    case SV_LBASE:         return 0x34;
2007    case SV_SBASE:         return 0x30;
2008    case SV_LANEMASK_EQ:   return 0x38;
2009    case SV_LANEMASK_LT:   return 0x39;
2010    case SV_LANEMASK_LE:   return 0x3a;
2011    case SV_LANEMASK_GT:   return 0x3b;
2012    case SV_LANEMASK_GE:   return 0x3c;
2013    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
2014    default:
2015       assert(!"no sreg for system value");
2016       return 0;
2017    }
2018 }
2019 
2020 void
emitMOV(const Instruction * i)2021 CodeEmitterNVC0::emitMOV(const Instruction *i)
2022 {
2023    assert(!i->saturate);
2024    if (i->def(0).getFile() == FILE_PREDICATE) {
2025       if (i->src(0).getFile() == FILE_GPR) {
2026          code[0] = 0xfc01c003;
2027          code[1] = 0x1a8e0000;
2028          srcId(i->src(0), 20);
2029       } else {
2030          code[0] = 0x0001c004;
2031          code[1] = 0x0c0e0000;
2032          if (i->src(0).getFile() == FILE_IMMEDIATE) {
2033             code[0] |= 7 << 20;
2034             if (!i->getSrc(0)->reg.data.u32)
2035                code[0] |= 1 << 23;
2036          } else {
2037             srcId(i->src(0), 20);
2038          }
2039       }
2040       defId(i->def(0), 17);
2041       emitPredicate(i);
2042    } else
2043    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2044       uint8_t sr = getSRegEncoding(i->src(0));
2045 
2046       if (i->encSize == 8) {
2047          code[0] = 0x00000004 | (sr << 26);
2048          code[1] = 0x2c000000;
2049       } else {
2050          code[0] = 0x40000008 | (sr << 20);
2051       }
2052       defId(i->def(0), 14);
2053 
2054       emitPredicate(i);
2055    } else
2056    if (i->encSize == 8) {
2057       uint64_t opc;
2058 
2059       if (i->src(0).getFile() == FILE_IMMEDIATE)
2060          opc = HEX64(18000000, 000001e2);
2061       else
2062       if (i->src(0).getFile() == FILE_PREDICATE)
2063          opc = HEX64(080e0000, 1c000004);
2064       else
2065          opc = HEX64(28000000, 00000004);
2066 
2067       if (i->src(0).getFile() != FILE_PREDICATE)
2068          opc |= i->lanes << 5;
2069 
2070       emitForm_B(i, opc);
2071 
2072       // Explicitly emit the predicate source as emitForm_B skips it.
2073       if (i->src(0).getFile() == FILE_PREDICATE)
2074          srcId(i->src(0), 20);
2075    } else {
2076       uint32_t imm;
2077 
2078       if (i->src(0).getFile() == FILE_IMMEDIATE) {
2079          imm = SDATA(i->src(0)).u32;
2080          if (imm & 0xfff00000) {
2081             assert(!(imm & 0x000fffff));
2082             code[0] = 0x00000318 | imm;
2083          } else {
2084             assert(imm < 0x800 && ((int32_t)imm >= -0x800));
2085             code[0] = 0x00000118 | (imm << 20);
2086          }
2087       } else {
2088          code[0] = 0x0028;
2089          emitShortSrc2(i->src(0));
2090       }
2091       defId(i->def(0), 14);
2092 
2093       emitPredicate(i);
2094    }
2095 }
2096 
2097 void
emitATOM(const Instruction * i)2098 CodeEmitterNVC0::emitATOM(const Instruction *i)
2099 {
2100    const bool hasDst = i->defExists(0);
2101    const bool casOrExch =
2102       i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
2103       i->subOp == NV50_IR_SUBOP_ATOM_CAS;
2104 
2105    if (i->dType == TYPE_U64) {
2106       switch (i->subOp) {
2107       case NV50_IR_SUBOP_ATOM_ADD:
2108          code[0] = 0x205;
2109          if (hasDst)
2110             code[1] = 0x507e0000;
2111          else
2112             code[1] = 0x10000000;
2113          break;
2114       case NV50_IR_SUBOP_ATOM_EXCH:
2115          code[0] = 0x305;
2116          code[1] = 0x507e0000;
2117          break;
2118       case NV50_IR_SUBOP_ATOM_CAS:
2119          code[0] = 0x325;
2120          code[1] = 0x50000000;
2121          break;
2122       default:
2123          assert(!"invalid u64 red op");
2124          break;
2125       }
2126    } else
2127    if (i->dType == TYPE_U32) {
2128       switch (i->subOp) {
2129       case NV50_IR_SUBOP_ATOM_EXCH:
2130          code[0] = 0x105;
2131          code[1] = 0x507e0000;
2132          break;
2133       case NV50_IR_SUBOP_ATOM_CAS:
2134          code[0] = 0x125;
2135          code[1] = 0x50000000;
2136          break;
2137       default:
2138          code[0] = 0x5 | (i->subOp << 5);
2139          if (hasDst)
2140             code[1] = 0x507e0000;
2141          else
2142             code[1] = 0x10000000;
2143          break;
2144       }
2145    } else
2146    if (i->dType == TYPE_S32) {
2147       assert(i->subOp <= 2);
2148       code[0] = 0x205 | (i->subOp << 5);
2149       if (hasDst)
2150          code[1] = 0x587e0000;
2151       else
2152          code[1] = 0x18000000;
2153    } else
2154    if (i->dType == TYPE_F32) {
2155       assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
2156       code[0] = 0x205;
2157       if (hasDst)
2158          code[1] = 0x687e0000;
2159       else
2160          code[1] = 0x28000000;
2161    }
2162 
2163    emitPredicate(i);
2164 
2165    srcId(i->src(1), 14);
2166 
2167    if (hasDst)
2168       defId(i->def(0), 32 + 11);
2169    else
2170    if (casOrExch)
2171       code[1] |= 63 << 11;
2172 
2173    if (hasDst || casOrExch) {
2174       const int32_t offset = SDATA(i->src(0)).offset;
2175       assert(offset < 0x80000 && offset >= -0x80000);
2176       code[0] |= offset << 26;
2177       code[1] |= (offset & 0x1ffc0) >> 6;
2178       code[1] |= (offset & 0xe0000) << 6;
2179    } else {
2180       srcAddr32(i->src(0), 26, 0);
2181    }
2182    if (i->getIndirect(0, 0)) {
2183       srcId(i->getIndirect(0, 0), 20);
2184       if (i->getIndirect(0, 0)->reg.size == 8)
2185          code[1] |= 1 << 26;
2186    } else {
2187       code[0] |= 63 << 20;
2188    }
2189 
2190    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2191       assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2192       code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2193    }
2194 }
2195 
2196 void
emitMEMBAR(const Instruction * i)2197 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2198 {
2199    switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2200    case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2201    case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
2202    default:
2203       code[0] = 0x45;
2204       assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2205       break;
2206    }
2207    code[1] = 0xe0000000;
2208 
2209    emitPredicate(i);
2210 }
2211 
2212 void
emitCCTL(const Instruction * i)2213 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2214 {
2215    code[0] = 0x00000005 | (i->subOp << 5);
2216 
2217    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2218       code[1] = 0x98000000;
2219       srcAddr32(i->src(0), 28, 2);
2220    } else {
2221       code[1] = 0xd0000000;
2222       setAddress24(i->src(0));
2223    }
2224    if (uses64bitAddress(i))
2225       code[1] |= 1 << 26;
2226    srcId(i->src(0).getIndirect(0), 20);
2227 
2228    emitPredicate(i);
2229 
2230    defId(i, 0, 14);
2231 }
2232 
2233 void
emitSUCLAMPMode(uint16_t subOp)2234 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2235 {
2236    uint8_t m;
2237    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2238    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2239    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2240    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2241    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2242    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2243    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2244    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2245    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2246    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2247    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2248    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2249    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2250    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2251    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2252    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2253    default:
2254       return;
2255    }
2256    code[0] |= m << 5;
2257    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2258       code[1] |= 1 << 16;
2259 }
2260 
2261 void
emitSUCalc(Instruction * i)2262 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2263 {
2264    ImmediateValue *imm = NULL;
2265    uint64_t opc;
2266 
2267    if (i->srcExists(2)) {
2268       imm = i->getSrc(2)->asImm();
2269       if (imm)
2270          i->setSrc(2, NULL); // special case, make emitForm_A not assert
2271    }
2272 
2273    switch (i->op) {
2274    case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2275    case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2276    case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2277    default:
2278       assert(0);
2279       return;
2280    }
2281    emitForm_A(i, opc);
2282 
2283    if (i->op == OP_SUCLAMP) {
2284       if (i->dType == TYPE_S32)
2285          code[0] |= 1 << 9;
2286       emitSUCLAMPMode(i->subOp);
2287    }
2288 
2289    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2290          code[1] |= 1 << 16;
2291 
2292    if (i->op != OP_SUEAU) {
2293       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2294          code[0] |= 63 << 14;
2295          code[1] |= i->getDef(0)->reg.data.id << 23;
2296       } else
2297       if (i->defExists(1)) { // r, p
2298          assert(i->def(1).getFile() == FILE_PREDICATE);
2299          code[1] |= i->getDef(1)->reg.data.id << 23;
2300       } else { // r, #
2301          code[1] |= 7 << 23;
2302       }
2303    }
2304    if (imm) {
2305       assert(i->op == OP_SUCLAMP);
2306       i->setSrc(2, imm);
2307       code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2308    }
2309 }
2310 
2311 void
emitSUGType(DataType ty)2312 CodeEmitterNVC0::emitSUGType(DataType ty)
2313 {
2314    switch (ty) {
2315    case TYPE_S32: code[1] |= 1 << 13; break;
2316    case TYPE_U8:  code[1] |= 2 << 13; break;
2317    case TYPE_S8:  code[1] |= 3 << 13; break;
2318    default:
2319       assert(ty == TYPE_U32);
2320       break;
2321    }
2322 }
2323 
2324 void
setSUConst16(const Instruction * i,const int s)2325 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2326 {
2327    const uint32_t offset = i->getSrc(s)->reg.data.offset;
2328 
2329    assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2330    assert(offset == (offset & 0xfffc));
2331 
2332    code[1] |= 1 << 21;
2333    code[0] |= offset << 24;
2334    code[1] |= offset >> 8;
2335    code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2336 }
2337 
2338 void
setSUPred(const Instruction * i,const int s)2339 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2340 {
2341    if (!i->srcExists(s) || (i->predSrc == s)) {
2342       code[1] |= 0x7 << 17;
2343    } else {
2344       if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2345          code[1] |= 1 << 20;
2346       srcId(i->src(s), 32 + 17);
2347    }
2348 }
2349 
2350 void
emitSULDGB(const TexInstruction * i)2351 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2352 {
2353    code[0] = 0x5;
2354    code[1] = 0xd4000000 | (i->subOp << 15);
2355 
2356    emitLoadStoreType(i->dType);
2357    emitSUGType(i->sType);
2358    emitCachingMode(i->cache);
2359 
2360    emitPredicate(i);
2361    defId(i->def(0), 14); // destination
2362    srcId(i->src(0), 20); // address
2363    // format
2364    if (i->src(1).getFile() == FILE_GPR)
2365       srcId(i->src(1), 26);
2366    else
2367       setSUConst16(i, 1);
2368    setSUPred(i, 2);
2369 }
2370 
2371 void
emitSUSTGx(const TexInstruction * i)2372 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2373 {
2374    code[0] = 0x5;
2375    code[1] = 0xdc000000 | (i->subOp << 15);
2376 
2377    if (i->op == OP_SUSTP)
2378       code[1] |= i->tex.mask << 22;
2379    else
2380       emitLoadStoreType(i->dType);
2381    emitSUGType(i->sType);
2382    emitCachingMode(i->cache);
2383 
2384    emitPredicate(i);
2385    srcId(i->src(0), 20); // address
2386    // format
2387    if (i->src(1).getFile() == FILE_GPR)
2388       srcId(i->src(1), 26);
2389    else
2390       setSUConst16(i, 1);
2391    srcId(i->src(3), 14); // values
2392    setSUPred(i, 2);
2393 }
2394 
2395 void
emitSUAddr(const TexInstruction * i)2396 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
2397 {
2398    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2399 
2400    if (i->tex.rIndirectSrc < 0) {
2401       code[1] |= 0x00004000;
2402       code[0] |= i->tex.r << 26;
2403    } else {
2404       srcId(i, i->tex.rIndirectSrc, 26);
2405    }
2406 }
2407 
2408 void
emitSUDim(const TexInstruction * i)2409 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
2410 {
2411    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2412 
2413    code[1] |= (i->tex.target.getDim() - 1) << 12;
2414    if (i->tex.target.isArray() || i->tex.target.isCube() ||
2415        i->tex.target.getDim() == 3) {
2416       // use e2d mode for 3-dim images, arrays and cubes.
2417       code[1] |= 3 << 12;
2418    }
2419 
2420    srcId(i->src(0), 20);
2421 }
2422 
2423 void
emitSULEA(const TexInstruction * i)2424 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
2425 {
2426    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2427 
2428    code[0] = 0x5;
2429    code[1] = 0xf0000000;
2430 
2431    emitPredicate(i);
2432    emitLoadStoreType(i->sType);
2433 
2434    defId(i->def(0), 14);
2435 
2436    if (i->defExists(1)) {
2437       defId(i->def(1), 32 + 22);
2438    } else {
2439       code[1] |= 7 << 22;
2440    }
2441 
2442    emitSUAddr(i);
2443    emitSUDim(i);
2444 }
2445 
2446 void
emitSULDB(const TexInstruction * i)2447 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
2448 {
2449    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2450 
2451    code[0] = 0x5;
2452    code[1] = 0xd4000000 | (i->subOp << 15);
2453 
2454    emitPredicate(i);
2455    emitLoadStoreType(i->dType);
2456 
2457    defId(i->def(0), 14);
2458 
2459    emitCachingMode(i->cache);
2460    emitSUAddr(i);
2461    emitSUDim(i);
2462 }
2463 
2464 void
emitSUSTx(const TexInstruction * i)2465 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
2466 {
2467    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2468 
2469    code[0] = 0x5;
2470    code[1] = 0xdc000000 | (i->subOp << 15);
2471 
2472    if (i->op == OP_SUSTP)
2473       code[1] |= i->tex.mask << 17;
2474    else
2475       emitLoadStoreType(i->dType);
2476 
2477    emitPredicate(i);
2478 
2479    srcId(i->src(1), 14);
2480 
2481    emitCachingMode(i->cache);
2482    emitSUAddr(i);
2483    emitSUDim(i);
2484 }
2485 
2486 void
emitVectorSubOp(const Instruction * i)2487 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2488 {
2489    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2490    case 0:
2491       code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2492       code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
2493       code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
2494       code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2495       break;
2496    case 1:
2497       code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
2498       code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2499       code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
2500       code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
2501       code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
2502       code[1] |= (i->mask & 0x3) << 2;
2503       break;
2504    case 2:
2505       code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2506       code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2507       code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2508       code[1] |= (i->mask & 0x3) << 2;
2509       code[1] |= (i->mask & 0xc) << 21;
2510       break;
2511    default:
2512       assert(0);
2513       break;
2514    }
2515 }
2516 
2517 void
emitVSHL(const Instruction * i)2518 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2519 {
2520    uint64_t opc = 0x4;
2521 
2522    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2523    case 0: opc |= 0xe8ULL << 56; break;
2524    case 1: opc |= 0xb4ULL << 56; break;
2525    case 2: opc |= 0x94ULL << 56; break;
2526    default:
2527       assert(0);
2528       break;
2529    }
2530    if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2531       if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2532       if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2533    } else {
2534       if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2535       if (isSignedType(i->sType)) opc |= 1 << 6;
2536    }
2537    emitForm_A(i, opc);
2538    emitVectorSubOp(i);
2539 
2540    if (i->saturate)
2541       code[0] |= 1 << 9;
2542    if (i->flagsDef >= 0)
2543       code[1] |= 1 << 16;
2544 }
2545 
2546 void
emitPIXLD(const Instruction * i)2547 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2548 {
2549    assert(i->encSize == 8);
2550    emitForm_A(i, HEX64(10000000, 00000006));
2551    code[0] |= i->subOp << 5;
2552    code[1] |= 0x00e00000;
2553 }
2554 
2555 void
emitSHFL(const Instruction * i)2556 CodeEmitterNVC0::emitSHFL(const Instruction *i)
2557 {
2558    const ImmediateValue *imm;
2559 
2560    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
2561 
2562    code[0] = 0x00000005;
2563    code[1] = 0x88000000 | (i->subOp << 23);
2564 
2565    emitPredicate(i);
2566 
2567    defId(i->def(0), 14);
2568    srcId(i->src(0), 20);
2569 
2570    switch (i->src(1).getFile()) {
2571    case FILE_GPR:
2572       srcId(i->src(1), 26);
2573       break;
2574    case FILE_IMMEDIATE:
2575       imm = i->getSrc(1)->asImm();
2576       assert(imm && imm->reg.data.u32 < 0x20);
2577       code[0] |= imm->reg.data.u32 << 26;
2578       code[0] |= 1 << 5;
2579       break;
2580    default:
2581       assert(!"invalid src1 file");
2582       break;
2583    }
2584 
2585    switch (i->src(2).getFile()) {
2586    case FILE_GPR:
2587       srcId(i->src(2), 49);
2588       break;
2589    case FILE_IMMEDIATE:
2590       imm = i->getSrc(2)->asImm();
2591       assert(imm && imm->reg.data.u32 < 0x2000);
2592       code[1] |= imm->reg.data.u32 << 10;
2593       code[0] |= 1 << 6;
2594       break;
2595    default:
2596       assert(!"invalid src2 file");
2597       break;
2598    }
2599 
2600    setPDSTL(i, i->defExists(1) ? 1 : -1);
2601 }
2602 
2603 void
emitVOTE(const Instruction * i)2604 CodeEmitterNVC0::emitVOTE(const Instruction *i)
2605 {
2606    const ImmediateValue *imm;
2607    uint32_t u32;
2608 
2609    code[0] = 0x00000004 | (i->subOp << 5);
2610    code[1] = 0x48000000;
2611 
2612    emitPredicate(i);
2613 
2614    unsigned rp = 0;
2615    for (int d = 0; i->defExists(d); d++) {
2616       if (i->def(d).getFile() == FILE_PREDICATE) {
2617          assert(!(rp & 2));
2618          rp |= 2;
2619          defId(i->def(d), 32 + 22);
2620       } else if (i->def(d).getFile() == FILE_GPR) {
2621          assert(!(rp & 1));
2622          rp |= 1;
2623          defId(i->def(d), 14);
2624       } else {
2625          assert(!"Unhandled def");
2626       }
2627    }
2628    if (!(rp & 1))
2629       code[0] |= 63 << 14;
2630    if (!(rp & 2))
2631       code[1] |= 7 << 22;
2632 
2633    switch (i->src(0).getFile()) {
2634    case FILE_PREDICATE:
2635       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
2636          code[0] |= 1 << 23;
2637       srcId(i->src(0), 20);
2638       break;
2639    case FILE_IMMEDIATE:
2640       imm = i->getSrc(0)->asImm();
2641       assert(imm);
2642       u32 = imm->reg.data.u32;
2643       assert(u32 == 0 || u32 == 1);
2644       code[0] |= (u32 == 1 ? 0x7 : 0xf) << 20;
2645       break;
2646    default:
2647       assert(!"Unhandled src");
2648       break;
2649    }
2650 }
2651 
2652 bool
emitInstruction(Instruction * insn)2653 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2654 {
2655    unsigned int size = insn->encSize;
2656 
2657    if (writeIssueDelays && !(codeSize & 0x3f))
2658       size += 8;
2659 
2660    if (!insn->encSize) {
2661       ERROR("skipping unencodable instruction: "); insn->print();
2662       return false;
2663    } else
2664    if (codeSize + size > codeSizeLimit) {
2665       ERROR("code emitter output buffer too small\n");
2666       return false;
2667    }
2668 
2669    if (writeIssueDelays) {
2670       if (!(codeSize & 0x3f)) {
2671          code[0] = 0x00000007; // cf issue delay "instruction"
2672          code[1] = 0x20000000;
2673          code += 2;
2674          codeSize += 8;
2675       }
2676       const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2677       uint32_t *data = code - (id * 2 + 2);
2678       if (id <= 2) {
2679          data[0] |= insn->sched << (id * 8 + 4);
2680       } else
2681       if (id == 3) {
2682          data[0] |= insn->sched << 28;
2683          data[1] |= insn->sched >> 4;
2684       } else {
2685          data[1] |= insn->sched << ((id - 4) * 8 + 4);
2686       }
2687    }
2688 
2689    // assert that instructions with multiple defs don't corrupt registers
2690    for (int d = 0; insn->defExists(d); ++d)
2691       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2692 
2693    switch (insn->op) {
2694    case OP_MOV:
2695    case OP_RDSV:
2696       emitMOV(insn);
2697       break;
2698    case OP_NOP:
2699       break;
2700    case OP_LOAD:
2701       emitLOAD(insn);
2702       break;
2703    case OP_STORE:
2704       emitSTORE(insn);
2705       break;
2706    case OP_LINTERP:
2707    case OP_PINTERP:
2708       emitINTERP(insn);
2709       break;
2710    case OP_VFETCH:
2711       emitVFETCH(insn);
2712       break;
2713    case OP_EXPORT:
2714       emitEXPORT(insn);
2715       break;
2716    case OP_PFETCH:
2717       emitPFETCH(insn);
2718       break;
2719    case OP_AFETCH:
2720       emitAFETCH(insn);
2721       break;
2722    case OP_EMIT:
2723    case OP_RESTART:
2724       emitOUT(insn);
2725       break;
2726    case OP_ADD:
2727    case OP_SUB:
2728       if (insn->dType == TYPE_F64)
2729          emitDADD(insn);
2730       else if (isFloatType(insn->dType))
2731          emitFADD(insn);
2732       else
2733          emitUADD(insn);
2734       break;
2735    case OP_MUL:
2736       if (insn->dType == TYPE_F64)
2737          emitDMUL(insn);
2738       else if (isFloatType(insn->dType))
2739          emitFMUL(insn);
2740       else
2741          emitUMUL(insn);
2742       break;
2743    case OP_MAD:
2744    case OP_FMA:
2745       if (insn->dType == TYPE_F64)
2746          emitDMAD(insn);
2747       else if (isFloatType(insn->dType))
2748          emitFMAD(insn);
2749       else
2750          emitIMAD(insn);
2751       break;
2752    case OP_SAD:
2753       emitISAD(insn);
2754       break;
2755    case OP_SHLADD:
2756       emitSHLADD(insn);
2757       break;
2758    case OP_NOT:
2759       emitNOT(insn);
2760       break;
2761    case OP_AND:
2762       emitLogicOp(insn, 0);
2763       break;
2764    case OP_OR:
2765       emitLogicOp(insn, 1);
2766       break;
2767    case OP_XOR:
2768       emitLogicOp(insn, 2);
2769       break;
2770    case OP_SHL:
2771    case OP_SHR:
2772       emitShift(insn);
2773       break;
2774    case OP_SET:
2775    case OP_SET_AND:
2776    case OP_SET_OR:
2777    case OP_SET_XOR:
2778       emitSET(insn->asCmp());
2779       break;
2780    case OP_SELP:
2781       emitSELP(insn);
2782       break;
2783    case OP_SLCT:
2784       emitSLCT(insn->asCmp());
2785       break;
2786    case OP_MIN:
2787    case OP_MAX:
2788       emitMINMAX(insn);
2789       break;
2790    case OP_ABS:
2791    case OP_NEG:
2792    case OP_CEIL:
2793    case OP_FLOOR:
2794    case OP_TRUNC:
2795    case OP_SAT:
2796       emitCVT(insn);
2797       break;
2798    case OP_CVT:
2799       if (insn->def(0).getFile() == FILE_PREDICATE ||
2800           insn->src(0).getFile() == FILE_PREDICATE)
2801          emitMOV(insn);
2802       else
2803          emitCVT(insn);
2804       break;
2805    case OP_RSQ:
2806       emitSFnOp(insn, 5 + 2 * insn->subOp);
2807       break;
2808    case OP_RCP:
2809       emitSFnOp(insn, 4 + 2 * insn->subOp);
2810       break;
2811    case OP_LG2:
2812       emitSFnOp(insn, 3);
2813       break;
2814    case OP_EX2:
2815       emitSFnOp(insn, 2);
2816       break;
2817    case OP_SIN:
2818       emitSFnOp(insn, 1);
2819       break;
2820    case OP_COS:
2821       emitSFnOp(insn, 0);
2822       break;
2823    case OP_PRESIN:
2824    case OP_PREEX2:
2825       emitPreOp(insn);
2826       break;
2827    case OP_TEX:
2828    case OP_TXB:
2829    case OP_TXL:
2830    case OP_TXD:
2831    case OP_TXF:
2832    case OP_TXG:
2833    case OP_TXLQ:
2834       emitTEX(insn->asTex());
2835       break;
2836    case OP_TXQ:
2837       emitTXQ(insn->asTex());
2838       break;
2839    case OP_TEXBAR:
2840       emitTEXBAR(insn);
2841       break;
2842    case OP_SUBFM:
2843    case OP_SUCLAMP:
2844    case OP_SUEAU:
2845       emitSUCalc(insn);
2846       break;
2847    case OP_MADSP:
2848       emitMADSP(insn);
2849       break;
2850    case OP_SULDB:
2851       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2852          emitSULDGB(insn->asTex());
2853       else
2854          emitSULDB(insn->asTex());
2855       break;
2856    case OP_SUSTB:
2857    case OP_SUSTP:
2858       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2859          emitSUSTGx(insn->asTex());
2860       else
2861          emitSUSTx(insn->asTex());
2862       break;
2863    case OP_SULEA:
2864       emitSULEA(insn->asTex());
2865       break;
2866    case OP_ATOM:
2867       emitATOM(insn);
2868       break;
2869    case OP_BRA:
2870    case OP_CALL:
2871    case OP_PRERET:
2872    case OP_RET:
2873    case OP_DISCARD:
2874    case OP_EXIT:
2875    case OP_PRECONT:
2876    case OP_CONT:
2877    case OP_PREBREAK:
2878    case OP_BREAK:
2879    case OP_JOINAT:
2880    case OP_BRKPT:
2881    case OP_QUADON:
2882    case OP_QUADPOP:
2883       emitFlow(insn);
2884       break;
2885    case OP_QUADOP:
2886       emitQUADOP(insn, insn->subOp, insn->lanes);
2887       break;
2888    case OP_DFDX:
2889       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2890       break;
2891    case OP_DFDY:
2892       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2893       break;
2894    case OP_POPCNT:
2895       emitPOPC(insn);
2896       break;
2897    case OP_INSBF:
2898       emitINSBF(insn);
2899       break;
2900    case OP_EXTBF:
2901       emitEXTBF(insn);
2902       break;
2903    case OP_BFIND:
2904       emitBFIND(insn);
2905       break;
2906    case OP_PERMT:
2907       emitPERMT(insn);
2908       break;
2909    case OP_JOIN:
2910       emitNOP(insn);
2911       insn->join = 1;
2912       break;
2913    case OP_BAR:
2914       emitBAR(insn);
2915       break;
2916    case OP_MEMBAR:
2917       emitMEMBAR(insn);
2918       break;
2919    case OP_CCTL:
2920       emitCCTL(insn);
2921       break;
2922    case OP_VSHL:
2923       emitVSHL(insn);
2924       break;
2925    case OP_PIXLD:
2926       emitPIXLD(insn);
2927       break;
2928    case OP_SHFL:
2929       emitSHFL(insn);
2930       break;
2931    case OP_VOTE:
2932       emitVOTE(insn);
2933       break;
2934    case OP_PHI:
2935    case OP_UNION:
2936    case OP_CONSTRAINT:
2937       ERROR("operation should have been eliminated");
2938       return false;
2939    case OP_EXP:
2940    case OP_LOG:
2941    case OP_SQRT:
2942    case OP_POW:
2943       ERROR("operation should have been lowered\n");
2944       return false;
2945    default:
2946       ERROR("unknown op: %u\n", insn->op);
2947       return false;
2948    }
2949 
2950    if (insn->join) {
2951       code[0] |= 0x10;
2952       assert(insn->encSize == 8);
2953    }
2954 
2955    code += insn->encSize / 4;
2956    codeSize += insn->encSize;
2957    return true;
2958 }
2959 
2960 uint32_t
getMinEncodingSize(const Instruction * i) const2961 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2962 {
2963    const Target::OpInfo &info = targ->getOpInfo(i);
2964 
2965    if (writeIssueDelays || info.minEncSize == 8 || true)
2966       return 8;
2967 
2968    if (i->ftz || i->saturate || i->join)
2969       return 8;
2970    if (i->rnd != ROUND_N)
2971       return 8;
2972    if (i->predSrc >= 0 && i->op == OP_MAD)
2973       return 8;
2974 
2975    if (i->op == OP_PINTERP) {
2976       if (i->getSampleMode() || true) // XXX: grr, short op doesn't work
2977          return 8;
2978    } else
2979    if (i->op == OP_MOV && i->lanes != 0xf) {
2980       return 8;
2981    }
2982 
2983    for (int s = 0; i->srcExists(s); ++s) {
2984       if (i->src(s).isIndirect(0))
2985          return 8;
2986 
2987       if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2988          if (SDATA(i->src(s)).offset >= 0x100)
2989             return 8;
2990          if (i->getSrc(s)->reg.fileIndex > 1 &&
2991              i->getSrc(s)->reg.fileIndex != 16)
2992              return 8;
2993       } else
2994       if (i->src(s).getFile() == FILE_IMMEDIATE) {
2995          if (i->dType == TYPE_F32) {
2996             if (SDATA(i->src(s)).u32 >= 0x100)
2997                return 8;
2998          } else {
2999             if (SDATA(i->src(s)).u32 > 0xff)
3000                return 8;
3001          }
3002       }
3003 
3004       if (i->op == OP_CVT)
3005          continue;
3006       if (i->src(s).mod != Modifier(0)) {
3007          if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
3008             if (i->op != OP_RSQ)
3009                return 8;
3010          if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
3011             if (i->op != OP_ADD || s != 0)
3012                return 8;
3013       }
3014    }
3015 
3016    return 4;
3017 }
3018 
3019 // Simplified, erring on safe side.
3020 class SchedDataCalculator : public Pass
3021 {
3022 public:
SchedDataCalculator(const Target * targ)3023    SchedDataCalculator(const Target *targ) : score(NULL), prevData(0),
3024       prevOp(OP_NOP), targ(targ) { }
3025 
3026 private:
3027    struct RegScores
3028    {
3029       struct Resource {
3030          int st[DATA_FILE_COUNT]; // LD to LD delay 3
3031          int ld[DATA_FILE_COUNT]; // ST to ST delay 3
3032          int tex; // TEX to non-TEX delay 17 (0x11)
3033          int sfu; // SFU to SFU delay 3 (except PRE-ops)
3034          int imul; // integer MUL to MUL delay 3
3035       } res;
3036       struct ScoreData {
3037          int r[256];
3038          int p[8];
3039          int c;
3040       } rd, wr;
3041       int base;
3042       int regs;
3043 
rebasenv50_ir::SchedDataCalculator::RegScores3044       void rebase(const int base)
3045       {
3046          const int delta = this->base - base;
3047          if (!delta)
3048             return;
3049          this->base = 0;
3050 
3051          for (int i = 0; i < regs; ++i) {
3052             rd.r[i] += delta;
3053             wr.r[i] += delta;
3054          }
3055          for (int i = 0; i < 8; ++i) {
3056             rd.p[i] += delta;
3057             wr.p[i] += delta;
3058          }
3059          rd.c += delta;
3060          wr.c += delta;
3061 
3062          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3063             res.ld[f] += delta;
3064             res.st[f] += delta;
3065          }
3066          res.sfu += delta;
3067          res.imul += delta;
3068          res.tex += delta;
3069       }
wipenv50_ir::SchedDataCalculator::RegScores3070       void wipe(int regs)
3071       {
3072          memset(&rd, 0, sizeof(rd));
3073          memset(&wr, 0, sizeof(wr));
3074          memset(&res, 0, sizeof(res));
3075          this->regs = regs;
3076       }
getLatestnv50_ir::SchedDataCalculator::RegScores3077       int getLatest(const ScoreData& d) const
3078       {
3079          int max = 0;
3080          for (int i = 0; i < regs; ++i)
3081             if (d.r[i] > max)
3082                max = d.r[i];
3083          for (int i = 0; i < 8; ++i)
3084             if (d.p[i] > max)
3085                max = d.p[i];
3086          if (d.c > max)
3087             max = d.c;
3088          return max;
3089       }
getLatestRdnv50_ir::SchedDataCalculator::RegScores3090       inline int getLatestRd() const
3091       {
3092          return getLatest(rd);
3093       }
getLatestWrnv50_ir::SchedDataCalculator::RegScores3094       inline int getLatestWr() const
3095       {
3096          return getLatest(wr);
3097       }
getLatestnv50_ir::SchedDataCalculator::RegScores3098       inline int getLatest() const
3099       {
3100          const int a = getLatestRd();
3101          const int b = getLatestWr();
3102 
3103          int max = MAX2(a, b);
3104          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3105             max = MAX2(res.ld[f], max);
3106             max = MAX2(res.st[f], max);
3107          }
3108          max = MAX2(res.sfu, max);
3109          max = MAX2(res.imul, max);
3110          max = MAX2(res.tex, max);
3111          return max;
3112       }
setMaxnv50_ir::SchedDataCalculator::RegScores3113       void setMax(const RegScores *that)
3114       {
3115          for (int i = 0; i < regs; ++i) {
3116             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3117             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3118          }
3119          for (int i = 0; i < 8; ++i) {
3120             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3121             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3122          }
3123          rd.c = MAX2(rd.c, that->rd.c);
3124          wr.c = MAX2(wr.c, that->wr.c);
3125 
3126          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3127             res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
3128             res.st[f] = MAX2(res.st[f], that->res.st[f]);
3129          }
3130          res.sfu = MAX2(res.sfu, that->res.sfu);
3131          res.imul = MAX2(res.imul, that->res.imul);
3132          res.tex = MAX2(res.tex, that->res.tex);
3133       }
printnv50_ir::SchedDataCalculator::RegScores3134       void print(int cycle)
3135       {
3136          for (int i = 0; i < regs; ++i) {
3137             if (rd.r[i] > cycle)
3138                INFO("rd $r%i @ %i\n", i, rd.r[i]);
3139             if (wr.r[i] > cycle)
3140                INFO("wr $r%i @ %i\n", i, wr.r[i]);
3141          }
3142          for (int i = 0; i < 8; ++i) {
3143             if (rd.p[i] > cycle)
3144                INFO("rd $p%i @ %i\n", i, rd.p[i]);
3145             if (wr.p[i] > cycle)
3146                INFO("wr $p%i @ %i\n", i, wr.p[i]);
3147          }
3148          if (rd.c > cycle)
3149             INFO("rd $c @ %i\n", rd.c);
3150          if (wr.c > cycle)
3151             INFO("wr $c @ %i\n", wr.c);
3152          if (res.sfu > cycle)
3153             INFO("sfu @ %i\n", res.sfu);
3154          if (res.imul > cycle)
3155             INFO("imul @ %i\n", res.imul);
3156          if (res.tex > cycle)
3157             INFO("tex @ %i\n", res.tex);
3158       }
3159    };
3160 
3161    RegScores *score; // for current BB
3162    std::vector<RegScores> scoreBoards;
3163    int prevData;
3164    operation prevOp;
3165 
3166    const Target *targ;
3167 
3168    bool visit(Function *);
3169    bool visit(BasicBlock *);
3170 
3171    void commitInsn(const Instruction *, int cycle);
3172    int calcDelay(const Instruction *, int cycle) const;
3173    void setDelay(Instruction *, int delay, Instruction *next);
3174 
3175    void recordRd(const Value *, const int ready);
3176    void recordWr(const Value *, const int ready);
3177    void checkRd(const Value *, int cycle, int& delay) const;
3178    void checkWr(const Value *, int cycle, int& delay) const;
3179 
3180    int getCycles(const Instruction *, int origDelay) const;
3181 };
3182 
3183 void
setDelay(Instruction * insn,int delay,Instruction * next)3184 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
3185 {
3186    if (insn->op == OP_EXIT || insn->op == OP_RET)
3187       delay = MAX2(delay, 14);
3188 
3189    if (insn->op == OP_TEXBAR) {
3190       // TODO: except if results not used before EXIT
3191       insn->sched = 0xc2;
3192    } else
3193    if (insn->op == OP_JOIN || insn->join) {
3194       insn->sched = 0x00;
3195    } else
3196    if (delay >= 0 || prevData == 0x04 ||
3197        !next || !targ->canDualIssue(insn, next)) {
3198       insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
3199       if (prevOp == OP_EXPORT)
3200          insn->sched |= 0x40;
3201       else
3202          insn->sched |= 0x20;
3203    } else {
3204       insn->sched = 0x04; // dual-issue
3205    }
3206 
3207    if (prevData != 0x04 || prevOp != OP_EXPORT)
3208       if (insn->sched != 0x04 || insn->op == OP_EXPORT)
3209          prevOp = insn->op;
3210 
3211    prevData = insn->sched;
3212 }
3213 
3214 int
getCycles(const Instruction * insn,int origDelay) const3215 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
3216 {
3217    if (insn->sched & 0x80) {
3218       int c = (insn->sched & 0x0f) * 2 + 1;
3219       if (insn->op == OP_TEXBAR && origDelay > 0)
3220          c += origDelay;
3221       return c;
3222    }
3223    if (insn->sched & 0x60)
3224       return (insn->sched & 0x1f) + 1;
3225    return (insn->sched == 0x04) ? 0 : 32;
3226 }
3227 
3228 bool
visit(Function * func)3229 SchedDataCalculator::visit(Function *func)
3230 {
3231    int regs = targ->getFileSize(FILE_GPR) + 1;
3232    scoreBoards.resize(func->cfg.getSize());
3233    for (size_t i = 0; i < scoreBoards.size(); ++i)
3234       scoreBoards[i].wipe(regs);
3235    return true;
3236 }
3237 
3238 bool
visit(BasicBlock * bb)3239 SchedDataCalculator::visit(BasicBlock *bb)
3240 {
3241    Instruction *insn;
3242    Instruction *next = NULL;
3243 
3244    int cycle = 0;
3245 
3246    prevData = 0x00;
3247    prevOp = OP_NOP;
3248    score = &scoreBoards.at(bb->getId());
3249 
3250    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
3251       // back branches will wait until all target dependencies are satisfied
3252       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
3253          continue;
3254       BasicBlock *in = BasicBlock::get(ei.getNode());
3255       if (in->getExit()) {
3256          if (prevData != 0x04)
3257             prevData = in->getExit()->sched;
3258          prevOp = in->getExit()->op;
3259       }
3260       score->setMax(&scoreBoards.at(in->getId()));
3261    }
3262    if (bb->cfg.incidentCount() > 1)
3263       prevOp = OP_NOP;
3264 
3265 #ifdef NVC0_DEBUG_SCHED_DATA
3266    INFO("=== BB:%i initial scores\n", bb->getId());
3267    score->print(cycle);
3268 #endif
3269 
3270    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
3271       next = insn->next;
3272 
3273       commitInsn(insn, cycle);
3274       int delay = calcDelay(next, cycle);
3275       setDelay(insn, delay, next);
3276       cycle += getCycles(insn, delay);
3277 
3278 #ifdef NVC0_DEBUG_SCHED_DATA
3279       INFO("cycle %i, sched %02x\n", cycle, insn->sched);
3280       insn->print();
3281       next->print();
3282 #endif
3283    }
3284    if (!insn)
3285       return true;
3286    commitInsn(insn, cycle);
3287 
3288    int bbDelay = -1;
3289 
3290    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
3291       BasicBlock *out = BasicBlock::get(ei.getNode());
3292 
3293       if (ei.getType() != Graph::Edge::BACK) {
3294          // only test the first instruction of the outgoing block
3295          next = out->getEntry();
3296          if (next)
3297             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
3298       } else {
3299          // wait until all dependencies are satisfied
3300          const int regsFree = score->getLatest();
3301          next = out->getFirst();
3302          for (int c = cycle; next && c < regsFree; next = next->next) {
3303             bbDelay = MAX2(bbDelay, calcDelay(next, c));
3304             c += getCycles(next, bbDelay);
3305          }
3306          next = NULL;
3307       }
3308    }
3309    if (bb->cfg.outgoingCount() != 1)
3310       next = NULL;
3311    setDelay(insn, bbDelay, next);
3312    cycle += getCycles(insn, bbDelay);
3313 
3314    score->rebase(cycle); // common base for initializing out blocks' scores
3315    return true;
3316 }
3317 
3318 #define NVE4_MAX_ISSUE_DELAY 0x1f
3319 int
calcDelay(const Instruction * insn,int cycle) const3320 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
3321 {
3322    int delay = 0, ready = cycle;
3323 
3324    for (int s = 0; insn->srcExists(s); ++s)
3325       checkRd(insn->getSrc(s), cycle, delay);
3326    // WAR & WAW don't seem to matter
3327    // for (int s = 0; insn->srcExists(s); ++s)
3328    //   recordRd(insn->getSrc(s), cycle);
3329 
3330    switch (Target::getOpClass(insn->op)) {
3331    case OPCLASS_SFU:
3332       ready = score->res.sfu;
3333       break;
3334    case OPCLASS_ARITH:
3335       if (insn->op == OP_MUL && !isFloatType(insn->dType))
3336          ready = score->res.imul;
3337       break;
3338    case OPCLASS_TEXTURE:
3339       ready = score->res.tex;
3340       break;
3341    case OPCLASS_LOAD:
3342       ready = score->res.ld[insn->src(0).getFile()];
3343       break;
3344    case OPCLASS_STORE:
3345       ready = score->res.st[insn->src(0).getFile()];
3346       break;
3347    default:
3348       break;
3349    }
3350    if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
3351       ready = MAX2(ready, score->res.tex);
3352 
3353    delay = MAX2(delay, ready - cycle);
3354 
3355    // if can issue next cycle, delay is 0, not 1
3356    return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
3357 }
3358 
3359 void
commitInsn(const Instruction * insn,int cycle)3360 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
3361 {
3362    const int ready = cycle + targ->getLatency(insn);
3363 
3364    for (int d = 0; insn->defExists(d); ++d)
3365       recordWr(insn->getDef(d), ready);
3366    // WAR & WAW don't seem to matter
3367    // for (int s = 0; insn->srcExists(s); ++s)
3368    //   recordRd(insn->getSrc(s), cycle);
3369 
3370    switch (Target::getOpClass(insn->op)) {
3371    case OPCLASS_SFU:
3372       score->res.sfu = cycle + 4;
3373       break;
3374    case OPCLASS_ARITH:
3375       if (insn->op == OP_MUL && !isFloatType(insn->dType))
3376          score->res.imul = cycle + 4;
3377       break;
3378    case OPCLASS_TEXTURE:
3379       score->res.tex = cycle + 18;
3380       break;
3381    case OPCLASS_LOAD:
3382       if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3383          break;
3384       score->res.ld[insn->src(0).getFile()] = cycle + 4;
3385       score->res.st[insn->src(0).getFile()] = ready;
3386       break;
3387    case OPCLASS_STORE:
3388       score->res.st[insn->src(0).getFile()] = cycle + 4;
3389       score->res.ld[insn->src(0).getFile()] = ready;
3390       break;
3391    case OPCLASS_OTHER:
3392       if (insn->op == OP_TEXBAR)
3393          score->res.tex = cycle;
3394       break;
3395    default:
3396       break;
3397    }
3398 
3399 #ifdef NVC0_DEBUG_SCHED_DATA
3400    score->print(cycle);
3401 #endif
3402 }
3403 
3404 void
checkRd(const Value * v,int cycle,int & delay) const3405 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3406 {
3407    int ready = cycle;
3408    int a, b;
3409 
3410    switch (v->reg.file) {
3411    case FILE_GPR:
3412       a = v->reg.data.id;
3413       b = a + v->reg.size / 4;
3414       for (int r = a; r < b; ++r)
3415          ready = MAX2(ready, score->rd.r[r]);
3416       break;
3417    case FILE_PREDICATE:
3418       ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3419       break;
3420    case FILE_FLAGS:
3421       ready = MAX2(ready, score->rd.c);
3422       break;
3423    case FILE_SHADER_INPUT:
3424    case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3425    case FILE_MEMORY_LOCAL:
3426    case FILE_MEMORY_CONST:
3427    case FILE_MEMORY_SHARED:
3428    case FILE_MEMORY_GLOBAL:
3429    case FILE_SYSTEM_VALUE:
3430       // TODO: any restrictions here ?
3431       break;
3432    case FILE_IMMEDIATE:
3433       break;
3434    default:
3435       assert(0);
3436       break;
3437    }
3438    if (cycle < ready)
3439       delay = MAX2(delay, ready - cycle);
3440 }
3441 
3442 void
checkWr(const Value * v,int cycle,int & delay) const3443 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3444 {
3445    int ready = cycle;
3446    int a, b;
3447 
3448    switch (v->reg.file) {
3449    case FILE_GPR:
3450       a = v->reg.data.id;
3451       b = a + v->reg.size / 4;
3452       for (int r = a; r < b; ++r)
3453          ready = MAX2(ready, score->wr.r[r]);
3454       break;
3455    case FILE_PREDICATE:
3456       ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3457       break;
3458    default:
3459       assert(v->reg.file == FILE_FLAGS);
3460       ready = MAX2(ready, score->wr.c);
3461       break;
3462    }
3463    if (cycle < ready)
3464       delay = MAX2(delay, ready - cycle);
3465 }
3466 
3467 void
recordWr(const Value * v,const int ready)3468 SchedDataCalculator::recordWr(const Value *v, const int ready)
3469 {
3470    int a = v->reg.data.id;
3471 
3472    if (v->reg.file == FILE_GPR) {
3473       int b = a + v->reg.size / 4;
3474       for (int r = a; r < b; ++r)
3475          score->rd.r[r] = ready;
3476    } else
3477    // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3478    if (v->reg.file == FILE_PREDICATE) {
3479       score->rd.p[a] = ready + 4;
3480    } else {
3481       assert(v->reg.file == FILE_FLAGS);
3482       score->rd.c = ready + 4;
3483    }
3484 }
3485 
3486 void
recordRd(const Value * v,const int ready)3487 SchedDataCalculator::recordRd(const Value *v, const int ready)
3488 {
3489    int a = v->reg.data.id;
3490 
3491    if (v->reg.file == FILE_GPR) {
3492       int b = a + v->reg.size / 4;
3493       for (int r = a; r < b; ++r)
3494          score->wr.r[r] = ready;
3495    } else
3496    if (v->reg.file == FILE_PREDICATE) {
3497       score->wr.p[a] = ready;
3498    } else
3499    if (v->reg.file == FILE_FLAGS) {
3500       score->wr.c = ready;
3501    }
3502 }
3503 
3504 bool
calculateSchedDataNVC0(const Target * targ,Function * func)3505 calculateSchedDataNVC0(const Target *targ, Function *func)
3506 {
3507    SchedDataCalculator sched(targ);
3508    return sched.run(func, true, true);
3509 }
3510 
3511 void
prepareEmission(Function * func)3512 CodeEmitterNVC0::prepareEmission(Function *func)
3513 {
3514    CodeEmitter::prepareEmission(func);
3515 
3516    if (targ->hasSWSched)
3517       calculateSchedDataNVC0(targ, func);
3518 }
3519 
CodeEmitterNVC0(const TargetNVC0 * target,Program::Type type)3520 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target, Program::Type type)
3521    : CodeEmitter(target),
3522      targNVC0(target),
3523      progType(type),
3524      writeIssueDelays(target->hasSWSched)
3525 {
3526    code = NULL;
3527    codeSize = codeSizeLimit = 0;
3528    relocInfo = NULL;
3529 }
3530 
3531 CodeEmitter *
createCodeEmitterNVC0(Program::Type type)3532 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3533 {
3534    CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this, type);
3535    return emit;
3536 }
3537 
3538 CodeEmitter *
getCodeEmitter(Program::Type type)3539 TargetNVC0::getCodeEmitter(Program::Type type)
3540 {
3541    if (chipset >= NVISA_GK20A_CHIPSET)
3542       return createCodeEmitterGK110(type);
3543    return createCodeEmitterNVC0(type);
3544 }
3545 
3546 } // namespace nv50_ir
3547