1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25 
26 namespace nv50_ir {
27 
// Encoding forms; passed as the 'enc' argument of
// CodeEmitterNV50::setSrcFileBits() by the emitForm_* helpers
// (MAD -> LONG, MUL -> SHORT, IMM -> IMM, ADD -> LONG_ALT).
28 #define NV50_OP_ENC_LONG     0
29 #define NV50_OP_ENC_SHORT    1
30 #define NV50_OP_ENC_IMM      2
31 #define NV50_OP_ENC_LONG_ALT 3
32 
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36    CodeEmitterNV50(Program::Type, const TargetNV50 *);
37 
38    virtual bool emitInstruction(Instruction *);
39 
40    virtual uint32_t getMinEncodingSize(const Instruction *) const;
41 
42    virtual void prepareEmission(Function *);
43 
44 private:
45    Program::Type progType;
46 
47    const TargetNV50 *targNV50;
48 
49 private:
50    inline void defId(const ValueDef&, const int pos);
51    inline void srcId(const ValueRef&, const int pos);
52    inline void srcId(const ValueRef *, const int pos);
53 
54    inline void srcAddr16(const ValueRef&, bool adj, const int pos);
55    inline void srcAddr8(const ValueRef&, const int pos);
56 
57    void emitFlagsRd(const Instruction *);
58    void emitFlagsWr(const Instruction *);
59 
60    void emitCondCode(CondCode cc, DataType ty, int pos);
61 
62    inline void setARegBits(unsigned int);
63 
64    void setAReg16(const Instruction *, int s);
65    void setImmediate(const Instruction *, int s);
66 
67    void setDst(const Value *);
68    void setDst(const Instruction *, int d);
69    void setSrcFileBits(const Instruction *, int enc);
70    void setSrc(const Instruction *, unsigned int s, int slot);
71 
72    void emitForm_MAD(const Instruction *);
73    void emitForm_ADD(const Instruction *);
74    void emitForm_MUL(const Instruction *);
75    void emitForm_IMM(const Instruction *);
76 
77    void emitLoadStoreSizeLG(DataType ty, int pos);
78    void emitLoadStoreSizeCS(DataType ty);
79 
80    void roundMode_MAD(const Instruction *);
81    void roundMode_CVT(RoundMode);
82 
83    void emitMNeg12(const Instruction *);
84 
85    void emitLOAD(const Instruction *);
86    void emitSTORE(const Instruction *);
87    void emitMOV(const Instruction *);
88    void emitRDSV(const Instruction *);
89    void emitNOP();
90    void emitINTERP(const Instruction *);
91    void emitPFETCH(const Instruction *);
92    void emitOUT(const Instruction *);
93 
94    void emitUADD(const Instruction *);
95    void emitAADD(const Instruction *);
96    void emitFADD(const Instruction *);
97    void emitDADD(const Instruction *);
98    void emitIMUL(const Instruction *);
99    void emitFMUL(const Instruction *);
100    void emitDMUL(const Instruction *);
101    void emitFMAD(const Instruction *);
102    void emitDMAD(const Instruction *);
103    void emitIMAD(const Instruction *);
104    void emitISAD(const Instruction *);
105 
106    void emitMINMAX(const Instruction *);
107 
108    void emitPreOp(const Instruction *);
109    void emitSFnOp(const Instruction *, uint8_t subOp);
110 
111    void emitShift(const Instruction *);
112    void emitARL(const Instruction *, unsigned int shl);
113    void emitLogicOp(const Instruction *);
114    void emitNOT(const Instruction *);
115 
116    void emitCVT(const Instruction *);
117    void emitSET(const Instruction *);
118 
119    void emitTEX(const TexInstruction *);
120    void emitTXQ(const TexInstruction *);
121    void emitTEXPREP(const TexInstruction *);
122 
123    void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
124 
125    void emitFlow(const Instruction *, uint8_t flowOp);
126    void emitPRERETEmu(const FlowInstruction *);
127    void emitBAR(const Instruction *);
128 
129    void emitATOM(const Instruction *);
130 };
131 
// Shorthands for the reg.data of the value returned by rep() on a source
// reference (SDATA) or a definition (DDATA). Both macros expand identically.
132 #define SDATA(a) ((a).rep()->reg.data)
133 #define DDATA(a) ((a).rep()->reg.data)
134 
srcId(const ValueRef & src,const int pos)135 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
136 {
137    assert(src.get());
138    code[pos / 32] |= SDATA(src).id << (pos % 32);
139 }
140 
srcId(const ValueRef * src,const int pos)141 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
142 {
143    assert(src->get());
144    code[pos / 32] |= SDATA(*src).id << (pos % 32);
145 }
146 
srcAddr16(const ValueRef & src,bool adj,const int pos)147 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
148 {
149    assert(src.get());
150 
151    int32_t offset = SDATA(src).offset;
152 
153    assert(!adj || src.get()->reg.size <= 4);
154    if (adj)
155       offset /= src.get()->reg.size;
156 
157    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
158 
159    if (offset < 0)
160       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
161 
162    code[pos / 32] |= offset << (pos % 32);
163 }
164 
srcAddr8(const ValueRef & src,const int pos)165 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
166 {
167    assert(src.get());
168 
169    uint32_t offset = SDATA(src).offset;
170 
171    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
172 
173    code[pos / 32] |= (offset >> 2) << (pos % 32);
174 }
175 
defId(const ValueDef & def,const int pos)176 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
177 {
178    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
179 
180    code[pos / 32] |= DDATA(def).id << (pos % 32);
181 }
182 
183 void
roundMode_MAD(const Instruction * insn)184 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
185 {
186    switch (insn->rnd) {
187    case ROUND_M: code[1] |= 1 << 22; break;
188    case ROUND_P: code[1] |= 2 << 22; break;
189    case ROUND_Z: code[1] |= 3 << 22; break;
190    default:
191       assert(insn->rnd == ROUND_N);
192       break;
193    }
194 }
195 
196 void
emitMNeg12(const Instruction * i)197 CodeEmitterNV50::emitMNeg12(const Instruction *i)
198 {
199    code[1] |= i->src(0).mod.neg() << 26;
200    code[1] |= i->src(1).mod.neg() << 27;
201 }
202 
emitCondCode(CondCode cc,DataType ty,int pos)203 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
204 {
205    uint8_t enc;
206 
207    assert(pos >= 32 || pos <= 27);
208 
209    switch (cc) {
210    case CC_LT:  enc = 0x1; break;
211    case CC_LTU: enc = 0x9; break;
212    case CC_EQ:  enc = 0x2; break;
213    case CC_EQU: enc = 0xa; break;
214    case CC_LE:  enc = 0x3; break;
215    case CC_LEU: enc = 0xb; break;
216    case CC_GT:  enc = 0x4; break;
217    case CC_GTU: enc = 0xc; break;
218    case CC_NE:  enc = 0x5; break;
219    case CC_NEU: enc = 0xd; break;
220    case CC_GE:  enc = 0x6; break;
221    case CC_GEU: enc = 0xe; break;
222    case CC_TR:  enc = 0xf; break;
223    case CC_FL:  enc = 0x0; break;
224 
225    case CC_O:  enc = 0x10; break;
226    case CC_C:  enc = 0x11; break;
227    case CC_A:  enc = 0x12; break;
228    case CC_S:  enc = 0x13; break;
229    case CC_NS: enc = 0x1c; break;
230    case CC_NA: enc = 0x1d; break;
231    case CC_NC: enc = 0x1e; break;
232    case CC_NO: enc = 0x1f; break;
233 
234    default:
235       enc = 0;
236       assert(!"invalid condition code");
237       break;
238    }
239    if (ty != TYPE_NONE && !isFloatType(ty))
240       enc &= ~0x8; // unordered only exists for float types
241 
242    code[pos / 32] |= enc << (pos % 32);
243 }
244 
245 void
emitFlagsRd(const Instruction * i)246 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
247 {
248    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
249 
250    assert(!(code[1] & 0x00003f80));
251 
252    if (s >= 0) {
253       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
254       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
255       srcId(i->src(s), 32 + 12);
256    } else {
257       code[1] |= 0x0780;
258    }
259 }
260 
261 void
emitFlagsWr(const Instruction * i)262 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
263 {
264    assert(!(code[1] & 0x70));
265 
266    int flagsDef = i->flagsDef;
267 
268    // find flags definition and check that it is the last def
269    if (flagsDef < 0) {
270       for (int d = 0; i->defExists(d); ++d)
271          if (i->def(d).getFile() == FILE_FLAGS)
272             flagsDef = d;
273       if (flagsDef >= 0 && false) // TODO: enforce use of flagsDef at some point
274          WARN("Instruction::flagsDef was not set properly\n");
275    }
276    if (flagsDef == 0 && i->defExists(1))
277       WARN("flags def should not be the primary definition\n");
278 
279    if (flagsDef >= 0)
280       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
281 
282 }
283 
284 void
setARegBits(unsigned int u)285 CodeEmitterNV50::setARegBits(unsigned int u)
286 {
287    code[0] |= (u & 3) << 26;
288    code[1] |= (u & 4);
289 }
290 
291 void
setAReg16(const Instruction * i,int s)292 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
293 {
294    if (i->srcExists(s)) {
295       s = i->src(s).indirect[0];
296       if (s >= 0)
297          setARegBits(SDATA(i->src(s)).id + 1);
298    }
299 }
300 
301 void
setImmediate(const Instruction * i,int s)302 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
303 {
304    const ImmediateValue *imm = i->src(s).get()->asImm();
305    assert(imm);
306 
307    uint32_t u = imm->reg.data.u32;
308 
309    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
310       u = ~u;
311 
312    code[1] |= 3;
313    code[0] |= (u & 0x3f) << 16;
314    code[1] |= (u >> 6) << 2;
315 }
316 
317 void
setDst(const Value * dst)318 CodeEmitterNV50::setDst(const Value *dst)
319 {
320    const Storage *reg = &dst->join->reg;
321 
322    assert(reg->file != FILE_ADDRESS);
323 
324    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
325       code[0] |= (127 << 2) | 1;
326       code[1] |= 8;
327    } else {
328       int id;
329       if (reg->file == FILE_SHADER_OUTPUT) {
330          code[1] |= 8;
331          id = reg->data.offset / 4;
332       } else {
333          id = reg->data.id;
334       }
335       code[0] |= id << 2;
336    }
337 }
338 
339 void
setDst(const Instruction * i,int d)340 CodeEmitterNV50::setDst(const Instruction *i, int d)
341 {
342    if (i->defExists(d)) {
343       setDst(i->getDef(d));
344    } else
345    if (!d) {
346       code[0] |= 0x01fc; // bit bucket
347       code[1] |= 0x0008;
348    }
349 }
350 
351 // 3 * 2 bits:
352 // 0: r
353 // 1: a/s
354 // 2: c
355 // 3: i
356 void
setSrcFileBits(const Instruction * i,int enc)357 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
358 {
359    uint8_t mode = 0;
360 
361    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
362       switch (i->src(s).getFile()) {
363       case FILE_GPR:
364          break;
365       case FILE_MEMORY_SHARED:
366       case FILE_SHADER_INPUT:
367          mode |= 1 << (s * 2);
368          break;
369       case FILE_MEMORY_CONST:
370          mode |= 2 << (s * 2);
371          break;
372       case FILE_IMMEDIATE:
373          mode |= 3 << (s * 2);
374          break;
375       default:
376          ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
377          assert(0);
378          break;
379       }
380    }
381    switch (mode) {
382    case 0x00: // rrr
383       break;
384    case 0x01: // arr/grr
385       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
386          code[0] |= 0x01800000;
387          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
388             code[1] |= 0x00200000;
389       } else {
390          if (enc == NV50_OP_ENC_SHORT)
391             code[0] |= 0x01000000;
392          else
393             code[1] |= 0x00200000;
394       }
395       break;
396    case 0x03: // irr
397       assert(i->op == OP_MOV);
398       return;
399    case 0x0c: // rir
400       break;
401    case 0x0d: // gir
402       assert(progType == Program::TYPE_GEOMETRY ||
403              progType == Program::TYPE_COMPUTE);
404       code[0] |= 0x01000000;
405       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
406          int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
407          assert(reg < 3);
408          code[0] |= (reg + 1) << 26;
409       }
410       break;
411    case 0x08: // rcr
412       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
413       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
414       break;
415    case 0x09: // acr/gcr
416       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
417          code[0] |= 0x01800000;
418       } else {
419          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
420          code[1] |= 0x00200000;
421       }
422       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
423       break;
424    case 0x20: // rrc
425       code[0] |= 0x01000000;
426       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
427       break;
428    case 0x21: // arc
429       code[0] |= 0x01000000;
430       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
431       assert(progType != Program::TYPE_GEOMETRY);
432       break;
433    default:
434       ERROR("not encodable: %x\n", mode);
435       assert(0);
436       break;
437    }
438    if (progType != Program::TYPE_COMPUTE)
439       return;
440 
441    if ((mode & 3) == 1) {
442       const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
443 
444       switch (i->sType) {
445       case TYPE_U8:
446          break;
447       case TYPE_U16:
448          code[0] |= 1 << pos;
449          break;
450       case TYPE_S16:
451          code[0] |= 2 << pos;
452          break;
453       default:
454          code[0] |= 3 << pos;
455          assert(i->getSrc(0)->reg.size == 4);
456          break;
457       }
458    }
459 }
460 
461 void
setSrc(const Instruction * i,unsigned int s,int slot)462 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
463 {
464    if (Target::operationSrcNr[i->op] <= s)
465       return;
466    const Storage *reg = &i->src(s).rep()->reg;
467 
468    unsigned int id = (reg->file == FILE_GPR) ?
469       reg->data.id :
470       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
471 
472    switch (slot) {
473    case 0: code[0] |= id << 9; break;
474    case 1: code[0] |= id << 16; break;
475    case 2: code[1] |= id << 14; break;
476    default:
477       assert(0);
478       break;
479    }
480 }
481 
482 // the default form:
483 //  - long instruction
484 //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
485 //  - address & flags
486 void
emitForm_MAD(const Instruction * i)487 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
488 {
489    assert(i->encSize == 8);
490    code[0] |= 1;
491 
492    emitFlagsRd(i);
493    emitFlagsWr(i);
494 
495    setDst(i, 0);
496 
497    setSrcFileBits(i, NV50_OP_ENC_LONG);
498    setSrc(i, 0, 0);
499    setSrc(i, 1, 1);
500    setSrc(i, 2, 2);
501 
502    if (i->getIndirect(0, 0)) {
503       assert(!i->srcExists(1) || !i->getIndirect(1, 0));
504       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
505       setAReg16(i, 0);
506    } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
507       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
508       setAReg16(i, 1);
509    } else {
510       setAReg16(i, 2);
511    }
512 }
513 
514 // like default form, but 2nd source in slot 2, and no 3rd source
515 void
emitForm_ADD(const Instruction * i)516 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
517 {
518    assert(i->encSize == 8);
519    code[0] |= 1;
520 
521    emitFlagsRd(i);
522    emitFlagsWr(i);
523 
524    setDst(i, 0);
525 
526    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
527    setSrc(i, 0, 0);
528    if (i->predSrc != 1)
529       setSrc(i, 1, 2);
530 
531    if (i->getIndirect(0, 0)) {
532       assert(!i->getIndirect(1, 0));
533       setAReg16(i, 0);
534    } else {
535       setAReg16(i, 1);
536    }
537 }
538 
539 // default short form (rr, ar, rc, gr)
540 void
emitForm_MUL(const Instruction * i)541 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
542 {
543    assert(i->encSize == 4 && !(code[0] & 1));
544    assert(i->defExists(0));
545    assert(!i->getPredicate());
546 
547    setDst(i, 0);
548 
549    setSrcFileBits(i, NV50_OP_ENC_SHORT);
550    setSrc(i, 0, 0);
551    setSrc(i, 1, 1);
552 }
553 
554 // usual immediate form
555 // - 1 to 3 sources where second is immediate (rir, gir)
556 // - no address or predicate possible
557 void
emitForm_IMM(const Instruction * i)558 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
559 {
560    assert(i->encSize == 8);
561    code[0] |= 1;
562 
563    assert(i->defExists(0) && i->srcExists(0));
564 
565    setDst(i, 0);
566 
567    setSrcFileBits(i, NV50_OP_ENC_IMM);
568    if (Target::operationSrcNr[i->op] > 1) {
569       setSrc(i, 0, 0);
570       setImmediate(i, 1);
571       // If there is another source, it has to be the same as the dest reg.
572    } else {
573       setImmediate(i, 0);
574    }
575 }
576 
577 void
emitLoadStoreSizeLG(DataType ty,int pos)578 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
579 {
580    uint8_t enc;
581 
582    switch (ty) {
583    case TYPE_F32: // fall through
584    case TYPE_S32: // fall through
585    case TYPE_U32:  enc = 0x6; break;
586    case TYPE_B128: enc = 0x5; break;
587    case TYPE_F64: // fall through
588    case TYPE_S64: // fall through
589    case TYPE_U64:  enc = 0x4; break;
590    case TYPE_S16:  enc = 0x3; break;
591    case TYPE_U16:  enc = 0x2; break;
592    case TYPE_S8:   enc = 0x1; break;
593    case TYPE_U8:   enc = 0x0; break;
594    default:
595       enc = 0;
596       assert(!"invalid load/store type");
597       break;
598    }
599    code[pos / 32] |= enc << (pos % 32);
600 }
601 
602 void
emitLoadStoreSizeCS(DataType ty)603 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
604 {
605    switch (ty) {
606    case TYPE_U8: break;
607    case TYPE_U16: code[1] |= 0x4000; break;
608    case TYPE_S16: code[1] |= 0x8000; break;
609    case TYPE_F32:
610    case TYPE_S32:
611    case TYPE_U32: code[1] |= 0xc000; break;
612    default:
613       assert(0);
614       break;
615    }
616 }
617 
618 void
emitLOAD(const Instruction * i)619 CodeEmitterNV50::emitLOAD(const Instruction *i)
620 {
621    DataFile sf = i->src(0).getFile();
622    ASSERTED int32_t offset = i->getSrc(0)->reg.data.offset;
623 
624    switch (sf) {
625    case FILE_SHADER_INPUT:
626       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
627          code[0] = 0x11800001;
628       else
629          // use 'mov' where we can
630          code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
631       code[1] = 0x00200000 | (i->lanes << 14);
632       if (typeSizeof(i->dType) == 4)
633          code[1] |= 0x04000000;
634       break;
635    case FILE_MEMORY_SHARED:
636       if (targ->getChipset() >= 0x84) {
637          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
638          code[0] = 0x10000001;
639          code[1] = 0x40000000;
640 
641          if (typeSizeof(i->dType) == 4)
642             code[1] |= 0x04000000;
643 
644          emitLoadStoreSizeCS(i->sType);
645 
646          if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
647             code[1] |= 0x00800000;
648       } else {
649          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
650          code[0] = 0x10000001;
651          code[1] = 0x00200000 | (i->lanes << 14);
652          emitLoadStoreSizeCS(i->sType);
653       }
654       break;
655    case FILE_MEMORY_CONST:
656       code[0] = 0x10000001;
657       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
658       if (typeSizeof(i->dType) == 4)
659          code[1] |= 0x04000000;
660       emitLoadStoreSizeCS(i->sType);
661       break;
662    case FILE_MEMORY_LOCAL:
663       code[0] = 0xd0000001;
664       code[1] = 0x40000000;
665       break;
666    case FILE_MEMORY_GLOBAL:
667       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
668       code[1] = 0x80000000;
669       break;
670    default:
671       assert(!"invalid load source file");
672       break;
673    }
674    if (sf == FILE_MEMORY_LOCAL ||
675        sf == FILE_MEMORY_GLOBAL)
676       emitLoadStoreSizeLG(i->sType, 21 + 32);
677 
678    setDst(i, 0);
679 
680    emitFlagsRd(i);
681    emitFlagsWr(i);
682 
683    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
684       srcId(*i->src(0).getIndirect(0), 9);
685    } else {
686       setAReg16(i, 0);
687       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
688    }
689 }
690 
691 void
emitSTORE(const Instruction * i)692 CodeEmitterNV50::emitSTORE(const Instruction *i)
693 {
694    DataFile f = i->getSrc(0)->reg.file;
695    int32_t offset = i->getSrc(0)->reg.data.offset;
696 
697    switch (f) {
698    case FILE_SHADER_OUTPUT:
699       code[0] = 0x00000001 | ((offset >> 2) << 9);
700       code[1] = 0x80c00000;
701       srcId(i->src(1), 32 + 14);
702       break;
703    case FILE_MEMORY_GLOBAL:
704       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
705       code[1] = 0xa0000000;
706       emitLoadStoreSizeLG(i->dType, 21 + 32);
707       srcId(i->src(1), 2);
708       break;
709    case FILE_MEMORY_LOCAL:
710       code[0] = 0xd0000001;
711       code[1] = 0x60000000;
712       emitLoadStoreSizeLG(i->dType, 21 + 32);
713       srcId(i->src(1), 2);
714       break;
715    case FILE_MEMORY_SHARED:
716       code[0] = 0x00000001;
717       code[1] = 0xe0000000;
718       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
719          code[1] |= 0x00800000;
720       switch (typeSizeof(i->dType)) {
721       case 1:
722          code[0] |= offset << 9;
723          code[1] |= 0x00400000;
724          break;
725       case 2:
726          code[0] |= (offset >> 1) << 9;
727          break;
728       case 4:
729          code[0] |= (offset >> 2) << 9;
730          code[1] |= 0x04200000;
731          break;
732       default:
733          assert(0);
734          break;
735       }
736       srcId(i->src(1), 32 + 14);
737       break;
738    default:
739       assert(!"invalid store destination file");
740       break;
741    }
742 
743    if (f == FILE_MEMORY_GLOBAL)
744       srcId(*i->src(0).getIndirect(0), 9);
745    else
746       setAReg16(i, 0);
747 
748    if (f == FILE_MEMORY_LOCAL)
749       srcAddr16(i->src(0), false, 9);
750 
751    emitFlagsRd(i);
752 }
753 
754 void
emitMOV(const Instruction * i)755 CodeEmitterNV50::emitMOV(const Instruction *i)
756 {
757    DataFile sf = i->getSrc(0)->reg.file;
758    DataFile df = i->getDef(0)->reg.file;
759 
760    assert(sf == FILE_GPR || df == FILE_GPR);
761 
762    if (sf == FILE_FLAGS) {
763       assert(i->flagsSrc >= 0);
764       code[0] = 0x00000001;
765       code[1] = 0x20000000;
766       defId(i->def(0), 2);
767       emitFlagsRd(i);
768    } else
769    if (sf == FILE_ADDRESS) {
770       code[0] = 0x00000001;
771       code[1] = 0x40000000;
772       defId(i->def(0), 2);
773       setARegBits(SDATA(i->src(0)).id + 1);
774       emitFlagsRd(i);
775    } else
776    if (df == FILE_FLAGS) {
777       assert(i->flagsDef >= 0);
778       code[0] = 0x00000001;
779       code[1] = 0xa0000000;
780       srcId(i->src(0), 9);
781       emitFlagsRd(i);
782       emitFlagsWr(i);
783    } else
784    if (sf == FILE_IMMEDIATE) {
785       code[0] = 0x10000001;
786       code[1] = 0x00000003;
787       emitForm_IMM(i);
788 
789       code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
790    } else {
791       if (i->encSize == 4) {
792          code[0] = 0x10000000;
793          code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
794          defId(i->def(0), 2);
795       } else {
796          code[0] = 0x10000001;
797          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
798          code[1] |= (i->lanes << 14);
799          setDst(i, 0);
800          emitFlagsRd(i);
801       }
802       srcId(i->src(0), 9);
803    }
804    if (df == FILE_SHADER_OUTPUT) {
805       assert(i->encSize == 8);
806       code[1] |= 0x8;
807    }
808 }
809 
getSRegEncoding(const ValueRef & ref)810 static inline uint8_t getSRegEncoding(const ValueRef &ref)
811 {
812    switch (SDATA(ref).sv.sv) {
813    case SV_PHYSID:        return 0;
814    case SV_CLOCK:         return 1;
815    case SV_VERTEX_STRIDE: return 3;
816 // case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
817    case SV_SAMPLE_INDEX:  return 8;
818    default:
819       assert(!"no sreg for system value");
820       return 0;
821    }
822 }
823 
824 void
emitRDSV(const Instruction * i)825 CodeEmitterNV50::emitRDSV(const Instruction *i)
826 {
827    code[0] = 0x00000001;
828    code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
829    defId(i->def(0), 2);
830    emitFlagsRd(i);
831 }
832 
833 void
emitNOP()834 CodeEmitterNV50::emitNOP()
835 {
836    code[0] = 0xf0000001;
837    code[1] = 0xe0000000;
838 }
839 
840 void
emitQUADOP(const Instruction * i,uint8_t lane,uint8_t quOp)841 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
842 {
843    code[0] = 0xc0000000 | (lane << 16);
844    code[1] = 0x80000000;
845 
846    code[0] |= (quOp & 0x03) << 20;
847    code[1] |= (quOp & 0xfc) << 20;
848 
849    emitForm_ADD(i);
850 
851    if (!i->srcExists(1) || i->predSrc == 1)
852       srcId(i->src(0), 32 + 14);
853 }
854 
855 /* NOTE: This returns the base address of a vertex inside the primitive.
856  * src0 is an immediate, the index (not offset) of the vertex
857  * inside the primitive. XXX: signed or unsigned ?
858  * src1 (may be NULL) should use whatever units the hardware requires
859  * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
860  */
861 void
emitPFETCH(const Instruction * i)862 CodeEmitterNV50::emitPFETCH(const Instruction *i)
863 {
864    const uint32_t prim = i->src(0).get()->reg.data.u32;
865    assert(prim <= 127);
866 
867    if (i->def(0).getFile() == FILE_ADDRESS) {
868       // shl $aX a[] 0
869       code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
870       code[1] = 0xc0200000;
871       code[0] |= prim << 9;
872       assert(!i->srcExists(1));
873    } else
874    if (i->srcExists(1)) {
875       // ld b32 $rX a[$aX+base]
876       code[0] = 0x00000001;
877       code[1] = 0x04200000 | (0xf << 14);
878       defId(i->def(0), 2);
879       code[0] |= prim << 9;
880       setARegBits(SDATA(i->src(1)).id + 1);
881    } else {
882       // mov b32 $rX a[]
883       code[0] = 0x10000001;
884       code[1] = 0x04200000 | (0xf << 14);
885       defId(i->def(0), 2);
886       code[0] |= prim << 9;
887    }
888    emitFlagsRd(i);
889 }
890 
891 void
nv50_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)892 nv50_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
893 {
894    int ipa = entry->ipa;
895    int encSize = entry->reg;
896    int loc = entry->loc;
897 
898    if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
899        (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
900       if (data.force_persample_interp) {
901          if (encSize == 8)
902             code[loc + 1] |= 1 << 16;
903          else
904             code[loc + 0] |= 1 << 24;
905       } else {
906          if (encSize == 8)
907             code[loc + 1] &= ~(1 << 16);
908          else
909             code[loc + 0] &= ~(1 << 24);
910       }
911    }
912 }
913 
914 void
emitINTERP(const Instruction * i)915 CodeEmitterNV50::emitINTERP(const Instruction *i)
916 {
917    code[0] = 0x80000000;
918 
919    defId(i->def(0), 2);
920    srcAddr8(i->src(0), 16);
921    setAReg16(i, 0);
922 
923    if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
924       code[0] |= 1 << 8;
925    } else {
926       if (i->op == OP_PINTERP) {
927          code[0] |= 1 << 25;
928          srcId(i->src(1), 9);
929       }
930       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
931          code[0] |= 1 << 24;
932    }
933 
934    if (i->encSize == 8) {
935       if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
936          code[1] = 4 << 16;
937       else
938          code[1] = (code[0] & (3 << 24)) >> (24 - 16);
939       code[0] &= ~0x03000000;
940       code[0] |= 1;
941       emitFlagsRd(i);
942    }
943 
944    addInterp(i->ipa, i->encSize, nv50_interpApply);
945 }
946 
947 void
emitMINMAX(const Instruction * i)948 CodeEmitterNV50::emitMINMAX(const Instruction *i)
949 {
950    if (i->dType == TYPE_F64) {
951       code[0] = 0xe0000000;
952       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
953    } else {
954       code[0] = 0x30000000;
955       code[1] = 0x80000000;
956       if (i->op == OP_MIN)
957          code[1] |= 0x20000000;
958 
959       switch (i->dType) {
960       case TYPE_F32: code[0] |= 0x80000000; break;
961       case TYPE_S32: code[1] |= 0x8c000000; break;
962       case TYPE_U32: code[1] |= 0x84000000; break;
963       case TYPE_S16: code[1] |= 0x80000000; break;
964       case TYPE_U16: break;
965       default:
966          assert(0);
967          break;
968       }
969    }
970 
971    code[1] |= i->src(0).mod.abs() << 20;
972    code[1] |= i->src(0).mod.neg() << 26;
973    code[1] |= i->src(1).mod.abs() << 19;
974    code[1] |= i->src(1).mod.neg() << 27;
975 
976    emitForm_MAD(i);
977 }
978 
979 void
emitFMAD(const Instruction * i)980 CodeEmitterNV50::emitFMAD(const Instruction *i)
981 {
982    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
983    const int neg_add = i->src(2).mod.neg();
984 
985    code[0] = 0xe0000000;
986 
987    if (i->src(1).getFile() == FILE_IMMEDIATE) {
988       code[1] = 0;
989       emitForm_IMM(i);
990       code[0] |= neg_mul << 15;
991       code[0] |= neg_add << 22;
992       if (i->saturate)
993          code[0] |= 1 << 8;
994    } else
995    if (i->encSize == 4) {
996       emitForm_MUL(i);
997       code[0] |= neg_mul << 15;
998       code[0] |= neg_add << 22;
999       if (i->saturate)
1000          code[0] |= 1 << 8;
1001    } else {
1002       code[1]  = neg_mul << 26;
1003       code[1] |= neg_add << 27;
1004       if (i->saturate)
1005          code[1] |= 1 << 29;
1006       emitForm_MAD(i);
1007    }
1008 }
1009 
1010 void
emitDMAD(const Instruction * i)1011 CodeEmitterNV50::emitDMAD(const Instruction *i)
1012 {
1013    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1014    const int neg_add = i->src(2).mod.neg();
1015 
1016    assert(i->encSize == 8);
1017    assert(!i->saturate);
1018 
1019    code[1] = 0x40000000;
1020    code[0] = 0xe0000000;
1021 
1022    code[1] |= neg_mul << 26;
1023    code[1] |= neg_add << 27;
1024 
1025    roundMode_MAD(i);
1026 
1027    emitForm_MAD(i);
1028 }
1029 
1030 void
emitFADD(const Instruction * i)1031 CodeEmitterNV50::emitFADD(const Instruction *i)
1032 {
1033    const int neg0 = i->src(0).mod.neg();
1034    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1035 
1036    code[0] = 0xb0000000;
1037 
1038    assert(!(i->src(0).mod | i->src(1).mod).abs());
1039 
1040    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1041       code[1] = 0;
1042       emitForm_IMM(i);
1043       code[0] |= neg0 << 15;
1044       code[0] |= neg1 << 22;
1045       if (i->saturate)
1046          code[0] |= 1 << 8;
1047    } else
1048    if (i->encSize == 8) {
1049       code[1] = 0;
1050       emitForm_ADD(i);
1051       code[1] |= neg0 << 26;
1052       code[1] |= neg1 << 27;
1053       if (i->saturate)
1054          code[1] |= 1 << 29;
1055    } else {
1056       emitForm_MUL(i);
1057       code[0] |= neg0 << 15;
1058       code[0] |= neg1 << 22;
1059       if (i->saturate)
1060          code[0] |= 1 << 8;
1061    }
1062 }
1063 
1064 void
emitDADD(const Instruction * i)1065 CodeEmitterNV50::emitDADD(const Instruction *i)
1066 {
1067    const int neg0 = i->src(0).mod.neg();
1068    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1069 
1070    assert(!(i->src(0).mod | i->src(1).mod).abs());
1071    assert(!i->saturate);
1072    assert(i->encSize == 8);
1073 
1074    code[1] = 0x60000000;
1075    code[0] = 0xe0000000;
1076 
1077    emitForm_ADD(i);
1078 
1079    code[1] |= neg0 << 26;
1080    code[1] |= neg1 << 27;
1081 }
1082 
1083 void
emitUADD(const Instruction * i)1084 CodeEmitterNV50::emitUADD(const Instruction *i)
1085 {
1086    const int neg0 = i->src(0).mod.neg();
1087    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1088 
1089    code[0] = 0x20000000;
1090 
1091    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1092       code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
1093       code[1] = 0;
1094       emitForm_IMM(i);
1095    } else
1096    if (i->encSize == 8) {
1097       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1098       emitForm_ADD(i);
1099    } else {
1100       code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
1101       emitForm_MUL(i);
1102    }
1103    assert(!(neg0 && neg1));
1104    code[0] |= neg0 << 28;
1105    code[0] |= neg1 << 22;
1106 
1107    if (i->flagsSrc >= 0) {
1108       // addc == sub | subr
1109       assert(!(code[0] & 0x10400000) && !i->getPredicate());
1110       code[0] |= 0x10400000;
1111       srcId(i->src(i->flagsSrc), 32 + 12);
1112    }
1113 }
1114 
1115 void
emitAADD(const Instruction * i)1116 CodeEmitterNV50::emitAADD(const Instruction *i)
1117 {
1118    const int s = (i->op == OP_MOV) ? 0 : 1;
1119 
1120    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1121    code[1] = 0x20000000;
1122 
1123    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1124 
1125    emitFlagsRd(i);
1126 
1127    if (s && i->srcExists(0))
1128       setARegBits(SDATA(i->src(0)).id + 1);
1129 }
1130 
1131 void
emitIMUL(const Instruction * i)1132 CodeEmitterNV50::emitIMUL(const Instruction *i)
1133 {
1134    code[0] = 0x40000000;
1135 
1136    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1137       if (i->sType == TYPE_S16)
1138          code[0] |= 0x8100;
1139       code[1] = 0;
1140       emitForm_IMM(i);
1141    } else
1142    if (i->encSize == 8) {
1143       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1144       emitForm_MAD(i);
1145    } else {
1146       if (i->sType == TYPE_S16)
1147          code[0] |= 0x8100;
1148       emitForm_MUL(i);
1149    }
1150 }
1151 
1152 void
emitFMUL(const Instruction * i)1153 CodeEmitterNV50::emitFMUL(const Instruction *i)
1154 {
1155    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1156 
1157    code[0] = 0xc0000000;
1158 
1159    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1160       code[1] = 0;
1161       emitForm_IMM(i);
1162       if (neg)
1163          code[0] |= 0x8000;
1164       if (i->saturate)
1165          code[0] |= 1 << 8;
1166    } else
1167    if (i->encSize == 8) {
1168       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1169       if (neg)
1170          code[1] |= 0x08000000;
1171       if (i->saturate)
1172          code[1] |= 1 << 20;
1173       emitForm_MAD(i);
1174    } else {
1175       emitForm_MUL(i);
1176       if (neg)
1177          code[0] |= 0x8000;
1178       if (i->saturate)
1179          code[0] |= 1 << 8;
1180    }
1181 }
1182 
1183 void
emitDMUL(const Instruction * i)1184 CodeEmitterNV50::emitDMUL(const Instruction *i)
1185 {
1186    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1187 
1188    assert(!i->saturate);
1189    assert(i->encSize == 8);
1190 
1191    code[1] = 0x80000000;
1192    code[0] = 0xe0000000;
1193 
1194    if (neg)
1195       code[1] |= 0x08000000;
1196 
1197    roundMode_CVT(i->rnd);
1198 
1199    emitForm_MAD(i);
1200 }
1201 
1202 void
emitIMAD(const Instruction * i)1203 CodeEmitterNV50::emitIMAD(const Instruction *i)
1204 {
1205    int mode;
1206    code[0] = 0x60000000;
1207 
1208    assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1209    if (!isSignedType(i->sType))
1210       mode = 0;
1211    else if (i->saturate)
1212       mode = 2;
1213    else
1214       mode = 1;
1215 
1216    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1217       code[1] = 0;
1218       emitForm_IMM(i);
1219       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1220       if (i->flagsSrc >= 0) {
1221          assert(!(code[0] & 0x10400000));
1222          assert(SDATA(i->src(i->flagsSrc)).id == 0);
1223          code[0] |= 0x10400000;
1224       }
1225    } else
1226    if (i->encSize == 4) {
1227       emitForm_MUL(i);
1228       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1229       if (i->flagsSrc >= 0) {
1230          assert(!(code[0] & 0x10400000));
1231          assert(SDATA(i->src(i->flagsSrc)).id == 0);
1232          code[0] |= 0x10400000;
1233       }
1234    } else {
1235       code[1] = mode << 29;
1236       emitForm_MAD(i);
1237 
1238       if (i->flagsSrc >= 0) {
1239          // add with carry from $cX
1240          assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1241          code[1] |= 0xc << 24;
1242          srcId(i->src(i->flagsSrc), 32 + 12);
1243       }
1244    }
1245 }
1246 
1247 void
emitISAD(const Instruction * i)1248 CodeEmitterNV50::emitISAD(const Instruction *i)
1249 {
1250    if (i->encSize == 8) {
1251       code[0] = 0x50000000;
1252       switch (i->sType) {
1253       case TYPE_U32: code[1] = 0x04000000; break;
1254       case TYPE_S32: code[1] = 0x0c000000; break;
1255       case TYPE_U16: code[1] = 0x00000000; break;
1256       case TYPE_S16: code[1] = 0x08000000; break;
1257       default:
1258          assert(0);
1259          break;
1260       }
1261       emitForm_MAD(i);
1262    } else {
1263       switch (i->sType) {
1264       case TYPE_U32: code[0] = 0x50008000; break;
1265       case TYPE_S32: code[0] = 0x50008100; break;
1266       case TYPE_U16: code[0] = 0x50000000; break;
1267       case TYPE_S16: code[0] = 0x50000100; break;
1268       default:
1269          assert(0);
1270          break;
1271       }
1272       emitForm_MUL(i);
1273    }
1274 }
1275 
1276 static void
alphatestSet(const FixupEntry * entry,uint32_t * code,const FixupData & data)1277 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1278 {
1279    int loc = entry->loc;
1280    int enc;
1281 
1282    switch (data.alphatest) {
1283    case PIPE_FUNC_NEVER: enc = 0x0; break;
1284    case PIPE_FUNC_LESS: enc = 0x1; break;
1285    case PIPE_FUNC_EQUAL: enc = 0x2; break;
1286    case PIPE_FUNC_LEQUAL: enc = 0x3; break;
1287    case PIPE_FUNC_GREATER: enc = 0x4; break;
1288    case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
1289    case PIPE_FUNC_GEQUAL: enc = 0x6; break;
1290    default:
1291    case PIPE_FUNC_ALWAYS: enc = 0xf; break;
1292    }
1293 
1294    code[loc + 1] &= ~(0x1f << 14);
1295    code[loc + 1] |= enc << 14;
1296 }
1297 
1298 void
emitSET(const Instruction * i)1299 CodeEmitterNV50::emitSET(const Instruction *i)
1300 {
1301    code[0] = 0x30000000;
1302    code[1] = 0x60000000;
1303 
1304    switch (i->sType) {
1305    case TYPE_F64:
1306       code[0] = 0xe0000000;
1307       code[1] = 0xe0000000;
1308       break;
1309    case TYPE_F32: code[0] |= 0x80000000; break;
1310    case TYPE_S32: code[1] |= 0x0c000000; break;
1311    case TYPE_U32: code[1] |= 0x04000000; break;
1312    case TYPE_S16: code[1] |= 0x08000000; break;
1313    case TYPE_U16: break;
1314    default:
1315       assert(0);
1316       break;
1317    }
1318 
1319    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1320 
1321    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1322    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1323    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1324    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1325 
1326    emitForm_MAD(i);
1327 
1328    if (i->subOp == 1) {
1329       addInterp(0, 0, alphatestSet);
1330    }
1331 }
1332 
1333 void
roundMode_CVT(RoundMode rnd)1334 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1335 {
1336    switch (rnd) {
1337    case ROUND_NI: code[1] |= 0x08000000; break;
1338    case ROUND_M:  code[1] |= 0x00020000; break;
1339    case ROUND_MI: code[1] |= 0x08020000; break;
1340    case ROUND_P:  code[1] |= 0x00040000; break;
1341    case ROUND_PI: code[1] |= 0x08040000; break;
1342    case ROUND_Z:  code[1] |= 0x00060000; break;
1343    case ROUND_ZI: code[1] |= 0x08060000; break;
1344    default:
1345       assert(rnd == ROUND_N);
1346       break;
1347    }
1348 }
1349 
1350 void
emitCVT(const Instruction * i)1351 CodeEmitterNV50::emitCVT(const Instruction *i)
1352 {
1353    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1354    RoundMode rnd;
1355    DataType dType;
1356 
1357    switch (i->op) {
1358    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1359    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1360    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1361    default:
1362       rnd = i->rnd;
1363       break;
1364    }
1365 
1366    if (i->op == OP_NEG && i->dType == TYPE_U32)
1367       dType = TYPE_S32;
1368    else
1369       dType = i->dType;
1370 
1371    code[0] = 0xa0000000;
1372 
1373    switch (dType) {
1374    case TYPE_F64:
1375       switch (i->sType) {
1376       case TYPE_F64: code[1] = 0xc4404000; break;
1377       case TYPE_S64: code[1] = 0x44414000; break;
1378       case TYPE_U64: code[1] = 0x44404000; break;
1379       case TYPE_F32: code[1] = 0xc4400000; break;
1380       case TYPE_S32: code[1] = 0x44410000; break;
1381       case TYPE_U32: code[1] = 0x44400000; break;
1382       default:
1383          assert(0);
1384          break;
1385       }
1386       break;
1387    case TYPE_S64:
1388       switch (i->sType) {
1389       case TYPE_F64: code[1] = 0x8c404000; break;
1390       case TYPE_F32: code[1] = 0x8c400000; break;
1391       default:
1392          assert(0);
1393          break;
1394       }
1395       break;
1396    case TYPE_U64:
1397       switch (i->sType) {
1398       case TYPE_F64: code[1] = 0x84404000; break;
1399       case TYPE_F32: code[1] = 0x84400000; break;
1400       default:
1401          assert(0);
1402          break;
1403       }
1404       break;
1405    case TYPE_F32:
1406       switch (i->sType) {
1407       case TYPE_F64: code[1] = 0xc0404000; break;
1408       case TYPE_S64: code[1] = 0x40414000; break;
1409       case TYPE_U64: code[1] = 0x40404000; break;
1410       case TYPE_F32: code[1] = 0xc4004000; break;
1411       case TYPE_S32: code[1] = 0x44014000; break;
1412       case TYPE_U32: code[1] = 0x44004000; break;
1413       case TYPE_F16: code[1] = 0xc4000000; break;
1414       case TYPE_U16: code[1] = 0x44000000; break;
1415       case TYPE_S16: code[1] = 0x44010000; break;
1416       case TYPE_S8:  code[1] = 0x44018000; break;
1417       case TYPE_U8:  code[1] = 0x44008000; break;
1418       default:
1419          assert(0);
1420          break;
1421       }
1422       break;
1423    case TYPE_S32:
1424       switch (i->sType) {
1425       case TYPE_F64: code[1] = 0x88404000; break;
1426       case TYPE_F32: code[1] = 0x8c004000; break;
1427       case TYPE_S32: code[1] = 0x0c014000; break;
1428       case TYPE_U32: code[1] = 0x0c004000; break;
1429       case TYPE_F16: code[1] = 0x8c000000; break;
1430       case TYPE_S16: code[1] = 0x0c010000; break;
1431       case TYPE_U16: code[1] = 0x0c000000; break;
1432       case TYPE_S8:  code[1] = 0x0c018000; break;
1433       case TYPE_U8:  code[1] = 0x0c008000; break;
1434       default:
1435          assert(0);
1436          break;
1437       }
1438       break;
1439    case TYPE_U32:
1440       switch (i->sType) {
1441       case TYPE_F64: code[1] = 0x80404000; break;
1442       case TYPE_F32: code[1] = 0x84004000; break;
1443       case TYPE_S32: code[1] = 0x04014000; break;
1444       case TYPE_U32: code[1] = 0x04004000; break;
1445       case TYPE_F16: code[1] = 0x84000000; break;
1446       case TYPE_S16: code[1] = 0x04010000; break;
1447       case TYPE_U16: code[1] = 0x04000000; break;
1448       case TYPE_S8:  code[1] = 0x04018000; break;
1449       case TYPE_U8:  code[1] = 0x04008000; break;
1450       default:
1451          assert(0);
1452          break;
1453       }
1454       break;
1455    case TYPE_F16:
1456       switch (i->sType) {
1457       case TYPE_F16: code[1] = 0xc0000000; break;
1458       case TYPE_F32: code[1] = 0xc0004000; break;
1459       default:
1460          assert(0);
1461          break;
1462       }
1463       break;
1464    case TYPE_S16:
1465       switch (i->sType) {
1466       case TYPE_F32: code[1] = 0x88004000; break;
1467       case TYPE_S32: code[1] = 0x08014000; break;
1468       case TYPE_U32: code[1] = 0x08004000; break;
1469       case TYPE_F16: code[1] = 0x88000000; break;
1470       case TYPE_S16: code[1] = 0x08010000; break;
1471       case TYPE_U16: code[1] = 0x08000000; break;
1472       case TYPE_S8:  code[1] = 0x08018000; break;
1473       case TYPE_U8:  code[1] = 0x08008000; break;
1474       default:
1475          assert(0);
1476          break;
1477       }
1478       break;
1479    case TYPE_U16:
1480       switch (i->sType) {
1481       case TYPE_F32: code[1] = 0x80004000; break;
1482       case TYPE_S32: code[1] = 0x00014000; break;
1483       case TYPE_U32: code[1] = 0x00004000; break;
1484       case TYPE_F16: code[1] = 0x80000000; break;
1485       case TYPE_S16: code[1] = 0x00010000; break;
1486       case TYPE_U16: code[1] = 0x00000000; break;
1487       case TYPE_S8:  code[1] = 0x00018000; break;
1488       case TYPE_U8:  code[1] = 0x00008000; break;
1489       default:
1490          assert(0);
1491          break;
1492       }
1493       break;
1494    case TYPE_S8:
1495       switch (i->sType) {
1496       case TYPE_S32: code[1] = 0x08094000; break;
1497       case TYPE_U32: code[1] = 0x08084000; break;
1498       case TYPE_F16: code[1] = 0x88080000; break;
1499       case TYPE_S16: code[1] = 0x08090000; break;
1500       case TYPE_U16: code[1] = 0x08080000; break;
1501       case TYPE_S8:  code[1] = 0x08098000; break;
1502       case TYPE_U8:  code[1] = 0x08088000; break;
1503       default:
1504          assert(0);
1505          break;
1506       }
1507       break;
1508    case TYPE_U8:
1509       switch (i->sType) {
1510       case TYPE_S32: code[1] = 0x00094000; break;
1511       case TYPE_U32: code[1] = 0x00084000; break;
1512       case TYPE_F16: code[1] = 0x80080000; break;
1513       case TYPE_S16: code[1] = 0x00090000; break;
1514       case TYPE_U16: code[1] = 0x00080000; break;
1515       case TYPE_S8:  code[1] = 0x00098000; break;
1516       case TYPE_U8:  code[1] = 0x00088000; break;
1517       default:
1518          assert(0);
1519          break;
1520       }
1521       break;
1522    default:
1523       assert(0);
1524       break;
1525    }
1526    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1527       code[1] |= 0x00004000;
1528 
1529    roundMode_CVT(rnd);
1530 
1531    switch (i->op) {
1532    case OP_ABS: code[1] |= 1 << 20; break;
1533    case OP_SAT: code[1] |= 1 << 19; break;
1534    case OP_NEG: code[1] |= 1 << 29; break;
1535    default:
1536       break;
1537    }
1538    code[1] ^= i->src(0).mod.neg() << 29;
1539    code[1] |= i->src(0).mod.abs() << 20;
1540    if (i->saturate)
1541       code[1] |= 1 << 19;
1542 
1543    assert(i->op != OP_ABS || !i->src(0).mod.neg());
1544 
1545    emitForm_MAD(i);
1546 }
1547 
1548 void
emitPreOp(const Instruction * i)1549 CodeEmitterNV50::emitPreOp(const Instruction *i)
1550 {
1551    code[0] = 0xb0000000;
1552    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1553 
1554    code[1] |= i->src(0).mod.abs() << 20;
1555    code[1] |= i->src(0).mod.neg() << 26;
1556 
1557    emitForm_MAD(i);
1558 }
1559 
1560 void
emitSFnOp(const Instruction * i,uint8_t subOp)1561 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1562 {
1563    code[0] = 0x90000000;
1564 
1565    if (i->encSize == 4) {
1566       assert(i->op == OP_RCP);
1567       assert(!i->saturate);
1568       code[0] |= i->src(0).mod.abs() << 15;
1569       code[0] |= i->src(0).mod.neg() << 22;
1570       emitForm_MUL(i);
1571    } else {
1572       code[1] = subOp << 29;
1573       code[1] |= i->src(0).mod.abs() << 20;
1574       code[1] |= i->src(0).mod.neg() << 26;
1575       if (i->saturate) {
1576          assert(subOp == 6 && i->op == OP_EX2);
1577          code[1] |= 1 << 27;
1578       }
1579       emitForm_MAD(i);
1580    }
1581 }
1582 
1583 void
emitNOT(const Instruction * i)1584 CodeEmitterNV50::emitNOT(const Instruction *i)
1585 {
1586    code[0] = 0xd0000000;
1587    code[1] = 0x0002c000;
1588 
1589    switch (i->sType) {
1590    case TYPE_U32:
1591    case TYPE_S32:
1592       code[1] |= 0x04000000;
1593       break;
1594    default:
1595       break;
1596    }
1597    emitForm_MAD(i);
1598    setSrc(i, 0, 1);
1599 }
1600 
1601 void
emitLogicOp(const Instruction * i)1602 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1603 {
1604    code[0] = 0xd0000000;
1605    code[1] = 0;
1606 
1607    if (i->src(1).getFile() == FILE_IMMEDIATE) {
1608       switch (i->op) {
1609       case OP_OR:  code[0] |= 0x0100; break;
1610       case OP_XOR: code[0] |= 0x8000; break;
1611       default:
1612          assert(i->op == OP_AND);
1613          break;
1614       }
1615       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1616          code[0] |= 1 << 22;
1617 
1618       emitForm_IMM(i);
1619    } else {
1620       switch (i->op) {
1621       case OP_AND: code[1] = 0x00000000; break;
1622       case OP_OR:  code[1] = 0x00004000; break;
1623       case OP_XOR: code[1] = 0x00008000; break;
1624       default:
1625          assert(0);
1626          break;
1627       }
1628       if (typeSizeof(i->dType) == 4)
1629          code[1] |= 0x04000000;
1630       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1631          code[1] |= 1 << 16;
1632       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1633          code[1] |= 1 << 17;
1634 
1635       emitForm_MAD(i);
1636    }
1637 }
1638 
1639 void
emitARL(const Instruction * i,unsigned int shl)1640 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1641 {
1642    code[0] = 0x00000001 | (shl << 16);
1643    code[1] = 0xc0000000;
1644 
1645    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1646 
1647    setSrcFileBits(i, NV50_OP_ENC_IMM);
1648    setSrc(i, 0, 0);
1649    emitFlagsRd(i);
1650 }
1651 
1652 void
emitShift(const Instruction * i)1653 CodeEmitterNV50::emitShift(const Instruction *i)
1654 {
1655    if (i->def(0).getFile() == FILE_ADDRESS) {
1656       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1657       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1658    } else {
1659       code[0] = 0x30000001;
1660       code[1] = (i->op == OP_SHR) ? 0xe0000000 : 0xc0000000;
1661       if (typeSizeof(i->dType) == 4)
1662          code[1] |= 0x04000000;
1663       if (i->op == OP_SHR && isSignedType(i->sType))
1664           code[1] |= 1 << 27;
1665 
1666       if (i->src(1).getFile() == FILE_IMMEDIATE) {
1667          code[1] |= 1 << 20;
1668          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1669          defId(i->def(0), 2);
1670          srcId(i->src(0), 9);
1671          emitFlagsRd(i);
1672       } else {
1673          emitForm_MAD(i);
1674       }
1675    }
1676 }
1677 
1678 void
emitOUT(const Instruction * i)1679 CodeEmitterNV50::emitOUT(const Instruction *i)
1680 {
1681    code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1682    code[1] = 0xc0000000;
1683 
1684    emitFlagsRd(i);
1685 }
1686 
1687 void
emitTEX(const TexInstruction * i)1688 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1689 {
1690    code[0] = 0xf0000001;
1691    code[1] = 0x00000000;
1692 
1693    switch (i->op) {
1694    case OP_TXB:
1695       code[1] = 0x20000000;
1696       break;
1697    case OP_TXL:
1698       code[1] = 0x40000000;
1699       break;
1700    case OP_TXF:
1701       code[0] |= 0x01000000;
1702       break;
1703    case OP_TXG:
1704       code[0] |= 0x01000000;
1705       code[1] = 0x80000000;
1706       break;
1707    case OP_TXLQ:
1708       code[1] = 0x60020000;
1709       break;
1710    default:
1711       assert(i->op == OP_TEX);
1712       break;
1713    }
1714 
1715    code[0] |= i->tex.r << 9;
1716    code[0] |= i->tex.s << 17;
1717 
1718    int argc = i->tex.target.getArgCount();
1719 
1720    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1721       argc += 1;
1722    if (i->tex.target.isShadow())
1723       argc += 1;
1724    assert(argc <= 4);
1725 
1726    code[0] |= (argc - 1) << 22;
1727 
1728    if (i->tex.target.isCube()) {
1729       code[0] |= 0x08000000;
1730    } else
1731    if (i->tex.useOffsets) {
1732       code[1] |= (i->tex.offset[0] & 0xf) << 24;
1733       code[1] |= (i->tex.offset[1] & 0xf) << 20;
1734       code[1] |= (i->tex.offset[2] & 0xf) << 16;
1735    }
1736 
1737    code[0] |= (i->tex.mask & 0x3) << 25;
1738    code[1] |= (i->tex.mask & 0xc) << 12;
1739 
1740    if (i->tex.liveOnly)
1741       code[1] |= 1 << 2;
1742    if (i->tex.derivAll)
1743       code[1] |= 1 << 3;
1744 
1745    defId(i->def(0), 2);
1746 
1747    emitFlagsRd(i);
1748 }
1749 
1750 void
emitTXQ(const TexInstruction * i)1751 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1752 {
1753    assert(i->tex.query == TXQ_DIMS);
1754 
1755    code[0] = 0xf0000001;
1756    code[1] = 0x60000000;
1757 
1758    code[0] |= i->tex.r << 9;
1759    code[0] |= i->tex.s << 17;
1760 
1761    code[0] |= (i->tex.mask & 0x3) << 25;
1762    code[1] |= (i->tex.mask & 0xc) << 12;
1763 
1764    defId(i->def(0), 2);
1765 
1766    emitFlagsRd(i);
1767 }
1768 
1769 void
emitTEXPREP(const TexInstruction * i)1770 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1771 {
1772    code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1773    code[1] = 0x60010000;
1774 
1775    code[0] |= (i->tex.mask & 0x3) << 25;
1776    code[1] |= (i->tex.mask & 0xc) << 12;
1777    defId(i->def(0), 2);
1778 
1779    emitFlagsRd(i);
1780 }
1781 
1782 void
emitPRERETEmu(const FlowInstruction * i)1783 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1784 {
1785    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1786 
1787    code[0] = 0x10000003; // bra
1788    code[1] = 0x00000780; // always
1789 
1790    switch (i->subOp) {
1791    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1792       break;
1793    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1794       pos += 8;
1795       break;
1796    default:
1797       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1798       code[0] = 0x20000003; // call
1799       code[1] = 0x00000000; // no predicate
1800       break;
1801    }
1802    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1803    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1804 }
1805 
1806 void
emitFlow(const Instruction * i,uint8_t flowOp)1807 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1808 {
1809    const FlowInstruction *f = i->asFlow();
1810    bool hasPred = false;
1811    bool hasTarg = false;
1812 
1813    code[0] = 0x00000003 | (flowOp << 28);
1814    code[1] = 0x00000000;
1815 
1816    switch (i->op) {
1817    case OP_BRA:
1818       hasPred = true;
1819       hasTarg = true;
1820       break;
1821    case OP_BREAK:
1822    case OP_BRKPT:
1823    case OP_DISCARD:
1824    case OP_RET:
1825       hasPred = true;
1826       break;
1827    case OP_CALL:
1828    case OP_PREBREAK:
1829    case OP_JOINAT:
1830       hasTarg = true;
1831       break;
1832    case OP_PRERET:
1833       hasTarg = true;
1834       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1835          emitPRERETEmu(f);
1836          return;
1837       }
1838       break;
1839    default:
1840       break;
1841    }
1842 
1843    if (hasPred)
1844       emitFlagsRd(i);
1845 
1846    if (hasTarg && f) {
1847       uint32_t pos;
1848 
1849       if (f->op == OP_CALL) {
1850          if (f->builtin) {
1851             pos = targNV50->getBuiltinOffset(f->target.builtin);
1852          } else {
1853             pos = f->target.fn->binPos;
1854          }
1855       } else {
1856          pos = f->target.bb->binPos;
1857       }
1858 
1859       code[0] |= ((pos >>  2) & 0xffff) << 11;
1860       code[1] |= ((pos >> 18) & 0x003f) << 14;
1861 
1862       RelocEntry::Type relocTy;
1863 
1864       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1865 
1866       addReloc(relocTy, 0, pos, 0x07fff800, 9);
1867       addReloc(relocTy, 1, pos, 0x000fc000, -4);
1868    }
1869 }
1870 
1871 void
emitBAR(const Instruction * i)1872 CodeEmitterNV50::emitBAR(const Instruction *i)
1873 {
1874    ImmediateValue *barId = i->getSrc(0)->asImm();
1875    assert(barId);
1876 
1877    code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1878    code[1] = 0x00004000;
1879 
1880    if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1881       code[0] |= 1 << 26;
1882 }
1883 
1884 void
emitATOM(const Instruction * i)1885 CodeEmitterNV50::emitATOM(const Instruction *i)
1886 {
1887    uint8_t subOp;
1888    switch (i->subOp) {
1889    case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
1890    case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
1891    case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
1892    case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
1893    case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
1894    case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
1895    case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
1896    case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
1897    case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
1898    case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1899    default:
1900       assert(!"invalid subop");
1901       return;
1902    }
1903    code[0] = 0xd0000001;
1904    code[1] = 0xc0c00000 | (subOp << 2);
1905    if (isSignedType(i->dType))
1906       code[1] |= 1 << 21;
1907 
1908    // args
1909    emitFlagsRd(i);
1910    if (i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1911        i->subOp == NV50_IR_SUBOP_ATOM_CAS ||
1912        i->defExists(0)) {
1913       code[1] |= 0x20000000;
1914       setDst(i, 0);
1915       setSrc(i, 1, 1);
1916       // g[] pointer
1917       code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1918    } else {
1919       srcId(i->src(1), 2);
1920       // g[] pointer
1921       code[0] |= i->getSrc(0)->reg.fileIndex << 16;
1922    }
1923    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1924       setSrc(i, 2, 2);
1925 
1926    srcId(i->getIndirect(0, 0), 9);
1927 }
1928 
1929 bool
emitInstruction(Instruction * insn)1930 CodeEmitterNV50::emitInstruction(Instruction *insn)
1931 {
1932    if (!insn->encSize) {
1933       ERROR("skipping unencodable instruction: "); insn->print();
1934       return false;
1935    } else
1936    if (codeSize + insn->encSize > codeSizeLimit) {
1937       ERROR("code emitter output buffer too small\n");
1938       return false;
1939    }
1940 
1941    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1942       INFO("EMIT: "); insn->print();
1943    }
1944 
1945    switch (insn->op) {
1946    case OP_MOV:
1947       emitMOV(insn);
1948       break;
1949    case OP_EXIT:
1950    case OP_NOP:
1951    case OP_JOIN:
1952       emitNOP();
1953       break;
1954    case OP_VFETCH:
1955    case OP_LOAD:
1956       emitLOAD(insn);
1957       break;
1958    case OP_EXPORT:
1959    case OP_STORE:
1960       emitSTORE(insn);
1961       break;
1962    case OP_PFETCH:
1963       emitPFETCH(insn);
1964       break;
1965    case OP_RDSV:
1966       emitRDSV(insn);
1967       break;
1968    case OP_LINTERP:
1969    case OP_PINTERP:
1970       emitINTERP(insn);
1971       break;
1972    case OP_ADD:
1973    case OP_SUB:
1974       if (insn->dType == TYPE_F64)
1975          emitDADD(insn);
1976       else if (isFloatType(insn->dType))
1977          emitFADD(insn);
1978       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1979          emitAADD(insn);
1980       else
1981          emitUADD(insn);
1982       break;
1983    case OP_MUL:
1984       if (insn->dType == TYPE_F64)
1985          emitDMUL(insn);
1986       else if (isFloatType(insn->dType))
1987          emitFMUL(insn);
1988       else
1989          emitIMUL(insn);
1990       break;
1991    case OP_MAD:
1992    case OP_FMA:
1993       if (insn->dType == TYPE_F64)
1994          emitDMAD(insn);
1995       else if (isFloatType(insn->dType))
1996          emitFMAD(insn);
1997       else
1998          emitIMAD(insn);
1999       break;
2000    case OP_SAD:
2001       emitISAD(insn);
2002       break;
2003    case OP_NOT:
2004       emitNOT(insn);
2005       break;
2006    case OP_AND:
2007    case OP_OR:
2008    case OP_XOR:
2009       emitLogicOp(insn);
2010       break;
2011    case OP_SHL:
2012    case OP_SHR:
2013       emitShift(insn);
2014       break;
2015    case OP_SET:
2016       emitSET(insn);
2017       break;
2018    case OP_MIN:
2019    case OP_MAX:
2020       emitMINMAX(insn);
2021       break;
2022    case OP_CEIL:
2023    case OP_FLOOR:
2024    case OP_TRUNC:
2025    case OP_ABS:
2026    case OP_NEG:
2027    case OP_SAT:
2028       emitCVT(insn);
2029       break;
2030    case OP_CVT:
2031       if (insn->def(0).getFile() == FILE_ADDRESS)
2032          emitARL(insn, 0);
2033       else
2034       if (insn->def(0).getFile() == FILE_FLAGS ||
2035           insn->src(0).getFile() == FILE_FLAGS ||
2036           insn->src(0).getFile() == FILE_ADDRESS)
2037          emitMOV(insn);
2038       else
2039          emitCVT(insn);
2040       break;
2041    case OP_RCP:
2042       emitSFnOp(insn, 0);
2043       break;
2044    case OP_RSQ:
2045       emitSFnOp(insn, 2);
2046       break;
2047    case OP_LG2:
2048       emitSFnOp(insn, 3);
2049       break;
2050    case OP_SIN:
2051       emitSFnOp(insn, 4);
2052       break;
2053    case OP_COS:
2054       emitSFnOp(insn, 5);
2055       break;
2056    case OP_EX2:
2057       emitSFnOp(insn, 6);
2058       break;
2059    case OP_PRESIN:
2060    case OP_PREEX2:
2061       emitPreOp(insn);
2062       break;
2063    case OP_TEX:
2064    case OP_TXB:
2065    case OP_TXL:
2066    case OP_TXF:
2067    case OP_TXG:
2068    case OP_TXLQ:
2069       emitTEX(insn->asTex());
2070       break;
2071    case OP_TXQ:
2072       emitTXQ(insn->asTex());
2073       break;
2074    case OP_TEXPREP:
2075       emitTEXPREP(insn->asTex());
2076       break;
2077    case OP_EMIT:
2078    case OP_RESTART:
2079       emitOUT(insn);
2080       break;
2081    case OP_DISCARD:
2082       emitFlow(insn, 0x0);
2083       break;
2084    case OP_BRA:
2085       emitFlow(insn, 0x1);
2086       break;
2087    case OP_CALL:
2088       emitFlow(insn, 0x2);
2089       break;
2090    case OP_RET:
2091       emitFlow(insn, 0x3);
2092       break;
2093    case OP_PREBREAK:
2094       emitFlow(insn, 0x4);
2095       break;
2096    case OP_BREAK:
2097       emitFlow(insn, 0x5);
2098       break;
2099    case OP_QUADON:
2100       emitFlow(insn, 0x6);
2101       break;
2102    case OP_QUADPOP:
2103       emitFlow(insn, 0x7);
2104       break;
2105    case OP_JOINAT:
2106       emitFlow(insn, 0xa);
2107       break;
2108    case OP_PRERET:
2109       emitFlow(insn, 0xd);
2110       break;
2111    case OP_QUADOP:
2112       emitQUADOP(insn, insn->lanes, insn->subOp);
2113       break;
2114    case OP_DFDX:
2115       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
2116       break;
2117    case OP_DFDY:
2118       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
2119       break;
2120    case OP_ATOM:
2121       emitATOM(insn);
2122       break;
2123    case OP_BAR:
2124       emitBAR(insn);
2125       break;
2126    case OP_PHI:
2127    case OP_UNION:
2128    case OP_CONSTRAINT:
2129       ERROR("operation should have been eliminated\n");
2130       return false;
2131    case OP_EXP:
2132    case OP_LOG:
2133    case OP_SQRT:
2134    case OP_POW:
2135    case OP_SELP:
2136    case OP_SLCT:
2137    case OP_TXD:
2138    case OP_PRECONT:
2139    case OP_CONT:
2140    case OP_POPCNT:
2141    case OP_INSBF:
2142    case OP_EXTBF:
2143       ERROR("operation should have been lowered\n");
2144       return false;
2145    default:
2146       ERROR("unknown op: %u\n", insn->op);
2147       return false;
2148    }
2149    if (insn->join || insn->op == OP_JOIN)
2150       code[1] |= 0x2;
2151    else
2152    if (insn->exit || insn->op == OP_EXIT)
2153       code[1] |= 0x1;
2154 
2155    assert((insn->encSize == 8) == (code[0] & 1));
2156 
2157    code += insn->encSize / 4;
2158    codeSize += insn->encSize;
2159    return true;
2160 }
2161 
2162 uint32_t
getMinEncodingSize(const Instruction * i) const2163 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2164 {
2165    const Target::OpInfo &info = targ->getOpInfo(i);
2166 
2167    if (info.minEncSize > 4 || i->dType == TYPE_F64)
2168       return 8;
2169 
2170    // check constraints on dst and src operands
2171    for (int d = 0; i->defExists(d); ++d) {
2172       if (i->def(d).rep()->reg.data.id > 63 ||
2173           i->def(d).rep()->reg.file != FILE_GPR)
2174          return 8;
2175    }
2176 
2177    for (int s = 0; i->srcExists(s); ++s) {
2178       DataFile sf = i->src(s).getFile();
2179       if (sf != FILE_GPR)
2180          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2181             return 8;
2182       if (i->src(s).rep()->reg.data.id > 63)
2183          return 8;
2184    }
2185 
2186    // check modifiers & rounding
2187    if (i->join || i->lanes != 0xf || i->exit)
2188       return 8;
2189    if (i->op == OP_MUL && i->rnd != ROUND_N)
2190       return 8;
2191 
2192    if (i->asTex())
2193       return 8; // TODO: short tex encoding
2194 
2195    // check constraints on short MAD
2196    if (info.srcNr >= 2 && i->srcExists(2)) {
2197       if (!i->defExists(0) ||
2198           (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2199           DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2200          return 8;
2201    }
2202 
2203    return info.minEncSize;
2204 }
2205 
2206 // Change the encoding size of an instruction after BBs have been scheduled.
2207 static void
makeInstructionLong(Instruction * insn)2208 makeInstructionLong(Instruction *insn)
2209 {
2210    if (insn->encSize == 8)
2211       return;
2212    Function *fn = insn->bb->getFunction();
2213    int n = 0;
2214    int adj = 4;
2215 
2216    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2217 
2218    if (n & 1) {
2219       adj = 8;
2220       insn->next->encSize = 8;
2221    } else
2222    if (insn->prev && insn->prev->encSize == 4) {
2223       adj = 8;
2224       insn->prev->encSize = 8;
2225    }
2226    insn->encSize = 8;
2227 
2228    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2229       fn->bbArray[i]->binPos += adj;
2230    }
2231    fn->binSize += adj;
2232    insn->bb->binSize += adj;
2233 }
2234 
2235 static bool
trySetExitModifier(Instruction * insn)2236 trySetExitModifier(Instruction *insn)
2237 {
2238    if (insn->op == OP_DISCARD ||
2239        insn->op == OP_QUADON ||
2240        insn->op == OP_QUADPOP)
2241       return false;
2242    for (int s = 0; insn->srcExists(s); ++s)
2243       if (insn->src(s).getFile() == FILE_IMMEDIATE)
2244          return false;
2245    if (insn->asFlow()) {
2246       if (insn->op == OP_CALL) // side effects !
2247          return false;
2248       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2249          return false;
2250       insn->op = OP_EXIT;
2251    }
2252    insn->exit = 1;
2253    makeInstructionLong(insn);
2254    return true;
2255 }
2256 
2257 static void
replaceExitWithModifier(Function * func)2258 replaceExitWithModifier(Function *func)
2259 {
2260    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2261 
2262    if (!epilogue->getExit() ||
2263        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2264       return;
2265 
2266    if (epilogue->getEntry()->op != OP_EXIT) {
2267       Instruction *insn = epilogue->getExit()->prev;
2268       if (!insn || !trySetExitModifier(insn))
2269          return;
2270       insn->exit = 1;
2271    } else {
2272       for (Graph::EdgeIterator ei = func->cfgExit->incident();
2273            !ei.end(); ei.next()) {
2274          BasicBlock *bb = BasicBlock::get(ei.getNode());
2275          Instruction *i = bb->getExit();
2276 
2277          if (!i || !trySetExitModifier(i))
2278             return;
2279       }
2280    }
2281 
2282    int adj = epilogue->getExit()->encSize;
2283    epilogue->binSize -= adj;
2284    func->binSize -= adj;
2285    delete_Instruction(func->getProgram(), epilogue->getExit());
2286 
2287    // There may be BB's that are laid out after the exit block
2288    for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
2289       func->bbArray[i]->binPos -= adj;
2290    }
2291 }
2292 
2293 void
prepareEmission(Function * func)2294 CodeEmitterNV50::prepareEmission(Function *func)
2295 {
2296    CodeEmitter::prepareEmission(func);
2297 
2298    replaceExitWithModifier(func);
2299 }
2300 
CodeEmitterNV50(Program::Type type,const TargetNV50 * target)2301 CodeEmitterNV50::CodeEmitterNV50(Program::Type type, const TargetNV50 *target) :
2302    CodeEmitter(target), progType(type), targNV50(target)
2303 {
2304    targ = target; // specialized
2305    code = NULL;
2306    codeSize = codeSizeLimit = 0;
2307    relocInfo = NULL;
2308 }
2309 
2310 CodeEmitter *
getCodeEmitter(Program::Type type)2311 TargetNV50::getCodeEmitter(Program::Type type)
2312 {
2313    CodeEmitterNV50 *emit = new CodeEmitterNV50(type, this);
2314    return emit;
2315 }
2316 
2317 } // namespace nv50_ir
2318