1 /*
2  * Copyright 2014 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs <bskeggs@redhat.com>
23  */
24 
25 #include "codegen/nv50_ir_target_gm107.h"
26 #include "codegen/nv50_ir_sched_gm107.h"
27 
28 //#define GM107_DEBUG_SCHED_DATA
29 
30 namespace nv50_ir {
31 
32 class CodeEmitterGM107 : public CodeEmitter
33 {
34 public:
35    CodeEmitterGM107(const TargetGM107 *);
36 
37    virtual bool emitInstruction(Instruction *);
38    virtual uint32_t getMinEncodingSize(const Instruction *) const;
39 
40    virtual void prepareEmission(Program *);
41    virtual void prepareEmission(Function *);
42 
setProgramType(Program::Type pType)43    inline void setProgramType(Program::Type pType) { progType = pType; }
44 
45 private:
46    const TargetGM107 *targGM107;
47 
48    Program::Type progType;
49 
50    const Instruction *insn;
51    const bool writeIssueDelays;
52    uint32_t *data;
53 
54 private:
55    inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)56    inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57 
58    inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)59    inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60    inline void emitPred();
61    inline void emitGPR(int, const Value *);
emitGPR(int pos)62    inline void emitGPR(int pos) {
63       emitGPR(pos, (const Value *)NULL);
64    }
emitGPR(int pos,const ValueRef & ref)65    inline void emitGPR(int pos, const ValueRef &ref) {
66       emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67    }
emitGPR(int pos,const ValueRef * ref)68    inline void emitGPR(int pos, const ValueRef *ref) {
69       emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70    }
emitGPR(int pos,const ValueDef & def)71    inline void emitGPR(int pos, const ValueDef &def) {
72       emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73    }
74    inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)75    inline void emitSYS(int pos, const ValueRef &ref) {
76       emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77    }
78    inline void emitPRED(int, const Value *);
emitPRED(int pos)79    inline void emitPRED(int pos) {
80       emitPRED(pos, (const Value *)NULL);
81    }
emitPRED(int pos,const ValueRef & ref)82    inline void emitPRED(int pos, const ValueRef &ref) {
83       emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84    }
emitPRED(int pos,const ValueDef & def)85    inline void emitPRED(int pos, const ValueDef &def) {
86       emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87    }
88    inline void emitADDR(int, int, int, int, const ValueRef &);
89    inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90    inline bool longIMMD(const ValueRef &);
91    inline void emitIMMD(int, int, const ValueRef &);
92 
93    void emitCond3(int, CondCode);
94    void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)95    void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96    inline void emitO(int);
97    inline void emitP(int);
98    inline void emitSAT(int);
99    inline void emitCC(int);
100    inline void emitX(int);
101    inline void emitABS(int, const ValueRef &);
102    inline void emitNEG(int, const ValueRef &);
103    inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104    inline void emitFMZ(int, int);
105    inline void emitRND(int, RoundMode, int);
emitRND(int pos)106    inline void emitRND(int pos) {
107       emitRND(pos, insn->rnd, -1);
108    }
109    inline void emitPDIV(int);
110    inline void emitINV(int, const ValueRef &);
111 
112    void emitEXIT();
113    void emitBRA();
114    void emitCAL();
115    void emitPCNT();
116    void emitCONT();
117    void emitPBK();
118    void emitBRK();
119    void emitPRET();
120    void emitRET();
121    void emitSSY();
122    void emitSYNC();
123    void emitSAM();
124    void emitRAM();
125 
126    void emitPSETP();
127 
128    void emitMOV();
129    void emitS2R();
130    void emitCS2R();
131    void emitF2F();
132    void emitF2I();
133    void emitI2F();
134    void emitI2I();
135    void emitSEL();
136    void emitSHFL();
137 
138    void emitDADD();
139    void emitDMUL();
140    void emitDFMA();
141    void emitDMNMX();
142    void emitDSET();
143    void emitDSETP();
144 
145    void emitFADD();
146    void emitFMUL();
147    void emitFFMA();
148    void emitMUFU();
149    void emitFMNMX();
150    void emitRRO();
151    void emitFCMP();
152    void emitFSET();
153    void emitFSETP();
154    void emitFSWZADD();
155 
156    void emitLOP();
157    void emitNOT();
158    void emitIADD();
159    void emitIMUL();
160    void emitIMAD();
161    void emitISCADD();
162    void emitXMAD();
163    void emitIMNMX();
164    void emitICMP();
165    void emitISET();
166    void emitISETP();
167    void emitSHL();
168    void emitSHR();
169    void emitSHF();
170    void emitPOPC();
171    void emitBFI();
172    void emitBFE();
173    void emitFLO();
174    void emitPRMT();
175 
176    void emitLDSTs(int, DataType);
177    void emitLDSTc(int);
178    void emitLDC();
179    void emitLDL();
180    void emitLDS();
181    void emitLD();
182    void emitSTL();
183    void emitSTS();
184    void emitST();
185    void emitALD();
186    void emitAST();
187    void emitISBERD();
188    void emitAL2P();
189    void emitIPA();
190    void emitATOM();
191    void emitATOMS();
192    void emitRED();
193    void emitCCTL();
194 
195    void emitPIXLD();
196 
197    void emitTEXs(int);
198    void emitTEX();
199    void emitTEXS();
200    void emitTLD();
201    void emitTLD4();
202    void emitTXD();
203    void emitTXQ();
204    void emitTMML();
205    void emitDEPBAR();
206 
207    void emitNOP();
208    void emitKIL();
209    void emitOUT();
210 
211    void emitBAR();
212    void emitMEMBAR();
213 
214    void emitVOTE();
215 
216    void emitSUTarget();
217    void emitSUHandle(const int s);
218    void emitSUSTx();
219    void emitSULDx();
220    void emitSUREDx();
221 };
222 
223 /*******************************************************************************
224  * general instruction layout/fields
225  ******************************************************************************/
226 
227 void
emitField(uint32_t * data,int b,int s,uint32_t v)228 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229 {
230    if (b >= 0) {
231       uint32_t m = ((1ULL << s) - 1);
232       uint64_t d = (uint64_t)(v & m) << b;
233       assert(!(v & ~m) || (v & ~m) == ~m);
234       data[1] |= d >> 32;
235       data[0] |= d;
236    }
237 }
238 
239 void
emitPred()240 CodeEmitterGM107::emitPred()
241 {
242    if (insn->predSrc >= 0) {
243       emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244       emitField(19, 1, insn->cc == CC_NOT_P);
245    } else {
246       emitField(16, 3, 7);
247    }
248 }
249 
250 void
emitInsn(uint32_t hi,bool pred)251 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252 {
253    code[0] = 0x00000000;
254    code[1] = hi;
255    if (pred)
256       emitPred();
257 }
258 
259 void
emitGPR(int pos,const Value * val)260 CodeEmitterGM107::emitGPR(int pos, const Value *val)
261 {
262    emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263              val->reg.data.id : 255);
264 }
265 
266 void
emitSYS(int pos,const Value * val)267 CodeEmitterGM107::emitSYS(int pos, const Value *val)
268 {
269    int id = val ? val->reg.data.id : -1;
270 
271    switch (id) {
272    case SV_LANEID         : id = 0x00; break;
273    case SV_VERTEX_COUNT   : id = 0x10; break;
274    case SV_INVOCATION_ID  : id = 0x11; break;
275    case SV_THREAD_KILL    : id = 0x13; break;
276    case SV_INVOCATION_INFO: id = 0x1d; break;
277    case SV_COMBINED_TID   : id = 0x20; break;
278    case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
279    case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
280    case SV_LANEMASK_EQ    : id = 0x38; break;
281    case SV_LANEMASK_LT    : id = 0x39; break;
282    case SV_LANEMASK_LE    : id = 0x3a; break;
283    case SV_LANEMASK_GT    : id = 0x3b; break;
284    case SV_LANEMASK_GE    : id = 0x3c; break;
285    case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
286    default:
287       assert(!"invalid system value");
288       id = 0;
289       break;
290    }
291 
292    emitField(pos, 8, id);
293 }
294 
295 void
emitPRED(int pos,const Value * val)296 CodeEmitterGM107::emitPRED(int pos, const Value *val)
297 {
298    emitField(pos, 3, val ? val->reg.data.id : 7);
299 }
300 
301 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)302 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303                            const ValueRef &ref)
304 {
305    const Value *v = ref.get();
306    assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307    if (gpr >= 0)
308       emitGPR(gpr, ref.getIndirect(0));
309    emitField(off, len, v->reg.data.offset >> shr);
310 }
311 
312 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)313 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314                            const ValueRef &ref)
315 {
316    const Value *v = ref.get();
317    const Symbol *s = v->asSym();
318 
319    assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320 
321    emitField(buf,  5, v->reg.fileIndex);
322    if (gpr >= 0)
323       emitGPR(gpr, ref.getIndirect(0));
324    emitField(off, 16, s->reg.data.offset >> shr);
325 }
326 
327 bool
longIMMD(const ValueRef & ref)328 CodeEmitterGM107::longIMMD(const ValueRef &ref)
329 {
330    if (ref.getFile() == FILE_IMMEDIATE) {
331       const ImmediateValue *imm = ref.get()->asImm();
332       if (isFloatType(insn->sType))
333          return imm->reg.data.u32 & 0xfff;
334       else
335          return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336    }
337    return false;
338 }
339 
340 void
emitIMMD(int pos,int len,const ValueRef & ref)341 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342 {
343    const ImmediateValue *imm = ref.get()->asImm();
344    uint32_t val = imm->reg.data.u32;
345 
346    if (len == 19) {
347       if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348          assert(!(val & 0x00000fff));
349          val >>= 12;
350       } else if (insn->sType == TYPE_F64) {
351          assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352          val = imm->reg.data.u64 >> 44;
353       } else {
354          assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355       }
356       emitField( 56,   1, (val & 0x80000) >> 19);
357       emitField(pos, len, (val & 0x7ffff));
358    } else {
359       emitField(pos, len, val);
360    }
361 }
362 
363 /*******************************************************************************
364  * modifiers
365  ******************************************************************************/
366 
367 void
emitCond3(int pos,CondCode code)368 CodeEmitterGM107::emitCond3(int pos, CondCode code)
369 {
370    int data = 0;
371 
372    switch (code) {
373    case CC_FL : data = 0x00; break;
374    case CC_LTU:
375    case CC_LT : data = 0x01; break;
376    case CC_EQU:
377    case CC_EQ : data = 0x02; break;
378    case CC_LEU:
379    case CC_LE : data = 0x03; break;
380    case CC_GTU:
381    case CC_GT : data = 0x04; break;
382    case CC_NEU:
383    case CC_NE : data = 0x05; break;
384    case CC_GEU:
385    case CC_GE : data = 0x06; break;
386    case CC_TR : data = 0x07; break;
387    default:
388       assert(!"invalid cond3");
389       break;
390    }
391 
392    emitField(pos, 3, data);
393 }
394 
395 void
emitCond4(int pos,CondCode code)396 CodeEmitterGM107::emitCond4(int pos, CondCode code)
397 {
398    int data = 0;
399 
400    switch (code) {
401    case CC_FL: data = 0x00; break;
402    case CC_LT: data = 0x01; break;
403    case CC_EQ: data = 0x02; break;
404    case CC_LE: data = 0x03; break;
405    case CC_GT: data = 0x04; break;
406    case CC_NE: data = 0x05; break;
407    case CC_GE: data = 0x06; break;
408 //   case CC_NUM: data = 0x07; break;
409 //   case CC_NAN: data = 0x08; break;
410    case CC_LTU: data = 0x09; break;
411    case CC_EQU: data = 0x0a; break;
412    case CC_LEU: data = 0x0b; break;
413    case CC_GTU: data = 0x0c; break;
414    case CC_NEU: data = 0x0d; break;
415    case CC_GEU: data = 0x0e; break;
416    case CC_TR:  data = 0x0f; break;
417    default:
418       assert(!"invalid cond4");
419       break;
420    }
421 
422    emitField(pos, 4, data);
423 }
424 
425 void
emitO(int pos)426 CodeEmitterGM107::emitO(int pos)
427 {
428    emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429 }
430 
431 void
emitP(int pos)432 CodeEmitterGM107::emitP(int pos)
433 {
434    emitField(pos, 1, insn->perPatch);
435 }
436 
437 void
emitSAT(int pos)438 CodeEmitterGM107::emitSAT(int pos)
439 {
440    emitField(pos, 1, insn->saturate);
441 }
442 
443 void
emitCC(int pos)444 CodeEmitterGM107::emitCC(int pos)
445 {
446    emitField(pos, 1, insn->flagsDef >= 0);
447 }
448 
449 void
emitX(int pos)450 CodeEmitterGM107::emitX(int pos)
451 {
452    emitField(pos, 1, insn->flagsSrc >= 0);
453 }
454 
455 void
emitABS(int pos,const ValueRef & ref)456 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457 {
458    emitField(pos, 1, ref.mod.abs());
459 }
460 
461 void
emitNEG(int pos,const ValueRef & ref)462 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463 {
464    emitField(pos, 1, ref.mod.neg());
465 }
466 
467 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)468 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469 {
470    emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471 }
472 
473 void
emitFMZ(int pos,int len)474 CodeEmitterGM107::emitFMZ(int pos, int len)
475 {
476    emitField(pos, len, insn->dnz << 1 | insn->ftz);
477 }
478 
479 void
emitRND(int rmp,RoundMode rnd,int rip)480 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481 {
482    int rm = 0, ri = 0;
483    switch (rnd) {
484    case ROUND_NI: ri = 1;
485    case ROUND_N : rm = 0; break;
486    case ROUND_MI: ri = 1;
487    case ROUND_M : rm = 1; break;
488    case ROUND_PI: ri = 1;
489    case ROUND_P : rm = 2; break;
490    case ROUND_ZI: ri = 1;
491    case ROUND_Z : rm = 3; break;
492    default:
493       assert(!"invalid round mode");
494       break;
495    }
496    emitField(rip, 1, ri);
497    emitField(rmp, 2, rm);
498 }
499 
500 void
emitPDIV(int pos)501 CodeEmitterGM107::emitPDIV(int pos)
502 {
503    assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504    if (insn->postFactor > 0)
505       emitField(pos, 3, 7 - insn->postFactor);
506    else
507       emitField(pos, 3, 0 - insn->postFactor);
508 }
509 
510 void
emitINV(int pos,const ValueRef & ref)511 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512 {
513    emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514 }
515 
516 /*******************************************************************************
517  * control flow
518  ******************************************************************************/
519 
520 void
emitEXIT()521 CodeEmitterGM107::emitEXIT()
522 {
523    emitInsn (0xe3000000);
524    emitCond5(0x00, CC_TR);
525 }
526 
527 void
emitBRA()528 CodeEmitterGM107::emitBRA()
529 {
530    const FlowInstruction *insn = this->insn->asFlow();
531    int gpr = -1;
532 
533    if (insn->indirect) {
534       if (insn->absolute)
535          emitInsn(0xe2000000); // JMX
536       else
537          emitInsn(0xe2500000); // BRX
538       gpr = 0x08;
539    } else {
540       if (insn->absolute)
541          emitInsn(0xe2100000); // JMP
542       else
543          emitInsn(0xe2400000); // BRA
544       emitField(0x07, 1, insn->allWarp);
545    }
546 
547    emitField(0x06, 1, insn->limit);
548    emitCond5(0x00, CC_TR);
549 
550    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551       int32_t pos = insn->target.bb->binPos;
552       if (writeIssueDelays && !(pos & 0x1f))
553          pos += 8;
554       if (!insn->absolute)
555          emitField(0x14, 24, pos - (codeSize + 8));
556       else
557          emitField(0x14, 32, pos);
558    } else {
559       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560       emitField(0x05, 1, 1);
561    }
562 }
563 
564 void
emitCAL()565 CodeEmitterGM107::emitCAL()
566 {
567    const FlowInstruction *insn = this->insn->asFlow();
568 
569    if (insn->absolute) {
570       emitInsn(0xe2200000, false); // JCAL
571    } else {
572       emitInsn(0xe2600000, false); // CAL
573    }
574 
575    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576       if (!insn->absolute)
577          emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578       else {
579          if (insn->builtin) {
580             int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581             addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
582             addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583          } else {
584             emitField(0x14, 32, insn->target.bb->binPos);
585          }
586       }
587    } else {
588       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589       emitField(0x05, 1, 1);
590    }
591 }
592 
593 void
emitPCNT()594 CodeEmitterGM107::emitPCNT()
595 {
596    const FlowInstruction *insn = this->insn->asFlow();
597 
598    emitInsn(0xe2b00000, false);
599 
600    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602    } else {
603       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604       emitField(0x05, 1, 1);
605    }
606 }
607 
608 void
emitCONT()609 CodeEmitterGM107::emitCONT()
610 {
611    emitInsn (0xe3500000);
612    emitCond5(0x00, CC_TR);
613 }
614 
615 void
emitPBK()616 CodeEmitterGM107::emitPBK()
617 {
618    const FlowInstruction *insn = this->insn->asFlow();
619 
620    emitInsn(0xe2a00000, false);
621 
622    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624    } else {
625       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626       emitField(0x05, 1, 1);
627    }
628 }
629 
630 void
emitBRK()631 CodeEmitterGM107::emitBRK()
632 {
633    emitInsn (0xe3400000);
634    emitCond5(0x00, CC_TR);
635 }
636 
637 void
emitPRET()638 CodeEmitterGM107::emitPRET()
639 {
640    const FlowInstruction *insn = this->insn->asFlow();
641 
642    emitInsn(0xe2700000, false);
643 
644    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646    } else {
647       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648       emitField(0x05, 1, 1);
649    }
650 }
651 
652 void
emitRET()653 CodeEmitterGM107::emitRET()
654 {
655    emitInsn (0xe3200000);
656    emitCond5(0x00, CC_TR);
657 }
658 
659 void
emitSSY()660 CodeEmitterGM107::emitSSY()
661 {
662    const FlowInstruction *insn = this->insn->asFlow();
663 
664    emitInsn(0xe2900000, false);
665 
666    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667       emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668    } else {
669       emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670       emitField(0x05, 1, 1);
671    }
672 }
673 
674 void
emitSYNC()675 CodeEmitterGM107::emitSYNC()
676 {
677    emitInsn (0xf0f80000);
678    emitCond5(0x00, CC_TR);
679 }
680 
681 void
emitSAM()682 CodeEmitterGM107::emitSAM()
683 {
684    emitInsn(0xe3700000, false);
685 }
686 
687 void
emitRAM()688 CodeEmitterGM107::emitRAM()
689 {
690    emitInsn(0xe3800000, false);
691 }
692 
693 /*******************************************************************************
694  * predicate/cc
695  ******************************************************************************/
696 
697 void
emitPSETP()698 CodeEmitterGM107::emitPSETP()
699 {
700 
701    emitInsn(0x50900000);
702 
703    switch (insn->op) {
704    case OP_AND: emitField(0x18, 3, 0); break;
705    case OP_OR:  emitField(0x18, 3, 1); break;
706    case OP_XOR: emitField(0x18, 3, 2); break;
707    default:
708       assert(!"unexpected operation");
709       break;
710    }
711 
712    // emitINV (0x2a);
713    emitPRED(0x27); // TODO: support 3-arg
714    emitINV (0x20, insn->src(1));
715    emitPRED(0x1d, insn->src(1));
716    emitINV (0x0f, insn->src(0));
717    emitPRED(0x0c, insn->src(0));
718    emitPRED(0x03, insn->def(0));
719    emitPRED(0x00);
720 }
721 
722 /*******************************************************************************
723  * movement / conversion
724  ******************************************************************************/
725 
726 void
emitMOV()727 CodeEmitterGM107::emitMOV()
728 {
729    if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730       switch (insn->src(0).getFile()) {
731       case FILE_GPR:
732          if (insn->def(0).getFile() == FILE_PREDICATE) {
733             emitInsn(0x5b6a0000);
734             emitGPR (0x08);
735          } else {
736             emitInsn(0x5c980000);
737          }
738          emitGPR (0x14, insn->src(0));
739          break;
740       case FILE_MEMORY_CONST:
741          emitInsn(0x4c980000);
742          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743          break;
744       case FILE_IMMEDIATE:
745          emitInsn(0x38980000);
746          emitIMMD(0x14, 19, insn->src(0));
747          break;
748       case FILE_PREDICATE:
749          emitInsn(0x50880000);
750          emitPRED(0x0c, insn->src(0));
751          emitPRED(0x1d);
752          emitPRED(0x27);
753          break;
754       default:
755          assert(!"bad src file");
756          break;
757       }
758       if (insn->def(0).getFile() != FILE_PREDICATE &&
759           insn->src(0).getFile() != FILE_PREDICATE)
760          emitField(0x27, 4, insn->lanes);
761    } else {
762       emitInsn (0x01000000);
763       emitIMMD (0x14, 32, insn->src(0));
764       emitField(0x0c, 4, insn->lanes);
765    }
766 
767    if (insn->def(0).getFile() == FILE_PREDICATE) {
768       emitPRED(0x27);
769       emitPRED(0x03, insn->def(0));
770       emitPRED(0x00);
771    } else {
772       emitGPR(0x00, insn->def(0));
773    }
774 }
775 
776 void
emitS2R()777 CodeEmitterGM107::emitS2R()
778 {
779    emitInsn(0xf0c80000);
780    emitSYS (0x14, insn->src(0));
781    emitGPR (0x00, insn->def(0));
782 }
783 
784 void
emitCS2R()785 CodeEmitterGM107::emitCS2R()
786 {
787    emitInsn(0x50c80000);
788    emitSYS (0x14, insn->src(0));
789    emitGPR (0x00, insn->def(0));
790 }
791 
792 void
emitF2F()793 CodeEmitterGM107::emitF2F()
794 {
795    RoundMode rnd = insn->rnd;
796 
797    switch (insn->op) {
798    case OP_FLOOR: rnd = ROUND_MI; break;
799    case OP_CEIL : rnd = ROUND_PI; break;
800    case OP_TRUNC: rnd = ROUND_ZI; break;
801    default:
802       break;
803    }
804 
805    switch (insn->src(0).getFile()) {
806    case FILE_GPR:
807       emitInsn(0x5ca80000);
808       emitGPR (0x14, insn->src(0));
809       break;
810    case FILE_MEMORY_CONST:
811       emitInsn(0x4ca80000);
812       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813       break;
814    case FILE_IMMEDIATE:
815       emitInsn(0x38a80000);
816       emitIMMD(0x14, 19, insn->src(0));
817       break;
818    default:
819       assert(!"bad src0 file");
820       break;
821    }
822 
823    emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825    emitCC   (0x2f);
826    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827    emitFMZ  (0x2c, 1);
828    emitField(0x29, 1, insn->subOp);
829    emitRND  (0x27, rnd, 0x2a);
830    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832    emitGPR  (0x00, insn->def(0));
833 }
834 
835 void
emitF2I()836 CodeEmitterGM107::emitF2I()
837 {
838    RoundMode rnd = insn->rnd;
839 
840    switch (insn->op) {
841    case OP_FLOOR: rnd = ROUND_M; break;
842    case OP_CEIL : rnd = ROUND_P; break;
843    case OP_TRUNC: rnd = ROUND_Z; break;
844    default:
845       break;
846    }
847 
848    switch (insn->src(0).getFile()) {
849    case FILE_GPR:
850       emitInsn(0x5cb00000);
851       emitGPR (0x14, insn->src(0));
852       break;
853    case FILE_MEMORY_CONST:
854       emitInsn(0x4cb00000);
855       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856       break;
857    case FILE_IMMEDIATE:
858       emitInsn(0x38b00000);
859       emitIMMD(0x14, 19, insn->src(0));
860       break;
861    default:
862       assert(!"bad src0 file");
863       break;
864    }
865 
866    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867    emitCC   (0x2f);
868    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869    emitFMZ  (0x2c, 1);
870    emitRND  (0x27, rnd, 0x2a);
871    emitField(0x0c, 1, isSignedType(insn->dType));
872    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874    emitGPR  (0x00, insn->def(0));
875 }
876 
877 void
emitI2F()878 CodeEmitterGM107::emitI2F()
879 {
880    RoundMode rnd = insn->rnd;
881 
882    switch (insn->op) {
883    case OP_FLOOR: rnd = ROUND_M; break;
884    case OP_CEIL : rnd = ROUND_P; break;
885    case OP_TRUNC: rnd = ROUND_Z; break;
886    default:
887       break;
888    }
889 
890    switch (insn->src(0).getFile()) {
891    case FILE_GPR:
892       emitInsn(0x5cb80000);
893       emitGPR (0x14, insn->src(0));
894       break;
895    case FILE_MEMORY_CONST:
896       emitInsn(0x4cb80000);
897       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898       break;
899    case FILE_IMMEDIATE:
900       emitInsn(0x38b80000);
901       emitIMMD(0x14, 19, insn->src(0));
902       break;
903    default:
904       assert(!"bad src0 file");
905       break;
906    }
907 
908    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909    emitCC   (0x2f);
910    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911    emitField(0x29, 2, insn->subOp);
912    emitRND  (0x27, rnd, -1);
913    emitField(0x0d, 1, isSignedType(insn->sType));
914    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916    emitGPR  (0x00, insn->def(0));
917 }
918 
919 void
emitI2I()920 CodeEmitterGM107::emitI2I()
921 {
922    switch (insn->src(0).getFile()) {
923    case FILE_GPR:
924       emitInsn(0x5ce00000);
925       emitGPR (0x14, insn->src(0));
926       break;
927    case FILE_MEMORY_CONST:
928       emitInsn(0x4ce00000);
929       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930       break;
931    case FILE_IMMEDIATE:
932       emitInsn(0x38e00000);
933       emitIMMD(0x14, 19, insn->src(0));
934       break;
935    default:
936       assert(!"bad src0 file");
937       break;
938    }
939 
940    emitSAT  (0x32);
941    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942    emitCC   (0x2f);
943    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944    emitField(0x29, 2, insn->subOp);
945    emitField(0x0d, 1, isSignedType(insn->sType));
946    emitField(0x0c, 1, isSignedType(insn->dType));
947    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948    emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949    emitGPR  (0x00, insn->def(0));
950 }
951 
952 void
gm107_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)953 gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954 {
955    int loc = entry->loc;
956    bool val = false;
957    switch (entry->ipa) {
958    case 0:
959       val = data.force_persample_interp;
960       break;
961    case 1:
962       val = data.msaa;
963       break;
964    }
965    if (val)
966       code[loc + 1] |= 1 << 10;
967    else
968       code[loc + 1] &= ~(1 << 10);
969 }
970 
971 void
emitSEL()972 CodeEmitterGM107::emitSEL()
973 {
974    switch (insn->src(1).getFile()) {
975    case FILE_GPR:
976       emitInsn(0x5ca00000);
977       emitGPR (0x14, insn->src(1));
978       break;
979    case FILE_MEMORY_CONST:
980       emitInsn(0x4ca00000);
981       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
982       break;
983    case FILE_IMMEDIATE:
984       emitInsn(0x38a00000);
985       emitIMMD(0x14, 19, insn->src(1));
986       break;
987    default:
988       assert(!"bad src1 file");
989       break;
990    }
991 
992    emitINV (0x2a, insn->src(2));
993    emitPRED(0x27, insn->src(2));
994    emitGPR (0x08, insn->src(0));
995    emitGPR (0x00, insn->def(0));
996 
997    if (insn->subOp >= 1) {
998       addInterp(insn->subOp - 1, 0, gm107_selpFlip);
999    }
1000 }
1001 
1002 void
emitSHFL()1003 CodeEmitterGM107::emitSHFL()
1004 {
1005    int type = 0;
1006 
1007    emitInsn (0xef100000);
1008 
1009    switch (insn->src(1).getFile()) {
1010    case FILE_GPR:
1011       emitGPR(0x14, insn->src(1));
1012       break;
1013    case FILE_IMMEDIATE:
1014       emitIMMD(0x14, 5, insn->src(1));
1015       type |= 1;
1016       break;
1017    default:
1018       assert(!"invalid src1 file");
1019       break;
1020    }
1021 
1022    switch (insn->src(2).getFile()) {
1023    case FILE_GPR:
1024       emitGPR(0x27, insn->src(2));
1025       break;
1026    case FILE_IMMEDIATE:
1027       emitIMMD(0x22, 13, insn->src(2));
1028       type |= 2;
1029       break;
1030    default:
1031       assert(!"invalid src2 file");
1032       break;
1033    }
1034 
1035    if (!insn->defExists(1))
1036       emitPRED(0x30);
1037    else {
1038       assert(insn->def(1).getFile() == FILE_PREDICATE);
1039       emitPRED(0x30, insn->def(1));
1040    }
1041 
1042    emitField(0x1e, 2, insn->subOp);
1043    emitField(0x1c, 2, type);
1044    emitGPR  (0x08, insn->src(0));
1045    emitGPR  (0x00, insn->def(0));
1046 }
1047 
1048 /*******************************************************************************
1049  * double
1050  ******************************************************************************/
1051 
1052 void
emitDADD()1053 CodeEmitterGM107::emitDADD()
1054 {
1055    switch (insn->src(1).getFile()) {
1056    case FILE_GPR:
1057       emitInsn(0x5c700000);
1058       emitGPR (0x14, insn->src(1));
1059       break;
1060    case FILE_MEMORY_CONST:
1061       emitInsn(0x4c700000);
1062       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1063       break;
1064    case FILE_IMMEDIATE:
1065       emitInsn(0x38700000);
1066       emitIMMD(0x14, 19, insn->src(1));
1067       break;
1068    default:
1069       assert(!"bad src1 file");
1070       break;
1071    }
1072    emitABS(0x31, insn->src(1));
1073    emitNEG(0x30, insn->src(0));
1074    emitCC (0x2f);
1075    emitABS(0x2e, insn->src(0));
1076    emitNEG(0x2d, insn->src(1));
1077 
1078    if (insn->op == OP_SUB)
1079       code[1] ^= 0x00002000;
1080 
1081    emitGPR(0x08, insn->src(0));
1082    emitGPR(0x00, insn->def(0));
1083 }
1084 
1085 void
emitDMUL()1086 CodeEmitterGM107::emitDMUL()
1087 {
1088    switch (insn->src(1).getFile()) {
1089    case FILE_GPR:
1090       emitInsn(0x5c800000);
1091       emitGPR (0x14, insn->src(1));
1092       break;
1093    case FILE_MEMORY_CONST:
1094       emitInsn(0x4c800000);
1095       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096       break;
1097    case FILE_IMMEDIATE:
1098       emitInsn(0x38800000);
1099       emitIMMD(0x14, 19, insn->src(1));
1100       break;
1101    default:
1102       assert(!"bad src1 file");
1103       break;
1104    }
1105 
1106    emitNEG2(0x30, insn->src(0), insn->src(1));
1107    emitCC  (0x2f);
1108    emitRND (0x27);
1109    emitGPR (0x08, insn->src(0));
1110    emitGPR (0x00, insn->def(0));
1111 }
1112 
1113 void
emitDFMA()1114 CodeEmitterGM107::emitDFMA()
1115 {
1116    switch(insn->src(2).getFile()) {
1117    case FILE_GPR:
1118       switch (insn->src(1).getFile()) {
1119       case FILE_GPR:
1120          emitInsn(0x5b700000);
1121          emitGPR (0x14, insn->src(1));
1122          break;
1123       case FILE_MEMORY_CONST:
1124          emitInsn(0x4b700000);
1125          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1126          break;
1127       case FILE_IMMEDIATE:
1128          emitInsn(0x36700000);
1129          emitIMMD(0x14, 19, insn->src(1));
1130          break;
1131       default:
1132          assert(!"bad src1 file");
1133          break;
1134       }
1135       emitGPR (0x27, insn->src(2));
1136       break;
1137    case FILE_MEMORY_CONST:
1138       emitInsn(0x53700000);
1139       emitGPR (0x27, insn->src(1));
1140       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1141       break;
1142    default:
1143       assert(!"bad src2 file");
1144       break;
1145    }
1146 
1147    emitRND (0x32);
1148    emitNEG (0x31, insn->src(2));
1149    emitNEG2(0x30, insn->src(0), insn->src(1));
1150    emitCC  (0x2f);
1151    emitGPR (0x08, insn->src(0));
1152    emitGPR (0x00, insn->def(0));
1153 }
1154 
1155 void
emitDMNMX()1156 CodeEmitterGM107::emitDMNMX()
1157 {
1158    switch (insn->src(1).getFile()) {
1159    case FILE_GPR:
1160       emitInsn(0x5c500000);
1161       emitGPR (0x14, insn->src(1));
1162       break;
1163    case FILE_MEMORY_CONST:
1164       emitInsn(0x4c500000);
1165       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1166       break;
1167    case FILE_IMMEDIATE:
1168       emitInsn(0x38500000);
1169       emitIMMD(0x14, 19, insn->src(1));
1170       break;
1171    default:
1172       assert(!"bad src1 file");
1173       break;
1174    }
1175 
1176    emitABS  (0x31, insn->src(1));
1177    emitNEG  (0x30, insn->src(0));
1178    emitCC   (0x2f);
1179    emitABS  (0x2e, insn->src(0));
1180    emitNEG  (0x2d, insn->src(1));
1181    emitField(0x2a, 1, insn->op == OP_MAX);
1182    emitPRED (0x27);
1183    emitGPR  (0x08, insn->src(0));
1184    emitGPR  (0x00, insn->def(0));
1185 }
1186 
1187 void
emitDSET()1188 CodeEmitterGM107::emitDSET()
1189 {
1190    const CmpInstruction *insn = this->insn->asCmp();
1191 
1192    switch (insn->src(1).getFile()) {
1193    case FILE_GPR:
1194       emitInsn(0x59000000);
1195       emitGPR (0x14, insn->src(1));
1196       break;
1197    case FILE_MEMORY_CONST:
1198       emitInsn(0x49000000);
1199       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1200       break;
1201    case FILE_IMMEDIATE:
1202       emitInsn(0x32000000);
1203       emitIMMD(0x14, 19, insn->src(1));
1204       break;
1205    default:
1206       assert(!"bad src1 file");
1207       break;
1208    }
1209 
1210    if (insn->op != OP_SET) {
1211       switch (insn->op) {
1212       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1213       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1214       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1215       default:
1216          assert(!"invalid set op");
1217          break;
1218       }
1219       emitPRED(0x27, insn->src(2));
1220    } else {
1221       emitPRED(0x27);
1222    }
1223 
1224    emitABS  (0x36, insn->src(0));
1225    emitNEG  (0x35, insn->src(1));
1226    emitField(0x34, 1, insn->dType == TYPE_F32);
1227    emitCond4(0x30, insn->setCond);
1228    emitCC   (0x2f);
1229    emitABS  (0x2c, insn->src(1));
1230    emitNEG  (0x2b, insn->src(0));
1231    emitGPR  (0x08, insn->src(0));
1232    emitGPR  (0x00, insn->def(0));
1233 }
1234 
1235 void
emitDSETP()1236 CodeEmitterGM107::emitDSETP()
1237 {
1238    const CmpInstruction *insn = this->insn->asCmp();
1239 
1240    switch (insn->src(1).getFile()) {
1241    case FILE_GPR:
1242       emitInsn(0x5b800000);
1243       emitGPR (0x14, insn->src(1));
1244       break;
1245    case FILE_MEMORY_CONST:
1246       emitInsn(0x4b800000);
1247       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1248       break;
1249    case FILE_IMMEDIATE:
1250       emitInsn(0x36800000);
1251       emitIMMD(0x14, 19, insn->src(1));
1252       break;
1253    default:
1254       assert(!"bad src1 file");
1255       break;
1256    }
1257 
1258    if (insn->op != OP_SET) {
1259       switch (insn->op) {
1260       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1261       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1262       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1263       default:
1264          assert(!"invalid set op");
1265          break;
1266       }
1267       emitPRED(0x27, insn->src(2));
1268    } else {
1269       emitPRED(0x27);
1270    }
1271 
1272    emitCond4(0x30, insn->setCond);
1273    emitABS  (0x2c, insn->src(1));
1274    emitNEG  (0x2b, insn->src(0));
1275    emitGPR  (0x08, insn->src(0));
1276    emitABS  (0x07, insn->src(0));
1277    emitNEG  (0x06, insn->src(1));
1278    emitPRED (0x03, insn->def(0));
1279    if (insn->defExists(1))
1280       emitPRED(0x00, insn->def(1));
1281    else
1282       emitPRED(0x00);
1283 }
1284 
1285 /*******************************************************************************
1286  * float
1287  ******************************************************************************/
1288 
1289 void
emitFADD()1290 CodeEmitterGM107::emitFADD()
1291 {
1292    if (!longIMMD(insn->src(1))) {
1293       switch (insn->src(1).getFile()) {
1294       case FILE_GPR:
1295          emitInsn(0x5c580000);
1296          emitGPR (0x14, insn->src(1));
1297          break;
1298       case FILE_MEMORY_CONST:
1299          emitInsn(0x4c580000);
1300          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1301          break;
1302       case FILE_IMMEDIATE:
1303          emitInsn(0x38580000);
1304          emitIMMD(0x14, 19, insn->src(1));
1305          break;
1306       default:
1307          assert(!"bad src1 file");
1308          break;
1309       }
1310       emitSAT(0x32);
1311       emitABS(0x31, insn->src(1));
1312       emitNEG(0x30, insn->src(0));
1313       emitCC (0x2f);
1314       emitABS(0x2e, insn->src(0));
1315       emitNEG(0x2d, insn->src(1));
1316       emitFMZ(0x2c, 1);
1317 
1318       if (insn->op == OP_SUB)
1319          code[1] ^= 0x00002000;
1320    } else {
1321       emitInsn(0x08000000);
1322       emitABS(0x39, insn->src(1));
1323       emitNEG(0x38, insn->src(0));
1324       emitFMZ(0x37, 1);
1325       emitABS(0x36, insn->src(0));
1326       emitNEG(0x35, insn->src(1));
1327       emitCC  (0x34);
1328       emitIMMD(0x14, 32, insn->src(1));
1329 
1330       if (insn->op == OP_SUB)
1331          code[1] ^= 0x00080000;
1332    }
1333 
1334    emitGPR(0x08, insn->src(0));
1335    emitGPR(0x00, insn->def(0));
1336 }
1337 
1338 void
emitFMUL()1339 CodeEmitterGM107::emitFMUL()
1340 {
1341    if (!longIMMD(insn->src(1))) {
1342       switch (insn->src(1).getFile()) {
1343       case FILE_GPR:
1344          emitInsn(0x5c680000);
1345          emitGPR (0x14, insn->src(1));
1346          break;
1347       case FILE_MEMORY_CONST:
1348          emitInsn(0x4c680000);
1349          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1350          break;
1351       case FILE_IMMEDIATE:
1352          emitInsn(0x38680000);
1353          emitIMMD(0x14, 19, insn->src(1));
1354          break;
1355       default:
1356          assert(!"bad src1 file");
1357          break;
1358       }
1359       emitSAT (0x32);
1360       emitNEG2(0x30, insn->src(0), insn->src(1));
1361       emitCC  (0x2f);
1362       emitFMZ (0x2c, 2);
1363       emitPDIV(0x29);
1364       emitRND (0x27);
1365    } else {
1366       emitInsn(0x1e000000);
1367       emitSAT (0x37);
1368       emitFMZ (0x35, 2);
1369       emitCC  (0x34);
1370       emitIMMD(0x14, 32, insn->src(1));
1371       if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1372          code[1] ^= 0x00080000; /* flip immd sign bit */
1373    }
1374 
1375    emitGPR(0x08, insn->src(0));
1376    emitGPR(0x00, insn->def(0));
1377 }
1378 
1379 void
emitFFMA()1380 CodeEmitterGM107::emitFFMA()
1381 {
1382    bool isLongIMMD = false;
1383    switch(insn->src(2).getFile()) {
1384    case FILE_GPR:
1385       switch (insn->src(1).getFile()) {
1386       case FILE_GPR:
1387          emitInsn(0x59800000);
1388          emitGPR (0x14, insn->src(1));
1389          break;
1390       case FILE_MEMORY_CONST:
1391          emitInsn(0x49800000);
1392          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1393          break;
1394       case FILE_IMMEDIATE:
1395          if (longIMMD(insn->getSrc(1))) {
1396             assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1397             isLongIMMD = true;
1398             emitInsn(0x0c000000);
1399             emitIMMD(0x14, 32, insn->src(1));
1400          } else {
1401             emitInsn(0x32800000);
1402             emitIMMD(0x14, 19, insn->src(1));
1403          }
1404          break;
1405       default:
1406          assert(!"bad src1 file");
1407          break;
1408       }
1409       if (!isLongIMMD)
1410          emitGPR (0x27, insn->src(2));
1411       break;
1412    case FILE_MEMORY_CONST:
1413       emitInsn(0x51800000);
1414       emitGPR (0x27, insn->src(1));
1415       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1416       break;
1417    default:
1418       assert(!"bad src2 file");
1419       break;
1420    }
1421 
1422    if (isLongIMMD) {
1423       emitNEG (0x39, insn->src(2));
1424       emitNEG2(0x38, insn->src(0), insn->src(1));
1425       emitSAT (0x37);
1426       emitCC  (0x34);
1427    } else {
1428       emitRND (0x33);
1429       emitSAT (0x32);
1430       emitNEG (0x31, insn->src(2));
1431       emitNEG2(0x30, insn->src(0), insn->src(1));
1432       emitCC  (0x2f);
1433    }
1434 
1435    emitFMZ(0x35, 2);
1436    emitGPR(0x08, insn->src(0));
1437    emitGPR(0x00, insn->def(0));
1438 }
1439 
1440 void
emitMUFU()1441 CodeEmitterGM107::emitMUFU()
1442 {
1443    int mufu = 0;
1444 
1445    switch (insn->op) {
1446    case OP_COS: mufu = 0; break;
1447    case OP_SIN: mufu = 1; break;
1448    case OP_EX2: mufu = 2; break;
1449    case OP_LG2: mufu = 3; break;
1450    case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1451    case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1452    case OP_SQRT: mufu = 8; break;
1453    default:
1454       assert(!"invalid mufu");
1455       break;
1456    }
1457 
1458    emitInsn (0x50800000);
1459    emitSAT  (0x32);
1460    emitNEG  (0x30, insn->src(0));
1461    emitABS  (0x2e, insn->src(0));
1462    emitField(0x14, 4, mufu);
1463    emitGPR  (0x08, insn->src(0));
1464    emitGPR  (0x00, insn->def(0));
1465 }
1466 
1467 void
emitFMNMX()1468 CodeEmitterGM107::emitFMNMX()
1469 {
1470    switch (insn->src(1).getFile()) {
1471    case FILE_GPR:
1472       emitInsn(0x5c600000);
1473       emitGPR (0x14, insn->src(1));
1474       break;
1475    case FILE_MEMORY_CONST:
1476       emitInsn(0x4c600000);
1477       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1478       break;
1479    case FILE_IMMEDIATE:
1480       emitInsn(0x38600000);
1481       emitIMMD(0x14, 19, insn->src(1));
1482       break;
1483    default:
1484       assert(!"bad src1 file");
1485       break;
1486    }
1487 
1488    emitField(0x2a, 1, insn->op == OP_MAX);
1489    emitPRED (0x27);
1490 
1491    emitABS(0x31, insn->src(1));
1492    emitNEG(0x30, insn->src(0));
1493    emitCC (0x2f);
1494    emitABS(0x2e, insn->src(0));
1495    emitNEG(0x2d, insn->src(1));
1496    emitFMZ(0x2c, 1);
1497    emitGPR(0x08, insn->src(0));
1498    emitGPR(0x00, insn->def(0));
1499 }
1500 
1501 void
emitRRO()1502 CodeEmitterGM107::emitRRO()
1503 {
1504    switch (insn->src(0).getFile()) {
1505    case FILE_GPR:
1506       emitInsn(0x5c900000);
1507       emitGPR (0x14, insn->src(0));
1508       break;
1509    case FILE_MEMORY_CONST:
1510       emitInsn(0x4c900000);
1511       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1512       break;
1513    case FILE_IMMEDIATE:
1514       emitInsn(0x38900000);
1515       emitIMMD(0x14, 19, insn->src(0));
1516       break;
1517    default:
1518       assert(!"bad src file");
1519       break;
1520    }
1521 
1522    emitABS  (0x31, insn->src(0));
1523    emitNEG  (0x2d, insn->src(0));
1524    emitField(0x27, 1, insn->op == OP_PREEX2);
1525    emitGPR  (0x00, insn->def(0));
1526 }
1527 
1528 void
emitFCMP()1529 CodeEmitterGM107::emitFCMP()
1530 {
1531    const CmpInstruction *insn = this->insn->asCmp();
1532    CondCode cc = insn->setCond;
1533 
1534    if (insn->src(2).mod.neg())
1535       cc = reverseCondCode(cc);
1536 
1537    switch(insn->src(2).getFile()) {
1538    case FILE_GPR:
1539       switch (insn->src(1).getFile()) {
1540       case FILE_GPR:
1541          emitInsn(0x5ba00000);
1542          emitGPR (0x14, insn->src(1));
1543          break;
1544       case FILE_MEMORY_CONST:
1545          emitInsn(0x4ba00000);
1546          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1547          break;
1548       case FILE_IMMEDIATE:
1549          emitInsn(0x36a00000);
1550          emitIMMD(0x14, 19, insn->src(1));
1551          break;
1552       default:
1553          assert(!"bad src1 file");
1554          break;
1555       }
1556       emitGPR (0x27, insn->src(2));
1557       break;
1558    case FILE_MEMORY_CONST:
1559       emitInsn(0x53a00000);
1560       emitGPR (0x27, insn->src(1));
1561       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1562       break;
1563    default:
1564       assert(!"bad src2 file");
1565       break;
1566    }
1567 
1568    emitCond4(0x30, cc);
1569    emitFMZ  (0x2f, 1);
1570    emitGPR  (0x08, insn->src(0));
1571    emitGPR  (0x00, insn->def(0));
1572 }
1573 
1574 void
emitFSET()1575 CodeEmitterGM107::emitFSET()
1576 {
1577    const CmpInstruction *insn = this->insn->asCmp();
1578 
1579    switch (insn->src(1).getFile()) {
1580    case FILE_GPR:
1581       emitInsn(0x58000000);
1582       emitGPR (0x14, insn->src(1));
1583       break;
1584    case FILE_MEMORY_CONST:
1585       emitInsn(0x48000000);
1586       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1587       break;
1588    case FILE_IMMEDIATE:
1589       emitInsn(0x30000000);
1590       emitIMMD(0x14, 19, insn->src(1));
1591       break;
1592    default:
1593       assert(!"bad src1 file");
1594       break;
1595    }
1596 
1597    if (insn->op != OP_SET) {
1598       switch (insn->op) {
1599       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1600       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1601       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1602       default:
1603          assert(!"invalid set op");
1604          break;
1605       }
1606       emitPRED(0x27, insn->src(2));
1607    } else {
1608       emitPRED(0x27);
1609    }
1610 
1611    emitFMZ  (0x37, 1);
1612    emitABS  (0x36, insn->src(0));
1613    emitNEG  (0x35, insn->src(1));
1614    emitField(0x34, 1, insn->dType == TYPE_F32);
1615    emitCond4(0x30, insn->setCond);
1616    emitCC   (0x2f);
1617    emitABS  (0x2c, insn->src(1));
1618    emitNEG  (0x2b, insn->src(0));
1619    emitGPR  (0x08, insn->src(0));
1620    emitGPR  (0x00, insn->def(0));
1621 }
1622 
1623 void
emitFSETP()1624 CodeEmitterGM107::emitFSETP()
1625 {
1626    const CmpInstruction *insn = this->insn->asCmp();
1627 
1628    switch (insn->src(1).getFile()) {
1629    case FILE_GPR:
1630       emitInsn(0x5bb00000);
1631       emitGPR (0x14, insn->src(1));
1632       break;
1633    case FILE_MEMORY_CONST:
1634       emitInsn(0x4bb00000);
1635       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1636       break;
1637    case FILE_IMMEDIATE:
1638       emitInsn(0x36b00000);
1639       emitIMMD(0x14, 19, insn->src(1));
1640       break;
1641    default:
1642       assert(!"bad src1 file");
1643       break;
1644    }
1645 
1646    if (insn->op != OP_SET) {
1647       switch (insn->op) {
1648       case OP_SET_AND: emitField(0x2d, 2, 0); break;
1649       case OP_SET_OR : emitField(0x2d, 2, 1); break;
1650       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1651       default:
1652          assert(!"invalid set op");
1653          break;
1654       }
1655       emitPRED(0x27, insn->src(2));
1656    } else {
1657       emitPRED(0x27);
1658    }
1659 
1660    emitCond4(0x30, insn->setCond);
1661    emitFMZ  (0x2f, 1);
1662    emitABS  (0x2c, insn->src(1));
1663    emitNEG  (0x2b, insn->src(0));
1664    emitGPR  (0x08, insn->src(0));
1665    emitABS  (0x07, insn->src(0));
1666    emitNEG  (0x06, insn->src(1));
1667    emitPRED (0x03, insn->def(0));
1668    if (insn->defExists(1))
1669       emitPRED(0x00, insn->def(1));
1670    else
1671       emitPRED(0x00);
1672 }
1673 
1674 void
emitFSWZADD()1675 CodeEmitterGM107::emitFSWZADD()
1676 {
1677    emitInsn (0x50f80000);
1678    emitCC   (0x2f);
1679    emitFMZ  (0x2c, 1);
1680    emitRND  (0x27);
1681    emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1682    emitField(0x1c, 8, insn->subOp);
1683    if (insn->predSrc != 1)
1684       emitGPR  (0x14, insn->src(1));
1685    else
1686       emitGPR  (0x14);
1687    emitGPR  (0x08, insn->src(0));
1688    emitGPR  (0x00, insn->def(0));
1689 }
1690 
1691 /*******************************************************************************
1692  * integer
1693  ******************************************************************************/
1694 
1695 void
emitLOP()1696 CodeEmitterGM107::emitLOP()
1697 {
1698    int lop = 0;
1699 
1700    switch (insn->op) {
1701    case OP_AND: lop = 0; break;
1702    case OP_OR : lop = 1; break;
1703    case OP_XOR: lop = 2; break;
1704    default:
1705       assert(!"invalid lop");
1706       break;
1707    }
1708 
1709    if (!longIMMD(insn->src(1))) {
1710       switch (insn->src(1).getFile()) {
1711       case FILE_GPR:
1712          emitInsn(0x5c400000);
1713          emitGPR (0x14, insn->src(1));
1714          break;
1715       case FILE_MEMORY_CONST:
1716          emitInsn(0x4c400000);
1717          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1718          break;
1719       case FILE_IMMEDIATE:
1720          emitInsn(0x38400000);
1721          emitIMMD(0x14, 19, insn->src(1));
1722          break;
1723       default:
1724          assert(!"bad src1 file");
1725          break;
1726       }
1727       emitPRED (0x30);
1728       emitCC   (0x2f);
1729       emitX    (0x2b);
1730       emitField(0x29, 2, lop);
1731       emitINV  (0x28, insn->src(1));
1732       emitINV  (0x27, insn->src(0));
1733    } else {
1734       emitInsn (0x04000000);
1735       emitX    (0x39);
1736       emitINV  (0x38, insn->src(1));
1737       emitINV  (0x37, insn->src(0));
1738       emitField(0x35, 2, lop);
1739       emitCC   (0x34);
1740       emitIMMD (0x14, 32, insn->src(1));
1741    }
1742 
1743    emitGPR  (0x08, insn->src(0));
1744    emitGPR  (0x00, insn->def(0));
1745 }
1746 
1747 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1748 void
emitNOT()1749 CodeEmitterGM107::emitNOT()
1750 {
1751    if (!longIMMD(insn->src(0))) {
1752       switch (insn->src(0).getFile()) {
1753       case FILE_GPR:
1754          emitInsn(0x5c400700);
1755          emitGPR (0x14, insn->src(0));
1756          break;
1757       case FILE_MEMORY_CONST:
1758          emitInsn(0x4c400700);
1759          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1760          break;
1761       case FILE_IMMEDIATE:
1762          emitInsn(0x38400700);
1763          emitIMMD(0x14, 19, insn->src(0));
1764          break;
1765       default:
1766          assert(!"bad src1 file");
1767          break;
1768       }
1769       emitPRED (0x30);
1770    } else {
1771       emitInsn (0x05600000);
1772       emitIMMD (0x14, 32, insn->src(1));
1773    }
1774 
1775    emitGPR(0x08);
1776    emitGPR(0x00, insn->def(0));
1777 }
1778 
1779 void
emitIADD()1780 CodeEmitterGM107::emitIADD()
1781 {
1782    if (!longIMMD(insn->src(1))) {
1783       switch (insn->src(1).getFile()) {
1784       case FILE_GPR:
1785          emitInsn(0x5c100000);
1786          emitGPR (0x14, insn->src(1));
1787          break;
1788       case FILE_MEMORY_CONST:
1789          emitInsn(0x4c100000);
1790          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1791          break;
1792       case FILE_IMMEDIATE:
1793          emitInsn(0x38100000);
1794          emitIMMD(0x14, 19, insn->src(1));
1795          break;
1796       default:
1797          assert(!"bad src1 file");
1798          break;
1799       }
1800       emitSAT(0x32);
1801       emitNEG(0x31, insn->src(0));
1802       emitNEG(0x30, insn->src(1));
1803       emitCC (0x2f);
1804       emitX  (0x2b);
1805    } else {
1806       emitInsn(0x1c000000);
1807       emitNEG (0x38, insn->src(0));
1808       emitSAT (0x36);
1809       emitX   (0x35);
1810       emitCC  (0x34);
1811       emitIMMD(0x14, 32, insn->src(1));
1812    }
1813 
1814    if (insn->op == OP_SUB)
1815       code[1] ^= 0x00010000;
1816 
1817    emitGPR(0x08, insn->src(0));
1818    emitGPR(0x00, insn->def(0));
1819 }
1820 
1821 void
emitIMUL()1822 CodeEmitterGM107::emitIMUL()
1823 {
1824    if (!longIMMD(insn->src(1))) {
1825       switch (insn->src(1).getFile()) {
1826       case FILE_GPR:
1827          emitInsn(0x5c380000);
1828          emitGPR (0x14, insn->src(1));
1829          break;
1830       case FILE_MEMORY_CONST:
1831          emitInsn(0x4c380000);
1832          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1833          break;
1834       case FILE_IMMEDIATE:
1835          emitInsn(0x38380000);
1836          emitIMMD(0x14, 19, insn->src(1));
1837          break;
1838       default:
1839          assert(!"bad src1 file");
1840          break;
1841       }
1842       emitCC   (0x2f);
1843       emitField(0x29, 1, isSignedType(insn->sType));
1844       emitField(0x28, 1, isSignedType(insn->dType));
1845       emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1846    } else {
1847       emitInsn (0x1f000000);
1848       emitField(0x37, 1, isSignedType(insn->sType));
1849       emitField(0x36, 1, isSignedType(insn->dType));
1850       emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1851       emitCC   (0x34);
1852       emitIMMD (0x14, 32, insn->src(1));
1853    }
1854 
1855    emitGPR(0x08, insn->src(0));
1856    emitGPR(0x00, insn->def(0));
1857 }
1858 
1859 void
emitIMAD()1860 CodeEmitterGM107::emitIMAD()
1861 {
1862    /*XXX: imad32i exists, but not using it as third src overlaps dst */
1863    switch(insn->src(2).getFile()) {
1864    case FILE_GPR:
1865       switch (insn->src(1).getFile()) {
1866       case FILE_GPR:
1867          emitInsn(0x5a000000);
1868          emitGPR (0x14, insn->src(1));
1869          break;
1870       case FILE_MEMORY_CONST:
1871          emitInsn(0x4a000000);
1872          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1873          break;
1874       case FILE_IMMEDIATE:
1875          emitInsn(0x34000000);
1876          emitIMMD(0x14, 19, insn->src(1));
1877          break;
1878       default:
1879          assert(!"bad src1 file");
1880          break;
1881       }
1882       emitGPR (0x27, insn->src(2));
1883       break;
1884    case FILE_MEMORY_CONST:
1885       emitInsn(0x52000000);
1886       emitGPR (0x27, insn->src(1));
1887       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1888       break;
1889    default:
1890       assert(!"bad src2 file");
1891       break;
1892    }
1893 
1894    emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1895    emitField(0x35, 1, isSignedType(insn->sType));
1896    emitNEG  (0x34, insn->src(2));
1897    emitNEG2 (0x33, insn->src(0), insn->src(1));
1898    emitSAT  (0x32);
1899    emitX    (0x31);
1900    emitField(0x30, 1, isSignedType(insn->dType));
1901    emitCC   (0x2f);
1902    emitGPR  (0x08, insn->src(0));
1903    emitGPR  (0x00, insn->def(0));
1904 }
1905 
1906 void
emitISCADD()1907 CodeEmitterGM107::emitISCADD()
1908 {
1909    assert(insn->src(1).get()->asImm());
1910 
1911    switch (insn->src(2).getFile()) {
1912    case FILE_GPR:
1913       emitInsn(0x5c180000);
1914       emitGPR (0x14, insn->src(2));
1915       break;
1916    case FILE_MEMORY_CONST:
1917       emitInsn(0x4c180000);
1918       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1919       break;
1920    case FILE_IMMEDIATE:
1921       emitInsn(0x38180000);
1922       emitIMMD(0x14, 19, insn->src(2));
1923       break;
1924    default:
1925       assert(!"bad src1 file");
1926       break;
1927    }
1928    emitNEG (0x31, insn->src(0));
1929    emitNEG (0x30, insn->src(2));
1930    emitCC  (0x2f);
1931    emitIMMD(0x27, 5, insn->src(1));
1932    emitGPR (0x08, insn->src(0));
1933    emitGPR (0x00, insn->def(0));
1934 }
1935 
1936 void
emitXMAD()1937 CodeEmitterGM107::emitXMAD()
1938 {
1939    assert(insn->src(0).getFile() == FILE_GPR);
1940 
1941    bool constbuf = false;
1942    bool psl_mrg = true;
1943    bool immediate = false;
1944    if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1945       assert(insn->src(1).getFile() == FILE_GPR);
1946       constbuf = true;
1947       psl_mrg = false;
1948       emitInsn(0x51000000);
1949       emitGPR(0x27, insn->src(1));
1950       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1951    } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1952       assert(insn->src(2).getFile() == FILE_GPR);
1953       constbuf = true;
1954       emitInsn(0x4e000000);
1955       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1956       emitGPR(0x27, insn->src(2));
1957    } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1958       assert(insn->src(2).getFile() == FILE_GPR);
1959       assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1960       immediate = true;
1961       emitInsn(0x36000000);
1962       emitIMMD(0x14, 16, insn->src(1));
1963       emitGPR(0x27, insn->src(2));
1964    } else {
1965       assert(insn->src(1).getFile() == FILE_GPR);
1966       assert(insn->src(2).getFile() == FILE_GPR);
1967       emitInsn(0x5b000000);
1968       emitGPR(0x14, insn->src(1));
1969       emitGPR(0x27, insn->src(2));
1970    }
1971 
1972    if (psl_mrg)
1973       emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1974 
1975    unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1976    cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1977    emitField(0x32, constbuf ? 2 : 3, cmode);
1978 
1979    emitX(constbuf ? 0x36 : 0x26);
1980    emitCC(0x2f);
1981 
1982    emitGPR(0x0, insn->def(0));
1983    emitGPR(0x8, insn->src(0));
1984 
1985    // source flags
1986    if (isSignedType(insn->sType)) {
1987       uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1988       emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1989    }
1990    emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1991    if (!immediate) {
1992       bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1993       emitField(constbuf ? 0x34 : 0x23, 1, h1);
1994    }
1995 }
1996 
1997 void
emitIMNMX()1998 CodeEmitterGM107::emitIMNMX()
1999 {
2000    switch (insn->src(1).getFile()) {
2001    case FILE_GPR:
2002       emitInsn(0x5c200000);
2003       emitGPR (0x14, insn->src(1));
2004       break;
2005    case FILE_MEMORY_CONST:
2006       emitInsn(0x4c200000);
2007       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2008       break;
2009    case FILE_IMMEDIATE:
2010       emitInsn(0x38200000);
2011       emitIMMD(0x14, 19, insn->src(1));
2012       break;
2013    default:
2014       assert(!"bad src1 file");
2015       break;
2016    }
2017 
2018    emitField(0x30, 1, isSignedType(insn->dType));
2019    emitCC   (0x2f);
2020    emitField(0x2b, 2, insn->subOp);
2021    emitField(0x2a, 1, insn->op == OP_MAX);
2022    emitPRED (0x27);
2023    emitGPR  (0x08, insn->src(0));
2024    emitGPR  (0x00, insn->def(0));
2025 }
2026 
2027 void
emitICMP()2028 CodeEmitterGM107::emitICMP()
2029 {
2030    const CmpInstruction *insn = this->insn->asCmp();
2031    CondCode cc = insn->setCond;
2032 
2033    if (insn->src(2).mod.neg())
2034       cc = reverseCondCode(cc);
2035 
2036    switch(insn->src(2).getFile()) {
2037    case FILE_GPR:
2038       switch (insn->src(1).getFile()) {
2039       case FILE_GPR:
2040          emitInsn(0x5b400000);
2041          emitGPR (0x14, insn->src(1));
2042          break;
2043       case FILE_MEMORY_CONST:
2044          emitInsn(0x4b400000);
2045          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2046          break;
2047       case FILE_IMMEDIATE:
2048          emitInsn(0x36400000);
2049          emitIMMD(0x14, 19, insn->src(1));
2050          break;
2051       default:
2052          assert(!"bad src1 file");
2053          break;
2054       }
2055       emitGPR (0x27, insn->src(2));
2056       break;
2057    case FILE_MEMORY_CONST:
2058       emitInsn(0x53400000);
2059       emitGPR (0x27, insn->src(1));
2060       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2061       break;
2062    default:
2063       assert(!"bad src2 file");
2064       break;
2065    }
2066 
2067    emitCond3(0x31, cc);
2068    emitField(0x30, 1, isSignedType(insn->sType));
2069    emitGPR  (0x08, insn->src(0));
2070    emitGPR  (0x00, insn->def(0));
2071 }
2072 
2073 void
emitISET()2074 CodeEmitterGM107::emitISET()
2075 {
2076    const CmpInstruction *insn = this->insn->asCmp();
2077 
2078    switch (insn->src(1).getFile()) {
2079    case FILE_GPR:
2080       emitInsn(0x5b500000);
2081       emitGPR (0x14, insn->src(1));
2082       break;
2083    case FILE_MEMORY_CONST:
2084       emitInsn(0x4b500000);
2085       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2086       break;
2087    case FILE_IMMEDIATE:
2088       emitInsn(0x36500000);
2089       emitIMMD(0x14, 19, insn->src(1));
2090       break;
2091    default:
2092       assert(!"bad src1 file");
2093       break;
2094    }
2095 
2096    if (insn->op != OP_SET) {
2097       switch (insn->op) {
2098       case OP_SET_AND: emitField(0x2d, 2, 0); break;
2099       case OP_SET_OR : emitField(0x2d, 2, 1); break;
2100       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2101       default:
2102          assert(!"invalid set op");
2103          break;
2104       }
2105       emitPRED(0x27, insn->src(2));
2106    } else {
2107       emitPRED(0x27);
2108    }
2109 
2110    emitCond3(0x31, insn->setCond);
2111    emitField(0x30, 1, isSignedType(insn->sType));
2112    emitCC   (0x2f);
2113    emitField(0x2c, 1, insn->dType == TYPE_F32);
2114    emitX    (0x2b);
2115    emitGPR  (0x08, insn->src(0));
2116    emitGPR  (0x00, insn->def(0));
2117 }
2118 
2119 void
emitISETP()2120 CodeEmitterGM107::emitISETP()
2121 {
2122    const CmpInstruction *insn = this->insn->asCmp();
2123 
2124    switch (insn->src(1).getFile()) {
2125    case FILE_GPR:
2126       emitInsn(0x5b600000);
2127       emitGPR (0x14, insn->src(1));
2128       break;
2129    case FILE_MEMORY_CONST:
2130       emitInsn(0x4b600000);
2131       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2132       break;
2133    case FILE_IMMEDIATE:
2134       emitInsn(0x36600000);
2135       emitIMMD(0x14, 19, insn->src(1));
2136       break;
2137    default:
2138       assert(!"bad src1 file");
2139       break;
2140    }
2141 
2142    if (insn->op != OP_SET) {
2143       switch (insn->op) {
2144       case OP_SET_AND: emitField(0x2d, 2, 0); break;
2145       case OP_SET_OR : emitField(0x2d, 2, 1); break;
2146       case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2147       default:
2148          assert(!"invalid set op");
2149          break;
2150       }
2151       emitPRED(0x27, insn->src(2));
2152    } else {
2153       emitPRED(0x27);
2154    }
2155 
2156    emitCond3(0x31, insn->setCond);
2157    emitField(0x30, 1, isSignedType(insn->sType));
2158    emitX    (0x2b);
2159    emitGPR  (0x08, insn->src(0));
2160    emitPRED (0x03, insn->def(0));
2161    if (insn->defExists(1))
2162       emitPRED(0x00, insn->def(1));
2163    else
2164       emitPRED(0x00);
2165 }
2166 
2167 void
emitSHL()2168 CodeEmitterGM107::emitSHL()
2169 {
2170    switch (insn->src(1).getFile()) {
2171    case FILE_GPR:
2172       emitInsn(0x5c480000);
2173       emitGPR (0x14, insn->src(1));
2174       break;
2175    case FILE_MEMORY_CONST:
2176       emitInsn(0x4c480000);
2177       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2178       break;
2179    case FILE_IMMEDIATE:
2180       emitInsn(0x38480000);
2181       emitIMMD(0x14, 19, insn->src(1));
2182       break;
2183    default:
2184       assert(!"bad src1 file");
2185       break;
2186    }
2187 
2188    emitCC   (0x2f);
2189    emitX    (0x2b);
2190    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2191    emitGPR  (0x08, insn->src(0));
2192    emitGPR  (0x00, insn->def(0));
2193 }
2194 
2195 void
emitSHR()2196 CodeEmitterGM107::emitSHR()
2197 {
2198    switch (insn->src(1).getFile()) {
2199    case FILE_GPR:
2200       emitInsn(0x5c280000);
2201       emitGPR (0x14, insn->src(1));
2202       break;
2203    case FILE_MEMORY_CONST:
2204       emitInsn(0x4c280000);
2205       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2206       break;
2207    case FILE_IMMEDIATE:
2208       emitInsn(0x38280000);
2209       emitIMMD(0x14, 19, insn->src(1));
2210       break;
2211    default:
2212       assert(!"bad src1 file");
2213       break;
2214    }
2215 
2216    emitField(0x30, 1, isSignedType(insn->dType));
2217    emitCC   (0x2f);
2218    emitX    (0x2c);
2219    emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2220    emitGPR  (0x08, insn->src(0));
2221    emitGPR  (0x00, insn->def(0));
2222 }
2223 
2224 void
emitSHF()2225 CodeEmitterGM107::emitSHF()
2226 {
2227    unsigned type;
2228 
2229    switch (insn->src(1).getFile()) {
2230    case FILE_GPR:
2231       emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2232       emitGPR(0x14, insn->src(1));
2233       break;
2234    case FILE_IMMEDIATE:
2235       emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2236       emitIMMD(0x14, 19, insn->src(1));
2237       break;
2238    default:
2239       assert(!"bad src1 file");
2240       break;
2241    }
2242 
2243    switch (insn->sType) {
2244    case TYPE_U64:
2245       type = 2;
2246       break;
2247    case TYPE_S64:
2248       type = 3;
2249       break;
2250    default:
2251       type = 0;
2252       break;
2253    }
2254 
2255    emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2256    emitX    (0x31);
2257    emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2258    emitCC   (0x2f);
2259    emitGPR  (0x27, insn->src(2));
2260    emitField(0x25, 2, type);
2261    emitGPR  (0x08, insn->src(0));
2262    emitGPR  (0x00, insn->def(0));
2263 }
2264 
2265 void
emitPOPC()2266 CodeEmitterGM107::emitPOPC()
2267 {
2268    switch (insn->src(0).getFile()) {
2269    case FILE_GPR:
2270       emitInsn(0x5c080000);
2271       emitGPR (0x14, insn->src(0));
2272       break;
2273    case FILE_MEMORY_CONST:
2274       emitInsn(0x4c080000);
2275       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2276       break;
2277    case FILE_IMMEDIATE:
2278       emitInsn(0x38080000);
2279       emitIMMD(0x14, 19, insn->src(0));
2280       break;
2281    default:
2282       assert(!"bad src1 file");
2283       break;
2284    }
2285 
2286    emitINV(0x28, insn->src(0));
2287    emitGPR(0x00, insn->def(0));
2288 }
2289 
2290 void
emitBFI()2291 CodeEmitterGM107::emitBFI()
2292 {
2293    switch(insn->src(2).getFile()) {
2294    case FILE_GPR:
2295       switch (insn->src(1).getFile()) {
2296       case FILE_GPR:
2297          emitInsn(0x5bf00000);
2298          emitGPR (0x14, insn->src(1));
2299          break;
2300       case FILE_MEMORY_CONST:
2301          emitInsn(0x4bf00000);
2302          emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2303          break;
2304       case FILE_IMMEDIATE:
2305          emitInsn(0x36f00000);
2306          emitIMMD(0x14, 19, insn->src(1));
2307          break;
2308       default:
2309          assert(!"bad src1 file");
2310          break;
2311       }
2312       emitGPR (0x27, insn->src(2));
2313       break;
2314    case FILE_MEMORY_CONST:
2315       emitInsn(0x53f00000);
2316       emitGPR (0x27, insn->src(1));
2317       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2318       break;
2319    default:
2320       assert(!"bad src2 file");
2321       break;
2322    }
2323 
2324    emitCC   (0x2f);
2325    emitGPR  (0x08, insn->src(0));
2326    emitGPR  (0x00, insn->def(0));
2327 }
2328 
2329 void
emitBFE()2330 CodeEmitterGM107::emitBFE()
2331 {
2332    switch (insn->src(1).getFile()) {
2333    case FILE_GPR:
2334       emitInsn(0x5c000000);
2335       emitGPR (0x14, insn->src(1));
2336       break;
2337    case FILE_MEMORY_CONST:
2338       emitInsn(0x4c000000);
2339       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2340       break;
2341    case FILE_IMMEDIATE:
2342       emitInsn(0x38000000);
2343       emitIMMD(0x14, 19, insn->src(1));
2344       break;
2345    default:
2346       assert(!"bad src1 file");
2347       break;
2348    }
2349 
2350    emitField(0x30, 1, isSignedType(insn->dType));
2351    emitCC   (0x2f);
2352    emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2353    emitGPR  (0x08, insn->src(0));
2354    emitGPR  (0x00, insn->def(0));
2355 }
2356 
2357 void
emitFLO()2358 CodeEmitterGM107::emitFLO()
2359 {
2360    switch (insn->src(0).getFile()) {
2361    case FILE_GPR:
2362       emitInsn(0x5c300000);
2363       emitGPR (0x14, insn->src(0));
2364       break;
2365    case FILE_MEMORY_CONST:
2366       emitInsn(0x4c300000);
2367       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2368       break;
2369    case FILE_IMMEDIATE:
2370       emitInsn(0x38300000);
2371       emitIMMD(0x14, 19, insn->src(0));
2372       break;
2373    default:
2374       assert(!"bad src1 file");
2375       break;
2376    }
2377 
2378    emitField(0x30, 1, isSignedType(insn->dType));
2379    emitCC   (0x2f);
2380    emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2381    emitINV  (0x28, insn->src(0));
2382    emitGPR  (0x00, insn->def(0));
2383 }
2384 
2385 void
emitPRMT()2386 CodeEmitterGM107::emitPRMT()
2387 {
2388    switch (insn->src(1).getFile()) {
2389    case FILE_GPR:
2390       emitInsn(0x5bc00000);
2391       emitGPR (0x14, insn->src(1));
2392       break;
2393    case FILE_MEMORY_CONST:
2394       emitInsn(0x4bc00000);
2395       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2396       break;
2397    case FILE_IMMEDIATE:
2398       emitInsn(0x36c00000);
2399       emitIMMD(0x14, 19, insn->src(1));
2400       break;
2401    default:
2402       assert(!"bad src1 file");
2403       break;
2404    }
2405 
2406    emitField(0x30, 3, insn->subOp);
2407    emitGPR  (0x27, insn->src(2));
2408    emitGPR  (0x08, insn->src(0));
2409    emitGPR  (0x00, insn->def(0));
2410 }
2411 
2412 /*******************************************************************************
2413  * memory
2414  ******************************************************************************/
2415 
2416 void
emitLDSTs(int pos,DataType type)2417 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2418 {
2419    int data = 0;
2420 
2421    switch (typeSizeof(type)) {
2422    case  1: data = isSignedType(type) ? 1 : 0; break;
2423    case  2: data = isSignedType(type) ? 3 : 2; break;
2424    case  4: data = 4; break;
2425    case  8: data = 5; break;
2426    case 16: data = 6; break;
2427    default:
2428       assert(!"bad type");
2429       break;
2430    }
2431 
2432    emitField(pos, 3, data);
2433 }
2434 
2435 void
emitLDSTc(int pos)2436 CodeEmitterGM107::emitLDSTc(int pos)
2437 {
2438    int mode = 0;
2439 
2440    switch (insn->cache) {
2441    case CACHE_CA: mode = 0; break;
2442    case CACHE_CG: mode = 1; break;
2443    case CACHE_CS: mode = 2; break;
2444    case CACHE_CV: mode = 3; break;
2445    default:
2446       assert(!"invalid caching mode");
2447       break;
2448    }
2449 
2450    emitField(pos, 2, mode);
2451 }
2452 
2453 void
emitLDC()2454 CodeEmitterGM107::emitLDC()
2455 {
2456    emitInsn (0xef900000);
2457    emitLDSTs(0x30, insn->dType);
2458    emitField(0x2c, 2, insn->subOp);
2459    emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2460    emitGPR  (0x00, insn->def(0));
2461 }
2462 
2463 void
emitLDL()2464 CodeEmitterGM107::emitLDL()
2465 {
2466    emitInsn (0xef400000);
2467    emitLDSTs(0x30, insn->dType);
2468    emitLDSTc(0x2c);
2469    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470    emitGPR  (0x00, insn->def(0));
2471 }
2472 
2473 void
emitLDS()2474 CodeEmitterGM107::emitLDS()
2475 {
2476    emitInsn (0xef480000);
2477    emitLDSTs(0x30, insn->dType);
2478    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2479    emitGPR  (0x00, insn->def(0));
2480 }
2481 
2482 void
emitLD()2483 CodeEmitterGM107::emitLD()
2484 {
2485    emitInsn (0x80000000);
2486    emitPRED (0x3a);
2487    emitLDSTc(0x38);
2488    emitLDSTs(0x35, insn->dType);
2489    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2490    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2491    emitGPR  (0x00, insn->def(0));
2492 }
2493 
2494 void
emitSTL()2495 CodeEmitterGM107::emitSTL()
2496 {
2497    emitInsn (0xef500000);
2498    emitLDSTs(0x30, insn->dType);
2499    emitLDSTc(0x2c);
2500    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501    emitGPR  (0x00, insn->src(1));
2502 }
2503 
2504 void
emitSTS()2505 CodeEmitterGM107::emitSTS()
2506 {
2507    emitInsn (0xef580000);
2508    emitLDSTs(0x30, insn->dType);
2509    emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2510    emitGPR  (0x00, insn->src(1));
2511 }
2512 
2513 void
emitST()2514 CodeEmitterGM107::emitST()
2515 {
2516    emitInsn (0xa0000000);
2517    emitPRED (0x3a);
2518    emitLDSTc(0x38);
2519    emitLDSTs(0x35, insn->dType);
2520    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2521    emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2522    emitGPR  (0x00, insn->src(1));
2523 }
2524 
2525 void
emitALD()2526 CodeEmitterGM107::emitALD()
2527 {
2528    emitInsn (0xefd80000);
2529    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2530    emitGPR  (0x27, insn->src(0).getIndirect(1));
2531    emitO    (0x20);
2532    emitP    (0x1f);
2533    emitADDR (0x08, 20, 10, 0, insn->src(0));
2534    emitGPR  (0x00, insn->def(0));
2535 }
2536 
2537 void
emitAST()2538 CodeEmitterGM107::emitAST()
2539 {
2540    emitInsn (0xeff00000);
2541    emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2542    emitGPR  (0x27, insn->src(0).getIndirect(1));
2543    emitP    (0x1f);
2544    emitADDR (0x08, 20, 10, 0, insn->src(0));
2545    emitGPR  (0x00, insn->src(1));
2546 }
2547 
2548 void
emitISBERD()2549 CodeEmitterGM107::emitISBERD()
2550 {
2551    emitInsn(0xefd00000);
2552    emitGPR (0x08, insn->src(0));
2553    emitGPR (0x00, insn->def(0));
2554 }
2555 
2556 void
emitAL2P()2557 CodeEmitterGM107::emitAL2P()
2558 {
2559    emitInsn (0xefa00000);
2560    emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2561    emitPRED (0x2c);
2562    emitO    (0x20);
2563    emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2564    emitGPR  (0x08, insn->src(0).getIndirect(0));
2565    emitGPR  (0x00, insn->def(0));
2566 }
2567 
2568 void
gm107_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2569 gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2570 {
2571    int ipa = entry->ipa;
2572    int reg = entry->reg;
2573    int loc = entry->loc;
2574 
2575    if (data.flatshade &&
2576        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2577       ipa = NV50_IR_INTERP_FLAT;
2578       reg = 0xff;
2579    } else if (data.force_persample_interp &&
2580               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2581               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2582       ipa |= NV50_IR_INTERP_CENTROID;
2583    }
2584    code[loc + 1] &= ~(0xf << 0x14);
2585    code[loc + 1] |= (ipa & 0x3) << 0x16;
2586    code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2587    code[loc + 0] &= ~(0xff << 0x14);
2588    code[loc + 0] |= reg << 0x14;
2589 }
2590 
2591 void
emitIPA()2592 CodeEmitterGM107::emitIPA()
2593 {
2594    int ipam = 0, ipas = 0;
2595 
2596    switch (insn->getInterpMode()) {
2597    case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
2598    case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2599    case NV50_IR_INTERP_FLAT       : ipam = 2; break;
2600    case NV50_IR_INTERP_SC         : ipam = 3; break;
2601    default:
2602       assert(!"invalid ipa mode");
2603       break;
2604    }
2605 
2606    switch (insn->getSampleMode()) {
2607    case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2608    case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2609    case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
2610    default:
2611       assert(!"invalid ipa sample mode");
2612       break;
2613    }
2614 
2615    emitInsn (0xe0000000);
2616    emitField(0x36, 2, ipam);
2617    emitField(0x34, 2, ipas);
2618    emitSAT  (0x33);
2619    emitField(0x2f, 3, 7);
2620    emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621    if ((code[0] & 0x0000ff00) != 0x0000ff00)
2622       code[1] |= 0x00000040; /* .idx */
2623    emitGPR(0x00, insn->def(0));
2624 
2625    if (insn->op == OP_PINTERP) {
2626       emitGPR(0x14, insn->src(1));
2627       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2628          emitGPR(0x27, insn->src(2));
2629       addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2630    } else {
2631       if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2632          emitGPR(0x27, insn->src(1));
2633       emitGPR(0x14);
2634       addInterp(insn->ipa, 0xff, gm107_interpApply);
2635    }
2636 
2637    if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2638       emitGPR(0x27);
2639 }
2640 
2641 void
emitATOM()2642 CodeEmitterGM107::emitATOM()
2643 {
2644    unsigned dType, subOp;
2645 
2646    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2647       switch (insn->dType) {
2648       case TYPE_U32: dType = 0; break;
2649       case TYPE_U64: dType = 1; break;
2650       default: assert(!"unexpected dType"); dType = 0; break;
2651       }
2652       subOp = 15;
2653 
2654       emitInsn (0xee000000);
2655    } else {
2656       switch (insn->dType) {
2657       case TYPE_U32: dType = 0; break;
2658       case TYPE_S32: dType = 1; break;
2659       case TYPE_U64: dType = 2; break;
2660       case TYPE_F32: dType = 3; break;
2661       case TYPE_B128: dType = 4; break;
2662       case TYPE_S64: dType = 5; break;
2663       default: assert(!"unexpected dType"); dType = 0; break;
2664       }
2665       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2666          subOp = 8;
2667       else
2668          subOp = insn->subOp;
2669 
2670       emitInsn (0xed000000);
2671    }
2672 
2673    emitField(0x34, 4, subOp);
2674    emitField(0x31, 3, dType);
2675    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2676    emitGPR  (0x14, insn->src(1));
2677    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2678    emitGPR  (0x00, insn->def(0));
2679 }
2680 
2681 void
emitATOMS()2682 CodeEmitterGM107::emitATOMS()
2683 {
2684    unsigned dType, subOp;
2685 
2686    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2687       switch (insn->dType) {
2688       case TYPE_U32: dType = 0; break;
2689       case TYPE_U64: dType = 1; break;
2690       default: assert(!"unexpected dType"); dType = 0; break;
2691       }
2692       subOp = 4;
2693 
2694       emitInsn (0xee000000);
2695       emitField(0x34, 1, dType);
2696    } else {
2697       switch (insn->dType) {
2698       case TYPE_U32: dType = 0; break;
2699       case TYPE_S32: dType = 1; break;
2700       case TYPE_U64: dType = 2; break;
2701       case TYPE_S64: dType = 3; break;
2702       default: assert(!"unexpected dType"); dType = 0; break;
2703       }
2704 
2705       if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2706          subOp = 8;
2707       else
2708          subOp = insn->subOp;
2709 
2710       emitInsn (0xec000000);
2711       emitField(0x1c, 3, dType);
2712    }
2713 
2714    emitField(0x34, 4, subOp);
2715    emitGPR  (0x14, insn->src(1));
2716    emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2717    emitGPR  (0x00, insn->def(0));
2718 }
2719 
2720 void
emitRED()2721 CodeEmitterGM107::emitRED()
2722 {
2723    unsigned dType;
2724 
2725    switch (insn->dType) {
2726    case TYPE_U32: dType = 0; break;
2727    case TYPE_S32: dType = 1; break;
2728    case TYPE_U64: dType = 2; break;
2729    case TYPE_F32: dType = 3; break;
2730    case TYPE_B128: dType = 4; break;
2731    case TYPE_S64: dType = 5; break;
2732    default: assert(!"unexpected dType"); dType = 0; break;
2733    }
2734 
2735    emitInsn (0xebf80000);
2736    emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2737    emitField(0x17, 3, insn->subOp);
2738    emitField(0x14, 3, dType);
2739    emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2740    emitGPR  (0x00, insn->src(1));
2741 }
2742 
2743 void
emitCCTL()2744 CodeEmitterGM107::emitCCTL()
2745 {
2746    unsigned width;
2747    if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2748       emitInsn(0xef600000);
2749       width = 30;
2750    } else {
2751       emitInsn(0xef800000);
2752       width = 22;
2753    }
2754    emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2755    emitADDR (0x08, 0x16, width, 2, insn->src(0));
2756    emitField(0x00, 4, insn->subOp);
2757 }
2758 
2759 /*******************************************************************************
2760  * surface
2761  ******************************************************************************/
2762 
2763 void
emitPIXLD()2764 CodeEmitterGM107::emitPIXLD()
2765 {
2766    emitInsn (0xefe80000);
2767    emitPRED (0x2d);
2768    emitField(0x1f, 3, insn->subOp);
2769    emitGPR  (0x08, insn->src(0));
2770    emitGPR  (0x00, insn->def(0));
2771 }
2772 
2773 /*******************************************************************************
2774  * texture
2775  ******************************************************************************/
2776 
2777 void
emitTEXs(int pos)2778 CodeEmitterGM107::emitTEXs(int pos)
2779 {
2780    int src1 = insn->predSrc == 1 ? 2 : 1;
2781    if (insn->srcExists(src1))
2782       emitGPR(pos, insn->src(src1));
2783    else
2784       emitGPR(pos);
2785 }
2786 
/*
 * Translate a 4-bit component mask into the 3-bit code that emitTEXS
 * writes at 0x32.  Masks 0x0, 0x5 and 0x6, and anything above 0xf, have
 * no encoding: they assert and yield 0.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   // Indexed by mask; -1 marks a mask with no encoding.
   static const int8_t encoding[16] = {
      /* 0x0 */ -1,  /* 0x1 */ 0x0, /* 0x2 */ 0x1, /* 0x3 */ 0x4,
      /* 0x4 */ 0x2, /* 0x5 */ -1,  /* 0x6 */ -1,  /* 0x7 */ 0x0,
      /* 0x8 */ 0x3, /* 0x9 */ 0x5, /* 0xa */ 0x6, /* 0xb */ 0x1,
      /* 0xc */ 0x7, /* 0xd */ 0x2, /* 0xe */ 0x3, /* 0xf */ 0x4,
   };

   const int enc = (mask < 16) ? encoding[mask] : -1;
   if (enc < 0) {
      assert(!"invalid mask");
      return 0;
   }
   return static_cast<uint8_t>(enc);
}
2809 
2810 static uint8_t
getTEXSTarget(const TexInstruction * tex)2811 getTEXSTarget(const TexInstruction *tex)
2812 {
2813    assert(tex->op == OP_TEX || tex->op == OP_TXL);
2814 
2815    switch (tex->tex.target.getEnum()) {
2816    case TEX_TARGET_1D:
2817       assert(tex->tex.levelZero);
2818       return 0x0;
2819    case TEX_TARGET_2D:
2820    case TEX_TARGET_RECT:
2821       if (tex->tex.levelZero)
2822          return 0x2;
2823       if (tex->op == OP_TXL)
2824          return 0x3;
2825       return 0x1;
2826    case TEX_TARGET_2D_SHADOW:
2827    case TEX_TARGET_RECT_SHADOW:
2828       if (tex->tex.levelZero)
2829          return 0x6;
2830       if (tex->op == OP_TXL)
2831          return 0x5;
2832       return 0x4;
2833    case TEX_TARGET_2D_ARRAY:
2834       if (tex->tex.levelZero)
2835          return 0x8;
2836       return 0x7;
2837    case TEX_TARGET_2D_ARRAY_SHADOW:
2838       assert(tex->tex.levelZero);
2839       return 0x9;
2840    case TEX_TARGET_3D:
2841       if (tex->tex.levelZero)
2842          return 0xb;
2843       assert(tex->op != OP_TXL);
2844       return 0xa;
2845    case TEX_TARGET_CUBE:
2846       assert(!tex->tex.levelZero);
2847       if (tex->op == OP_TXL)
2848          return 0xd;
2849       return 0xc;
2850    default:
2851       assert(false);
2852       return 0x0;
2853    }
2854 }
2855 
2856 static uint8_t
getTLDSTarget(const TexInstruction * tex)2857 getTLDSTarget(const TexInstruction *tex)
2858 {
2859    switch (tex->tex.target.getEnum()) {
2860    case TEX_TARGET_1D:
2861       if (tex->tex.levelZero)
2862          return 0x0;
2863       return 0x1;
2864    case TEX_TARGET_2D:
2865    case TEX_TARGET_RECT:
2866       if (tex->tex.levelZero)
2867          return tex->tex.useOffsets ? 0x4 : 0x2;
2868       return tex->tex.useOffsets ? 0xc : 0x5;
2869    case TEX_TARGET_2D_MS:
2870       assert(tex->tex.levelZero);
2871       return 0x6;
2872    case TEX_TARGET_3D:
2873       assert(tex->tex.levelZero);
2874       return 0x7;
2875    case TEX_TARGET_2D_ARRAY:
2876       assert(tex->tex.levelZero);
2877       return 0x8;
2878 
2879    default:
2880       assert(false);
2881       return 0x0;
2882    }
2883 }
2884 
2885 void
emitTEX()2886 CodeEmitterGM107::emitTEX()
2887 {
2888    const TexInstruction *insn = this->insn->asTex();
2889    int lodm = 0;
2890 
2891    if (!insn->tex.levelZero) {
2892       switch (insn->op) {
2893       case OP_TEX: lodm = 0; break;
2894       case OP_TXB: lodm = 2; break;
2895       case OP_TXL: lodm = 3; break;
2896       default:
2897          assert(!"invalid tex op");
2898          break;
2899       }
2900    } else {
2901       lodm = 1;
2902    }
2903 
2904    if (insn->tex.rIndirectSrc >= 0) {
2905       emitInsn (0xdeb80000);
2906       emitField(0x25, 2, lodm);
2907       emitField(0x24, 1, insn->tex.useOffsets == 1);
2908    } else {
2909       emitInsn (0xc0380000);
2910       emitField(0x37, 2, lodm);
2911       emitField(0x36, 1, insn->tex.useOffsets == 1);
2912       emitField(0x24, 13, insn->tex.r);
2913    }
2914 
2915    emitField(0x32, 1, insn->tex.target.isShadow());
2916    emitField(0x31, 1, insn->tex.liveOnly);
2917    emitField(0x23, 1, insn->tex.derivAll);
2918    emitField(0x1f, 4, insn->tex.mask);
2919    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2920                       insn->tex.target.getDim() - 1);
2921    emitField(0x1c, 1, insn->tex.target.isArray());
2922    emitTEXs (0x14);
2923    emitGPR  (0x08, insn->src(0));
2924    emitGPR  (0x00, insn->def(0));
2925 }
2926 
2927 void
emitTEXS()2928 CodeEmitterGM107::emitTEXS()
2929 {
2930    const TexInstruction *insn = this->insn->asTex();
2931    assert(!insn->tex.derivAll);
2932 
2933    switch (insn->op) {
2934    case OP_TEX:
2935    case OP_TXL:
2936       emitInsn (0xd8000000);
2937       emitField(0x35, 4, getTEXSTarget(insn));
2938       emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2939       break;
2940    case OP_TXF:
2941       emitInsn (0xda000000);
2942       emitField(0x35, 4, getTLDSTarget(insn));
2943       emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2944       break;
2945    case OP_TXG:
2946       assert(insn->tex.useOffsets != 4);
2947       emitInsn (0xdf000000);
2948       emitField(0x34, 2, insn->tex.gatherComp);
2949       emitField(0x33, 1, insn->tex.useOffsets == 1);
2950       emitField(0x32, 1, insn->tex.target.isShadow());
2951       break;
2952    default:
2953       unreachable("unknown op in emitTEXS()");
2954       break;
2955    }
2956 
2957    emitField(0x31, 1, insn->tex.liveOnly);
2958    emitField(0x24, 13, insn->tex.r);
2959    if (insn->defExists(1))
2960       emitGPR(0x1c, insn->def(1));
2961    else
2962       emitGPR(0x1c);
2963    if (insn->srcExists(1))
2964       emitGPR(0x14, insn->getSrc(1));
2965    else
2966       emitGPR(0x14);
2967    emitGPR  (0x08, insn->src(0));
2968    emitGPR  (0x00, insn->def(0));
2969 }
2970 
2971 void
emitTLD()2972 CodeEmitterGM107::emitTLD()
2973 {
2974    const TexInstruction *insn = this->insn->asTex();
2975 
2976    if (insn->tex.rIndirectSrc >= 0) {
2977       emitInsn (0xdd380000);
2978    } else {
2979       emitInsn (0xdc380000);
2980       emitField(0x24, 13, insn->tex.r);
2981    }
2982 
2983    emitField(0x37, 1, insn->tex.levelZero == 0);
2984    emitField(0x32, 1, insn->tex.target.isMS());
2985    emitField(0x31, 1, insn->tex.liveOnly);
2986    emitField(0x23, 1, insn->tex.useOffsets == 1);
2987    emitField(0x1f, 4, insn->tex.mask);
2988    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2989                       insn->tex.target.getDim() - 1);
2990    emitField(0x1c, 1, insn->tex.target.isArray());
2991    emitTEXs (0x14);
2992    emitGPR  (0x08, insn->src(0));
2993    emitGPR  (0x00, insn->def(0));
2994 }
2995 
2996 void
emitTLD4()2997 CodeEmitterGM107::emitTLD4()
2998 {
2999    const TexInstruction *insn = this->insn->asTex();
3000 
3001    if (insn->tex.rIndirectSrc >= 0) {
3002       emitInsn (0xdef80000);
3003       emitField(0x26, 2, insn->tex.gatherComp);
3004       emitField(0x25, 2, insn->tex.useOffsets == 4);
3005       emitField(0x24, 2, insn->tex.useOffsets == 1);
3006    } else {
3007       emitInsn (0xc8380000);
3008       emitField(0x38, 2, insn->tex.gatherComp);
3009       emitField(0x37, 2, insn->tex.useOffsets == 4);
3010       emitField(0x36, 2, insn->tex.useOffsets == 1);
3011       emitField(0x24, 13, insn->tex.r);
3012    }
3013 
3014    emitField(0x32, 1, insn->tex.target.isShadow());
3015    emitField(0x31, 1, insn->tex.liveOnly);
3016    emitField(0x23, 1, insn->tex.derivAll);
3017    emitField(0x1f, 4, insn->tex.mask);
3018    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3019                       insn->tex.target.getDim() - 1);
3020    emitField(0x1c, 1, insn->tex.target.isArray());
3021    emitTEXs (0x14);
3022    emitGPR  (0x08, insn->src(0));
3023    emitGPR  (0x00, insn->def(0));
3024 }
3025 
3026 void
emitTXD()3027 CodeEmitterGM107::emitTXD()
3028 {
3029    const TexInstruction *insn = this->insn->asTex();
3030 
3031    if (insn->tex.rIndirectSrc >= 0) {
3032       emitInsn (0xde780000);
3033    } else {
3034       emitInsn (0xde380000);
3035       emitField(0x24, 13, insn->tex.r);
3036    }
3037 
3038    emitField(0x31, 1, insn->tex.liveOnly);
3039    emitField(0x23, 1, insn->tex.useOffsets == 1);
3040    emitField(0x1f, 4, insn->tex.mask);
3041    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3042                       insn->tex.target.getDim() - 1);
3043    emitField(0x1c, 1, insn->tex.target.isArray());
3044    emitTEXs (0x14);
3045    emitGPR  (0x08, insn->src(0));
3046    emitGPR  (0x00, insn->def(0));
3047 }
3048 
3049 void
emitTMML()3050 CodeEmitterGM107::emitTMML()
3051 {
3052    const TexInstruction *insn = this->insn->asTex();
3053 
3054    if (insn->tex.rIndirectSrc >= 0) {
3055       emitInsn (0xdf600000);
3056    } else {
3057       emitInsn (0xdf580000);
3058       emitField(0x24, 13, insn->tex.r);
3059    }
3060 
3061    emitField(0x31, 1, insn->tex.liveOnly);
3062    emitField(0x23, 1, insn->tex.derivAll);
3063    emitField(0x1f, 4, insn->tex.mask);
3064    emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3065                       insn->tex.target.getDim() - 1);
3066    emitField(0x1c, 1, insn->tex.target.isArray());
3067    emitTEXs (0x14);
3068    emitGPR  (0x08, insn->src(0));
3069    emitGPR  (0x00, insn->def(0));
3070 }
3071 
3072 void
emitTXQ()3073 CodeEmitterGM107::emitTXQ()
3074 {
3075    const TexInstruction *insn = this->insn->asTex();
3076    int type = 0;
3077 
3078    switch (insn->tex.query) {
3079    case TXQ_DIMS           : type = 0x01; break;
3080    case TXQ_TYPE           : type = 0x02; break;
3081    case TXQ_SAMPLE_POSITION: type = 0x05; break;
3082    case TXQ_FILTER         : type = 0x10; break;
3083    case TXQ_LOD            : type = 0x12; break;
3084    case TXQ_WRAP           : type = 0x14; break;
3085    case TXQ_BORDER_COLOUR  : type = 0x16; break;
3086    default:
3087       assert(!"invalid txq query");
3088       break;
3089    }
3090 
3091    if (insn->tex.rIndirectSrc >= 0) {
3092       emitInsn (0xdf500000);
3093    } else {
3094       emitInsn (0xdf480000);
3095       emitField(0x24, 13, insn->tex.r);
3096    }
3097 
3098    emitField(0x31, 1, insn->tex.liveOnly);
3099    emitField(0x1f, 4, insn->tex.mask);
3100    emitField(0x16, 6, type);
3101    emitGPR  (0x08, insn->src(0));
3102    emitGPR  (0x00, insn->def(0));
3103 }
3104 
3105 void
emitDEPBAR()3106 CodeEmitterGM107::emitDEPBAR()
3107 {
3108    emitInsn (0xf0f00000);
3109    emitField(0x1d, 1, 1); /* le */
3110    emitField(0x1a, 3, 5);
3111    emitField(0x14, 6, insn->subOp);
3112    emitField(0x00, 6, insn->subOp);
3113 }
3114 
3115 /*******************************************************************************
3116  * misc
3117  ******************************************************************************/
3118 
3119 void
emitNOP()3120 CodeEmitterGM107::emitNOP()
3121 {
3122    emitInsn(0x50b00000);
3123 }
3124 
3125 void
emitKIL()3126 CodeEmitterGM107::emitKIL()
3127 {
3128    emitInsn (0xe3300000);
3129    emitCond5(0x00, CC_TR);
3130 }
3131 
3132 void
emitOUT()3133 CodeEmitterGM107::emitOUT()
3134 {
3135    const int cut  = insn->op == OP_RESTART || insn->subOp;
3136    const int emit = insn->op == OP_EMIT;
3137 
3138    switch (insn->src(1).getFile()) {
3139    case FILE_GPR:
3140       emitInsn(0xfbe00000);
3141       emitGPR (0x14, insn->src(1));
3142       break;
3143    case FILE_IMMEDIATE:
3144       emitInsn(0xf6e00000);
3145       emitIMMD(0x14, 19, insn->src(1));
3146       break;
3147    case FILE_MEMORY_CONST:
3148       emitInsn(0xebe00000);
3149       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3150       break;
3151    default:
3152       assert(!"bad src1 file");
3153       break;
3154    }
3155 
3156    emitField(0x27, 2, (cut << 1) | emit);
3157    emitGPR  (0x08, insn->src(0));
3158    emitGPR  (0x00, insn->def(0));
3159 }
3160 
3161 void
emitBAR()3162 CodeEmitterGM107::emitBAR()
3163 {
3164    uint8_t subop;
3165 
3166    emitInsn (0xf0a80000);
3167 
3168    switch (insn->subOp) {
3169    case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3170    case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
3171    case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
3172    case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
3173    default:
3174       subop = 0x80;
3175       assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3176       break;
3177    }
3178 
3179    emitField(0x20, 8, subop);
3180 
3181    // barrier id
3182    if (insn->src(0).getFile() == FILE_GPR) {
3183       emitGPR(0x08, insn->src(0));
3184    } else {
3185       ImmediateValue *imm = insn->getSrc(0)->asImm();
3186       assert(imm);
3187       emitField(0x08, 8, imm->reg.data.u32);
3188       emitField(0x2b, 1, 1);
3189    }
3190 
3191    // thread count
3192    if (insn->src(1).getFile() == FILE_GPR) {
3193       emitGPR(0x14, insn->src(1));
3194    } else {
3195       ImmediateValue *imm = insn->getSrc(0)->asImm();
3196       assert(imm);
3197       emitField(0x14, 12, imm->reg.data.u32);
3198       emitField(0x2c, 1, 1);
3199    }
3200 
3201    if (insn->srcExists(2) && (insn->predSrc != 2)) {
3202       emitPRED (0x27, insn->src(2));
3203       emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3204    } else {
3205       emitField(0x27, 3, 7);
3206    }
3207 }
3208 
3209 void
emitMEMBAR()3210 CodeEmitterGM107::emitMEMBAR()
3211 {
3212    emitInsn (0xef980000);
3213    emitField(0x08, 2, insn->subOp >> 2);
3214 }
3215 
3216 void
emitVOTE()3217 CodeEmitterGM107::emitVOTE()
3218 {
3219    const ImmediateValue *imm;
3220    uint32_t u32;
3221 
3222    int r = -1, p = -1;
3223    for (int i = 0; insn->defExists(i); i++) {
3224       if (insn->def(i).getFile() == FILE_GPR)
3225          r = i;
3226       else if (insn->def(i).getFile() == FILE_PREDICATE)
3227          p = i;
3228    }
3229 
3230    emitInsn (0x50d80000);
3231    emitField(0x30, 2, insn->subOp);
3232    if (r >= 0)
3233       emitGPR  (0x00, insn->def(r));
3234    else
3235       emitGPR  (0x00);
3236    if (p >= 0)
3237       emitPRED (0x2d, insn->def(p));
3238    else
3239       emitPRED (0x2d);
3240 
3241    switch (insn->src(0).getFile()) {
3242    case FILE_PREDICATE:
3243       emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3244       emitPRED (0x27, insn->src(0));
3245       break;
3246    case FILE_IMMEDIATE:
3247       imm = insn->getSrc(0)->asImm();
3248       assert(imm);
3249       u32 = imm->reg.data.u32;
3250       assert(u32 == 0 || u32 == 1);
3251       emitPRED(0x27);
3252       emitField(0x2a, 1, u32 == 0);
3253       break;
3254    default:
3255       assert(!"Unhandled src");
3256       break;
3257    }
3258 }
3259 
3260 void
emitSUTarget()3261 CodeEmitterGM107::emitSUTarget()
3262 {
3263    const TexInstruction *insn = this->insn->asTex();
3264    int target = 0;
3265 
3266    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3267 
3268    if (insn->tex.target == TEX_TARGET_BUFFER) {
3269       target = 2;
3270    } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3271       target = 4;
3272    } else if (insn->tex.target == TEX_TARGET_2D ||
3273               insn->tex.target == TEX_TARGET_RECT) {
3274       target = 6;
3275    } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3276               insn->tex.target == TEX_TARGET_CUBE ||
3277               insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3278       target = 8;
3279    } else if (insn->tex.target == TEX_TARGET_3D) {
3280       target = 10;
3281    } else {
3282       assert(insn->tex.target == TEX_TARGET_1D);
3283    }
3284    emitField(0x20, 4, target);
3285 }
3286 
3287 void
emitSUHandle(const int s)3288 CodeEmitterGM107::emitSUHandle(const int s)
3289 {
3290    const TexInstruction *insn = this->insn->asTex();
3291 
3292    assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3293 
3294    if (insn->src(s).getFile() == FILE_GPR) {
3295       emitGPR(0x27, insn->src(s));
3296    } else {
3297       ImmediateValue *imm = insn->getSrc(s)->asImm();
3298       assert(imm);
3299       emitField(0x33, 1, 1);
3300       emitField(0x24, 13, imm->reg.data.u32);
3301    }
3302 }
3303 
3304 void
emitSUSTx()3305 CodeEmitterGM107::emitSUSTx()
3306 {
3307    const TexInstruction *insn = this->insn->asTex();
3308 
3309    emitInsn(0xeb200000);
3310    if (insn->op == OP_SUSTB)
3311       emitField(0x34, 1, 1);
3312    emitSUTarget();
3313 
3314    emitLDSTc(0x18);
3315    emitField(0x14, 4, 0xf); // rgba
3316    emitGPR  (0x08, insn->src(0));
3317    emitGPR  (0x00, insn->src(1));
3318 
3319    emitSUHandle(2);
3320 }
3321 
3322 void
emitSULDx()3323 CodeEmitterGM107::emitSULDx()
3324 {
3325    const TexInstruction *insn = this->insn->asTex();
3326    int type = 0;
3327 
3328    emitInsn(0xeb000000);
3329    if (insn->op == OP_SULDB)
3330       emitField(0x34, 1, 1);
3331    emitSUTarget();
3332 
3333    switch (insn->dType) {
3334    case TYPE_S8:   type = 1; break;
3335    case TYPE_U16:  type = 2; break;
3336    case TYPE_S16:  type = 3; break;
3337    case TYPE_U32:  type = 4; break;
3338    case TYPE_U64:  type = 5; break;
3339    case TYPE_B128: type = 6; break;
3340    default:
3341       assert(insn->dType == TYPE_U8);
3342       break;
3343    }
3344    emitLDSTc(0x18);
3345    emitField(0x14, 3, type);
3346    emitGPR  (0x00, insn->def(0));
3347    emitGPR  (0x08, insn->src(0));
3348 
3349    emitSUHandle(1);
3350 }
3351 
3352 void
emitSUREDx()3353 CodeEmitterGM107::emitSUREDx()
3354 {
3355    const TexInstruction *insn = this->insn->asTex();
3356    uint8_t type = 0, subOp;
3357 
3358    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3359       emitInsn(0xeac00000);
3360    else
3361       emitInsn(0xea600000);
3362 
3363    if (insn->op == OP_SUREDB)
3364       emitField(0x34, 1, 1);
3365    emitSUTarget();
3366 
3367    // destination type
3368    switch (insn->dType) {
3369    case TYPE_S32: type = 1; break;
3370    case TYPE_U64: type = 2; break;
3371    case TYPE_F32: type = 3; break;
3372    case TYPE_S64: type = 5; break;
3373    default:
3374       assert(insn->dType == TYPE_U32);
3375       break;
3376    }
3377 
3378    // atomic operation
3379    if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3380       subOp = 0;
3381    } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3382       subOp = 8;
3383    } else {
3384       subOp = insn->subOp;
3385    }
3386 
3387    emitField(0x24, 3, type);
3388    emitField(0x1d, 4, subOp);
3389    emitGPR  (0x14, insn->src(1));
3390    emitGPR  (0x08, insn->src(0));
3391    emitGPR  (0x00, insn->def(0));
3392 
3393    emitSUHandle(2);
3394 }
3395 
3396 /*******************************************************************************
3397  * assembler front-end
3398  ******************************************************************************/
3399 
3400 bool
emitInstruction(Instruction * i)3401 CodeEmitterGM107::emitInstruction(Instruction *i)
3402 {
3403    const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3404    bool ret = true;
3405 
3406    insn = i;
3407 
3408    if (insn->encSize != 8) {
3409       ERROR("skipping undecodable instruction: "); insn->print();
3410       return false;
3411    } else
3412    if (codeSize + size > codeSizeLimit) {
3413       ERROR("code emitter output buffer too small\n");
3414       return false;
3415    }
3416 
3417    if (writeIssueDelays) {
3418       int n = ((codeSize & 0x1f) / 8) - 1;
3419       if (n < 0) {
3420          data = code;
3421          data[0] = 0x00000000;
3422          data[1] = 0x00000000;
3423          code += 2;
3424          codeSize += 8;
3425          n++;
3426       }
3427 
3428       emitField(data, n * 21, 21, insn->sched);
3429    }
3430 
3431    switch (insn->op) {
3432    case OP_EXIT:
3433       emitEXIT();
3434       break;
3435    case OP_BRA:
3436       emitBRA();
3437       break;
3438    case OP_CALL:
3439       emitCAL();
3440       break;
3441    case OP_PRECONT:
3442       emitPCNT();
3443       break;
3444    case OP_CONT:
3445       emitCONT();
3446       break;
3447    case OP_PREBREAK:
3448       emitPBK();
3449       break;
3450    case OP_BREAK:
3451       emitBRK();
3452       break;
3453    case OP_PRERET:
3454       emitPRET();
3455       break;
3456    case OP_RET:
3457       emitRET();
3458       break;
3459    case OP_JOINAT:
3460       emitSSY();
3461       break;
3462    case OP_JOIN:
3463       emitSYNC();
3464       break;
3465    case OP_QUADON:
3466       emitSAM();
3467       break;
3468    case OP_QUADPOP:
3469       emitRAM();
3470       break;
3471    case OP_MOV:
3472       emitMOV();
3473       break;
3474    case OP_RDSV:
3475       if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3476          emitCS2R();
3477       else
3478          emitS2R();
3479       break;
3480    case OP_ABS:
3481    case OP_NEG:
3482    case OP_SAT:
3483    case OP_FLOOR:
3484    case OP_CEIL:
3485    case OP_TRUNC:
3486    case OP_CVT:
3487       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3488                                  insn->src(0).getFile() == FILE_PREDICATE)) {
3489          emitMOV();
3490       } else if (isFloatType(insn->dType)) {
3491          if (isFloatType(insn->sType))
3492             emitF2F();
3493          else
3494             emitI2F();
3495       } else {
3496          if (isFloatType(insn->sType))
3497             emitF2I();
3498          else
3499             emitI2I();
3500       }
3501       break;
3502    case OP_SHFL:
3503       emitSHFL();
3504       break;
3505    case OP_ADD:
3506    case OP_SUB:
3507       if (isFloatType(insn->dType)) {
3508          if (insn->dType == TYPE_F64)
3509             emitDADD();
3510          else
3511             emitFADD();
3512       } else {
3513          emitIADD();
3514       }
3515       break;
3516    case OP_MUL:
3517       if (isFloatType(insn->dType)) {
3518          if (insn->dType == TYPE_F64)
3519             emitDMUL();
3520          else
3521             emitFMUL();
3522       } else {
3523          emitIMUL();
3524       }
3525       break;
3526    case OP_MAD:
3527    case OP_FMA:
3528       if (isFloatType(insn->dType)) {
3529          if (insn->dType == TYPE_F64)
3530             emitDFMA();
3531          else
3532             emitFFMA();
3533       } else {
3534          emitIMAD();
3535       }
3536       break;
3537    case OP_SHLADD:
3538       emitISCADD();
3539       break;
3540    case OP_XMAD:
3541       emitXMAD();
3542       break;
3543    case OP_MIN:
3544    case OP_MAX:
3545       if (isFloatType(insn->dType)) {
3546          if (insn->dType == TYPE_F64)
3547             emitDMNMX();
3548          else
3549             emitFMNMX();
3550       } else {
3551          emitIMNMX();
3552       }
3553       break;
3554    case OP_SHL:
3555       if (typeSizeof(insn->sType) == 8)
3556          emitSHF();
3557       else
3558          emitSHL();
3559       break;
3560    case OP_SHR:
3561       if (typeSizeof(insn->sType) == 8)
3562          emitSHF();
3563       else
3564          emitSHR();
3565       break;
3566    case OP_POPCNT:
3567       emitPOPC();
3568       break;
3569    case OP_INSBF:
3570       emitBFI();
3571       break;
3572    case OP_EXTBF:
3573       emitBFE();
3574       break;
3575    case OP_BFIND:
3576       emitFLO();
3577       break;
3578    case OP_PERMT:
3579       emitPRMT();
3580       break;
3581    case OP_SLCT:
3582       if (isFloatType(insn->dType))
3583          emitFCMP();
3584       else
3585          emitICMP();
3586       break;
3587    case OP_SET:
3588    case OP_SET_AND:
3589    case OP_SET_OR:
3590    case OP_SET_XOR:
3591       if (insn->def(0).getFile() != FILE_PREDICATE) {
3592          if (isFloatType(insn->sType))
3593             if (insn->sType == TYPE_F64)
3594                emitDSET();
3595             else
3596                emitFSET();
3597          else
3598             emitISET();
3599       } else {
3600          if (isFloatType(insn->sType))
3601             if (insn->sType == TYPE_F64)
3602                emitDSETP();
3603             else
3604                emitFSETP();
3605          else
3606             emitISETP();
3607       }
3608       break;
3609    case OP_SELP:
3610       emitSEL();
3611       break;
3612    case OP_PRESIN:
3613    case OP_PREEX2:
3614       emitRRO();
3615       break;
3616    case OP_COS:
3617    case OP_SIN:
3618    case OP_EX2:
3619    case OP_LG2:
3620    case OP_RCP:
3621    case OP_RSQ:
3622    case OP_SQRT:
3623       emitMUFU();
3624       break;
3625    case OP_AND:
3626    case OP_OR:
3627    case OP_XOR:
3628       switch (insn->def(0).getFile()) {
3629       case FILE_GPR: emitLOP(); break;
3630       case FILE_PREDICATE: emitPSETP(); break;
3631       default:
3632          assert(!"invalid bool op");
3633       }
3634       break;
3635    case OP_NOT:
3636       emitNOT();
3637       break;
3638    case OP_LOAD:
3639       switch (insn->src(0).getFile()) {
3640       case FILE_MEMORY_CONST : emitLDC(); break;
3641       case FILE_MEMORY_LOCAL : emitLDL(); break;
3642       case FILE_MEMORY_SHARED: emitLDS(); break;
3643       case FILE_MEMORY_GLOBAL: emitLD(); break;
3644       default:
3645          assert(!"invalid load");
3646          emitNOP();
3647          break;
3648       }
3649       break;
3650    case OP_STORE:
3651       switch (insn->src(0).getFile()) {
3652       case FILE_MEMORY_LOCAL : emitSTL(); break;
3653       case FILE_MEMORY_SHARED: emitSTS(); break;
3654       case FILE_MEMORY_GLOBAL: emitST(); break;
3655       default:
3656          assert(!"invalid store");
3657          emitNOP();
3658          break;
3659       }
3660       break;
3661    case OP_ATOM:
3662       if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3663          emitATOMS();
3664       else
3665          if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3666             emitRED();
3667          else
3668             emitATOM();
3669       break;
3670    case OP_CCTL:
3671       emitCCTL();
3672       break;
3673    case OP_VFETCH:
3674       emitALD();
3675       break;
3676    case OP_EXPORT:
3677       emitAST();
3678       break;
3679    case OP_PFETCH:
3680       emitISBERD();
3681       break;
3682    case OP_AFETCH:
3683       emitAL2P();
3684       break;
3685    case OP_LINTERP:
3686    case OP_PINTERP:
3687       emitIPA();
3688       break;
3689    case OP_PIXLD:
3690       emitPIXLD();
3691       break;
3692    case OP_TEX:
3693    case OP_TXL:
3694       if (insn->asTex()->tex.scalar)
3695          emitTEXS();
3696       else
3697          emitTEX();
3698       break;
3699    case OP_TXB:
3700       emitTEX();
3701       break;
3702    case OP_TXF:
3703       if (insn->asTex()->tex.scalar)
3704          emitTEXS();
3705       else
3706          emitTLD();
3707       break;
3708    case OP_TXG:
3709       if (insn->asTex()->tex.scalar)
3710          emitTEXS();
3711       else
3712          emitTLD4();
3713       break;
3714    case OP_TXD:
3715       emitTXD();
3716       break;
3717    case OP_TXQ:
3718       emitTXQ();
3719       break;
3720    case OP_TXLQ:
3721       emitTMML();
3722       break;
3723    case OP_TEXBAR:
3724       emitDEPBAR();
3725       break;
3726    case OP_QUADOP:
3727       emitFSWZADD();
3728       break;
3729    case OP_NOP:
3730       emitNOP();
3731       break;
3732    case OP_DISCARD:
3733       emitKIL();
3734       break;
3735    case OP_EMIT:
3736    case OP_RESTART:
3737       emitOUT();
3738       break;
3739    case OP_BAR:
3740       emitBAR();
3741       break;
3742    case OP_MEMBAR:
3743       emitMEMBAR();
3744       break;
3745    case OP_VOTE:
3746       emitVOTE();
3747       break;
3748    case OP_SUSTB:
3749    case OP_SUSTP:
3750       emitSUSTx();
3751       break;
3752    case OP_SULDB:
3753    case OP_SULDP:
3754       emitSULDx();
3755       break;
3756    case OP_SUREDB:
3757    case OP_SUREDP:
3758       emitSUREDx();
3759       break;
3760    default:
3761       assert(!"invalid opcode");
3762       emitNOP();
3763       ret = false;
3764       break;
3765    }
3766 
3767    if (insn->join) {
3768       /*XXX*/
3769    }
3770 
3771    code += 2;
3772    codeSize += 8;
3773    return ret;
3774 }
3775 
3776 uint32_t
getMinEncodingSize(const Instruction * i) const3777 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3778 {
3779    return 8;
3780 }
3781 
3782 /*******************************************************************************
3783  * sched data calculator
3784  ******************************************************************************/
3785 
3786 inline void
emitStall(Instruction * insn,uint8_t cnt)3787 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3788 {
3789    assert(cnt < 16);
3790    insn->sched |= cnt;
3791 }
3792 
3793 inline void
emitYield(Instruction * insn)3794 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3795 {
3796    insn->sched |= 1 << 4;
3797 }
3798 
3799 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3800 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3801 {
3802    assert(id < 6);
3803    if ((insn->sched & 0xe0) == 0xe0)
3804       insn->sched ^= 0xe0;
3805    insn->sched |= id << 5;
3806 }
3807 
3808 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3809 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3810 {
3811    assert(id < 6);
3812    if ((insn->sched & 0x700) == 0x700)
3813       insn->sched ^= 0x700;
3814    insn->sched |= id << 8;
3815 }
3816 
3817 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3818 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3819 {
3820    assert(id < 6);
3821    insn->sched |= 1 << (11 + id);
3822 }
3823 
3824 inline void
emitReuse(Instruction * insn,uint8_t id)3825 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3826 {
3827    assert(id < 4);
3828    insn->sched |= 1 << (17 + id);
3829 }
3830 
3831 inline void
printSchedInfo(int cycle,const Instruction * insn) const3832 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3833                                          const Instruction *insn) const
3834 {
3835    uint8_t st, yl, wr, rd, wt, ru;
3836 
3837    st = (insn->sched & 0x00000f) >> 0;
3838    yl = (insn->sched & 0x000010) >> 4;
3839    wr = (insn->sched & 0x0000e0) >> 5;
3840    rd = (insn->sched & 0x000700) >> 8;
3841    wt = (insn->sched & 0x01f800) >> 11;
3842    ru = (insn->sched & 0x1e0000) >> 17;
3843 
3844    INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3845         cycle, st, yl, wr, rd, wt, ru);
3846 }
3847 
3848 inline int
getStall(const Instruction * insn) const3849 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3850 {
3851    return insn->sched & 0xf;
3852 }
3853 
3854 inline int
getWrDepBar(const Instruction * insn) const3855 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3856 {
3857    return (insn->sched & 0x0000e0) >> 5;
3858 }
3859 
3860 inline int
getRdDepBar(const Instruction * insn) const3861 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3862 {
3863    return (insn->sched & 0x000700) >> 8;
3864 }
3865 
3866 inline int
getWtDepBar(const Instruction * insn) const3867 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3868 {
3869    return (insn->sched & 0x01f800) >> 11;
3870 }
3871 
3872 // Emit the reuse flag which allows to make use of the new memory hierarchy
3873 // introduced since Maxwell, the operand reuse cache.
3874 //
3875 // It allows to reduce bank conflicts by caching operands. Each time you issue
3876 // an instruction, that flag can tell the hw which operands are going to be
3877 // re-used by the next instruction. Note that the next instruction has to use
3878 // the same GPR id in the same operand slot.
3879 void
setReuseFlag(Instruction * insn)3880 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3881 {
3882    Instruction *next = insn->next;
3883    BitSet defs(255, true);
3884 
3885    if (!targ->isReuseSupported(insn))
3886       return;
3887 
3888    for (int d = 0; insn->defExists(d); ++d) {
3889       const Value *def = insn->def(d).rep();
3890       if (insn->def(d).getFile() != FILE_GPR)
3891          continue;
3892       if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3893          continue;
3894       defs.set(def->reg.data.id);
3895    }
3896 
3897    for (int s = 0; insn->srcExists(s); s++) {
3898       const Value *src = insn->src(s).rep();
3899       if (insn->src(s).getFile() != FILE_GPR)
3900          continue;
3901       if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3902          continue;
3903       if (defs.test(src->reg.data.id))
3904          continue;
3905       if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3906          continue;
3907       if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3908          continue;
3909       assert(s < 4);
3910       emitReuse(insn, s);
3911    }
3912 }
3913 
3914 void
recordWr(const Value * v,int cycle,int ready)3915 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3916 {
3917    int a = v->reg.data.id, b;
3918 
3919    switch (v->reg.file) {
3920    case FILE_GPR:
3921       b = a + v->reg.size / 4;
3922       for (int r = a; r < b; ++r)
3923          score->rd.r[r] = ready;
3924       break;
3925    case FILE_PREDICATE:
3926       // To immediately use a predicate set by any instructions, the minimum
3927       // number of stall counts is 13.
3928       score->rd.p[a] = cycle + 13;
3929       break;
3930    case FILE_FLAGS:
3931       score->rd.c = ready;
3932       break;
3933    default:
3934       break;
3935    }
3936 }
3937 
3938 void
checkRd(const Value * v,int cycle,int & delay) const3939 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3940 {
3941    int a = v->reg.data.id, b;
3942    int ready = cycle;
3943 
3944    switch (v->reg.file) {
3945    case FILE_GPR:
3946       b = a + v->reg.size / 4;
3947       for (int r = a; r < b; ++r)
3948          ready = MAX2(ready, score->rd.r[r]);
3949       break;
3950    case FILE_PREDICATE:
3951       ready = MAX2(ready, score->rd.p[a]);
3952       break;
3953    case FILE_FLAGS:
3954       ready = MAX2(ready, score->rd.c);
3955       break;
3956    default:
3957       break;
3958    }
3959    if (cycle < ready)
3960       delay = MAX2(delay, ready - cycle);
3961 }
3962 
3963 void
commitInsn(const Instruction * insn,int cycle)3964 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3965 {
3966    const int ready = cycle + targ->getLatency(insn);
3967 
3968    for (int d = 0; insn->defExists(d); ++d)
3969       recordWr(insn->getDef(d), cycle, ready);
3970 
3971 #ifdef GM107_DEBUG_SCHED_DATA
3972    score->print(cycle);
3973 #endif
3974 }
3975 
3976 #define GM107_MIN_ISSUE_DELAY 0x1
3977 #define GM107_MAX_ISSUE_DELAY 0xf
3978 
3979 int
calcDelay(const Instruction * insn,int cycle) const3980 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3981 {
3982    int delay = 0, ready = cycle;
3983 
3984    for (int s = 0; insn->srcExists(s); ++s)
3985       checkRd(insn->getSrc(s), cycle, delay);
3986 
3987    // TODO: make use of getReadLatency()!
3988 
3989    return MAX2(delay, ready - cycle);
3990 }
3991 
3992 void
setDelay(Instruction * insn,int delay,const Instruction * next)3993 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3994                                    const Instruction *next)
3995 {
3996    const OpClass cl = targ->getOpClass(insn->op);
3997    int wr, rd;
3998 
3999    if (insn->op == OP_EXIT ||
4000        insn->op == OP_BAR ||
4001        insn->op == OP_MEMBAR) {
4002       delay = GM107_MAX_ISSUE_DELAY;
4003    } else
4004    if (insn->op == OP_QUADON ||
4005        insn->op == OP_QUADPOP) {
4006       delay = 0xd;
4007    } else
4008    if (cl == OPCLASS_FLOW || insn->join) {
4009       delay = 0xd;
4010    }
4011 
4012    if (!next || !targ->canDualIssue(insn, next)) {
4013       delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4014    } else {
4015       delay = 0x0; // dual-issue
4016    }
4017 
4018    wr = getWrDepBar(insn);
4019    rd = getRdDepBar(insn);
4020 
4021    if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4022       // Barriers take one additional clock cycle to become active on top of
4023       // the clock consumed by the instruction producing it.
4024       if (!next || insn->bb != next->bb) {
4025          delay = 0x2;
4026       } else {
4027          int wt = getWtDepBar(next);
4028          if ((wt & (1 << wr)) | (wt & (1 << rd)))
4029             delay = 0x2;
4030       }
4031    }
4032 
4033    emitStall(insn, delay);
4034 }
4035 
4036 
4037 // Return true when the given instruction needs to emit a read dependency
4038 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4039 // setting the maximum number of stall counts is not enough.
4040 bool
needRdDepBar(const Instruction * insn) const4041 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4042 {
4043    BitSet srcs(255, true), defs(255, true);
4044    int a, b;
4045 
4046    if (!targ->isBarrierRequired(insn))
4047       return false;
4048 
4049    // Do not emit a read dependency barrier when the instruction doesn't use
4050    // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4051    for (int s = 0; insn->srcExists(s); ++s) {
4052       const Value *src = insn->src(s).rep();
4053       if (insn->src(s).getFile() != FILE_GPR)
4054          continue;
4055       if (src->reg.data.id == 255)
4056          continue;
4057 
4058       a = src->reg.data.id;
4059       b = a + src->reg.size / 4;
4060       for (int r = a; r < b; ++r)
4061          srcs.set(r);
4062    }
4063 
4064    if (!srcs.popCount())
4065       return false;
4066 
4067    // Do not emit a read dependency barrier when the output GPRs are equal to
4068    // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4069    // be produced and WaR hazards are prevented.
4070    for (int d = 0; insn->defExists(d); ++d) {
4071       const Value *def = insn->def(d).rep();
4072       if (insn->def(d).getFile() != FILE_GPR)
4073          continue;
4074       if (def->reg.data.id == 255)
4075          continue;
4076 
4077       a = def->reg.data.id;
4078       b = a + def->reg.size / 4;
4079       for (int r = a; r < b; ++r)
4080          defs.set(r);
4081    }
4082 
4083    srcs.andNot(defs);
4084    if (!srcs.popCount())
4085       return false;
4086 
4087    return true;
4088 }
4089 
4090 // Return true when the given instruction needs to emit a write dependency
4091 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4092 // setting the maximum number of stall counts is not enough. This is only legal
4093 // if the instruction output something.
4094 bool
needWrDepBar(const Instruction * insn) const4095 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4096 {
4097    if (!targ->isBarrierRequired(insn))
4098       return false;
4099 
4100    for (int d = 0; insn->defExists(d); ++d) {
4101       if (insn->def(d).getFile() == FILE_GPR ||
4102           insn->def(d).getFile() == FILE_FLAGS ||
4103           insn->def(d).getFile() == FILE_PREDICATE)
4104          return true;
4105    }
4106    return false;
4107 }
4108 
4109 // Helper function for findFirstUse() and findFirstDef()
4110 bool
doesInsnWriteTo(const Instruction * insn,const Value * val) const4111 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4112                                           const Value *val) const
4113 {
4114    if (val->reg.file != FILE_GPR &&
4115        val->reg.file != FILE_PREDICATE &&
4116        val->reg.file != FILE_FLAGS)
4117       return false;
4118 
4119    for (int d = 0; insn->defExists(d); ++d) {
4120       const Value* def = insn->getDef(d);
4121       int minGPR = def->reg.data.id;
4122       int maxGPR = minGPR + def->reg.size / 4 - 1;
4123 
4124       if (def->reg.file != val->reg.file)
4125          continue;
4126 
4127       if (def->reg.file == FILE_GPR) {
4128          if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4129              val->reg.data.id > maxGPR)
4130             continue;
4131          return true;
4132       } else
4133       if (def->reg.file == FILE_PREDICATE) {
4134          if (val->reg.data.id != minGPR)
4135             continue;
4136          return true;
4137       } else
4138       if (def->reg.file == FILE_FLAGS) {
4139          if (val->reg.data.id != minGPR)
4140             continue;
4141          return true;
4142       }
4143    }
4144 
4145    return false;
4146 }
4147 
4148 // Find the next instruction inside the same basic block which uses (reads or
4149 // writes from) the output of the given instruction in order to avoid RaW and
4150 // WaW hazards.
4151 Instruction *
findFirstUse(const Instruction * bari) const4152 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4153 {
4154    Instruction *insn, *next;
4155 
4156    if (!bari->defExists(0))
4157       return NULL;
4158 
4159    for (insn = bari->next; insn != NULL; insn = next) {
4160       next = insn->next;
4161 
4162       for (int s = 0; insn->srcExists(s); ++s)
4163          if (doesInsnWriteTo(bari, insn->getSrc(s)))
4164             return insn;
4165 
4166       for (int d = 0; insn->defExists(d); ++d)
4167          if (doesInsnWriteTo(bari, insn->getDef(d)))
4168             return insn;
4169    }
4170    return NULL;
4171 }
4172 
4173 // Find the next instruction inside the same basic block which overwrites, at
4174 // least, one source of the given instruction in order to avoid WaR hazards.
4175 Instruction *
findFirstDef(const Instruction * bari) const4176 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4177 {
4178    Instruction *insn, *next;
4179 
4180    if (!bari->srcExists(0))
4181       return NULL;
4182 
4183    for (insn = bari->next; insn != NULL; insn = next) {
4184       next = insn->next;
4185 
4186       for (int s = 0; bari->srcExists(s); ++s)
4187          if (doesInsnWriteTo(insn, bari->getSrc(s)))
4188             return insn;
4189    }
4190    return NULL;
4191 }
4192 
4193 // Dependency barriers:
4194 // This pass is a bit ugly and could probably be improved by performing a
4195 // better allocation.
4196 //
4197 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4198 // dependency barriers using the control codes.
4199 bool
insertBarriers(BasicBlock * bb)4200 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4201 {
4202    std::list<LiveBarUse> live_uses;
4203    std::list<LiveBarDef> live_defs;
4204    Instruction *insn, *next;
4205    BitSet bars(6, true);
4206    int bar_id;
4207 
4208    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4209       Instruction *usei = NULL, *defi = NULL;
4210       bool need_wr_bar, need_rd_bar;
4211 
4212       next = insn->next;
4213 
4214       // Expire old barrier uses.
4215       for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4216            it != live_uses.end();) {
4217          if (insn->serial >= it->usei->serial) {
4218             int wr = getWrDepBar(it->insn);
4219             emitWtDepBar(insn, wr);
4220             bars.clr(wr); // free barrier
4221             it = live_uses.erase(it);
4222             continue;
4223          }
4224          ++it;
4225       }
4226 
4227       // Expire old barrier defs.
4228       for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4229            it != live_defs.end();) {
4230          if (insn->serial >= it->defi->serial) {
4231             int rd = getRdDepBar(it->insn);
4232             emitWtDepBar(insn, rd);
4233             bars.clr(rd); // free barrier
4234             it = live_defs.erase(it);
4235             continue;
4236          }
4237          ++it;
4238       }
4239 
4240       need_wr_bar = needWrDepBar(insn);
4241       need_rd_bar = needRdDepBar(insn);
4242 
4243       if (need_wr_bar) {
4244          // When the instruction requires to emit a write dependency barrier
4245          // (all which write something at a variable latency), find the next
4246          // instruction which reads the outputs (or writes to them, potentially
4247          // completing before this insn.
4248          usei = findFirstUse(insn);
4249 
4250          // Allocate and emit a new barrier.
4251          bar_id = bars.findFreeRange(1);
4252          if (bar_id == -1)
4253             bar_id = 5;
4254          bars.set(bar_id);
4255          emitWrDepBar(insn, bar_id);
4256          if (usei)
4257             live_uses.push_back(LiveBarUse(insn, usei));
4258       }
4259 
4260       if (need_rd_bar) {
4261          // When the instruction requires to emit a read dependency barrier
4262          // (all which read something at a variable latency), find the next
4263          // instruction which will write the inputs.
4264          defi = findFirstDef(insn);
4265 
4266          if (usei && defi && usei->serial <= defi->serial)
4267             continue;
4268 
4269          // Allocate and emit a new barrier.
4270          bar_id = bars.findFreeRange(1);
4271          if (bar_id == -1)
4272             bar_id = 5;
4273          bars.set(bar_id);
4274          emitRdDepBar(insn, bar_id);
4275          if (defi)
4276             live_defs.push_back(LiveBarDef(insn, defi));
4277       }
4278    }
4279 
4280    // Remove unnecessary barrier waits.
4281    BitSet alive_bars(6, true);
4282    for (insn = bb->getEntry(); insn != NULL; insn = next) {
4283       int wr, rd, wt;
4284 
4285       next = insn->next;
4286 
4287       wr = getWrDepBar(insn);
4288       rd = getRdDepBar(insn);
4289       wt = getWtDepBar(insn);
4290 
4291       for (int idx = 0; idx < 6; ++idx) {
4292          if (!(wt & (1 << idx)))
4293             continue;
4294          if (!alive_bars.test(idx)) {
4295             insn->sched &= ~(1 << (11  + idx));
4296          } else {
4297             alive_bars.clr(idx);
4298          }
4299       }
4300 
4301       if (wr < 6)
4302          alive_bars.set(wr);
4303       if (rd < 6)
4304          alive_bars.set(rd);
4305    }
4306 
4307    return true;
4308 }
4309 
4310 bool
visit(Function * func)4311 SchedDataCalculatorGM107::visit(Function *func)
4312 {
4313    ArrayList insns;
4314 
4315    func->orderInstructions(insns);
4316 
4317    scoreBoards.resize(func->cfg.getSize());
4318    for (size_t i = 0; i < scoreBoards.size(); ++i)
4319       scoreBoards[i].wipe();
4320    return true;
4321 }
4322 
4323 bool
visit(BasicBlock * bb)4324 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4325 {
4326    Instruction *insn, *next = NULL;
4327    int cycle = 0;
4328 
4329    for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4330       /*XXX*/
4331       insn->sched = 0x7e0;
4332    }
4333 
4334    if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4335       return true;
4336 
4337    // Insert read/write dependency barriers for instructions which don't
4338    // operate at a fixed latency.
4339    insertBarriers(bb);
4340 
4341    score = &scoreBoards.at(bb->getId());
4342 
4343    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4344       // back branches will wait until all target dependencies are satisfied
4345       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4346          continue;
4347       BasicBlock *in = BasicBlock::get(ei.getNode());
4348       score->setMax(&scoreBoards.at(in->getId()));
4349    }
4350 
4351 #ifdef GM107_DEBUG_SCHED_DATA
4352    INFO("=== BB:%i initial scores\n", bb->getId());
4353    score->print(cycle);
4354 #endif
4355 
4356    // Because barriers are allocated locally (intra-BB), we have to make sure
4357    // that all produced barriers have been consumed before entering inside a
4358    // new basic block. The best way is to do a global allocation pre RA but
4359    // it's really more difficult, especially because of the phi nodes. Anyways,
4360    // it seems like that waiting on a barrier which has already been consumed
4361    // doesn't add any additional cost, it's just not elegant!
4362    Instruction *start = bb->getEntry();
4363    if (start && bb->cfg.incidentCount() > 0) {
4364       for (int b = 0; b < 6; b++)
4365          emitWtDepBar(start, b);
4366    }
4367 
4368    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4369       next = insn->next;
4370 
4371       commitInsn(insn, cycle);
4372       int delay = calcDelay(next, cycle);
4373       setDelay(insn, delay, next);
4374       cycle += getStall(insn);
4375 
4376       setReuseFlag(insn);
4377 
4378       // XXX: The yield flag seems to destroy a bunch of things when it is
4379       // set on every instruction, need investigation.
4380       //emitYield(insn);
4381 
4382 #ifdef GM107_DEBUG_SCHED_DATA
4383       printSchedInfo(cycle, insn);
4384       insn->print();
4385       next->print();
4386 #endif
4387    }
4388 
4389    if (!insn)
4390       return true;
4391    commitInsn(insn, cycle);
4392 
4393    int bbDelay = -1;
4394 
4395 #ifdef GM107_DEBUG_SCHED_DATA
4396    fprintf(stderr, "last instruction is : ");
4397    insn->print();
4398    fprintf(stderr, "cycle=%d\n", cycle);
4399 #endif
4400 
4401    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4402       BasicBlock *out = BasicBlock::get(ei.getNode());
4403 
4404       if (ei.getType() != Graph::Edge::BACK) {
4405          // Only test the first instruction of the outgoing block.
4406          next = out->getEntry();
4407          if (next) {
4408             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4409          } else {
4410             // When the outgoing BB is empty, make sure to set the number of
4411             // stall counts needed by the instruction because we don't know the
4412             // next instruction.
4413             bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4414          }
4415       } else {
4416          // Wait until all dependencies are satisfied.
4417          const int regsFree = score->getLatest();
4418          next = out->getFirst();
4419          for (int c = cycle; next && c < regsFree; next = next->next) {
4420             bbDelay = MAX2(bbDelay, calcDelay(next, c));
4421             c += getStall(next);
4422          }
4423          next = NULL;
4424       }
4425    }
4426    if (bb->cfg.outgoingCount() != 1)
4427       next = NULL;
4428    setDelay(insn, bbDelay, next);
4429    cycle += getStall(insn);
4430 
4431    score->rebase(cycle); // common base for initializing out blocks' scores
4432    return true;
4433 }
4434 
4435 /*******************************************************************************
4436  * main
4437  ******************************************************************************/
4438 
4439 void
prepareEmission(Function * func)4440 CodeEmitterGM107::prepareEmission(Function *func)
4441 {
4442    SchedDataCalculatorGM107 sched(targGM107);
4443    CodeEmitter::prepareEmission(func);
4444    sched.run(func, true, true);
4445 }
4446 
// Returns the number of 24-byte groups ("bundles") required to hold `size`
// bytes, rounding up; 0 bytes need 0 bundles.
//
// Computed as quotient plus a remainder check instead of (size + 23) / 24,
// because the addition wraps around for sizes within 23 of UINT32_MAX and
// would then yield a far too small count.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4451 
4452 void
prepareEmission(Program * prog)4453 CodeEmitterGM107::prepareEmission(Program *prog)
4454 {
4455    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4456         !fi.end(); fi.next()) {
4457       Function *func = reinterpret_cast<Function *>(fi.get());
4458       func->binPos = prog->binSize;
4459       prepareEmission(func);
4460 
4461       // adjust sizes & positions for schedulding info:
4462       if (prog->getTarget()->hasSWSched) {
4463          uint32_t adjPos = func->binPos;
4464          BasicBlock *bb = NULL;
4465          for (int i = 0; i < func->bbCount; ++i) {
4466             bb = func->bbArray[i];
4467             int32_t adjSize = bb->binSize;
4468             if (adjPos % 32) {
4469                adjSize -= 32 - adjPos % 32;
4470                if (adjSize < 0)
4471                   adjSize = 0;
4472             }
4473             adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4474             bb->binPos = adjPos;
4475             bb->binSize = adjSize;
4476             adjPos += adjSize;
4477          }
4478          if (bb)
4479             func->binSize = adjPos - func->binPos;
4480       }
4481 
4482       prog->binSize += func->binSize;
4483    }
4484 }
4485 
CodeEmitterGM107(const TargetGM107 * target)4486 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4487    : CodeEmitter(target),
4488      targGM107(target),
4489      progType(Program::TYPE_VERTEX),
4490      insn(NULL),
4491      writeIssueDelays(target->hasSWSched),
4492      data(NULL)
4493 {
4494    code = NULL;
4495    codeSize = codeSizeLimit = 0;
4496    relocInfo = NULL;
4497 }
4498 
4499 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4500 TargetGM107::createCodeEmitterGM107(Program::Type type)
4501 {
4502    CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4503    emit->setProgramType(type);
4504    return emit;
4505 }
4506 
4507 } // namespace nv50_ir
4508