1 /*
2 * Copyright 2012 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26
27 namespace nv50_ir {
28
// Code emitter producing the GK110 encoding of the Fermi/Kepler ISA.
//
// The public part declares the virtual emitter entry points (presumably
// overriding CodeEmitter - confirm against the base class).  Everything else
// is a private helper; the definitions visible in this file fill in the
// instruction words through code[0] and code[1].
class CodeEmitterGK110 : public CodeEmitter
{
public:
   CodeEmitterGK110(const TargetNVC0 *, Program::Type);

   virtual bool emitInstruction(Instruction *);
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
   virtual void prepareEmission(Function *);

private:
   // Target description handed to the constructor.
   const TargetNVC0 *targNVC0;

   // Program type handed to the constructor.
   Program::Type progType;

   // NOTE(review): presumably selects whether scheduling/issue-delay data is
   // written; its use is outside this chunk - confirm.
   const bool writeIssueDelays;

private:
   // Common instruction layouts shared by several operations.
   void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
   void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);

   void emitPredicate(const Instruction *);

   // Encoders for constant-buffer addresses and immediates.
   void setCAddress14(const ValueRef&);
   void setShortImmediate(const Instruction *, const int s);
   void setImmediate32(const Instruction *, const int s, Modifier);
   void setSUConst16(const Instruction *, const int s);

   void modNegAbsF32_3b(const Instruction *, const int s);

   // Encoders for individual instruction fields; `pos` is a bit position
   // within the instruction.
   void emitCondCode(CondCode cc, int pos, uint8_t mask);
   void emitInterpMode(const Instruction *);
   void emitLoadStoreType(DataType ty, const int pos);
   void emitCachingMode(CacheMode c, const int pos);
   void emitSUGType(DataType, const int pos);
   void emitSUCachingMode(CacheMode c);

   inline uint8_t getSRegEncoding(const ValueRef&);

   void emitRoundMode(RoundMode, const int pos, const int rintPos);
   void emitRoundModeF(RoundMode, const int pos);
   void emitRoundModeI(RoundMode, const int pos);

   void emitNegAbs12(const Instruction *);

   // Per-operation emitters.
   void emitNOP(const Instruction *);

   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);
   void emitATOM(const Instruction *);
   void emitCCTL(const Instruction *);

   void emitINTERP(const Instruction *);
   void emitAFETCH(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitVFETCH(const Instruction *);
   void emitEXPORT(const Instruction *);
   void emitOUT(const Instruction *);

   void emitUADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitDADD(const Instruction *);
   void emitIMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitDMUL(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);
   void emitSHLADD(const Instruction *);
   void emitFMAD(const Instruction *);
   void emitDMAD(const Instruction *);
   void emitMADSP(const Instruction *i);

   void emitNOT(const Instruction *);
   void emitLogicOp(const Instruction *, uint8_t subOp);
   void emitPOPC(const Instruction *);
   void emitINSBF(const Instruction *);
   void emitEXTBF(const Instruction *);
   void emitBFIND(const Instruction *);
   void emitPERMT(const Instruction *);
   void emitShift(const Instruction *);
   void emitShift64(const Instruction *);

   void emitSFnOp(const Instruction *, uint8_t subOp);

   void emitCVT(const Instruction *);
   void emitMINMAX(const Instruction *);
   void emitPreOp(const Instruction *);

   void emitSET(const CmpInstruction *);
   void emitSLCT(const CmpInstruction *);
   void emitSELP(const Instruction *);

   void emitTEXBAR(const Instruction *);
   void emitTEX(const TexInstruction *);
   void emitTEXCSAA(const TexInstruction *);
   void emitTXQ(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);

   void emitPIXLD(const Instruction *);

   void emitBAR(const Instruction *);
   void emitMEMBAR(const Instruction *);

   void emitFlow(const Instruction *);

   void emitSHFL(const Instruction *);

   void emitVOTE(const Instruction *);

   void emitSULDGB(const TexInstruction *);
   void emitSUSTGx(const TexInstruction *);
   void emitSUCLAMPMode(uint16_t);
   void emitSUCalc(Instruction *);

   void emitVSHL(const Instruction *);
   void emitVectorSubOp(const Instruction *);

   // Write a register id into the instruction at bit position `pos`.
   inline void defId(const ValueDef&, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);
   inline void srcId(const Instruction *, int s, const int pos);

   inline void srcAddr32(const ValueRef&, const int pos); // address / 4

   // True if the referenced immediate does not fit the short immediate field.
   inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
};
157
// Register id encoded when an operand slot holds no register (see the
// srcId()/defId() helpers).
#define GK110_GPR_ZERO 255

// The modifier/flag macros below set one bit of the instruction being
// emitted.  `b` is the bit position within the instruction written as hex
// digits WITHOUT the 0x prefix (NEG_(3b, 0) targets bit 0x3b, i.e. bit 27 of
// code[1]); `s` is a source index.  They expect an instruction pointer `i`
// and the word array `code` to be in scope at the point of use.
//
// Each one is wrapped in do { } while (0) so that it is a single statement:
// a bare `if` would capture the `else` of an enclosing unbraced if/else.
// The shifted constant is unsigned so that bit 31 of a word can be set
// without shifting a signed 1 into the sign bit.
#define NEG_(b, s) \
   do { \
      if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1u << ((0x##b) % 32); \
   } while (0)
#define ABS_(b, s) \
   do { \
      if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1u << ((0x##b) % 32); \
   } while (0)

#define NOT_(b, s) \
   do { \
      if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32); \
   } while (0)

#define FTZ_(b) \
   do { if (i->ftz) code[(0x##b) / 32] |= 1u << ((0x##b) % 32); } while (0)
#define DNZ_(b) \
   do { if (i->dnz) code[(0x##b) / 32] |= 1u << ((0x##b) % 32); } while (0)

#define SAT_(b) \
   do { if (i->saturate) code[(0x##b) / 32] |= 1u << ((0x##b) % 32); } while (0)

// Forward i->rnd to emitRoundMode<t>() at bit position 0x<b>.
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Register data of the representative value of a source / definition.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
177
srcId(const ValueRef & src,const int pos)178 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
179 {
180 code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
181 }
182
srcId(const ValueRef * src,const int pos)183 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
184 {
185 code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
186 }
187
srcId(const Instruction * insn,int s,int pos)188 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
189 {
190 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
191 code[pos / 32] |= r << (pos % 32);
192 }
193
srcAddr32(const ValueRef & src,const int pos)194 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
195 {
196 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
197 }
198
defId(const ValueDef & def,const int pos)199 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
200 {
201 code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
202 }
203
isLIMM(const ValueRef & ref,DataType ty,bool mod)204 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
205 {
206 const ImmediateValue *imm = ref.get()->asImm();
207
208 if (ty == TYPE_F32)
209 return imm && imm->reg.data.u32 & 0xfff;
210 else
211 return imm && (imm->reg.data.s32 > 0x7ffff ||
212 imm->reg.data.s32 < -0x80000);
213 }
214
215 void
emitRoundMode(RoundMode rnd,const int pos,const int rintPos)216 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
217 {
218 bool rint = false;
219 uint8_t n;
220
221 switch (rnd) {
222 case ROUND_MI: rint = true; FALLTHROUGH; case ROUND_M: n = 1; break;
223 case ROUND_PI: rint = true; FALLTHROUGH; case ROUND_P: n = 2; break;
224 case ROUND_ZI: rint = true; FALLTHROUGH; case ROUND_Z: n = 3; break;
225 default:
226 rint = rnd == ROUND_NI;
227 n = 0;
228 assert(rnd == ROUND_N || rnd == ROUND_NI);
229 break;
230 }
231 code[pos / 32] |= n << (pos % 32);
232 if (rint && rintPos >= 0)
233 code[rintPos / 32] |= 1 << (rintPos % 32);
234 }
235
236 void
emitRoundModeF(RoundMode rnd,const int pos)237 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
238 {
239 uint8_t n;
240
241 switch (rnd) {
242 case ROUND_M: n = 1; break;
243 case ROUND_P: n = 2; break;
244 case ROUND_Z: n = 3; break;
245 default:
246 n = 0;
247 assert(rnd == ROUND_N);
248 break;
249 }
250 code[pos / 32] |= n << (pos % 32);
251 }
252
253 void
emitRoundModeI(RoundMode rnd,const int pos)254 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
255 {
256 uint8_t n;
257
258 switch (rnd) {
259 case ROUND_MI: n = 1; break;
260 case ROUND_PI: n = 2; break;
261 case ROUND_ZI: n = 3; break;
262 default:
263 n = 0;
264 assert(rnd == ROUND_NI);
265 break;
266 }
267 code[pos / 32] |= n << (pos % 32);
268 }
269
emitCondCode(CondCode cc,int pos,uint8_t mask)270 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
271 {
272 uint8_t n;
273
274 switch (cc) {
275 case CC_FL: n = 0x00; break;
276 case CC_LT: n = 0x01; break;
277 case CC_EQ: n = 0x02; break;
278 case CC_LE: n = 0x03; break;
279 case CC_GT: n = 0x04; break;
280 case CC_NE: n = 0x05; break;
281 case CC_GE: n = 0x06; break;
282 case CC_LTU: n = 0x09; break;
283 case CC_EQU: n = 0x0a; break;
284 case CC_LEU: n = 0x0b; break;
285 case CC_GTU: n = 0x0c; break;
286 case CC_NEU: n = 0x0d; break;
287 case CC_GEU: n = 0x0e; break;
288 case CC_TR: n = 0x0f; break;
289 case CC_NO: n = 0x10; break;
290 case CC_NC: n = 0x11; break;
291 case CC_NS: n = 0x12; break;
292 case CC_NA: n = 0x13; break;
293 case CC_A: n = 0x14; break;
294 case CC_S: n = 0x15; break;
295 case CC_C: n = 0x16; break;
296 case CC_O: n = 0x17; break;
297 default:
298 n = 0;
299 assert(!"invalid condition code");
300 break;
301 }
302 code[pos / 32] |= (n & mask) << (pos % 32);
303 }
304
305 void
emitPredicate(const Instruction * i)306 CodeEmitterGK110::emitPredicate(const Instruction *i)
307 {
308 if (i->predSrc >= 0) {
309 srcId(i->src(i->predSrc), 18);
310 if (i->cc == CC_NOT_P)
311 code[0] |= 8 << 18; // negate
312 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
313 } else {
314 code[0] |= 7 << 18;
315 }
316 }
317
318 void
setCAddress14(const ValueRef & src)319 CodeEmitterGK110::setCAddress14(const ValueRef& src)
320 {
321 const Storage& res = src.get()->asSym()->reg;
322 const int32_t addr = res.data.offset / 4;
323
324 code[0] |= (addr & 0x01ff) << 23;
325 code[1] |= (addr & 0x3e00) >> 9;
326 code[1] |= res.fileIndex << 5;
327 }
328
329 void
setShortImmediate(const Instruction * i,const int s)330 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
331 {
332 const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
333 const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
334
335 if (i->sType == TYPE_F32) {
336 assert(!(u32 & 0x00000fff));
337 code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
338 code[1] |= ((u32 & 0x7fe00000) >> 21);
339 code[1] |= ((u32 & 0x80000000) >> 4);
340 } else
341 if (i->sType == TYPE_F64) {
342 assert(!(u64 & 0x00000fffffffffffULL));
343 code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
344 code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
345 code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
346 } else {
347 assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);
348 code[0] |= (u32 & 0x001ff) << 23;
349 code[1] |= (u32 & 0x7fe00) >> 9;
350 code[1] |= (u32 & 0x80000) << 8;
351 }
352 }
353
354 void
setImmediate32(const Instruction * i,const int s,Modifier mod)355 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
356 Modifier mod)
357 {
358 uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
359
360 if (mod) {
361 ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
362 mod.applyTo(imm);
363 u32 = imm.reg.data.u32;
364 }
365
366 code[0] |= u32 << 23;
367 code[1] |= u32 >> 9;
368 }
369
370 void
emitForm_L(const Instruction * i,uint32_t opc,uint8_t ctg,Modifier mod,int sCount)371 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
372 Modifier mod, int sCount)
373 {
374 code[0] = ctg;
375 code[1] = opc << 20;
376
377 emitPredicate(i);
378
379 defId(i->def(0), 2);
380
381 for (int s = 0; s < sCount && i->srcExists(s); ++s) {
382 switch (i->src(s).getFile()) {
383 case FILE_GPR:
384 srcId(i->src(s), s ? 42 : 10);
385 break;
386 case FILE_IMMEDIATE:
387 setImmediate32(i, s, mod);
388 break;
389 default:
390 break;
391 }
392 }
393 }
394
395
396 void
emitForm_C(const Instruction * i,uint32_t opc,uint8_t ctg)397 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
398 {
399 code[0] = ctg;
400 code[1] = opc << 20;
401
402 emitPredicate(i);
403
404 defId(i->def(0), 2);
405
406 switch (i->src(0).getFile()) {
407 case FILE_MEMORY_CONST:
408 code[1] |= 0x4 << 28;
409 setCAddress14(i->src(0));
410 break;
411 case FILE_GPR:
412 code[1] |= 0xc << 28;
413 srcId(i->src(0), 23);
414 break;
415 default:
416 assert(0);
417 break;
418 }
419 }
420
// 0x2 for GPR, c[] and 0x1 for short immediate
//
// Emit the common instruction form with up to three sources.
//
// If source 1 is an immediate, the short-immediate variant is used
// (code[0] = 0x1, opcode `opc1`); otherwise the register/constant variant
// (code[0] = 0x2, opcode `opc2`, top nibble of code[1] preset to 0xc).
// The predicate and def(0) (at bit 2) are filled in, followed by each
// existing source among the first three according to its register file.
void
CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
                              uint32_t opc1)
{
   const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;

   // Bit position used for a GPR in source 1: normally 23, but 42 when
   // source 2 is a constant-buffer reference.
   int s1 = 23;
   if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
      s1 = 42;

   if (imm) {
      code[0] = 0x1;
      code[1] = opc1 << 20;
   } else {
      code[0] = 0x2;
      code[1] = (0xc << 28) | (opc2 << 20);
   }

   emitPredicate(i);

   defId(i->def(0), 2);

   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
      switch (i->src(s).getFile()) {
      case FILE_MEMORY_CONST:
         // Clear one bit of the operand-layout nibble: 0x4 for a constant
         // in source 2, 0x8 for a constant in any other slot.
         code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
         setCAddress14(i->src(s));
         break;
      case FILE_IMMEDIATE:
         setShortImmediate(i, s);
         break;
      case FILE_GPR:
         // source 0 -> bit 10, source 1 -> s1, source 2 -> bit 42
         srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
         break;
      default:
         // For OP_SELP, source 2 is a predicate and is encoded at bit 42.
         if (i->op == OP_SELP) {
            assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
            srcId(i->src(s), 42);
         }
         // ignore here, can be predicate or flags, but must not be address
         break;
      }
   }
   // 0x0 = invalid
   // 0xc = rrr
   // 0x8 = rrc
   // 0x4 = rcr
   assert(imm || (code[1] & (0xc << 28)));
}
471
472 inline void
modNegAbsF32_3b(const Instruction * i,const int s)473 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
474 {
475 if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
476 if (i->src(s).mod.neg()) code[1] ^= (1 << 27);
477 }
478
479 void
emitNOP(const Instruction * i)480 CodeEmitterGK110::emitNOP(const Instruction *i)
481 {
482 code[0] = 0x00003c02;
483 code[1] = 0x85800000;
484
485 if (i)
486 emitPredicate(i);
487 else
488 code[0] = 0x001c3c02;
489 }
490
491 void
emitFMAD(const Instruction * i)492 CodeEmitterGK110::emitFMAD(const Instruction *i)
493 {
494 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
495
496 if (isLIMM(i->src(1), TYPE_F32)) {
497 assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
498
499 // last source is dst, so force 2 sources
500 emitForm_L(i, 0x600, 0x0, 0, 2);
501
502 if (i->flagsDef >= 0)
503 code[1] |= 1 << 23;
504
505 SAT_(3a);
506 NEG_(3c, 2);
507
508 if (neg1) {
509 code[1] |= 1 << 27;
510 }
511 } else {
512 emitForm_21(i, 0x0c0, 0x940);
513
514 NEG_(34, 2);
515 SAT_(35);
516 RND_(36, F);
517
518 if (code[0] & 0x1) {
519 if (neg1)
520 code[1] ^= 1 << 27;
521 } else
522 if (neg1) {
523 code[1] |= 1 << 19;
524 }
525 }
526
527 FTZ_(38);
528 DNZ_(39);
529 }
530
531 void
emitDMAD(const Instruction * i)532 CodeEmitterGK110::emitDMAD(const Instruction *i)
533 {
534 assert(!i->saturate);
535 assert(!i->ftz);
536
537 emitForm_21(i, 0x1b8, 0xb38);
538
539 NEG_(34, 2);
540 RND_(36, F);
541
542 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
543
544 if (code[0] & 0x1) {
545 if (neg1)
546 code[1] ^= 1 << 27;
547 } else
548 if (neg1) {
549 code[1] |= 1 << 19;
550 }
551 }
552
553 void
emitMADSP(const Instruction * i)554 CodeEmitterGK110::emitMADSP(const Instruction *i)
555 {
556 emitForm_21(i, 0x140, 0xa40);
557
558 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
559 code[1] |= 0x00c00000;
560 } else {
561 code[1] |= (i->subOp & 0x00f) << 19; // imadp1
562 code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
563 code[1] |= (i->subOp & 0x100) << 11; // imadp3
564 code[1] |= (i->subOp & 0x200) << 15; // imadp3
565 code[1] |= (i->subOp & 0xc00) << 12; // imadp3
566 }
567
568 if (i->flagsDef >= 0)
569 code[1] |= 1 << 18;
570 }
571
572 void
emitFMUL(const Instruction * i)573 CodeEmitterGK110::emitFMUL(const Instruction *i)
574 {
575 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
576
577 assert(i->postFactor >= -3 && i->postFactor <= 3);
578
579 if (isLIMM(i->src(1), TYPE_F32)) {
580 emitForm_L(i, 0x200, 0x2, Modifier(0));
581
582 FTZ_(38);
583 DNZ_(39);
584 SAT_(3a);
585 if (neg)
586 code[1] ^= 1 << 22;
587
588 assert(i->postFactor == 0);
589 } else {
590 emitForm_21(i, 0x234, 0xc34);
591 code[1] |= ((i->postFactor > 0) ?
592 (7 - i->postFactor) : (0 - i->postFactor)) << 12;
593
594 RND_(2a, F);
595 FTZ_(2f);
596 DNZ_(30);
597 SAT_(35);
598
599 if (code[0] & 0x1) {
600 if (neg)
601 code[1] ^= 1 << 27;
602 } else
603 if (neg) {
604 code[1] |= 1 << 19;
605 }
606 }
607 }
608
609 void
emitDMUL(const Instruction * i)610 CodeEmitterGK110::emitDMUL(const Instruction *i)
611 {
612 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
613
614 assert(!i->postFactor);
615 assert(!i->saturate);
616 assert(!i->ftz);
617 assert(!i->dnz);
618
619 emitForm_21(i, 0x240, 0xc40);
620
621 RND_(2a, F);
622
623 if (code[0] & 0x1) {
624 if (neg)
625 code[1] ^= 1 << 27;
626 } else
627 if (neg) {
628 code[1] |= 1 << 19;
629 }
630 }
631
632 void
emitIMUL(const Instruction * i)633 CodeEmitterGK110::emitIMUL(const Instruction *i)
634 {
635 assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
636 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
637
638 if (isLIMM(i->src(1), TYPE_S32)) {
639 emitForm_L(i, 0x280, 2, Modifier(0));
640
641 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
642 code[1] |= 1 << 24;
643 if (i->sType == TYPE_S32)
644 code[1] |= 3 << 25;
645 } else {
646 emitForm_21(i, 0x21c, 0xc1c);
647
648 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
649 code[1] |= 1 << 10;
650 if (i->sType == TYPE_S32)
651 code[1] |= 3 << 11;
652 }
653 }
654
655 void
emitFADD(const Instruction * i)656 CodeEmitterGK110::emitFADD(const Instruction *i)
657 {
658 if (isLIMM(i->src(1), TYPE_F32)) {
659 assert(i->rnd == ROUND_N);
660 assert(!i->saturate);
661
662 Modifier mod = i->src(1).mod ^
663 Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
664
665 emitForm_L(i, 0x400, 0, mod);
666
667 FTZ_(3a);
668 NEG_(3b, 0);
669 ABS_(39, 0);
670 } else {
671 emitForm_21(i, 0x22c, 0xc2c);
672
673 FTZ_(2f);
674 RND_(2a, F);
675 ABS_(31, 0);
676 NEG_(33, 0);
677 SAT_(35);
678
679 if (code[0] & 0x1) {
680 modNegAbsF32_3b(i, 1);
681 if (i->op == OP_SUB) code[1] ^= 1 << 27;
682 } else {
683 ABS_(34, 1);
684 NEG_(30, 1);
685 if (i->op == OP_SUB) code[1] ^= 1 << 16;
686 }
687 }
688 }
689
690 void
emitDADD(const Instruction * i)691 CodeEmitterGK110::emitDADD(const Instruction *i)
692 {
693 assert(!i->saturate);
694 assert(!i->ftz);
695
696 emitForm_21(i, 0x238, 0xc38);
697 RND_(2a, F);
698 ABS_(31, 0);
699 NEG_(33, 0);
700 if (code[0] & 0x1) {
701 modNegAbsF32_3b(i, 1);
702 if (i->op == OP_SUB) code[1] ^= 1 << 27;
703 } else {
704 NEG_(30, 1);
705 ABS_(34, 1);
706 if (i->op == OP_SUB) code[1] ^= 1 << 16;
707 }
708 }
709
710 void
emitUADD(const Instruction * i)711 CodeEmitterGK110::emitUADD(const Instruction *i)
712 {
713 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
714
715 if (i->op == OP_SUB)
716 addOp ^= 1;
717
718 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
719
720 if (isLIMM(i->src(1), TYPE_S32)) {
721 emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
722
723 if (addOp & 2)
724 code[1] |= 1 << 27;
725
726 assert(i->flagsDef < 0);
727 assert(i->flagsSrc < 0);
728
729 SAT_(39);
730 } else {
731 emitForm_21(i, 0x208, 0xc08);
732
733 assert(addOp != 3); // would be add-plus-one
734
735 code[1] |= addOp << 19;
736
737 if (i->flagsDef >= 0)
738 code[1] |= 1 << 18; // write carry
739 if (i->flagsSrc >= 0)
740 code[1] |= 1 << 14; // add carry
741
742 SAT_(35);
743 }
744 }
745
746 void
emitIMAD(const Instruction * i)747 CodeEmitterGK110::emitIMAD(const Instruction *i)
748 {
749 uint8_t addOp =
750 i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
751
752 emitForm_21(i, 0x100, 0xa00);
753
754 assert(addOp != 3);
755 code[1] |= addOp << 26;
756
757 if (i->sType == TYPE_S32)
758 code[1] |= (1 << 19) | (1 << 24);
759
760 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
761 code[1] |= 1 << 25;
762
763 if (i->flagsDef >= 0) code[1] |= 1 << 18;
764 if (i->flagsSrc >= 0) code[1] |= 1 << 20;
765
766 SAT_(35);
767 }
768
769 void
emitISAD(const Instruction * i)770 CodeEmitterGK110::emitISAD(const Instruction *i)
771 {
772 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
773
774 emitForm_21(i, 0x1f4, 0xb74);
775
776 if (i->dType == TYPE_S32)
777 code[1] |= 1 << 19;
778 }
779
780 void
emitSHLADD(const Instruction * i)781 CodeEmitterGK110::emitSHLADD(const Instruction *i)
782 {
783 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
784 const ImmediateValue *imm = i->src(1).get()->asImm();
785 assert(imm);
786
787 if (i->src(2).getFile() == FILE_IMMEDIATE) {
788 code[0] = 0x1;
789 code[1] = 0xc0c << 20;
790 } else {
791 code[0] = 0x2;
792 code[1] = 0x20c << 20;
793 }
794 code[1] |= addOp << 19;
795
796 emitPredicate(i);
797
798 defId(i->def(0), 2);
799 srcId(i->src(0), 10);
800
801 if (i->flagsDef >= 0)
802 code[1] |= 1 << 18;
803
804 assert(!(imm->reg.data.u32 & 0xffffffe0));
805 code[1] |= imm->reg.data.u32 << 10;
806
807 switch (i->src(2).getFile()) {
808 case FILE_GPR:
809 assert(code[0] & 0x2);
810 code[1] |= 0xc << 28;
811 srcId(i->src(2), 23);
812 break;
813 case FILE_MEMORY_CONST:
814 assert(code[0] & 0x2);
815 code[1] |= 0x4 << 28;
816 setCAddress14(i->src(2));
817 break;
818 case FILE_IMMEDIATE:
819 assert(code[0] & 0x1);
820 setShortImmediate(i, 2);
821 break;
822 default:
823 assert(!"bad src2 file");
824 break;
825 }
826 }
827
828 void
emitNOT(const Instruction * i)829 CodeEmitterGK110::emitNOT(const Instruction *i)
830 {
831 code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
832 code[1] = 0x22003800;
833
834 emitPredicate(i);
835
836 defId(i->def(0), 2);
837
838 switch (i->src(0).getFile()) {
839 case FILE_GPR:
840 code[1] |= 0xc << 28;
841 srcId(i->src(0), 23);
842 break;
843 case FILE_MEMORY_CONST:
844 code[1] |= 0x4 << 28;
845 setCAddress14(i->src(0));
846 break;
847 default:
848 assert(0);
849 break;
850 }
851 }
852
853 void
emitLogicOp(const Instruction * i,uint8_t subOp)854 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
855 {
856 if (i->def(0).getFile() == FILE_PREDICATE) {
857 code[0] = 0x00000002 | (subOp << 27);
858 code[1] = 0x84800000;
859
860 emitPredicate(i);
861
862 defId(i->def(0), 5);
863 srcId(i->src(0), 14);
864 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
865 srcId(i->src(1), 32);
866 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
867
868 if (i->defExists(1)) {
869 defId(i->def(1), 2);
870 } else {
871 code[0] |= 7 << 2;
872 }
873 // (a OP b) OP c
874 if (i->predSrc != 2 && i->srcExists(2)) {
875 code[1] |= subOp << 16;
876 srcId(i->src(2), 42);
877 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
878 } else {
879 code[1] |= 7 << 10;
880 }
881 } else
882 if (isLIMM(i->src(1), TYPE_S32)) {
883 emitForm_L(i, 0x200, 0, i->src(1).mod);
884 code[1] |= subOp << 24;
885 NOT_(3a, 0);
886 } else {
887 emitForm_21(i, 0x220, 0xc20);
888 code[1] |= subOp << 12;
889 NOT_(2a, 0);
890 NOT_(2b, 1);
891 }
892 }
893
894 void
emitPOPC(const Instruction * i)895 CodeEmitterGK110::emitPOPC(const Instruction *i)
896 {
897 assert(!isLIMM(i->src(1), TYPE_S32, true));
898
899 emitForm_21(i, 0x204, 0xc04);
900
901 NOT_(2a, 0);
902 if (!(code[0] & 0x1))
903 NOT_(2b, 1);
904 }
905
906 void
emitINSBF(const Instruction * i)907 CodeEmitterGK110::emitINSBF(const Instruction *i)
908 {
909 emitForm_21(i, 0x1f8, 0xb78);
910 }
911
912 void
emitEXTBF(const Instruction * i)913 CodeEmitterGK110::emitEXTBF(const Instruction *i)
914 {
915 emitForm_21(i, 0x600, 0xc00);
916
917 if (i->dType == TYPE_S32)
918 code[1] |= 0x80000;
919 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
920 code[1] |= 0x800;
921 }
922
923 void
emitBFIND(const Instruction * i)924 CodeEmitterGK110::emitBFIND(const Instruction *i)
925 {
926 emitForm_C(i, 0x218, 0x2);
927
928 if (i->dType == TYPE_S32)
929 code[1] |= 0x80000;
930 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
931 code[1] |= 0x800;
932 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
933 code[1] |= 0x1000;
934 }
935
936 void
emitPERMT(const Instruction * i)937 CodeEmitterGK110::emitPERMT(const Instruction *i)
938 {
939 emitForm_21(i, 0x1e0, 0xb60);
940
941 code[1] |= i->subOp << 19;
942 }
943
944 void
emitShift(const Instruction * i)945 CodeEmitterGK110::emitShift(const Instruction *i)
946 {
947 if (i->op == OP_SHR) {
948 emitForm_21(i, 0x214, 0xc14);
949 if (isSignedType(i->dType))
950 code[1] |= 1 << 19;
951 } else {
952 emitForm_21(i, 0x224, 0xc24);
953 }
954
955 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
956 code[1] |= 1 << 10;
957 }
958
959 void
emitShift64(const Instruction * i)960 CodeEmitterGK110::emitShift64(const Instruction *i)
961 {
962 if (i->op == OP_SHR) {
963 emitForm_21(i, 0x27c, 0xc7c);
964 if (isSignedType(i->sType))
965 code[1] |= 0x100;
966 if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH)
967 code[1] |= 1 << 19;
968 } else {
969 emitForm_21(i, 0xdfc, 0xf7c);
970 }
971 code[1] |= 0x200;
972
973 if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
974 code[1] |= 1 << 21;
975 }
976
977 void
emitPreOp(const Instruction * i)978 CodeEmitterGK110::emitPreOp(const Instruction *i)
979 {
980 emitForm_C(i, 0x248, 0x2);
981
982 if (i->op == OP_PREEX2)
983 code[1] |= 1 << 10;
984
985 NEG_(30, 0);
986 ABS_(34, 0);
987 }
988
989 void
emitSFnOp(const Instruction * i,uint8_t subOp)990 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
991 {
992 code[0] = 0x00000002 | (subOp << 23);
993 code[1] = 0x84000000;
994
995 emitPredicate(i);
996
997 defId(i->def(0), 2);
998 srcId(i->src(0), 10);
999
1000 NEG_(33, 0);
1001 ABS_(31, 0);
1002 SAT_(35);
1003 }
1004
1005 void
emitMINMAX(const Instruction * i)1006 CodeEmitterGK110::emitMINMAX(const Instruction *i)
1007 {
1008 uint32_t op2, op1;
1009
1010 switch (i->dType) {
1011 case TYPE_U32:
1012 case TYPE_S32:
1013 op2 = 0x210;
1014 op1 = 0xc10;
1015 break;
1016 case TYPE_F32:
1017 op2 = 0x230;
1018 op1 = 0xc30;
1019 break;
1020 case TYPE_F64:
1021 op2 = 0x228;
1022 op1 = 0xc28;
1023 break;
1024 default:
1025 assert(0);
1026 op2 = 0;
1027 op1 = 0;
1028 break;
1029 }
1030 emitForm_21(i, op2, op1);
1031
1032 if (i->dType == TYPE_S32)
1033 code[1] |= 1 << 19;
1034 code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
1035 code[1] |= i->subOp << 14;
1036 if (i->flagsDef >= 0)
1037 code[1] |= i->subOp << 18;
1038
1039 FTZ_(2f);
1040 ABS_(31, 0);
1041 NEG_(33, 0);
1042 if (code[0] & 0x1) {
1043 modNegAbsF32_3b(i, 1);
1044 } else {
1045 ABS_(34, 1);
1046 NEG_(30, 1);
1047 }
1048 }
1049
1050 void
emitCVT(const Instruction * i)1051 CodeEmitterGK110::emitCVT(const Instruction *i)
1052 {
1053 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1054 const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
1055 const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
1056
1057 bool sat = i->saturate;
1058 bool abs = i->src(0).mod.abs();
1059 bool neg = i->src(0).mod.neg();
1060
1061 RoundMode rnd = i->rnd;
1062
1063 switch (i->op) {
1064 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1065 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1066 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1067 case OP_SAT: sat = true; break;
1068 case OP_NEG: neg = !neg; break;
1069 case OP_ABS: abs = true; neg = false; break;
1070 default:
1071 break;
1072 }
1073
1074 DataType dType;
1075
1076 if (i->op == OP_NEG && i->dType == TYPE_U32)
1077 dType = TYPE_S32;
1078 else
1079 dType = i->dType;
1080
1081
1082 uint32_t op;
1083
1084 if (f2f) op = 0x254;
1085 else if (f2i) op = 0x258;
1086 else if (i2f) op = 0x25c;
1087 else op = 0x260;
1088
1089 emitForm_C(i, op, 0x2);
1090
1091 FTZ_(2f);
1092 if (neg) code[1] |= 1 << 16;
1093 if (abs) code[1] |= 1 << 20;
1094 if (sat) code[1] |= 1 << 21;
1095
1096 emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
1097
1098 code[0] |= typeSizeofLog2(dType) << 10;
1099 code[0] |= typeSizeofLog2(i->sType) << 12;
1100 code[1] |= i->subOp << 12;
1101
1102 if (isSignedIntType(dType))
1103 code[0] |= 0x4000;
1104 if (isSignedIntType(i->sType))
1105 code[0] |= 0x8000;
1106 }
1107
// Encode a SET (comparison) instruction, including the SET_AND / SET_OR /
// SET_XOR variants that combine the comparison with a third source.
//
// Two encodings exist, chosen by the register file of def(0): one writing
// a predicate and one writing a regular destination.  In both, the opcode
// pair depends on the source type (F32, F64 or other).
void
CodeEmitterGK110::emitSET(const CmpInstruction *i)
{
   uint16_t op1, op2;

   if (i->def(0).getFile() == FILE_PREDICATE) {
      switch (i->sType) {
      case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
      case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
      default:
         op2 = 0x1b0;
         op1 = 0xb30;
         break;
      }
      emitForm_21(i, op2, op1);

      NEG_(2e, 0);
      ABS_(9, 0);
      // Source 1 modifiers: separate bits in the register form, bit 27 of
      // code[1] in the short immediate form.
      if (!(code[0] & 0x1)) {
         NEG_(8, 1);
         ABS_(2f, 1);
      } else {
         modNegAbsF32_3b(i, 1);
      }
      FTZ_(32);

      // normal DST field is negated predicate result
      // Clear bits 2..7 of code[0] and copy the 3 bits previously at
      // bits 2..4 up to bits 5..7.
      code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
      if (i->defExists(1))
         defId(i->def(1), 2);
      else
         code[0] |= 0x1c;
   } else {
      switch (i->sType) {
      case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
      case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
      default:
         op2 = 0x1a8;
         op1 = 0xb28;
         break;
      }
      emitForm_21(i, op2, op1);

      NEG_(2e, 0);
      ABS_(39, 0);
      if (!(code[0] & 0x1)) {
         NEG_(38, 1);
         ABS_(2f, 1);
      } else {
         modNegAbsF32_3b(i, 1);
      }
      FTZ_(3a);

      // An F32 destination sets a flag whose position depends on whether
      // the source type is float.
      if (i->dType == TYPE_F32) {
         if (isFloatType(i->sType))
            code[1] |= 1 << 23;
         else
            code[1] |= 1 << 15;
      }
   }
   if (i->sType == TYPE_S32)
      code[1] |= 1 << 19;

   if (i->op != OP_SET) {
      // Combining variants: logic op at bit 16 of code[1], third source at
      // bit 0x2a.
      switch (i->op) {
      case OP_SET_AND: code[1] |= 0x0 << 16; break;
      case OP_SET_OR: code[1] |= 0x1 << 16; break;
      case OP_SET_XOR: code[1] |= 0x2 << 16; break;
      default:
         assert(0);
         break;
      }
      srcId(i->src(2), 0x2a);
   } else {
      code[1] |= 0x7 << 10;
   }
   if (i->flagsSrc >= 0)
      code[1] |= 1 << 14;
   // Float comparisons use a 4-bit condition code at bit 0x33, all others
   // a 3-bit code at bit 0x34.
   emitCondCode(i->setCond,
                isFloatType(i->sType) ? 0x33 : 0x34,
                isFloatType(i->sType) ? 0xf : 0x7);
}
1190
1191 void
emitSLCT(const CmpInstruction * i)1192 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1193 {
1194 CondCode cc = i->setCond;
1195 if (i->src(2).mod.neg())
1196 cc = reverseCondCode(cc);
1197
1198 if (i->dType == TYPE_F32) {
1199 emitForm_21(i, 0x1d0, 0xb50);
1200 FTZ_(32);
1201 emitCondCode(cc, 0x33, 0xf);
1202 } else {
1203 emitForm_21(i, 0x1a0, 0xb20);
1204 emitCondCode(cc, 0x34, 0x7);
1205 if (i->dType == TYPE_S32)
1206 code[1] |= 1 << 19;
1207 }
1208 }
1209
1210 void
gk110_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)1211 gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1212 {
1213 int loc = entry->loc;
1214 bool val = false;
1215 switch (entry->ipa) {
1216 case 0:
1217 val = data.force_persample_interp;
1218 break;
1219 case 1:
1220 val = data.msaa;
1221 break;
1222 }
1223 if (val)
1224 code[loc + 1] |= 1 << 13;
1225 else
1226 code[loc + 1] &= ~(1 << 13);
1227 }
1228
emitSELP(const Instruction * i)1229 void CodeEmitterGK110::emitSELP(const Instruction *i)
1230 {
1231 emitForm_21(i, 0x250, 0x050);
1232
1233 if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1234 code[1] |= 1 << 13;
1235
1236 if (i->subOp >= 1) {
1237 addInterp(i->subOp - 1, 0, gk110_selpFlip);
1238 }
1239 }
1240
emitTEXBAR(const Instruction * i)1241 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1242 {
1243 code[0] = 0x0000003e | (i->subOp << 23);
1244 code[1] = 0x77000000;
1245
1246 emitPredicate(i);
1247 }
1248
emitTEXCSAA(const TexInstruction * i)1249 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1250 {
1251 code[0] = 0x00000002;
1252 code[1] = 0x76c00000;
1253
1254 code[1] |= i->tex.r << 9;
1255 // code[1] |= i->tex.s << (9 + 8);
1256
1257 if (i->tex.liveOnly)
1258 code[0] |= 0x80000000;
1259
1260 defId(i->def(0), 2);
1261 srcId(i->src(0), 10);
1262 }
1263
1264 static inline bool
isNextIndependentTex(const TexInstruction * i)1265 isNextIndependentTex(const TexInstruction *i)
1266 {
1267 if (!i->next || !isTextureOp(i->next->op))
1268 return false;
1269 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1270 return false;
1271 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1272 }
1273
1274 void
emitTEX(const TexInstruction * i)1275 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1276 {
1277 const bool ind = i->tex.rIndirectSrc >= 0;
1278
1279 if (ind) {
1280 code[0] = 0x00000002;
1281 switch (i->op) {
1282 case OP_TXD:
1283 code[1] = 0x7e000000;
1284 break;
1285 case OP_TXLQ:
1286 code[1] = 0x7e800000;
1287 break;
1288 case OP_TXF:
1289 code[1] = 0x78000000;
1290 break;
1291 case OP_TXG:
1292 code[1] = 0x7dc00000;
1293 break;
1294 default:
1295 code[1] = 0x7d800000;
1296 break;
1297 }
1298 } else {
1299 switch (i->op) {
1300 case OP_TXD:
1301 code[0] = 0x00000002;
1302 code[1] = 0x76000000;
1303 code[1] |= i->tex.r << 9;
1304 break;
1305 case OP_TXLQ:
1306 code[0] = 0x00000002;
1307 code[1] = 0x76800000;
1308 code[1] |= i->tex.r << 9;
1309 break;
1310 case OP_TXF:
1311 code[0] = 0x00000002;
1312 code[1] = 0x70000000;
1313 code[1] |= i->tex.r << 13;
1314 break;
1315 case OP_TXG:
1316 code[0] = 0x00000001;
1317 code[1] = 0x70000000;
1318 code[1] |= i->tex.r << 15;
1319 break;
1320 default:
1321 code[0] = 0x00000001;
1322 code[1] = 0x60000000;
1323 code[1] |= i->tex.r << 15;
1324 break;
1325 }
1326 }
1327
1328 code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1329
1330 if (i->tex.liveOnly)
1331 code[0] |= 0x80000000;
1332
1333 switch (i->op) {
1334 case OP_TEX: break;
1335 case OP_TXB: code[1] |= 0x2000; break;
1336 case OP_TXL: code[1] |= 0x3000; break;
1337 case OP_TXF: break;
1338 case OP_TXG: break;
1339 case OP_TXD: break;
1340 case OP_TXLQ: break;
1341 default:
1342 assert(!"invalid texture op");
1343 break;
1344 }
1345
1346 if (i->op == OP_TXF) {
1347 if (!i->tex.levelZero)
1348 code[1] |= 0x1000;
1349 } else
1350 if (i->tex.levelZero) {
1351 code[1] |= 0x1000;
1352 }
1353
1354 if (i->op != OP_TXD && i->tex.derivAll)
1355 code[1] |= 0x200;
1356
1357 emitPredicate(i);
1358
1359 code[1] |= i->tex.mask << 2;
1360
1361 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1362
1363 defId(i->def(0), 2);
1364 srcId(i->src(0), 10);
1365 srcId(i, src1, 23);
1366
1367 if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1368
1369 // texture target:
1370 code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1371 if (i->tex.target.isArray())
1372 code[1] |= 0x40;
1373 if (i->tex.target.isShadow())
1374 code[1] |= 0x400;
1375 if (i->tex.target == TEX_TARGET_2D_MS ||
1376 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1377 code[1] |= 0x800;
1378
1379 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1380 // ?
1381 }
1382
1383 if (i->tex.useOffsets == 1) {
1384 switch (i->op) {
1385 case OP_TXF: code[1] |= 0x200; break;
1386 case OP_TXD: code[1] |= 0x00400000; break;
1387 default: code[1] |= 0x800; break;
1388 }
1389 }
1390 if (i->tex.useOffsets == 4)
1391 code[1] |= 0x1000;
1392 }
1393
1394 void
emitTXQ(const TexInstruction * i)1395 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1396 {
1397 code[0] = 0x00000002;
1398 code[1] = 0x75400001;
1399
1400 switch (i->tex.query) {
1401 case TXQ_DIMS: code[0] |= 0x01 << 25; break;
1402 case TXQ_TYPE: code[0] |= 0x02 << 25; break;
1403 case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1404 case TXQ_FILTER: code[0] |= 0x10 << 25; break;
1405 case TXQ_LOD: code[0] |= 0x12 << 25; break;
1406 case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;
1407 default:
1408 assert(!"invalid texture query");
1409 break;
1410 }
1411
1412 code[1] |= i->tex.mask << 2;
1413 code[1] |= i->tex.r << 9;
1414 if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1415 code[1] |= 0x08000000;
1416
1417 defId(i->def(0), 2);
1418 srcId(i->src(0), 10);
1419
1420 emitPredicate(i);
1421 }
1422
1423 void
emitQUADOP(const Instruction * i,uint8_t qOp,uint8_t laneMask)1424 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1425 {
1426 code[0] = 0x00000002 | ((qOp & 1) << 31);
1427 code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
1428
1429 defId(i->def(0), 2);
1430 srcId(i->src(0), 10);
1431 srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
1432
1433 emitPredicate(i);
1434 }
1435
1436 void
emitPIXLD(const Instruction * i)1437 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1438 {
1439 emitForm_L(i, 0x7f4, 2, Modifier(0));
1440 code[1] |= i->subOp << 2;
1441 code[1] |= 0x00070000;
1442 }
1443
1444 void
emitBAR(const Instruction * i)1445 CodeEmitterGK110::emitBAR(const Instruction *i)
1446 {
1447 code[0] = 0x00000002;
1448 code[1] = 0x85400000;
1449
1450 switch (i->subOp) {
1451 case NV50_IR_SUBOP_BAR_ARRIVE: code[1] |= 0x08; break;
1452 case NV50_IR_SUBOP_BAR_RED_AND: code[1] |= 0x50; break;
1453 case NV50_IR_SUBOP_BAR_RED_OR: code[1] |= 0x90; break;
1454 case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1455 default:
1456 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1457 break;
1458 }
1459
1460 emitPredicate(i);
1461
1462 // barrier id
1463 if (i->src(0).getFile() == FILE_GPR) {
1464 srcId(i->src(0), 10);
1465 } else {
1466 ImmediateValue *imm = i->getSrc(0)->asImm();
1467 assert(imm);
1468 code[0] |= imm->reg.data.u32 << 10;
1469 code[1] |= 0x8000;
1470 }
1471
1472 // thread count
1473 if (i->src(1).getFile() == FILE_GPR) {
1474 srcId(i->src(1), 23);
1475 } else {
1476 ImmediateValue *imm = i->getSrc(0)->asImm();
1477 assert(imm);
1478 assert(imm->reg.data.u32 <= 0xfff);
1479 code[0] |= imm->reg.data.u32 << 23;
1480 code[1] |= imm->reg.data.u32 >> 9;
1481 code[1] |= 0x4000;
1482 }
1483
1484 if (i->srcExists(2) && (i->predSrc != 2)) {
1485 srcId(i->src(2), 32 + 10);
1486 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1487 code[1] |= 1 << 13;
1488 } else {
1489 code[1] |= 7 << 10;
1490 }
1491 }
1492
emitMEMBAR(const Instruction * i)1493 void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1494 {
1495 code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1496 code[1] = 0x7cc00000;
1497
1498 emitPredicate(i);
1499 }
1500
1501 void
emitFlow(const Instruction * i)1502 CodeEmitterGK110::emitFlow(const Instruction *i)
1503 {
1504 const FlowInstruction *f = i->asFlow();
1505
1506 unsigned mask; // bit 0: predicate, bit 1: target
1507
1508 code[0] = 0x00000000;
1509
1510 switch (i->op) {
1511 case OP_BRA:
1512 code[1] = f->absolute ? 0x10800000 : 0x12000000;
1513 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1514 code[0] |= 0x80;
1515 mask = 3;
1516 break;
1517 case OP_CALL:
1518 code[1] = f->absolute ? 0x11000000 : 0x13000000;
1519 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1520 code[0] |= 0x80;
1521 mask = 2;
1522 break;
1523
1524 case OP_EXIT: code[1] = 0x18000000; mask = 1; break;
1525 case OP_RET: code[1] = 0x19000000; mask = 1; break;
1526 case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1527 case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;
1528 case OP_CONT: code[1] = 0x1a800000; mask = 1; break;
1529
1530 case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;
1531 case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1532 case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
1533 case OP_PRERET: code[1] = 0x13800000; mask = 2; break;
1534
1535 case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
1536 case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1537 case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
1538 default:
1539 assert(!"invalid flow operation");
1540 return;
1541 }
1542
1543 if (mask & 1) {
1544 emitPredicate(i);
1545 if (i->flagsSrc < 0)
1546 code[0] |= 0x3c;
1547 }
1548
1549 if (!f)
1550 return;
1551
1552 if (f->allWarp)
1553 code[0] |= 1 << 9;
1554 if (f->limit)
1555 code[0] |= 1 << 8;
1556
1557 if (f->op == OP_CALL) {
1558 if (f->builtin) {
1559 assert(f->absolute);
1560 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1561 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1562 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1563 } else {
1564 assert(!f->absolute);
1565 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1566 code[0] |= (pcRel & 0x1ff) << 23;
1567 code[1] |= (pcRel >> 9) & 0x7fff;
1568 }
1569 } else
1570 if (mask & 2) {
1571 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1572 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1573 pcRel += 8;
1574 // currently we don't want absolute branches
1575 assert(!f->absolute);
1576 code[0] |= (pcRel & 0x1ff) << 23;
1577 code[1] |= (pcRel >> 9) & 0x7fff;
1578 }
1579 }
1580
1581 void
emitSHFL(const Instruction * i)1582 CodeEmitterGK110::emitSHFL(const Instruction *i)
1583 {
1584 const ImmediateValue *imm;
1585
1586 code[0] = 0x00000002;
1587 code[1] = 0x78800000 | (i->subOp << 1);
1588
1589 emitPredicate(i);
1590
1591 defId(i->def(0), 2);
1592 srcId(i->src(0), 10);
1593
1594 switch (i->src(1).getFile()) {
1595 case FILE_GPR:
1596 srcId(i->src(1), 23);
1597 break;
1598 case FILE_IMMEDIATE:
1599 imm = i->getSrc(1)->asImm();
1600 assert(imm && imm->reg.data.u32 < 0x20);
1601 code[0] |= imm->reg.data.u32 << 23;
1602 code[0] |= 1 << 31;
1603 break;
1604 default:
1605 assert(!"invalid src1 file");
1606 break;
1607 }
1608
1609 switch (i->src(2).getFile()) {
1610 case FILE_GPR:
1611 srcId(i->src(2), 42);
1612 break;
1613 case FILE_IMMEDIATE:
1614 imm = i->getSrc(2)->asImm();
1615 assert(imm && imm->reg.data.u32 < 0x2000);
1616 code[1] |= imm->reg.data.u32 << 5;
1617 code[1] |= 1;
1618 break;
1619 default:
1620 assert(!"invalid src2 file");
1621 break;
1622 }
1623
1624 if (!i->defExists(1))
1625 code[1] |= 7 << 19;
1626 else {
1627 assert(i->def(1).getFile() == FILE_PREDICATE);
1628 defId(i->def(1), 51);
1629 }
1630 }
1631
1632 void
emitVOTE(const Instruction * i)1633 CodeEmitterGK110::emitVOTE(const Instruction *i)
1634 {
1635 const ImmediateValue *imm;
1636 uint32_t u32;
1637
1638 code[0] = 0x00000002;
1639 code[1] = 0x86c00000 | (i->subOp << 19);
1640
1641 emitPredicate(i);
1642
1643 unsigned rp = 0;
1644 for (int d = 0; i->defExists(d); d++) {
1645 if (i->def(d).getFile() == FILE_PREDICATE) {
1646 assert(!(rp & 2));
1647 rp |= 2;
1648 defId(i->def(d), 48);
1649 } else if (i->def(d).getFile() == FILE_GPR) {
1650 assert(!(rp & 1));
1651 rp |= 1;
1652 defId(i->def(d), 2);
1653 } else {
1654 assert(!"Unhandled def");
1655 }
1656 }
1657 if (!(rp & 1))
1658 code[0] |= 255 << 2;
1659 if (!(rp & 2))
1660 code[1] |= 7 << 16;
1661
1662 switch (i->src(0).getFile()) {
1663 case FILE_PREDICATE:
1664 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
1665 code[0] |= 1 << 13;
1666 srcId(i->src(0), 42);
1667 break;
1668 case FILE_IMMEDIATE:
1669 imm = i->getSrc(0)->asImm();
1670 assert(imm);
1671 u32 = imm->reg.data.u32;
1672 assert(u32 == 0 || u32 == 1);
1673 code[1] |= (u32 == 1 ? 0x7 : 0xf) << 10;
1674 break;
1675 default:
1676 assert(!"Unhandled src");
1677 break;
1678 }
1679 }
1680
1681 void
emitSUGType(DataType ty,const int pos)1682 CodeEmitterGK110::emitSUGType(DataType ty, const int pos)
1683 {
1684 uint8_t n = 0;
1685
1686 switch (ty) {
1687 case TYPE_S32: n = 1; break;
1688 case TYPE_U8: n = 2; break;
1689 case TYPE_S8: n = 3; break;
1690 default:
1691 assert(ty == TYPE_U32);
1692 break;
1693 }
1694 code[pos / 32] |= n << (pos % 32);
1695 }
1696
1697 void
emitSUCachingMode(CacheMode c)1698 CodeEmitterGK110::emitSUCachingMode(CacheMode c)
1699 {
1700 uint8_t n = 0;
1701
1702 switch (c) {
1703 case CACHE_CA:
1704 // case CACHE_WB:
1705 n = 0;
1706 break;
1707 case CACHE_CG:
1708 n = 1;
1709 break;
1710 case CACHE_CS:
1711 n = 2;
1712 break;
1713 case CACHE_CV:
1714 // case CACHE_WT:
1715 n = 3;
1716 break;
1717 default:
1718 assert(!"invalid caching mode");
1719 break;
1720 }
1721 code[0] |= (n & 1) << 31;
1722 code[1] |= (n & 2) >> 1;
1723 }
1724
1725 void
setSUConst16(const Instruction * i,const int s)1726 CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)
1727 {
1728 const uint32_t offset = i->getSrc(s)->reg.data.offset;
1729
1730 assert(offset == (offset & 0xfffc));
1731
1732 code[0] |= offset << 21;
1733 code[1] |= offset >> 11;
1734 code[1] |= i->getSrc(s)->reg.fileIndex << 5;
1735 }
1736
1737 void
emitSULDGB(const TexInstruction * i)1738 CodeEmitterGK110::emitSULDGB(const TexInstruction *i)
1739 {
1740 code[0] = 0x00000002;
1741 code[1] = 0x30000000 | (i->subOp << 14);
1742
1743 if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1744 emitLoadStoreType(i->dType, 0x38);
1745 emitCachingMode(i->cache, 0x36);
1746
1747 // format
1748 setSUConst16(i, 1);
1749 } else {
1750 assert(i->src(1).getFile() == FILE_GPR);
1751 code[1] |= 0x49800000;
1752
1753 emitLoadStoreType(i->dType, 0x21);
1754 emitSUCachingMode(i->cache);
1755
1756 srcId(i->src(1), 23);
1757 }
1758
1759 emitSUGType(i->sType, 0x34);
1760
1761 emitPredicate(i);
1762 defId(i->def(0), 2); // destination
1763 srcId(i->src(0), 10); // address
1764
1765 // surface predicate
1766 if (!i->srcExists(2) || (i->predSrc == 2)) {
1767 code[1] |= 0x7 << 10;
1768 } else {
1769 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1770 code[1] |= 1 << 13;
1771 srcId(i->src(2), 32 + 10);
1772 }
1773 }
1774
1775 void
emitSUSTGx(const TexInstruction * i)1776 CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)
1777 {
1778 assert(i->op == OP_SUSTP);
1779
1780 code[0] = 0x00000002;
1781 code[1] = 0x38000000;
1782
1783 if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1784 code[0] |= i->subOp << 2;
1785
1786 if (i->op == OP_SUSTP)
1787 code[0] |= i->tex.mask << 4;
1788
1789 emitSUGType(i->sType, 0x8);
1790 emitCachingMode(i->cache, 0x36);
1791
1792 // format
1793 setSUConst16(i, 1);
1794 } else {
1795 assert(i->src(1).getFile() == FILE_GPR);
1796
1797 code[0] |= i->subOp << 23;
1798 code[1] |= 0x41c00000;
1799
1800 if (i->op == OP_SUSTP)
1801 code[0] |= i->tex.mask << 25;
1802
1803 emitSUGType(i->sType, 0x1d);
1804 emitSUCachingMode(i->cache);
1805
1806 srcId(i->src(1), 2);
1807 }
1808
1809 emitPredicate(i);
1810 srcId(i->src(0), 10); // address
1811 srcId(i->src(3), 42); // values
1812
1813 // surface predicate
1814 if (!i->srcExists(2) || (i->predSrc == 2)) {
1815 code[1] |= 0x7 << 18;
1816 } else {
1817 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1818 code[1] |= 1 << 21;
1819 srcId(i->src(2), 32 + 18);
1820 }
1821 }
1822
1823 void
emitSUCLAMPMode(uint16_t subOp)1824 CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)
1825 {
1826 uint8_t m;
1827 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1828 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1829 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1830 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1831 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1832 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1833 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1834 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1835 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1836 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1837 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1838 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1839 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1840 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1841 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1842 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1843 default:
1844 return;
1845 }
1846 code[1] |= m << 20;
1847 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1848 code[1] |= 1 << 24;
1849 }
1850
1851 void
emitSUCalc(Instruction * i)1852 CodeEmitterGK110::emitSUCalc(Instruction *i)
1853 {
1854 ImmediateValue *imm = NULL;
1855 uint64_t opc1, opc2;
1856
1857 if (i->srcExists(2)) {
1858 imm = i->getSrc(2)->asImm();
1859 if (imm)
1860 i->setSrc(2, NULL); // special case, make emitForm_21 not assert
1861 }
1862
1863 switch (i->op) {
1864 case OP_SUCLAMP: opc1 = 0xb00; opc2 = 0x580; break;
1865 case OP_SUBFM: opc1 = 0xb68; opc2 = 0x1e8; break;
1866 case OP_SUEAU: opc1 = 0xb6c; opc2 = 0x1ec; break;
1867 default:
1868 assert(0);
1869 return;
1870 }
1871 emitForm_21(i, opc2, opc1);
1872
1873 if (i->op == OP_SUCLAMP) {
1874 if (i->dType == TYPE_S32)
1875 code[1] |= 1 << 19;
1876 emitSUCLAMPMode(i->subOp);
1877 }
1878
1879 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1880 code[1] |= 1 << 18;
1881
1882 if (i->op != OP_SUEAU) {
1883 const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;
1884 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1885 code[0] |= 255 << 2;
1886 code[1] |= i->getDef(1)->reg.data.id << pos;
1887 } else
1888 if (i->defExists(1)) { // r, p
1889 assert(i->def(1).getFile() == FILE_PREDICATE);
1890 code[1] |= i->getDef(1)->reg.data.id << pos;
1891 } else { // r, #
1892 code[1] |= 7 << pos;
1893 }
1894 }
1895
1896 if (imm) {
1897 assert(i->op == OP_SUCLAMP);
1898 i->setSrc(2, imm);
1899 code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6
1900 }
1901 }
1902
1903
1904 void
emitVectorSubOp(const Instruction * i)1905 CodeEmitterGK110::emitVectorSubOp(const Instruction *i)
1906 {
1907 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1908 case 0:
1909 code[1] |= (i->subOp & 0x000f) << 7; // vsrc1
1910 code[1] |= (i->subOp & 0x00e0) >> 6; // vsrc2
1911 code[1] |= (i->subOp & 0x0100) << 13; // vsrc2
1912 code[1] |= (i->subOp & 0x3c00) << 12; // vdst
1913 break;
1914 default:
1915 assert(0);
1916 break;
1917 }
1918 }
1919
1920 void
emitVSHL(const Instruction * i)1921 CodeEmitterGK110::emitVSHL(const Instruction *i)
1922 {
1923 code[0] = 0x00000002;
1924 code[1] = 0xb8000000;
1925
1926 assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);
1927
1928 if (isSignedType(i->dType)) code[1] |= 1 << 25;
1929 if (isSignedType(i->sType)) code[1] |= 1 << 19;
1930
1931 emitVectorSubOp(i);
1932
1933 emitPredicate(i);
1934 defId(i->def(0), 2);
1935 srcId(i->src(0), 10);
1936
1937 if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {
1938 ImmediateValue *imm = i->getSrc(1)->asImm();
1939 assert(imm);
1940 code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;
1941 code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;
1942 } else {
1943 assert(i->getSrc(1)->reg.file == FILE_GPR);
1944 code[1] |= 1 << 21;
1945 srcId(i->src(1), 23);
1946 }
1947 srcId(i->src(2), 42);
1948
1949 if (i->saturate)
1950 code[0] |= 1 << 22;
1951 if (i->flagsDef >= 0)
1952 code[1] |= 1 << 18;
1953 }
1954
1955 void
emitAFETCH(const Instruction * i)1956 CodeEmitterGK110::emitAFETCH(const Instruction *i)
1957 {
1958 uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1959
1960 code[0] = 0x00000002 | (offset << 23);
1961 code[1] = 0x7d000000 | (offset >> 9);
1962
1963 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1964 code[1] |= 0x8;
1965
1966 emitPredicate(i);
1967
1968 defId(i->def(0), 2);
1969 srcId(i->src(0).getIndirect(0), 10);
1970 }
1971
1972 void
emitPFETCH(const Instruction * i)1973 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1974 {
1975 uint32_t prim = i->src(0).get()->reg.data.u32;
1976
1977 code[0] = 0x00000002 | ((prim & 0xff) << 23);
1978 code[1] = 0x7f800000;
1979
1980 emitPredicate(i);
1981
1982 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1983
1984 defId(i->def(0), 2);
1985 srcId(i, src1, 10);
1986 }
1987
1988 void
emitVFETCH(const Instruction * i)1989 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1990 {
1991 unsigned int size = typeSizeof(i->dType);
1992 uint32_t offset = i->src(0).get()->reg.data.offset;
1993
1994 code[0] = 0x00000002 | (offset << 23);
1995 code[1] = 0x7ec00000 | (offset >> 9);
1996 code[1] |= (size / 4 - 1) << 18;
1997
1998 if (i->perPatch)
1999 code[1] |= 0x4;
2000 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
2001 code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
2002
2003 emitPredicate(i);
2004
2005 defId(i->def(0), 2);
2006 srcId(i->src(0).getIndirect(0), 10);
2007 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
2008 }
2009
2010 void
emitEXPORT(const Instruction * i)2011 CodeEmitterGK110::emitEXPORT(const Instruction *i)
2012 {
2013 unsigned int size = typeSizeof(i->dType);
2014 uint32_t offset = i->src(0).get()->reg.data.offset;
2015
2016 code[0] = 0x00000002 | (offset << 23);
2017 code[1] = 0x7f000000 | (offset >> 9);
2018 code[1] |= (size / 4 - 1) << 18;
2019
2020 if (i->perPatch)
2021 code[1] |= 0x4;
2022
2023 emitPredicate(i);
2024
2025 assert(i->src(1).getFile() == FILE_GPR);
2026
2027 srcId(i->src(0).getIndirect(0), 10);
2028 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
2029 srcId(i->src(1), 2);
2030 }
2031
2032 void
emitOUT(const Instruction * i)2033 CodeEmitterGK110::emitOUT(const Instruction *i)
2034 {
2035 assert(i->src(0).getFile() == FILE_GPR);
2036
2037 emitForm_21(i, 0x1f0, 0xb70);
2038
2039 if (i->op == OP_EMIT)
2040 code[1] |= 1 << 10;
2041 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
2042 code[1] |= 1 << 11;
2043 }
2044
2045 void
emitInterpMode(const Instruction * i)2046 CodeEmitterGK110::emitInterpMode(const Instruction *i)
2047 {
2048 code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
2049 code[1] |= (i->ipa & 0xc) << (19 - 2);
2050 }
2051
2052 void
gk110_interpApply(const struct FixupEntry * entry,uint32_t * code,const FixupData & data)2053 gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data)
2054 {
2055 int ipa = entry->ipa;
2056 int reg = entry->reg;
2057 int loc = entry->loc;
2058
2059 if (data.flatshade &&
2060 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2061 ipa = NV50_IR_INTERP_FLAT;
2062 reg = 0xff;
2063 } else if (data.force_persample_interp &&
2064 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2065 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2066 ipa |= NV50_IR_INTERP_CENTROID;
2067 }
2068 code[loc + 1] &= ~(0xf << 19);
2069 code[loc + 1] |= (ipa & 0x3) << 21;
2070 code[loc + 1] |= (ipa & 0xc) << (19 - 2);
2071 code[loc + 0] &= ~(0xff << 23);
2072 code[loc + 0] |= reg << 23;
2073 }
2074
2075 void
emitINTERP(const Instruction * i)2076 CodeEmitterGK110::emitINTERP(const Instruction *i)
2077 {
2078 const uint32_t base = i->getSrc(0)->reg.data.offset;
2079
2080 code[0] = 0x00000002 | (base << 31);
2081 code[1] = 0x74800000 | (base >> 1);
2082
2083 if (i->saturate)
2084 code[1] |= 1 << 18;
2085
2086 if (i->op == OP_PINTERP) {
2087 srcId(i->src(1), 23);
2088 addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);
2089 } else {
2090 code[0] |= 0xff << 23;
2091 addInterp(i->ipa, 0xff, gk110_interpApply);
2092 }
2093
2094 srcId(i->src(0).getIndirect(0), 10);
2095 emitInterpMode(i);
2096
2097 emitPredicate(i);
2098 defId(i->def(0), 2);
2099
2100 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
2101 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
2102 else
2103 code[1] |= 0xff << 10;
2104 }
2105
2106 void
emitLoadStoreType(DataType ty,const int pos)2107 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
2108 {
2109 uint8_t n;
2110
2111 switch (ty) {
2112 case TYPE_U8:
2113 n = 0;
2114 break;
2115 case TYPE_S8:
2116 n = 1;
2117 break;
2118 case TYPE_U16:
2119 n = 2;
2120 break;
2121 case TYPE_S16:
2122 n = 3;
2123 break;
2124 case TYPE_F32:
2125 case TYPE_U32:
2126 case TYPE_S32:
2127 n = 4;
2128 break;
2129 case TYPE_F64:
2130 case TYPE_U64:
2131 case TYPE_S64:
2132 n = 5;
2133 break;
2134 case TYPE_B128:
2135 n = 6;
2136 break;
2137 default:
2138 n = 0;
2139 assert(!"invalid ld/st type");
2140 break;
2141 }
2142 code[pos / 32] |= n << (pos % 32);
2143 }
2144
2145 void
emitCachingMode(CacheMode c,const int pos)2146 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
2147 {
2148 uint8_t n;
2149
2150 switch (c) {
2151 case CACHE_CA:
2152 // case CACHE_WB:
2153 n = 0;
2154 break;
2155 case CACHE_CG:
2156 n = 1;
2157 break;
2158 case CACHE_CS:
2159 n = 2;
2160 break;
2161 case CACHE_CV:
2162 // case CACHE_WT:
2163 n = 3;
2164 break;
2165 default:
2166 n = 0;
2167 assert(!"invalid caching mode");
2168 break;
2169 }
2170 code[pos / 32] |= n << (pos % 32);
2171 }
2172
2173 void
emitSTORE(const Instruction * i)2174 CodeEmitterGK110::emitSTORE(const Instruction *i)
2175 {
2176 int32_t offset = SDATA(i->src(0)).offset;
2177
2178 switch (i->src(0).getFile()) {
2179 case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
2180 case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
2181 case FILE_MEMORY_SHARED:
2182 code[0] = 0x00000002;
2183 if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
2184 code[1] = 0x78400000;
2185 else
2186 code[1] = 0x7ac00000;
2187 break;
2188 default:
2189 assert(!"invalid memory file");
2190 break;
2191 }
2192
2193 if (code[0] & 0x2) {
2194 offset &= 0xffffff;
2195 emitLoadStoreType(i->dType, 0x33);
2196 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2197 emitCachingMode(i->cache, 0x2f);
2198 } else {
2199 emitLoadStoreType(i->dType, 0x38);
2200 emitCachingMode(i->cache, 0x3b);
2201 }
2202 code[0] |= offset << 23;
2203 code[1] |= offset >> 9;
2204
2205 // Unlocked store on shared memory can fail.
2206 if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2207 i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
2208 assert(i->defExists(0));
2209 defId(i->def(0), 32 + 16);
2210 }
2211
2212 emitPredicate(i);
2213
2214 srcId(i->src(1), 2);
2215 srcId(i->src(0).getIndirect(0), 10);
2216 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2217 i->src(0).isIndirect(0) &&
2218 i->getIndirect(0, 0)->reg.size == 8)
2219 code[1] |= 1 << 23;
2220 }
2221
2222 void
emitLOAD(const Instruction * i)2223 CodeEmitterGK110::emitLOAD(const Instruction *i)
2224 {
2225 int32_t offset = SDATA(i->src(0)).offset;
2226
2227 switch (i->src(0).getFile()) {
2228 case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
2229 case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
2230 case FILE_MEMORY_SHARED:
2231 code[0] = 0x00000002;
2232 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
2233 code[1] = 0x77400000;
2234 else
2235 code[1] = 0x7a400000;
2236 break;
2237 case FILE_MEMORY_CONST:
2238 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
2239 emitMOV(i);
2240 return;
2241 }
2242 offset &= 0xffff;
2243 code[0] = 0x00000002;
2244 code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
2245 code[1] |= i->subOp << 15;
2246 break;
2247 default:
2248 assert(!"invalid memory file");
2249 break;
2250 }
2251
2252 if (code[0] & 0x2) {
2253 offset &= 0xffffff;
2254 emitLoadStoreType(i->dType, 0x33);
2255 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2256 emitCachingMode(i->cache, 0x2f);
2257 } else {
2258 emitLoadStoreType(i->dType, 0x38);
2259 emitCachingMode(i->cache, 0x3b);
2260 }
2261 code[0] |= offset << 23;
2262 code[1] |= offset >> 9;
2263
2264 // Locked store on shared memory can fail.
2265 int r = 0, p = -1;
2266 if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2267 i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
2268 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2269 r = -1;
2270 p = 0;
2271 } else if (i->defExists(1)) { // r, p
2272 p = 1;
2273 } else {
2274 assert(!"Expected predicate dest for load locked");
2275 }
2276 }
2277
2278 emitPredicate(i);
2279
2280 if (r >= 0)
2281 defId(i->def(r), 2);
2282 else
2283 code[0] |= 255 << 2;
2284
2285 if (p >= 0)
2286 defId(i->def(p), 32 + 16);
2287
2288 if (i->getIndirect(0, 0)) {
2289 srcId(i->src(0).getIndirect(0), 10);
2290 if (i->getIndirect(0, 0)->reg.size == 8)
2291 code[1] |= 1 << 23;
2292 } else {
2293 code[0] |= 255 << 10;
2294 }
2295 }
2296
2297 uint8_t
getSRegEncoding(const ValueRef & ref)2298 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
2299 {
2300 switch (SDATA(ref).sv.sv) {
2301 case SV_LANEID: return 0x00;
2302 case SV_PHYSID: return 0x03;
2303 case SV_VERTEX_COUNT: return 0x10;
2304 case SV_INVOCATION_ID: return 0x11;
2305 case SV_YDIR: return 0x12;
2306 case SV_THREAD_KILL: return 0x13;
2307 case SV_COMBINED_TID: return 0x20;
2308 case SV_TID: return 0x21 + SDATA(ref).sv.index;
2309 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
2310 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
2311 case SV_GRIDID: return 0x2c;
2312 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
2313 case SV_LBASE: return 0x34;
2314 case SV_SBASE: return 0x30;
2315 case SV_LANEMASK_EQ: return 0x38;
2316 case SV_LANEMASK_LT: return 0x39;
2317 case SV_LANEMASK_LE: return 0x3a;
2318 case SV_LANEMASK_GT: return 0x3b;
2319 case SV_LANEMASK_GE: return 0x3c;
2320 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
2321 default:
2322 assert(!"no sreg for system value");
2323 return 0;
2324 }
2325 }
2326
2327 void
emitMOV(const Instruction * i)2328 CodeEmitterGK110::emitMOV(const Instruction *i)
2329 {
2330 if (i->def(0).getFile() == FILE_PREDICATE) {
2331 if (i->src(0).getFile() == FILE_GPR) {
2332 // Use ISETP.NE.AND dst, PT, src, RZ, PT
2333 code[0] = 0x00000002;
2334 code[1] = 0xdb500000;
2335
2336 code[0] |= 0x7 << 2;
2337 code[0] |= 0xff << 23;
2338 code[1] |= 0x7 << 10;
2339 srcId(i->src(0), 10);
2340 } else
2341 if (i->src(0).getFile() == FILE_PREDICATE) {
2342 // Use PSETP.AND.AND dst, PT, src, PT, PT
2343 code[0] = 0x00000002;
2344 code[1] = 0x84800000;
2345
2346 code[0] |= 0x7 << 2;
2347 code[1] |= 0x7 << 0;
2348 code[1] |= 0x7 << 10;
2349
2350 srcId(i->src(0), 14);
2351 } else {
2352 assert(!"Unexpected source for predicate destination");
2353 emitNOP(i);
2354 }
2355 emitPredicate(i);
2356 defId(i->def(0), 5);
2357 } else
2358 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2359 code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
2360 code[1] = 0x86400000;
2361 emitPredicate(i);
2362 defId(i->def(0), 2);
2363 } else
2364 if (i->src(0).getFile() == FILE_IMMEDIATE) {
2365 code[0] = 0x00000002 | (i->lanes << 14);
2366 code[1] = 0x74000000;
2367 emitPredicate(i);
2368 defId(i->def(0), 2);
2369 setImmediate32(i, 0, Modifier(0));
2370 } else
2371 if (i->src(0).getFile() == FILE_PREDICATE) {
2372 code[0] = 0x00000002;
2373 code[1] = 0x84401c07;
2374 emitPredicate(i);
2375 defId(i->def(0), 2);
2376 srcId(i->src(0), 14);
2377 } else {
2378 emitForm_C(i, 0x24c, 2);
2379 code[1] |= i->lanes << 10;
2380 }
2381 }
2382
2383 static inline bool
uses64bitAddress(const Instruction * ldst)2384 uses64bitAddress(const Instruction *ldst)
2385 {
2386 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2387 ldst->src(0).isIndirect(0) &&
2388 ldst->getIndirect(0, 0)->reg.size == 8;
2389 }
2390
2391 void
emitATOM(const Instruction * i)2392 CodeEmitterGK110::emitATOM(const Instruction *i)
2393 {
2394 const bool hasDst = i->defExists(0);
2395 const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
2396
2397 code[0] = 0x00000002;
2398 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
2399 code[1] = 0x77800000;
2400 else
2401 code[1] = 0x68000000;
2402
2403 switch (i->subOp) {
2404 case NV50_IR_SUBOP_ATOM_CAS: break;
2405 case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
2406 default: code[1] |= i->subOp << 23; break;
2407 }
2408
2409 switch (i->dType) {
2410 case TYPE_U32: break;
2411 case TYPE_S32: code[1] |= 0x00100000; break;
2412 case TYPE_U64: code[1] |= 0x00200000; break;
2413 case TYPE_F32: code[1] |= 0x00300000; break;
2414 case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
2415 case TYPE_S64: code[1] |= 0x00500000; break;
2416 default: assert(!"unsupported type"); break;
2417 }
2418
2419 emitPredicate(i);
2420
2421 /* TODO: cas: check that src regs line up */
2422 /* TODO: cas: flip bits if $r255 is used */
2423 srcId(i->src(1), 23);
2424
2425 if (hasDst) {
2426 defId(i->def(0), 2);
2427 } else
2428 if (!exch) {
2429 code[0] |= 255 << 2;
2430 }
2431
2432 if (hasDst || !exch) {
2433 const int32_t offset = SDATA(i->src(0)).offset;
2434 assert(offset < 0x80000 && offset >= -0x80000);
2435 code[0] |= (offset & 1) << 31;
2436 code[1] |= (offset & 0xffffe) >> 1;
2437 } else {
2438 srcAddr32(i->src(0), 31);
2439 }
2440
2441 if (i->getIndirect(0, 0)) {
2442 srcId(i->getIndirect(0, 0), 10);
2443 if (i->getIndirect(0, 0)->reg.size == 8)
2444 code[1] |= 1 << 19;
2445 } else {
2446 code[0] |= 255 << 10;
2447 }
2448 }
2449
2450 void
emitCCTL(const Instruction * i)2451 CodeEmitterGK110::emitCCTL(const Instruction *i)
2452 {
2453 int32_t offset = SDATA(i->src(0)).offset;
2454
2455 code[0] = 0x00000002 | (i->subOp << 2);
2456
2457 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2458 code[1] = 0x7b000000;
2459 } else {
2460 code[1] = 0x7c000000;
2461 offset &= 0xffffff;
2462 }
2463 code[0] |= offset << 23;
2464 code[1] |= offset >> 9;
2465
2466 if (uses64bitAddress(i))
2467 code[1] |= 1 << 23;
2468 srcId(i->src(0).getIndirect(0), 10);
2469
2470 emitPredicate(i);
2471 }
2472
2473 bool
emitInstruction(Instruction * insn)2474 CodeEmitterGK110::emitInstruction(Instruction *insn)
2475 {
2476 const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
2477
2478 if (insn->encSize != 8) {
2479 ERROR("skipping unencodable instruction: ");
2480 insn->print();
2481 return false;
2482 } else
2483 if (codeSize + size > codeSizeLimit) {
2484 ERROR("code emitter output buffer too small\n");
2485 return false;
2486 }
2487
2488 if (writeIssueDelays) {
2489 int id = (codeSize & 0x3f) / 8 - 1;
2490 if (id < 0) {
2491 id += 1;
2492 code[0] = 0x00000000; // cf issue delay "instruction"
2493 code[1] = 0x08000000;
2494 code += 2;
2495 codeSize += 8;
2496 }
2497 uint32_t *data = code - (id * 2 + 2);
2498
2499 switch (id) {
2500 case 0: data[0] |= insn->sched << 2; break;
2501 case 1: data[0] |= insn->sched << 10; break;
2502 case 2: data[0] |= insn->sched << 18; break;
2503 case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
2504 case 4: data[1] |= insn->sched << 2; break;
2505 case 5: data[1] |= insn->sched << 10; break;
2506 case 6: data[1] |= insn->sched << 18; break;
2507 default:
2508 assert(0);
2509 break;
2510 }
2511 }
2512
2513 // assert that instructions with multiple defs don't corrupt registers
2514 for (int d = 0; insn->defExists(d); ++d)
2515 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2516
2517 switch (insn->op) {
2518 case OP_MOV:
2519 case OP_RDSV:
2520 emitMOV(insn);
2521 break;
2522 case OP_NOP:
2523 break;
2524 case OP_LOAD:
2525 emitLOAD(insn);
2526 break;
2527 case OP_STORE:
2528 emitSTORE(insn);
2529 break;
2530 case OP_LINTERP:
2531 case OP_PINTERP:
2532 emitINTERP(insn);
2533 break;
2534 case OP_VFETCH:
2535 emitVFETCH(insn);
2536 break;
2537 case OP_EXPORT:
2538 emitEXPORT(insn);
2539 break;
2540 case OP_AFETCH:
2541 emitAFETCH(insn);
2542 break;
2543 case OP_PFETCH:
2544 emitPFETCH(insn);
2545 break;
2546 case OP_EMIT:
2547 case OP_RESTART:
2548 emitOUT(insn);
2549 break;
2550 case OP_ADD:
2551 case OP_SUB:
2552 if (insn->dType == TYPE_F64)
2553 emitDADD(insn);
2554 else if (isFloatType(insn->dType))
2555 emitFADD(insn);
2556 else
2557 emitUADD(insn);
2558 break;
2559 case OP_MUL:
2560 if (insn->dType == TYPE_F64)
2561 emitDMUL(insn);
2562 else if (isFloatType(insn->dType))
2563 emitFMUL(insn);
2564 else
2565 emitIMUL(insn);
2566 break;
2567 case OP_MAD:
2568 case OP_FMA:
2569 if (insn->dType == TYPE_F64)
2570 emitDMAD(insn);
2571 else if (isFloatType(insn->dType))
2572 emitFMAD(insn);
2573 else
2574 emitIMAD(insn);
2575 break;
2576 case OP_MADSP:
2577 emitMADSP(insn);
2578 break;
2579 case OP_SAD:
2580 emitISAD(insn);
2581 break;
2582 case OP_SHLADD:
2583 emitSHLADD(insn);
2584 break;
2585 case OP_NOT:
2586 emitNOT(insn);
2587 break;
2588 case OP_AND:
2589 emitLogicOp(insn, 0);
2590 break;
2591 case OP_OR:
2592 emitLogicOp(insn, 1);
2593 break;
2594 case OP_XOR:
2595 emitLogicOp(insn, 2);
2596 break;
2597 case OP_SHL:
2598 case OP_SHR:
2599 if (typeSizeof(insn->sType) == 8)
2600 emitShift64(insn);
2601 else
2602 emitShift(insn);
2603 break;
2604 case OP_SET:
2605 case OP_SET_AND:
2606 case OP_SET_OR:
2607 case OP_SET_XOR:
2608 emitSET(insn->asCmp());
2609 break;
2610 case OP_SELP:
2611 emitSELP(insn);
2612 break;
2613 case OP_SLCT:
2614 emitSLCT(insn->asCmp());
2615 break;
2616 case OP_MIN:
2617 case OP_MAX:
2618 emitMINMAX(insn);
2619 break;
2620 case OP_ABS:
2621 case OP_NEG:
2622 case OP_CEIL:
2623 case OP_FLOOR:
2624 case OP_TRUNC:
2625 case OP_SAT:
2626 emitCVT(insn);
2627 break;
2628 case OP_CVT:
2629 if (insn->def(0).getFile() == FILE_PREDICATE ||
2630 insn->src(0).getFile() == FILE_PREDICATE)
2631 emitMOV(insn);
2632 else
2633 emitCVT(insn);
2634 break;
2635 case OP_RSQ:
2636 emitSFnOp(insn, 5 + 2 * insn->subOp);
2637 break;
2638 case OP_RCP:
2639 emitSFnOp(insn, 4 + 2 * insn->subOp);
2640 break;
2641 case OP_LG2:
2642 emitSFnOp(insn, 3);
2643 break;
2644 case OP_EX2:
2645 emitSFnOp(insn, 2);
2646 break;
2647 case OP_SIN:
2648 emitSFnOp(insn, 1);
2649 break;
2650 case OP_COS:
2651 emitSFnOp(insn, 0);
2652 break;
2653 case OP_PRESIN:
2654 case OP_PREEX2:
2655 emitPreOp(insn);
2656 break;
2657 case OP_TEX:
2658 case OP_TXB:
2659 case OP_TXL:
2660 case OP_TXD:
2661 case OP_TXF:
2662 case OP_TXG:
2663 case OP_TXLQ:
2664 emitTEX(insn->asTex());
2665 break;
2666 case OP_TXQ:
2667 emitTXQ(insn->asTex());
2668 break;
2669 case OP_TEXBAR:
2670 emitTEXBAR(insn);
2671 break;
2672 case OP_PIXLD:
2673 emitPIXLD(insn);
2674 break;
2675 case OP_BRA:
2676 case OP_CALL:
2677 case OP_PRERET:
2678 case OP_RET:
2679 case OP_DISCARD:
2680 case OP_EXIT:
2681 case OP_PRECONT:
2682 case OP_CONT:
2683 case OP_PREBREAK:
2684 case OP_BREAK:
2685 case OP_JOINAT:
2686 case OP_BRKPT:
2687 case OP_QUADON:
2688 case OP_QUADPOP:
2689 emitFlow(insn);
2690 break;
2691 case OP_QUADOP:
2692 emitQUADOP(insn, insn->subOp, insn->lanes);
2693 break;
2694 case OP_DFDX:
2695 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2696 break;
2697 case OP_DFDY:
2698 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2699 break;
2700 case OP_POPCNT:
2701 emitPOPC(insn);
2702 break;
2703 case OP_INSBF:
2704 emitINSBF(insn);
2705 break;
2706 case OP_EXTBF:
2707 emitEXTBF(insn);
2708 break;
2709 case OP_BFIND:
2710 emitBFIND(insn);
2711 break;
2712 case OP_PERMT:
2713 emitPERMT(insn);
2714 break;
2715 case OP_JOIN:
2716 emitNOP(insn);
2717 insn->join = 1;
2718 break;
2719 case OP_BAR:
2720 emitBAR(insn);
2721 break;
2722 case OP_MEMBAR:
2723 emitMEMBAR(insn);
2724 break;
2725 case OP_ATOM:
2726 emitATOM(insn);
2727 break;
2728 case OP_CCTL:
2729 emitCCTL(insn);
2730 break;
2731 case OP_SHFL:
2732 emitSHFL(insn);
2733 break;
2734 case OP_VOTE:
2735 emitVOTE(insn);
2736 break;
2737 case OP_SULDB:
2738 emitSULDGB(insn->asTex());
2739 break;
2740 case OP_SUSTB:
2741 case OP_SUSTP:
2742 emitSUSTGx(insn->asTex());
2743 break;
2744 case OP_SUBFM:
2745 case OP_SUCLAMP:
2746 case OP_SUEAU:
2747 emitSUCalc(insn);
2748 break;
2749 case OP_VSHL:
2750 emitVSHL(insn);
2751 break;
2752 case OP_PHI:
2753 case OP_UNION:
2754 case OP_CONSTRAINT:
2755 ERROR("operation should have been eliminated");
2756 return false;
2757 case OP_EXP:
2758 case OP_LOG:
2759 case OP_SQRT:
2760 case OP_POW:
2761 ERROR("operation should have been lowered\n");
2762 return false;
2763 default:
2764 ERROR("unknown op: %u\n", insn->op);
2765 return false;
2766 }
2767
2768 if (insn->join)
2769 code[0] |= 1 << 22;
2770
2771 code += 2;
2772 codeSize += 8;
2773 return true;
2774 }
2775
2776 uint32_t
getMinEncodingSize(const Instruction * i) const2777 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
2778 {
2779 // No more short instruction encodings.
2780 return 8;
2781 }
2782
2783 void
prepareEmission(Function * func)2784 CodeEmitterGK110::prepareEmission(Function *func)
2785 {
2786 const Target *targ = func->getProgram()->getTarget();
2787
2788 CodeEmitter::prepareEmission(func);
2789
2790 if (targ->hasSWSched)
2791 calculateSchedDataNVC0(targ, func);
2792 }
2793
CodeEmitterGK110(const TargetNVC0 * target,Program::Type type)2794 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target, Program::Type type)
2795 : CodeEmitter(target),
2796 targNVC0(target),
2797 progType(type),
2798 writeIssueDelays(target->hasSWSched)
2799 {
2800 code = NULL;
2801 codeSize = codeSizeLimit = 0;
2802 relocInfo = NULL;
2803 }
2804
2805 CodeEmitter *
createCodeEmitterGK110(Program::Type type)2806 TargetNVC0::createCodeEmitterGK110(Program::Type type)
2807 {
2808 CodeEmitterGK110 *emit = new CodeEmitterGK110(this, type);
2809 return emit;
2810 }
2811
2812 } // namespace nv50_ir
2813