1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25
26 namespace nv50_ir {
27
// Encoding form selectors handed to setSrcFileBits() by the emitForm_*()
// helpers: emitForm_MAD uses LONG, emitForm_MUL uses SHORT, emitForm_IMM uses
// IMM and emitForm_ADD uses LONG_ALT.
#define NV50_OP_ENC_LONG 0
#define NV50_OP_ENC_SHORT 1
#define NV50_OP_ENC_IMM 2
#define NV50_OP_ENC_LONG_ALT 3
32
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36 CodeEmitterNV50(Program::Type, const TargetNV50 *);
37
38 virtual bool emitInstruction(Instruction *);
39
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42 virtual void prepareEmission(Function *);
43
44 private:
45 Program::Type progType;
46
47 const TargetNV50 *targNV50;
48
49 private:
50 inline void defId(const ValueDef&, const int pos);
51 inline void srcId(const ValueRef&, const int pos);
52 inline void srcId(const ValueRef *, const int pos);
53
54 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
55 inline void srcAddr8(const ValueRef&, const int pos);
56
57 void emitFlagsRd(const Instruction *);
58 void emitFlagsWr(const Instruction *);
59
60 void emitCondCode(CondCode cc, DataType ty, int pos);
61
62 inline void setARegBits(unsigned int);
63
64 void setAReg16(const Instruction *, int s);
65 void setImmediate(const Instruction *, int s);
66
67 void setDst(const Value *);
68 void setDst(const Instruction *, int d);
69 void setSrcFileBits(const Instruction *, int enc);
70 void setSrc(const Instruction *, unsigned int s, int slot);
71
72 void emitForm_MAD(const Instruction *);
73 void emitForm_ADD(const Instruction *);
74 void emitForm_MUL(const Instruction *);
75 void emitForm_IMM(const Instruction *);
76
77 void emitLoadStoreSizeLG(DataType ty, int pos);
78 void emitLoadStoreSizeCS(DataType ty);
79
80 void roundMode_MAD(const Instruction *);
81 void roundMode_CVT(RoundMode);
82
83 void emitMNeg12(const Instruction *);
84
85 void emitLOAD(const Instruction *);
86 void emitSTORE(const Instruction *);
87 void emitMOV(const Instruction *);
88 void emitRDSV(const Instruction *);
89 void emitNOP();
90 void emitINTERP(const Instruction *);
91 void emitPFETCH(const Instruction *);
92 void emitOUT(const Instruction *);
93
94 void emitUADD(const Instruction *);
95 void emitAADD(const Instruction *);
96 void emitFADD(const Instruction *);
97 void emitDADD(const Instruction *);
98 void emitIMUL(const Instruction *);
99 void emitFMUL(const Instruction *);
100 void emitDMUL(const Instruction *);
101 void emitFMAD(const Instruction *);
102 void emitDMAD(const Instruction *);
103 void emitIMAD(const Instruction *);
104 void emitISAD(const Instruction *);
105
106 void emitMINMAX(const Instruction *);
107
108 void emitPreOp(const Instruction *);
109 void emitSFnOp(const Instruction *, uint8_t subOp);
110
111 void emitShift(const Instruction *);
112 void emitARL(const Instruction *, unsigned int shl);
113 void emitLogicOp(const Instruction *);
114 void emitNOT(const Instruction *);
115
116 void emitCVT(const Instruction *);
117 void emitSET(const Instruction *);
118
119 void emitTEX(const TexInstruction *);
120 void emitTXQ(const TexInstruction *);
121 void emitTEXPREP(const TexInstruction *);
122
123 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
124
125 void emitFlow(const Instruction *, uint8_t flowOp);
126 void emitPRERETEmu(const FlowInstruction *);
127 void emitBAR(const Instruction *);
128
129 void emitATOM(const Instruction *);
130 };
131
// Shorthand for the register data of the representative value (rep()) behind
// a reference. SDATA is used on sources and DDATA on definitions; both expand
// to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
134
srcId(const ValueRef & src,const int pos)135 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
136 {
137 assert(src.get());
138 code[pos / 32] |= SDATA(src).id << (pos % 32);
139 }
140
srcId(const ValueRef * src,const int pos)141 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
142 {
143 assert(src->get());
144 code[pos / 32] |= SDATA(*src).id << (pos % 32);
145 }
146
srcAddr16(const ValueRef & src,bool adj,const int pos)147 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
148 {
149 assert(src.get());
150
151 int32_t offset = SDATA(src).offset;
152
153 assert(!adj || src.get()->reg.size <= 4);
154 if (adj)
155 offset /= src.get()->reg.size;
156
157 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
158
159 if (offset < 0)
160 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
161
162 code[pos / 32] |= offset << (pos % 32);
163 }
164
srcAddr8(const ValueRef & src,const int pos)165 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
166 {
167 assert(src.get());
168
169 uint32_t offset = SDATA(src).offset;
170
171 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
172
173 code[pos / 32] |= (offset >> 2) << (pos % 32);
174 }
175
defId(const ValueDef & def,const int pos)176 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
177 {
178 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
179
180 code[pos / 32] |= DDATA(def).id << (pos % 32);
181 }
182
183 void
roundMode_MAD(const Instruction * insn)184 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
185 {
186 switch (insn->rnd) {
187 case ROUND_M: code[1] |= 1 << 22; break;
188 case ROUND_P: code[1] |= 2 << 22; break;
189 case ROUND_Z: code[1] |= 3 << 22; break;
190 default:
191 assert(insn->rnd == ROUND_N);
192 break;
193 }
194 }
195
196 void
emitMNeg12(const Instruction * i)197 CodeEmitterNV50::emitMNeg12(const Instruction *i)
198 {
199 code[1] |= i->src(0).mod.neg() << 26;
200 code[1] |= i->src(1).mod.neg() << 27;
201 }
202
emitCondCode(CondCode cc,DataType ty,int pos)203 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
204 {
205 uint8_t enc;
206
207 assert(pos >= 32 || pos <= 27);
208
209 switch (cc) {
210 case CC_LT: enc = 0x1; break;
211 case CC_LTU: enc = 0x9; break;
212 case CC_EQ: enc = 0x2; break;
213 case CC_EQU: enc = 0xa; break;
214 case CC_LE: enc = 0x3; break;
215 case CC_LEU: enc = 0xb; break;
216 case CC_GT: enc = 0x4; break;
217 case CC_GTU: enc = 0xc; break;
218 case CC_NE: enc = 0x5; break;
219 case CC_NEU: enc = 0xd; break;
220 case CC_GE: enc = 0x6; break;
221 case CC_GEU: enc = 0xe; break;
222 case CC_TR: enc = 0xf; break;
223 case CC_FL: enc = 0x0; break;
224
225 case CC_O: enc = 0x10; break;
226 case CC_C: enc = 0x11; break;
227 case CC_A: enc = 0x12; break;
228 case CC_S: enc = 0x13; break;
229 case CC_NS: enc = 0x1c; break;
230 case CC_NA: enc = 0x1d; break;
231 case CC_NC: enc = 0x1e; break;
232 case CC_NO: enc = 0x1f; break;
233
234 default:
235 enc = 0;
236 assert(!"invalid condition code");
237 break;
238 }
239 if (ty != TYPE_NONE && !isFloatType(ty))
240 enc &= ~0x8; // unordered only exists for float types
241
242 code[pos / 32] |= enc << (pos % 32);
243 }
244
245 void
emitFlagsRd(const Instruction * i)246 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
247 {
248 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
249
250 assert(!(code[1] & 0x00003f80));
251
252 if (s >= 0) {
253 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
254 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
255 srcId(i->src(s), 32 + 12);
256 } else {
257 code[1] |= 0x0780;
258 }
259 }
260
261 void
emitFlagsWr(const Instruction * i)262 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
263 {
264 assert(!(code[1] & 0x70));
265
266 int flagsDef = i->flagsDef;
267
268 // find flags definition and check that it is the last def
269 if (flagsDef < 0) {
270 for (int d = 0; i->defExists(d); ++d)
271 if (i->def(d).getFile() == FILE_FLAGS)
272 flagsDef = d;
273 if (flagsDef >= 0 && false) // TODO: enforce use of flagsDef at some point
274 WARN("Instruction::flagsDef was not set properly\n");
275 }
276 if (flagsDef == 0 && i->defExists(1))
277 WARN("flags def should not be the primary definition\n");
278
279 if (flagsDef >= 0)
280 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
281
282 }
283
284 void
setARegBits(unsigned int u)285 CodeEmitterNV50::setARegBits(unsigned int u)
286 {
287 code[0] |= (u & 3) << 26;
288 code[1] |= (u & 4);
289 }
290
291 void
setAReg16(const Instruction * i,int s)292 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
293 {
294 if (i->srcExists(s)) {
295 s = i->src(s).indirect[0];
296 if (s >= 0)
297 setARegBits(SDATA(i->src(s)).id + 1);
298 }
299 }
300
301 void
setImmediate(const Instruction * i,int s)302 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
303 {
304 const ImmediateValue *imm = i->src(s).get()->asImm();
305 assert(imm);
306
307 uint32_t u = imm->reg.data.u32;
308
309 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
310 u = ~u;
311
312 code[1] |= 3;
313 code[0] |= (u & 0x3f) << 16;
314 code[1] |= (u >> 6) << 2;
315 }
316
317 void
setDst(const Value * dst)318 CodeEmitterNV50::setDst(const Value *dst)
319 {
320 const Storage *reg = &dst->join->reg;
321
322 assert(reg->file != FILE_ADDRESS);
323
324 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
325 code[0] |= (127 << 2) | 1;
326 code[1] |= 8;
327 } else {
328 int id;
329 if (reg->file == FILE_SHADER_OUTPUT) {
330 code[1] |= 8;
331 id = reg->data.offset / 4;
332 } else {
333 id = reg->data.id;
334 }
335 code[0] |= id << 2;
336 }
337 }
338
339 void
setDst(const Instruction * i,int d)340 CodeEmitterNV50::setDst(const Instruction *i, int d)
341 {
342 if (i->defExists(d)) {
343 setDst(i->getDef(d));
344 } else
345 if (!d) {
346 code[0] |= 0x01fc; // bit bucket
347 code[1] |= 0x0008;
348 }
349 }
350
351 // 3 * 2 bits:
352 // 0: r
353 // 1: a/s
354 // 2: c
355 // 3: i
356 void
setSrcFileBits(const Instruction * i,int enc)357 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
358 {
359 uint8_t mode = 0;
360
361 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
362 switch (i->src(s).getFile()) {
363 case FILE_GPR:
364 break;
365 case FILE_MEMORY_SHARED:
366 case FILE_SHADER_INPUT:
367 mode |= 1 << (s * 2);
368 break;
369 case FILE_MEMORY_CONST:
370 mode |= 2 << (s * 2);
371 break;
372 case FILE_IMMEDIATE:
373 mode |= 3 << (s * 2);
374 break;
375 default:
376 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
377 assert(0);
378 break;
379 }
380 }
381 switch (mode) {
382 case 0x00: // rrr
383 break;
384 case 0x01: // arr/grr
385 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
386 code[0] |= 0x01800000;
387 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
388 code[1] |= 0x00200000;
389 } else {
390 if (enc == NV50_OP_ENC_SHORT)
391 code[0] |= 0x01000000;
392 else
393 code[1] |= 0x00200000;
394 }
395 break;
396 case 0x03: // irr
397 assert(i->op == OP_MOV);
398 return;
399 case 0x0c: // rir
400 break;
401 case 0x0d: // gir
402 assert(progType == Program::TYPE_GEOMETRY ||
403 progType == Program::TYPE_COMPUTE);
404 code[0] |= 0x01000000;
405 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
406 int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
407 assert(reg < 3);
408 code[0] |= (reg + 1) << 26;
409 }
410 break;
411 case 0x08: // rcr
412 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
413 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
414 break;
415 case 0x09: // acr/gcr
416 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
417 code[0] |= 0x01800000;
418 } else {
419 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
420 code[1] |= 0x00200000;
421 }
422 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
423 break;
424 case 0x20: // rrc
425 code[0] |= 0x01000000;
426 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
427 break;
428 case 0x21: // arc
429 code[0] |= 0x01000000;
430 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
431 assert(progType != Program::TYPE_GEOMETRY);
432 break;
433 default:
434 ERROR("not encodable: %x\n", mode);
435 assert(0);
436 break;
437 }
438 if (progType != Program::TYPE_COMPUTE)
439 return;
440
441 if ((mode & 3) == 1) {
442 const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
443
444 switch (i->sType) {
445 case TYPE_U8:
446 break;
447 case TYPE_U16:
448 code[0] |= 1 << pos;
449 break;
450 case TYPE_S16:
451 code[0] |= 2 << pos;
452 break;
453 default:
454 code[0] |= 3 << pos;
455 assert(i->getSrc(0)->reg.size == 4);
456 break;
457 }
458 }
459 }
460
461 void
setSrc(const Instruction * i,unsigned int s,int slot)462 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
463 {
464 if (Target::operationSrcNr[i->op] <= s)
465 return;
466 const Storage *reg = &i->src(s).rep()->reg;
467
468 unsigned int id = (reg->file == FILE_GPR) ?
469 reg->data.id :
470 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
471
472 switch (slot) {
473 case 0: code[0] |= id << 9; break;
474 case 1: code[0] |= id << 16; break;
475 case 2: code[1] |= id << 14; break;
476 default:
477 assert(0);
478 break;
479 }
480 }
481
482 // the default form:
483 // - long instruction
484 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
485 // - address & flags
486 void
emitForm_MAD(const Instruction * i)487 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
488 {
489 assert(i->encSize == 8);
490 code[0] |= 1;
491
492 emitFlagsRd(i);
493 emitFlagsWr(i);
494
495 setDst(i, 0);
496
497 setSrcFileBits(i, NV50_OP_ENC_LONG);
498 setSrc(i, 0, 0);
499 setSrc(i, 1, 1);
500 setSrc(i, 2, 2);
501
502 if (i->getIndirect(0, 0)) {
503 assert(!i->srcExists(1) || !i->getIndirect(1, 0));
504 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
505 setAReg16(i, 0);
506 } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
507 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
508 setAReg16(i, 1);
509 } else {
510 setAReg16(i, 2);
511 }
512 }
513
514 // like default form, but 2nd source in slot 2, and no 3rd source
515 void
emitForm_ADD(const Instruction * i)516 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
517 {
518 assert(i->encSize == 8);
519 code[0] |= 1;
520
521 emitFlagsRd(i);
522 emitFlagsWr(i);
523
524 setDst(i, 0);
525
526 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
527 setSrc(i, 0, 0);
528 if (i->predSrc != 1)
529 setSrc(i, 1, 2);
530
531 if (i->getIndirect(0, 0)) {
532 assert(!i->getIndirect(1, 0));
533 setAReg16(i, 0);
534 } else {
535 setAReg16(i, 1);
536 }
537 }
538
539 // default short form (rr, ar, rc, gr)
540 void
emitForm_MUL(const Instruction * i)541 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
542 {
543 assert(i->encSize == 4 && !(code[0] & 1));
544 assert(i->defExists(0));
545 assert(!i->getPredicate());
546
547 setDst(i, 0);
548
549 setSrcFileBits(i, NV50_OP_ENC_SHORT);
550 setSrc(i, 0, 0);
551 setSrc(i, 1, 1);
552 }
553
554 // usual immediate form
555 // - 1 to 3 sources where second is immediate (rir, gir)
556 // - no address or predicate possible
557 void
emitForm_IMM(const Instruction * i)558 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
559 {
560 assert(i->encSize == 8);
561 code[0] |= 1;
562
563 assert(i->defExists(0) && i->srcExists(0));
564
565 setDst(i, 0);
566
567 setSrcFileBits(i, NV50_OP_ENC_IMM);
568 if (Target::operationSrcNr[i->op] > 1) {
569 setSrc(i, 0, 0);
570 setImmediate(i, 1);
571 // If there is another source, it has to be the same as the dest reg.
572 } else {
573 setImmediate(i, 0);
574 }
575 }
576
577 void
emitLoadStoreSizeLG(DataType ty,int pos)578 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
579 {
580 uint8_t enc;
581
582 switch (ty) {
583 case TYPE_F32: // fall through
584 case TYPE_S32: // fall through
585 case TYPE_U32: enc = 0x6; break;
586 case TYPE_B128: enc = 0x5; break;
587 case TYPE_F64: // fall through
588 case TYPE_S64: // fall through
589 case TYPE_U64: enc = 0x4; break;
590 case TYPE_S16: enc = 0x3; break;
591 case TYPE_U16: enc = 0x2; break;
592 case TYPE_S8: enc = 0x1; break;
593 case TYPE_U8: enc = 0x0; break;
594 default:
595 enc = 0;
596 assert(!"invalid load/store type");
597 break;
598 }
599 code[pos / 32] |= enc << (pos % 32);
600 }
601
602 void
emitLoadStoreSizeCS(DataType ty)603 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
604 {
605 switch (ty) {
606 case TYPE_U8: break;
607 case TYPE_U16: code[1] |= 0x4000; break;
608 case TYPE_S16: code[1] |= 0x8000; break;
609 case TYPE_F32:
610 case TYPE_S32:
611 case TYPE_U32: code[1] |= 0xc000; break;
612 default:
613 assert(0);
614 break;
615 }
616 }
617
618 void
emitLOAD(const Instruction * i)619 CodeEmitterNV50::emitLOAD(const Instruction *i)
620 {
621 DataFile sf = i->src(0).getFile();
622 ASSERTED int32_t offset = i->getSrc(0)->reg.data.offset;
623
624 switch (sf) {
625 case FILE_SHADER_INPUT:
626 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
627 code[0] = 0x11800001;
628 else
629 // use 'mov' where we can
630 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
631 code[1] = 0x00200000 | (i->lanes << 14);
632 if (typeSizeof(i->dType) == 4)
633 code[1] |= 0x04000000;
634 break;
635 case FILE_MEMORY_SHARED:
636 if (targ->getChipset() >= 0x84) {
637 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
638 code[0] = 0x10000001;
639 code[1] = 0x40000000;
640
641 if (typeSizeof(i->dType) == 4)
642 code[1] |= 0x04000000;
643
644 emitLoadStoreSizeCS(i->sType);
645
646 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
647 code[1] |= 0x00800000;
648 } else {
649 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
650 code[0] = 0x10000001;
651 code[1] = 0x00200000 | (i->lanes << 14);
652 emitLoadStoreSizeCS(i->sType);
653 }
654 break;
655 case FILE_MEMORY_CONST:
656 code[0] = 0x10000001;
657 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
658 if (typeSizeof(i->dType) == 4)
659 code[1] |= 0x04000000;
660 emitLoadStoreSizeCS(i->sType);
661 break;
662 case FILE_MEMORY_LOCAL:
663 code[0] = 0xd0000001;
664 code[1] = 0x40000000;
665 break;
666 case FILE_MEMORY_GLOBAL:
667 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
668 code[1] = 0x80000000;
669 break;
670 default:
671 assert(!"invalid load source file");
672 break;
673 }
674 if (sf == FILE_MEMORY_LOCAL ||
675 sf == FILE_MEMORY_GLOBAL)
676 emitLoadStoreSizeLG(i->sType, 21 + 32);
677
678 setDst(i, 0);
679
680 emitFlagsRd(i);
681 emitFlagsWr(i);
682
683 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
684 srcId(*i->src(0).getIndirect(0), 9);
685 } else {
686 setAReg16(i, 0);
687 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
688 }
689 }
690
691 void
emitSTORE(const Instruction * i)692 CodeEmitterNV50::emitSTORE(const Instruction *i)
693 {
694 DataFile f = i->getSrc(0)->reg.file;
695 int32_t offset = i->getSrc(0)->reg.data.offset;
696
697 switch (f) {
698 case FILE_SHADER_OUTPUT:
699 code[0] = 0x00000001 | ((offset >> 2) << 9);
700 code[1] = 0x80c00000;
701 srcId(i->src(1), 32 + 14);
702 break;
703 case FILE_MEMORY_GLOBAL:
704 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
705 code[1] = 0xa0000000;
706 emitLoadStoreSizeLG(i->dType, 21 + 32);
707 srcId(i->src(1), 2);
708 break;
709 case FILE_MEMORY_LOCAL:
710 code[0] = 0xd0000001;
711 code[1] = 0x60000000;
712 emitLoadStoreSizeLG(i->dType, 21 + 32);
713 srcId(i->src(1), 2);
714 break;
715 case FILE_MEMORY_SHARED:
716 code[0] = 0x00000001;
717 code[1] = 0xe0000000;
718 if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
719 code[1] |= 0x00800000;
720 switch (typeSizeof(i->dType)) {
721 case 1:
722 code[0] |= offset << 9;
723 code[1] |= 0x00400000;
724 break;
725 case 2:
726 code[0] |= (offset >> 1) << 9;
727 break;
728 case 4:
729 code[0] |= (offset >> 2) << 9;
730 code[1] |= 0x04200000;
731 break;
732 default:
733 assert(0);
734 break;
735 }
736 srcId(i->src(1), 32 + 14);
737 break;
738 default:
739 assert(!"invalid store destination file");
740 break;
741 }
742
743 if (f == FILE_MEMORY_GLOBAL)
744 srcId(*i->src(0).getIndirect(0), 9);
745 else
746 setAReg16(i, 0);
747
748 if (f == FILE_MEMORY_LOCAL)
749 srcAddr16(i->src(0), false, 9);
750
751 emitFlagsRd(i);
752 }
753
754 void
emitMOV(const Instruction * i)755 CodeEmitterNV50::emitMOV(const Instruction *i)
756 {
757 DataFile sf = i->getSrc(0)->reg.file;
758 DataFile df = i->getDef(0)->reg.file;
759
760 assert(sf == FILE_GPR || df == FILE_GPR);
761
762 if (sf == FILE_FLAGS) {
763 assert(i->flagsSrc >= 0);
764 code[0] = 0x00000001;
765 code[1] = 0x20000000;
766 defId(i->def(0), 2);
767 emitFlagsRd(i);
768 } else
769 if (sf == FILE_ADDRESS) {
770 code[0] = 0x00000001;
771 code[1] = 0x40000000;
772 defId(i->def(0), 2);
773 setARegBits(SDATA(i->src(0)).id + 1);
774 emitFlagsRd(i);
775 } else
776 if (df == FILE_FLAGS) {
777 assert(i->flagsDef >= 0);
778 code[0] = 0x00000001;
779 code[1] = 0xa0000000;
780 srcId(i->src(0), 9);
781 emitFlagsRd(i);
782 emitFlagsWr(i);
783 } else
784 if (sf == FILE_IMMEDIATE) {
785 code[0] = 0x10000001;
786 code[1] = 0x00000003;
787 emitForm_IMM(i);
788
789 code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
790 } else {
791 if (i->encSize == 4) {
792 code[0] = 0x10000000;
793 code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
794 defId(i->def(0), 2);
795 } else {
796 code[0] = 0x10000001;
797 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
798 code[1] |= (i->lanes << 14);
799 setDst(i, 0);
800 emitFlagsRd(i);
801 }
802 srcId(i->src(0), 9);
803 }
804 if (df == FILE_SHADER_OUTPUT) {
805 assert(i->encSize == 8);
806 code[1] |= 0x8;
807 }
808 }
809
getSRegEncoding(const ValueRef & ref)810 static inline uint8_t getSRegEncoding(const ValueRef &ref)
811 {
812 switch (SDATA(ref).sv.sv) {
813 case SV_PHYSID: return 0;
814 case SV_CLOCK: return 1;
815 case SV_VERTEX_STRIDE: return 3;
816 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
817 case SV_SAMPLE_INDEX: return 8;
818 default:
819 assert(!"no sreg for system value");
820 return 0;
821 }
822 }
823
824 void
emitRDSV(const Instruction * i)825 CodeEmitterNV50::emitRDSV(const Instruction *i)
826 {
827 code[0] = 0x00000001;
828 code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
829 defId(i->def(0), 2);
830 emitFlagsRd(i);
831 }
832
833 void
emitNOP()834 CodeEmitterNV50::emitNOP()
835 {
836 code[0] = 0xf0000001;
837 code[1] = 0xe0000000;
838 }
839
840 void
emitQUADOP(const Instruction * i,uint8_t lane,uint8_t quOp)841 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
842 {
843 code[0] = 0xc0000000 | (lane << 16);
844 code[1] = 0x80000000;
845
846 code[0] |= (quOp & 0x03) << 20;
847 code[1] |= (quOp & 0xfc) << 20;
848
849 emitForm_ADD(i);
850
851 if (!i->srcExists(1) || i->predSrc == 1)
852 srcId(i->src(0), 32 + 14);
853 }
854
855 /* NOTE: This returns the base address of a vertex inside the primitive.
856 * src0 is an immediate, the index (not offset) of the vertex
857 * inside the primitive. XXX: signed or unsigned ?
858 * src1 (may be NULL) should use whatever units the hardware requires
859 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
860 */
861 void
emitPFETCH(const Instruction * i)862 CodeEmitterNV50::emitPFETCH(const Instruction *i)
863 {
864 const uint32_t prim = i->src(0).get()->reg.data.u32;
865 assert(prim <= 127);
866
867 if (i->def(0).getFile() == FILE_ADDRESS) {
868 // shl $aX a[] 0
869 code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
870 code[1] = 0xc0200000;
871 code[0] |= prim << 9;
872 assert(!i->srcExists(1));
873 } else
874 if (i->srcExists(1)) {
875 // ld b32 $rX a[$aX+base]
876 code[0] = 0x00000001;
877 code[1] = 0x04200000 | (0xf << 14);
878 defId(i->def(0), 2);
879 code[0] |= prim << 9;
880 setARegBits(SDATA(i->src(1)).id + 1);
881 } else {
882 // mov b32 $rX a[]
883 code[0] = 0x10000001;
884 code[1] = 0x04200000 | (0xf << 14);
885 defId(i->def(0), 2);
886 code[0] |= prim << 9;
887 }
888 emitFlagsRd(i);
889 }
890
891 void
nv50_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)892 nv50_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
893 {
894 int ipa = entry->ipa;
895 int encSize = entry->reg;
896 int loc = entry->loc;
897
898 if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
899 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
900 if (data.force_persample_interp) {
901 if (encSize == 8)
902 code[loc + 1] |= 1 << 16;
903 else
904 code[loc + 0] |= 1 << 24;
905 } else {
906 if (encSize == 8)
907 code[loc + 1] &= ~(1 << 16);
908 else
909 code[loc + 0] &= ~(1 << 24);
910 }
911 }
912 }
913
914 void
emitINTERP(const Instruction * i)915 CodeEmitterNV50::emitINTERP(const Instruction *i)
916 {
917 code[0] = 0x80000000;
918
919 defId(i->def(0), 2);
920 srcAddr8(i->src(0), 16);
921 setAReg16(i, 0);
922
923 if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
924 code[0] |= 1 << 8;
925 } else {
926 if (i->op == OP_PINTERP) {
927 code[0] |= 1 << 25;
928 srcId(i->src(1), 9);
929 }
930 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
931 code[0] |= 1 << 24;
932 }
933
934 if (i->encSize == 8) {
935 if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
936 code[1] = 4 << 16;
937 else
938 code[1] = (code[0] & (3 << 24)) >> (24 - 16);
939 code[0] &= ~0x03000000;
940 code[0] |= 1;
941 emitFlagsRd(i);
942 }
943
944 addInterp(i->ipa, i->encSize, nv50_interpApply);
945 }
946
947 void
emitMINMAX(const Instruction * i)948 CodeEmitterNV50::emitMINMAX(const Instruction *i)
949 {
950 if (i->dType == TYPE_F64) {
951 code[0] = 0xe0000000;
952 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
953 } else {
954 code[0] = 0x30000000;
955 code[1] = 0x80000000;
956 if (i->op == OP_MIN)
957 code[1] |= 0x20000000;
958
959 switch (i->dType) {
960 case TYPE_F32: code[0] |= 0x80000000; break;
961 case TYPE_S32: code[1] |= 0x8c000000; break;
962 case TYPE_U32: code[1] |= 0x84000000; break;
963 case TYPE_S16: code[1] |= 0x80000000; break;
964 case TYPE_U16: break;
965 default:
966 assert(0);
967 break;
968 }
969 }
970
971 code[1] |= i->src(0).mod.abs() << 20;
972 code[1] |= i->src(0).mod.neg() << 26;
973 code[1] |= i->src(1).mod.abs() << 19;
974 code[1] |= i->src(1).mod.neg() << 27;
975
976 emitForm_MAD(i);
977 }
978
979 void
emitFMAD(const Instruction * i)980 CodeEmitterNV50::emitFMAD(const Instruction *i)
981 {
982 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
983 const int neg_add = i->src(2).mod.neg();
984
985 code[0] = 0xe0000000;
986
987 if (i->src(1).getFile() == FILE_IMMEDIATE) {
988 code[1] = 0;
989 emitForm_IMM(i);
990 code[0] |= neg_mul << 15;
991 code[0] |= neg_add << 22;
992 if (i->saturate)
993 code[0] |= 1 << 8;
994 } else
995 if (i->encSize == 4) {
996 emitForm_MUL(i);
997 code[0] |= neg_mul << 15;
998 code[0] |= neg_add << 22;
999 if (i->saturate)
1000 code[0] |= 1 << 8;
1001 } else {
1002 code[1] = neg_mul << 26;
1003 code[1] |= neg_add << 27;
1004 if (i->saturate)
1005 code[1] |= 1 << 29;
1006 emitForm_MAD(i);
1007 }
1008 }
1009
1010 void
emitDMAD(const Instruction * i)1011 CodeEmitterNV50::emitDMAD(const Instruction *i)
1012 {
1013 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1014 const int neg_add = i->src(2).mod.neg();
1015
1016 assert(i->encSize == 8);
1017 assert(!i->saturate);
1018
1019 code[1] = 0x40000000;
1020 code[0] = 0xe0000000;
1021
1022 code[1] |= neg_mul << 26;
1023 code[1] |= neg_add << 27;
1024
1025 roundMode_MAD(i);
1026
1027 emitForm_MAD(i);
1028 }
1029
1030 void
emitFADD(const Instruction * i)1031 CodeEmitterNV50::emitFADD(const Instruction *i)
1032 {
1033 const int neg0 = i->src(0).mod.neg();
1034 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1035
1036 code[0] = 0xb0000000;
1037
1038 assert(!(i->src(0).mod | i->src(1).mod).abs());
1039
1040 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1041 code[1] = 0;
1042 emitForm_IMM(i);
1043 code[0] |= neg0 << 15;
1044 code[0] |= neg1 << 22;
1045 if (i->saturate)
1046 code[0] |= 1 << 8;
1047 } else
1048 if (i->encSize == 8) {
1049 code[1] = 0;
1050 emitForm_ADD(i);
1051 code[1] |= neg0 << 26;
1052 code[1] |= neg1 << 27;
1053 if (i->saturate)
1054 code[1] |= 1 << 29;
1055 } else {
1056 emitForm_MUL(i);
1057 code[0] |= neg0 << 15;
1058 code[0] |= neg1 << 22;
1059 if (i->saturate)
1060 code[0] |= 1 << 8;
1061 }
1062 }
1063
1064 void
emitDADD(const Instruction * i)1065 CodeEmitterNV50::emitDADD(const Instruction *i)
1066 {
1067 const int neg0 = i->src(0).mod.neg();
1068 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1069
1070 assert(!(i->src(0).mod | i->src(1).mod).abs());
1071 assert(!i->saturate);
1072 assert(i->encSize == 8);
1073
1074 code[1] = 0x60000000;
1075 code[0] = 0xe0000000;
1076
1077 emitForm_ADD(i);
1078
1079 code[1] |= neg0 << 26;
1080 code[1] |= neg1 << 27;
1081 }
1082
1083 void
emitUADD(const Instruction * i)1084 CodeEmitterNV50::emitUADD(const Instruction *i)
1085 {
1086 const int neg0 = i->src(0).mod.neg();
1087 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1088
1089 code[0] = 0x20000000;
1090
1091 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1092 code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
1093 code[1] = 0;
1094 emitForm_IMM(i);
1095 } else
1096 if (i->encSize == 8) {
1097 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1098 emitForm_ADD(i);
1099 } else {
1100 code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
1101 emitForm_MUL(i);
1102 }
1103 assert(!(neg0 && neg1));
1104 code[0] |= neg0 << 28;
1105 code[0] |= neg1 << 22;
1106
1107 if (i->flagsSrc >= 0) {
1108 // addc == sub | subr
1109 assert(!(code[0] & 0x10400000) && !i->getPredicate());
1110 code[0] |= 0x10400000;
1111 srcId(i->src(i->flagsSrc), 32 + 12);
1112 }
1113 }
1114
1115 void
emitAADD(const Instruction * i)1116 CodeEmitterNV50::emitAADD(const Instruction *i)
1117 {
1118 const int s = (i->op == OP_MOV) ? 0 : 1;
1119
1120 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1121 code[1] = 0x20000000;
1122
1123 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1124
1125 emitFlagsRd(i);
1126
1127 if (s && i->srcExists(0))
1128 setARegBits(SDATA(i->src(0)).id + 1);
1129 }
1130
1131 void
emitIMUL(const Instruction * i)1132 CodeEmitterNV50::emitIMUL(const Instruction *i)
1133 {
1134 code[0] = 0x40000000;
1135
1136 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1137 if (i->sType == TYPE_S16)
1138 code[0] |= 0x8100;
1139 code[1] = 0;
1140 emitForm_IMM(i);
1141 } else
1142 if (i->encSize == 8) {
1143 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1144 emitForm_MAD(i);
1145 } else {
1146 if (i->sType == TYPE_S16)
1147 code[0] |= 0x8100;
1148 emitForm_MUL(i);
1149 }
1150 }
1151
1152 void
emitFMUL(const Instruction * i)1153 CodeEmitterNV50::emitFMUL(const Instruction *i)
1154 {
1155 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1156
1157 code[0] = 0xc0000000;
1158
1159 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1160 code[1] = 0;
1161 emitForm_IMM(i);
1162 if (neg)
1163 code[0] |= 0x8000;
1164 if (i->saturate)
1165 code[0] |= 1 << 8;
1166 } else
1167 if (i->encSize == 8) {
1168 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1169 if (neg)
1170 code[1] |= 0x08000000;
1171 if (i->saturate)
1172 code[1] |= 1 << 20;
1173 emitForm_MAD(i);
1174 } else {
1175 emitForm_MUL(i);
1176 if (neg)
1177 code[0] |= 0x8000;
1178 if (i->saturate)
1179 code[0] |= 1 << 8;
1180 }
1181 }
1182
1183 void
emitDMUL(const Instruction * i)1184 CodeEmitterNV50::emitDMUL(const Instruction *i)
1185 {
1186 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1187
1188 assert(!i->saturate);
1189 assert(i->encSize == 8);
1190
1191 code[1] = 0x80000000;
1192 code[0] = 0xe0000000;
1193
1194 if (neg)
1195 code[1] |= 0x08000000;
1196
1197 roundMode_CVT(i->rnd);
1198
1199 emitForm_MAD(i);
1200 }
1201
1202 void
emitIMAD(const Instruction * i)1203 CodeEmitterNV50::emitIMAD(const Instruction *i)
1204 {
1205 int mode;
1206 code[0] = 0x60000000;
1207
1208 assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1209 if (!isSignedType(i->sType))
1210 mode = 0;
1211 else if (i->saturate)
1212 mode = 2;
1213 else
1214 mode = 1;
1215
1216 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1217 code[1] = 0;
1218 emitForm_IMM(i);
1219 code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1220 if (i->flagsSrc >= 0) {
1221 assert(!(code[0] & 0x10400000));
1222 assert(SDATA(i->src(i->flagsSrc)).id == 0);
1223 code[0] |= 0x10400000;
1224 }
1225 } else
1226 if (i->encSize == 4) {
1227 emitForm_MUL(i);
1228 code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1229 if (i->flagsSrc >= 0) {
1230 assert(!(code[0] & 0x10400000));
1231 assert(SDATA(i->src(i->flagsSrc)).id == 0);
1232 code[0] |= 0x10400000;
1233 }
1234 } else {
1235 code[1] = mode << 29;
1236 emitForm_MAD(i);
1237
1238 if (i->flagsSrc >= 0) {
1239 // add with carry from $cX
1240 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1241 code[1] |= 0xc << 24;
1242 srcId(i->src(i->flagsSrc), 32 + 12);
1243 }
1244 }
1245 }
1246
1247 void
emitISAD(const Instruction * i)1248 CodeEmitterNV50::emitISAD(const Instruction *i)
1249 {
1250 if (i->encSize == 8) {
1251 code[0] = 0x50000000;
1252 switch (i->sType) {
1253 case TYPE_U32: code[1] = 0x04000000; break;
1254 case TYPE_S32: code[1] = 0x0c000000; break;
1255 case TYPE_U16: code[1] = 0x00000000; break;
1256 case TYPE_S16: code[1] = 0x08000000; break;
1257 default:
1258 assert(0);
1259 break;
1260 }
1261 emitForm_MAD(i);
1262 } else {
1263 switch (i->sType) {
1264 case TYPE_U32: code[0] = 0x50008000; break;
1265 case TYPE_S32: code[0] = 0x50008100; break;
1266 case TYPE_U16: code[0] = 0x50000000; break;
1267 case TYPE_S16: code[0] = 0x50000100; break;
1268 default:
1269 assert(0);
1270 break;
1271 }
1272 emitForm_MUL(i);
1273 }
1274 }
1275
1276 static void
alphatestSet(const FixupEntry * entry,uint32_t * code,const FixupData & data)1277 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1278 {
1279 int loc = entry->loc;
1280 int enc;
1281
1282 switch (data.alphatest) {
1283 case PIPE_FUNC_NEVER: enc = 0x0; break;
1284 case PIPE_FUNC_LESS: enc = 0x1; break;
1285 case PIPE_FUNC_EQUAL: enc = 0x2; break;
1286 case PIPE_FUNC_LEQUAL: enc = 0x3; break;
1287 case PIPE_FUNC_GREATER: enc = 0x4; break;
1288 case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
1289 case PIPE_FUNC_GEQUAL: enc = 0x6; break;
1290 default:
1291 case PIPE_FUNC_ALWAYS: enc = 0xf; break;
1292 }
1293
1294 code[loc + 1] &= ~(0x1f << 14);
1295 code[loc + 1] |= enc << 14;
1296 }
1297
1298 void
emitSET(const Instruction * i)1299 CodeEmitterNV50::emitSET(const Instruction *i)
1300 {
1301 code[0] = 0x30000000;
1302 code[1] = 0x60000000;
1303
1304 switch (i->sType) {
1305 case TYPE_F64:
1306 code[0] = 0xe0000000;
1307 code[1] = 0xe0000000;
1308 break;
1309 case TYPE_F32: code[0] |= 0x80000000; break;
1310 case TYPE_S32: code[1] |= 0x0c000000; break;
1311 case TYPE_U32: code[1] |= 0x04000000; break;
1312 case TYPE_S16: code[1] |= 0x08000000; break;
1313 case TYPE_U16: break;
1314 default:
1315 assert(0);
1316 break;
1317 }
1318
1319 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1320
1321 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1322 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1323 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1324 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1325
1326 emitForm_MAD(i);
1327
1328 if (i->subOp == 1) {
1329 addInterp(0, 0, alphatestSet);
1330 }
1331 }
1332
1333 void
roundMode_CVT(RoundMode rnd)1334 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1335 {
1336 switch (rnd) {
1337 case ROUND_NI: code[1] |= 0x08000000; break;
1338 case ROUND_M: code[1] |= 0x00020000; break;
1339 case ROUND_MI: code[1] |= 0x08020000; break;
1340 case ROUND_P: code[1] |= 0x00040000; break;
1341 case ROUND_PI: code[1] |= 0x08040000; break;
1342 case ROUND_Z: code[1] |= 0x00060000; break;
1343 case ROUND_ZI: code[1] |= 0x08060000; break;
1344 default:
1345 assert(rnd == ROUND_N);
1346 break;
1347 }
1348 }
1349
1350 void
emitCVT(const Instruction * i)1351 CodeEmitterNV50::emitCVT(const Instruction *i)
1352 {
1353 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1354 RoundMode rnd;
1355 DataType dType;
1356
1357 switch (i->op) {
1358 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1359 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1360 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1361 default:
1362 rnd = i->rnd;
1363 break;
1364 }
1365
1366 if (i->op == OP_NEG && i->dType == TYPE_U32)
1367 dType = TYPE_S32;
1368 else
1369 dType = i->dType;
1370
1371 code[0] = 0xa0000000;
1372
1373 switch (dType) {
1374 case TYPE_F64:
1375 switch (i->sType) {
1376 case TYPE_F64: code[1] = 0xc4404000; break;
1377 case TYPE_S64: code[1] = 0x44414000; break;
1378 case TYPE_U64: code[1] = 0x44404000; break;
1379 case TYPE_F32: code[1] = 0xc4400000; break;
1380 case TYPE_S32: code[1] = 0x44410000; break;
1381 case TYPE_U32: code[1] = 0x44400000; break;
1382 default:
1383 assert(0);
1384 break;
1385 }
1386 break;
1387 case TYPE_S64:
1388 switch (i->sType) {
1389 case TYPE_F64: code[1] = 0x8c404000; break;
1390 case TYPE_F32: code[1] = 0x8c400000; break;
1391 default:
1392 assert(0);
1393 break;
1394 }
1395 break;
1396 case TYPE_U64:
1397 switch (i->sType) {
1398 case TYPE_F64: code[1] = 0x84404000; break;
1399 case TYPE_F32: code[1] = 0x84400000; break;
1400 default:
1401 assert(0);
1402 break;
1403 }
1404 break;
1405 case TYPE_F32:
1406 switch (i->sType) {
1407 case TYPE_F64: code[1] = 0xc0404000; break;
1408 case TYPE_S64: code[1] = 0x40414000; break;
1409 case TYPE_U64: code[1] = 0x40404000; break;
1410 case TYPE_F32: code[1] = 0xc4004000; break;
1411 case TYPE_S32: code[1] = 0x44014000; break;
1412 case TYPE_U32: code[1] = 0x44004000; break;
1413 case TYPE_F16: code[1] = 0xc4000000; break;
1414 case TYPE_U16: code[1] = 0x44000000; break;
1415 case TYPE_S16: code[1] = 0x44010000; break;
1416 case TYPE_S8: code[1] = 0x44018000; break;
1417 case TYPE_U8: code[1] = 0x44008000; break;
1418 default:
1419 assert(0);
1420 break;
1421 }
1422 break;
1423 case TYPE_S32:
1424 switch (i->sType) {
1425 case TYPE_F64: code[1] = 0x88404000; break;
1426 case TYPE_F32: code[1] = 0x8c004000; break;
1427 case TYPE_S32: code[1] = 0x0c014000; break;
1428 case TYPE_U32: code[1] = 0x0c004000; break;
1429 case TYPE_F16: code[1] = 0x8c000000; break;
1430 case TYPE_S16: code[1] = 0x0c010000; break;
1431 case TYPE_U16: code[1] = 0x0c000000; break;
1432 case TYPE_S8: code[1] = 0x0c018000; break;
1433 case TYPE_U8: code[1] = 0x0c008000; break;
1434 default:
1435 assert(0);
1436 break;
1437 }
1438 break;
1439 case TYPE_U32:
1440 switch (i->sType) {
1441 case TYPE_F64: code[1] = 0x80404000; break;
1442 case TYPE_F32: code[1] = 0x84004000; break;
1443 case TYPE_S32: code[1] = 0x04014000; break;
1444 case TYPE_U32: code[1] = 0x04004000; break;
1445 case TYPE_F16: code[1] = 0x84000000; break;
1446 case TYPE_S16: code[1] = 0x04010000; break;
1447 case TYPE_U16: code[1] = 0x04000000; break;
1448 case TYPE_S8: code[1] = 0x04018000; break;
1449 case TYPE_U8: code[1] = 0x04008000; break;
1450 default:
1451 assert(0);
1452 break;
1453 }
1454 break;
1455 case TYPE_F16:
1456 switch (i->sType) {
1457 case TYPE_F16: code[1] = 0xc0000000; break;
1458 case TYPE_F32: code[1] = 0xc0004000; break;
1459 default:
1460 assert(0);
1461 break;
1462 }
1463 break;
1464 case TYPE_S16:
1465 switch (i->sType) {
1466 case TYPE_F32: code[1] = 0x88004000; break;
1467 case TYPE_S32: code[1] = 0x08014000; break;
1468 case TYPE_U32: code[1] = 0x08004000; break;
1469 case TYPE_F16: code[1] = 0x88000000; break;
1470 case TYPE_S16: code[1] = 0x08010000; break;
1471 case TYPE_U16: code[1] = 0x08000000; break;
1472 case TYPE_S8: code[1] = 0x08018000; break;
1473 case TYPE_U8: code[1] = 0x08008000; break;
1474 default:
1475 assert(0);
1476 break;
1477 }
1478 break;
1479 case TYPE_U16:
1480 switch (i->sType) {
1481 case TYPE_F32: code[1] = 0x80004000; break;
1482 case TYPE_S32: code[1] = 0x00014000; break;
1483 case TYPE_U32: code[1] = 0x00004000; break;
1484 case TYPE_F16: code[1] = 0x80000000; break;
1485 case TYPE_S16: code[1] = 0x00010000; break;
1486 case TYPE_U16: code[1] = 0x00000000; break;
1487 case TYPE_S8: code[1] = 0x00018000; break;
1488 case TYPE_U8: code[1] = 0x00008000; break;
1489 default:
1490 assert(0);
1491 break;
1492 }
1493 break;
1494 case TYPE_S8:
1495 switch (i->sType) {
1496 case TYPE_S32: code[1] = 0x08094000; break;
1497 case TYPE_U32: code[1] = 0x08084000; break;
1498 case TYPE_F16: code[1] = 0x88080000; break;
1499 case TYPE_S16: code[1] = 0x08090000; break;
1500 case TYPE_U16: code[1] = 0x08080000; break;
1501 case TYPE_S8: code[1] = 0x08098000; break;
1502 case TYPE_U8: code[1] = 0x08088000; break;
1503 default:
1504 assert(0);
1505 break;
1506 }
1507 break;
1508 case TYPE_U8:
1509 switch (i->sType) {
1510 case TYPE_S32: code[1] = 0x00094000; break;
1511 case TYPE_U32: code[1] = 0x00084000; break;
1512 case TYPE_F16: code[1] = 0x80080000; break;
1513 case TYPE_S16: code[1] = 0x00090000; break;
1514 case TYPE_U16: code[1] = 0x00080000; break;
1515 case TYPE_S8: code[1] = 0x00098000; break;
1516 case TYPE_U8: code[1] = 0x00088000; break;
1517 default:
1518 assert(0);
1519 break;
1520 }
1521 break;
1522 default:
1523 assert(0);
1524 break;
1525 }
1526 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1527 code[1] |= 0x00004000;
1528
1529 roundMode_CVT(rnd);
1530
1531 switch (i->op) {
1532 case OP_ABS: code[1] |= 1 << 20; break;
1533 case OP_SAT: code[1] |= 1 << 19; break;
1534 case OP_NEG: code[1] |= 1 << 29; break;
1535 default:
1536 break;
1537 }
1538 code[1] ^= i->src(0).mod.neg() << 29;
1539 code[1] |= i->src(0).mod.abs() << 20;
1540 if (i->saturate)
1541 code[1] |= 1 << 19;
1542
1543 assert(i->op != OP_ABS || !i->src(0).mod.neg());
1544
1545 emitForm_MAD(i);
1546 }
1547
1548 void
emitPreOp(const Instruction * i)1549 CodeEmitterNV50::emitPreOp(const Instruction *i)
1550 {
1551 code[0] = 0xb0000000;
1552 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1553
1554 code[1] |= i->src(0).mod.abs() << 20;
1555 code[1] |= i->src(0).mod.neg() << 26;
1556
1557 emitForm_MAD(i);
1558 }
1559
1560 void
emitSFnOp(const Instruction * i,uint8_t subOp)1561 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1562 {
1563 code[0] = 0x90000000;
1564
1565 if (i->encSize == 4) {
1566 assert(i->op == OP_RCP);
1567 assert(!i->saturate);
1568 code[0] |= i->src(0).mod.abs() << 15;
1569 code[0] |= i->src(0).mod.neg() << 22;
1570 emitForm_MUL(i);
1571 } else {
1572 code[1] = subOp << 29;
1573 code[1] |= i->src(0).mod.abs() << 20;
1574 code[1] |= i->src(0).mod.neg() << 26;
1575 if (i->saturate) {
1576 assert(subOp == 6 && i->op == OP_EX2);
1577 code[1] |= 1 << 27;
1578 }
1579 emitForm_MAD(i);
1580 }
1581 }
1582
1583 void
emitNOT(const Instruction * i)1584 CodeEmitterNV50::emitNOT(const Instruction *i)
1585 {
1586 code[0] = 0xd0000000;
1587 code[1] = 0x0002c000;
1588
1589 switch (i->sType) {
1590 case TYPE_U32:
1591 case TYPE_S32:
1592 code[1] |= 0x04000000;
1593 break;
1594 default:
1595 break;
1596 }
1597 emitForm_MAD(i);
1598 setSrc(i, 0, 1);
1599 }
1600
1601 void
emitLogicOp(const Instruction * i)1602 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1603 {
1604 code[0] = 0xd0000000;
1605 code[1] = 0;
1606
1607 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1608 switch (i->op) {
1609 case OP_OR: code[0] |= 0x0100; break;
1610 case OP_XOR: code[0] |= 0x8000; break;
1611 default:
1612 assert(i->op == OP_AND);
1613 break;
1614 }
1615 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1616 code[0] |= 1 << 22;
1617
1618 emitForm_IMM(i);
1619 } else {
1620 switch (i->op) {
1621 case OP_AND: code[1] = 0x00000000; break;
1622 case OP_OR: code[1] = 0x00004000; break;
1623 case OP_XOR: code[1] = 0x00008000; break;
1624 default:
1625 assert(0);
1626 break;
1627 }
1628 if (typeSizeof(i->dType) == 4)
1629 code[1] |= 0x04000000;
1630 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1631 code[1] |= 1 << 16;
1632 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1633 code[1] |= 1 << 17;
1634
1635 emitForm_MAD(i);
1636 }
1637 }
1638
1639 void
emitARL(const Instruction * i,unsigned int shl)1640 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1641 {
1642 code[0] = 0x00000001 | (shl << 16);
1643 code[1] = 0xc0000000;
1644
1645 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1646
1647 setSrcFileBits(i, NV50_OP_ENC_IMM);
1648 setSrc(i, 0, 0);
1649 emitFlagsRd(i);
1650 }
1651
1652 void
emitShift(const Instruction * i)1653 CodeEmitterNV50::emitShift(const Instruction *i)
1654 {
1655 if (i->def(0).getFile() == FILE_ADDRESS) {
1656 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1657 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1658 } else {
1659 code[0] = 0x30000001;
1660 code[1] = (i->op == OP_SHR) ? 0xe0000000 : 0xc0000000;
1661 if (typeSizeof(i->dType) == 4)
1662 code[1] |= 0x04000000;
1663 if (i->op == OP_SHR && isSignedType(i->sType))
1664 code[1] |= 1 << 27;
1665
1666 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1667 code[1] |= 1 << 20;
1668 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1669 defId(i->def(0), 2);
1670 srcId(i->src(0), 9);
1671 emitFlagsRd(i);
1672 } else {
1673 emitForm_MAD(i);
1674 }
1675 }
1676 }
1677
1678 void
emitOUT(const Instruction * i)1679 CodeEmitterNV50::emitOUT(const Instruction *i)
1680 {
1681 code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1682 code[1] = 0xc0000000;
1683
1684 emitFlagsRd(i);
1685 }
1686
1687 void
emitTEX(const TexInstruction * i)1688 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1689 {
1690 code[0] = 0xf0000001;
1691 code[1] = 0x00000000;
1692
1693 switch (i->op) {
1694 case OP_TXB:
1695 code[1] = 0x20000000;
1696 break;
1697 case OP_TXL:
1698 code[1] = 0x40000000;
1699 break;
1700 case OP_TXF:
1701 code[0] |= 0x01000000;
1702 break;
1703 case OP_TXG:
1704 code[0] |= 0x01000000;
1705 code[1] = 0x80000000;
1706 break;
1707 case OP_TXLQ:
1708 code[1] = 0x60020000;
1709 break;
1710 default:
1711 assert(i->op == OP_TEX);
1712 break;
1713 }
1714
1715 code[0] |= i->tex.r << 9;
1716 code[0] |= i->tex.s << 17;
1717
1718 int argc = i->tex.target.getArgCount();
1719
1720 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1721 argc += 1;
1722 if (i->tex.target.isShadow())
1723 argc += 1;
1724 assert(argc <= 4);
1725
1726 code[0] |= (argc - 1) << 22;
1727
1728 if (i->tex.target.isCube()) {
1729 code[0] |= 0x08000000;
1730 } else
1731 if (i->tex.useOffsets) {
1732 code[1] |= (i->tex.offset[0] & 0xf) << 24;
1733 code[1] |= (i->tex.offset[1] & 0xf) << 20;
1734 code[1] |= (i->tex.offset[2] & 0xf) << 16;
1735 }
1736
1737 code[0] |= (i->tex.mask & 0x3) << 25;
1738 code[1] |= (i->tex.mask & 0xc) << 12;
1739
1740 if (i->tex.liveOnly)
1741 code[1] |= 1 << 2;
1742 if (i->tex.derivAll)
1743 code[1] |= 1 << 3;
1744
1745 defId(i->def(0), 2);
1746
1747 emitFlagsRd(i);
1748 }
1749
1750 void
emitTXQ(const TexInstruction * i)1751 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1752 {
1753 assert(i->tex.query == TXQ_DIMS);
1754
1755 code[0] = 0xf0000001;
1756 code[1] = 0x60000000;
1757
1758 code[0] |= i->tex.r << 9;
1759 code[0] |= i->tex.s << 17;
1760
1761 code[0] |= (i->tex.mask & 0x3) << 25;
1762 code[1] |= (i->tex.mask & 0xc) << 12;
1763
1764 defId(i->def(0), 2);
1765
1766 emitFlagsRd(i);
1767 }
1768
1769 void
emitTEXPREP(const TexInstruction * i)1770 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1771 {
1772 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1773 code[1] = 0x60010000;
1774
1775 code[0] |= (i->tex.mask & 0x3) << 25;
1776 code[1] |= (i->tex.mask & 0xc) << 12;
1777 defId(i->def(0), 2);
1778
1779 emitFlagsRd(i);
1780 }
1781
1782 void
emitPRERETEmu(const FlowInstruction * i)1783 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1784 {
1785 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1786
1787 code[0] = 0x10000003; // bra
1788 code[1] = 0x00000780; // always
1789
1790 switch (i->subOp) {
1791 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1792 break;
1793 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1794 pos += 8;
1795 break;
1796 default:
1797 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1798 code[0] = 0x20000003; // call
1799 code[1] = 0x00000000; // no predicate
1800 break;
1801 }
1802 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1803 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1804 }
1805
1806 void
emitFlow(const Instruction * i,uint8_t flowOp)1807 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1808 {
1809 const FlowInstruction *f = i->asFlow();
1810 bool hasPred = false;
1811 bool hasTarg = false;
1812
1813 code[0] = 0x00000003 | (flowOp << 28);
1814 code[1] = 0x00000000;
1815
1816 switch (i->op) {
1817 case OP_BRA:
1818 hasPred = true;
1819 hasTarg = true;
1820 break;
1821 case OP_BREAK:
1822 case OP_BRKPT:
1823 case OP_DISCARD:
1824 case OP_RET:
1825 hasPred = true;
1826 break;
1827 case OP_CALL:
1828 case OP_PREBREAK:
1829 case OP_JOINAT:
1830 hasTarg = true;
1831 break;
1832 case OP_PRERET:
1833 hasTarg = true;
1834 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1835 emitPRERETEmu(f);
1836 return;
1837 }
1838 break;
1839 default:
1840 break;
1841 }
1842
1843 if (hasPred)
1844 emitFlagsRd(i);
1845
1846 if (hasTarg && f) {
1847 uint32_t pos;
1848
1849 if (f->op == OP_CALL) {
1850 if (f->builtin) {
1851 pos = targNV50->getBuiltinOffset(f->target.builtin);
1852 } else {
1853 pos = f->target.fn->binPos;
1854 }
1855 } else {
1856 pos = f->target.bb->binPos;
1857 }
1858
1859 code[0] |= ((pos >> 2) & 0xffff) << 11;
1860 code[1] |= ((pos >> 18) & 0x003f) << 14;
1861
1862 RelocEntry::Type relocTy;
1863
1864 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1865
1866 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1867 addReloc(relocTy, 1, pos, 0x000fc000, -4);
1868 }
1869 }
1870
1871 void
emitBAR(const Instruction * i)1872 CodeEmitterNV50::emitBAR(const Instruction *i)
1873 {
1874 ImmediateValue *barId = i->getSrc(0)->asImm();
1875 assert(barId);
1876
1877 code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1878 code[1] = 0x00004000;
1879
1880 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1881 code[0] |= 1 << 26;
1882 }
1883
1884 void
emitATOM(const Instruction * i)1885 CodeEmitterNV50::emitATOM(const Instruction *i)
1886 {
1887 uint8_t subOp;
1888 switch (i->subOp) {
1889 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1890 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1891 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1892 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1893 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1894 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1895 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1896 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1897 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1898 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1899 default:
1900 assert(!"invalid subop");
1901 return;
1902 }
1903 code[0] = 0xd0000001;
1904 code[1] = 0xc0c00000 | (subOp << 2);
1905 if (isSignedType(i->dType))
1906 code[1] |= 1 << 21;
1907
1908 // args
1909 emitFlagsRd(i);
1910 if (i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1911 i->subOp == NV50_IR_SUBOP_ATOM_CAS ||
1912 i->defExists(0)) {
1913 code[1] |= 0x20000000;
1914 setDst(i, 0);
1915 setSrc(i, 1, 1);
1916 // g[] pointer
1917 code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1918 } else {
1919 srcId(i->src(1), 2);
1920 // g[] pointer
1921 code[0] |= i->getSrc(0)->reg.fileIndex << 16;
1922 }
1923 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1924 setSrc(i, 2, 2);
1925
1926 srcId(i->getIndirect(0, 0), 9);
1927 }
1928
1929 bool
emitInstruction(Instruction * insn)1930 CodeEmitterNV50::emitInstruction(Instruction *insn)
1931 {
1932 if (!insn->encSize) {
1933 ERROR("skipping unencodable instruction: "); insn->print();
1934 return false;
1935 } else
1936 if (codeSize + insn->encSize > codeSizeLimit) {
1937 ERROR("code emitter output buffer too small\n");
1938 return false;
1939 }
1940
1941 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1942 INFO("EMIT: "); insn->print();
1943 }
1944
1945 switch (insn->op) {
1946 case OP_MOV:
1947 emitMOV(insn);
1948 break;
1949 case OP_EXIT:
1950 case OP_NOP:
1951 case OP_JOIN:
1952 emitNOP();
1953 break;
1954 case OP_VFETCH:
1955 case OP_LOAD:
1956 emitLOAD(insn);
1957 break;
1958 case OP_EXPORT:
1959 case OP_STORE:
1960 emitSTORE(insn);
1961 break;
1962 case OP_PFETCH:
1963 emitPFETCH(insn);
1964 break;
1965 case OP_RDSV:
1966 emitRDSV(insn);
1967 break;
1968 case OP_LINTERP:
1969 case OP_PINTERP:
1970 emitINTERP(insn);
1971 break;
1972 case OP_ADD:
1973 case OP_SUB:
1974 if (insn->dType == TYPE_F64)
1975 emitDADD(insn);
1976 else if (isFloatType(insn->dType))
1977 emitFADD(insn);
1978 else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1979 emitAADD(insn);
1980 else
1981 emitUADD(insn);
1982 break;
1983 case OP_MUL:
1984 if (insn->dType == TYPE_F64)
1985 emitDMUL(insn);
1986 else if (isFloatType(insn->dType))
1987 emitFMUL(insn);
1988 else
1989 emitIMUL(insn);
1990 break;
1991 case OP_MAD:
1992 case OP_FMA:
1993 if (insn->dType == TYPE_F64)
1994 emitDMAD(insn);
1995 else if (isFloatType(insn->dType))
1996 emitFMAD(insn);
1997 else
1998 emitIMAD(insn);
1999 break;
2000 case OP_SAD:
2001 emitISAD(insn);
2002 break;
2003 case OP_NOT:
2004 emitNOT(insn);
2005 break;
2006 case OP_AND:
2007 case OP_OR:
2008 case OP_XOR:
2009 emitLogicOp(insn);
2010 break;
2011 case OP_SHL:
2012 case OP_SHR:
2013 emitShift(insn);
2014 break;
2015 case OP_SET:
2016 emitSET(insn);
2017 break;
2018 case OP_MIN:
2019 case OP_MAX:
2020 emitMINMAX(insn);
2021 break;
2022 case OP_CEIL:
2023 case OP_FLOOR:
2024 case OP_TRUNC:
2025 case OP_ABS:
2026 case OP_NEG:
2027 case OP_SAT:
2028 emitCVT(insn);
2029 break;
2030 case OP_CVT:
2031 if (insn->def(0).getFile() == FILE_ADDRESS)
2032 emitARL(insn, 0);
2033 else
2034 if (insn->def(0).getFile() == FILE_FLAGS ||
2035 insn->src(0).getFile() == FILE_FLAGS ||
2036 insn->src(0).getFile() == FILE_ADDRESS)
2037 emitMOV(insn);
2038 else
2039 emitCVT(insn);
2040 break;
2041 case OP_RCP:
2042 emitSFnOp(insn, 0);
2043 break;
2044 case OP_RSQ:
2045 emitSFnOp(insn, 2);
2046 break;
2047 case OP_LG2:
2048 emitSFnOp(insn, 3);
2049 break;
2050 case OP_SIN:
2051 emitSFnOp(insn, 4);
2052 break;
2053 case OP_COS:
2054 emitSFnOp(insn, 5);
2055 break;
2056 case OP_EX2:
2057 emitSFnOp(insn, 6);
2058 break;
2059 case OP_PRESIN:
2060 case OP_PREEX2:
2061 emitPreOp(insn);
2062 break;
2063 case OP_TEX:
2064 case OP_TXB:
2065 case OP_TXL:
2066 case OP_TXF:
2067 case OP_TXG:
2068 case OP_TXLQ:
2069 emitTEX(insn->asTex());
2070 break;
2071 case OP_TXQ:
2072 emitTXQ(insn->asTex());
2073 break;
2074 case OP_TEXPREP:
2075 emitTEXPREP(insn->asTex());
2076 break;
2077 case OP_EMIT:
2078 case OP_RESTART:
2079 emitOUT(insn);
2080 break;
2081 case OP_DISCARD:
2082 emitFlow(insn, 0x0);
2083 break;
2084 case OP_BRA:
2085 emitFlow(insn, 0x1);
2086 break;
2087 case OP_CALL:
2088 emitFlow(insn, 0x2);
2089 break;
2090 case OP_RET:
2091 emitFlow(insn, 0x3);
2092 break;
2093 case OP_PREBREAK:
2094 emitFlow(insn, 0x4);
2095 break;
2096 case OP_BREAK:
2097 emitFlow(insn, 0x5);
2098 break;
2099 case OP_QUADON:
2100 emitFlow(insn, 0x6);
2101 break;
2102 case OP_QUADPOP:
2103 emitFlow(insn, 0x7);
2104 break;
2105 case OP_JOINAT:
2106 emitFlow(insn, 0xa);
2107 break;
2108 case OP_PRERET:
2109 emitFlow(insn, 0xd);
2110 break;
2111 case OP_QUADOP:
2112 emitQUADOP(insn, insn->lanes, insn->subOp);
2113 break;
2114 case OP_DFDX:
2115 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
2116 break;
2117 case OP_DFDY:
2118 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
2119 break;
2120 case OP_ATOM:
2121 emitATOM(insn);
2122 break;
2123 case OP_BAR:
2124 emitBAR(insn);
2125 break;
2126 case OP_PHI:
2127 case OP_UNION:
2128 case OP_CONSTRAINT:
2129 ERROR("operation should have been eliminated\n");
2130 return false;
2131 case OP_EXP:
2132 case OP_LOG:
2133 case OP_SQRT:
2134 case OP_POW:
2135 case OP_SELP:
2136 case OP_SLCT:
2137 case OP_TXD:
2138 case OP_PRECONT:
2139 case OP_CONT:
2140 case OP_POPCNT:
2141 case OP_INSBF:
2142 case OP_EXTBF:
2143 ERROR("operation should have been lowered\n");
2144 return false;
2145 default:
2146 ERROR("unknown op: %u\n", insn->op);
2147 return false;
2148 }
2149 if (insn->join || insn->op == OP_JOIN)
2150 code[1] |= 0x2;
2151 else
2152 if (insn->exit || insn->op == OP_EXIT)
2153 code[1] |= 0x1;
2154
2155 assert((insn->encSize == 8) == (code[0] & 1));
2156
2157 code += insn->encSize / 4;
2158 codeSize += insn->encSize;
2159 return true;
2160 }
2161
2162 uint32_t
getMinEncodingSize(const Instruction * i) const2163 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2164 {
2165 const Target::OpInfo &info = targ->getOpInfo(i);
2166
2167 if (info.minEncSize > 4 || i->dType == TYPE_F64)
2168 return 8;
2169
2170 // check constraints on dst and src operands
2171 for (int d = 0; i->defExists(d); ++d) {
2172 if (i->def(d).rep()->reg.data.id > 63 ||
2173 i->def(d).rep()->reg.file != FILE_GPR)
2174 return 8;
2175 }
2176
2177 for (int s = 0; i->srcExists(s); ++s) {
2178 DataFile sf = i->src(s).getFile();
2179 if (sf != FILE_GPR)
2180 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2181 return 8;
2182 if (i->src(s).rep()->reg.data.id > 63)
2183 return 8;
2184 }
2185
2186 // check modifiers & rounding
2187 if (i->join || i->lanes != 0xf || i->exit)
2188 return 8;
2189 if (i->op == OP_MUL && i->rnd != ROUND_N)
2190 return 8;
2191
2192 if (i->asTex())
2193 return 8; // TODO: short tex encoding
2194
2195 // check constraints on short MAD
2196 if (info.srcNr >= 2 && i->srcExists(2)) {
2197 if (!i->defExists(0) ||
2198 (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2199 DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2200 return 8;
2201 }
2202
2203 return info.minEncSize;
2204 }
2205
2206 // Change the encoding size of an instruction after BBs have been scheduled.
2207 static void
makeInstructionLong(Instruction * insn)2208 makeInstructionLong(Instruction *insn)
2209 {
2210 if (insn->encSize == 8)
2211 return;
2212 Function *fn = insn->bb->getFunction();
2213 int n = 0;
2214 int adj = 4;
2215
2216 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2217
2218 if (n & 1) {
2219 adj = 8;
2220 insn->next->encSize = 8;
2221 } else
2222 if (insn->prev && insn->prev->encSize == 4) {
2223 adj = 8;
2224 insn->prev->encSize = 8;
2225 }
2226 insn->encSize = 8;
2227
2228 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2229 fn->bbArray[i]->binPos += adj;
2230 }
2231 fn->binSize += adj;
2232 insn->bb->binSize += adj;
2233 }
2234
2235 static bool
trySetExitModifier(Instruction * insn)2236 trySetExitModifier(Instruction *insn)
2237 {
2238 if (insn->op == OP_DISCARD ||
2239 insn->op == OP_QUADON ||
2240 insn->op == OP_QUADPOP)
2241 return false;
2242 for (int s = 0; insn->srcExists(s); ++s)
2243 if (insn->src(s).getFile() == FILE_IMMEDIATE)
2244 return false;
2245 if (insn->asFlow()) {
2246 if (insn->op == OP_CALL) // side effects !
2247 return false;
2248 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2249 return false;
2250 insn->op = OP_EXIT;
2251 }
2252 insn->exit = 1;
2253 makeInstructionLong(insn);
2254 return true;
2255 }
2256
2257 static void
replaceExitWithModifier(Function * func)2258 replaceExitWithModifier(Function *func)
2259 {
2260 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2261
2262 if (!epilogue->getExit() ||
2263 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2264 return;
2265
2266 if (epilogue->getEntry()->op != OP_EXIT) {
2267 Instruction *insn = epilogue->getExit()->prev;
2268 if (!insn || !trySetExitModifier(insn))
2269 return;
2270 insn->exit = 1;
2271 } else {
2272 for (Graph::EdgeIterator ei = func->cfgExit->incident();
2273 !ei.end(); ei.next()) {
2274 BasicBlock *bb = BasicBlock::get(ei.getNode());
2275 Instruction *i = bb->getExit();
2276
2277 if (!i || !trySetExitModifier(i))
2278 return;
2279 }
2280 }
2281
2282 int adj = epilogue->getExit()->encSize;
2283 epilogue->binSize -= adj;
2284 func->binSize -= adj;
2285 delete_Instruction(func->getProgram(), epilogue->getExit());
2286
2287 // There may be BB's that are laid out after the exit block
2288 for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
2289 func->bbArray[i]->binPos -= adj;
2290 }
2291 }
2292
// Per-function preparation before code emission: runs the generic
// CodeEmitter preparation first, then tries to replace the function's
// trailing OP_EXIT with an exit modifier.
void
CodeEmitterNV50::prepareEmission(Function *func)
{
   CodeEmitter::prepareEmission(func);

   // Done afterwards because it adjusts the binSize/binPos values of the
   // function and its blocks.
   replaceExitWithModifier(func);
}
2300
CodeEmitterNV50(Program::Type type,const TargetNV50 * target)2301 CodeEmitterNV50::CodeEmitterNV50(Program::Type type, const TargetNV50 *target) :
2302 CodeEmitter(target), progType(type), targNV50(target)
2303 {
2304 targ = target; // specialized
2305 code = NULL;
2306 codeSize = codeSizeLimit = 0;
2307 relocInfo = NULL;
2308 }
2309
2310 CodeEmitter *
getCodeEmitter(Program::Type type)2311 TargetNV50::getCodeEmitter(Program::Type type)
2312 {
2313 CodeEmitterNV50 *emit = new CodeEmitterNV50(type, this);
2314 return emit;
2315 }
2316
2317 } // namespace nv50_ir
2318