1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26
27 extern "C" {
28 #include "nouveau_debug.h"
29 }
30
31 namespace nv50_ir {
32
Modifier(operation op)33 Modifier::Modifier(operation op)
34 {
35 switch (op) {
36 case OP_NEG: bits = NV50_IR_MOD_NEG; break;
37 case OP_ABS: bits = NV50_IR_MOD_ABS; break;
38 case OP_SAT: bits = NV50_IR_MOD_SAT; break;
39 case OP_NOT: bits = NV50_IR_MOD_NOT; break;
40 default:
41 bits = 0;
42 break;
43 }
44 }
45
// Combine this modifier with @m.
// NOT and NEG toggle (xor), ABS and SAT accumulate (or). If this modifier
// already has ABS set, a NEG carried by @m is dropped before toggling.
Modifier Modifier::operator*(const Modifier m) const
{
   const unsigned int toggleMask = NV50_IR_MOD_NOT | NV50_IR_MOD_NEG;
   const unsigned int stickyMask = NV50_IR_MOD_ABS | NV50_IR_MOD_SAT;

   unsigned int rhs = m.bits;
   if (this->bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled = (this->bits ^ rhs) & toggleMask;
   const unsigned int sticky = (this->bits | m.bits) & stickyMask;

   return Modifier(toggled | sticky);
}
59
ValueRef(Value * v)60 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
61 {
62 indirect[0] = -1;
63 indirect[1] = -1;
64 usedAsPtr = false;
65 set(v);
66 }
67
// Copy constructor: shares the owning instruction of @ref and copies its
// value, modifier and indirect indices (via set()) plus the usedAsPtr flag.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   set(ref);
}
73
~ValueRef()74 ValueRef::~ValueRef()
75 {
76 this->set(NULL);
77 }
78
getImmediate(ImmediateValue & imm) const79 bool ValueRef::getImmediate(ImmediateValue &imm) const
80 {
81 const ValueRef *src = this;
82 Modifier m;
83 DataType type = src->insn->sType;
84
85 while (src) {
86 if (src->mod) {
87 if (src->insn->sType != type)
88 break;
89 m *= src->mod;
90 }
91 if (src->getFile() == FILE_IMMEDIATE) {
92 imm = *(src->value->asImm());
93 // The immediate's type isn't required to match its use, it's
94 // more of a hint; applying a modifier makes use of that hint.
95 imm.reg.type = type;
96 m.applyTo(imm);
97 return true;
98 }
99
100 Instruction *insn = src->value->getUniqueInsn();
101
102 if (insn && insn->op == OP_MOV) {
103 src = &insn->src(0);
104 if (src->mod)
105 WARN("OP_MOV with modifier encountered !\n");
106 } else {
107 src = NULL;
108 }
109 }
110 return false;
111 }
112
ValueDef(Value * v)113 ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)
114 {
115 set(v);
116 }
117
// Copy constructor: defines the same value as @def, but origin and the
// owning instruction start out as NULL rather than being copied.
ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)
{
   Value *defined = def.get();
   set(defined);
}
122
~ValueDef()123 ValueDef::~ValueDef()
124 {
125 this->set(NULL);
126 }
127
128 void
set(const ValueRef & ref)129 ValueRef::set(const ValueRef &ref)
130 {
131 this->set(ref.get());
132 mod = ref.mod;
133 indirect[0] = ref.indirect[0];
134 indirect[1] = ref.indirect[1];
135 }
136
137 void
set(Value * refVal)138 ValueRef::set(Value *refVal)
139 {
140 if (value == refVal)
141 return;
142 if (value)
143 value->uses.erase(this);
144 if (refVal)
145 refVal->uses.insert(this);
146
147 value = refVal;
148 }
149
150 void
set(Value * defVal)151 ValueDef::set(Value *defVal)
152 {
153 if (value == defVal)
154 return;
155 if (value)
156 value->defs.remove(this);
157 if (defVal)
158 defVal->defs.push_back(this);
159
160 value = defVal;
161 }
162
163 // Check if we can replace this definition's value by the value in @rep,
164 // including the source modifiers, i.e. make sure that all uses support
165 // @rep.mod.
166 bool
mayReplace(const ValueRef & rep)167 ValueDef::mayReplace(const ValueRef &rep)
168 {
169 if (!rep.mod)
170 return true;
171
172 if (!insn || !insn->bb) // Unbound instruction ?
173 return false;
174
175 const Target *target = insn->bb->getProgram()->getTarget();
176
177 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
178 ++it) {
179 Instruction *insn = (*it)->getInsn();
180 int s = -1;
181
182 for (int i = 0; insn->srcExists(i); ++i) {
183 if (insn->src(i).get() == value) {
184 // If there are multiple references to us we'd have to check if the
185 // combination of mods is still supported, but just bail for now.
186 if (&insn->src(i) != (*it))
187 return false;
188 s = i;
189 }
190 }
191 assert(s >= 0); // integrity of uses list
192
193 if (!target->isModSupported(insn, s, rep.mod))
194 return false;
195 }
196 return true;
197 }
198
199 void
replace(const ValueRef & repVal,bool doSet)200 ValueDef::replace(const ValueRef &repVal, bool doSet)
201 {
202 assert(mayReplace(repVal));
203
204 if (value == repVal.get())
205 return;
206
207 while (!value->uses.empty()) {
208 ValueRef *ref = *value->uses.begin();
209 ref->set(repVal.get());
210 ref->mod *= repVal.mod;
211 }
212
213 if (doSet)
214 set(repVal.get());
215 }
216
Value()217 Value::Value() : id(-1)
218 {
219 join = this;
220 memset(®, 0, sizeof(reg));
221 reg.size = 4;
222 }
223
LValue(Function * fn,DataFile file)224 LValue::LValue(Function *fn, DataFile file)
225 {
226 reg.file = file;
227 reg.size = (file != FILE_PREDICATE) ? 4 : 1;
228 reg.data.id = -1;
229
230 compMask = 0;
231 compound = 0;
232 ssa = 0;
233 fixedReg = 0;
234 noSpill = 0;
235
236 fn->add(this, this->id);
237 }
238
LValue(Function * fn,LValue * lval)239 LValue::LValue(Function *fn, LValue *lval)
240 {
241 assert(lval);
242
243 reg.file = lval->reg.file;
244 reg.size = lval->reg.size;
245 reg.data.id = -1;
246
247 compMask = 0;
248 compound = 0;
249 ssa = 0;
250 fixedReg = 0;
251 noSpill = 0;
252
253 fn->add(this, this->id);
254 }
255
256 LValue *
clone(ClonePolicy<Function> & pol) const257 LValue::clone(ClonePolicy<Function>& pol) const
258 {
259 LValue *that = new_LValue(pol.context(), reg.file);
260
261 pol.set<Value>(this, that);
262
263 that->reg.size = this->reg.size;
264 that->reg.type = this->reg.type;
265 that->reg.data = this->reg.data;
266
267 return that;
268 }
269
270 bool
isUniform() const271 LValue::isUniform() const
272 {
273 if (defs.size() > 1)
274 return false;
275 Instruction *insn = getInsn();
276 if (!insn)
277 return false;
278 // let's not try too hard here for now ...
279 return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
280 }
281
Symbol(Program * prog,DataFile f,ubyte fidx)282 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
283 {
284 baseSym = NULL;
285
286 reg.file = f;
287 reg.fileIndex = fidx;
288 reg.data.offset = 0;
289
290 prog->add(this, this->id);
291 }
292
293 Symbol *
clone(ClonePolicy<Function> & pol) const294 Symbol::clone(ClonePolicy<Function>& pol) const
295 {
296 Program *prog = pol.context()->getProgram();
297
298 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
299
300 pol.set<Value>(this, that);
301
302 that->reg.size = this->reg.size;
303 that->reg.type = this->reg.type;
304 that->reg.data = this->reg.data;
305
306 that->baseSym = this->baseSym;
307
308 return that;
309 }
310
311 bool
isUniform() const312 Symbol::isUniform() const
313 {
314 return
315 reg.file != FILE_SYSTEM_VALUE &&
316 reg.file != FILE_MEMORY_LOCAL &&
317 reg.file != FILE_SHADER_INPUT;
318 }
319
ImmediateValue(Program * prog,uint32_t uval)320 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
321 {
322 memset(®, 0, sizeof(reg));
323
324 reg.file = FILE_IMMEDIATE;
325 reg.size = 4;
326 reg.type = TYPE_U32;
327
328 reg.data.u32 = uval;
329
330 prog->add(this, this->id);
331 }
332
ImmediateValue(Program * prog,float fval)333 ImmediateValue::ImmediateValue(Program *prog, float fval)
334 {
335 memset(®, 0, sizeof(reg));
336
337 reg.file = FILE_IMMEDIATE;
338 reg.size = 4;
339 reg.type = TYPE_F32;
340
341 reg.data.f32 = fval;
342
343 prog->add(this, this->id);
344 }
345
ImmediateValue(Program * prog,double dval)346 ImmediateValue::ImmediateValue(Program *prog, double dval)
347 {
348 memset(®, 0, sizeof(reg));
349
350 reg.file = FILE_IMMEDIATE;
351 reg.size = 8;
352 reg.type = TYPE_F64;
353
354 reg.data.f64 = dval;
355
356 prog->add(this, this->id);
357 }
358
// Create an immediate with the storage of @proto reinterpreted as type @ty
// (size is derived from @ty). Unlike the other constructors this one does
// not add the new value to a program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   this->reg = proto->reg;

   this->reg.size = typeSizeof(ty);
   this->reg.type = ty;
}
366
367 ImmediateValue *
clone(ClonePolicy<Function> & pol) const368 ImmediateValue::clone(ClonePolicy<Function>& pol) const
369 {
370 Program *prog = pol.context()->getProgram();
371 ImmediateValue *that = new_ImmediateValue(prog, 0u);
372
373 pol.set<Value>(this, that);
374
375 that->reg.size = this->reg.size;
376 that->reg.type = this->reg.type;
377 that->reg.data = this->reg.data;
378
379 return that;
380 }
381
382 bool
isInteger(const int i) const383 ImmediateValue::isInteger(const int i) const
384 {
385 switch (reg.type) {
386 case TYPE_S8:
387 return reg.data.s8 == i;
388 case TYPE_U8:
389 return reg.data.u8 == i;
390 case TYPE_S16:
391 return reg.data.s16 == i;
392 case TYPE_U16:
393 return reg.data.u16 == i;
394 case TYPE_S32:
395 case TYPE_U32:
396 return reg.data.s32 == i; // as if ...
397 case TYPE_S64:
398 case TYPE_U64:
399 return reg.data.s64 == i; // as if ...
400 case TYPE_F32:
401 return reg.data.f32 == static_cast<float>(i);
402 case TYPE_F64:
403 return reg.data.f64 == static_cast<double>(i);
404 default:
405 return false;
406 }
407 }
408
409 bool
isNegative() const410 ImmediateValue::isNegative() const
411 {
412 switch (reg.type) {
413 case TYPE_S8: return reg.data.s8 < 0;
414 case TYPE_S16: return reg.data.s16 < 0;
415 case TYPE_S32:
416 case TYPE_U32: return reg.data.s32 < 0;
417 case TYPE_F32: return reg.data.u32 & (1 << 31);
418 case TYPE_F64: return reg.data.u64 & (1ULL << 63);
419 default:
420 return false;
421 }
422 }
423
424 bool
isPow2() const425 ImmediateValue::isPow2() const
426 {
427 if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
428 return util_is_power_of_two_or_zero64(reg.data.u64);
429 else
430 return util_is_power_of_two_or_zero(reg.data.u32);
431 }
432
433 void
applyLog2()434 ImmediateValue::applyLog2()
435 {
436 switch (reg.type) {
437 case TYPE_S8:
438 case TYPE_S16:
439 case TYPE_S32:
440 assert(!this->isNegative());
441 FALLTHROUGH;
442 case TYPE_U8:
443 case TYPE_U16:
444 case TYPE_U32:
445 reg.data.u32 = util_logbase2(reg.data.u32);
446 break;
447 case TYPE_S64:
448 assert(!this->isNegative());
449 FALLTHROUGH;
450 case TYPE_U64:
451 reg.data.u64 = util_logbase2_64(reg.data.u64);
452 break;
453 case TYPE_F32:
454 reg.data.f32 = log2f(reg.data.f32);
455 break;
456 case TYPE_F64:
457 reg.data.f64 = log2(reg.data.f64);
458 break;
459 default:
460 assert(0);
461 break;
462 }
463 }
464
465 bool
compare(CondCode cc,float fval) const466 ImmediateValue::compare(CondCode cc, float fval) const
467 {
468 if (reg.type != TYPE_F32)
469 ERROR("immediate value is not of type f32");
470
471 switch (static_cast<CondCode>(cc & 7)) {
472 case CC_TR: return true;
473 case CC_FL: return false;
474 case CC_LT: return reg.data.f32 < fval;
475 case CC_LE: return reg.data.f32 <= fval;
476 case CC_GT: return reg.data.f32 > fval;
477 case CC_GE: return reg.data.f32 >= fval;
478 case CC_EQ: return reg.data.f32 == fval;
479 case CC_NE: return reg.data.f32 != fval;
480 default:
481 assert(0);
482 return false;
483 }
484 }
485
486 ImmediateValue&
operator =(const ImmediateValue & that)487 ImmediateValue::operator=(const ImmediateValue &that)
488 {
489 this->reg = that.reg;
490 return (*this);
491 }
492
493 bool
interfers(const Value * that) const494 Value::interfers(const Value *that) const
495 {
496 uint32_t idA, idB;
497
498 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
499 return false;
500 if (this->asImm())
501 return false;
502
503 if (this->asSym()) {
504 idA = this->join->reg.data.offset;
505 idB = that->join->reg.data.offset;
506 } else {
507 idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
508 idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
509 }
510
511 if (idA < idB)
512 return (idA + this->reg.size > idB);
513 else
514 if (idA > idB)
515 return (idB + that->reg.size > idA);
516 else
517 return (idA == idB);
518 }
519
520 bool
equals(const Value * that,bool strict) const521 Value::equals(const Value *that, bool strict) const
522 {
523 if (strict)
524 return this == that;
525
526 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
527 return false;
528 if (that->reg.size != this->reg.size)
529 return false;
530
531 if (that->reg.data.id != this->reg.data.id)
532 return false;
533
534 return true;
535 }
536
537 bool
equals(const Value * that,bool strict) const538 ImmediateValue::equals(const Value *that, bool strict) const
539 {
540 const ImmediateValue *imm = that->asImm();
541 if (!imm)
542 return false;
543 return reg.data.u64 == imm->reg.data.u64;
544 }
545
546 bool
equals(const Value * that,bool strict) const547 Symbol::equals(const Value *that, bool strict) const
548 {
549 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
550 return false;
551 assert(that->asSym());
552
553 if (this->baseSym != that->asSym()->baseSym)
554 return false;
555
556 if (reg.file == FILE_SYSTEM_VALUE)
557 return (this->reg.data.sv.sv == that->reg.data.sv.sv &&
558 this->reg.data.sv.index == that->reg.data.sv.index);
559 return this->reg.data.offset == that->reg.data.offset;
560 }
561
init()562 void Instruction::init()
563 {
564 next = prev = 0;
565 serial = 0;
566
567 cc = CC_ALWAYS;
568 rnd = ROUND_N;
569 cache = CACHE_CA;
570 subOp = 0;
571
572 saturate = 0;
573 join = 0;
574 exit = 0;
575 terminator = 0;
576 ftz = 0;
577 dnz = 0;
578 perPatch = 0;
579 fixed = 0;
580 encSize = 0;
581 ipa = 0;
582 mask = 0;
583 precise = 0;
584
585 lanes = 0xf;
586
587 postFactor = 0;
588
589 predSrc = -1;
590 flagsDef = -1;
591 flagsSrc = -1;
592
593 sched = 0;
594 bb = NULL;
595 }
596
Instruction()597 Instruction::Instruction()
598 {
599 init();
600
601 op = OP_NOP;
602 dType = sType = TYPE_F32;
603
604 id = -1;
605 }
606
Instruction(Function * fn,operation opr,DataType ty)607 Instruction::Instruction(Function *fn, operation opr, DataType ty)
608 {
609 init();
610
611 op = opr;
612 dType = sType = ty;
613
614 fn->add(this, id);
615 }
616
~Instruction()617 Instruction::~Instruction()
618 {
619 if (bb) {
620 Function *fn = bb->getFunction();
621 bb->remove(this);
622 fn->allInsns.remove(id);
623 }
624
625 for (int s = 0; srcExists(s); ++s)
626 setSrc(s, NULL);
627 // must unlink defs too since the list pointers will get deallocated
628 for (int d = 0; defExists(d); ++d)
629 setDef(d, NULL);
630 }
631
632 void
setDef(int i,Value * val)633 Instruction::setDef(int i, Value *val)
634 {
635 int size = defs.size();
636 if (i >= size) {
637 defs.resize(i + 1);
638 while (size <= i)
639 defs[size++].setInsn(this);
640 }
641 defs[i].set(val);
642 }
643
644 void
setSrc(int s,Value * val)645 Instruction::setSrc(int s, Value *val)
646 {
647 int size = srcs.size();
648 if (s >= size) {
649 srcs.resize(s + 1);
650 while (size <= s)
651 srcs[size++].setInsn(this);
652 }
653 srcs[s].set(val);
654 }
655
656 void
setSrc(int s,const ValueRef & ref)657 Instruction::setSrc(int s, const ValueRef& ref)
658 {
659 setSrc(s, ref.get());
660 srcs[s].mod = ref.mod;
661 }
662
663 void
swapSources(int a,int b)664 Instruction::swapSources(int a, int b)
665 {
666 Value *value = srcs[a].get();
667 Modifier m = srcs[a].mod;
668
669 setSrc(a, srcs[b]);
670
671 srcs[b].set(value);
672 srcs[b].mod = m;
673 }
674
// Fix up a source index for a move of sources [@s, last] by @delta:
// indices at or after @s are shifted by @delta; for a negative @delta,
// indices in the erased range [@s + @delta, @s) are reset to -1. All other
// indices are left alone.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }
   if (delta < 0 && index >= s + delta)
      index = -1;
}
683
684 // Moves sources [@s,last_source] by @delta.
685 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
686 void
moveSources(const int s,const int delta)687 Instruction::moveSources(const int s, const int delta)
688 {
689 if (delta == 0)
690 return;
691 assert(s + delta >= 0);
692
693 int k;
694
695 for (k = 0; srcExists(k); ++k) {
696 for (int i = 0; i < 2; ++i)
697 moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
698 }
699 moveSourcesAdjustIndex(predSrc, s, delta);
700 moveSourcesAdjustIndex(flagsSrc, s, delta);
701 if (asTex()) {
702 TexInstruction *tex = asTex();
703 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
704 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
705 }
706
707 if (delta > 0) {
708 --k;
709 for (int p = k + delta; k >= s; --k, --p)
710 setSrc(p, src(k));
711 } else {
712 int p;
713 for (p = s; p < k; ++p)
714 setSrc(p + delta, src(p));
715 for (; (p + delta) < k; ++p)
716 setSrc(p + delta, NULL);
717 }
718 }
719
720 void
takeExtraSources(int s,Value * values[3])721 Instruction::takeExtraSources(int s, Value *values[3])
722 {
723 values[0] = getIndirect(s, 0);
724 if (values[0])
725 setIndirect(s, 0, NULL);
726
727 values[1] = getIndirect(s, 1);
728 if (values[1])
729 setIndirect(s, 1, NULL);
730
731 values[2] = getPredicate();
732 if (values[2])
733 setPredicate(cc, NULL);
734 }
735
736 void
putExtraSources(int s,Value * values[3])737 Instruction::putExtraSources(int s, Value *values[3])
738 {
739 if (values[0])
740 setIndirect(s, 0, values[0]);
741 if (values[1])
742 setIndirect(s, 1, values[1]);
743 if (values[2])
744 setPredicate(cc, values[2]);
745 }
746
747 Instruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const748 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
749 {
750 if (!i)
751 i = new_Instruction(pol.context(), op, dType);
752 #if !defined(NDEBUG) && defined(__cpp_rtti)
753 assert(typeid(*i) == typeid(*this));
754 #endif
755
756 pol.set<Instruction>(this, i);
757
758 i->sType = sType;
759
760 i->rnd = rnd;
761 i->cache = cache;
762 i->subOp = subOp;
763
764 i->saturate = saturate;
765 i->join = join;
766 i->exit = exit;
767 i->mask = mask;
768 i->ftz = ftz;
769 i->dnz = dnz;
770 i->ipa = ipa;
771 i->lanes = lanes;
772 i->perPatch = perPatch;
773
774 i->postFactor = postFactor;
775
776 for (int d = 0; defExists(d); ++d)
777 i->setDef(d, pol.get(getDef(d)));
778
779 for (int s = 0; srcExists(s); ++s) {
780 i->setSrc(s, pol.get(getSrc(s)));
781 i->src(s).mod = src(s).mod;
782 }
783
784 i->cc = cc;
785 i->predSrc = predSrc;
786 i->flagsDef = flagsDef;
787 i->flagsSrc = flagsSrc;
788
789 return i;
790 }
791
792 unsigned int
defCount(unsigned int mask,bool singleFile) const793 Instruction::defCount(unsigned int mask, bool singleFile) const
794 {
795 unsigned int i, n;
796
797 if (singleFile) {
798 unsigned int d = ffs(mask);
799 if (!d)
800 return 0;
801 for (i = d--; defExists(i); ++i)
802 if (getDef(i)->reg.file != getDef(d)->reg.file)
803 mask &= ~(1 << i);
804 }
805
806 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
807 n += mask & 1;
808 return n;
809 }
810
811 unsigned int
srcCount(unsigned int mask,bool singleFile) const812 Instruction::srcCount(unsigned int mask, bool singleFile) const
813 {
814 unsigned int i, n;
815
816 if (singleFile) {
817 unsigned int s = ffs(mask);
818 if (!s)
819 return 0;
820 for (i = s--; srcExists(i); ++i)
821 if (getSrc(i)->reg.file != getSrc(s)->reg.file)
822 mask &= ~(1 << i);
823 }
824
825 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
826 n += mask & 1;
827 return n;
828 }
829
830 bool
setIndirect(int s,int dim,Value * value)831 Instruction::setIndirect(int s, int dim, Value *value)
832 {
833 assert(this->srcExists(s));
834
835 int p = srcs[s].indirect[dim];
836 if (p < 0) {
837 if (!value)
838 return true;
839 p = srcs.size();
840 while (p > 0 && !srcExists(p - 1))
841 --p;
842 }
843 setSrc(p, value);
844 srcs[p].usedAsPtr = (value != 0);
845 srcs[s].indirect[dim] = value ? p : -1;
846 return true;
847 }
848
849 bool
setPredicate(CondCode ccode,Value * value)850 Instruction::setPredicate(CondCode ccode, Value *value)
851 {
852 cc = ccode;
853
854 if (!value) {
855 if (predSrc >= 0) {
856 srcs[predSrc].set(NULL);
857 predSrc = -1;
858 }
859 return true;
860 }
861
862 if (predSrc < 0) {
863 predSrc = srcs.size();
864 while (predSrc > 0 && !srcExists(predSrc - 1))
865 --predSrc;
866 }
867
868 setSrc(predSrc, value);
869 return true;
870 }
871
872 bool
writesPredicate() const873 Instruction::writesPredicate() const
874 {
875 for (int d = 0; defExists(d); ++d)
876 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
877 return true;
878 return false;
879 }
880
881 bool
canCommuteDefSrc(const Instruction * i) const882 Instruction::canCommuteDefSrc(const Instruction *i) const
883 {
884 for (int d = 0; defExists(d); ++d)
885 for (int s = 0; i->srcExists(s); ++s)
886 if (getDef(d)->interfers(i->getSrc(s)))
887 return false;
888 return true;
889 }
890
891 bool
canCommuteDefDef(const Instruction * i) const892 Instruction::canCommuteDefDef(const Instruction *i) const
893 {
894 for (int d = 0; defExists(d); ++d)
895 for (int c = 0; i->defExists(c); ++c)
896 if (getDef(d)->interfers(i->getDef(c)))
897 return false;
898 return true;
899 }
900
901 bool
isCommutationLegal(const Instruction * i) const902 Instruction::isCommutationLegal(const Instruction *i) const
903 {
904 return canCommuteDefDef(i) &&
905 canCommuteDefSrc(i) &&
906 i->canCommuteDefSrc(this);
907 }
908
TexInstruction(Function * fn,operation op)909 TexInstruction::TexInstruction(Function *fn, operation op)
910 : Instruction(fn, op, TYPE_F32), tex()
911 {
912 tex.rIndirectSrc = -1;
913 tex.sIndirectSrc = -1;
914
915 if (op == OP_TXF)
916 sType = TYPE_U32;
917 }
918
~TexInstruction()919 TexInstruction::~TexInstruction()
920 {
921 for (int c = 0; c < 3; ++c) {
922 dPdx[c].set(NULL);
923 dPdy[c].set(NULL);
924 }
925 for (int n = 0; n < 4; ++n)
926 for (int c = 0; c < 3; ++c)
927 offset[n][c].set(NULL);
928 }
929
930 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const931 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
932 {
933 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
934 new_TexInstruction(pol.context(), op));
935
936 Instruction::clone(pol, tex);
937
938 tex->tex = this->tex;
939
940 if (op == OP_TXD) {
941 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
942 tex->dPdx[c].set(dPdx[c]);
943 tex->dPdy[c].set(dPdy[c]);
944 }
945 }
946
947 for (int n = 0; n < tex->tex.useOffsets; ++n)
948 for (int c = 0; c < 3; ++c)
949 tex->offset[n][c].set(offset[n][c]);
950
951 return tex;
952 }
953
954 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
955 {
956 { "1D", 1, 1, false, false, false },
957 { "2D", 2, 2, false, false, false },
958 { "2D_MS", 2, 3, false, false, false },
959 { "3D", 3, 3, false, false, false },
960 { "CUBE", 2, 3, false, true, false },
961 { "1D_SHADOW", 1, 1, false, false, true },
962 { "2D_SHADOW", 2, 2, false, false, true },
963 { "CUBE_SHADOW", 2, 3, false, true, true },
964 { "1D_ARRAY", 1, 2, true, false, false },
965 { "2D_ARRAY", 2, 3, true, false, false },
966 { "2D_MS_ARRAY", 2, 4, true, false, false },
967 { "CUBE_ARRAY", 2, 4, true, true, false },
968 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
969 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
970 { "RECT", 2, 2, false, false, false },
971 { "RECT_SHADOW", 2, 2, false, false, true },
972 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
973 { "BUFFER", 1, 1, false, false, false },
974 };
975
976 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
977 {
978 { "NONE", 0, { 0, 0, 0, 0 }, UINT },
979
980 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT },
981 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT },
982 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT },
983 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT },
984 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT },
985 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT },
986 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT },
987
988 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT },
989 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT },
990 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT },
991 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT },
992 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT },
993 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT },
994 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT },
995 { "R32UI", 1, { 32, 0, 0, 0 }, UINT },
996 { "R16UI", 1, { 16, 0, 0, 0 }, UINT },
997 { "R8UI", 1, { 8, 0, 0, 0 }, UINT },
998
999 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT },
1000 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT },
1001 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT },
1002 { "RG32I", 2, { 32, 32, 0, 0 }, SINT },
1003 { "RG16I", 2, { 16, 16, 0, 0 }, SINT },
1004 { "RG8I", 2, { 8, 8, 0, 0 }, SINT },
1005 { "R32I", 1, { 32, 0, 0, 0 }, SINT },
1006 { "R16I", 1, { 16, 0, 0, 0 }, SINT },
1007 { "R8I", 1, { 8, 0, 0, 0 }, SINT },
1008
1009 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM },
1010 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM },
1011 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM },
1012 { "RG16", 2, { 16, 16, 0, 0 }, UNORM },
1013 { "RG8", 2, { 8, 8, 0, 0 }, UNORM },
1014 { "R16", 1, { 16, 0, 0, 0 }, UNORM },
1015 { "R8", 1, { 8, 0, 0, 0 }, UNORM },
1016
1017 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1018 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM },
1019 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM },
1020 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM },
1021 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM },
1022 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM },
1023
1024 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true },
1025 };
1026
1027 const struct TexInstruction::ImgFormatDesc *
translateImgFormat(enum pipe_format format)1028 TexInstruction::translateImgFormat(enum pipe_format format)
1029 {
1030
1031 #define FMT_CASE(a, b) \
1032 case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]
1033
1034 switch (format) {
1035 FMT_CASE(NONE, NONE);
1036
1037 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
1038 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
1039 FMT_CASE(R32G32_FLOAT, RG32F);
1040 FMT_CASE(R16G16_FLOAT, RG16F);
1041 FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
1042 FMT_CASE(R32_FLOAT, R32F);
1043 FMT_CASE(R16_FLOAT, R16F);
1044
1045 FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
1046 FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
1047 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
1048 FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
1049 FMT_CASE(R32G32_UINT, RG32UI);
1050 FMT_CASE(R16G16_UINT, RG16UI);
1051 FMT_CASE(R8G8_UINT, RG8UI);
1052 FMT_CASE(R32_UINT, R32UI);
1053 FMT_CASE(R16_UINT, R16UI);
1054 FMT_CASE(R8_UINT, R8UI);
1055
1056 FMT_CASE(R32G32B32A32_SINT, RGBA32I);
1057 FMT_CASE(R16G16B16A16_SINT, RGBA16I);
1058 FMT_CASE(R8G8B8A8_SINT, RGBA8I);
1059 FMT_CASE(R32G32_SINT, RG32I);
1060 FMT_CASE(R16G16_SINT, RG16I);
1061 FMT_CASE(R8G8_SINT, RG8I);
1062 FMT_CASE(R32_SINT, R32I);
1063 FMT_CASE(R16_SINT, R16I);
1064 FMT_CASE(R8_SINT, R8I);
1065
1066 FMT_CASE(R16G16B16A16_UNORM, RGBA16);
1067 FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
1068 FMT_CASE(R8G8B8A8_UNORM, RGBA8);
1069 FMT_CASE(R16G16_UNORM, RG16);
1070 FMT_CASE(R8G8_UNORM, RG8);
1071 FMT_CASE(R16_UNORM, R16);
1072 FMT_CASE(R8_UNORM, R8);
1073
1074 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
1075 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
1076 FMT_CASE(R16G16_SNORM, RG16_SNORM);
1077 FMT_CASE(R8G8_SNORM, RG8_SNORM);
1078 FMT_CASE(R16_SNORM, R16_SNORM);
1079 FMT_CASE(R8_SNORM, R8_SNORM);
1080
1081 FMT_CASE(B8G8R8A8_UNORM, BGRA8);
1082
1083 default:
1084 assert(!"Unexpected format");
1085 return &formatTable[nv50_ir::FMT_NONE];
1086 }
1087 }
1088
1089 void
setIndirectR(Value * v)1090 TexInstruction::setIndirectR(Value *v)
1091 {
1092 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1093 if (p >= 0) {
1094 tex.rIndirectSrc = p;
1095 setSrc(p, v);
1096 srcs[p].usedAsPtr = !!v;
1097 }
1098 }
1099
1100 void
setIndirectS(Value * v)1101 TexInstruction::setIndirectS(Value *v)
1102 {
1103 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1104 if (p >= 0) {
1105 tex.sIndirectSrc = p;
1106 setSrc(p, v);
1107 srcs[p].usedAsPtr = !!v;
1108 }
1109 }
1110
CmpInstruction(Function * fn,operation op)1111 CmpInstruction::CmpInstruction(Function *fn, operation op)
1112 : Instruction(fn, op, TYPE_F32)
1113 {
1114 setCond = CC_ALWAYS;
1115 }
1116
1117 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1118 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1119 {
1120 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1121 new_CmpInstruction(pol.context(), op));
1122 cmp->dType = dType;
1123 Instruction::clone(pol, cmp);
1124 cmp->setCond = setCond;
1125 return cmp;
1126 }
1127
FlowInstruction(Function * fn,operation op,void * targ)1128 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1129 : Instruction(fn, op, TYPE_NONE)
1130 {
1131 if (op == OP_CALL)
1132 target.fn = reinterpret_cast<Function *>(targ);
1133 else
1134 target.bb = reinterpret_cast<BasicBlock *>(targ);
1135
1136 if (op == OP_BRA ||
1137 op == OP_CONT || op == OP_BREAK ||
1138 op == OP_RET || op == OP_EXIT)
1139 terminator = 1;
1140 else
1141 if (op == OP_JOIN)
1142 terminator = targ ? 1 : 0;
1143
1144 allWarp = absolute = limit = builtin = indirect = 0;
1145 }
1146
1147 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1148 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1149 {
1150 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1151 new_FlowInstruction(pol.context(), op, NULL));
1152
1153 Instruction::clone(pol, flow);
1154 flow->allWarp = allWarp;
1155 flow->absolute = absolute;
1156 flow->limit = limit;
1157 flow->builtin = builtin;
1158
1159 if (builtin)
1160 flow->target.builtin = target.builtin;
1161 else
1162 if (op == OP_CALL)
1163 flow->target.fn = target.fn;
1164 else
1165 if (target.bb)
1166 flow->target.bb = pol.get<BasicBlock>(target.bb);
1167
1168 return flow;
1169 }
1170
Program(Type type,Target * arch)1171 Program::Program(Type type, Target *arch)
1172 : progType(type),
1173 target(arch),
1174 tlsSize(0),
1175 mem_Instruction(sizeof(Instruction), 6),
1176 mem_CmpInstruction(sizeof(CmpInstruction), 4),
1177 mem_TexInstruction(sizeof(TexInstruction), 4),
1178 mem_FlowInstruction(sizeof(FlowInstruction), 4),
1179 mem_LValue(sizeof(LValue), 8),
1180 mem_Symbol(sizeof(Symbol), 7),
1181 mem_ImmediateValue(sizeof(ImmediateValue), 7),
1182 driver(NULL),
1183 driver_out(NULL)
1184 {
1185 code = NULL;
1186 binSize = 0;
1187
1188 maxGPR = -1;
1189 fp64 = false;
1190 persampleInvocation = false;
1191
1192 main = new Function(this, "MAIN", ~0);
1193 calls.insert(&main->call);
1194
1195 dbgFlags = 0;
1196 optLevel = 0;
1197
1198 targetPriv = NULL;
1199 }
1200
~Program()1201 Program::~Program()
1202 {
1203 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1204 delete reinterpret_cast<Function *>(it.get());
1205
1206 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1207 releaseValue(reinterpret_cast<Value *>(it.get()));
1208 }
1209
releaseInstruction(Instruction * insn)1210 void Program::releaseInstruction(Instruction *insn)
1211 {
1212 // TODO: make this not suck so much
1213
1214 insn->~Instruction();
1215
1216 if (insn->asCmp())
1217 mem_CmpInstruction.release(insn);
1218 else
1219 if (insn->asTex())
1220 mem_TexInstruction.release(insn);
1221 else
1222 if (insn->asFlow())
1223 mem_FlowInstruction.release(insn);
1224 else
1225 mem_Instruction.release(insn);
1226 }
1227
releaseValue(Value * value)1228 void Program::releaseValue(Value *value)
1229 {
1230 value->~Value();
1231
1232 if (value->asLValue())
1233 mem_LValue.release(value);
1234 else
1235 if (value->asImm())
1236 mem_ImmediateValue.release(value);
1237 else
1238 if (value->asSym())
1239 mem_Symbol.release(value);
1240 }
1241
1242
1243 } // namespace nv50_ir
1244
1245 extern "C" {
1246
1247 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1248 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1249 struct nv50_ir_prog_info_out *info_out)
1250 {
1251 info_out->target = info->target;
1252 info_out->type = info->type;
1253 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1254 info_out->prop.tp.domain = PIPE_PRIM_MAX;
1255 info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1256 }
1257 if (info->type == PIPE_SHADER_GEOMETRY) {
1258 info_out->prop.gp.instanceCount = 1;
1259 info_out->prop.gp.maxVertices = 1;
1260 }
1261 if (info->type == PIPE_SHADER_COMPUTE) {
1262 info->prop.cp.numThreads[0] =
1263 info->prop.cp.numThreads[1] =
1264 info->prop.cp.numThreads[2] = 1;
1265 }
1266 info_out->bin.smemSize = info->bin.smemSize;
1267 info_out->io.genUserClip = info->io.genUserClip;
1268 info_out->io.instanceId = 0xff;
1269 info_out->io.vertexId = 0xff;
1270 info_out->io.edgeFlagIn = 0xff;
1271 info_out->io.edgeFlagOut = 0xff;
1272 info_out->io.fragDepth = 0xff;
1273 info_out->io.sampleMask = 0xff;
1274 }
1275
1276 int
nv50_ir_generate_code(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1277 nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1278 struct nv50_ir_prog_info_out *info_out)
1279 {
1280 int ret = 0;
1281
1282 nv50_ir::Program::Type type;
1283
1284 nv50_ir_init_prog_info(info, info_out);
1285
1286 #define PROG_TYPE_CASE(a, b) \
1287 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1288
1289 switch (info->type) {
1290 PROG_TYPE_CASE(VERTEX, VERTEX);
1291 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1292 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1293 PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1294 PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1295 PROG_TYPE_CASE(COMPUTE, COMPUTE);
1296 default:
1297 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1298 return -1;
1299 }
1300 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1301
1302 nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1303 if (!targ)
1304 return -1;
1305
1306 nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1307 if (!prog) {
1308 nv50_ir::Target::destroy(targ);
1309 return -1;
1310 }
1311 prog->driver = info;
1312 prog->driver_out = info_out;
1313 prog->dbgFlags = info->dbgFlags;
1314 prog->optLevel = info->optLevel;
1315
1316 switch (info->bin.sourceRep) {
1317 case PIPE_SHADER_IR_NIR:
1318 ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1319 break;
1320 case PIPE_SHADER_IR_TGSI:
1321 ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1322 break;
1323 default:
1324 ret = -1;
1325 break;
1326 }
1327 if (ret < 0)
1328 goto out;
1329 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1330 prog->print();
1331
1332 targ->parseDriverInfo(info, info_out);
1333 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1334
1335 prog->convertToSSA();
1336
1337 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1338 prog->print();
1339
1340 prog->optimizeSSA(info->optLevel);
1341 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1342
1343 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1344 prog->print();
1345
1346 if (!prog->registerAllocation()) {
1347 ret = -4;
1348 goto out;
1349 }
1350 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1351
1352 prog->optimizePostRA(info->optLevel);
1353
1354 if (!prog->emitBinary(info_out)) {
1355 ret = -5;
1356 goto out;
1357 }
1358
1359 out:
1360 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1361
1362 info_out->bin.maxGPR = prog->maxGPR;
1363 info_out->bin.code = prog->code;
1364 info_out->bin.codeSize = prog->binSize;
1365 info_out->bin.tlsSpace = prog->tlsSize;
1366
1367 delete prog;
1368 nv50_ir::Target::destroy(targ);
1369
1370 return ret;
1371 }
1372
1373 } // extern "C"
1374