1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26 
27 extern "C" {
28 #include "nouveau_debug.h"
29 }
30 
31 namespace nv50_ir {
32 
Modifier(operation op)33 Modifier::Modifier(operation op)
34 {
35    switch (op) {
36    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
37    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
38    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
39    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
40    default:
41       bits = 0;
42       break;
43    }
44 }
45 
// Combine this modifier with @m and return the resulting modifier.
//
// NOT and NEG toggle (xor) while ABS and SAT accumulate (or). If this
// modifier already carries ABS, a NEG contributed by @m is dropped before
// the toggle is computed.
Modifier Modifier::operator*(const Modifier m) const
{
   const unsigned int toggleMask = NV50_IR_MOD_NOT | NV50_IR_MOD_NEG;
   const unsigned int stickyMask = NV50_IR_MOD_ABS | NV50_IR_MOD_SAT;

   unsigned int rhs = m.bits;
   if (this->bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled = (this->bits ^ rhs) & toggleMask;
   const unsigned int sticky = (this->bits | m.bits) & stickyMask;

   return Modifier(toggled | sticky);
}
59 
ValueRef(Value * v)60 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
61 {
62    indirect[0] = -1;
63    indirect[1] = -1;
64    usedAsPtr = false;
65    set(v);
66 }
67 
// Copy constructor: shares @ref's instruction pointer and pointer-use flag,
// and goes through set() so the referenced value, modifier and indirect
// slots are copied as well.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   set(ref);
}
73 
~ValueRef()74 ValueRef::~ValueRef()
75 {
76    this->set(NULL);
77 }
78 
getImmediate(ImmediateValue & imm) const79 bool ValueRef::getImmediate(ImmediateValue &imm) const
80 {
81    const ValueRef *src = this;
82    Modifier m;
83    DataType type = src->insn->sType;
84 
85    while (src) {
86       if (src->mod) {
87          if (src->insn->sType != type)
88             break;
89          m *= src->mod;
90       }
91       if (src->getFile() == FILE_IMMEDIATE) {
92          imm = *(src->value->asImm());
93          // The immediate's type isn't required to match its use, it's
94          // more of a hint; applying a modifier makes use of that hint.
95          imm.reg.type = type;
96          m.applyTo(imm);
97          return true;
98       }
99 
100       Instruction *insn = src->value->getUniqueInsn();
101 
102       if (insn && insn->op == OP_MOV) {
103          src = &insn->src(0);
104          if (src->mod)
105             WARN("OP_MOV with modifier encountered !\n");
106       } else {
107          src = NULL;
108       }
109    }
110    return false;
111 }
112 
ValueDef(Value * v)113 ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)
114 {
115    set(v);
116 }
117 
// Copy constructor: the copy defines the same value as @def, but its origin
// and owning instruction are left unset.
ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)
{
   this->set(def.get());
}
122 
~ValueDef()123 ValueDef::~ValueDef()
124 {
125    this->set(NULL);
126 }
127 
128 void
set(const ValueRef & ref)129 ValueRef::set(const ValueRef &ref)
130 {
131    this->set(ref.get());
132    mod = ref.mod;
133    indirect[0] = ref.indirect[0];
134    indirect[1] = ref.indirect[1];
135 }
136 
137 void
set(Value * refVal)138 ValueRef::set(Value *refVal)
139 {
140    if (value == refVal)
141       return;
142    if (value)
143       value->uses.erase(this);
144    if (refVal)
145       refVal->uses.insert(this);
146 
147    value = refVal;
148 }
149 
150 void
set(Value * defVal)151 ValueDef::set(Value *defVal)
152 {
153    if (value == defVal)
154       return;
155    if (value)
156       value->defs.remove(this);
157    if (defVal)
158       defVal->defs.push_back(this);
159 
160    value = defVal;
161 }
162 
163 // Check if we can replace this definition's value by the value in @rep,
164 // including the source modifiers, i.e. make sure that all uses support
165 // @rep.mod.
166 bool
mayReplace(const ValueRef & rep)167 ValueDef::mayReplace(const ValueRef &rep)
168 {
169    if (!rep.mod)
170       return true;
171 
172    if (!insn || !insn->bb) // Unbound instruction ?
173       return false;
174 
175    const Target *target = insn->bb->getProgram()->getTarget();
176 
177    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
178         ++it) {
179       Instruction *insn = (*it)->getInsn();
180       int s = -1;
181 
182       for (int i = 0; insn->srcExists(i); ++i) {
183          if (insn->src(i).get() == value) {
184             // If there are multiple references to us we'd have to check if the
185             // combination of mods is still supported, but just bail for now.
186             if (&insn->src(i) != (*it))
187                return false;
188             s = i;
189          }
190       }
191       assert(s >= 0); // integrity of uses list
192 
193       if (!target->isModSupported(insn, s, rep.mod))
194          return false;
195    }
196    return true;
197 }
198 
199 void
replace(const ValueRef & repVal,bool doSet)200 ValueDef::replace(const ValueRef &repVal, bool doSet)
201 {
202    assert(mayReplace(repVal));
203 
204    if (value == repVal.get())
205       return;
206 
207    while (!value->uses.empty()) {
208       ValueRef *ref = *value->uses.begin();
209       ref->set(repVal.get());
210       ref->mod *= repVal.mod;
211    }
212 
213    if (doSet)
214       set(repVal.get());
215 }
216 
Value()217 Value::Value() : id(-1)
218 {
219   join = this;
220   memset(&reg, 0, sizeof(reg));
221   reg.size = 4;
222 }
223 
LValue(Function * fn,DataFile file)224 LValue::LValue(Function *fn, DataFile file)
225 {
226    reg.file = file;
227    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
228    reg.data.id = -1;
229 
230    compMask = 0;
231    compound = 0;
232    ssa = 0;
233    fixedReg = 0;
234    noSpill = 0;
235 
236    fn->add(this, this->id);
237 }
238 
LValue(Function * fn,LValue * lval)239 LValue::LValue(Function *fn, LValue *lval)
240 {
241    assert(lval);
242 
243    reg.file = lval->reg.file;
244    reg.size = lval->reg.size;
245    reg.data.id = -1;
246 
247    compMask = 0;
248    compound = 0;
249    ssa = 0;
250    fixedReg = 0;
251    noSpill = 0;
252 
253    fn->add(this, this->id);
254 }
255 
256 LValue *
clone(ClonePolicy<Function> & pol) const257 LValue::clone(ClonePolicy<Function>& pol) const
258 {
259    LValue *that = new_LValue(pol.context(), reg.file);
260 
261    pol.set<Value>(this, that);
262 
263    that->reg.size = this->reg.size;
264    that->reg.type = this->reg.type;
265    that->reg.data = this->reg.data;
266 
267    return that;
268 }
269 
270 bool
isUniform() const271 LValue::isUniform() const
272 {
273    if (defs.size() > 1)
274       return false;
275    Instruction *insn = getInsn();
276    if (!insn)
277       return false;
278    // let's not try too hard here for now ...
279    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
280 }
281 
Symbol(Program * prog,DataFile f,ubyte fidx)282 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
283 {
284    baseSym = NULL;
285 
286    reg.file = f;
287    reg.fileIndex = fidx;
288    reg.data.offset = 0;
289 
290    prog->add(this, this->id);
291 }
292 
293 Symbol *
clone(ClonePolicy<Function> & pol) const294 Symbol::clone(ClonePolicy<Function>& pol) const
295 {
296    Program *prog = pol.context()->getProgram();
297 
298    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
299 
300    pol.set<Value>(this, that);
301 
302    that->reg.size = this->reg.size;
303    that->reg.type = this->reg.type;
304    that->reg.data = this->reg.data;
305 
306    that->baseSym = this->baseSym;
307 
308    return that;
309 }
310 
311 bool
isUniform() const312 Symbol::isUniform() const
313 {
314    return
315       reg.file != FILE_SYSTEM_VALUE &&
316       reg.file != FILE_MEMORY_LOCAL &&
317       reg.file != FILE_SHADER_INPUT;
318 }
319 
ImmediateValue(Program * prog,uint32_t uval)320 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
321 {
322    memset(&reg, 0, sizeof(reg));
323 
324    reg.file = FILE_IMMEDIATE;
325    reg.size = 4;
326    reg.type = TYPE_U32;
327 
328    reg.data.u32 = uval;
329 
330    prog->add(this, this->id);
331 }
332 
ImmediateValue(Program * prog,float fval)333 ImmediateValue::ImmediateValue(Program *prog, float fval)
334 {
335    memset(&reg, 0, sizeof(reg));
336 
337    reg.file = FILE_IMMEDIATE;
338    reg.size = 4;
339    reg.type = TYPE_F32;
340 
341    reg.data.f32 = fval;
342 
343    prog->add(this, this->id);
344 }
345 
ImmediateValue(Program * prog,double dval)346 ImmediateValue::ImmediateValue(Program *prog, double dval)
347 {
348    memset(&reg, 0, sizeof(reg));
349 
350    reg.file = FILE_IMMEDIATE;
351    reg.size = 8;
352    reg.type = TYPE_F64;
353 
354    reg.data.f64 = dval;
355 
356    prog->add(this, this->id);
357 }
358 
// Create an immediate with the register description of @proto, reinterpreted
// as type @ty (type and size are overwritten accordingly). Unlike the other
// constructors this one does not register the value with a program.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   reg.size = typeSizeof(ty);
   reg.type = ty;
}
366 
367 ImmediateValue *
clone(ClonePolicy<Function> & pol) const368 ImmediateValue::clone(ClonePolicy<Function>& pol) const
369 {
370    Program *prog = pol.context()->getProgram();
371    ImmediateValue *that = new_ImmediateValue(prog, 0u);
372 
373    pol.set<Value>(this, that);
374 
375    that->reg.size = this->reg.size;
376    that->reg.type = this->reg.type;
377    that->reg.data = this->reg.data;
378 
379    return that;
380 }
381 
382 bool
isInteger(const int i) const383 ImmediateValue::isInteger(const int i) const
384 {
385    switch (reg.type) {
386    case TYPE_S8:
387       return reg.data.s8 == i;
388    case TYPE_U8:
389       return reg.data.u8 == i;
390    case TYPE_S16:
391       return reg.data.s16 == i;
392    case TYPE_U16:
393       return reg.data.u16 == i;
394    case TYPE_S32:
395    case TYPE_U32:
396       return reg.data.s32 == i; // as if ...
397    case TYPE_S64:
398    case TYPE_U64:
399       return reg.data.s64 == i; // as if ...
400    case TYPE_F32:
401       return reg.data.f32 == static_cast<float>(i);
402    case TYPE_F64:
403       return reg.data.f64 == static_cast<double>(i);
404    default:
405       return false;
406    }
407 }
408 
409 bool
isNegative() const410 ImmediateValue::isNegative() const
411 {
412    switch (reg.type) {
413    case TYPE_S8:  return reg.data.s8 < 0;
414    case TYPE_S16: return reg.data.s16 < 0;
415    case TYPE_S32:
416    case TYPE_U32: return reg.data.s32 < 0;
417    case TYPE_F32: return reg.data.u32 & (1 << 31);
418    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
419    default:
420       return false;
421    }
422 }
423 
424 bool
isPow2() const425 ImmediateValue::isPow2() const
426 {
427    if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
428       return util_is_power_of_two_or_zero64(reg.data.u64);
429    else
430       return util_is_power_of_two_or_zero(reg.data.u32);
431 }
432 
433 void
applyLog2()434 ImmediateValue::applyLog2()
435 {
436    switch (reg.type) {
437    case TYPE_S8:
438    case TYPE_S16:
439    case TYPE_S32:
440       assert(!this->isNegative());
441       FALLTHROUGH;
442    case TYPE_U8:
443    case TYPE_U16:
444    case TYPE_U32:
445       reg.data.u32 = util_logbase2(reg.data.u32);
446       break;
447    case TYPE_S64:
448       assert(!this->isNegative());
449       FALLTHROUGH;
450    case TYPE_U64:
451       reg.data.u64 = util_logbase2_64(reg.data.u64);
452       break;
453    case TYPE_F32:
454       reg.data.f32 = log2f(reg.data.f32);
455       break;
456    case TYPE_F64:
457       reg.data.f64 = log2(reg.data.f64);
458       break;
459    default:
460       assert(0);
461       break;
462    }
463 }
464 
465 bool
compare(CondCode cc,float fval) const466 ImmediateValue::compare(CondCode cc, float fval) const
467 {
468    if (reg.type != TYPE_F32)
469       ERROR("immediate value is not of type f32");
470 
471    switch (static_cast<CondCode>(cc & 7)) {
472    case CC_TR: return true;
473    case CC_FL: return false;
474    case CC_LT: return reg.data.f32 <  fval;
475    case CC_LE: return reg.data.f32 <= fval;
476    case CC_GT: return reg.data.f32 >  fval;
477    case CC_GE: return reg.data.f32 >= fval;
478    case CC_EQ: return reg.data.f32 == fval;
479    case CC_NE: return reg.data.f32 != fval;
480    default:
481       assert(0);
482       return false;
483    }
484 }
485 
486 ImmediateValue&
operator =(const ImmediateValue & that)487 ImmediateValue::operator=(const ImmediateValue &that)
488 {
489    this->reg = that.reg;
490    return (*this);
491 }
492 
493 bool
interfers(const Value * that) const494 Value::interfers(const Value *that) const
495 {
496    uint32_t idA, idB;
497 
498    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
499       return false;
500    if (this->asImm())
501       return false;
502 
503    if (this->asSym()) {
504       idA = this->join->reg.data.offset;
505       idB = that->join->reg.data.offset;
506    } else {
507       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
508       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
509    }
510 
511    if (idA < idB)
512       return (idA + this->reg.size > idB);
513    else
514    if (idA > idB)
515       return (idB + that->reg.size > idA);
516    else
517       return (idA == idB);
518 }
519 
520 bool
equals(const Value * that,bool strict) const521 Value::equals(const Value *that, bool strict) const
522 {
523    if (strict)
524       return this == that;
525 
526    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
527       return false;
528    if (that->reg.size != this->reg.size)
529       return false;
530 
531    if (that->reg.data.id != this->reg.data.id)
532       return false;
533 
534    return true;
535 }
536 
537 bool
equals(const Value * that,bool strict) const538 ImmediateValue::equals(const Value *that, bool strict) const
539 {
540    const ImmediateValue *imm = that->asImm();
541    if (!imm)
542       return false;
543    return reg.data.u64 == imm->reg.data.u64;
544 }
545 
546 bool
equals(const Value * that,bool strict) const547 Symbol::equals(const Value *that, bool strict) const
548 {
549    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
550       return false;
551    assert(that->asSym());
552 
553    if (this->baseSym != that->asSym()->baseSym)
554       return false;
555 
556    if (reg.file == FILE_SYSTEM_VALUE)
557       return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
558               this->reg.data.sv.index == that->reg.data.sv.index);
559    return this->reg.data.offset == that->reg.data.offset;
560 }
561 
init()562 void Instruction::init()
563 {
564    next = prev = 0;
565    serial = 0;
566 
567    cc = CC_ALWAYS;
568    rnd = ROUND_N;
569    cache = CACHE_CA;
570    subOp = 0;
571 
572    saturate = 0;
573    join = 0;
574    exit = 0;
575    terminator = 0;
576    ftz = 0;
577    dnz = 0;
578    perPatch = 0;
579    fixed = 0;
580    encSize = 0;
581    ipa = 0;
582    mask = 0;
583    precise = 0;
584 
585    lanes = 0xf;
586 
587    postFactor = 0;
588 
589    predSrc = -1;
590    flagsDef = -1;
591    flagsSrc = -1;
592 
593    sched = 0;
594    bb = NULL;
595 }
596 
Instruction()597 Instruction::Instruction()
598 {
599    init();
600 
601    op = OP_NOP;
602    dType = sType = TYPE_F32;
603 
604    id = -1;
605 }
606 
Instruction(Function * fn,operation opr,DataType ty)607 Instruction::Instruction(Function *fn, operation opr, DataType ty)
608 {
609    init();
610 
611    op = opr;
612    dType = sType = ty;
613 
614    fn->add(this, id);
615 }
616 
~Instruction()617 Instruction::~Instruction()
618 {
619    if (bb) {
620       Function *fn = bb->getFunction();
621       bb->remove(this);
622       fn->allInsns.remove(id);
623    }
624 
625    for (int s = 0; srcExists(s); ++s)
626       setSrc(s, NULL);
627    // must unlink defs too since the list pointers will get deallocated
628    for (int d = 0; defExists(d); ++d)
629       setDef(d, NULL);
630 }
631 
632 void
setDef(int i,Value * val)633 Instruction::setDef(int i, Value *val)
634 {
635    int size = defs.size();
636    if (i >= size) {
637       defs.resize(i + 1);
638       while (size <= i)
639          defs[size++].setInsn(this);
640    }
641    defs[i].set(val);
642 }
643 
644 void
setSrc(int s,Value * val)645 Instruction::setSrc(int s, Value *val)
646 {
647    int size = srcs.size();
648    if (s >= size) {
649       srcs.resize(s + 1);
650       while (size <= s)
651          srcs[size++].setInsn(this);
652    }
653    srcs[s].set(val);
654 }
655 
656 void
setSrc(int s,const ValueRef & ref)657 Instruction::setSrc(int s, const ValueRef& ref)
658 {
659    setSrc(s, ref.get());
660    srcs[s].mod = ref.mod;
661 }
662 
663 void
swapSources(int a,int b)664 Instruction::swapSources(int a, int b)
665 {
666    Value *value = srcs[a].get();
667    Modifier m = srcs[a].mod;
668 
669    setSrc(a, srcs[b]);
670 
671    srcs[b].set(value);
672    srcs[b].mod = m;
673 }
674 
// Fix up a source index after the sources starting at @s were moved by
// @delta: indices at or beyond @s move along; when sources were erased
// (@delta < 0), indices inside the erased window [@s + @delta, @s) become
// -1. All other indices stay as they are.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index < s) {
      const bool erased = (delta < 0) && (index >= (s + delta));
      if (erased)
         index = -1;
      return;
   }
   index += delta;
}
683 
684 // Moves sources [@s,last_source] by @delta.
685 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
686 void
moveSources(const int s,const int delta)687 Instruction::moveSources(const int s, const int delta)
688 {
689    if (delta == 0)
690       return;
691    assert(s + delta >= 0);
692 
693    int k;
694 
695    for (k = 0; srcExists(k); ++k) {
696       for (int i = 0; i < 2; ++i)
697          moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
698    }
699    moveSourcesAdjustIndex(predSrc, s, delta);
700    moveSourcesAdjustIndex(flagsSrc, s, delta);
701    if (asTex()) {
702       TexInstruction *tex = asTex();
703       moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
704       moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
705    }
706 
707    if (delta > 0) {
708       --k;
709       for (int p = k + delta; k >= s; --k, --p)
710          setSrc(p, src(k));
711    } else {
712       int p;
713       for (p = s; p < k; ++p)
714          setSrc(p + delta, src(p));
715       for (; (p + delta) < k; ++p)
716          setSrc(p + delta, NULL);
717    }
718 }
719 
720 void
takeExtraSources(int s,Value * values[3])721 Instruction::takeExtraSources(int s, Value *values[3])
722 {
723    values[0] = getIndirect(s, 0);
724    if (values[0])
725       setIndirect(s, 0, NULL);
726 
727    values[1] = getIndirect(s, 1);
728    if (values[1])
729       setIndirect(s, 1, NULL);
730 
731    values[2] = getPredicate();
732    if (values[2])
733       setPredicate(cc, NULL);
734 }
735 
736 void
putExtraSources(int s,Value * values[3])737 Instruction::putExtraSources(int s, Value *values[3])
738 {
739    if (values[0])
740       setIndirect(s, 0, values[0]);
741    if (values[1])
742       setIndirect(s, 1, values[1]);
743    if (values[2])
744       setPredicate(cc, values[2]);
745 }
746 
747 Instruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const748 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
749 {
750    if (!i)
751       i = new_Instruction(pol.context(), op, dType);
752 #if !defined(NDEBUG) && defined(__cpp_rtti)
753    assert(typeid(*i) == typeid(*this));
754 #endif
755 
756    pol.set<Instruction>(this, i);
757 
758    i->sType = sType;
759 
760    i->rnd = rnd;
761    i->cache = cache;
762    i->subOp = subOp;
763 
764    i->saturate = saturate;
765    i->join = join;
766    i->exit = exit;
767    i->mask = mask;
768    i->ftz = ftz;
769    i->dnz = dnz;
770    i->ipa = ipa;
771    i->lanes = lanes;
772    i->perPatch = perPatch;
773 
774    i->postFactor = postFactor;
775 
776    for (int d = 0; defExists(d); ++d)
777       i->setDef(d, pol.get(getDef(d)));
778 
779    for (int s = 0; srcExists(s); ++s) {
780       i->setSrc(s, pol.get(getSrc(s)));
781       i->src(s).mod = src(s).mod;
782    }
783 
784    i->cc = cc;
785    i->predSrc = predSrc;
786    i->flagsDef = flagsDef;
787    i->flagsSrc = flagsSrc;
788 
789    return i;
790 }
791 
792 unsigned int
defCount(unsigned int mask,bool singleFile) const793 Instruction::defCount(unsigned int mask, bool singleFile) const
794 {
795    unsigned int i, n;
796 
797    if (singleFile) {
798       unsigned int d = ffs(mask);
799       if (!d)
800          return 0;
801       for (i = d--; defExists(i); ++i)
802          if (getDef(i)->reg.file != getDef(d)->reg.file)
803             mask &= ~(1 << i);
804    }
805 
806    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
807       n += mask & 1;
808    return n;
809 }
810 
811 unsigned int
srcCount(unsigned int mask,bool singleFile) const812 Instruction::srcCount(unsigned int mask, bool singleFile) const
813 {
814    unsigned int i, n;
815 
816    if (singleFile) {
817       unsigned int s = ffs(mask);
818       if (!s)
819          return 0;
820       for (i = s--; srcExists(i); ++i)
821          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
822             mask &= ~(1 << i);
823    }
824 
825    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
826       n += mask & 1;
827    return n;
828 }
829 
830 bool
setIndirect(int s,int dim,Value * value)831 Instruction::setIndirect(int s, int dim, Value *value)
832 {
833    assert(this->srcExists(s));
834 
835    int p = srcs[s].indirect[dim];
836    if (p < 0) {
837       if (!value)
838          return true;
839       p = srcs.size();
840       while (p > 0 && !srcExists(p - 1))
841          --p;
842    }
843    setSrc(p, value);
844    srcs[p].usedAsPtr = (value != 0);
845    srcs[s].indirect[dim] = value ? p : -1;
846    return true;
847 }
848 
849 bool
setPredicate(CondCode ccode,Value * value)850 Instruction::setPredicate(CondCode ccode, Value *value)
851 {
852    cc = ccode;
853 
854    if (!value) {
855       if (predSrc >= 0) {
856          srcs[predSrc].set(NULL);
857          predSrc = -1;
858       }
859       return true;
860    }
861 
862    if (predSrc < 0) {
863       predSrc = srcs.size();
864       while (predSrc > 0 && !srcExists(predSrc - 1))
865          --predSrc;
866    }
867 
868    setSrc(predSrc, value);
869    return true;
870 }
871 
872 bool
writesPredicate() const873 Instruction::writesPredicate() const
874 {
875    for (int d = 0; defExists(d); ++d)
876       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
877          return true;
878    return false;
879 }
880 
881 bool
canCommuteDefSrc(const Instruction * i) const882 Instruction::canCommuteDefSrc(const Instruction *i) const
883 {
884    for (int d = 0; defExists(d); ++d)
885       for (int s = 0; i->srcExists(s); ++s)
886          if (getDef(d)->interfers(i->getSrc(s)))
887             return false;
888    return true;
889 }
890 
891 bool
canCommuteDefDef(const Instruction * i) const892 Instruction::canCommuteDefDef(const Instruction *i) const
893 {
894    for (int d = 0; defExists(d); ++d)
895       for (int c = 0; i->defExists(c); ++c)
896          if (getDef(d)->interfers(i->getDef(c)))
897             return false;
898    return true;
899 }
900 
901 bool
isCommutationLegal(const Instruction * i) const902 Instruction::isCommutationLegal(const Instruction *i) const
903 {
904    return canCommuteDefDef(i) &&
905       canCommuteDefSrc(i) &&
906       i->canCommuteDefSrc(this);
907 }
908 
TexInstruction(Function * fn,operation op)909 TexInstruction::TexInstruction(Function *fn, operation op)
910    : Instruction(fn, op, TYPE_F32), tex()
911 {
912    tex.rIndirectSrc = -1;
913    tex.sIndirectSrc = -1;
914 
915    if (op == OP_TXF)
916       sType = TYPE_U32;
917 }
918 
~TexInstruction()919 TexInstruction::~TexInstruction()
920 {
921    for (int c = 0; c < 3; ++c) {
922       dPdx[c].set(NULL);
923       dPdy[c].set(NULL);
924    }
925    for (int n = 0; n < 4; ++n)
926       for (int c = 0; c < 3; ++c)
927          offset[n][c].set(NULL);
928 }
929 
930 TexInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const931 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
932 {
933    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
934                           new_TexInstruction(pol.context(), op));
935 
936    Instruction::clone(pol, tex);
937 
938    tex->tex = this->tex;
939 
940    if (op == OP_TXD) {
941       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
942          tex->dPdx[c].set(dPdx[c]);
943          tex->dPdy[c].set(dPdy[c]);
944       }
945    }
946 
947    for (int n = 0; n < tex->tex.useOffsets; ++n)
948       for (int c = 0; c < 3; ++c)
949          tex->offset[n][c].set(offset[n][c]);
950 
951    return tex;
952 }
953 
// Static description of each texture target, one row per target.
//
// NOTE(review): judging by the rows, the columns are: name, number of
// coordinate dimensions, total argument count, is-array, is-cube,
// is-shadow -- confirm against the declaration of Target::Desc.
// NOTE(review): the row order presumably has to match the texture target
// enumeration used to index this table -- confirm before reordering.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
975 
976 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
977 {
978    { "NONE",         0, {  0,  0,  0,  0 },  UINT },
979 
980    { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
981    { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
982    { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
983    { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
984    { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
985    { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
986    { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },
987 
988    { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
989    { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
990    { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
991    { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
992    { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
993    { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
994    { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
995    { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
996    { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
997    { "R8UI",         1, {  8,  0,  0,  0 },  UINT },
998 
999    { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
1000    { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
1001    { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
1002    { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
1003    { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
1004    { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
1005    { "R32I",         1, { 32,  0,  0,  0 },  SINT },
1006    { "R16I",         1, { 16,  0,  0,  0 },  SINT },
1007    { "R8I",          1, {  8,  0,  0,  0 },  SINT },
1008 
1009    { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
1010    { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
1011    { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
1012    { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
1013    { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
1014    { "R16",          1, { 16,  0,  0,  0 }, UNORM },
1015    { "R8",           1, {  8,  0,  0,  0 }, UNORM },
1016 
1017    { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1018    { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
1019    { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
1020    { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
1021    { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
1022    { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },
1023 
1024    { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
1025 };
1026 
// Map a gallium pipe_format to the matching entry of formatTable.
//
// Returns a pointer into formatTable. Formats without a case below trip an
// assert and, when asserts are compiled out, fall back to the FMT_NONE
// entry.
const struct TexInstruction::ImgFormatDesc *
TexInstruction::translateImgFormat(enum pipe_format format)
{

// FMT_CASE(a, b): "case PIPE_FORMAT_a" returns the formatTable entry at
// index nv50_ir::FMT_b. The macro is not #undef'd after the switch.
#define FMT_CASE(a, b) \
  case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]

   switch (format) {
   FMT_CASE(NONE, NONE);

   // float formats
   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
   FMT_CASE(R32G32_FLOAT, RG32F);
   FMT_CASE(R16G16_FLOAT, RG16F);
   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
   FMT_CASE(R32_FLOAT, R32F);
   FMT_CASE(R16_FLOAT, R16F);

   // unsigned integer formats
   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
   FMT_CASE(R32G32_UINT, RG32UI);
   FMT_CASE(R16G16_UINT, RG16UI);
   FMT_CASE(R8G8_UINT, RG8UI);
   FMT_CASE(R32_UINT, R32UI);
   FMT_CASE(R16_UINT, R16UI);
   FMT_CASE(R8_UINT, R8UI);

   // signed integer formats
   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
   FMT_CASE(R32G32_SINT, RG32I);
   FMT_CASE(R16G16_SINT, RG16I);
   FMT_CASE(R8G8_SINT, RG8I);
   FMT_CASE(R32_SINT, R32I);
   FMT_CASE(R16_SINT, R16I);
   FMT_CASE(R8_SINT, R8I);

   // unsigned normalized formats
   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
   FMT_CASE(R16G16_UNORM, RG16);
   FMT_CASE(R8G8_UNORM, RG8);
   FMT_CASE(R16_UNORM, R16);
   FMT_CASE(R8_UNORM, R8);

   // signed normalized formats
   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
   FMT_CASE(R16G16_SNORM, RG16_SNORM);
   FMT_CASE(R8G8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(B8G8R8A8_UNORM, BGRA8);

   default:
      assert(!"Unexpected format");
      return &formatTable[nv50_ir::FMT_NONE];
   }
}
1088 
1089 void
setIndirectR(Value * v)1090 TexInstruction::setIndirectR(Value *v)
1091 {
1092    int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1093    if (p >= 0) {
1094       tex.rIndirectSrc = p;
1095       setSrc(p, v);
1096       srcs[p].usedAsPtr = !!v;
1097    }
1098 }
1099 
1100 void
setIndirectS(Value * v)1101 TexInstruction::setIndirectS(Value *v)
1102 {
1103    int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1104    if (p >= 0) {
1105       tex.sIndirectSrc = p;
1106       setSrc(p, v);
1107       srcs[p].usedAsPtr = !!v;
1108    }
1109 }
1110 
CmpInstruction(Function * fn,operation op)1111 CmpInstruction::CmpInstruction(Function *fn, operation op)
1112    : Instruction(fn, op, TYPE_F32)
1113 {
1114    setCond = CC_ALWAYS;
1115 }
1116 
1117 CmpInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1118 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1119 {
1120    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1121                           new_CmpInstruction(pol.context(), op));
1122    cmp->dType = dType;
1123    Instruction::clone(pol, cmp);
1124    cmp->setCond = setCond;
1125    return cmp;
1126 }
1127 
FlowInstruction(Function * fn,operation op,void * targ)1128 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1129    : Instruction(fn, op, TYPE_NONE)
1130 {
1131    if (op == OP_CALL)
1132       target.fn = reinterpret_cast<Function *>(targ);
1133    else
1134       target.bb = reinterpret_cast<BasicBlock *>(targ);
1135 
1136    if (op == OP_BRA ||
1137        op == OP_CONT || op == OP_BREAK ||
1138        op == OP_RET || op == OP_EXIT)
1139       terminator = 1;
1140    else
1141    if (op == OP_JOIN)
1142       terminator = targ ? 1 : 0;
1143 
1144    allWarp = absolute = limit = builtin = indirect = 0;
1145 }
1146 
1147 FlowInstruction *
clone(ClonePolicy<Function> & pol,Instruction * i) const1148 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1149 {
1150    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1151                             new_FlowInstruction(pol.context(), op, NULL));
1152 
1153    Instruction::clone(pol, flow);
1154    flow->allWarp = allWarp;
1155    flow->absolute = absolute;
1156    flow->limit = limit;
1157    flow->builtin = builtin;
1158 
1159    if (builtin)
1160       flow->target.builtin = target.builtin;
1161    else
1162    if (op == OP_CALL)
1163       flow->target.fn = target.fn;
1164    else
1165    if (target.bb)
1166       flow->target.bb = pol.get<BasicBlock>(target.bb);
1167 
1168    return flow;
1169 }
1170 
Program(Type type,Target * arch)1171 Program::Program(Type type, Target *arch)
1172    : progType(type),
1173      target(arch),
1174      tlsSize(0),
1175      mem_Instruction(sizeof(Instruction), 6),
1176      mem_CmpInstruction(sizeof(CmpInstruction), 4),
1177      mem_TexInstruction(sizeof(TexInstruction), 4),
1178      mem_FlowInstruction(sizeof(FlowInstruction), 4),
1179      mem_LValue(sizeof(LValue), 8),
1180      mem_Symbol(sizeof(Symbol), 7),
1181      mem_ImmediateValue(sizeof(ImmediateValue), 7),
1182      driver(NULL),
1183      driver_out(NULL)
1184 {
1185    code = NULL;
1186    binSize = 0;
1187 
1188    maxGPR = -1;
1189    fp64 = false;
1190    persampleInvocation = false;
1191 
1192    main = new Function(this, "MAIN", ~0);
1193    calls.insert(&main->call);
1194 
1195    dbgFlags = 0;
1196    optLevel = 0;
1197 
1198    targetPriv = NULL;
1199 }
1200 
~Program()1201 Program::~Program()
1202 {
1203    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1204       delete reinterpret_cast<Function *>(it.get());
1205 
1206    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1207       releaseValue(reinterpret_cast<Value *>(it.get()));
1208 }
1209 
releaseInstruction(Instruction * insn)1210 void Program::releaseInstruction(Instruction *insn)
1211 {
1212    // TODO: make this not suck so much
1213 
1214    insn->~Instruction();
1215 
1216    if (insn->asCmp())
1217       mem_CmpInstruction.release(insn);
1218    else
1219    if (insn->asTex())
1220       mem_TexInstruction.release(insn);
1221    else
1222    if (insn->asFlow())
1223       mem_FlowInstruction.release(insn);
1224    else
1225       mem_Instruction.release(insn);
1226 }
1227 
releaseValue(Value * value)1228 void Program::releaseValue(Value *value)
1229 {
1230    value->~Value();
1231 
1232    if (value->asLValue())
1233       mem_LValue.release(value);
1234    else
1235    if (value->asImm())
1236       mem_ImmediateValue.release(value);
1237    else
1238    if (value->asSym())
1239       mem_Symbol.release(value);
1240 }
1241 
1242 
1243 } // namespace nv50_ir
1244 
1245 extern "C" {
1246 
1247 static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1248 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1249                        struct nv50_ir_prog_info_out *info_out)
1250 {
1251    info_out->target = info->target;
1252    info_out->type = info->type;
1253    if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1254       info_out->prop.tp.domain = PIPE_PRIM_MAX;
1255       info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1256    }
1257    if (info->type == PIPE_SHADER_GEOMETRY) {
1258       info_out->prop.gp.instanceCount = 1;
1259       info_out->prop.gp.maxVertices = 1;
1260    }
1261    if (info->type == PIPE_SHADER_COMPUTE) {
1262       info->prop.cp.numThreads[0] =
1263       info->prop.cp.numThreads[1] =
1264       info->prop.cp.numThreads[2] = 1;
1265    }
1266    info_out->bin.smemSize = info->bin.smemSize;
1267    info_out->io.genUserClip = info->io.genUserClip;
1268    info_out->io.instanceId = 0xff;
1269    info_out->io.vertexId = 0xff;
1270    info_out->io.edgeFlagIn = 0xff;
1271    info_out->io.edgeFlagOut = 0xff;
1272    info_out->io.fragDepth = 0xff;
1273    info_out->io.sampleMask = 0xff;
1274 }
1275 
1276 int
nv50_ir_generate_code(struct nv50_ir_prog_info * info,struct nv50_ir_prog_info_out * info_out)1277 nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1278                       struct nv50_ir_prog_info_out *info_out)
1279 {
1280    int ret = 0;
1281 
1282    nv50_ir::Program::Type type;
1283 
1284    nv50_ir_init_prog_info(info, info_out);
1285 
1286 #define PROG_TYPE_CASE(a, b)                                      \
1287    case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1288 
1289    switch (info->type) {
1290    PROG_TYPE_CASE(VERTEX, VERTEX);
1291    PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1292    PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1293    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1294    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1295    PROG_TYPE_CASE(COMPUTE, COMPUTE);
1296    default:
1297       INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1298       return -1;
1299    }
1300    INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1301 
1302    nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1303    if (!targ)
1304       return -1;
1305 
1306    nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1307    if (!prog) {
1308       nv50_ir::Target::destroy(targ);
1309       return -1;
1310    }
1311    prog->driver = info;
1312    prog->driver_out = info_out;
1313    prog->dbgFlags = info->dbgFlags;
1314    prog->optLevel = info->optLevel;
1315 
1316    switch (info->bin.sourceRep) {
1317    case PIPE_SHADER_IR_NIR:
1318       ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1319       break;
1320    case PIPE_SHADER_IR_TGSI:
1321       ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1322       break;
1323    default:
1324       ret = -1;
1325       break;
1326    }
1327    if (ret < 0)
1328       goto out;
1329    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1330       prog->print();
1331 
1332    targ->parseDriverInfo(info, info_out);
1333    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1334 
1335    prog->convertToSSA();
1336 
1337    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1338       prog->print();
1339 
1340    prog->optimizeSSA(info->optLevel);
1341    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1342 
1343    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1344       prog->print();
1345 
1346    if (!prog->registerAllocation()) {
1347       ret = -4;
1348       goto out;
1349    }
1350    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1351 
1352    prog->optimizePostRA(info->optLevel);
1353 
1354    if (!prog->emitBinary(info_out)) {
1355       ret = -5;
1356       goto out;
1357    }
1358 
1359 out:
1360    INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1361 
1362    info_out->bin.maxGPR = prog->maxGPR;
1363    info_out->bin.code = prog->code;
1364    info_out->bin.codeSize = prog->binSize;
1365    info_out->bin.tlsSpace = prog->tlsSize;
1366 
1367    delete prog;
1368    nv50_ir::Target::destroy(targ);
1369 
1370    return ret;
1371 }
1372 
1373 } // extern "C"
1374