1 // Copyright 2018 yuzu Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4
5 #include "common/assert.h"
6 #include "common/common_types.h"
7 #include "common/logging/log.h"
8 #include "video_core/engines/shader_bytecode.h"
9 #include "video_core/shader/node_helper.h"
10 #include "video_core/shader/shader_ir.h"
11
12 namespace VideoCommon::Shader {
13
14 using Tegra::Shader::Instruction;
15 using Tegra::Shader::OpCode;
16 using Tegra::Shader::SubOp;
17
DecodeArithmetic(NodeBlock & bb,u32 pc)18 u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 Node op_a = GetRegister(instr.gpr8);
23
24 Node op_b = [&] {
25 if (instr.is_b_imm) {
26 return GetImmediate19(instr);
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::MOV_C:
36 case OpCode::Id::MOV_R: {
37 // MOV does not have neither 'abs' nor 'neg' bits.
38 SetRegister(bb, instr.gpr0, op_b);
39 break;
40 }
41 case OpCode::Id::FMUL_C:
42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 if (instr.fmul.tab5cb8_2 != 0) {
46 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 instr.fmul.tab5cb8_2.Value());
48 }
49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
53
54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55
56 static constexpr std::array FmulPostFactor = {
57 1.000f, // None
58 0.500f, // Divide 2
59 0.250f, // Divide 4
60 0.125f, // Divide 8
61 8.000f, // Mul 8
62 4.000f, // Mul 4
63 2.000f, // Mul 2
64 };
65
66 if (instr.fmul.postfactor != 0) {
67 op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
68 Immediate(FmulPostFactor[instr.fmul.postfactor]));
69 }
70
71 // TODO(Rodrigo): Should precise be used when there's a postfactor?
72 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
73
74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
77 SetRegister(bb, instr.gpr0, value);
78 break;
79 }
80 case OpCode::Id::FADD_C:
81 case OpCode::Id::FADD_R:
82 case OpCode::Id::FADD_IMM: {
83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
85
86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
90 SetRegister(bb, instr.gpr0, value);
91 break;
92 }
93 case OpCode::Id::MUFU: {
94 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
95
96 Node value = [&]() {
97 switch (instr.sub_op) {
98 case SubOp::Cos:
99 return Operation(OperationCode::FCos, PRECISE, op_a);
100 case SubOp::Sin:
101 return Operation(OperationCode::FSin, PRECISE, op_a);
102 case SubOp::Ex2:
103 return Operation(OperationCode::FExp2, PRECISE, op_a);
104 case SubOp::Lg2:
105 return Operation(OperationCode::FLog2, PRECISE, op_a);
106 case SubOp::Rcp:
107 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
108 case SubOp::Rsq:
109 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
110 case SubOp::Sqrt:
111 return Operation(OperationCode::FSqrt, PRECISE, op_a);
112 default:
113 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
114 return Immediate(0);
115 }
116 }();
117 value = GetSaturatedFloat(value, instr.alu.saturate_d);
118
119 SetRegister(bb, instr.gpr0, value);
120 break;
121 }
122 case OpCode::Id::FMNMX_C:
123 case OpCode::Id::FMNMX_R:
124 case OpCode::Id::FMNMX_IMM: {
125 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
126 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
127
128 const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
129
130 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
131 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
132 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
133
134 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
135 SetRegister(bb, instr.gpr0, value);
136 break;
137 }
138 case OpCode::Id::FCMP_RR:
139 case OpCode::Id::FCMP_RC:
140 case OpCode::Id::FCMP_IMMR: {
141 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
142 Node op_c = GetRegister(instr.gpr39);
143 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
144 SetRegister(
145 bb, instr.gpr0,
146 Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
147 break;
148 }
149 case OpCode::Id::RRO_C:
150 case OpCode::Id::RRO_R:
151 case OpCode::Id::RRO_IMM: {
152 LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
153
154 // Currently RRO is only implemented as a register move.
155 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
156 SetRegister(bb, instr.gpr0, op_b);
157 break;
158 }
159 default:
160 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
161 }
162
163 return pc;
164 }
165
166 } // namespace VideoCommon::Shader
167