1 #include "algorithm.h"
2 #include "../../../plugins/loader.h"
3 #include <thread>
4 
5 #define INVALID_MNEMONIC "db"
6 #define DECODE_STATE(address) ENQUEUE_STATE(AssemblerAlgorithm::DecodeState, address, -1, nullptr)
7 
8 namespace REDasm {
9 
AssemblerAlgorithm()10 AssemblerAlgorithm::AssemblerAlgorithm(): StateMachine(), m_disassembler(nullptr), m_assembler(nullptr) { }
11 
AssemblerAlgorithm(DisassemblerAPI * disassembler,AssemblerPlugin * assembler)12 AssemblerAlgorithm::AssemblerAlgorithm(DisassemblerAPI *disassembler, AssemblerPlugin *assembler): StateMachine(), m_document(disassembler->document()), m_disassembler(disassembler), m_assembler(assembler), m_currentsegment(nullptr), m_analyzed(0)
13 {
14     m_loader = m_disassembler->loader();
15 
16     if(assembler->hasFlag(AssemblerFlags::CanEmulate))
17         m_emulator = std::unique_ptr<Emulator>(assembler->createEmulator(disassembler));
18 
19     REGISTER_STATE(AssemblerAlgorithm::DecodeState, &AssemblerAlgorithm::decodeState);
20     REGISTER_STATE(AssemblerAlgorithm::JumpState, &AssemblerAlgorithm::jumpState);
21     REGISTER_STATE(AssemblerAlgorithm::CallState, &AssemblerAlgorithm::callState);
22     REGISTER_STATE(AssemblerAlgorithm::BranchState, &AssemblerAlgorithm::branchState);
23     REGISTER_STATE(AssemblerAlgorithm::BranchMemoryState, &AssemblerAlgorithm::branchMemoryState);
24     REGISTER_STATE(AssemblerAlgorithm::AddressTableState, &AssemblerAlgorithm::addressTableState);
25     REGISTER_STATE(AssemblerAlgorithm::MemoryState, &AssemblerAlgorithm::memoryState);
26     REGISTER_STATE(AssemblerAlgorithm::PointerState, &AssemblerAlgorithm::pointerState);
27     REGISTER_STATE(AssemblerAlgorithm::ImmediateState, &AssemblerAlgorithm::immediateState);
28 }
29 
enqueue(address_t address)30 void AssemblerAlgorithm::enqueue(address_t address) { DECODE_STATE(address); }
31 
analyze()32 void AssemblerAlgorithm::analyze()
33 {
34     if(m_analyzed)
35     {
36         REDasm::status("Analyzing (Fast)...");
37         m_analyzer->analyzeFast();
38         m_disassembler->computeBasicBlocks();
39         m_document->moveToEP();
40         return;
41     }
42 
43     m_analyzed = true;
44     LoaderPlugin* loader = m_disassembler->loader();
45     m_analyzer.reset(loader->createAnalyzer(m_disassembler));
46 
47     REDasm::status("Analyzing...");
48     m_analyzer->analyze();
49     m_disassembler->computeBasicBlocks();
50     m_document->moveToEP();
51 
52     // Trigger a Fast Analysis when post disassembling is completed
53     EVENT_CONNECT(m_disassembler, busyChanged, this, [&]() {
54         if(m_disassembler->busy())
55             return;
56 
57         this->analyze();
58     });
59 }
60 
loadTargets(const InstructionPtr & instruction)61 void AssemblerAlgorithm::loadTargets(const InstructionPtr &instruction)
62 {
63     for(address_t target : instruction->meta.targets) // Get precalculated targets
64         m_disassembler->pushTarget(target, instruction->address);
65 }
66 
validateTarget(const InstructionPtr & instruction) const67 void AssemblerAlgorithm::validateTarget(const InstructionPtr &instruction) const
68 {
69     if(m_disassembler->getTargetsCount(instruction->address))
70         return;
71 
72     const Operand* op = instruction->target();
73 
74     if(op && !op->isNumeric())
75         return;
76 
77     REDasm::problem("No targets found for " + REDasm::quoted(instruction->mnemonic) + " @ " + REDasm::hex(instruction->address));
78 }
79 
validateState(const State & state) const80 bool AssemblerAlgorithm::validateState(const State &state) const
81 {
82     if(!StateMachine::validateState(state))
83         return false;
84 
85     return m_document->segment(state.address);
86 }
87 
onNewState(const State * state) const88 void AssemblerAlgorithm::onNewState(const State* state) const
89 {
90     REDasm::statusProgress("Analyzing @ " + REDasm::hex(state->address, m_assembler->bits()) +
91                            " >> " + state->name, this->pending());
92 }
93 
disassembleInstruction(address_t address,const InstructionPtr & instruction)94 u32 AssemblerAlgorithm::disassembleInstruction(address_t address, const InstructionPtr& instruction)
95 {
96     if(!this->canBeDisassembled(address))
97         return AssemblerAlgorithm::SKIP;
98 
99     Symbol* symbol = m_document->symbol(address);
100 
101     if(symbol && !symbol->isLocked() && !symbol->is(SymbolType::Code))
102         m_document->eraseSymbol(symbol->address);
103 
104     instruction->address = address;
105 
106     BufferView view = m_loader->view(address);
107     return m_assembler->decode(view, instruction) ? AssemblerAlgorithm::OK : AssemblerAlgorithm::FAIL;
108 }
109 
done(address_t address)110 void AssemblerAlgorithm::done(address_t address) { m_done.insert(address); }
111 
onDecoded(const InstructionPtr & instruction)112 void AssemblerAlgorithm::onDecoded(const InstructionPtr &instruction)
113 {
114     if(instruction->is(InstructionType::Branch))
115     {
116         this->loadTargets(instruction);
117         this->validateTarget(instruction);
118     }
119 
120     for(const Operand& op : instruction->operands)
121     {
122         if(!op.isNumeric() || op.displacementIsDynamic())
123         {
124             if(m_emulator && !m_emulator->hasError())
125                 this->emulateOperand(&op, instruction);
126 
127             if(!op.is(OperandType::Displacement)) // Try static displacement analysis
128                 continue;
129         }
130 
131         if(op.is(OperandType::Displacement))
132         {
133             if(op.displacementIsDynamic())
134                 EXECUTE_STATE(AssemblerAlgorithm::AddressTableState, op.disp.displacement, op.index, instruction);
135             else if(op.displacementCanBeAddress())
136                 EXECUTE_STATE(AssemblerAlgorithm::MemoryState, op.disp.displacement, op.index, instruction);
137         }
138         else if(op.is(OperandType::Memory))
139             EXECUTE_STATE(AssemblerAlgorithm::MemoryState, op.u_value, op.index, instruction);
140         else if(op.is(OperandType::Immediate))
141             EXECUTE_STATE(AssemblerAlgorithm::ImmediateState, op.u_value, op.index, instruction);
142 
143         this->onDecodedOperand(&op, instruction);
144     }
145 }
146 
onDecodeFailed(const InstructionPtr & instruction)147 void AssemblerAlgorithm::onDecodeFailed(const InstructionPtr &instruction)
148 {
149     REDasm::problem("Invalid instruction @ " + REDasm::hex(instruction->address));
150 
151     if(!instruction->size)
152         return;
153 
154     this->enqueue(instruction->endAddress());
155 }
156 
onDecodedOperand(const Operand * op,const InstructionPtr & instruction)157 void AssemblerAlgorithm::onDecodedOperand(const Operand *op, const InstructionPtr &instruction)
158 {
159     if(!op->isCharacter())
160         return;
161 
162     std::string charinfo = REDasm::hex(op->u_value, 8, true) + "=" + REDasm::quoted_s(std::string(1, static_cast<char>(op->u_value)));
163     m_document->autoComment(instruction->address, charinfo);
164 }
165 
onEmulatedOperand(const Operand * op,const InstructionPtr & instruction,u64 value)166 void AssemblerAlgorithm::onEmulatedOperand(const Operand *op, const InstructionPtr &instruction, u64 value)
167 {
168     Segment* segment = m_document->segment(value);
169 
170     if(!segment || segment->isPureCode()) // Don't flood "Pure-Code" segments with symbols
171         return;
172 
173     EXECUTE_STATE(AssemblerAlgorithm::AddressTableState, value, op->index, instruction);
174 }
175 
decodeState(const State * state)176 void AssemblerAlgorithm::decodeState(const State *state)
177 {
178     InstructionPtr instruction = std::make_shared<Instruction>();
179     u32 status = this->disassemble(state->address, instruction);
180 
181     if(status == AssemblerAlgorithm::SKIP)
182         return;
183 
184     m_document->instruction(instruction);
185 }
186 
jumpState(const State * state)187 void AssemblerAlgorithm::jumpState(const State *state)
188 {
189     s64 dir = BRANCH_DIRECTION(state->instruction, state->address);
190 
191     if(!dir)
192         m_document->autoComment(state->instruction->address, "Infinite loop");
193 
194     m_document->branch(state->address, dir);
195     DECODE_STATE(state->address);
196 }
197 
callState(const State * state)198 void AssemblerAlgorithm::callState(const State *state) { m_document->symbol(state->address, SymbolType::Function); }
199 
branchState(const State * state)200 void AssemblerAlgorithm::branchState(const State *state)
201 {
202     InstructionPtr instruction = state->instruction;
203 
204     if(instruction->is(InstructionType::Call))
205         FORWARD_STATE(AssemblerAlgorithm::CallState, state);
206     else if(instruction->is(InstructionType::Jump))
207         FORWARD_STATE(AssemblerAlgorithm::JumpState, state);
208     else
209     {
210         REDasm::problem("Invalid branch state for instruction " + REDasm::quoted(instruction->mnemonic) +
211                         " @ " + REDasm::hex(instruction->address, m_assembler->bits()));
212         return;
213     }
214 
215     m_disassembler->pushReference(state->address, instruction->address);
216     m_disassembler->pushTarget(state->address, instruction->address);
217 }
218 
branchMemoryState(const State * state)219 void AssemblerAlgorithm::branchMemoryState(const State *state)
220 {
221     InstructionPtr instruction = state->instruction;
222     m_disassembler->pushTarget(state->address, instruction->address);
223 
224     Symbol* symbol = m_document->symbol(state->address);
225 
226     if(symbol && symbol->isImport()) // Don't dereference imports
227         return;
228 
229     u64 value = 0;
230     m_disassembler->dereference(state->address, &value);
231     m_document->symbol(state->address, SymbolType::Data | SymbolType::Pointer);
232 
233     if(instruction->is(InstructionType::Call))
234         m_document->symbol(value, SymbolType::Function);
235     else
236         m_document->symbol(value, SymbolType::Code);
237 
238     m_disassembler->pushReference(value, state->address);
239 }
240 
addressTableState(const State * state)241 void AssemblerAlgorithm::addressTableState(const State *state)
242 {
243     InstructionPtr instruction = state->instruction;
244     s64 c = m_disassembler->checkAddressTable(instruction, state->address);
245 
246     if(c < 0)
247         return;
248 
249     if(c > 1)
250     {
251         m_disassembler->pushReference(state->address, instruction->address);
252         state_t fwdstate = AssemblerAlgorithm::BranchState;
253 
254         if(instruction->is(InstructionType::Call))
255             m_document->autoComment(instruction->address, "Call Table with " + std::to_string(c) + " cases(s)");
256         else if(instruction->is(InstructionType::Jump))
257             m_document->autoComment(instruction->address, "Jump Table with " + std::to_string(c) + " cases(s)");
258         else
259         {
260             m_document->autoComment(instruction->address, "Address Table with " + std::to_string(c) + " cases(s)");
261             fwdstate = AssemblerAlgorithm::MemoryState;
262         }
263 
264         ReferenceSet targets = m_disassembler->getTargets(instruction->address);
265 
266         for(address_t target : targets)
267             FORWARD_STATE_VALUE(fwdstate, target, state);
268 
269         return;
270     }
271 
272     const Operand* op = state->operand();
273 
274     if(op->is(OperandType::Displacement))
275         FORWARD_STATE(AssemblerAlgorithm::PointerState, state);
276     else if(op->is(OperandType::Memory))
277         FORWARD_STATE(AssemblerAlgorithm::MemoryState, state);
278     else
279         FORWARD_STATE(AssemblerAlgorithm::ImmediateState, state);
280 }
281 
memoryState(const State * state)282 void AssemblerAlgorithm::memoryState(const State *state)
283 {
284     u64 value = 0;
285 
286     if(!m_disassembler->dereference(state->address, &value))
287     {
288         FORWARD_STATE(AssemblerAlgorithm::ImmediateState, state);
289         return;
290     }
291 
292     InstructionPtr instruction = state->instruction;
293     m_disassembler->pushReference(state->address, instruction->address);
294 
295     if(instruction->is(InstructionType::Branch) && state->operand()->isTarget())
296         FORWARD_STATE(AssemblerAlgorithm::BranchMemoryState, state);
297     else
298         FORWARD_STATE(AssemblerAlgorithm::PointerState, state);
299 }
300 
pointerState(const State * state)301 void AssemblerAlgorithm::pointerState(const State *state)
302 {
303     u64 value = 0;
304 
305     if(!m_disassembler->dereference(state->address, &value))
306     {
307         FORWARD_STATE(AssemblerAlgorithm::ImmediateState, state);
308         return;
309     }
310 
311     m_document->symbol(state->address, SymbolType::Data | SymbolType::Pointer);
312     m_disassembler->checkLocation(state->address, value); // Create Symbol + XRefs
313 }
314 
immediateState(const State * state)315 void AssemblerAlgorithm::immediateState(const State *state)
316 {
317     InstructionPtr instruction = state->instruction;
318 
319     if(instruction->is(InstructionType::Branch) && state->operand()->isTarget())
320         FORWARD_STATE(AssemblerAlgorithm::BranchState, state);
321     else
322         m_disassembler->checkLocation(instruction->address, state->address); // Create Symbol + XRefs
323 }
324 
canBeDisassembled(address_t address)325 bool AssemblerAlgorithm::canBeDisassembled(address_t address)
326 {
327     BufferView view = m_loader->view(address);
328 
329     if(view.eob())
330         return false;
331 
332     if(!m_currentsegment || !m_currentsegment->contains(address))
333         m_currentsegment = m_document->segment(address);
334 
335     if(!m_currentsegment || !m_currentsegment->is(SegmentType::Code))
336         return false;
337 
338     if(!m_loader->offset(address).valid)
339         return false;
340 
341     return true;
342 }
343 
createInvalidInstruction(const InstructionPtr & instruction)344 void AssemblerAlgorithm::createInvalidInstruction(const InstructionPtr &instruction)
345 {
346     if(!instruction->size)
347         instruction->size = 1; // Invalid instruction uses at least 1 byte
348 
349     instruction->type = InstructionType::Invalid;
350     instruction->mnemonic = INVALID_MNEMONIC;
351 }
352 
disassemble(address_t address,const InstructionPtr & instruction)353 u32 AssemblerAlgorithm::disassemble(address_t address, const InstructionPtr &instruction)
354 {
355     auto it = m_done.find(address);
356 
357     if(it != m_done.end())
358         return AssemblerAlgorithm::SKIP;
359 
360     this->done(address);
361     u32 result = this->disassembleInstruction(address, instruction);
362 
363     if(result == AssemblerAlgorithm::FAIL)
364     {
365         this->createInvalidInstruction(instruction);
366         this->onDecodeFailed(instruction);
367     }
368     else
369     {
370         this->emulate(instruction);
371         this->onDecoded(instruction);
372     }
373 
374     return result;
375 }
376 
emulateOperand(const Operand * op,const InstructionPtr & instruction)377 void AssemblerAlgorithm::emulateOperand(const Operand *op, const InstructionPtr &instruction)
378 {
379     u64 value = 0;
380 
381     if(op->is(OperandType::Register))
382     {
383         if(!m_emulator->read(op, &value))
384             return;
385     }
386     else if(op->is(OperandType::Displacement))
387     {
388         if(!m_emulator->displacement(op, &value))
389             return;
390     }
391     else
392         return;
393 
394     this->onEmulatedOperand(op, instruction, value);
395 }
396 
emulate(const InstructionPtr & instruction)397 void AssemblerAlgorithm::emulate(const InstructionPtr &instruction)
398 {
399     if(!m_emulator)
400         return;
401 
402     m_emulator->emulate(instruction);
403 }
404 
405 
406 } // namespace REDasm
407