1 #include "algorithm.h"
2 #include "../../../plugins/loader.h"
3 #include <thread>
4
5 #define INVALID_MNEMONIC "db"
6 #define DECODE_STATE(address) ENQUEUE_STATE(AssemblerAlgorithm::DecodeState, address, -1, nullptr)
7
8 namespace REDasm {
9
AssemblerAlgorithm()10 AssemblerAlgorithm::AssemblerAlgorithm(): StateMachine(), m_disassembler(nullptr), m_assembler(nullptr) { }
11
AssemblerAlgorithm(DisassemblerAPI * disassembler,AssemblerPlugin * assembler)12 AssemblerAlgorithm::AssemblerAlgorithm(DisassemblerAPI *disassembler, AssemblerPlugin *assembler): StateMachine(), m_document(disassembler->document()), m_disassembler(disassembler), m_assembler(assembler), m_currentsegment(nullptr), m_analyzed(0)
13 {
14 m_loader = m_disassembler->loader();
15
16 if(assembler->hasFlag(AssemblerFlags::CanEmulate))
17 m_emulator = std::unique_ptr<Emulator>(assembler->createEmulator(disassembler));
18
19 REGISTER_STATE(AssemblerAlgorithm::DecodeState, &AssemblerAlgorithm::decodeState);
20 REGISTER_STATE(AssemblerAlgorithm::JumpState, &AssemblerAlgorithm::jumpState);
21 REGISTER_STATE(AssemblerAlgorithm::CallState, &AssemblerAlgorithm::callState);
22 REGISTER_STATE(AssemblerAlgorithm::BranchState, &AssemblerAlgorithm::branchState);
23 REGISTER_STATE(AssemblerAlgorithm::BranchMemoryState, &AssemblerAlgorithm::branchMemoryState);
24 REGISTER_STATE(AssemblerAlgorithm::AddressTableState, &AssemblerAlgorithm::addressTableState);
25 REGISTER_STATE(AssemblerAlgorithm::MemoryState, &AssemblerAlgorithm::memoryState);
26 REGISTER_STATE(AssemblerAlgorithm::PointerState, &AssemblerAlgorithm::pointerState);
27 REGISTER_STATE(AssemblerAlgorithm::ImmediateState, &AssemblerAlgorithm::immediateState);
28 }
29
enqueue(address_t address)30 void AssemblerAlgorithm::enqueue(address_t address) { DECODE_STATE(address); }
31
analyze()32 void AssemblerAlgorithm::analyze()
33 {
34 if(m_analyzed)
35 {
36 REDasm::status("Analyzing (Fast)...");
37 m_analyzer->analyzeFast();
38 m_disassembler->computeBasicBlocks();
39 m_document->moveToEP();
40 return;
41 }
42
43 m_analyzed = true;
44 LoaderPlugin* loader = m_disassembler->loader();
45 m_analyzer.reset(loader->createAnalyzer(m_disassembler));
46
47 REDasm::status("Analyzing...");
48 m_analyzer->analyze();
49 m_disassembler->computeBasicBlocks();
50 m_document->moveToEP();
51
52 // Trigger a Fast Analysis when post disassembling is completed
53 EVENT_CONNECT(m_disassembler, busyChanged, this, [&]() {
54 if(m_disassembler->busy())
55 return;
56
57 this->analyze();
58 });
59 }
60
loadTargets(const InstructionPtr & instruction)61 void AssemblerAlgorithm::loadTargets(const InstructionPtr &instruction)
62 {
63 for(address_t target : instruction->meta.targets) // Get precalculated targets
64 m_disassembler->pushTarget(target, instruction->address);
65 }
66
validateTarget(const InstructionPtr & instruction) const67 void AssemblerAlgorithm::validateTarget(const InstructionPtr &instruction) const
68 {
69 if(m_disassembler->getTargetsCount(instruction->address))
70 return;
71
72 const Operand* op = instruction->target();
73
74 if(op && !op->isNumeric())
75 return;
76
77 REDasm::problem("No targets found for " + REDasm::quoted(instruction->mnemonic) + " @ " + REDasm::hex(instruction->address));
78 }
79
validateState(const State & state) const80 bool AssemblerAlgorithm::validateState(const State &state) const
81 {
82 if(!StateMachine::validateState(state))
83 return false;
84
85 return m_document->segment(state.address);
86 }
87
onNewState(const State * state) const88 void AssemblerAlgorithm::onNewState(const State* state) const
89 {
90 REDasm::statusProgress("Analyzing @ " + REDasm::hex(state->address, m_assembler->bits()) +
91 " >> " + state->name, this->pending());
92 }
93
disassembleInstruction(address_t address,const InstructionPtr & instruction)94 u32 AssemblerAlgorithm::disassembleInstruction(address_t address, const InstructionPtr& instruction)
95 {
96 if(!this->canBeDisassembled(address))
97 return AssemblerAlgorithm::SKIP;
98
99 Symbol* symbol = m_document->symbol(address);
100
101 if(symbol && !symbol->isLocked() && !symbol->is(SymbolType::Code))
102 m_document->eraseSymbol(symbol->address);
103
104 instruction->address = address;
105
106 BufferView view = m_loader->view(address);
107 return m_assembler->decode(view, instruction) ? AssemblerAlgorithm::OK : AssemblerAlgorithm::FAIL;
108 }
109
done(address_t address)110 void AssemblerAlgorithm::done(address_t address) { m_done.insert(address); }
111
onDecoded(const InstructionPtr & instruction)112 void AssemblerAlgorithm::onDecoded(const InstructionPtr &instruction)
113 {
114 if(instruction->is(InstructionType::Branch))
115 {
116 this->loadTargets(instruction);
117 this->validateTarget(instruction);
118 }
119
120 for(const Operand& op : instruction->operands)
121 {
122 if(!op.isNumeric() || op.displacementIsDynamic())
123 {
124 if(m_emulator && !m_emulator->hasError())
125 this->emulateOperand(&op, instruction);
126
127 if(!op.is(OperandType::Displacement)) // Try static displacement analysis
128 continue;
129 }
130
131 if(op.is(OperandType::Displacement))
132 {
133 if(op.displacementIsDynamic())
134 EXECUTE_STATE(AssemblerAlgorithm::AddressTableState, op.disp.displacement, op.index, instruction);
135 else if(op.displacementCanBeAddress())
136 EXECUTE_STATE(AssemblerAlgorithm::MemoryState, op.disp.displacement, op.index, instruction);
137 }
138 else if(op.is(OperandType::Memory))
139 EXECUTE_STATE(AssemblerAlgorithm::MemoryState, op.u_value, op.index, instruction);
140 else if(op.is(OperandType::Immediate))
141 EXECUTE_STATE(AssemblerAlgorithm::ImmediateState, op.u_value, op.index, instruction);
142
143 this->onDecodedOperand(&op, instruction);
144 }
145 }
146
onDecodeFailed(const InstructionPtr & instruction)147 void AssemblerAlgorithm::onDecodeFailed(const InstructionPtr &instruction)
148 {
149 REDasm::problem("Invalid instruction @ " + REDasm::hex(instruction->address));
150
151 if(!instruction->size)
152 return;
153
154 this->enqueue(instruction->endAddress());
155 }
156
onDecodedOperand(const Operand * op,const InstructionPtr & instruction)157 void AssemblerAlgorithm::onDecodedOperand(const Operand *op, const InstructionPtr &instruction)
158 {
159 if(!op->isCharacter())
160 return;
161
162 std::string charinfo = REDasm::hex(op->u_value, 8, true) + "=" + REDasm::quoted_s(std::string(1, static_cast<char>(op->u_value)));
163 m_document->autoComment(instruction->address, charinfo);
164 }
165
onEmulatedOperand(const Operand * op,const InstructionPtr & instruction,u64 value)166 void AssemblerAlgorithm::onEmulatedOperand(const Operand *op, const InstructionPtr &instruction, u64 value)
167 {
168 Segment* segment = m_document->segment(value);
169
170 if(!segment || segment->isPureCode()) // Don't flood "Pure-Code" segments with symbols
171 return;
172
173 EXECUTE_STATE(AssemblerAlgorithm::AddressTableState, value, op->index, instruction);
174 }
175
decodeState(const State * state)176 void AssemblerAlgorithm::decodeState(const State *state)
177 {
178 InstructionPtr instruction = std::make_shared<Instruction>();
179 u32 status = this->disassemble(state->address, instruction);
180
181 if(status == AssemblerAlgorithm::SKIP)
182 return;
183
184 m_document->instruction(instruction);
185 }
186
jumpState(const State * state)187 void AssemblerAlgorithm::jumpState(const State *state)
188 {
189 s64 dir = BRANCH_DIRECTION(state->instruction, state->address);
190
191 if(!dir)
192 m_document->autoComment(state->instruction->address, "Infinite loop");
193
194 m_document->branch(state->address, dir);
195 DECODE_STATE(state->address);
196 }
197
callState(const State * state)198 void AssemblerAlgorithm::callState(const State *state) { m_document->symbol(state->address, SymbolType::Function); }
199
branchState(const State * state)200 void AssemblerAlgorithm::branchState(const State *state)
201 {
202 InstructionPtr instruction = state->instruction;
203
204 if(instruction->is(InstructionType::Call))
205 FORWARD_STATE(AssemblerAlgorithm::CallState, state);
206 else if(instruction->is(InstructionType::Jump))
207 FORWARD_STATE(AssemblerAlgorithm::JumpState, state);
208 else
209 {
210 REDasm::problem("Invalid branch state for instruction " + REDasm::quoted(instruction->mnemonic) +
211 " @ " + REDasm::hex(instruction->address, m_assembler->bits()));
212 return;
213 }
214
215 m_disassembler->pushReference(state->address, instruction->address);
216 m_disassembler->pushTarget(state->address, instruction->address);
217 }
218
branchMemoryState(const State * state)219 void AssemblerAlgorithm::branchMemoryState(const State *state)
220 {
221 InstructionPtr instruction = state->instruction;
222 m_disassembler->pushTarget(state->address, instruction->address);
223
224 Symbol* symbol = m_document->symbol(state->address);
225
226 if(symbol && symbol->isImport()) // Don't dereference imports
227 return;
228
229 u64 value = 0;
230 m_disassembler->dereference(state->address, &value);
231 m_document->symbol(state->address, SymbolType::Data | SymbolType::Pointer);
232
233 if(instruction->is(InstructionType::Call))
234 m_document->symbol(value, SymbolType::Function);
235 else
236 m_document->symbol(value, SymbolType::Code);
237
238 m_disassembler->pushReference(value, state->address);
239 }
240
addressTableState(const State * state)241 void AssemblerAlgorithm::addressTableState(const State *state)
242 {
243 InstructionPtr instruction = state->instruction;
244 s64 c = m_disassembler->checkAddressTable(instruction, state->address);
245
246 if(c < 0)
247 return;
248
249 if(c > 1)
250 {
251 m_disassembler->pushReference(state->address, instruction->address);
252 state_t fwdstate = AssemblerAlgorithm::BranchState;
253
254 if(instruction->is(InstructionType::Call))
255 m_document->autoComment(instruction->address, "Call Table with " + std::to_string(c) + " cases(s)");
256 else if(instruction->is(InstructionType::Jump))
257 m_document->autoComment(instruction->address, "Jump Table with " + std::to_string(c) + " cases(s)");
258 else
259 {
260 m_document->autoComment(instruction->address, "Address Table with " + std::to_string(c) + " cases(s)");
261 fwdstate = AssemblerAlgorithm::MemoryState;
262 }
263
264 ReferenceSet targets = m_disassembler->getTargets(instruction->address);
265
266 for(address_t target : targets)
267 FORWARD_STATE_VALUE(fwdstate, target, state);
268
269 return;
270 }
271
272 const Operand* op = state->operand();
273
274 if(op->is(OperandType::Displacement))
275 FORWARD_STATE(AssemblerAlgorithm::PointerState, state);
276 else if(op->is(OperandType::Memory))
277 FORWARD_STATE(AssemblerAlgorithm::MemoryState, state);
278 else
279 FORWARD_STATE(AssemblerAlgorithm::ImmediateState, state);
280 }
281
memoryState(const State * state)282 void AssemblerAlgorithm::memoryState(const State *state)
283 {
284 u64 value = 0;
285
286 if(!m_disassembler->dereference(state->address, &value))
287 {
288 FORWARD_STATE(AssemblerAlgorithm::ImmediateState, state);
289 return;
290 }
291
292 InstructionPtr instruction = state->instruction;
293 m_disassembler->pushReference(state->address, instruction->address);
294
295 if(instruction->is(InstructionType::Branch) && state->operand()->isTarget())
296 FORWARD_STATE(AssemblerAlgorithm::BranchMemoryState, state);
297 else
298 FORWARD_STATE(AssemblerAlgorithm::PointerState, state);
299 }
300
pointerState(const State * state)301 void AssemblerAlgorithm::pointerState(const State *state)
302 {
303 u64 value = 0;
304
305 if(!m_disassembler->dereference(state->address, &value))
306 {
307 FORWARD_STATE(AssemblerAlgorithm::ImmediateState, state);
308 return;
309 }
310
311 m_document->symbol(state->address, SymbolType::Data | SymbolType::Pointer);
312 m_disassembler->checkLocation(state->address, value); // Create Symbol + XRefs
313 }
314
immediateState(const State * state)315 void AssemblerAlgorithm::immediateState(const State *state)
316 {
317 InstructionPtr instruction = state->instruction;
318
319 if(instruction->is(InstructionType::Branch) && state->operand()->isTarget())
320 FORWARD_STATE(AssemblerAlgorithm::BranchState, state);
321 else
322 m_disassembler->checkLocation(instruction->address, state->address); // Create Symbol + XRefs
323 }
324
canBeDisassembled(address_t address)325 bool AssemblerAlgorithm::canBeDisassembled(address_t address)
326 {
327 BufferView view = m_loader->view(address);
328
329 if(view.eob())
330 return false;
331
332 if(!m_currentsegment || !m_currentsegment->contains(address))
333 m_currentsegment = m_document->segment(address);
334
335 if(!m_currentsegment || !m_currentsegment->is(SegmentType::Code))
336 return false;
337
338 if(!m_loader->offset(address).valid)
339 return false;
340
341 return true;
342 }
343
createInvalidInstruction(const InstructionPtr & instruction)344 void AssemblerAlgorithm::createInvalidInstruction(const InstructionPtr &instruction)
345 {
346 if(!instruction->size)
347 instruction->size = 1; // Invalid instruction uses at least 1 byte
348
349 instruction->type = InstructionType::Invalid;
350 instruction->mnemonic = INVALID_MNEMONIC;
351 }
352
disassemble(address_t address,const InstructionPtr & instruction)353 u32 AssemblerAlgorithm::disassemble(address_t address, const InstructionPtr &instruction)
354 {
355 auto it = m_done.find(address);
356
357 if(it != m_done.end())
358 return AssemblerAlgorithm::SKIP;
359
360 this->done(address);
361 u32 result = this->disassembleInstruction(address, instruction);
362
363 if(result == AssemblerAlgorithm::FAIL)
364 {
365 this->createInvalidInstruction(instruction);
366 this->onDecodeFailed(instruction);
367 }
368 else
369 {
370 this->emulate(instruction);
371 this->onDecoded(instruction);
372 }
373
374 return result;
375 }
376
emulateOperand(const Operand * op,const InstructionPtr & instruction)377 void AssemblerAlgorithm::emulateOperand(const Operand *op, const InstructionPtr &instruction)
378 {
379 u64 value = 0;
380
381 if(op->is(OperandType::Register))
382 {
383 if(!m_emulator->read(op, &value))
384 return;
385 }
386 else if(op->is(OperandType::Displacement))
387 {
388 if(!m_emulator->displacement(op, &value))
389 return;
390 }
391 else
392 return;
393
394 this->onEmulatedOperand(op, instruction, value);
395 }
396
emulate(const InstructionPtr & instruction)397 void AssemblerAlgorithm::emulate(const InstructionPtr &instruction)
398 {
399 if(!m_emulator)
400 return;
401
402 m_emulator->emulate(instruction);
403 }
404
405
406 } // namespace REDasm
407