#!/usr/local/bin/python3.8
#
# PLASMA : Generate an indented asm code (pseudo-C) with colored syntax.
# Copyright (C) 2015 Joel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import bisect
from time import time
import subprocess

from plasma.lib.utils import debug__, print_no_end, get_char, BYTES_PRINTABLE_SET
from plasma.lib.colors import color_section

# Identifiers for the kind of binary being analysed (presumably the values
# stored in Binary.type -- confirm against the disassembler).
T_BIN_ELF = 0
T_BIN_PE = 1
T_BIN_RAW = 2
T_BIN_UNK = 3


class SectionAbs:
    """Format-independent view of one section of a binary.

    Addresses are inclusive: the section covers [start, end] once mapped
    (virt_size bytes) while only [start, real_end] (real_size bytes) is
    considered backed by `data`.
    """

    # virt_size: size of the mapped section in memory
    def __init__(self, name, start, virt_size, real_size, is_exec, is_data, is_bss, data):
        self.name = name
        self.start = start
        self.virt_size = virt_size
        self.real_size = real_size
        self.end = start + virt_size - 1
        self.real_end = start + real_size - 1
        self.is_exec = is_exec
        self.is_data = is_data
        self.is_bss = is_bss
        self.data = data
        self.big_endian = False  # set in lib.disassembler

    def print_header(self):
        """Print one line: name, address range, virtual size and real size."""
        print_no_end(color_section(self.name.ljust(20)))
        print_no_end(" [ ")
        print_no_end(hex(self.start))
        print_no_end(" - ")
        print_no_end(hex(self.end))
        print_no_end(" - %d - %d" % (self.virt_size, self.real_size))
        print(" ]")

    def _has_data_at(self, ad):
        """Return True when `ad` lies inside the data-backed address range."""
        # The lower bound matters: a negative offset would silently index
        # from the end of the buffer instead of failing.
        return self.start <= ad <= self.real_end

    def _read_uint(self, ad, size):
        """Decode `size` bytes at address `ad` as an unsigned integer.

        The byte order follows self.big_endian. Returns None when `ad` is
        outside the data-backed range or fewer than `size` bytes remain.
        """
        if not self._has_data_at(ad):
            return None
        off = ad - self.start
        raw = self.data[off:off + size]
        if len(raw) != size:
            return None
        return int.from_bytes(raw, "big" if self.big_endian else "little")

    def read(self, ad, size):
        """Return up to `size` bytes starting at address `ad`.

        Returns b"" when `ad` is outside the data-backed range; the result
        is shorter than `size` when the buffer ends first.
        """
        if not self._has_data_at(ad):
            return b""
        off = ad - self.start
        return self.data[off:off + size]

    def read_int(self, ad, size):
        """Read an unsigned integer of 1, 2, 4 or 8 bytes at address `ad`.

        Returns None for any other size or when the value cannot be read.
        """
        # Dispatch through the named readers so that an override of one of
        # them in a subclass is still honoured.
        readers = {
            1: self.read_byte,
            2: self.read_word,
            4: self.read_dword,
            8: self.read_qword,
        }
        reader = readers.get(size)
        if reader is None:
            return None
        return reader(ad)

    def read_byte(self, ad):
        """Return the byte at address `ad` as an int, or None if unavailable."""
        return self._read_uint(ad, 1)

    def read_word(self, ad):
        """Return the 16-bit unsigned value at `ad`, or None if unavailable."""
        return self._read_uint(ad, 2)

    def read_dword(self, ad):
        """Return the 32-bit unsigned value at `ad`, or None if unavailable."""
        return self._read_uint(ad, 4)

    def read_qword(self, ad):
        """Return the 64-bit unsigned value at `ad`, or None if unavailable."""
        return self._read_uint(ad, 8)


class SegmentAbs(SectionAbs):
    """A segment: a SectionAbs that also records its offset in the file.

    Unlike SectionAbs, the endianness is given at construction time and
    is_bss is always False.
    """

    def __init__(self, name, start, virt_size, real_size, is_exec, is_data,
                 data, file_offset, big_endian):
        super().__init__(name, start, virt_size, real_size, is_exec, is_data,
                         False, data)
        self.file_offset = file_offset
        self.big_endian = big_endian


class Binary(object):
    """Base container for the sections, segments and symbols of a binary.

    is_big_endian() and get_entry_point() raise NotImplementedError here and
    load_static_sym() / load_dyn_sym() do nothing; presumably format-specific
    subclasses override them.
    """

    def __init__(self):
        self.reverse_symbols = {}  # ad -> name
        self.symbols = {}  # name -> ad
        self.section_names = {}
        self.demangled = {}  # name -> ad
        self.reverse_demangled = {}  # ad -> name
        self.imports = {}  # ad -> True (the bool is just for msgpack to save the database)
        self._abs_sections = {}  # start section -> SectionAbs
        self._sorted_sections = []  # bisect list, contains section start address

        # for elf
        self._abs_segments = {}
        self._sorted_segments = []

        # To be compatible with CLE, used only in ELF
        self.rebase_addr = 0

        # It will be set in Disassembler !
        self.wordsize = 0
        self.type = -1

        # It will be set in Console !
        self.api = None


    @staticmethod
    def _find_containing(sorted_starts, by_start, ad):
        """Return the entry of `by_start` whose [start, end] contains `ad`.

        `sorted_starts` is the sorted list of start addresses and `by_start`
        maps each of them to its section/segment object. Returns None when no
        entry contains `ad`.
        """
        i = bisect.bisect_right(sorted_starts, ad)
        if not i:
            return None
        candidate = by_start[sorted_starts[i - 1]]
        if ad <= candidate.end:
            return candidate
        return None


    def get_section(self, ad):
        """Return the indexed section containing address `ad`, or None."""
        return self._find_containing(self._sorted_sections, self._abs_sections, ad)


    def add_section(self, start_address, name, virt_size, real_size,
                    is_exec, is_data, is_bss, data):
        """Register a new section starting at `start_address`.

        Only executable or data sections are added to the sorted index used
        by the address lookups.
        """
        if is_exec or is_data:
            bisect.insort_left(self._sorted_sections, start_address)
        # NOTE(review): the section object is stored for every section, not
        # only the indexed ones -- confirm this matches the intended nesting.
        self._abs_sections[start_address] = SectionAbs(
            name,
            start_address,
            virt_size,
            real_size,
            is_exec,
            is_data,
            is_bss,
            data)


    # for elf
    def get_segment(self, ad):
        """Return the segment containing address `ad`, or None."""
        return self._find_containing(self._sorted_segments, self._abs_segments, ad)


    def is_address(self, ad):
        """Return True if `ad` is inside a section that does not start at 0."""
        s = self.get_section(ad)
        return s is not None and s.start != 0


    def get_next_section(self, ad):
        """Return the first indexed section starting after `ad`, or None."""
        i = bisect.bisect_right(self._sorted_sections, ad)
        if i >= len(self._sorted_sections):
            return None
        start = self._sorted_sections[i]
        s = self._abs_sections[start]
        if ad <= s.end:
            return s
        return None


    def get_first_addr(self):
        """Return the lowest indexed section start address.

        Raises IndexError when no section has been indexed.
        """
        return self._sorted_sections[0]


    def get_last_addr(self):
        """Return the end address of the indexed section that starts last.

        Raises IndexError when no section has been indexed.
        """
        ad = self._sorted_sections[-1]
        return self._abs_sections[ad].end


    def read(self, ad, size):
        """Return up to `size` bytes at `ad`, or b"" if `ad` is unmapped."""
        s = self.get_section(ad)
        if s is None:
            return b""
        return s.read(ad, size)


    def read_byte(self, ad):
        """Return the byte at address `ad`, or None if it cannot be read."""
        s = self.get_section(ad)
        if s is None:
            # The address belongs to no section: nothing to read.
            return None
        return s.read_byte(ad)


    def rename_sym(self, name):
        """Return the first "<name>_<n>" (n = 0, 1, ...) not in self.symbols."""
        count = 0
        n = "%s_%d" % (name, count)
        while n in self.symbols:
            count += 1
            n = "%s_%d" % (name, count)
        return n


    # not optimized
    def get_section_by_name(self, name):
        """Return a section whose name equals `name`, or None."""
        for s in self._abs_sections.values():
            if s.name == name:
                return s
        return None


    def get_prev_section(self, ad):
        """Return the indexed section located before the one containing `ad`.

        When `ad` is in no section, the closest indexed section starting at
        or below `ad` is returned. Returns None when there is no such section.
        """
        s = self.get_section(ad)
        # Look strictly below the containing section; with no containing
        # section, look from `ad` itself.
        limit = ad if s is None else s.start - 1
        i = bisect.bisect_right(self._sorted_sections, limit)
        if i == 0:
            return None
        start = self._sorted_sections[i - 1]
        return self._abs_sections[start]


    def iter_sections(self):
        """Yield the indexed sections in increasing start-address order."""
        for ad in self._sorted_sections:
            yield self._abs_sections[ad]


    # TODO : move in SectionAbs
    def get_string(self, addr, max_data_size=-1, s=None):
        """Return the printable string stored at `addr`, or None.

        At most `max_data_size` characters are read (-1 means no limit); when
        that limit is hit "..." is appended. Otherwise the string must be
        non-empty and end with a null byte. `s` is the section to read from;
        it is looked up from `addr` when omitted.
        """
        if s is None:
            s = self.get_section(addr)
            if s is None:
                return None

        data = s.data
        off = addr - s.start
        txt = []

        c = 0
        i = 0
        while (i < max_data_size or max_data_size == -1) and off < len(data):
            c = data[off]
            if c == 0:
                break
            if c not in BYTES_PRINTABLE_SET:
                break
            txt.append(get_char(c))
            off += 1
            i += 1

        if i == max_data_size:
            if c != 0:
                txt.append("...")
        elif c != 0 or i == 0:
            # Stopped on a non-printable byte or at the end of the data, or
            # the string is empty.
            return None

        return ''.join(txt)


    # Returns the size of the string or 0 if it's not an ascii string
    def is_string(self, addr, min_bytes=3, s=None):
        """Return the size of the null-terminated string at `addr`, or 0.

        The size includes the terminating null byte and must be at least
        `min_bytes`. `s` is the section to read from; it is looked up from
        `addr` when omitted.
        """
        if s is None:
            s = self.get_section(addr)
            if s is None:
                return 0

        data = s.data
        off = addr - s.start
        n = 0
        c = 0
        while off < len(data):
            c = data[off]
            if c == 0:
                n += 1
                break
            if c in BYTES_PRINTABLE_SET:
                n += 1
            else:
                break
            off += 1

        # consider this is a string when there are more than 2 chars
        # with a null byte
        if c == 0 and n >= min_bytes:
            return n
        return 0


    def load_section_names(self):
        """Fill self.section_names with a name -> start address mapping."""
        # Used for the auto-completion
        for ad, sec in self._abs_sections.items():
            self.section_names[sec.name] = ad


    def demangle_symbols(self):
        """Demangle the C++ symbols of self.symbols with `c++filt -p`.

        Symbols starting with "_Z" or "__Z" are sent to c++filt (anything
        after "@@" is stripped first). self.reverse_demangled is replaced by
        the ad -> demangled name mapping and self.demangled receives the
        matching name -> ad entries. Nothing happens when there is no such
        symbol. Raises OSError if c++filt cannot be started.
        """
        addr = []
        lookup_names = []
        for n, ad in self.symbols.items():
            if n.startswith("_Z") or n.startswith("__Z"):
                addr.append(ad)
                lookup_names.append(n.split("@@")[0])

        if not addr:
            return

        # http://stackoverflow.com/questions/6526500/c-name-mangling-library-for-python
        args = ["c++filt", "-p"]
        proc = subprocess.run(
            args,
            input='\n'.join(lookup_names).encode('utf-8'),
            stdout=subprocess.PIPE)
        # The output ends with a newline: drop the empty trailing element.
        demangled = proc.stdout.split(b"\n")[:-1]

        self.reverse_demangled = dict(
            zip(addr, (raw.decode() for raw in demangled)))

        for ad, n in self.reverse_demangled.items():
            self.demangled[n] = ad


    def load_static_sym(self):
        """Load the static symbols; does nothing in this base class."""
        return


    def load_dyn_sym(self):
        """Load the dynamic symbols; does nothing in this base class."""
        return


    def is_big_endian(self):
        """Not implemented in this base class."""
        raise NotImplementedError


    def get_entry_point(self):
        """Not implemented in this base class."""
        raise NotImplementedError