1%% -*- erlang-indent-level: 4 -*- 2%%======================================================================= 3%% File : beam_disasm.erl 4%% Author : Kostis Sagonas 5%% Description : Disassembles an R5-R10 .beam file into symbolic BEAM code 6%%======================================================================= 7%% $Id: beam_disasm.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $ 8%%======================================================================= 9%% Notes: 10%% 1. It does NOT work for .beam files of previous BEAM versions. 11%% 2. If handling of new BEAM instructions is needed, this should be 12%% inserted at the end of function resolve_inst(). 13%%======================================================================= 14 15-module(beam_disasm). 16 17-export([file/1, format_error/1]). 18 19-author("Kostis Sagonas"). 20 21-include("beam_opcodes.hrl"). 22 23%%----------------------------------------------------------------------- 24 25-define(NO_DEBUG(Str,Xs),ok). 26-define(DEBUG(Str,Xs),io:format(Str,Xs)). 27-define(exit(Reason),exit({?MODULE,?LINE,Reason})). 28 29%%----------------------------------------------------------------------- 30%% Error information 31 32format_error({error, Module, Error}) -> 33 Module:format_error(Error); 34format_error({internal, Error}) -> 35 io_lib:format("~p: disassembly failed with reason ~P.", 36 [?MODULE, Error, 25]). 37 38%%----------------------------------------------------------------------- 39%% The main exported function 40%% File is either a file name or a binary containing the code. 41%% Returns `{beam_file, [...]}' or `{error, Module, Reason}'. 42%% Call `format_error({error, Module, Reason})' for an error string. 43%%----------------------------------------------------------------------- 44 45file(File) -> 46 case beam_lib:info(File) of 47 Info when list(Info) -> 48 {value,{chunks,Chunks}} = lists:keysearch(chunks,1,Info), 49 case catch process_chunks(File, Chunks) of 50 {'EXIT', Error} -> 51 {error, ?MODULE, {internal, Error}}; 52 Result -> 53 Result 54 end; 55 Error -> 56 Error 57 end. 58 59%%----------------------------------------------------------------------- 60%% Interface might need to be revised -- do not depend on it. 61%%----------------------------------------------------------------------- 62 63process_chunks(F,ChunkInfoList) -> 64 {ok,{_,Chunks}} = beam_lib:chunks(F, ["Atom","Code","StrT","ImpT","ExpT"]), 65 [{"Atom",AtomBin},{"Code",CodeBin},{"StrT",StrBin}, 66 {"ImpT",ImpBin},{"ExpT",ExpBin}] = Chunks, 67 LambdaBin = optional_chunk(F, "FunT", ChunkInfoList), 68 LocBin = optional_chunk(F, "LocT", ChunkInfoList), 69 AttrBin = optional_chunk(F, "Attr", ChunkInfoList), 70 CompBin = optional_chunk(F, "CInf", ChunkInfoList), 71 Atoms = beam_disasm_atoms(AtomBin), 72 Exports = beam_disasm_exports(ExpBin, Atoms), 73 Imports = beam_disasm_imports(ImpBin, Atoms), 74 LocFuns = beam_disasm_exports(LocBin, Atoms), 75 Lambdas = beam_disasm_lambdas(LambdaBin, Atoms), 76 Str = beam_disasm_strings(StrBin), 77 Str1 = binary_to_list(Str), %% for debugging -- use Str as far as poss. 78 Sym_Code = beam_disasm_code(CodeBin,Atoms,Imports,Str,Lambdas), 79 Attributes = beam_disasm_attributes(AttrBin), 80 CompInfo = beam_disasm_compilation_info(CompBin), 81 All = [{exports,Exports}, 82 {imports,Imports}, 83 {code,Sym_Code}, 84 {atoms,Atoms}, 85 {local_funs,LocFuns}, 86 {strings,Str1}, 87 {attributes,Attributes}, 88 {comp_info,CompInfo}], 89 {beam_file,[Item || {_Key,Data}=Item <- All, Data =/= none]}. 90 91%%----------------------------------------------------------------------- 92%% Retrieve an optional chunk or none if the chunk doesn't exist. 93%%----------------------------------------------------------------------- 94 95optional_chunk(F, ChunkTag, ChunkInfo) -> 96 case lists:keymember(ChunkTag, 1, ChunkInfo) of 97 true -> 98 {ok,{_,[{ChunkTag,Chunk}]}} = beam_lib:chunks(F, [ChunkTag]), 99 Chunk; 100 false -> none 101 end. 102 103%%----------------------------------------------------------------------- 104%% UTILITIES -- these actually exist in file "beam_lib" 105%% -- they should be moved into a common utils file. 106%%----------------------------------------------------------------------- 107 108i32([X1,X2,X3,X4]) -> 109 (X1 bsl 24) bor (X2 bsl 16) bor (X3 bsl 8) bor X4. 110 111get_int(B) -> 112 {I, B1} = split_binary(B, 4), 113 {i32(binary_to_list(I)), B1}. 114 115%%----------------------------------------------------------------------- 116%% Disassembles the atom table of a BEAM file. 117%% - atoms are stored in order 1 ... N (N = Num_atoms, in fact), 118%% - each atom name consists of a length byte, followed by that many 119%% bytes of name 120%% (nb: atom names max 255 chars?!) 121%%----------------------------------------------------------------------- 122 123beam_disasm_atoms(AtomTabBin) -> 124 {_NumAtoms,B} = get_int(AtomTabBin), 125 disasm_atoms(B). 126 127disasm_atoms(AtomBin) -> 128 disasm_atoms(binary_to_list(AtomBin),1). 129 130disasm_atoms([Len|Xs],N) -> 131 {AtomName,Rest} = get_atom_name(Len,Xs), 132 [{N,list_to_atom(AtomName)}|disasm_atoms(Rest,N+1)]; 133disasm_atoms([],_) -> 134 []. 135 136get_atom_name(Len,Xs) -> 137 get_atom_name(Len,Xs,[]). 138 139get_atom_name(N,[X|Xs],RevName) when N > 0 -> 140 get_atom_name(N-1,Xs,[X|RevName]); 141get_atom_name(0,Xs,RevName) -> 142 { lists:reverse(RevName), Xs }. 143 144%%----------------------------------------------------------------------- 145%% Disassembles the export table of a BEAM file. 146%%----------------------------------------------------------------------- 147 148beam_disasm_exports(none, _) -> none; 149beam_disasm_exports(ExpTabBin, Atoms) -> 150 {_NumAtoms,B} = get_int(ExpTabBin), 151 disasm_exports(B,Atoms). 152 153disasm_exports(Bin,Atoms) -> 154 resolve_exports(collect_exports(binary_to_list(Bin)),Atoms). 155 156collect_exports([F3,F2,F1,F0,A3,A2,A1,A0,L3,L2,L1,L0|Exps]) -> 157 [{i32([F3,F2,F1,F0]), % F = function (atom ID) 158 i32([A3,A2,A1,A0]), % A = arity (int) 159 i32([L3,L2,L1,L0])} % L = label (int) 160 |collect_exports(Exps)]; 161collect_exports([]) -> 162 []. 163 164resolve_exports(Exps,Atoms) -> 165 [ {lookup_key(F,Atoms), A, L} || {F,A,L} <- Exps ]. 166 167%%----------------------------------------------------------------------- 168%% Disassembles the import table of a BEAM file. 169%%----------------------------------------------------------------------- 170 171beam_disasm_imports(ExpTabBin,Atoms) -> 172 {_NumAtoms,B} = get_int(ExpTabBin), 173 disasm_imports(B,Atoms). 174 175disasm_imports(Bin,Atoms) -> 176 resolve_imports(collect_imports(binary_to_list(Bin)),Atoms). 177 178collect_imports([M3,M2,M1,M0,F3,F2,F1,F0,A3,A2,A1,A0|Exps]) -> 179 [{i32([M3,M2,M1,M0]), % M = module (atom ID) 180 i32([F3,F2,F1,F0]), % F = function (atom ID) 181 i32([A3,A2,A1,A0])} % A = arity (int) 182 |collect_imports(Exps)]; 183collect_imports([]) -> 184 []. 185 186resolve_imports(Exps,Atoms) -> 187 [{extfunc,lookup_key(M,Atoms),lookup_key(F,Atoms),A} || {M,F,A} <- Exps ]. 188 189%%----------------------------------------------------------------------- 190%% Disassembles the lambda (fun) table of a BEAM file. 191%%----------------------------------------------------------------------- 192 193beam_disasm_lambdas(none, _) -> none; 194beam_disasm_lambdas(<<_:32,Tab/binary>>, Atoms) -> 195 disasm_lambdas(Tab, Atoms, 0). 196 197disasm_lambdas(<<F:32,A:32,Lbl:32,Index:32,NumFree:32,OldUniq:32,More/binary>>, 198 Atoms, OldIndex) -> 199 Info = {lookup_key(F, Atoms),A,Lbl,Index,NumFree,OldUniq}, 200 [{OldIndex,Info}|disasm_lambdas(More, Atoms, OldIndex+1)]; 201disasm_lambdas(<<>>, _, _) -> []. 202 203%%----------------------------------------------------------------------- 204%% Disassembles the code chunk of a BEAM file: 205%% - The code is first disassembled into a long list of instructions. 206%% - This list is then split into functions and all names are resolved. 207%%----------------------------------------------------------------------- 208 209beam_disasm_code(CodeBin,Atoms,Imports,Str,Lambdas) -> 210 [_SS3,_SS2,_SS1,_SS0, % Sub-Size (length of information before code) 211 _IS3,_IS2,_IS1,_IS0, % Instruction Set Identifier (always 0) 212 _OM3,_OM2,_OM1,_OM0, % Opcode Max 213 _L3,_L2,_L1,_L0,_F3,_F2,_F1,_F0|Code] = binary_to_list(CodeBin), 214 case catch disasm_code(Code, Atoms) of 215 {'EXIT',Rsn} -> 216 ?NO_DEBUG('code disasm failed: ~p~n',[Rsn]), 217 ?exit(Rsn); 218 DisasmCode -> 219 Functions = get_function_chunks(DisasmCode), 220 LocLabels = local_labels(Functions), 221 [resolve_names(F,Imports,Str,LocLabels,Lambdas) || F <- Functions] 222 end. 223 224%%----------------------------------------------------------------------- 225 226disasm_code([B|Bs], Atoms) -> 227 {Instr,RestBs} = disasm_instr(B, Bs, Atoms), 228 [Instr|disasm_code(RestBs, Atoms)]; 229disasm_code([], _) -> []. 230 231%%----------------------------------------------------------------------- 232%% Splits the code stream into chunks representing the code of functions. 233%% 234%% NOTE: code actually looks like 235%% label L1: ... label Ln: 236%% func_info ... 237%% label entry: 238%% ... 239%% <on failure, use label Li to show where things died> 240%% ... 241%% So the labels before each func_info should be included as well. 242%% Ideally, only one such label is needed, but the BEAM compiler 243%% before R8 didn't care to remove the redundant ones. 244%%----------------------------------------------------------------------- 245 246get_function_chunks([I|Code]) -> 247 {LastI,RestCode,Labs} = split_head_labels(I,Code,[]), 248 get_funs(LastI,RestCode,Labs,[]); 249get_function_chunks([]) -> 250 ?exit(empty_code_segment). 251 252get_funs(PrevI,[I|Is],RevF,RevFs) -> 253 case I of 254 {func_info,_Info} -> 255 [H|T] = RevF, 256 {Last,Fun,TrailingLabels} = split_head_labels(H,T,[]), 257 get_funs(I, Is, [PrevI|TrailingLabels], add_funs([Last|Fun],RevFs)); 258 _ -> 259 get_funs(I, Is, [PrevI|RevF], RevFs) 260 end; 261get_funs(PrevI,[],RevF,RevFs) -> 262 case PrevI of 263 {int_code_end,[]} -> 264 emit_funs(add_fun(RevF,RevFs)); 265 _ -> 266 ?DEBUG('warning: code segment did not end with int_code_end~n',[]), 267 emit_funs(add_funs([PrevI|RevF],RevFs)) 268 end. 269 270split_head_labels({label,L},[I|Code],Labs) -> 271 split_head_labels(I,Code,[{label,L}|Labs]); 272split_head_labels(I,Code,Labs) -> 273 {I,Code,Labs}. 274 275add_fun([],Fs) -> 276 Fs; 277add_fun(F,Fs) -> 278 add_funs(F,Fs). 279 280add_funs(F,Fs) -> 281 [ lists:reverse(F) | Fs ]. 282 283emit_funs(Fs) -> 284 lists:reverse(Fs). 285 286%%----------------------------------------------------------------------- 287%% Collects local labels -- I am not sure this is 100% what is needed. 288%%----------------------------------------------------------------------- 289 290local_labels(Funs) -> 291 [local_label(Fun) || Fun <- Funs]. 292 293%% The first clause below attempts to provide some (limited form of) 294%% backwards compatibility; it is not needed for .beam files generated 295%% by the R8 compiler. The clause should one fine day be taken out. 296local_label([{label,_},{label,L}|Code]) -> 297 local_label([{label,L}|Code]); 298local_label([{label,_}, 299 {func_info,[M0,F0,{u,A}]}, 300 {label,[{u,L1}]}|_]) -> 301 {atom,M} = resolve_arg(M0), 302 {atom,F} = resolve_arg(F0), 303 {L1, {M, F, A}}; 304local_label(Code) -> 305 io:format('beam_disasm: no label in ~p~n', [Code]), 306 {-666,{none,none,0}}. 307 308%%----------------------------------------------------------------------- 309%% Disassembles a single BEAM instruction; most instructions are handled 310%% in a generic way; indexing instructions are handled separately. 311%%----------------------------------------------------------------------- 312 313disasm_instr(B, Bs, Atoms) -> 314 {SymOp,Arity} = beam_opcodes:opname(B), 315 case SymOp of 316 select_val -> 317 disasm_select_inst(select_val, Bs, Atoms); 318 select_tuple_arity -> 319 disasm_select_inst(select_tuple_arity, Bs, Atoms); 320 _ -> 321 case catch decode_n_args(Arity, Bs, Atoms) of 322 {'EXIT',Rsn} -> 323 ?NO_DEBUG("decode_n_args(~p,~p) failed~n",[Arity,Bs]), 324 {{'EXIT',{SymOp,Arity,Rsn}},[]}; 325 {Args,RestBs} -> 326 ?NO_DEBUG("instr ~p~n",[{SymOp,Args}]), 327 {{SymOp,Args}, RestBs} 328 end 329 end. 330 331%%----------------------------------------------------------------------- 332%% Disassembles a BEAM select_* instruction used for indexing. 333%% Currently handles {select_val,3} and {select_tuple_arity,3} insts. 334%% 335%% The arruments of a "select"-type instruction look as follows: 336%% <reg>, {f,FailLabel}, {list, <num cases>, [<case1> ... <caseN>]} 337%% where each case is of the form [symbol,{f,Label}]. 338%%----------------------------------------------------------------------- 339 340disasm_select_inst(Inst, Bs, Atoms) -> 341 {X, Bs1} = decode_arg(Bs, Atoms), 342 {F, Bs2} = decode_arg(Bs1, Atoms), 343 {Z, Bs3} = decode_arg(Bs2, Atoms), 344 {U, Bs4} = decode_arg(Bs3, Atoms), 345 {u,Len} = U, 346 {List, RestBs} = decode_n_args(Len, Bs4, Atoms), 347 {{Inst,[X,F,{Z,U,List}]},RestBs}. 348 349%%----------------------------------------------------------------------- 350%% decode_arg([Byte]) -> { Arg, [Byte] } 351%% 352%% - an arg can have variable length, so we must return arg + remaining bytes 353%% - decodes an argument into its 'raw' form: { Tag, Value } 354%% several types map to a single tag, so the byte code instr must then 355%% assign a type to it 356%%----------------------------------------------------------------------- 357 358decode_arg([B|Bs]) -> 359 Tag = decode_tag(B band 2#111), 360 ?NO_DEBUG('Tag = ~p, B = ~p, Bs = ~p~n',[Tag,B,Bs]), 361 case Tag of 362 z -> 363 decode_z_tagged(Tag, B, Bs); 364 _ -> 365 %% all other cases are handled as if they were integers 366 decode_int(Tag, B, Bs) 367 end. 368 369decode_arg([B|Bs0], Atoms) -> 370 Tag = decode_tag(B band 2#111), 371 ?NO_DEBUG('Tag = ~p, B = ~p, Bs = ~p~n',[Tag,B,Bs]), 372 case Tag of 373 z -> 374 decode_z_tagged(Tag, B, Bs0); 375 a -> 376 %% atom or nil 377 case decode_int(Tag, B, Bs0) of 378 {{a,0},Bs} -> {nil,Bs}; 379 {{a,I},Bs} -> {{atom,lookup_key(I, Atoms)},Bs} 380 end; 381 _ -> 382 %% all other cases are handled as if they were integers 383 decode_int(Tag, B, Bs0) 384 end. 385 386%%----------------------------------------------------------------------- 387%% Decodes an integer value. Handles positives, negatives, and bignums. 388%% 389%% Tries to do the opposite of: 390%% beam_asm:encode(1, 5) = [81] 391%% beam_asm:encode(1, 1000) = [105,232] 392%% beam_asm:encode(1, 2047) = [233,255] 393%% beam_asm:encode(1, 2048) = [25,8,0] 394%% beam_asm:encode(1,-1) = [25,255,255] 395%% beam_asm:encode(1,-4294967295) = [121,255,0,0,0,1] 396%% beam_asm:encode(1, 4294967295) = [121,0,255,255,255,255] 397%% beam_asm:encode(1, 429496729501) = [121,99,255,255,255,157] 398%%----------------------------------------------------------------------- 399 400decode_int(Tag,B,Bs) when (B band 16#08) == 0 -> 401 %% N < 16 = 4 bits, NNNN:0:TTT 402 N = B bsr 4, 403 {{Tag,N},Bs}; 404decode_int(Tag,B,Bs) when (B band 16#10) == 0 -> 405 %% N < 2048 = 11 bits = 3:8 bits, NNN:01:TTT, NNNNNNNN 406 [B1|Bs1] = Bs, 407 Val0 = B band 2#11100000, 408 N = (Val0 bsl 3) bor B1, 409 ?NO_DEBUG('NNN:01:TTT, NNNNNNNN = ~n~p:01:~p, ~p = ~p~n', [Val0,Tag,B,N]), 410 {{Tag,N},Bs1}; 411decode_int(Tag,B,Bs) -> 412 {Len,Bs1} = decode_int_length(B,Bs), 413 {IntBs,RemBs} = take_bytes(Len,Bs1), 414 N = build_arg(IntBs), 415 [F|_] = IntBs, 416 Num = if F > 127, Tag == i -> decode_negative(N,Len); 417 true -> N 418 end, 419 ?NO_DEBUG('Len = ~p, IntBs = ~p, Num = ~p~n', [Len,IntBs,Num]), 420 {{Tag,Num},RemBs}. 421 422decode_int_length(B,Bs) -> 423 %% The following imitates get_erlang_integer() in beam_load.c 424 %% Len is the size of the integer value in bytes 425 case B bsr 5 of 426 7 -> 427 {Arg,ArgBs} = decode_arg(Bs), 428 case Arg of 429 {u,L} -> 430 {L+9,ArgBs}; % 9 stands for 7+2 431 _ -> 432 ?exit({decode_int,weird_bignum_sublength,Arg}) 433 end; 434 L -> 435 {L+2,Bs} 436 end. 437 438decode_negative(N,Len) -> 439 N - (1 bsl (Len*8)). % 8 is number of bits in a byte 440 441%%----------------------------------------------------------------------- 442%% Decodes lists and floating point numbers. 443%%----------------------------------------------------------------------- 444 445decode_z_tagged(Tag,B,Bs) when (B band 16#08) == 0 -> 446 N = B bsr 4, 447 case N of 448 0 -> % float 449 decode_float(Bs); 450 1 -> % list 451 {{Tag,N},Bs}; 452 2 -> % fr 453 decode_fr(Bs); 454 3 -> % allocation list 455 decode_alloc_list(Bs); 456 _ -> 457 ?exit({decode_z_tagged,{invalid_extended_tag,N}}) 458 end; 459decode_z_tagged(_,B,_) -> 460 ?exit({decode_z_tagged,{weird_value,B}}). 461 462decode_float(Bs) -> 463 {FL,RestBs} = take_bytes(8,Bs), 464 <<Float:64/float>> = list_to_binary(FL), 465 {{float,Float},RestBs}. 466 467decode_fr(Bs) -> 468 {{u,Fr},RestBs} = decode_arg(Bs), 469 {{fr,Fr},RestBs}. 470 471decode_alloc_list(Bs) -> 472 {{u,N},RestBs} = decode_arg(Bs), 473 decode_alloc_list_1(N, RestBs, []). 474 475decode_alloc_list_1(0, RestBs, Acc) -> 476 {{u,{alloc,lists:reverse(Acc)}},RestBs}; 477decode_alloc_list_1(N, Bs0, Acc) -> 478 {{u,Type},Bs1} = decode_arg(Bs0), 479 {{u,Val},Bs} = decode_arg(Bs1), 480 case Type of 481 0 -> 482 decode_alloc_list_1(N-1, Bs, [{words,Val}|Acc]); 483 1 -> 484 decode_alloc_list_1(N-1, Bs, [{floats,Val}|Acc]) 485 end. 486 487%%----------------------------------------------------------------------- 488%% take N bytes from a stream, return { Taken_bytes, Remaining_bytes } 489%%----------------------------------------------------------------------- 490 491take_bytes(N,Bs) -> 492 take_bytes(N,Bs,[]). 493 494take_bytes(N,[B|Bs],Acc) when N > 0 -> 495 take_bytes(N-1,Bs,[B|Acc]); 496take_bytes(0,Bs,Acc) -> 497 { lists:reverse(Acc), Bs }. 498 499%%----------------------------------------------------------------------- 500%% from a list of bytes Bn,Bn-1,...,B1,B0 501%% build (Bn << 8*n) bor ... bor B1 << 8 bor B0 << 0 502%%----------------------------------------------------------------------- 503 504build_arg(Bs) -> 505 build_arg(Bs,0). 506 507build_arg([B|Bs],N) -> 508 build_arg(Bs, (N bsl 8) bor B); 509build_arg([],N) -> 510 N. 511 512%%----------------------------------------------------------------------- 513%% Decodes a bunch of arguments and returns them in a list 514%%----------------------------------------------------------------------- 515 516decode_n_args(N, Bs, Atoms) when N >= 0 -> 517 decode_n_args(N, [], Bs, Atoms). 518 519decode_n_args(N, Acc, Bs0, Atoms) when N > 0 -> 520 {A1,Bs} = decode_arg(Bs0, Atoms), 521 decode_n_args(N-1, [A1|Acc], Bs, Atoms); 522decode_n_args(0, Acc, Bs, _) -> 523 {lists:reverse(Acc),Bs}. 524 525%%----------------------------------------------------------------------- 526%% Convert a numeric tag value into a symbolic one 527%%----------------------------------------------------------------------- 528 529decode_tag(?tag_u) -> u; 530decode_tag(?tag_i) -> i; 531decode_tag(?tag_a) -> a; 532decode_tag(?tag_x) -> x; 533decode_tag(?tag_y) -> y; 534decode_tag(?tag_f) -> f; 535decode_tag(?tag_h) -> h; 536decode_tag(?tag_z) -> z; 537decode_tag(X) -> ?exit({unknown_tag,X}). 538 539%%----------------------------------------------------------------------- 540%% - replace all references {a,I} with the atom with index I (or {atom,A}) 541%% - replace all references to {i,K} in an external call position with 542%% the proper MFA (position in list, first elt = 0, yields MFA to use) 543%% - resolve strings, represented as <offset, length>, into their 544%% actual values by using string table 545%% (note: string table should be passed as a BINARY so that we can 546%% use binary_to_list/3!) 547%% - convert instruction to its readable form ... 548%% 549%% Currently, only the first three are done (systematically, at least). 550%% 551%% Note: It MAY be premature to remove the lists of args, since that 552%% representation means it is simpler to iterate over all args, etc. 553%%----------------------------------------------------------------------- 554 555resolve_names(Fun, Imports, Str, Lbls, Lambdas) -> 556 [resolve_inst(Instr, Imports, Str, Lbls, Lambdas) || Instr <- Fun]. 557 558%% 559%% New make_fun2/4 instruction added in August 2001 (R8). 560%% We handle it specially here to avoid adding an argument to 561%% the clause for every instruction. 562%% 563 564resolve_inst({make_fun2,Args},_,_,Lbls,Lambdas) -> 565 [OldIndex] = resolve_args(Args), 566 {value,{OldIndex,{F,A,_Lbl,_Index,NumFree,OldUniq}}} = 567 lists:keysearch(OldIndex, 1, Lambdas), 568 [{_,{M,_,_}}|_] = Lbls, % Slightly kludgy. 569 {make_fun2,{M,F,A},OldIndex,OldUniq,NumFree}; 570resolve_inst(Instr, Imports, Str, Lbls, _Lambdas) -> 571 resolve_inst(Instr, Imports, Str, Lbls). 572 573resolve_inst({label,[{u,L}]},_,_,_) -> 574 {label,L}; 575resolve_inst({func_info,RawMFA},_,_,_) -> 576 {func_info,resolve_args(RawMFA)}; 577% resolve_inst(int_code_end,_,_,_,_) -> % instruction already handled 578% int_code_end; % should not really be handled here 579resolve_inst({call,[{u,N},{f,L}]},_,_,Lbls) -> 580 {call,N,catch lookup_key(L,Lbls)}; 581resolve_inst({call_last,[{u,N},{f,L},{u,U}]},_,_,Lbls) -> 582 {call_last,N,catch lookup_key(L,Lbls),U}; 583resolve_inst({call_only,[{u,N},{f,L}]},_,_,Lbls) -> 584 {call_only,N,catch lookup_key(L,Lbls)}; 585resolve_inst({call_ext,[{u,N},{u,MFAix}]},Imports,_,_) -> 586 {call_ext,N,catch lists:nth(MFAix+1,Imports)}; 587resolve_inst({call_ext_last,[{u,N},{u,MFAix},{u,X}]},Imports,_,_) -> 588 {call_ext_last,N,catch lists:nth(MFAix+1,Imports),X}; 589resolve_inst({bif0,Args},Imports,_,_) -> 590 [Bif,Reg] = resolve_args(Args), 591 {extfunc,_Mod,BifName,_Arity} = lists:nth(Bif+1,Imports), 592 %?NO_DEBUG('bif0(~p, ~p)~n',[BifName,Reg]), 593 {bif,BifName,nofail,[],Reg}; 594resolve_inst({bif1,Args},Imports,_,_) -> 595 [F,Bif,A1,Reg] = resolve_args(Args), 596 {extfunc,_Mod,BifName,_Arity} = lists:nth(Bif+1,Imports), 597 %?NO_DEBUG('bif1(~p, ~p, ~p, ~p, ~p)~n',[Bif,BifName,F,[A1],Reg]), 598 {bif,BifName,F,[A1],Reg}; 599resolve_inst({bif2,Args},Imports,_,_) -> 600 [F,Bif,A1,A2,Reg] = resolve_args(Args), 601 {extfunc,_Mod,BifName,_Arity} = lists:nth(Bif+1,Imports), 602 %?NO_DEBUG('bif2(~p, ~p, ~p, ~p, ~p)~n',[Bif,BifName,F,[A1,A2],Reg]), 603 {bif,BifName,F,[A1,A2],Reg}; 604resolve_inst({allocate,[{u,X0},{u,X1}]},_,_,_) -> 605 {allocate,X0,X1}; 606resolve_inst({allocate_heap,[{u,X0},{u,X1},{u,X2}]},_,_,_) -> 607 {allocate_heap,X0,X1,X2}; 608resolve_inst({allocate_zero,[{u,X0},{u,X1}]},_,_,_) -> 609 {allocate_zero,X0,X1}; 610resolve_inst({allocate_heap_zero,[{u,X0},{u,X1},{u,X2}]},_,_,_) -> 611 {allocate_heap_zero,X0,X1,X2}; 612resolve_inst({test_heap,[{u,X0},{u,X1}]},_,_,_) -> 613 {test_heap,X0,X1}; 614resolve_inst({init,[Dst]},_,_,_) -> 615 {init,Dst}; 616resolve_inst({deallocate,[{u,L}]},_,_,_) -> 617 {deallocate,L}; 618resolve_inst({return,[]},_,_,_) -> 619 return; 620resolve_inst({send,[]},_,_,_) -> 621 send; 622resolve_inst({remove_message,[]},_,_,_) -> 623 remove_message; 624resolve_inst({timeout,[]},_,_,_) -> 625 timeout; 626resolve_inst({loop_rec,[Lbl,Dst]},_,_,_) -> 627 {loop_rec,Lbl,Dst}; 628resolve_inst({loop_rec_end,[Lbl]},_,_,_) -> 629 {loop_rec_end,Lbl}; 630resolve_inst({wait,[Lbl]},_,_,_) -> 631 {wait,Lbl}; 632resolve_inst({wait_timeout,[Lbl,Int]},_,_,_) -> 633 {wait_timeout,Lbl,resolve_arg(Int)}; 634resolve_inst({m_plus,Args},_,_,_) -> 635 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 636 {arithbif,'+',W,[SrcR1,SrcR2],DstR}; 637resolve_inst({m_minus,Args},_,_,_) -> 638 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 639 {arithbif,'-',W,[SrcR1,SrcR2],DstR}; 640resolve_inst({m_times,Args},_,_,_) -> 641 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 642 {arithbif,'*',W,[SrcR1,SrcR2],DstR}; 643resolve_inst({m_div,Args},_,_,_) -> 644 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 645 {arithbif,'/',W,[SrcR1,SrcR2],DstR}; 646resolve_inst({int_div,Args},_,_,_) -> 647 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 648 {arithbif,'div',W,[SrcR1,SrcR2],DstR}; 649resolve_inst({int_rem,Args},_,_,_) -> 650 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 651 {arithbif,'rem',W,[SrcR1,SrcR2],DstR}; 652resolve_inst({int_band,Args},_,_,_) -> 653 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 654 {arithbif,'band',W,[SrcR1,SrcR2],DstR}; 655resolve_inst({int_bor,Args},_,_,_) -> 656 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 657 {arithbif,'bor',W,[SrcR1,SrcR2],DstR}; 658resolve_inst({int_bxor,Args},_,_,_) -> 659 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 660 {arithbif,'bxor',W,[SrcR1,SrcR2],DstR}; 661resolve_inst({int_bsl,Args},_,_,_) -> 662 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 663 {arithbif,'bsl',W,[SrcR1,SrcR2],DstR}; 664resolve_inst({int_bsr,Args},_,_,_) -> 665 [W,SrcR1,SrcR2,DstR] = resolve_args(Args), 666 {arithbif,'bsr',W,[SrcR1,SrcR2],DstR}; 667resolve_inst({int_bnot,Args},_,_,_) -> 668 [W,SrcR,DstR] = resolve_args(Args), 669 {arithbif,'bnot',W,[SrcR],DstR}; 670resolve_inst({is_lt=I,Args0},_,_,_) -> 671 [L|Args] = resolve_args(Args0), 672 {test,I,L,Args}; 673resolve_inst({is_ge=I,Args0},_,_,_) -> 674 [L|Args] = resolve_args(Args0), 675 {test,I,L,Args}; 676resolve_inst({is_eq=I,Args0},_,_,_) -> 677 [L|Args] = resolve_args(Args0), 678 {test,I,L,Args}; 679resolve_inst({is_ne=I,Args0},_,_,_) -> 680 [L|Args] = resolve_args(Args0), 681 {test,I,L,Args}; 682resolve_inst({is_eq_exact=I,Args0},_,_,_) -> 683 [L|Args] = resolve_args(Args0), 684 {test,I,L,Args}; 685resolve_inst({is_ne_exact=I,Args0},_,_,_) -> 686 [L|Args] = resolve_args(Args0), 687 {test,I,L,Args}; 688resolve_inst({is_integer=I,Args0},_,_,_) -> 689 [L|Args] = resolve_args(Args0), 690 {test,I,L,Args}; 691resolve_inst({is_float=I,Args0},_,_,_) -> 692 [L|Args] = resolve_args(Args0), 693 {test,I,L,Args}; 694resolve_inst({is_number=I,Args0},_,_,_) -> 695 [L|Args] = resolve_args(Args0), 696 {test,I,L,Args}; 697resolve_inst({is_atom=I,Args0},_,_,_) -> 698 [L|Args] = resolve_args(Args0), 699 {test,I,L,Args}; 700resolve_inst({is_pid=I,Args0},_,_,_) -> 701 [L|Args] = resolve_args(Args0), 702 {test,I,L,Args}; 703resolve_inst({is_reference=I,Args0},_,_,_) -> 704 [L|Args] = resolve_args(Args0), 705 {test,I,L,Args}; 706resolve_inst({is_port=I,Args0},_,_,_) -> 707 [L|Args] = resolve_args(Args0), 708 {test,I,L,Args}; 709resolve_inst({is_nil=I,Args0},_,_,_) -> 710 [L|Args] = resolve_args(Args0), 711 {test,I,L,Args}; 712resolve_inst({is_binary=I,Args0},_,_,_) -> 713 [L|Args] = resolve_args(Args0), 714 {test,I,L,Args}; 715resolve_inst({is_constant=I,Args0},_,_,_) -> 716 [L|Args] = resolve_args(Args0), 717 {test,I,L,Args}; 718resolve_inst({is_list=I,Args0},_,_,_) -> 719 [L|Args] = resolve_args(Args0), 720 {test,I,L,Args}; 721resolve_inst({is_nonempty_list=I,Args0},_,_,_) -> 722 [L|Args] = resolve_args(Args0), 723 {test,I,L,Args}; 724resolve_inst({is_tuple=I,Args0},_,_,_) -> 725 [L|Args] = resolve_args(Args0), 726 {test,I,L,Args}; 727resolve_inst({test_arity=I,Args0},_,_,_) -> 728 [L|Args] = resolve_args(Args0), 729 {test,I,L,Args}; 730resolve_inst({select_val,Args},_,_,_) -> 731 [Reg,FLbl,{{z,1},{u,_Len},List0}] = Args, 732 List = resolve_args(List0), 733 {select_val,Reg,FLbl,{list,List}}; 734resolve_inst({select_tuple_arity,Args},_,_,_) -> 735 [Reg,FLbl,{{z,1},{u,_Len},List0}] = Args, 736 List = resolve_args(List0), 737 {select_tuple_arity,Reg,FLbl,{list,List}}; 738resolve_inst({jump,[Lbl]},_,_,_) -> 739 {jump,Lbl}; 740resolve_inst({'catch',[Dst,Lbl]},_,_,_) -> 741 {'catch',Dst,Lbl}; 742resolve_inst({catch_end,[Dst]},_,_,_) -> 743 {catch_end,Dst}; 744resolve_inst({move,[Src,Dst]},_,_,_) -> 745 {move,resolve_arg(Src),Dst}; 746resolve_inst({get_list,[Src,Dst1,Dst2]},_,_,_) -> 747 {get_list,Src,Dst1,Dst2}; 748resolve_inst({get_tuple_element,[Src,{u,Off},Dst]},_,_,_) -> 749 {get_tuple_element,resolve_arg(Src),Off,resolve_arg(Dst)}; 750resolve_inst({set_tuple_element,[Src,Dst,{u,Off}]},_,_,_) -> 751 {set_tuple_element,resolve_arg(Src),resolve_arg(Dst),Off}; 752resolve_inst({put_string,[{u,Len},{u,Off},Dst]},_,Strings,_) -> 753 String = if Len > 0 -> binary_to_list(Strings, Off+1, Off+Len); 754 true -> "" 755 end, 756?NO_DEBUG('put_string(~p, {string,~p}, ~p)~n',[Len,String,Dst]), 757 {put_string,Len,{string,String},Dst}; 758resolve_inst({put_list,[Src1,Src2,Dst]},_,_,_) -> 759 {put_list,resolve_arg(Src1),resolve_arg(Src2),Dst}; 760resolve_inst({put_tuple,[{u,Arity},Dst]},_,_,_) -> 761 {put_tuple,Arity,Dst}; 762resolve_inst({put,[Src]},_,_,_) -> 763 {put,resolve_arg(Src)}; 764resolve_inst({badmatch,[X]},_,_,_) -> 765 {badmatch,resolve_arg(X)}; 766resolve_inst({if_end,[]},_,_,_) -> 767 if_end; 768resolve_inst({case_end,[X]},_,_,_) -> 769 {case_end,resolve_arg(X)}; 770resolve_inst({call_fun,[{u,N}]},_,_,_) -> 771 {call_fun,N}; 772resolve_inst({make_fun,Args},_,_,Lbls) -> 773 [{f,L},Magic,FreeVars] = resolve_args(Args), 774 {make_fun,catch lookup_key(L,Lbls),Magic,FreeVars}; 775resolve_inst({is_function=I,Args0},_,_,_) -> 776 [L|Args] = resolve_args(Args0), 777 {test,I,L,Args}; 778resolve_inst({call_ext_only,[{u,N},{u,MFAix}]},Imports,_,_) -> 779 {call_ext_only,N,catch lists:nth(MFAix+1,Imports)}; 780%% 781%% Instructions for handling binaries added in R7A & R7B 782%% 783resolve_inst({bs_start_match,[F,Reg]},_,_,_) -> 784 {bs_start_match,F,Reg}; 785resolve_inst({bs_get_integer=I,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) -> 786 [A2,A5] = resolve_args([Arg2,Arg5]), 787 {test,I,Lbl,[A2,N,decode_field_flags(U),A5]}; 788resolve_inst({bs_get_float=I,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) -> 789 [A2,A5] = resolve_args([Arg2,Arg5]), 790 {test,I,Lbl,[A2,N,decode_field_flags(U),A5]}; 791resolve_inst({bs_get_binary=I,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) -> 792 [A2,A5] = resolve_args([Arg2,Arg5]), 793 {test,I,Lbl,[A2,N,decode_field_flags(U),A5]}; 794resolve_inst({bs_skip_bits,[Lbl,Arg2,{u,N},{u,U}]},_,_,_) -> 795 [A2] = resolve_args([Arg2]), 796 {test,bs_skip_bits,Lbl,[A2,N,decode_field_flags(U)]}; 797resolve_inst({bs_test_tail,[F,{u,N}]},_,_,_) -> 798 {test,bs_test_tail,F,[N]}; 799resolve_inst({bs_save,[{u,N}]},_,_,_) -> 800 {bs_save,N}; 801resolve_inst({bs_restore,[{u,N}]},_,_,_) -> 802 {bs_restore,N}; 803resolve_inst({bs_init,[{u,N},{u,U}]},_,_,_) -> 804 {bs_init,N,decode_field_flags(U)}; 805resolve_inst({bs_final,[F,X]},_,_,_) -> 806 {bs_final,F,X}; 807resolve_inst({bs_put_integer,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) -> 808 [A2,A5] = resolve_args([Arg2,Arg5]), 809 {bs_put_integer,Lbl,A2,N,decode_field_flags(U),A5}; 810resolve_inst({bs_put_binary,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) -> 811 [A2,A5] = resolve_args([Arg2,Arg5]), 812 ?NO_DEBUG('bs_put_binary(~p,~p,~p,~p,~p})~n',[Lbl,A2,N,U,A5]), 813 {bs_put_binary,Lbl,A2,N,decode_field_flags(U),A5}; 814resolve_inst({bs_put_float,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) -> 815 [A2,A5] = resolve_args([Arg2,Arg5]), 816 ?NO_DEBUG('bs_put_float(~p,~p,~p,~p,~p})~n',[Lbl,A2,N,U,A5]), 817 {bs_put_float,Lbl,A2,N,decode_field_flags(U),A5}; 818resolve_inst({bs_put_string,[{u,Len},{u,Off}]},_,Strings,_) -> 819 String = if Len > 0 -> binary_to_list(Strings, Off+1, Off+Len); 820 true -> "" 821 end, 822 ?NO_DEBUG('bs_put_string(~p, {string,~p})~n',[Len,String]), 823 {bs_put_string,Len,{string,String}}; 824resolve_inst({bs_need_buf,[{u,N}]},_,_,_) -> 825 {bs_need_buf,N}; 826 827%% 828%% Instructions for handling floating point numbers added in June 2001 (R8). 829%% 830resolve_inst({fclearerror,[]},_,_,_) -> 831 fclearerror; 832resolve_inst({fcheckerror,Args},_,_,_) -> 833 [Fail] = resolve_args(Args), 834 {fcheckerror,Fail}; 835resolve_inst({fmove,Args},_,_,_) -> 836 [FR,Reg] = resolve_args(Args), 837 {fmove,FR,Reg}; 838resolve_inst({fconv,Args},_,_,_) -> 839 [Reg,FR] = resolve_args(Args), 840 {fconv,Reg,FR}; 841resolve_inst({fadd=I,Args},_,_,_) -> 842 [F,A1,A2,Reg] = resolve_args(Args), 843 {arithfbif,I,F,[A1,A2],Reg}; 844resolve_inst({fsub=I,Args},_,_,_) -> 845 [F,A1,A2,Reg] = resolve_args(Args), 846 {arithfbif,I,F,[A1,A2],Reg}; 847resolve_inst({fmul=I,Args},_,_,_) -> 848 [F,A1,A2,Reg] = resolve_args(Args), 849 {arithfbif,I,F,[A1,A2],Reg}; 850resolve_inst({fdiv=I,Args},_,_,_) -> 851 [F,A1,A2,Reg] = resolve_args(Args), 852 {arithfbif,I,F,[A1,A2],Reg}; 853resolve_inst({fnegate,Args},_,_,_) -> 854 [F,Arg,Reg] = resolve_args(Args), 855 {arithfbif,fnegate,F,[Arg],Reg}; 856 857%% 858%% Instructions for try expressions added in January 2003 (R10). 859%% 860 861resolve_inst({'try',[Reg,Lbl]},_,_,_) -> % analogous to 'catch' 862 {'try',Reg,Lbl}; 863resolve_inst({try_end,[Reg]},_,_,_) -> % analogous to 'catch_end' 864 {try_end,Reg}; 865resolve_inst({try_case,[Reg]},_,_,_) -> % analogous to 'catch_end' 866 {try_case,Reg}; 867resolve_inst({try_case_end,[Reg]},_,_,_) -> 868 {try_case_end,Reg}; 869resolve_inst({raise,[Reg1,Reg2]},_,_,_) -> 870 {bif,raise,{f,0},[Reg1,Reg2],{x,0}}; 871 872%% 873%% New bit syntax instructions added in February 2004 (R10B). 874%% 875 876resolve_inst({bs_init2,[Lbl,Arg2,{u,W},{u,R},{u,F},Arg6]},_,_,_) -> 877 [A2,A6] = resolve_args([Arg2,Arg6]), 878 {bs_init2,Lbl,A2,W,R,decode_field_flags(F),A6}; 879resolve_inst({bs_bits_to_bytes,[Lbl,Arg2,Arg3]},_,_,_) -> 880 [A2,A3] = resolve_args([Arg2,Arg3]), 881 {bs_bits_to_bytes,Lbl,A2,A3}; 882resolve_inst({bs_add=I,[Lbl,Arg2,Arg3,Arg4,Arg5]},_,_,_) -> 883 [A2,A3,A4,A5] = resolve_args([Arg2,Arg3,Arg4,Arg5]), 884 {I,Lbl,[A2,A3,A4],A5}; 885 886%% 887%% New apply instructions added in April 2004 (R10B). 888%% 889resolve_inst({apply,[{u,Arity}]},_,_,_) -> 890 {apply,Arity}; 891resolve_inst({apply_last,[{u,Arity},{u,D}]},_,_,_) -> 892 {apply_last,Arity,D}; 893 894%% 895%% New test instruction added in April 2004 (R10B). 896%% 897resolve_inst({is_boolean=I,Args0},_,_,_) -> 898 [L|Args] = resolve_args(Args0), 899 {test,I,L,Args}; 900 901%% 902%% Catches instructions that are not yet handled. 903%% 904 905resolve_inst(X,_,_,_) -> ?exit({resolve_inst,X}). 906 907%%----------------------------------------------------------------------- 908%% Resolves arguments in a generic way. 909%%----------------------------------------------------------------------- 910 911resolve_args(Args) -> [resolve_arg(A) || A <- Args]. 912 913resolve_arg({u,N}) -> N; 914resolve_arg({i,N}) -> {integer,N}; 915resolve_arg({atom,Atom}=A) when is_atom(Atom) -> A; 916resolve_arg(nil) -> nil; 917resolve_arg(Arg) -> Arg. 918 919%%----------------------------------------------------------------------- 920%% The purpose of the following is just to add a hook for future changes. 921%% Currently, field flags are numbers 1-2-4-8 and only two of these 922%% numbers (BSF_LITTLE 2 -- BSF_SIGNED 4) have a semantic significance; 923%% others are just hints for speeding up the execution; see "erl_bits.h". 924%%----------------------------------------------------------------------- 925 926decode_field_flags(FF) -> 927 {field_flags,FF}. 928 929%%----------------------------------------------------------------------- 930%% Each string is denoted in the assembled code by its offset into this 931%% binary. This binary contains all strings concatenated together. 932%%----------------------------------------------------------------------- 933 934beam_disasm_strings(Bin) -> 935 Bin. 936 937%%----------------------------------------------------------------------- 938%% Disassembles the attributes of a BEAM file. 939%%----------------------------------------------------------------------- 940 941beam_disasm_attributes(none) -> none; 942beam_disasm_attributes(AttrBin) -> binary_to_term(AttrBin). 943 944%%----------------------------------------------------------------------- 945%% Disassembles the compilation information of a BEAM file. 946%%----------------------------------------------------------------------- 947 948beam_disasm_compilation_info(none) -> none; 949beam_disasm_compilation_info(Bin) -> binary_to_term(Bin). 950 951%%----------------------------------------------------------------------- 952%% Private Utilities 953%%----------------------------------------------------------------------- 954 955%%----------------------------------------------------------------------- 956 957lookup_key(Key,[{Key,Val}|_]) -> 958 Val; 959lookup_key(Key,[_|KVs]) -> 960 lookup_key(Key,KVs); 961lookup_key(Key,[]) -> 962 ?exit({lookup_key,{key_not_found,Key}}). 963 964%%----------------------------------------------------------------------- 965