1%% -*- erlang-indent-level: 4 -*-
2%%=======================================================================
3%% File        : beam_disasm.erl
4%% Author      : Kostis Sagonas
5%% Description : Disassembles an R5-R10 .beam file into symbolic BEAM code
6%%=======================================================================
7%% $Id: beam_disasm.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $
8%%=======================================================================
9%% Notes:
10%%   1. It does NOT work for .beam files of previous BEAM versions.
11%%   2. If handling of new BEAM instructions is needed, this should be
12%%      inserted at the end of function resolve_inst().
13%%=======================================================================
14
15-module(beam_disasm).
16
17-export([file/1, format_error/1]).
18
19-author("Kostis Sagonas").
20
21-include("beam_opcodes.hrl").
22
23%%-----------------------------------------------------------------------
24
25-define(NO_DEBUG(Str,Xs),ok).
26-define(DEBUG(Str,Xs),io:format(Str,Xs)).
27-define(exit(Reason),exit({?MODULE,?LINE,Reason})).
28
29%%-----------------------------------------------------------------------
30%% Error information
31
32format_error({error, Module, Error}) ->
33    Module:format_error(Error);
34format_error({internal, Error}) ->
35    io_lib:format("~p: disassembly failed with reason ~P.",
36		  [?MODULE, Error, 25]).
37
38%%-----------------------------------------------------------------------
39%% The main exported function
40%%   File is either a file name or a binary containing the code.
41%%   Returns `{beam_file, [...]}' or `{error, Module, Reason}'.
42%%   Call `format_error({error, Module, Reason})' for an error string.
43%%-----------------------------------------------------------------------
44
45file(File) ->
46    case beam_lib:info(File) of
47	Info when list(Info) ->
48	    {value,{chunks,Chunks}} = lists:keysearch(chunks,1,Info),
49	    case catch process_chunks(File, Chunks) of
50		{'EXIT', Error} ->
51		    {error, ?MODULE, {internal, Error}};
52		Result ->
53		    Result
54	    end;
55	Error ->
56	    Error
57    end.
58
59%%-----------------------------------------------------------------------
60%% Interface might need to be revised -- do not depend on it.
61%%-----------------------------------------------------------------------
62
63process_chunks(F,ChunkInfoList) ->
64    {ok,{_,Chunks}} = beam_lib:chunks(F, ["Atom","Code","StrT","ImpT","ExpT"]),
65    [{"Atom",AtomBin},{"Code",CodeBin},{"StrT",StrBin},
66     {"ImpT",ImpBin},{"ExpT",ExpBin}] = Chunks,
67    LambdaBin = optional_chunk(F, "FunT", ChunkInfoList),
68    LocBin = optional_chunk(F, "LocT", ChunkInfoList),
69    AttrBin = optional_chunk(F, "Attr", ChunkInfoList),
70    CompBin = optional_chunk(F, "CInf", ChunkInfoList),
71    Atoms = beam_disasm_atoms(AtomBin),
72    Exports = beam_disasm_exports(ExpBin, Atoms),
73    Imports = beam_disasm_imports(ImpBin, Atoms),
74    LocFuns = beam_disasm_exports(LocBin, Atoms),
75    Lambdas = beam_disasm_lambdas(LambdaBin, Atoms),
76    Str = beam_disasm_strings(StrBin),
77    Str1 = binary_to_list(Str),  %% for debugging -- use Str as far as poss.
78    Sym_Code = beam_disasm_code(CodeBin,Atoms,Imports,Str,Lambdas),
79    Attributes = beam_disasm_attributes(AttrBin),
80    CompInfo = beam_disasm_compilation_info(CompBin),
81    All = [{exports,Exports},
82	   {imports,Imports},
83	   {code,Sym_Code},
84	   {atoms,Atoms},
85	   {local_funs,LocFuns},
86	   {strings,Str1},
87	   {attributes,Attributes},
88	   {comp_info,CompInfo}],
89    {beam_file,[Item || {_Key,Data}=Item <- All, Data =/= none]}.
90
91%%-----------------------------------------------------------------------
92%% Retrieve an optional chunk or none if the chunk doesn't exist.
93%%-----------------------------------------------------------------------
94
95optional_chunk(F, ChunkTag, ChunkInfo) ->
96    case lists:keymember(ChunkTag, 1, ChunkInfo) of
97	true ->
98	    {ok,{_,[{ChunkTag,Chunk}]}} = beam_lib:chunks(F, [ChunkTag]),
99	    Chunk;
100	false -> none
101    end.
102
103%%-----------------------------------------------------------------------
104%% UTILITIES -- these actually exist in file "beam_lib"
105%%           -- they should be moved into a common utils file.
106%%-----------------------------------------------------------------------
107
108i32([X1,X2,X3,X4]) ->
109    (X1 bsl 24) bor (X2 bsl 16) bor (X3 bsl 8) bor X4.
110
111get_int(B) ->
112    {I, B1} = split_binary(B, 4),
113    {i32(binary_to_list(I)), B1}.
114
115%%-----------------------------------------------------------------------
116%% Disassembles the atom table of a BEAM file.
117%% - atoms are stored in order 1 ... N (N = Num_atoms, in fact),
118%% - each atom name consists of a length byte, followed by that many
119%%   bytes of name
120%% (nb: atom names max 255 chars?!)
121%%-----------------------------------------------------------------------
122
123beam_disasm_atoms(AtomTabBin) ->
124    {_NumAtoms,B} = get_int(AtomTabBin),
125    disasm_atoms(B).
126
127disasm_atoms(AtomBin) ->
128    disasm_atoms(binary_to_list(AtomBin),1).
129
130disasm_atoms([Len|Xs],N) ->
131    {AtomName,Rest} = get_atom_name(Len,Xs),
132    [{N,list_to_atom(AtomName)}|disasm_atoms(Rest,N+1)];
133disasm_atoms([],_) ->
134    [].
135
136get_atom_name(Len,Xs) ->
137    get_atom_name(Len,Xs,[]).
138
139get_atom_name(N,[X|Xs],RevName) when N > 0 ->
140    get_atom_name(N-1,Xs,[X|RevName]);
141get_atom_name(0,Xs,RevName) ->
142    { lists:reverse(RevName), Xs }.
143
144%%-----------------------------------------------------------------------
145%% Disassembles the export table of a BEAM file.
146%%-----------------------------------------------------------------------
147
148beam_disasm_exports(none, _) -> none;
149beam_disasm_exports(ExpTabBin, Atoms) ->
150    {_NumAtoms,B} = get_int(ExpTabBin),
151    disasm_exports(B,Atoms).
152
153disasm_exports(Bin,Atoms) ->
154    resolve_exports(collect_exports(binary_to_list(Bin)),Atoms).
155
156collect_exports([F3,F2,F1,F0,A3,A2,A1,A0,L3,L2,L1,L0|Exps]) ->
157    [{i32([F3,F2,F1,F0]),  % F = function (atom ID)
158      i32([A3,A2,A1,A0]),  % A = arity (int)
159      i32([L3,L2,L1,L0])}  % L = label (int)
160     |collect_exports(Exps)];
161collect_exports([]) ->
162    [].
163
164resolve_exports(Exps,Atoms) ->
165    [ {lookup_key(F,Atoms), A, L} || {F,A,L} <- Exps ].
166
167%%-----------------------------------------------------------------------
168%% Disassembles the import table of a BEAM file.
169%%-----------------------------------------------------------------------
170
171beam_disasm_imports(ExpTabBin,Atoms) ->
172    {_NumAtoms,B} = get_int(ExpTabBin),
173    disasm_imports(B,Atoms).
174
175disasm_imports(Bin,Atoms) ->
176    resolve_imports(collect_imports(binary_to_list(Bin)),Atoms).
177
178collect_imports([M3,M2,M1,M0,F3,F2,F1,F0,A3,A2,A1,A0|Exps]) ->
179    [{i32([M3,M2,M1,M0]),  % M = module (atom ID)
180      i32([F3,F2,F1,F0]),  % F = function (atom ID)
181      i32([A3,A2,A1,A0])}  % A = arity (int)
182     |collect_imports(Exps)];
183collect_imports([]) ->
184    [].
185
186resolve_imports(Exps,Atoms) ->
187    [{extfunc,lookup_key(M,Atoms),lookup_key(F,Atoms),A} || {M,F,A} <- Exps ].
188
189%%-----------------------------------------------------------------------
190%% Disassembles the lambda (fun) table of a BEAM file.
191%%-----------------------------------------------------------------------
192
193beam_disasm_lambdas(none, _) -> none;
194beam_disasm_lambdas(<<_:32,Tab/binary>>, Atoms) ->
195    disasm_lambdas(Tab, Atoms, 0).
196
197disasm_lambdas(<<F:32,A:32,Lbl:32,Index:32,NumFree:32,OldUniq:32,More/binary>>,
198	       Atoms, OldIndex) ->
199    Info = {lookup_key(F, Atoms),A,Lbl,Index,NumFree,OldUniq},
200    [{OldIndex,Info}|disasm_lambdas(More, Atoms, OldIndex+1)];
201disasm_lambdas(<<>>, _, _) -> [].
202
203%%-----------------------------------------------------------------------
204%% Disassembles the code chunk of a BEAM file:
205%%   - The code is first disassembled into a long list of instructions.
206%%   - This list is then split into functions and all names are resolved.
207%%-----------------------------------------------------------------------
208
209beam_disasm_code(CodeBin,Atoms,Imports,Str,Lambdas) ->
210    [_SS3,_SS2,_SS1,_SS0,  % Sub-Size (length of information before code)
211     _IS3,_IS2,_IS1,_IS0,  % Instruction Set Identifier (always 0)
212     _OM3,_OM2,_OM1,_OM0,  % Opcode Max
213     _L3,_L2,_L1,_L0,_F3,_F2,_F1,_F0|Code] = binary_to_list(CodeBin),
214    case catch disasm_code(Code, Atoms) of
215	{'EXIT',Rsn} ->
216	    ?NO_DEBUG('code disasm failed: ~p~n',[Rsn]),
217	    ?exit(Rsn);
218	DisasmCode ->
219	    Functions = get_function_chunks(DisasmCode),
220	    LocLabels = local_labels(Functions),
221	    [resolve_names(F,Imports,Str,LocLabels,Lambdas) || F <- Functions]
222    end.
223
224%%-----------------------------------------------------------------------
225
226disasm_code([B|Bs], Atoms) ->
227    {Instr,RestBs} = disasm_instr(B, Bs, Atoms),
228    [Instr|disasm_code(RestBs, Atoms)];
229disasm_code([], _) -> [].
230
231%%-----------------------------------------------------------------------
232%% Splits the code stream into chunks representing the code of functions.
233%%
234%% NOTE: code actually looks like
235%%   label L1: ... label Ln:
236%%     func_info ...
237%%   label entry:
238%%     ...
239%%     <on failure, use label Li to show where things died>
240%%     ...
241%% So the labels before each func_info should be included as well.
242%% Ideally, only one such label is needed, but the BEAM compiler
243%% before R8 didn't care to remove the redundant ones.
244%%-----------------------------------------------------------------------
245
246get_function_chunks([I|Code]) ->
247    {LastI,RestCode,Labs} = split_head_labels(I,Code,[]),
248    get_funs(LastI,RestCode,Labs,[]);
249get_function_chunks([]) ->
250    ?exit(empty_code_segment).
251
252get_funs(PrevI,[I|Is],RevF,RevFs) ->
253    case I of
254	{func_info,_Info} ->
255	    [H|T] = RevF,
256	    {Last,Fun,TrailingLabels} = split_head_labels(H,T,[]),
257	    get_funs(I, Is, [PrevI|TrailingLabels], add_funs([Last|Fun],RevFs));
258	_ ->
259	    get_funs(I, Is, [PrevI|RevF], RevFs)
260    end;
261get_funs(PrevI,[],RevF,RevFs) ->
262    case PrevI of
263	{int_code_end,[]} ->
264	    emit_funs(add_fun(RevF,RevFs));
265	_ ->
266	    ?DEBUG('warning: code segment did not end with int_code_end~n',[]),
267            emit_funs(add_funs([PrevI|RevF],RevFs))
268    end.
269
270split_head_labels({label,L},[I|Code],Labs) ->
271    split_head_labels(I,Code,[{label,L}|Labs]);
272split_head_labels(I,Code,Labs) ->
273    {I,Code,Labs}.
274
275add_fun([],Fs) ->
276    Fs;
277add_fun(F,Fs) ->
278    add_funs(F,Fs).
279
280add_funs(F,Fs) ->
281    [ lists:reverse(F) | Fs ].
282
283emit_funs(Fs) ->
284    lists:reverse(Fs).
285
286%%-----------------------------------------------------------------------
287%% Collects local labels -- I am not sure this is 100% what is needed.
288%%-----------------------------------------------------------------------
289
290local_labels(Funs) ->
291    [local_label(Fun) || Fun <- Funs].
292
293%% The first clause below attempts to provide some (limited form of)
294%% backwards compatibility; it is not needed for .beam files generated
295%% by the R8 compiler.  The clause should one fine day be taken out.
296local_label([{label,_},{label,L}|Code]) ->
297    local_label([{label,L}|Code]);
298local_label([{label,_},
299	     {func_info,[M0,F0,{u,A}]},
300	     {label,[{u,L1}]}|_]) ->
301    {atom,M} = resolve_arg(M0),
302    {atom,F} = resolve_arg(F0),
303    {L1, {M, F, A}};
304local_label(Code) ->
305    io:format('beam_disasm: no label in ~p~n', [Code]),
306    {-666,{none,none,0}}.
307
308%%-----------------------------------------------------------------------
309%% Disassembles a single BEAM instruction; most instructions are handled
310%% in a generic way; indexing instructions are handled separately.
311%%-----------------------------------------------------------------------
312
313disasm_instr(B, Bs, Atoms) ->
314    {SymOp,Arity} = beam_opcodes:opname(B),
315    case SymOp of
316	select_val ->
317	    disasm_select_inst(select_val, Bs, Atoms);
318	select_tuple_arity ->
319	    disasm_select_inst(select_tuple_arity, Bs, Atoms);
320	_ ->
321	    case catch decode_n_args(Arity, Bs, Atoms) of
322		{'EXIT',Rsn} ->
323		    ?NO_DEBUG("decode_n_args(~p,~p) failed~n",[Arity,Bs]),
324		    {{'EXIT',{SymOp,Arity,Rsn}},[]};
325		{Args,RestBs} ->
326		    ?NO_DEBUG("instr ~p~n",[{SymOp,Args}]),
327		    {{SymOp,Args}, RestBs}
328	    end
329    end.
330
331%%-----------------------------------------------------------------------
332%% Disassembles a BEAM select_* instruction used for indexing.
333%%   Currently handles {select_val,3} and {select_tuple_arity,3} insts.
334%%
335%%   The arruments of a "select"-type instruction look as follows:
336%%       <reg>, {f,FailLabel}, {list, <num cases>, [<case1> ... <caseN>]}
337%%   where each case is of the form [symbol,{f,Label}].
338%%-----------------------------------------------------------------------
339
340disasm_select_inst(Inst, Bs, Atoms) ->
341    {X, Bs1} = decode_arg(Bs, Atoms),
342    {F, Bs2} = decode_arg(Bs1, Atoms),
343    {Z, Bs3} = decode_arg(Bs2, Atoms),
344    {U, Bs4} = decode_arg(Bs3, Atoms),
345    {u,Len} = U,
346    {List, RestBs} = decode_n_args(Len, Bs4, Atoms),
347    {{Inst,[X,F,{Z,U,List}]},RestBs}.
348
349%%-----------------------------------------------------------------------
350%% decode_arg([Byte]) -> { Arg, [Byte] }
351%%
352%% - an arg can have variable length, so we must return arg + remaining bytes
353%% - decodes an argument into its 'raw' form: { Tag, Value }
354%%   several types map to a single tag, so the byte code instr must then
355%%   assign a type to it
356%%-----------------------------------------------------------------------
357
358decode_arg([B|Bs]) ->
359    Tag = decode_tag(B band 2#111),
360    ?NO_DEBUG('Tag = ~p, B = ~p, Bs = ~p~n',[Tag,B,Bs]),
361    case Tag of
362	z ->
363	    decode_z_tagged(Tag, B, Bs);
364	_ ->
365	    %% all other cases are handled as if they were integers
366	    decode_int(Tag, B, Bs)
367    end.
368
369decode_arg([B|Bs0], Atoms) ->
370    Tag = decode_tag(B band 2#111),
371    ?NO_DEBUG('Tag = ~p, B = ~p, Bs = ~p~n',[Tag,B,Bs]),
372    case Tag of
373	z ->
374	    decode_z_tagged(Tag, B, Bs0);
375	a ->
376	    %% atom or nil
377	    case decode_int(Tag, B, Bs0) of
378		{{a,0},Bs} -> {nil,Bs};
379		{{a,I},Bs} -> {{atom,lookup_key(I, Atoms)},Bs}
380	    end;
381	_ ->
382	    %% all other cases are handled as if they were integers
383	    decode_int(Tag, B, Bs0)
384    end.
385
386%%-----------------------------------------------------------------------
387%% Decodes an integer value.  Handles positives, negatives, and bignums.
388%%
389%% Tries to do the opposite of:
390%%   beam_asm:encode(1, 5) =            [81]
391%%   beam_asm:encode(1, 1000) =         [105,232]
392%%   beam_asm:encode(1, 2047) =         [233,255]
393%%   beam_asm:encode(1, 2048) =         [25,8,0]
394%%   beam_asm:encode(1,-1) =            [25,255,255]
395%%   beam_asm:encode(1,-4294967295) =   [121,255,0,0,0,1]
396%%   beam_asm:encode(1, 4294967295) =   [121,0,255,255,255,255]
397%%   beam_asm:encode(1, 429496729501) = [121,99,255,255,255,157]
398%%-----------------------------------------------------------------------
399
400decode_int(Tag,B,Bs) when (B band 16#08) == 0 ->
401    %% N < 16 = 4 bits, NNNN:0:TTT
402    N = B bsr 4,
403    {{Tag,N},Bs};
404decode_int(Tag,B,Bs) when (B band 16#10) == 0 ->
405    %% N < 2048 = 11 bits = 3:8 bits, NNN:01:TTT, NNNNNNNN
406    [B1|Bs1] = Bs,
407    Val0 = B band 2#11100000,
408    N = (Val0 bsl 3) bor B1,
409    ?NO_DEBUG('NNN:01:TTT, NNNNNNNN = ~n~p:01:~p, ~p = ~p~n', [Val0,Tag,B,N]),
410    {{Tag,N},Bs1};
411decode_int(Tag,B,Bs) ->
412    {Len,Bs1} = decode_int_length(B,Bs),
413    {IntBs,RemBs} = take_bytes(Len,Bs1),
414    N = build_arg(IntBs),
415    [F|_] = IntBs,
416    Num = if F > 127, Tag == i -> decode_negative(N,Len);
417	     true -> N
418	  end,
419    ?NO_DEBUG('Len = ~p, IntBs = ~p, Num = ~p~n', [Len,IntBs,Num]),
420    {{Tag,Num},RemBs}.
421
422decode_int_length(B,Bs) ->
423    %% The following imitates get_erlang_integer() in beam_load.c
424    %% Len is the size of the integer value in bytes
425    case B bsr 5 of
426	7 ->
427	    {Arg,ArgBs} = decode_arg(Bs),
428	    case Arg of
429		{u,L} ->
430		    {L+9,ArgBs};  % 9 stands for 7+2
431		_ ->
432		    ?exit({decode_int,weird_bignum_sublength,Arg})
433	    end;
434	L ->
435	    {L+2,Bs}
436    end.
437
438decode_negative(N,Len) ->
439    N - (1 bsl (Len*8)). % 8 is number of bits in a byte
440
441%%-----------------------------------------------------------------------
442%% Decodes lists and floating point numbers.
443%%-----------------------------------------------------------------------
444
445decode_z_tagged(Tag,B,Bs) when (B band 16#08) == 0 ->
446    N = B bsr 4,
447    case N of
448	0 -> % float
449	    decode_float(Bs);
450	1 -> % list
451	    {{Tag,N},Bs};
452	2 -> % fr
453	    decode_fr(Bs);
454	3 -> % allocation list
455	    decode_alloc_list(Bs);
456	_ ->
457	    ?exit({decode_z_tagged,{invalid_extended_tag,N}})
458    end;
459decode_z_tagged(_,B,_) ->
460    ?exit({decode_z_tagged,{weird_value,B}}).
461
462decode_float(Bs) ->
463    {FL,RestBs} = take_bytes(8,Bs),
464    <<Float:64/float>> = list_to_binary(FL),
465    {{float,Float},RestBs}.
466
467decode_fr(Bs) ->
468    {{u,Fr},RestBs} = decode_arg(Bs),
469    {{fr,Fr},RestBs}.
470
471decode_alloc_list(Bs) ->
472    {{u,N},RestBs} = decode_arg(Bs),
473    decode_alloc_list_1(N, RestBs, []).
474
475decode_alloc_list_1(0, RestBs, Acc) ->
476    {{u,{alloc,lists:reverse(Acc)}},RestBs};
477decode_alloc_list_1(N, Bs0, Acc) ->
478    {{u,Type},Bs1} = decode_arg(Bs0),
479    {{u,Val},Bs} = decode_arg(Bs1),
480    case Type of
481	0 ->
482	    decode_alloc_list_1(N-1, Bs, [{words,Val}|Acc]);
483	1 ->
484	    decode_alloc_list_1(N-1, Bs, [{floats,Val}|Acc])
485    end.
486
487%%-----------------------------------------------------------------------
488%% take N bytes from a stream, return { Taken_bytes, Remaining_bytes }
489%%-----------------------------------------------------------------------
490
491take_bytes(N,Bs) ->
492    take_bytes(N,Bs,[]).
493
494take_bytes(N,[B|Bs],Acc) when N > 0 ->
495    take_bytes(N-1,Bs,[B|Acc]);
496take_bytes(0,Bs,Acc) ->
497    { lists:reverse(Acc), Bs }.
498
499%%-----------------------------------------------------------------------
500%% from a list of bytes Bn,Bn-1,...,B1,B0
501%% build  (Bn << 8*n) bor ... bor B1 << 8 bor B0 << 0
502%%-----------------------------------------------------------------------
503
504build_arg(Bs) ->
505    build_arg(Bs,0).
506
507build_arg([B|Bs],N) ->
508    build_arg(Bs, (N bsl 8) bor B);
509build_arg([],N) ->
510    N.
511
512%%-----------------------------------------------------------------------
513%% Decodes a bunch of arguments and returns them in a list
514%%-----------------------------------------------------------------------
515
516decode_n_args(N, Bs, Atoms) when N >= 0 ->
517    decode_n_args(N, [], Bs, Atoms).
518
519decode_n_args(N, Acc, Bs0, Atoms) when N > 0 ->
520    {A1,Bs} = decode_arg(Bs0, Atoms),
521    decode_n_args(N-1, [A1|Acc], Bs, Atoms);
522decode_n_args(0, Acc, Bs, _) ->
523    {lists:reverse(Acc),Bs}.
524
525%%-----------------------------------------------------------------------
526%% Convert a numeric tag value into a symbolic one
527%%-----------------------------------------------------------------------
528
529decode_tag(?tag_u) -> u;
530decode_tag(?tag_i) -> i;
531decode_tag(?tag_a) -> a;
532decode_tag(?tag_x) -> x;
533decode_tag(?tag_y) -> y;
534decode_tag(?tag_f) -> f;
535decode_tag(?tag_h) -> h;
536decode_tag(?tag_z) -> z;
537decode_tag(X) -> ?exit({unknown_tag,X}).
538
539%%-----------------------------------------------------------------------
540%% - replace all references {a,I} with the atom with index I (or {atom,A})
541%% - replace all references to {i,K} in an external call position with
542%%    the proper MFA (position in list, first elt = 0, yields MFA to use)
543%% - resolve strings, represented as <offset, length>, into their
544%%   actual values by using string table
545%%    (note: string table should be passed as a BINARY so that we can
546%%    use binary_to_list/3!)
547%% - convert instruction to its readable form ...
548%%
549%% Currently, only the first three are done (systematically, at least).
550%%
551%% Note: It MAY be premature to remove the lists of args, since that
552%%  representation means it is simpler to iterate over all args, etc.
553%%-----------------------------------------------------------------------
554
555resolve_names(Fun, Imports, Str, Lbls, Lambdas) ->
556    [resolve_inst(Instr, Imports, Str, Lbls, Lambdas) || Instr <- Fun].
557
558%%
559%% New make_fun2/4 instruction added in August 2001 (R8).
560%% We handle it specially here to avoid adding an argument to
561%% the clause for every instruction.
562%%
563
564resolve_inst({make_fun2,Args},_,_,Lbls,Lambdas) ->
565    [OldIndex] = resolve_args(Args),
566    {value,{OldIndex,{F,A,_Lbl,_Index,NumFree,OldUniq}}} =
567	lists:keysearch(OldIndex, 1, Lambdas),
568    [{_,{M,_,_}}|_] = Lbls,			% Slightly kludgy.
569    {make_fun2,{M,F,A},OldIndex,OldUniq,NumFree};
570resolve_inst(Instr, Imports, Str, Lbls, _Lambdas) ->
571    resolve_inst(Instr, Imports, Str, Lbls).
572
573resolve_inst({label,[{u,L}]},_,_,_) ->
574    {label,L};
575resolve_inst({func_info,RawMFA},_,_,_) ->
576    {func_info,resolve_args(RawMFA)};
577% resolve_inst(int_code_end,_,_,_,_) ->  % instruction already handled
578%    int_code_end;                       % should not really be handled here
579resolve_inst({call,[{u,N},{f,L}]},_,_,Lbls) ->
580    {call,N,catch lookup_key(L,Lbls)};
581resolve_inst({call_last,[{u,N},{f,L},{u,U}]},_,_,Lbls) ->
582    {call_last,N,catch lookup_key(L,Lbls),U};
583resolve_inst({call_only,[{u,N},{f,L}]},_,_,Lbls) ->
584    {call_only,N,catch lookup_key(L,Lbls)};
585resolve_inst({call_ext,[{u,N},{u,MFAix}]},Imports,_,_) ->
586    {call_ext,N,catch lists:nth(MFAix+1,Imports)};
587resolve_inst({call_ext_last,[{u,N},{u,MFAix},{u,X}]},Imports,_,_) ->
588    {call_ext_last,N,catch lists:nth(MFAix+1,Imports),X};
589resolve_inst({bif0,Args},Imports,_,_) ->
590    [Bif,Reg] = resolve_args(Args),
591    {extfunc,_Mod,BifName,_Arity} = lists:nth(Bif+1,Imports),
592    %?NO_DEBUG('bif0(~p, ~p)~n',[BifName,Reg]),
593    {bif,BifName,nofail,[],Reg};
594resolve_inst({bif1,Args},Imports,_,_) ->
595    [F,Bif,A1,Reg] = resolve_args(Args),
596    {extfunc,_Mod,BifName,_Arity} = lists:nth(Bif+1,Imports),
597    %?NO_DEBUG('bif1(~p, ~p, ~p, ~p, ~p)~n',[Bif,BifName,F,[A1],Reg]),
598    {bif,BifName,F,[A1],Reg};
599resolve_inst({bif2,Args},Imports,_,_) ->
600    [F,Bif,A1,A2,Reg] = resolve_args(Args),
601    {extfunc,_Mod,BifName,_Arity} = lists:nth(Bif+1,Imports),
602    %?NO_DEBUG('bif2(~p, ~p, ~p, ~p, ~p)~n',[Bif,BifName,F,[A1,A2],Reg]),
603    {bif,BifName,F,[A1,A2],Reg};
604resolve_inst({allocate,[{u,X0},{u,X1}]},_,_,_) ->
605    {allocate,X0,X1};
606resolve_inst({allocate_heap,[{u,X0},{u,X1},{u,X2}]},_,_,_) ->
607    {allocate_heap,X0,X1,X2};
608resolve_inst({allocate_zero,[{u,X0},{u,X1}]},_,_,_) ->
609    {allocate_zero,X0,X1};
610resolve_inst({allocate_heap_zero,[{u,X0},{u,X1},{u,X2}]},_,_,_) ->
611    {allocate_heap_zero,X0,X1,X2};
612resolve_inst({test_heap,[{u,X0},{u,X1}]},_,_,_) ->
613    {test_heap,X0,X1};
614resolve_inst({init,[Dst]},_,_,_) ->
615    {init,Dst};
616resolve_inst({deallocate,[{u,L}]},_,_,_) ->
617    {deallocate,L};
618resolve_inst({return,[]},_,_,_) ->
619    return;
620resolve_inst({send,[]},_,_,_) ->
621    send;
622resolve_inst({remove_message,[]},_,_,_) ->
623    remove_message;
624resolve_inst({timeout,[]},_,_,_) ->
625    timeout;
626resolve_inst({loop_rec,[Lbl,Dst]},_,_,_) ->
627    {loop_rec,Lbl,Dst};
628resolve_inst({loop_rec_end,[Lbl]},_,_,_) ->
629    {loop_rec_end,Lbl};
630resolve_inst({wait,[Lbl]},_,_,_) ->
631    {wait,Lbl};
632resolve_inst({wait_timeout,[Lbl,Int]},_,_,_) ->
633    {wait_timeout,Lbl,resolve_arg(Int)};
634resolve_inst({m_plus,Args},_,_,_) ->
635    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
636    {arithbif,'+',W,[SrcR1,SrcR2],DstR};
637resolve_inst({m_minus,Args},_,_,_) ->
638    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
639    {arithbif,'-',W,[SrcR1,SrcR2],DstR};
640resolve_inst({m_times,Args},_,_,_) ->
641    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
642    {arithbif,'*',W,[SrcR1,SrcR2],DstR};
643resolve_inst({m_div,Args},_,_,_) ->
644    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
645    {arithbif,'/',W,[SrcR1,SrcR2],DstR};
646resolve_inst({int_div,Args},_,_,_) ->
647    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
648    {arithbif,'div',W,[SrcR1,SrcR2],DstR};
649resolve_inst({int_rem,Args},_,_,_) ->
650    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
651    {arithbif,'rem',W,[SrcR1,SrcR2],DstR};
652resolve_inst({int_band,Args},_,_,_) ->
653    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
654    {arithbif,'band',W,[SrcR1,SrcR2],DstR};
655resolve_inst({int_bor,Args},_,_,_) ->
656    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
657    {arithbif,'bor',W,[SrcR1,SrcR2],DstR};
658resolve_inst({int_bxor,Args},_,_,_) ->
659    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
660    {arithbif,'bxor',W,[SrcR1,SrcR2],DstR};
661resolve_inst({int_bsl,Args},_,_,_) ->
662    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
663    {arithbif,'bsl',W,[SrcR1,SrcR2],DstR};
664resolve_inst({int_bsr,Args},_,_,_) ->
665    [W,SrcR1,SrcR2,DstR] = resolve_args(Args),
666    {arithbif,'bsr',W,[SrcR1,SrcR2],DstR};
667resolve_inst({int_bnot,Args},_,_,_) ->
668    [W,SrcR,DstR] = resolve_args(Args),
669    {arithbif,'bnot',W,[SrcR],DstR};
670resolve_inst({is_lt=I,Args0},_,_,_) ->
671    [L|Args] = resolve_args(Args0),
672    {test,I,L,Args};
673resolve_inst({is_ge=I,Args0},_,_,_) ->
674    [L|Args] = resolve_args(Args0),
675    {test,I,L,Args};
676resolve_inst({is_eq=I,Args0},_,_,_) ->
677    [L|Args] = resolve_args(Args0),
678    {test,I,L,Args};
679resolve_inst({is_ne=I,Args0},_,_,_) ->
680    [L|Args] = resolve_args(Args0),
681    {test,I,L,Args};
682resolve_inst({is_eq_exact=I,Args0},_,_,_) ->
683    [L|Args] = resolve_args(Args0),
684    {test,I,L,Args};
685resolve_inst({is_ne_exact=I,Args0},_,_,_) ->
686    [L|Args] = resolve_args(Args0),
687    {test,I,L,Args};
688resolve_inst({is_integer=I,Args0},_,_,_) ->
689    [L|Args] = resolve_args(Args0),
690    {test,I,L,Args};
691resolve_inst({is_float=I,Args0},_,_,_) ->
692    [L|Args] = resolve_args(Args0),
693    {test,I,L,Args};
694resolve_inst({is_number=I,Args0},_,_,_) ->
695    [L|Args] = resolve_args(Args0),
696    {test,I,L,Args};
697resolve_inst({is_atom=I,Args0},_,_,_) ->
698    [L|Args] = resolve_args(Args0),
699    {test,I,L,Args};
700resolve_inst({is_pid=I,Args0},_,_,_) ->
701    [L|Args] = resolve_args(Args0),
702    {test,I,L,Args};
703resolve_inst({is_reference=I,Args0},_,_,_) ->
704    [L|Args] = resolve_args(Args0),
705    {test,I,L,Args};
706resolve_inst({is_port=I,Args0},_,_,_) ->
707    [L|Args] = resolve_args(Args0),
708    {test,I,L,Args};
709resolve_inst({is_nil=I,Args0},_,_,_) ->
710    [L|Args] = resolve_args(Args0),
711    {test,I,L,Args};
712resolve_inst({is_binary=I,Args0},_,_,_) ->
713    [L|Args] = resolve_args(Args0),
714    {test,I,L,Args};
715resolve_inst({is_constant=I,Args0},_,_,_) ->
716    [L|Args] = resolve_args(Args0),
717    {test,I,L,Args};
718resolve_inst({is_list=I,Args0},_,_,_) ->
719    [L|Args] = resolve_args(Args0),
720    {test,I,L,Args};
721resolve_inst({is_nonempty_list=I,Args0},_,_,_) ->
722    [L|Args] = resolve_args(Args0),
723    {test,I,L,Args};
724resolve_inst({is_tuple=I,Args0},_,_,_) ->
725    [L|Args] = resolve_args(Args0),
726    {test,I,L,Args};
727resolve_inst({test_arity=I,Args0},_,_,_) ->
728    [L|Args] = resolve_args(Args0),
729    {test,I,L,Args};
730resolve_inst({select_val,Args},_,_,_) ->
731    [Reg,FLbl,{{z,1},{u,_Len},List0}] = Args,
732    List = resolve_args(List0),
733    {select_val,Reg,FLbl,{list,List}};
734resolve_inst({select_tuple_arity,Args},_,_,_) ->
735    [Reg,FLbl,{{z,1},{u,_Len},List0}] = Args,
736    List = resolve_args(List0),
737    {select_tuple_arity,Reg,FLbl,{list,List}};
738resolve_inst({jump,[Lbl]},_,_,_) ->
739    {jump,Lbl};
740resolve_inst({'catch',[Dst,Lbl]},_,_,_) ->
741    {'catch',Dst,Lbl};
742resolve_inst({catch_end,[Dst]},_,_,_) ->
743    {catch_end,Dst};
744resolve_inst({move,[Src,Dst]},_,_,_) ->
745    {move,resolve_arg(Src),Dst};
746resolve_inst({get_list,[Src,Dst1,Dst2]},_,_,_) ->
747    {get_list,Src,Dst1,Dst2};
748resolve_inst({get_tuple_element,[Src,{u,Off},Dst]},_,_,_) ->
749    {get_tuple_element,resolve_arg(Src),Off,resolve_arg(Dst)};
750resolve_inst({set_tuple_element,[Src,Dst,{u,Off}]},_,_,_) ->
751    {set_tuple_element,resolve_arg(Src),resolve_arg(Dst),Off};
752resolve_inst({put_string,[{u,Len},{u,Off},Dst]},_,Strings,_) ->
753    String = if Len > 0 -> binary_to_list(Strings, Off+1, Off+Len);
754		true -> ""
755	     end,
756?NO_DEBUG('put_string(~p, {string,~p}, ~p)~n',[Len,String,Dst]),
757    {put_string,Len,{string,String},Dst};
758resolve_inst({put_list,[Src1,Src2,Dst]},_,_,_) ->
759    {put_list,resolve_arg(Src1),resolve_arg(Src2),Dst};
760resolve_inst({put_tuple,[{u,Arity},Dst]},_,_,_) ->
761    {put_tuple,Arity,Dst};
762resolve_inst({put,[Src]},_,_,_) ->
763    {put,resolve_arg(Src)};
764resolve_inst({badmatch,[X]},_,_,_) ->
765    {badmatch,resolve_arg(X)};
766resolve_inst({if_end,[]},_,_,_) ->
767    if_end;
768resolve_inst({case_end,[X]},_,_,_) ->
769    {case_end,resolve_arg(X)};
770resolve_inst({call_fun,[{u,N}]},_,_,_) ->
771    {call_fun,N};
772resolve_inst({make_fun,Args},_,_,Lbls) ->
773    [{f,L},Magic,FreeVars] = resolve_args(Args),
774    {make_fun,catch lookup_key(L,Lbls),Magic,FreeVars};
775resolve_inst({is_function=I,Args0},_,_,_) ->
776    [L|Args] = resolve_args(Args0),
777    {test,I,L,Args};
778resolve_inst({call_ext_only,[{u,N},{u,MFAix}]},Imports,_,_) ->
779    {call_ext_only,N,catch lists:nth(MFAix+1,Imports)};
780%%
781%% Instructions for handling binaries added in R7A & R7B
782%%
783resolve_inst({bs_start_match,[F,Reg]},_,_,_) ->
784    {bs_start_match,F,Reg};
785resolve_inst({bs_get_integer=I,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) ->
786    [A2,A5] = resolve_args([Arg2,Arg5]),
787    {test,I,Lbl,[A2,N,decode_field_flags(U),A5]};
788resolve_inst({bs_get_float=I,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) ->
789    [A2,A5] = resolve_args([Arg2,Arg5]),
790    {test,I,Lbl,[A2,N,decode_field_flags(U),A5]};
791resolve_inst({bs_get_binary=I,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) ->
792    [A2,A5] = resolve_args([Arg2,Arg5]),
793    {test,I,Lbl,[A2,N,decode_field_flags(U),A5]};
794resolve_inst({bs_skip_bits,[Lbl,Arg2,{u,N},{u,U}]},_,_,_) ->
795    [A2] = resolve_args([Arg2]),
796    {test,bs_skip_bits,Lbl,[A2,N,decode_field_flags(U)]};
797resolve_inst({bs_test_tail,[F,{u,N}]},_,_,_) ->
798    {test,bs_test_tail,F,[N]};
799resolve_inst({bs_save,[{u,N}]},_,_,_) ->
800    {bs_save,N};
801resolve_inst({bs_restore,[{u,N}]},_,_,_) ->
802    {bs_restore,N};
803resolve_inst({bs_init,[{u,N},{u,U}]},_,_,_) ->
804    {bs_init,N,decode_field_flags(U)};
805resolve_inst({bs_final,[F,X]},_,_,_) ->
806    {bs_final,F,X};
807resolve_inst({bs_put_integer,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) ->
808    [A2,A5] = resolve_args([Arg2,Arg5]),
809    {bs_put_integer,Lbl,A2,N,decode_field_flags(U),A5};
810resolve_inst({bs_put_binary,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) ->
811    [A2,A5] = resolve_args([Arg2,Arg5]),
812    ?NO_DEBUG('bs_put_binary(~p,~p,~p,~p,~p})~n',[Lbl,A2,N,U,A5]),
813    {bs_put_binary,Lbl,A2,N,decode_field_flags(U),A5};
814resolve_inst({bs_put_float,[Lbl,Arg2,{u,N},{u,U},Arg5]},_,_,_) ->
815    [A2,A5] = resolve_args([Arg2,Arg5]),
816    ?NO_DEBUG('bs_put_float(~p,~p,~p,~p,~p})~n',[Lbl,A2,N,U,A5]),
817    {bs_put_float,Lbl,A2,N,decode_field_flags(U),A5};
818resolve_inst({bs_put_string,[{u,Len},{u,Off}]},_,Strings,_) ->
819    String = if Len > 0 -> binary_to_list(Strings, Off+1, Off+Len);
820		true -> ""
821	     end,
822    ?NO_DEBUG('bs_put_string(~p, {string,~p})~n',[Len,String]),
823    {bs_put_string,Len,{string,String}};
824resolve_inst({bs_need_buf,[{u,N}]},_,_,_) ->
825    {bs_need_buf,N};
826
827%%
828%% Instructions for handling floating point numbers added in June 2001 (R8).
829%%
830resolve_inst({fclearerror,[]},_,_,_) ->
831    fclearerror;
832resolve_inst({fcheckerror,Args},_,_,_) ->
833    [Fail] = resolve_args(Args),
834    {fcheckerror,Fail};
835resolve_inst({fmove,Args},_,_,_) ->
836    [FR,Reg] = resolve_args(Args),
837    {fmove,FR,Reg};
838resolve_inst({fconv,Args},_,_,_) ->
839    [Reg,FR] = resolve_args(Args),
840    {fconv,Reg,FR};
841resolve_inst({fadd=I,Args},_,_,_) ->
842    [F,A1,A2,Reg] = resolve_args(Args),
843    {arithfbif,I,F,[A1,A2],Reg};
844resolve_inst({fsub=I,Args},_,_,_) ->
845    [F,A1,A2,Reg] = resolve_args(Args),
846    {arithfbif,I,F,[A1,A2],Reg};
847resolve_inst({fmul=I,Args},_,_,_) ->
848    [F,A1,A2,Reg] = resolve_args(Args),
849    {arithfbif,I,F,[A1,A2],Reg};
850resolve_inst({fdiv=I,Args},_,_,_) ->
851    [F,A1,A2,Reg] = resolve_args(Args),
852    {arithfbif,I,F,[A1,A2],Reg};
853resolve_inst({fnegate,Args},_,_,_) ->
854    [F,Arg,Reg] = resolve_args(Args),
855    {arithfbif,fnegate,F,[Arg],Reg};
856
857%%
858%% Instructions for try expressions added in January 2003 (R10).
859%%
860
861resolve_inst({'try',[Reg,Lbl]},_,_,_) -> % analogous to 'catch'
862    {'try',Reg,Lbl};
863resolve_inst({try_end,[Reg]},_,_,_) ->   % analogous to 'catch_end'
864    {try_end,Reg};
865resolve_inst({try_case,[Reg]},_,_,_) ->   % analogous to 'catch_end'
866    {try_case,Reg};
867resolve_inst({try_case_end,[Reg]},_,_,_) ->
868    {try_case_end,Reg};
869resolve_inst({raise,[Reg1,Reg2]},_,_,_) ->
870    {bif,raise,{f,0},[Reg1,Reg2],{x,0}};
871
872%%
873%% New bit syntax instructions added in February 2004 (R10B).
874%%
875
876resolve_inst({bs_init2,[Lbl,Arg2,{u,W},{u,R},{u,F},Arg6]},_,_,_) ->
877    [A2,A6] = resolve_args([Arg2,Arg6]),
878    {bs_init2,Lbl,A2,W,R,decode_field_flags(F),A6};
879resolve_inst({bs_bits_to_bytes,[Lbl,Arg2,Arg3]},_,_,_) ->
880    [A2,A3] = resolve_args([Arg2,Arg3]),
881    {bs_bits_to_bytes,Lbl,A2,A3};
882resolve_inst({bs_add=I,[Lbl,Arg2,Arg3,Arg4,Arg5]},_,_,_) ->
883    [A2,A3,A4,A5] = resolve_args([Arg2,Arg3,Arg4,Arg5]),
884    {I,Lbl,[A2,A3,A4],A5};
885
886%%
887%% New apply instructions added in April 2004 (R10B).
888%%
889resolve_inst({apply,[{u,Arity}]},_,_,_) ->
890    {apply,Arity};
891resolve_inst({apply_last,[{u,Arity},{u,D}]},_,_,_) ->
892    {apply_last,Arity,D};
893
894%%
895%% New test instruction added in April 2004 (R10B).
896%%
897resolve_inst({is_boolean=I,Args0},_,_,_) ->
898    [L|Args] = resolve_args(Args0),
899    {test,I,L,Args};
900
901%%
902%% Catches instructions that are not yet handled.
903%%
904
905resolve_inst(X,_,_,_) -> ?exit({resolve_inst,X}).
906
907%%-----------------------------------------------------------------------
908%% Resolves arguments in a generic way.
909%%-----------------------------------------------------------------------
910
911resolve_args(Args) -> [resolve_arg(A) || A <- Args].
912
913resolve_arg({u,N}) -> N;
914resolve_arg({i,N}) -> {integer,N};
915resolve_arg({atom,Atom}=A) when is_atom(Atom) -> A;
916resolve_arg(nil) -> nil;
917resolve_arg(Arg) -> Arg.
918
919%%-----------------------------------------------------------------------
920%% The purpose of the following is just to add a hook for future changes.
921%% Currently, field flags are numbers 1-2-4-8 and only two of these
922%% numbers (BSF_LITTLE 2 -- BSF_SIGNED 4) have a semantic significance;
923%% others are just hints for speeding up the execution; see "erl_bits.h".
924%%-----------------------------------------------------------------------
925
926decode_field_flags(FF) ->
927    {field_flags,FF}.
928
929%%-----------------------------------------------------------------------
930%% Each string is denoted in the assembled code by its offset into this
931%% binary.  This binary contains all strings concatenated together.
932%%-----------------------------------------------------------------------
933
934beam_disasm_strings(Bin) ->
935    Bin.
936
937%%-----------------------------------------------------------------------
938%% Disassembles the attributes of a BEAM file.
939%%-----------------------------------------------------------------------
940
941beam_disasm_attributes(none) -> none;
942beam_disasm_attributes(AttrBin) -> binary_to_term(AttrBin).
943
944%%-----------------------------------------------------------------------
945%% Disassembles the compilation information of a BEAM file.
946%%-----------------------------------------------------------------------
947
948beam_disasm_compilation_info(none) -> none;
949beam_disasm_compilation_info(Bin) ->  binary_to_term(Bin).
950
951%%-----------------------------------------------------------------------
952%% Private Utilities
953%%-----------------------------------------------------------------------
954
955%%-----------------------------------------------------------------------
956
957lookup_key(Key,[{Key,Val}|_]) ->
958    Val;
959lookup_key(Key,[_|KVs]) ->
960    lookup_key(Key,KVs);
961lookup_key(Key,[]) ->
962    ?exit({lookup_key,{key_not_found,Key}}).
963
964%%-----------------------------------------------------------------------
965