1%% -*- erlang-indent-level: 2 -*-
2%% Licensed under the Apache License, Version 2.0 (the "License");
3%% you may not use this file except in compliance with the License.
4%% You may obtain a copy of the License at
5%%
6%%     http://www.apache.org/licenses/LICENSE-2.0
7%%
8%% Unless required by applicable law or agreed to in writing, software
9%% distributed under the License is distributed on an "AS IS" BASIS,
10%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11%% See the License for the specific language governing permissions and
12%% limitations under the License.
13%% =======================================================================
14%%  Filename : 	hipe_unified_loader.erl
15%%  Module   :	hipe_unified_loader
16%%  Purpose  :  To load code into memory and link it to the system.
17%%  Notes    :  See hipe_ext_format.hrl for description of the external
18%%              format.
19%% =======================================================================
20%% TODO:
21%%   Problems with the order in which things are done.
22%%   export_funs should atomically patch references to make fe and
23%%   make beam stubs. !!
24%%
25%%   Each function should have two proper databases.
26%%   Describe the patch algorithm:
27%%     For each function MFA that is (re)compiled to Address:
28%%     1.  For the old MFA
29%%         a. RefsTo = MFA->refers_to
30%%         b. for each {F,Adr} in RefsTo: remove Adr from F->is_referred
31%%         c. RefsFrom = MFA->is_referred
32%%         d. For each {Adr,Type} in RefsFrom:
33%%                update instr at Adr to refer to Address instead.
34%%     2.  For the new MFA
35%%         a. MFA->is_referred=RefsFrom
36%%     3.  For each function F referenced in the code at Offset:
37%%                 add {Address+Offset,Type} to F->is_referred
38%%                 add {F,Address+Offset} to MFA->refers_to
39%%     4.  Make Address the entrypoint for MFA
40%%
41%%   Add exporting of exported constants.
42%%   Add freeing of old code.
43%%   Inline hipe_sparc_ext_format somehow.
44%% =======================================================================
45
46-module(hipe_unified_loader).
47
48-compile(no_native).
49% 'no_native' is a workaround to avoid "The code server called unloaded module"
50% caused by Mod:module_info(exports) in patch_to_emu_step1() called by post_beam_load.
51% Reproducable with hipelibs and asn1_SUITE.
52% I think the real solution would be to let BIF erlang:load_module/2 redirect all
53% hipe calls to the module and thereby remove post_beam_load.
54
55% SVERK: Can we remove -compile(no_native) now when post_beam_load is gone?
56
57-export([chunk_name/1,
58	 %% Only the code and code_server modules may call the entries below!
59	 load_native_code/3,
60	 load_module/4,
61	 load/3]).
62
63%%-define(DEBUG,true).
64-define(DO_ASSERT,true).
65-define(HIPE_LOGGING,true).
66
67-include("../../hipe/main/hipe.hrl").
68-include("hipe_ext_format.hrl").
69
70%% Currently, there is no need to expose these to the outside world.
71-define(HS8P_TAG,"HS8P").
72-define(HPPC_TAG,"HPPC").
73-define(HP64_TAG,"HP64").
74-define(HARM_TAG,"HARM").
75-define(HX86_TAG,"HX86").
76-define(HA64_TAG,"HA64").
77
78%%========================================================================
79
80-spec chunk_name(hipe_architecture()) -> string().
81%% @doc
82%%    Returns the native code chunk name of the Architecture.
83%%    (On which presumably we are running.)
84
85chunk_name(Architecture) ->
86  case Architecture of
87    amd64 ->      ?HA64_TAG; %% HiPE, x86_64, (implicit: 64-bit, Unix)
88    arm ->	  ?HARM_TAG; %% HiPE, arm, v5 (implicit: 32-bit, Linux)
89    powerpc ->    ?HPPC_TAG; %% HiPE, PowerPC (implicit: 32-bit, Linux)
90    ppc64 ->	  ?HP64_TAG; %% HiPE, ppc64 (implicit: 64-bit, Linux)
91    ultrasparc -> ?HS8P_TAG; %% HiPE, SPARC, V8+ (implicit: 32-bit)
92    x86 ->        ?HX86_TAG  %% HiPE, x86, (implicit: Unix)
93    %% Future:     HSV9      %% HiPE, SPARC, V9 (implicit: 64-bit)
94    %%             HW32      %% HiPE, x86, Win32
95  end.
96
97word_size(Architecture) ->
98  case Architecture of
99    amd64 -> 8;
100    ppc64 -> 8;
101    _ -> 4
102  end.
103
104%%========================================================================
105
106-spec load_native_code(Mod, binary(), hipe_architecture()) ->
107                          'no_native' | {'module', Mod} when Mod :: atom().
108%% @doc
109%%    Loads the native code of a module Mod.
110%%    Returns {module,Mod} on success (for compatibility with
111%%    code:load_file/1) and the atom `no_native' on failure.
112
113load_native_code(_Mod, _Bin, undefined) ->
114  no_native;
115load_native_code(Mod, Bin, Architecture) when is_atom(Mod), is_binary(Bin) ->
116  case code:get_chunk(Bin, chunk_name(Architecture)) of
117    undefined -> no_native;
118    NativeCode when is_binary(NativeCode) ->
119      erlang:system_flag(multi_scheduling, block_normal),
120      try
121        put(hipe_patch_closures, false),
122        case load_common(Mod, NativeCode, Bin, Architecture) of
123          bad_crc -> no_native;
124          Result -> Result
125        end
126      after
127        erlang:system_flag(multi_scheduling, unblock_normal)
128      end
129  end.
130
131%%========================================================================
132
133version_check(Version, Mod) when is_atom(Mod) ->
134  Ver = ?VERSION_STRING(),
135  case Version < Ver of
136    true ->
137      ?msg("WARNING: Module ~w was compiled with HiPE version ~s\n",
138	   [Mod, Version]);
139    _ -> ok
140  end.
141
142%%========================================================================
143
144-spec load_module(Mod, binary(), _, hipe_architecture()) ->
145                     'bad_crc' | {'module', Mod} when Mod :: atom().
146
147load_module(Mod, Bin, Beam, Architecture) ->
148  erlang:system_flag(multi_scheduling, block_normal),
149  try
150    put(hipe_patch_closures, false),
151    load_common(Mod, Bin, Beam, Architecture)
152  after
153    erlang:system_flag(multi_scheduling, unblock_normal)
154  end.
155
156
157%%========================================================================
158
159-spec load(Mod, binary(), hipe_architecture()) ->
160              'bad_crc' | {'module', Mod} when Mod :: atom().
161
162load(Mod, Bin, Architecture) ->
163  erlang:system_flag(multi_scheduling, block_normal),
164  try
165    ?debug_msg("********* Loading funs in module ~w *********\n",[Mod]),
166    %% Loading just some functions in a module; patch closures separately.
167    put(hipe_patch_closures, true),
168    load_common(Mod, Bin, [], Architecture)
169  after
170    erlang:system_flag(multi_scheduling, unblock_normal)
171  end.
172
173%%------------------------------------------------------------------------
174
175load_common(Mod, Bin, Beam, Architecture) ->
176  %% Unpack the binary.
177  [{Version, CheckSum},
178   ConstAlign, ConstSize, ConstMap, LabelMap, ExportMap,
179   CodeSize,  CodeBinary,  Refs,
180   0,[] % ColdSize, CRrefs
181  ] = binary_to_term(Bin),
182  MD5 = erlang:md5(Bin), % use md5 of actual running code for module_info
183  ?debug_msg("***** ErLLVM *****~nVersion: ~s~nCheckSum: ~w~nConstAlign: ~w~n" ++
184    "ConstSize: ~w~nConstMap: ~w~nLabelMap: ~w~nExportMap ~w~nRefs ~w~n",
185    [Version, CheckSum, ConstAlign, ConstSize, ConstMap, LabelMap, ExportMap,
186      Refs]),
187  %% Write HiPE binary code to a file in the current directory in order to
188  %% debug by disassembling.
189  %% file:write_file("erl.o", CodeBinary, [binary]),
190  %% Check that we are loading up-to-date code.
191  version_check(Version, Mod),
192  case hipe_bifs:check_crc(CheckSum) of
193    false ->
194      ?msg("Warning: not loading native code for module ~w: "
195	   "it was compiled for an incompatible runtime system; "
196	   "please regenerate native code for this runtime system\n", [Mod]),
197      bad_crc;
198    true ->
199      put(closures_to_patch, []),
200      WordSize = word_size(Architecture),
201      WriteWord = write_word_fun(WordSize),
202      LoaderState = hipe_bifs:alloc_loader_state(Mod),
203      put(hipe_loader_state, LoaderState),
204      %% Create data segment
205      {ConstAddr,ConstMap2} =
206	create_data_segment(ConstAlign, ConstSize, ConstMap, WriteWord,
207			    LoaderState),
208      %% Find callees for which we may need trampolines.
209      CalleeMFAs = find_callee_mfas(Refs, Architecture),
210      %% Write the code to memory.
211      {CodeAddress,Trampolines} =
212	enter_code(CodeSize, CodeBinary, CalleeMFAs, LoaderState),
213      %% Construct CalleeMFA-to-trampoline mapping.
214      TrampolineMap = mk_trampoline_map(CalleeMFAs, Trampolines,
215                                        Architecture),
216      %% Patch references to code labels in data seg.
217      ok = patch_consts(LabelMap, ConstAddr, CodeAddress, WriteWord),
218
219      %% Find out which functions are being loaded (and where).
220      %% Note: FunDefs are sorted descending address order.
221      FunDefs = exports(ExportMap, CodeAddress),
222
223      %% Patch all dynamic references in the code.
224      %%  Function calls, Atoms, Constants, System calls
225
226      ok = patch(Refs, CodeAddress, ConstMap2, FunDefs, TrampolineMap),
227
228      %% Tell the system where the loaded funs are.
229      %%  (patches the BEAM code to redirect to native.)
230      case Beam of
231	[] ->
232	  %% This module was previously loaded as BEAM code during system
233	  %% start-up before the code server has started (-enable-native-libs
234	  %% is active), so we must now patch the pre-existing entries in the
235	  %% fun table with the native code addresses for all closures.
236	  lists:foreach(fun({FE, DestAddress}) ->
237			    hipe_bifs:set_native_address_in_fe(FE, DestAddress)
238			end, erase(closures_to_patch)),
239	  set_beam_call_traps(FunDefs),
240	  export_funs(FunDefs),
241          ok = hipe_bifs:commit_patch_load(LoaderState),
242          ok;
243	BeamBinary when is_binary(BeamBinary) ->
244	  %% Find all closures in the code.
245	  [] = erase(closures_to_patch),	%Clean up, assertion.
246	  ClosurePatches = find_closure_patches(Refs),
247	  AddressesOfClosuresToPatch =
248	    calculate_addresses(ClosurePatches, CodeAddress, FunDefs),
249	  export_funs(FunDefs),
250	  make_beam_stub(Mod, LoaderState, MD5, BeamBinary, FunDefs,
251                         AddressesOfClosuresToPatch)
252      end,
253
254      %% Final clean up.
255      _ = erase(hipe_loader_state),
256      _ = erase(hipe_patch_closures),
257      _ = erase(hipe_assert_code_area),
258      ?debug_msg("****************Loader Finished****************\n", []),
259      {module,Mod}  % for compatibility with code:load_file/1
260  end.
261
262%%----------------------------------------------------------------
263%% Scan the list of patches and build a set (returned as a tuple)
264%% of the callees for which we may need trampolines.
265%%
266find_callee_mfas(Patches, Architecture) when is_list(Patches) ->
267  case needs_trampolines(Architecture) of
268    true -> find_callee_mfas(Patches, gb_sets:empty(),
269                             no_erts_trampolines(Architecture));
270    _ -> []
271  end.
272
273needs_trampolines(Architecture) ->
274  case Architecture of
275    arm -> true;
276    powerpc -> true;
277    ppc64 -> true;
278    amd64 -> true;
279    _ -> false
280  end.
281
282no_erts_trampolines(Architecture) ->
283  case Architecture of
284    powerpc -> true;
285    ppc64 -> true;
286    _ -> false
287  end.
288
289find_callee_mfas([{Type,Data}|Patches], MFAs, SkipErtsSyms) ->
290  NewMFAs =
291    case ?EXT2PATCH_TYPE(Type) of
292      call_local -> add_callee_mfas(Data, MFAs, SkipErtsSyms);
293      call_remote -> add_callee_mfas(Data, MFAs, SkipErtsSyms);
294      %% load_address(function) deliberately ignored
295      _ -> MFAs
296    end,
297  find_callee_mfas(Patches, NewMFAs, SkipErtsSyms);
298find_callee_mfas([], MFAs, _SkipErtsSyms) ->
299  list_to_tuple(gb_sets:to_list(MFAs)).
300
301add_callee_mfas([{DestMFA,_Offsets}|Refs], MFAs, SkipErtsSyms) ->
302  NewMFAs =
303    case SkipErtsSyms of
304      true ->
305	%% On PowerPC we put the runtime system below the
306	%% 32M boundary, which allows BIFs and primops to
307	%% be called with ba/bla instructions. Hence we do
308	%% not need trampolines for BIFs or primops.
309	case bif_address(DestMFA) of
310	  false -> gb_sets:add_element(DestMFA, MFAs);
311	  BifAddress when is_integer(BifAddress) -> MFAs
312	end;
313      false ->
314	%% On ARM we also need trampolines for BIFs and primops.
315	gb_sets:add_element(DestMFA, MFAs)
316    end,
317  add_callee_mfas(Refs, NewMFAs, SkipErtsSyms);
318add_callee_mfas([], MFAs, _SkipErtsSyms) -> MFAs.
319
320%%----------------------------------------------------------------
321%%
322mk_trampoline_map([], [], _) -> []; % archs not using trampolines
323mk_trampoline_map(CalleeMFAs, Trampolines, Architecture) ->
324  SizeofLong = word_size(Architecture),
325  mk_trampoline_map(tuple_size(CalleeMFAs), CalleeMFAs,
326		    Trampolines, SizeofLong, gb_trees:empty()).
327
328mk_trampoline_map(I, CalleeMFAs, Trampolines, SizeofLong, Map) when I >= 1 ->
329  MFA = element(I, CalleeMFAs),
330  %% Trampoline = element(I, Trampolines),
331  Skip = (I-1)*SizeofLong,
332  <<_:Skip/binary-unit:8,
333    Trampoline:SizeofLong/integer-unsigned-native-unit:8,
334    _/binary>> = Trampolines,
335  NewMap = gb_trees:insert(MFA, Trampoline, Map),
336  mk_trampoline_map(I-1, CalleeMFAs, Trampolines, SizeofLong, NewMap);
337mk_trampoline_map(0, _, _, _, Map) -> Map.
338
339%%----------------------------------------------------------------
340%%
341trampoline_map_get(_, []) -> []; % archs not using trampolines
342trampoline_map_get(MFA, Map) -> gb_trees:get(MFA, Map).
343
344trampoline_map_lookup(_, []) -> []; % archs not using trampolines
345trampoline_map_lookup(Primop, Map) ->
346  case gb_trees:lookup(Primop, Map) of
347    {value, X} -> X;
348    _ -> []
349  end.
350
351%%------------------------------------------------------------------------
352
353-record(fundef, {address     :: integer(),
354		 mfa         :: mfa(),
355		 is_closure  :: boolean(),
356		 is_exported :: boolean()}).
357
358exports(ExportMap, BaseAddress) ->
359  exports(ExportMap, BaseAddress, []).
360
361exports([Offset,M,F,A,IsClosure,IsExported|Rest], BaseAddress, FunDefs) ->
362  case IsExported andalso erlang:is_builtin(M, F, A) of
363    true ->
364      exports(Rest, BaseAddress, FunDefs);
365    _false ->
366      MFA = {M,F,A},
367      Address = BaseAddress + Offset,
368      FunDef = #fundef{address=Address, mfa=MFA, is_closure=IsClosure,
369		       is_exported=IsExported},
370      exports(Rest, BaseAddress, [FunDef|FunDefs])
371  end;
372exports([], _, FunDefs) ->
373  FunDefs.
374
375mod({M,_F,_A}) -> M.
376
377%%------------------------------------------------------------------------
378
379calculate_addresses(PatchOffsets, Base, FunDefs) ->
380  RemoteOrLocal = local, % closure code refs are local
381  [{Data,
382    offsets_to_addresses(Offsets, Base),
383    get_native_address(DestMFA, FunDefs, RemoteOrLocal)} ||
384    {{DestMFA,_,_}=Data,Offsets} <- PatchOffsets].
385
386offsets_to_addresses(Os, Base) ->
387  [{O+Base,load_fe} || O <- Os].
388
389%%------------------------------------------------------------------------
390
391find_closure_patches([{Type,Refs} | Rest]) ->
392  case ?EXT2PATCH_TYPE(Type) of
393    load_address ->
394      find_closure_refs(Refs, Rest);
395    _ ->
396      find_closure_patches(Rest)
397  end;
398find_closure_patches([]) -> [].
399
400find_closure_refs([{Dest,Offsets} | Rest], Refs) ->
401  case Dest of
402    {closure,Data} ->
403      [{Data,Offsets}|find_closure_refs(Rest,Refs)];
404    _ ->
405      find_closure_refs(Rest,Refs)
406  end;
407find_closure_refs([], Refs) ->
408  find_closure_patches(Refs).
409
410%%------------------------------------------------------------------------
411
412set_beam_call_traps([FunDef | FunDefs]) ->
413  #fundef{address=Address, mfa=MFA, is_closure=IsClosure,
414	  is_exported=_IsExported} = FunDef,
415  ?IF_DEBUG({M,F,A} = MFA, no_debug),
416  ?IF_DEBUG(
417     case IsClosure of
418       false ->
419	 ?debug_msg("LINKING: ~w:~w/~w to (0x~.16b)\n",
420		    [M,F,A, Address]);
421       true ->
422	 ?debug_msg("LINKING: ~w:~w/~w to closure (0x~.16b)\n",
423		    [M,F,A, Address])
424     end, no_debug),
425  hipe_bifs:set_native_address(MFA, Address, IsClosure),
426  set_beam_call_traps(FunDefs);
427set_beam_call_traps([]) ->
428  ok.
429
430export_funs([FunDef | FunDefs]) ->
431  #fundef{address=Address, mfa=MFA, is_closure=_IsClosure,
432	  is_exported=IsExported} = FunDef,
433  ?IF_DEBUG({M,F,A} = MFA, no_debug),
434  ?IF_DEBUG(
435     case _IsClosure of
436       false ->
437	 ?debug_msg("LINKING: ~w:~w/~w to (0x~.16b)\n",
438		    [M,F,A, Address]);
439       true ->
440	 ?debug_msg("LINKING: ~w:~w/~w to closure (0x~.16b)\n",
441		    [M,F,A, Address])
442     end, no_debug),
443  hipe_bifs:set_funinfo_native_address(MFA, Address, IsExported),
444  export_funs(FunDefs);
445export_funs([]) ->
446  ok.
447
448make_beam_stub(Mod, LoaderState, MD5, Beam, FunDefs, ClosuresToPatch) ->
449  Fs = [{F,A,Address} || #fundef{address=Address, mfa={_M,F,A}} <- FunDefs],
450  Mod = code:make_stub_module(LoaderState, Beam, {Fs,ClosuresToPatch,MD5}),
451  ok.
452
453%%========================================================================
454%% Patching
455%%  @spec patch(refs(), BaseAddress::integer(), ConstAndZone::term(),
456%%              FunDefs::term(), TrampolineMap::term()) -> 'ok'
457%%   @type refs()=[{RefType::integer(), Reflist::reflist()} | refs()]
458%%
459%%   @type reflist()=   [{Data::term(), Offsets::offests()}|reflist()]
460%%   @type offsets()=   [Offset::integer() | offsets()]
461%% @doc
462%%  The patchlist is a list of lists of patches of a type.
463%%  For each type the list of references is sorted so that several
464%%  references to the same type of data come after each other
465%%  (we use this to look up the address of a referred function only once).
466%%
467
468patch([{Type,SortedRefs}|Rest], CodeAddress, ConstMap2, FunDefs, TrampolineMap) ->
469  ?debug_msg("Patching ~w at [~w+offset] with ~w\n",
470	     [Type,CodeAddress,SortedRefs]),
471  case ?EXT2PATCH_TYPE(Type) of
472    call_local ->
473      patch_call(SortedRefs, CodeAddress, FunDefs, 'local', TrampolineMap);
474    call_remote ->
475      patch_call(SortedRefs, CodeAddress, FunDefs, 'remote', TrampolineMap);
476    Other ->
477      patch_all(Other, SortedRefs, CodeAddress, {ConstMap2,CodeAddress}, FunDefs)
478  end,
479  patch(Rest, CodeAddress, ConstMap2, FunDefs, TrampolineMap);
480patch([], _, _, _, _) -> ok.
481
482%%----------------------------------------------------------------
483%% Handle a 'call_local' or 'call_remote' patch.
484%%
485patch_call([{DestMFA,Offsets}|SortedRefs], BaseAddress, FunDefs, RemoteOrLocal, TrampolineMap) ->
486  case bif_address(DestMFA) of
487    false ->
488      %% Previous code used mfa_to_address(DestMFA, FunDefs)
489      %% here for local calls. That is wrong because even local
490      %% destinations may not be present in FunDefs: they may
491      %% not have been compiled yet, or they may be BEAM-only
492      %% functions (e.g. module_info).
493      DestAddress = get_native_address(DestMFA, FunDefs, RemoteOrLocal),
494      Trampoline = trampoline_map_get(DestMFA, TrampolineMap),
495      patch_mfa_call_list(Offsets, BaseAddress, DestMFA, DestAddress, FunDefs, RemoteOrLocal, Trampoline);
496    BifAddress when is_integer(BifAddress) ->
497      Trampoline = trampoline_map_lookup(DestMFA, TrampolineMap),
498      patch_bif_call_list(Offsets, BaseAddress, BifAddress, Trampoline)
499  end,
500  patch_call(SortedRefs, BaseAddress, FunDefs, RemoteOrLocal, TrampolineMap);
501patch_call([], _, _, _, _) ->
502  ok.
503
504patch_bif_call_list([Offset|Offsets], BaseAddress, BifAddress, Trampoline) ->
505  CallAddress = BaseAddress+Offset,
506  ?ASSERT(assert_local_patch(CallAddress)),
507  patch_call_insn(CallAddress, BifAddress, Trampoline),
508  patch_bif_call_list(Offsets, BaseAddress, BifAddress, Trampoline);
509patch_bif_call_list([], _, _, _) -> ok.
510
511patch_mfa_call_list([Offset|Offsets], BaseAddress, DestMFA, DestAddress, FunDefs, RemoteOrLocal, Trampoline) ->
512  CallAddress = BaseAddress+Offset,
513  add_ref(DestMFA, CallAddress, FunDefs, 'call', Trampoline, RemoteOrLocal),
514  ?ASSERT(assert_local_patch(CallAddress)),
515  patch_call_insn(CallAddress, DestAddress, Trampoline),
516  patch_mfa_call_list(Offsets, BaseAddress, DestMFA, DestAddress, FunDefs, RemoteOrLocal, Trampoline);
517patch_mfa_call_list([], _, _, _, _, _, _) -> ok.
518
519patch_call_insn(CallAddress, DestAddress, Trampoline) ->
520  ?ASSERT(assert_local_patch(CallAddress)),
521  hipe_bifs:patch_call(CallAddress, DestAddress, Trampoline).
522
523%% ____________________________________________________________________
524%%
525
526patch_all(Type, [{Dest,Offsets}|Rest], BaseAddress, ConstAndZone, FunDefs)->
527  patch_all_offsets(Type, Dest, Offsets, BaseAddress, ConstAndZone, FunDefs),
528  patch_all(Type, Rest, BaseAddress, ConstAndZone, FunDefs);
529patch_all(_, [], _, _, _) -> ok.
530
531patch_all_offsets(Type, Data, [Offset|Offsets], BaseAddress,
532		  ConstAndZone, FunDefs) ->
533  ?debug_msg("Patching ~w at [~w+~w] with ~w\n",
534	     [Type,BaseAddress,Offset, Data]),
535  Address = BaseAddress + Offset,
536  patch_offset(Type, Data, Address, ConstAndZone, FunDefs),
537  ?debug_msg("Patching done\n",[]),
538  patch_all_offsets(Type, Data, Offsets, BaseAddress, ConstAndZone, FunDefs);
539patch_all_offsets(_, _, [], _, _, _) -> ok.
540
541%%----------------------------------------------------------------
542%% Handle any patch type except 'call_local' or 'call_remote'.
543%%
544patch_offset(Type, Data, Address, ConstAndZone, FunDefs) ->
545  case Type of
546    load_address ->
547      patch_load_address(Data, Address, ConstAndZone, FunDefs);
548    load_atom ->
549      Atom = Data,
550      patch_atom(Address, Atom);
551    sdesc ->
552      patch_sdesc(Data, Address, ConstAndZone, FunDefs);
553    x86_abs_pcrel ->
554      patch_instr(Address, Data, x86_abs_pcrel)
555    %% _ ->
556    %%   ?error_msg("Unknown ref ~w ~w ~w\n", [Type, Address, Data]),
557    %%   exit({unknown_reference, Type, Address, Data})
558  end.
559
560patch_atom(Address, Atom) ->
561  ?ASSERT(assert_local_patch(Address)),
562  patch_instr(Address, hipe_bifs:atom_to_word(Atom), atom).
563
564patch_sdesc(?STACK_DESC(SymExnRA, FSize, Arity, Live),
565	    Address, {_ConstMap2,CodeAddress}, FunDefs) ->
566  ExnRA =
567    case SymExnRA of
568      [] -> 0; % No catch
569      LabelOffset -> CodeAddress + LabelOffset
570    end,
571  ?ASSERT(assert_local_patch(Address)),
572  MFA = address_to_mfa_lth(Address, FunDefs),
573  hipe_bifs:enter_sdesc({Address, ExnRA, FSize, Arity, Live, MFA},
574		       get(hipe_loader_state)).
575
576
577%%----------------------------------------------------------------
578%% Handle a 'load_address'-type patch.
579%%
580patch_load_address(Data, Address, ConstAndZone, FunDefs) ->
581  case Data of
582    {local_function,DestMFA} ->
583      patch_load_mfa(Address, DestMFA, FunDefs, 'local');
584    {remote_function,DestMFA} ->
585      patch_load_mfa(Address, DestMFA, FunDefs, 'remote');
586    {constant,Name} ->
587      {ConstMap2,_CodeAddress} = ConstAndZone,
588      ConstAddress = find_const(Name, ConstMap2),
589      patch_instr(Address, ConstAddress, constant);
590    {closure,{DestMFA,Uniq,Index}} ->
591      patch_closure(DestMFA, Uniq, Index, Address, FunDefs);
592    {c_const,CConst} ->
593      patch_instr(Address, bif_address(CConst), c_const)
594  end.
595
596patch_closure(DestMFA, Uniq, Index, Address, FunDefs) ->
597  case get(hipe_patch_closures) of
598    false ->
599      []; % This is taken care of when registering the module.
600    true ->
601      %% We are replacing a previosly loaded BEAM module with native code,
602      %% so we must reference the pre-existing entries in the fun table
603      %% from the native code. We must delay actually patching the native
604      %% address into the fun entry to ensure that the native code cannot
605      %% be called until it has been completely fixed up.
606      RemoteOrLocal = local, % closure code refs are local
607      DestAddress = get_native_address(DestMFA, FunDefs, RemoteOrLocal),
608      BEAMAddress = hipe_bifs:fun_to_address(DestMFA),
609      FE = hipe_bifs:get_fe(mod(DestMFA), {Uniq, Index, BEAMAddress}),
610      put(closures_to_patch, [{FE,DestAddress}|get(closures_to_patch)]),
611      ?debug_msg("Patch FE(~w) to 0x~.16b->0x~.16b (emu:0x~.16b)\n",
612		 [DestMFA, FE, DestAddress, BEAMAddress]),
613      ?ASSERT(assert_local_patch(Address)),
614      patch_instr(Address, FE, closure)
615  end.
616
617%%----------------------------------------------------------------
618%% Patch an instruction loading the address of an MFA.
619%% RemoteOrLocal ::= 'remote' | 'local'
620%%
621patch_load_mfa(CodeAddress, DestMFA, FunDefs, RemoteOrLocal) ->
622  ?ASSERT(assert_local_patch(CodeAddress)),
623  DestAddress =
624    case bif_address(DestMFA) of
625      false ->
626	NativeAddress = get_native_address(DestMFA, FunDefs, RemoteOrLocal),
627	add_ref(DestMFA, CodeAddress, FunDefs, 'load_mfa', [], RemoteOrLocal),
628	NativeAddress;
629      BifAddress when is_integer(BifAddress) ->
630	BifAddress
631    end,
632  patch_instr(CodeAddress, DestAddress, 'load_mfa').
633
634%%----------------------------------------------------------------
635%% Patch references to code labels in the data segment.
636%%
637patch_consts(Labels, DataAddress, CodeAddress, WriteWord) ->
638  lists:foreach(fun (L) ->
639		    patch_label_or_labels(L, DataAddress, CodeAddress,
640                                          WriteWord)
641		end, Labels).
642
643patch_label_or_labels({Pos,Offset}, DataAddress, CodeAddress, WriteWord) ->
644  ?ASSERT(assert_local_patch(CodeAddress+Offset)),
645  WriteWord(DataAddress+Pos, CodeAddress+Offset);
646patch_label_or_labels({sorted,Base,UnOrderdList}, DataAddress, CodeAddress,
647                      WriteWord) ->
648  sort_and_write(UnOrderdList, Base, DataAddress, CodeAddress, WriteWord).
649
650sort_and_write(UnOrderdList, Base, DataAddress, CodeAddress, WriteWord) ->
651  WriteAndInc =
652    fun ({_, Offset}, DataPos) ->
653	?ASSERT(assert_local_patch(CodeAddress+Offset)),
654	WriteWord(DataPos, CodeAddress+Offset)
655    end,
656  lists:foldl(WriteAndInc, DataAddress+Base, sort_on_representation(UnOrderdList)).
657
658sort_on_representation(List) ->
659  lists:sort([{hipe_bifs:term_to_word(Term), Offset} ||
660	       {Term, Offset} <- List]).
661
662%%--------------------------------------------------------------------
663%% Update an instruction to refer to a value of a given type.
664%%
665%% Type ::= 'call' | 'load_mfa' | 'x86_abs_pcrel' | 'atom'
666%%	  | 'constant' | 'c_const' | 'closure'
667%%
668%% Note: the values of this Type are hard-coded in file erl_bif_types.erl
669%%
670patch_instr(Address, Value, Type) ->
671  hipe_bifs:patch_insn(Address, Value, Type).
672
673%%--------------------------------------------------------------------
674%% Write a data word of the machine's natural word size.
675%% Returns the address of the next word.
676%%
677%% XXX: It appears this is used for inserting both code addresses
678%% and other data. In HiPE, code addresses are still 32-bit on
679%% some 64-bit machines.
680write_word_fun(WordSize) ->
681  case WordSize of
682    8 ->
683      fun (DataAddress, DataWord) ->
684          hipe_bifs:write_u64(DataAddress, DataWord),
685          DataAddress+8
686      end;
687    4 ->
688      fun (DataAddress, DataWord) ->
689          hipe_bifs:write_u32(DataAddress, DataWord),
690          DataAddress+4
691      end
692  end.
693
694%%--------------------------------------------------------------------
695
696bif_address({M,F,A}) ->
697  hipe_bifs:bif_address(M,F,A);
698bif_address(Name) when is_atom(Name) ->
699  hipe_bifs:primop_address(Name).
700
701%%--------------------------------------------------------------------
702%% create_data_segment/3 takes an object file ConstMap, as produced by
703%% hipe_pack_constants:slim_constmap/1, loads the constants into
704%% memory, and produces a ConstMap2 mapping each constant's ConstNo to
705%% its runtime address, tagged if the constant is a term.
706%%
707create_data_segment(DataAlign, DataSize, DataList, WriteWord, LoaderState) ->
708  %%io:format("create_data_segment: \nDataAlign: ~p\nDataSize: ~p\nDataList: ~p\n",[DataAlign,DataSize,DataList]),
709  DataAddress = hipe_bifs:alloc_data(DataAlign, DataSize, LoaderState),
710  enter_data(DataList, [], DataAddress, DataSize, WriteWord).
711
712enter_data(List, ConstMap2, DataAddress, DataSize, WriteWord) ->
713  case List of
714    [ConstNo,Offset,Type,Data|Rest] when is_integer(Offset) ->
715      %%?msg("Const ~w\n",[[ConstNo,Offset,Type,Data]]),
716      ?ASSERT((Offset >= 0) and (Offset =< DataSize)),
717      Res = enter_datum(Type, Data, DataAddress+Offset, WriteWord),
718      enter_data(Rest, [{ConstNo,Res}|ConstMap2], DataAddress, DataSize,
719                 WriteWord);
720    [] ->
721      {DataAddress, ConstMap2}
722  end.
723
724enter_datum(Type, Data, Address, WriteWord) ->
725  case ?EXT2CONST_TYPE(Type) of
726    term ->
727      %% Address is unused for terms
728      hipe_bifs:term_to_word(hipe_bifs:merge_term(Data));
729    sorted_block ->
730      L = lists:sort([hipe_bifs:term_to_word(Term) || Term <- Data]),
731      write_words(L, Address, WriteWord),
732      Address;
733    block ->
734      case Data of
735	{Lbls, []} ->
736	  write_bytes(Lbls, Address);
737	{Lbls, SortOrder} ->
738	  SortedLbls = [Lbl || {_,Lbl} <- lists:sort(group(Lbls, SortOrder))],
739	  write_words(SortedLbls, Address, WriteWord);
740	Lbls ->
741	  write_bytes(Lbls, Address)
742      end,
743      Address
744  end.
745
746group([], []) ->
747  [];
748group([B1,B2,B3,B4|Ls], [O|Os]) ->
749  [{hipe_bifs:term_to_word(O),bytes_to_32(B4,B3,B2,B1)}|group(Ls,Os)].
750
751bytes_to_32(B4,B3,B2,B1) ->
752  (B4 bsl 24) bor (B3 bsl 16) bor (B2 bsl 8) bor B1.
753
754write_words([W|Rest], Addr, WriteWord) ->
755  write_words(Rest, WriteWord(Addr, W), WriteWord);
756write_words([], Addr, _) when is_integer(Addr) -> true.
757
758write_bytes([B|Rest], Addr) ->
759  hipe_bifs:write_u8(Addr, B),
760  write_bytes(Rest, Addr+1);
761write_bytes([], Addr) when is_integer(Addr) -> true.
762
763%%% lists:keysearch/3 conses a useless wrapper around the found tuple :-(
764%%% otherwise it would have been a good replacement for this loop
765find_const(ConstNo, [{ConstNo,Addr}|_ConstMap2]) ->
766  Addr;
767find_const(ConstNo, [_|ConstMap2]) ->
768  find_const(ConstNo, ConstMap2);
769find_const(ConstNo, []) ->
770  ?error_msg("Constant not found ~w\n",[ConstNo]),
771  exit({constant_not_found,ConstNo}).
772
773
774%%----------------------------------------------------------------
775%% Record that the code at address 'Address' has a reference
776%% of type 'RefType' ('call' or 'load_mfa') to 'CalleeMFA'.
777%% 'FunDefs' must be an address-descending list from exports/2.
778%%
779%% If 'RefType' is 'call', then 'Trampoline' may be the address
780%% of a stub branching to 'CalleeMFA', where the stub is reachable
781%% from 'Address' via a normal call or tailcall instruction.
782%%
783%% RemoteOrLocal ::= 'remote' | 'local'.
784%%
785
786add_ref(CalleeMFA, Address, FunDefs, RefType, Trampoline, RemoteOrLocal) ->
787  CallerMFA = address_to_mfa_lth(Address, FunDefs),
788  case RemoteOrLocal of
789    local ->
790      %% assert that the callee and caller are from the same module
791      {M,_,_} = CalleeMFA,
792      {M,_,_} = CallerMFA,
793      ok;
794    remote ->
795      hipe_bifs:add_ref(CalleeMFA, {CallerMFA,Address,RefType,Trampoline,
796				    get(hipe_loader_state)})
797  end.
798
799%% For FunDefs sorted from low to high addresses
800address_to_mfa_lth(Address, FunDefs) ->
801  case address_to_mfa_lth(Address, FunDefs, false) of
802    false ->
803      ?error_msg("Local adddress not found ~w\n",[Address]),
804      exit({?MODULE, local_address_not_found});
805    MFA ->
806      MFA
807  end.
808
809address_to_mfa_lth(Address, [#fundef{address=Adr, mfa=MFA}|Rest], Prev) ->
810  if Address < Adr ->
811	  Prev;
812     true ->
813	  address_to_mfa_lth(Address, Rest, MFA)
814  end;
815address_to_mfa_lth(_Address, [], Prev) ->
816    Prev.
817
818%% For FunDefs sorted from high to low addresses
819%% address_to_mfa_htl(Address, [#fundef{address=Adr, mfa=MFA}|_Rest]) when Address >= Adr -> MFA;
820%% address_to_mfa_htl(Address, [_ | Rest]) -> address_to_mfa_htl(Address, Rest);
821%% address_to_mfa_htl(Address, []) ->
822%%   ?error_msg("Local adddress not found ~w\n",[Address]),
823%%   exit({?MODULE, local_address_not_found}).
824
825
826%%--------------------------------------------------------------------
827
828%% To find the native code of an MFA we need to look in 3 places:
829%%  1. If it is compiled now look in the FunDefs data structure.
830%%  2. Then look in native_addresses from module info.
831%%  3. Then (the function might have been singled compiled) look in
832%%      hipe_funinfo
833%%  If all else fails create a native stub for the MFA
834get_native_address(MFA, FunDefs, RemoteOrLocal) ->
835  case mfa_to_address(MFA, FunDefs, RemoteOrLocal) of
836    Adr when is_integer(Adr) -> Adr;
837    false ->
838	case RemoteOrLocal of
839	  remote ->
840	    hipe_bifs:find_na_or_make_stub(MFA);
841	  local ->
842	    ?error_msg("Local function ~p not found\n",[MFA]),
843	    exit({function_not_found,MFA})
844	end
845  end.
846
847mfa_to_address(MFA, [#fundef{address=Adr, mfa=MFA,
848			     is_exported=IsExported}|_Rest], RemoteOrLocal) ->
849  case RemoteOrLocal of
850    local ->
851      Adr;
852    remote ->
853      case IsExported of
854	true ->
855	  Adr;
856	false ->
857	  false
858      end
859  end;
860mfa_to_address(MFA, [_|Rest], RemoteOrLocal) ->
861  mfa_to_address(MFA, Rest, RemoteOrLocal);
862mfa_to_address(_, [], _) -> false.
863
864%% ____________________________________________________________________
865%%
866
867-ifdef(DO_ASSERT).
868
869-define(init_assert_patch(Base, Size), put(hipe_assert_code_area,{Base,Base+Size})).
870
871assert_local_patch(Address) when is_integer(Address) ->
872  {First,Last} = get(hipe_assert_code_area),
873  Address >= First andalso Address < (Last).
874
875-else.
876
877-define(init_assert_patch(Base, Size), ok).
878
879-endif.
880
881%% ____________________________________________________________________
882%%
883
884enter_code(CodeSize, CodeBinary, CalleeMFAs, LoaderState) ->
885  true = byte_size(CodeBinary) =:= CodeSize,
886  {CodeAddress,Trampolines} = hipe_bifs:enter_code(CodeBinary, CalleeMFAs,
887						   LoaderState),
888  ?init_assert_patch(CodeAddress, byte_size(CodeBinary)),
889  {CodeAddress,Trampolines}.
890
891