1%% -*- erlang-indent-level: 2 -*-
2%%
3%% Licensed under the Apache License, Version 2.0 (the "License");
4%% you may not use this file except in compliance with the License.
5%% You may obtain a copy of the License at
6%%
7%%     http://www.apache.org/licenses/LICENSE-2.0
8%%
9%% Unless required by applicable law or agreed to in writing, software
10%% distributed under the License is distributed on an "AS IS" BASIS,
11%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12%% See the License for the specific language governing permissions and
13%% limitations under the License.
14%%
15%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16%% Copyright (c) 2001 by Erik Johansson.  All Rights Reserved
17%% ====================================================================
18%%  Filename : 	hipe_rtl_mk_switch.erl
19%%  Module   :	hipe_rtl_mk_switch
20%%  Purpose  :  Implements switching on Erlang values.
21%%  Notes    :  Only fixnums are supported well,
22%%              atoms work with table search,
23%%              the inline search of atoms might have some bugs.
24%%              Should be extended to handle bignums and floats.
25%%
26%%  History  :	* 2001-02-28 Erik Johansson (happi@it.uu.se):
27%%                Created.
28%%              * 2001-04-01 Erik Trulsson (ertr1013@csd.uu.se):
29%%                           Stefan Lindström (stli3993@csd.uu.se):
30%%                Added clustering and inlined binary search trees.
31%%              * 2001-07-30 EJ (happi@it.uu.se):
32%%                Fixed some bugs and started cleanup.
33%% ====================================================================
34%%  Exports  :
35%%    gen_switch_val(I, VarMap, ConstTab, Options)
36%%    gen_switch_tuple(I, Map, ConstTab, Options)
37%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
38
39-module(hipe_rtl_mk_switch).
40
41-export([gen_switch_val/4, gen_switch_tuple/4]).
42
43%%-------------------------------------------------------------------------
44
45-include("../main/hipe.hrl").
46
47%%-------------------------------------------------------------------------
48
49-define(MINFORJUMPTABLE,9).
50	% Minimum number of integers needed to use something else than an inline search.
51-define(MINFORINTSEARCHTREE,65).  % Must be at least 3
52	% Minimum number of integer elements needed to use a non-inline binary search.
53
54-define(MININLINEATOMSEARCH,8).
55	% Minimum number of atoms needed to use an inline binary search instead
56	% of a fast linear search.
57
58-define(MINFORATOMSEARCHTREE,20).  % Must be at least 3
59	% Minimum number of atoms needed to use a non-inline binary search instead
60	% of a linear search.
61
62-define(MAXINLINEATOMSEARCH,64). % Must be at least 3
63	% The cutoff point between inlined and non-inlined binary search for atoms
64
65-define(WORDSIZE, hipe_rtl_arch:word_size()).
66-define(MINDENSITY, 0.5).
67        % Minimum density required to use a jumptable instead of a binary search.
68
69%% The reason why MINFORINTSEARCHTREE and MINFORATOMSEARCHTREE must be
70%% at least 3 is that the function tab/5 will enter an infinite loop
71%% and hang when faced with a switch of size 1 or 2.
72
73
74%% Options used by this module:
75%%
76%% [no_]use_indexing
77%%    Determines if any indexing be should be done at all. Turned on
78%%    by default at optimization level o2 and higher.
79%%
80%% [no_]use_clusters
81%%    Controls whether we attempt to divide sparse integer switches
82%%    into smaller dense clusters for which jumptables are practical.
83%%    Turned off by default since it can increase compilation time
84%%    considerably and most programs will gain little benefit from it.
85%%
86%% [no_]use_inline_atom_search
87%%    Controls whether we use an inline binary search for small number
88%%    of atoms. Turned off by default since this is currently only
89%%    supported on SPARC (and not on x86) and probably needs a bit
90%%    more testing before it can be turned on by default.
91
92gen_switch_val(I, VarMap, ConstTab, Options) ->
93  case proplists:get_bool(use_indexing, Options) of
94    false -> gen_slow_switch_val(I, VarMap, ConstTab, Options);
95    true -> gen_fast_switch_val(I, VarMap, ConstTab, Options)
96  end.
97
98gen_fast_switch_val(I, VarMap, ConstTab, Options) ->
99  {Arg, VarMap0} =
100    hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:switch_val_term(I), VarMap),
101  IcodeFail = hipe_icode:switch_val_fail_label(I),
102  {Fail, VarMap1} = hipe_rtl_varmap:icode_label2rtl_label(IcodeFail, VarMap0),
103  %% Important that the list of cases is sorted when handling integers.
104  UnsortedCases = hipe_icode:switch_val_cases(I),
105  Cases = lists:sort(UnsortedCases),
106
107  check_duplicates(Cases),
108  %% This check is currently not really necessary.  The checking
109  %% happens at an earlier phase of the compilation.
110  {Types, InitCode} = split_types(Cases, Arg),
111  handle_types(Types, InitCode, VarMap1, ConstTab, Arg, {I, Fail, Options}).
112
113handle_types([{Type,Lbl,Cases}|Types], Code, VarMap, ConstTab, Arg, Info) ->
114  {Code1,VarMap1,ConstTab1} = gen_fast_switch_on(Type, Cases,
115						 VarMap,
116						 ConstTab, Arg, Info),
117  handle_types(Types, [Code,Lbl,Code1], VarMap1, ConstTab1, Arg, Info);
118handle_types([], Code, VarMap, ConstTab, _, _) ->
119  {Code, VarMap, ConstTab}.
120
121
122gen_fast_switch_on(integer, Cases, VarMap, ConstTab, Arg, {I, Fail, Options})  ->
123  {First,_} = hd(Cases),
124  Min = hipe_icode:const_value(First),
125  if length(Cases) < ?MINFORJUMPTABLE ->
126      gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options);
127     true ->
128      case proplists:get_bool(use_clusters, Options) of
129	false ->
130	  M = list_to_tuple(Cases),
131	  D = density(M, 1, tuple_size(M)),
132	  if
133	    D >= ?MINDENSITY ->
134	      gen_jump_table(Arg,Fail,hipe_icode:switch_val_fail_label(I),VarMap,ConstTab,Cases,Min);
135	    true ->
136	      gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
137	  end;
138	true ->
139	  MC = minclusters(Cases),
140	  Cl = cluster_split(Cases,MC),
141	  CM = cluster_merge(Cl),
142	  find_cluster(CM,VarMap,ConstTab,Options,Arg,Fail,hipe_icode:switch_val_fail_label(I))
143      end
144  end;
145gen_fast_switch_on(atom, Cases, VarMap, ConstTab, Arg, {_I, Fail, Options})  ->
146  case proplists:get_bool(use_inline_atom_search, Options) of
147    true ->
148      if
149	length(Cases) < ?MININLINEATOMSEARCH ->
150	  gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
151	length(Cases) > ?MAXINLINEATOMSEARCH ->
152	  gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
153	true ->
154	  gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options)
155      end;
156    false ->
157      if length(Cases) < ?MINFORATOMSEARCHTREE ->
158	  gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
159	 true ->
160	  gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
161      end
162  end;
163gen_fast_switch_on(_, _, VarMap, ConstTab, _, {I,_Fail,Options})  ->
164  %% We can only handle smart indexing of integers and atoms
165  %% TODO: Consider bignum
166  gen_slow_switch_val(I, VarMap, ConstTab, Options).
167
168
169%% Split different types into separate switches.
170split_types([Case|Cases], Arg) ->
171  Type1 = casetype(Case),
172  Types = split(Cases,Type1,[Case],[]),
173  switch_on_types(Types,[], [], Arg);
174split_types([],_) ->
175  %% Cant happen.
176  ?EXIT({empty_caselist}).
177
178switch_on_types([{Type,Cases}], AccCode, AccCases, _Arg) ->
179  Lbl = hipe_rtl:mk_new_label(),
180  I = hipe_rtl:mk_goto(hipe_rtl:label_name(Lbl)),
181  {[{Type,Lbl,lists:reverse(Cases)} | AccCases], lists:reverse([I|AccCode])};
182switch_on_types([{other,Cases} | Rest], AccCode, AccCases, Arg) ->
183  %% Make sure the general case is handled last.
184  switch_on_types(Rest ++ [{other,Cases}], AccCode, AccCases, Arg);
185switch_on_types([{Type,Cases} | Rest], AccCode, AccCases, Arg) ->
186  TLab = hipe_rtl:mk_new_label(),
187  FLab = hipe_rtl:mk_new_label(),
188  TestCode =
189    case Type of
190      integer ->
191	hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(TLab),
192				   hipe_rtl:label_name(FLab), 0.5);
193      atom ->
194	hipe_tagscheme:test_atom(Arg, hipe_rtl:label_name(TLab),
195				 hipe_rtl:label_name(FLab), 0.5);
196      bignum ->
197	hipe_tagscheme:test_bignum(Arg, hipe_rtl:label_name(TLab),
198				   hipe_rtl:label_name(FLab), 0.5);
199      _ -> ?EXIT({ooops, type_not_handled, Type})
200    end,
201  switch_on_types(Rest, [[TestCode,FLab] | AccCode],
202		  [{Type,TLab,lists:reverse(Cases)} | AccCases], Arg).
203
204split([Case|Cases], Type, Current, Rest) ->
205  case casetype(Case) of
206    Type ->
207      split(Cases, Type, [Case|Current],Rest);
208    Other ->
209      split(Cases, Other, [Case], [{Type,Current}|Rest])
210  end;
211split([], Type, Current, Rest) ->
212  [{Type, Current} | Rest].
213
214%% Determine what type an entry in the caselist has
215
216casetype({Const,_}) ->
217  casetype(hipe_icode:const_value(Const));
218casetype(A) ->
219  if
220    is_integer(A) ->
221      case hipe_tagscheme:is_fixnum(A) of
222	true -> integer;
223	false -> bignum
224      end;
225    is_float(A) -> float;
226    is_atom(A) -> atom;
227    true -> other
228  end.
229
230%% check that no duplicate values occur in the case list and also
231%% check that all case values have the same type.
232check_duplicates([]) -> true;
233check_duplicates([_]) -> true;
234check_duplicates([{Const1,_},{Const2,L2}|T]) ->
235  C1 = hipe_icode:const_value(Const1),
236  C2 = hipe_icode:const_value(Const2),
237  %%	T1 = casetype(C1),
238  %%	T2 = casetype(C2),
239  if C1 =/= C2 -> %% , T1 =:= T2 ->
240      check_duplicates([{Const2,L2}|T]);
241     true ->
242      ?EXIT({bad_values_in_switchval,C1})
243  end.
244
245%%
246%% Determine the optimal way to divide Cases into clusters such that each
247%% cluster is dense.
248%%
249%% See:
250%%  Producing Good Code for the Case Statement, Robert L. Bernstein
251%%  Software - Practice and Experience vol 15, 1985, no 10, pp 1021--1024
252%% And
253%%  Correction to "Producing Good Code for the Case Statement"
254%%  Sampath Kannan and Todd A. Proebsting,
255%%  Software - Practice and Experience vol 24, 1994, no 2, p 233
256%%
257%% (The latter is where the algorithm comes from.)
258
259%% This function will return a tuple with the first element being 0
260%% The rest of the elements being integers. A value of M at index N
261%% (where the first element is considered to have index 0) means that
262%% the first N cases can be divided into M (but no fewer) clusters where
263%% each cluster is dense.
264
265minclusters(Cases) when is_list(Cases) ->
266  minclusters(list_to_tuple(Cases));
267minclusters(Cases) when is_tuple(Cases) ->
268  N = tuple_size(Cases),
269  MinClusters = list_to_tuple([0|n_list(N,inf)]),
270  i_loop(1,N,MinClusters,Cases).
271
272%% Create a list with N elements initialized to Init
273n_list(0,_) -> [];
274n_list(N,Init) -> [Init | n_list(N-1,Init)].
275
276%% Do the dirty work of minclusters
277i_loop(I,N,MinClusters,_Cases) when I > N ->
278  MinClusters;
279i_loop(I,N,MinClusters,Cases) when I =< N ->
280  M = j_loop(0, I-1, MinClusters, Cases),
281  i_loop(I+1, N, M, Cases).
282
283%% More dirty work
284j_loop(J,I1,MinClusters,_Cases) when J > I1 ->
285  MinClusters;
286j_loop(J,I1,MinClusters,Cases) when J =< I1 ->
287  D = density(Cases,J+1,I1+1),
288  A0 = element(J+1,MinClusters),
289  A = if
290	is_number(A0) ->
291	  A0+1;
292	true ->
293	  A0
294      end,
295  B = element(I1+2,MinClusters),
296  M = if
297	D >= ?MINDENSITY, A<B ->
298	  setelement(I1+2,MinClusters,A);
299	true ->
300	  MinClusters
301      end,
302  j_loop(J+1,I1,M,Cases).
303
304
305%% Determine the density of a (subset of a) case list
306%% A is a tuple with the cases in order from smallest to largest
307%% I is the index of the first element and J of the last
308
309density(A,I,J) ->
310  {AI,_} = element(I,A),
311  {AJ,_} = element(J,A),
312  (J-I+1)/(hipe_icode:const_value(AJ)-hipe_icode:const_value(AI)+1).
313
314
315%% Split a case list into dense clusters
316%% Returns a list of lists of cases.
317%%
318%% Cases is the case list and Clust is a list describing the optimal
319%% clustering as returned by minclusters
320%%
321%% If the value in the last place in minclusters is M then we can
322%% split the case list into M clusters. We then search for the last
323%% (== right-most) occurance of the value M-1 in minclusters. That
324%% indicates the largest number of cases that can be split into M-1
325%% clusters. This means that the cases in between constitute one
326%% cluster. Then we recurse on the remainder of the cases.
327%%
328%% The various calls to lists:reverse are just to ensure that the
329%% cases remain in the correct, sorted order.
330
331cluster_split(Cases, Clust) ->
332  A = tl(tuple_to_list(Clust)),
333  Max = element(tuple_size(Clust), Clust),
334  L1 = lists:reverse(Cases),
335  L2 = lists:reverse(A),
336  cluster_split(Max, [], [], L1, L2).
337
338cluster_split(0, [], Res, Cases, _Clust) ->
339  L = lists:reverse(Cases),
340  {H,_} = hd(L),
341  {T,_} = hd(Cases),
342  [{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),L}|Res];
343cluster_split(N, [], Res, Cases, [N|_] = Clust) ->
344  cluster_split(N-1, [], Res, Cases, Clust);
345cluster_split(N,Sofar,Res,Cases,[N|Clust]) ->
346  {H,_} = hd(Sofar),
347  {T,_} = lists:last(Sofar),
348  cluster_split(N-1,[],[{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),Sofar}|Res],Cases,[N|Clust]);
349cluster_split(N,Sofar,Res,[C|Cases],[_|Clust]) ->
350  cluster_split(N,[C|Sofar],Res,Cases,Clust).
351
352%%
353%% Merge adjacent small clusters into larger sparse clusters
354%%
355cluster_merge([C]) -> [C];
356cluster_merge([{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE ->
357  C2 = cluster_merge(T),
358  [{dense,Min,Max,C}|C2];
359cluster_merge([{sparse,Min,_,C},{sparse,_,Max,D}|T]) ->
360  R = {sparse,Min,Max,C ++ D},
361  cluster_merge([R|T]);
362cluster_merge([{sparse,Min,_,C},{dense,_,Max,D}|T]) when length(D) < ?MINFORJUMPTABLE ->
363  R = {sparse,Min,Max,C ++ D},
364  cluster_merge([R|T]);
365cluster_merge([{dense,Min,_,C},{dense,_,Max,D}|T]) when length(C) < ?MINFORJUMPTABLE, length(D) < ?MINFORJUMPTABLE ->
366  R = {sparse,Min,Max,C ++ D},
367  cluster_merge([R|T]);
368cluster_merge([{dense,Min,_,D},{sparse,_,Max,C}|T]) when length(D) < ?MINFORJUMPTABLE ->
369  R = {sparse,Min,Max,C ++ D},
370  cluster_merge([R|T]);
371cluster_merge([A,{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE ->
372  R = cluster_merge([{dense,Min,Max,C}|T]),
373  [A|R].
374
375
376%% Generate code to search for the correct cluster
377
378find_cluster([{sparse,_Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,_IcodeFail) ->
379  case length(C) < ?MINFORINTSEARCHTREE of
380    true ->
381      gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options);
382    _ ->
383      gen_search_switch_val(Arg,C,Fail,VarMap,ConstTab,Options)
384  end;
385find_cluster([{dense,Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) ->
386  case length(C) < ?MINFORJUMPTABLE of
387    true ->
388      gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options);
389    _ ->
390      gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,C,Min)
391  end;
392find_cluster([{Density,Min,Max,C}|T],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) ->
393  ClustLab = hipe_rtl:mk_new_label(),
394  NextLab = hipe_rtl:mk_new_label(),
395  {ClustCode,V1,C1} = find_cluster([{Density,Min,Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail),
396
397  {Rest,V2,C2} = find_cluster(T,V1,C1,Options,Arg,Fail,IcodeFail),
398
399  {[
400    hipe_rtl:mk_branch(Arg, gt, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Max)),
401		       hipe_rtl:label_name(NextLab),
402		       hipe_rtl:label_name(ClustLab), 0.50),
403    ClustLab
404   ] ++
405   ClustCode ++
406   [NextLab] ++
407   Rest,
408   V2,C2}.
409
410%% Generate efficient code for a linear search through the case list.
411%% Only works for atoms and integer.
412gen_linear_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
413  {Values,_Labels} = split_cases(Cases),
414  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
415  Code = fast_linear_search(Arg,Values,LabMap,Fail),
416  {Code,VarMap1,ConstTab}.
417
418fast_linear_search(_Arg,[],[],Fail) ->
419  [hipe_rtl:mk_goto(hipe_rtl:label_name(Fail))];
420fast_linear_search(Arg,[Case|Cases],[Label|Labels],Fail) ->
421  Reg = hipe_rtl:mk_new_reg_gcsafe(),
422  NextLab = hipe_rtl:mk_new_label(),
423  C2 = fast_linear_search(Arg,Cases,Labels,Fail),
424  C1 =
425    if
426      is_integer(Case) ->
427	TVal = hipe_tagscheme:mk_fixnum(Case),
428	[
429	 hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(TVal)),
430	 hipe_rtl:mk_branch(Arg,eq,Reg,
431			    Label,
432			    hipe_rtl:label_name(NextLab), 0.5),
433	 NextLab
434	];
435      is_atom(Case) ->
436	[
437	 hipe_rtl:mk_load_atom(Reg,Case),
438	 hipe_rtl:mk_branch(Arg,eq,Reg,
439			    Label,
440			    hipe_rtl:label_name(NextLab), 0.5),
441	 NextLab
442	];
443      true ->   % This should never happen !
444	?EXIT({internal_error_in_switch_val,Case})
445    end,
446  [C1,C2].
447
448
449%% Generate code to search through a small cluster of integers using
450%% binary search
451gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
452  {Values,_Labels} = split_cases(Cases),
453  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
454  Keys = [hipe_tagscheme:mk_fixnum(X)    % Add tags to the values
455	  || X <- Values],
456  Code = inline_search(Keys, LabMap, Arg, Fail),
457  {Code, VarMap1, ConstTab}.
458
459
460%% Generate code to search through a small cluster of atoms
461gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
462  {Values, _Labels} = split_cases(Cases),
463  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
464  LMap = [{label,L} || L <- LabMap],
465  {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values),
466  {NewConstTab2,LabId} =
467    hipe_consttab:insert_sorted_block(NewConstTab, word, LMap, Values),
468  Code = inline_atom_search(0, length(Cases)-1, Id, LabId, Arg, Fail, LabMap),
469  {Code, VarMap1, NewConstTab2}.
470
471
472%% calculate the middle position of a list (+ 1 because of 1-indexing of lists)
473get_middle(List) ->
474  N = length(List),
475  N div 2 + 1.
476
477%% get element [N1, N2] from a list
478get_cases(_, 0, 0) ->
479  [];
480get_cases([H|T], 0, N) ->
481  [H | get_cases(T, 0, N - 1)];
482get_cases([_|T], N1, N2) ->
483  get_cases(T, N1 - 1, N2 - 1).
484
485
486%% inline_search/4 creates RTL code for a inlined binary search.
487%% It requires two sorted tables - one with the keys to search
488%% through and one with the corresponding labels to jump to.
489%%
490%% Input:
491%%  KeyList - A list of keys to search through.
492%%  LableList - A list of labels to jump to.
493%%  KeyReg - A register containing the key to search for.
494%%  Default - A label to jump to if the key is not found.
495%%
496
497inline_search([], _LabelList, _KeyReg, _Default) -> [];
498inline_search(KeyList, LabelList, KeyReg, Default) ->
499  %% Create some registers and labels that we need.
500  Reg = hipe_rtl:mk_new_reg_gcsafe(),
501  Lab1 = hipe_rtl:mk_new_label(),
502  Lab2 = hipe_rtl:mk_new_label(),
503  Lab3 = hipe_rtl:mk_new_label(),
504
505  Length = length(KeyList),
506
507  if
508    Length >= 3 ->
509      %% Get middle element and keys/labels before that and after
510      Middle_pos = get_middle(KeyList),
511      Middle_key = lists:nth(Middle_pos, KeyList),
512      Keys_beginning = get_cases(KeyList, 0, Middle_pos - 1),
513      Labels_beginning = get_cases(LabelList, 0, Middle_pos - 1),
514      Keys_ending = get_cases(KeyList, Middle_pos, Length),
515      Labels_ending = get_cases(LabelList, Middle_pos, Length),
516
517      %% Create the code.
518
519      %% Get the label and build it up properly
520      Middle_label = lists:nth(Middle_pos, LabelList),
521
522      A = [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(Middle_key)),
523	   hipe_rtl:mk_branch(KeyReg, lt, Reg,
524			      hipe_rtl:label_name(Lab2),
525			      hipe_rtl:label_name(Lab1), 0.5),
526	   Lab1,
527	   hipe_rtl:mk_branch(KeyReg, gt, Reg,
528			      hipe_rtl:label_name(Lab3),
529			      Middle_label , 0.5),
530	   Lab2],
531      %% build search tree for keys less than the middle element
532      B = inline_search(Keys_beginning, Labels_beginning, KeyReg, Default),
533      %% ...and for keys bigger than the middle element
534      D = inline_search(Keys_ending, Labels_ending, KeyReg, Default),
535
536      %% append the code and return it
537      A ++ B ++ [Lab3] ++ D;
538
539    Length =:= 2 ->
540      %% get the first and second elements and theirs labels
541      Key_first = hd(KeyList),
542      First_label = hd(LabelList),
543
544      %% Key_second = hipe_tagscheme:mk_fixnum(lists:nth(2, KeyList)),
545      Key_second = lists:nth(2, KeyList),
546      Second_label = lists:nth(2, LabelList),
547
548      NewLab = hipe_rtl:mk_new_label(),
549
550      %% compare them
551      A = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_first)),
552	   hipe_rtl:mk_branch(KeyReg, eq, Reg,
553			      First_label,
554			      hipe_rtl:label_name(NewLab) , 0.5),
555	   NewLab],
556
557      B = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_second)),
558	   hipe_rtl:mk_branch(KeyReg, eq, Reg,
559			      Second_label,
560			      hipe_rtl:label_name(Default) , 0.5)],
561      A ++ B;
562
563    Length =:= 1 ->
564      Key = hd(KeyList),
565      Label = hd(LabelList),
566
567      [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key)),
568       hipe_rtl:mk_branch(KeyReg, eq, Reg,
569			  Label,
570			  hipe_rtl:label_name(Default) , 0.5)]
571  end.
572
573
574inline_atom_search(Start, End, Block, LBlock, KeyReg, Default, Labels) ->
575  Reg = hipe_rtl:mk_new_reg_gcsafe(),
576
577  Length = (End - Start) + 1,
578
579  if
580    Length >= 3 ->
581      Lab1 = hipe_rtl:mk_new_label(),
582      Lab2 = hipe_rtl:mk_new_label(),
583      Lab3 = hipe_rtl:mk_new_label(),
584      Lab4 = hipe_rtl:mk_new_label(),
585
586      Mid = ((End-Start) div 2)+Start,
587      End1 = Mid-1,
588      Start1 = Mid+1,
589      A = [
590	   hipe_rtl:mk_load_word_index(Reg,Block,Mid),
591	   hipe_rtl:mk_branch(KeyReg, lt, Reg,
592			      hipe_rtl:label_name(Lab2),
593			      hipe_rtl:label_name(Lab1), 0.5),
594	   Lab1,
595	   hipe_rtl:mk_branch(KeyReg, gt, Reg,
596			      hipe_rtl:label_name(Lab3),
597			      hipe_rtl:label_name(Lab4), 0.5),
598	   Lab4,
599	   hipe_rtl:mk_goto_index(LBlock, Mid, Labels),
600	   Lab2
601	  ],
602      B = [inline_atom_search(Start,End1,Block,LBlock,KeyReg,Default,Labels)],
603      C = [inline_atom_search(Start1,End,Block,LBlock,KeyReg,Default,Labels)],
604      A ++ B ++ [Lab3] ++ C;
605
606    Length =:= 2 ->
607      L1 = hipe_rtl:mk_new_label(),
608      L2 = hipe_rtl:mk_new_label(),
609      L3 = hipe_rtl:mk_new_label(),
610      [
611       hipe_rtl:mk_load_word_index(Reg,Block,Start),
612       hipe_rtl:mk_branch(KeyReg,eq,Reg,
613			  hipe_rtl:label_name(L1),
614			  hipe_rtl:label_name(L2), 0.5),
615       L1,
616       hipe_rtl:mk_goto_index(LBlock,Start,Labels),
617
618       L2,
619       hipe_rtl:mk_load_word_index(Reg,Block,End),
620       hipe_rtl:mk_branch(KeyReg,eq,Reg,
621			  hipe_rtl:label_name(L3),
622			  hipe_rtl:label_name(Default), 0.5),
623       L3,
624       hipe_rtl:mk_goto_index(LBlock, End, Labels)
625      ];
626
627    Length =:= 1 ->
628      NewLab = hipe_rtl:mk_new_label(),
629      [
630       hipe_rtl:mk_load_word_index(Reg,Block,Start),
631       hipe_rtl:mk_branch(KeyReg, eq, Reg,
632			  hipe_rtl:label_name(NewLab),
633			  hipe_rtl:label_name(Default), 0.9),
634       NewLab,
635       hipe_rtl:mk_goto_index(LBlock, Start, Labels)
636      ]
637  end.
638
639
640%% Create a jumptable
641gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,Cases,Min) ->
642  %% Map is a rtl mapping of Dense
643  {Max,DenseTbl} = dense_interval(Cases,Min,IcodeFail),
644  {Map,VarMap2} = lbls_from_cases(DenseTbl,VarMap),
645
646  %% Make some labels and registers that we need.
647  BelowLab = hipe_rtl:mk_new_label(),
648  UntaggedR = hipe_rtl:mk_new_reg_gcsafe(),
649  StartR = hipe_rtl:mk_new_reg_gcsafe(),
650
651  %% Generate the code to do the switch...
652  {[
653    %% Untag the index.
654    hipe_tagscheme:untag_fixnum(UntaggedR, Arg)|
655    %% Check that the index is within Min and Max.
656    case Min of
657      0 -> %% First element is 0 this is simple.
658	[hipe_rtl:mk_branch(UntaggedR, gtu, hipe_rtl:mk_imm(Max),
659			    hipe_rtl:label_name(Fail),
660			    hipe_rtl:label_name(BelowLab), 0.01),
661	 BelowLab,
662	 %% StartR contains the index into the jumptable
663	 hipe_rtl:mk_switch(UntaggedR, Map)];
664      _ -> %% First element is not 0
665	[hipe_rtl:mk_alu(StartR, UntaggedR, sub,
666			 hipe_rtl:mk_imm(Min)),
667	 hipe_rtl:mk_branch(StartR, gtu, hipe_rtl:mk_imm(Max-Min),
668			    hipe_rtl:label_name(Fail),
669			    hipe_rtl:label_name(BelowLab), 0.01),
670	 BelowLab,
671	 %% StartR contains the index into the jumptable
672	 hipe_rtl:mk_switch(StartR, Map)]
673    end],
674   VarMap2,
675   ConstTab}.
676
677
678%% Generate the jumptable for Cases while filling in unused positions
679%% with the fail label
680
681dense_interval(Cases, Min, IcodeFail) ->
682  dense_interval(Cases, Min, IcodeFail, 0, 0).
683dense_interval([Pair = {Const,_}|Rest], Pos, Fail, Range, NoEntries) ->
684  Val = hipe_icode:const_value(Const),
685  if
686    Pos < Val ->
687      {Max, Res} =
688	dense_interval([Pair|Rest], Pos+1, Fail, Range+1, NoEntries),
689      {Max,[{hipe_icode:mk_const(Pos), Fail}|Res]};
690    true ->
691      {Max, Res} = dense_interval(Rest, Pos+1, Fail, Range+1, NoEntries+1),
692      {Max, [Pair | Res]}
693  end;
694dense_interval([], Max, _, _, _) ->
695  {Max-1, []}.
696
697
698%%-------------------------------------------------------------------------
699%% switch_val without jumptable
700%%
701
702gen_slow_switch_val(I, VarMap, ConstTab, Options) ->
703  Is = rewrite_switch_val(I),
704  ?IF_DEBUG_LEVEL(3,?msg("Switch: ~w\n", [Is]), no_debug),
705  hipe_icode2rtl:translate_instrs(Is, VarMap, ConstTab, Options).
706
707rewrite_switch_val(I) ->
708  Var = hipe_icode:switch_val_term(I),
709  Fail = hipe_icode:switch_val_fail_label(I),
710  Cases = hipe_icode:switch_val_cases(I),
711  rewrite_switch_val_cases(Cases, Fail, Var).
712
713rewrite_switch_val_cases([{C,L}|Cases], Fail, Arg) ->
714  Tmp = hipe_icode:mk_new_var(),
715  NextLab = hipe_icode:mk_new_label(),
716  [hipe_icode:mk_move(Tmp, C),
717   hipe_icode:mk_if(op_exact_eqeq_2, [Arg, Tmp], L,
718		    hipe_icode:label_name(NextLab)),
719   NextLab |
720   rewrite_switch_val_cases(Cases, Fail, Arg)];
721rewrite_switch_val_cases([], Fail, _Arg) ->
722  [hipe_icode:mk_goto(Fail)].
723
724
725%%-------------------------------------------------------------------------
726%% switch_val with binary search jumptable
727%%
728
729gen_search_switch_val(Arg, Cases, Default, VarMap, ConstTab, _Options) ->
730  ValTableR = hipe_rtl:mk_new_reg_gcsafe(),
731
732  {Values,_Labels} = split_cases(Cases),
733  {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values),
734  {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap),
735
736  Code =
737    [hipe_rtl:mk_load_address(ValTableR, Id, constant)|
738     tab(Values,LabMap,Arg,ValTableR,Default)],
739  {Code, VarMap1, NewConstTab}.
740
741
742%%-------------------------------------------------------------------------
743%%
744%% tab/5 creates RTL code for a binary search.
745%% It requires two sorted tables one with the keys to search
746%% through and one with the corresponding labels to jump to.
747%%
748%% The implementation is derived from John Bentlys
749%% Programming Pearls.
750%%
751%% Input:
752%%  KeyList - A list of keys to search through.
753%%           (Just used to calculate the number of elements.)
754%%  LableList - A list of labels to jump to.
755%%  KeyReg - A register containing the key to search for.
756%%  TablePntrReg - A register containing a pointer to the
757%%                tables with keys
758%%  Default - A lable to jump to if the key is not found.
759%%
760%% Example:
761%%  KeyTbl: < a, b, d, f, h, i, z >
762%%  Lbls:   < 5, 3, 2, 4, 1, 7, 6 >
763%%  Default: 8
764%%  KeyReg: v37
765%%  TablePntrReg: r41
766%%
767%%  should give code like:
768%%    r41 <- KeyTbl
769%%    r42 <- 0
770%%    r43 <- [r41+16]
771%%    if (r43 gt v37) then L17 (0.50) else L16
772%% L16:
773%%    r42 <- 16
774%%    goto L17
775%% L17:
776%%    r46 <- r42 add 16
777%%    r45 <- [r41+r46]
778%%    if (r45 gt v37) then L21 (0.50) else L20
779%% L20:
780%%    r42 <- r46
781%%    goto L21
782%% L21:
783%%    r48 <- r42 add 8
784%%    r47 <- [r41+r48]
785%%    if (r47 gt v37) then L23 (0.50) else L22
786%% L22:
787%%    r42 <- r48
788%%    goto L23
789%% L23:
790%%    r50 <- r42 add 4
791%%    r49 <- [r41+r50]
792%%    if (r49 gt v37) then L25 (0.50) else L24
793%% L24:
794%%    r42 <- r42 add 4
795%%    goto L25
796%% L25:
797%%    if (r42 gt 28) then L6 (0.50) else L18
798%% L18:
799%%    r44 <- [r41+r42]
800%%    if (r44 eq v37) then L19 (0.90) else L8
801%% L19:
802%%    r42 <- r42 sra 2
803%%    switch (r42) <L5, L3, L2, L4, L1,
804%%              L7, L6>
805
806%%
807%% The search is done like a rolled out binary search,
808%% but instead of starting in the middle we start at
809%% the power of two closest above the middle.
810%%
811%% We let IndexReg point to the lower bound of our
812%% search, and then we speculatively look at a
813%% position at IndexReg + I where I is a power of 2.
814%%
815%% Example: Looking for 'h' in
816%%  KeyTbl: < a, b, d, f, h, i, z >
817%%
818%%  We start with IndexReg=0 and I=4
819%%          < a, b, d, f, h, i, z >
820%%            ^        ^
821%%     IndexReg    +   I
822%%
823%%  'f' < 'h' so we add I to IndexReg and divide I with 2
824%%  IndexReg=4 and I=2
825%%          < a, b, d, f, h, i, z >
826%%                     ^     ^
827%%              IndexReg +  I
828%%
829%%  'i' > 'h' so we keep IndexReg and divide I with 2
830%%  IndexReg=4 and I=1
831%%          < a, b, d, f, h, i, z >
832%%                     ^  ^
833%%              IndexReg+ I
834%%  Now we have found 'h' so we add I to IndexReg -> 5
835%%  And we can load switch to the label at position 5 in
836%%  the label table.
837%%
838%%  Now since the wordsize is 4 all numbers above are
839%%  Multiples of 4.
840
841tab(KeyList, LabelList, KeyReg, TablePntrReg, Default) ->
842  %% Calculate the size of the table:
843  %%  the number of keys * wordsize
844  LastOffset = (length(KeyList)-1)*?WORDSIZE,
845
846  %% Calculate the power of two closest to the size of the table.
847  Pow2 = 1 bsl trunc(math:log(LastOffset) / math:log(2)),
848
849  %% Create some registers and lables that we need
850  IndexReg = hipe_rtl:mk_new_reg_gcsafe(),
851  Temp = hipe_rtl:mk_new_reg_gcsafe(),
852  Temp2 = hipe_rtl:mk_new_reg_gcsafe(),
853  Lab1 = hipe_rtl:mk_new_label(),
854  Lab2 = hipe_rtl:mk_new_label(),
855  Lab3 = hipe_rtl:mk_new_label(),
856  Lab4 = hipe_rtl:mk_new_label(),
857
858  %% Calculate the position to start looking at
859  Init = (LastOffset)-Pow2,
860
861  %% Create the code
862  [
863   hipe_rtl:mk_move(IndexReg,hipe_rtl:mk_imm(0)),
864   hipe_rtl:mk_load(Temp,TablePntrReg,hipe_rtl:mk_imm(Init)),
865   hipe_rtl:mk_branch(Temp, geu, KeyReg,
866		      hipe_rtl:label_name(Lab2),
867		      hipe_rtl:label_name(Lab1), 0.5),
868   Lab1,
869   hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(Init+?WORDSIZE)),
870   hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
871   Lab2] ++
872
873    step(Pow2 div 2, TablePntrReg, IndexReg, KeyReg) ++
874
875    [hipe_rtl:mk_branch(IndexReg, gt, hipe_rtl:mk_imm(LastOffset),
876		       hipe_rtl:label_name(Default),
877		       hipe_rtl:label_name(Lab3), 0.5),
878     Lab3,
879     hipe_rtl:mk_load(Temp2,TablePntrReg,IndexReg),
880     hipe_rtl:mk_branch(Temp2, eq, KeyReg,
881			hipe_rtl:label_name(Lab4),
882			hipe_rtl:label_name(Default), 0.9),
883     Lab4,
884     hipe_rtl:mk_alu(IndexReg, IndexReg, sra,
885                     hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())),
886     hipe_rtl:mk_sorted_switch(IndexReg, LabelList, KeyList)
887    ].
888
889step(I,TablePntrReg,IndexReg,KeyReg) ->
890  Temp = hipe_rtl:mk_new_reg_gcsafe(),
891  TempIndex = hipe_rtl:mk_new_reg_gcsafe(),
892  Lab1 = hipe_rtl:mk_new_label(),
893  Lab2 = hipe_rtl:mk_new_label(),
894  [hipe_rtl:mk_alu(TempIndex, IndexReg, add, hipe_rtl:mk_imm(I)),
895   hipe_rtl:mk_load(Temp,TablePntrReg,TempIndex),
896   hipe_rtl:mk_branch(Temp, gtu, KeyReg,
897                      hipe_rtl:label_name(Lab2),
898                      hipe_rtl:label_name(Lab1) , 0.5),
899   Lab1] ++
900    case ?WORDSIZE of
901      I -> %% Recursive base case
902        [hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(I)),
903         hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
904         Lab2
905        ];
906      _ -> %% Recursion case
907        [hipe_rtl:mk_move(IndexReg, TempIndex),
908         hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
909         Lab2
910         | step(I div 2, TablePntrReg, IndexReg, KeyReg)
911        ]
912    end.
913
914%%-------------------------------------------------------------------------
915
916lbls_from_cases([{_,L}|Rest], VarMap) ->
917  {Map,VarMap1} = lbls_from_cases(Rest, VarMap),
918  {RtlL, VarMap2} = hipe_rtl_varmap:icode_label2rtl_label(L,VarMap1),
919  %%  {[{label,hipe_rtl:label_name(RtlL)}|Map],VarMap2};
920  {[hipe_rtl:label_name(RtlL)|Map],VarMap2};
921lbls_from_cases([], VarMap) ->
922  {[], VarMap}.
923
924%%-------------------------------------------------------------------------
925
926split_cases(L) ->
927  split_cases(L, [], []).
928
929split_cases([], Vs, Ls) -> {lists:reverse(Vs),lists:reverse(Ls)};
930split_cases([{V,L}|Rest], Vs, Ls) ->
931  split_cases(Rest, [hipe_icode:const_value(V)|Vs], [L|Ls]).
932
933%%-------------------------------------------------------------------------
934%%
935%% {switch_tuple_arity,X,Fail,N,[{A1,L1},...,{AN,LN}]}
936%%
937%% if not boxed(X) goto Fail
938%% Hdr := *boxed_val(X)
939%% switch_int(Hdr,Fail,[{H(A1),L1},...,{H(AN),LN}])
940%% where H(Ai) = make_arityval(Ai)
941%%
942%%-------------------------------------------------------------------------
943
944gen_switch_tuple(I, Map, ConstTab, _Options) ->
945  Var = hipe_icode:switch_tuple_arity_term(I),
946  {X, Map1} = hipe_rtl_varmap:icode_var2rtl_var(Var, Map),
947  Fail0 = hipe_icode:switch_tuple_arity_fail_label(I),
948  {Fail1, Map2} = hipe_rtl_varmap:icode_label2rtl_label(Fail0, Map1),
949  FailLab = hipe_rtl:label_name(Fail1),
950  {Cases, Map3} =
951    lists:foldr(fun({A,L}, {Rest,M}) ->
952		    {L1,M1} = hipe_rtl_varmap:icode_label2rtl_label(L, M),
953		    L2 = hipe_rtl:label_name(L1),
954		    A1 = hipe_icode:const_value(A),
955		    H1 = hipe_tagscheme:mk_arityval(A1),
956		    {[{H1,L2}|Rest], M1} end,
957		{[], Map2},
958		hipe_icode:switch_tuple_arity_cases(I)),
959  Hdr = hipe_rtl:mk_new_reg_gcsafe(),
960  IsBoxedLab = hipe_rtl:mk_new_label(),
961  {[hipe_tagscheme:test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab),
962				 FailLab, 0.9),
963    IsBoxedLab,
964    hipe_tagscheme:get_header(Hdr, X) |
965    gen_switch_int(Hdr, FailLab, Cases)],
966   Map3, ConstTab}.
967
968%%
969%% RTL-level switch-on-int
970%%
971
972gen_switch_int(X, FailLab, [{C,L}|Rest]) ->
973  NextLab = hipe_rtl:mk_new_label(),
974  [hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(C), L,
975		      hipe_rtl:label_name(NextLab), 0.5),
976   NextLab |
977   gen_switch_int(X, FailLab, Rest)];
978gen_switch_int(_, FailLab, []) ->
979  [hipe_rtl:mk_goto(FailLab)].
980
981