1%% -*- erlang-indent-level: 2 -*- 2%% 3%% Licensed under the Apache License, Version 2.0 (the "License"); 4%% you may not use this file except in compliance with the License. 5%% You may obtain a copy of the License at 6%% 7%% http://www.apache.org/licenses/LICENSE-2.0 8%% 9%% Unless required by applicable law or agreed to in writing, software 10%% distributed under the License is distributed on an "AS IS" BASIS, 11%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12%% See the License for the specific language governing permissions and 13%% limitations under the License. 14%% 15%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved 17%% ==================================================================== 18%% Filename : hipe_rtl_mk_switch.erl 19%% Module : hipe_rtl_mk_switch 20%% Purpose : Implements switching on Erlang values. 21%% Notes : Only fixnums are supported well, 22%% atoms work with table search, 23%% the inline search of atoms might have some bugs. 24%% Should be extended to handle bignums and floats. 25%% 26%% History : * 2001-02-28 Erik Johansson (happi@it.uu.se): 27%% Created. 28%% * 2001-04-01 Erik Trulsson (ertr1013@csd.uu.se): 29%% Stefan Lindström (stli3993@csd.uu.se): 30%% Added clustering and inlined binary search trees. 31%% * 2001-07-30 EJ (happi@it.uu.se): 32%% Fixed some bugs and started cleanup. 33%% ==================================================================== 34%% Exports : 35%% gen_switch_val(I, VarMap, ConstTab, Options) 36%% gen_switch_tuple(I, Map, ConstTab, Options) 37%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 39-module(hipe_rtl_mk_switch). 40 41-export([gen_switch_val/4, gen_switch_tuple/4]). 42 43%%------------------------------------------------------------------------- 44 45-include("../main/hipe.hrl"). 46 47%%------------------------------------------------------------------------- 48 49-define(MINFORJUMPTABLE,9). 50 % Minimum number of integers needed to use something else than an inline search. 51-define(MINFORINTSEARCHTREE,65). % Must be at least 3 52 % Minimum number of integer elements needed to use a non-inline binary search. 53 54-define(MININLINEATOMSEARCH,8). 55 % Minimum number of atoms needed to use an inline binary search instead 56 % of a fast linear search. 57 58-define(MINFORATOMSEARCHTREE,20). % Must be at least 3 59 % Minimum number of atoms needed to use a non-inline binary search instead 60 % of a linear search. 61 62-define(MAXINLINEATOMSEARCH,64). % Must be at least 3 63 % The cutoff point between inlined and non-inlined binary search for atoms 64 65-define(WORDSIZE, hipe_rtl_arch:word_size()). 66-define(MINDENSITY, 0.5). 67 % Minimum density required to use a jumptable instead of a binary search. 68 69%% The reason why MINFORINTSEARCHTREE and MINFORATOMSEARCHTREE must be 70%% at least 3 is that the function tab/5 will enter an infinite loop 71%% and hang when faced with a switch of size 1 or 2. 72 73 74%% Options used by this module: 75%% 76%% [no_]use_indexing 77%% Determines if any indexing be should be done at all. Turned on 78%% by default at optimization level o2 and higher. 79%% 80%% [no_]use_clusters 81%% Controls whether we attempt to divide sparse integer switches 82%% into smaller dense clusters for which jumptables are practical. 83%% Turned off by default since it can increase compilation time 84%% considerably and most programs will gain little benefit from it. 85%% 86%% [no_]use_inline_atom_search 87%% Controls whether we use an inline binary search for small number 88%% of atoms. Turned off by default since this is currently only 89%% supported on SPARC (and not on x86) and probably needs a bit 90%% more testing before it can be turned on by default. 91 92gen_switch_val(I, VarMap, ConstTab, Options) -> 93 case proplists:get_bool(use_indexing, Options) of 94 false -> gen_slow_switch_val(I, VarMap, ConstTab, Options); 95 true -> gen_fast_switch_val(I, VarMap, ConstTab, Options) 96 end. 97 98gen_fast_switch_val(I, VarMap, ConstTab, Options) -> 99 {Arg, VarMap0} = 100 hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:switch_val_term(I), VarMap), 101 IcodeFail = hipe_icode:switch_val_fail_label(I), 102 {Fail, VarMap1} = hipe_rtl_varmap:icode_label2rtl_label(IcodeFail, VarMap0), 103 %% Important that the list of cases is sorted when handling integers. 104 UnsortedCases = hipe_icode:switch_val_cases(I), 105 Cases = lists:sort(UnsortedCases), 106 107 check_duplicates(Cases), 108 %% This check is currently not really necessary. The checking 109 %% happens at an earlier phase of the compilation. 110 {Types, InitCode} = split_types(Cases, Arg), 111 handle_types(Types, InitCode, VarMap1, ConstTab, Arg, {I, Fail, Options}). 112 113handle_types([{Type,Lbl,Cases}|Types], Code, VarMap, ConstTab, Arg, Info) -> 114 {Code1,VarMap1,ConstTab1} = gen_fast_switch_on(Type, Cases, 115 VarMap, 116 ConstTab, Arg, Info), 117 handle_types(Types, [Code,Lbl,Code1], VarMap1, ConstTab1, Arg, Info); 118handle_types([], Code, VarMap, ConstTab, _, _) -> 119 {Code, VarMap, ConstTab}. 120 121 122gen_fast_switch_on(integer, Cases, VarMap, ConstTab, Arg, {I, Fail, Options}) -> 123 {First,_} = hd(Cases), 124 Min = hipe_icode:const_value(First), 125 if length(Cases) < ?MINFORJUMPTABLE -> 126 gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options); 127 true -> 128 case proplists:get_bool(use_clusters, Options) of 129 false -> 130 M = list_to_tuple(Cases), 131 D = density(M, 1, tuple_size(M)), 132 if 133 D >= ?MINDENSITY -> 134 gen_jump_table(Arg,Fail,hipe_icode:switch_val_fail_label(I),VarMap,ConstTab,Cases,Min); 135 true -> 136 gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options) 137 end; 138 true -> 139 MC = minclusters(Cases), 140 Cl = cluster_split(Cases,MC), 141 CM = cluster_merge(Cl), 142 find_cluster(CM,VarMap,ConstTab,Options,Arg,Fail,hipe_icode:switch_val_fail_label(I)) 143 end 144 end; 145gen_fast_switch_on(atom, Cases, VarMap, ConstTab, Arg, {_I, Fail, Options}) -> 146 case proplists:get_bool(use_inline_atom_search, Options) of 147 true -> 148 if 149 length(Cases) < ?MININLINEATOMSEARCH -> 150 gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); 151 length(Cases) > ?MAXINLINEATOMSEARCH -> 152 gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); 153 true -> 154 gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,Options) 155 end; 156 false -> 157 if length(Cases) < ?MINFORATOMSEARCHTREE -> 158 gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options); 159 true -> 160 gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options) 161 end 162 end; 163gen_fast_switch_on(_, _, VarMap, ConstTab, _, {I,_Fail,Options}) -> 164 %% We can only handle smart indexing of integers and atoms 165 %% TODO: Consider bignum 166 gen_slow_switch_val(I, VarMap, ConstTab, Options). 167 168 169%% Split different types into separate switches. 170split_types([Case|Cases], Arg) -> 171 Type1 = casetype(Case), 172 Types = split(Cases,Type1,[Case],[]), 173 switch_on_types(Types,[], [], Arg); 174split_types([],_) -> 175 %% Cant happen. 176 ?EXIT({empty_caselist}). 177 178switch_on_types([{Type,Cases}], AccCode, AccCases, _Arg) -> 179 Lbl = hipe_rtl:mk_new_label(), 180 I = hipe_rtl:mk_goto(hipe_rtl:label_name(Lbl)), 181 {[{Type,Lbl,lists:reverse(Cases)} | AccCases], lists:reverse([I|AccCode])}; 182switch_on_types([{other,Cases} | Rest], AccCode, AccCases, Arg) -> 183 %% Make sure the general case is handled last. 184 switch_on_types(Rest ++ [{other,Cases}], AccCode, AccCases, Arg); 185switch_on_types([{Type,Cases} | Rest], AccCode, AccCases, Arg) -> 186 TLab = hipe_rtl:mk_new_label(), 187 FLab = hipe_rtl:mk_new_label(), 188 TestCode = 189 case Type of 190 integer -> 191 hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(TLab), 192 hipe_rtl:label_name(FLab), 0.5); 193 atom -> 194 hipe_tagscheme:test_atom(Arg, hipe_rtl:label_name(TLab), 195 hipe_rtl:label_name(FLab), 0.5); 196 bignum -> 197 hipe_tagscheme:test_bignum(Arg, hipe_rtl:label_name(TLab), 198 hipe_rtl:label_name(FLab), 0.5); 199 _ -> ?EXIT({ooops, type_not_handled, Type}) 200 end, 201 switch_on_types(Rest, [[TestCode,FLab] | AccCode], 202 [{Type,TLab,lists:reverse(Cases)} | AccCases], Arg). 203 204split([Case|Cases], Type, Current, Rest) -> 205 case casetype(Case) of 206 Type -> 207 split(Cases, Type, [Case|Current],Rest); 208 Other -> 209 split(Cases, Other, [Case], [{Type,Current}|Rest]) 210 end; 211split([], Type, Current, Rest) -> 212 [{Type, Current} | Rest]. 213 214%% Determine what type an entry in the caselist has 215 216casetype({Const,_}) -> 217 casetype(hipe_icode:const_value(Const)); 218casetype(A) -> 219 if 220 is_integer(A) -> 221 case hipe_tagscheme:is_fixnum(A) of 222 true -> integer; 223 false -> bignum 224 end; 225 is_float(A) -> float; 226 is_atom(A) -> atom; 227 true -> other 228 end. 229 230%% check that no duplicate values occur in the case list and also 231%% check that all case values have the same type. 232check_duplicates([]) -> true; 233check_duplicates([_]) -> true; 234check_duplicates([{Const1,_},{Const2,L2}|T]) -> 235 C1 = hipe_icode:const_value(Const1), 236 C2 = hipe_icode:const_value(Const2), 237 %% T1 = casetype(C1), 238 %% T2 = casetype(C2), 239 if C1 =/= C2 -> %% , T1 =:= T2 -> 240 check_duplicates([{Const2,L2}|T]); 241 true -> 242 ?EXIT({bad_values_in_switchval,C1}) 243 end. 244 245%% 246%% Determine the optimal way to divide Cases into clusters such that each 247%% cluster is dense. 248%% 249%% See: 250%% Producing Good Code for the Case Statement, Robert L. Bernstein 251%% Software - Practice and Experience vol 15, 1985, no 10, pp 1021--1024 252%% And 253%% Correction to "Producing Good Code for the Case Statement" 254%% Sampath Kannan and Todd A. Proebsting, 255%% Software - Practice and Experience vol 24, 1994, no 2, p 233 256%% 257%% (The latter is where the algorithm comes from.) 258 259%% This function will return a tuple with the first element being 0 260%% The rest of the elements being integers. A value of M at index N 261%% (where the first element is considered to have index 0) means that 262%% the first N cases can be divided into M (but no fewer) clusters where 263%% each cluster is dense. 264 265minclusters(Cases) when is_list(Cases) -> 266 minclusters(list_to_tuple(Cases)); 267minclusters(Cases) when is_tuple(Cases) -> 268 N = tuple_size(Cases), 269 MinClusters = list_to_tuple([0|n_list(N,inf)]), 270 i_loop(1,N,MinClusters,Cases). 271 272%% Create a list with N elements initialized to Init 273n_list(0,_) -> []; 274n_list(N,Init) -> [Init | n_list(N-1,Init)]. 275 276%% Do the dirty work of minclusters 277i_loop(I,N,MinClusters,_Cases) when I > N -> 278 MinClusters; 279i_loop(I,N,MinClusters,Cases) when I =< N -> 280 M = j_loop(0, I-1, MinClusters, Cases), 281 i_loop(I+1, N, M, Cases). 282 283%% More dirty work 284j_loop(J,I1,MinClusters,_Cases) when J > I1 -> 285 MinClusters; 286j_loop(J,I1,MinClusters,Cases) when J =< I1 -> 287 D = density(Cases,J+1,I1+1), 288 A0 = element(J+1,MinClusters), 289 A = if 290 is_number(A0) -> 291 A0+1; 292 true -> 293 A0 294 end, 295 B = element(I1+2,MinClusters), 296 M = if 297 D >= ?MINDENSITY, A<B -> 298 setelement(I1+2,MinClusters,A); 299 true -> 300 MinClusters 301 end, 302 j_loop(J+1,I1,M,Cases). 303 304 305%% Determine the density of a (subset of a) case list 306%% A is a tuple with the cases in order from smallest to largest 307%% I is the index of the first element and J of the last 308 309density(A,I,J) -> 310 {AI,_} = element(I,A), 311 {AJ,_} = element(J,A), 312 (J-I+1)/(hipe_icode:const_value(AJ)-hipe_icode:const_value(AI)+1). 313 314 315%% Split a case list into dense clusters 316%% Returns a list of lists of cases. 317%% 318%% Cases is the case list and Clust is a list describing the optimal 319%% clustering as returned by minclusters 320%% 321%% If the value in the last place in minclusters is M then we can 322%% split the case list into M clusters. We then search for the last 323%% (== right-most) occurance of the value M-1 in minclusters. That 324%% indicates the largest number of cases that can be split into M-1 325%% clusters. This means that the cases in between constitute one 326%% cluster. Then we recurse on the remainder of the cases. 327%% 328%% The various calls to lists:reverse are just to ensure that the 329%% cases remain in the correct, sorted order. 330 331cluster_split(Cases, Clust) -> 332 A = tl(tuple_to_list(Clust)), 333 Max = element(tuple_size(Clust), Clust), 334 L1 = lists:reverse(Cases), 335 L2 = lists:reverse(A), 336 cluster_split(Max, [], [], L1, L2). 337 338cluster_split(0, [], Res, Cases, _Clust) -> 339 L = lists:reverse(Cases), 340 {H,_} = hd(L), 341 {T,_} = hd(Cases), 342 [{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),L}|Res]; 343cluster_split(N, [], Res, Cases, [N|_] = Clust) -> 344 cluster_split(N-1, [], Res, Cases, Clust); 345cluster_split(N,Sofar,Res,Cases,[N|Clust]) -> 346 {H,_} = hd(Sofar), 347 {T,_} = lists:last(Sofar), 348 cluster_split(N-1,[],[{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),Sofar}|Res],Cases,[N|Clust]); 349cluster_split(N,Sofar,Res,[C|Cases],[_|Clust]) -> 350 cluster_split(N,[C|Sofar],Res,Cases,Clust). 351 352%% 353%% Merge adjacent small clusters into larger sparse clusters 354%% 355cluster_merge([C]) -> [C]; 356cluster_merge([{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE -> 357 C2 = cluster_merge(T), 358 [{dense,Min,Max,C}|C2]; 359cluster_merge([{sparse,Min,_,C},{sparse,_,Max,D}|T]) -> 360 R = {sparse,Min,Max,C ++ D}, 361 cluster_merge([R|T]); 362cluster_merge([{sparse,Min,_,C},{dense,_,Max,D}|T]) when length(D) < ?MINFORJUMPTABLE -> 363 R = {sparse,Min,Max,C ++ D}, 364 cluster_merge([R|T]); 365cluster_merge([{dense,Min,_,C},{dense,_,Max,D}|T]) when length(C) < ?MINFORJUMPTABLE, length(D) < ?MINFORJUMPTABLE -> 366 R = {sparse,Min,Max,C ++ D}, 367 cluster_merge([R|T]); 368cluster_merge([{dense,Min,_,D},{sparse,_,Max,C}|T]) when length(D) < ?MINFORJUMPTABLE -> 369 R = {sparse,Min,Max,C ++ D}, 370 cluster_merge([R|T]); 371cluster_merge([A,{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE -> 372 R = cluster_merge([{dense,Min,Max,C}|T]), 373 [A|R]. 374 375 376%% Generate code to search for the correct cluster 377 378find_cluster([{sparse,_Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,_IcodeFail) -> 379 case length(C) < ?MINFORINTSEARCHTREE of 380 true -> 381 gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options); 382 _ -> 383 gen_search_switch_val(Arg,C,Fail,VarMap,ConstTab,Options) 384 end; 385find_cluster([{dense,Min,_Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) -> 386 case length(C) < ?MINFORJUMPTABLE of 387 true -> 388 gen_small_switch_val(Arg,C,Fail,VarMap,ConstTab,Options); 389 _ -> 390 gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,C,Min) 391 end; 392find_cluster([{Density,Min,Max,C}|T],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) -> 393 ClustLab = hipe_rtl:mk_new_label(), 394 NextLab = hipe_rtl:mk_new_label(), 395 {ClustCode,V1,C1} = find_cluster([{Density,Min,Max,C}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail), 396 397 {Rest,V2,C2} = find_cluster(T,V1,C1,Options,Arg,Fail,IcodeFail), 398 399 {[ 400 hipe_rtl:mk_branch(Arg, gt, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Max)), 401 hipe_rtl:label_name(NextLab), 402 hipe_rtl:label_name(ClustLab), 0.50), 403 ClustLab 404 ] ++ 405 ClustCode ++ 406 [NextLab] ++ 407 Rest, 408 V2,C2}. 409 410%% Generate efficient code for a linear search through the case list. 411%% Only works for atoms and integer. 412gen_linear_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> 413 {Values,_Labels} = split_cases(Cases), 414 {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), 415 Code = fast_linear_search(Arg,Values,LabMap,Fail), 416 {Code,VarMap1,ConstTab}. 417 418fast_linear_search(_Arg,[],[],Fail) -> 419 [hipe_rtl:mk_goto(hipe_rtl:label_name(Fail))]; 420fast_linear_search(Arg,[Case|Cases],[Label|Labels],Fail) -> 421 Reg = hipe_rtl:mk_new_reg_gcsafe(), 422 NextLab = hipe_rtl:mk_new_label(), 423 C2 = fast_linear_search(Arg,Cases,Labels,Fail), 424 C1 = 425 if 426 is_integer(Case) -> 427 TVal = hipe_tagscheme:mk_fixnum(Case), 428 [ 429 hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(TVal)), 430 hipe_rtl:mk_branch(Arg,eq,Reg, 431 Label, 432 hipe_rtl:label_name(NextLab), 0.5), 433 NextLab 434 ]; 435 is_atom(Case) -> 436 [ 437 hipe_rtl:mk_load_atom(Reg,Case), 438 hipe_rtl:mk_branch(Arg,eq,Reg, 439 Label, 440 hipe_rtl:label_name(NextLab), 0.5), 441 NextLab 442 ]; 443 true -> % This should never happen ! 444 ?EXIT({internal_error_in_switch_val,Case}) 445 end, 446 [C1,C2]. 447 448 449%% Generate code to search through a small cluster of integers using 450%% binary search 451gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> 452 {Values,_Labels} = split_cases(Cases), 453 {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), 454 Keys = [hipe_tagscheme:mk_fixnum(X) % Add tags to the values 455 || X <- Values], 456 Code = inline_search(Keys, LabMap, Arg, Fail), 457 {Code, VarMap1, ConstTab}. 458 459 460%% Generate code to search through a small cluster of atoms 461gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) -> 462 {Values, _Labels} = split_cases(Cases), 463 {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), 464 LMap = [{label,L} || L <- LabMap], 465 {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values), 466 {NewConstTab2,LabId} = 467 hipe_consttab:insert_sorted_block(NewConstTab, word, LMap, Values), 468 Code = inline_atom_search(0, length(Cases)-1, Id, LabId, Arg, Fail, LabMap), 469 {Code, VarMap1, NewConstTab2}. 470 471 472%% calculate the middle position of a list (+ 1 because of 1-indexing of lists) 473get_middle(List) -> 474 N = length(List), 475 N div 2 + 1. 476 477%% get element [N1, N2] from a list 478get_cases(_, 0, 0) -> 479 []; 480get_cases([H|T], 0, N) -> 481 [H | get_cases(T, 0, N - 1)]; 482get_cases([_|T], N1, N2) -> 483 get_cases(T, N1 - 1, N2 - 1). 484 485 486%% inline_search/4 creates RTL code for a inlined binary search. 487%% It requires two sorted tables - one with the keys to search 488%% through and one with the corresponding labels to jump to. 489%% 490%% Input: 491%% KeyList - A list of keys to search through. 492%% LableList - A list of labels to jump to. 493%% KeyReg - A register containing the key to search for. 494%% Default - A label to jump to if the key is not found. 495%% 496 497inline_search([], _LabelList, _KeyReg, _Default) -> []; 498inline_search(KeyList, LabelList, KeyReg, Default) -> 499 %% Create some registers and labels that we need. 500 Reg = hipe_rtl:mk_new_reg_gcsafe(), 501 Lab1 = hipe_rtl:mk_new_label(), 502 Lab2 = hipe_rtl:mk_new_label(), 503 Lab3 = hipe_rtl:mk_new_label(), 504 505 Length = length(KeyList), 506 507 if 508 Length >= 3 -> 509 %% Get middle element and keys/labels before that and after 510 Middle_pos = get_middle(KeyList), 511 Middle_key = lists:nth(Middle_pos, KeyList), 512 Keys_beginning = get_cases(KeyList, 0, Middle_pos - 1), 513 Labels_beginning = get_cases(LabelList, 0, Middle_pos - 1), 514 Keys_ending = get_cases(KeyList, Middle_pos, Length), 515 Labels_ending = get_cases(LabelList, Middle_pos, Length), 516 517 %% Create the code. 518 519 %% Get the label and build it up properly 520 Middle_label = lists:nth(Middle_pos, LabelList), 521 522 A = [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(Middle_key)), 523 hipe_rtl:mk_branch(KeyReg, lt, Reg, 524 hipe_rtl:label_name(Lab2), 525 hipe_rtl:label_name(Lab1), 0.5), 526 Lab1, 527 hipe_rtl:mk_branch(KeyReg, gt, Reg, 528 hipe_rtl:label_name(Lab3), 529 Middle_label , 0.5), 530 Lab2], 531 %% build search tree for keys less than the middle element 532 B = inline_search(Keys_beginning, Labels_beginning, KeyReg, Default), 533 %% ...and for keys bigger than the middle element 534 D = inline_search(Keys_ending, Labels_ending, KeyReg, Default), 535 536 %% append the code and return it 537 A ++ B ++ [Lab3] ++ D; 538 539 Length =:= 2 -> 540 %% get the first and second elements and theirs labels 541 Key_first = hd(KeyList), 542 First_label = hd(LabelList), 543 544 %% Key_second = hipe_tagscheme:mk_fixnum(lists:nth(2, KeyList)), 545 Key_second = lists:nth(2, KeyList), 546 Second_label = lists:nth(2, LabelList), 547 548 NewLab = hipe_rtl:mk_new_label(), 549 550 %% compare them 551 A = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_first)), 552 hipe_rtl:mk_branch(KeyReg, eq, Reg, 553 First_label, 554 hipe_rtl:label_name(NewLab) , 0.5), 555 NewLab], 556 557 B = [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key_second)), 558 hipe_rtl:mk_branch(KeyReg, eq, Reg, 559 Second_label, 560 hipe_rtl:label_name(Default) , 0.5)], 561 A ++ B; 562 563 Length =:= 1 -> 564 Key = hd(KeyList), 565 Label = hd(LabelList), 566 567 [hipe_rtl:mk_move(Reg,hipe_rtl:mk_imm(Key)), 568 hipe_rtl:mk_branch(KeyReg, eq, Reg, 569 Label, 570 hipe_rtl:label_name(Default) , 0.5)] 571 end. 572 573 574inline_atom_search(Start, End, Block, LBlock, KeyReg, Default, Labels) -> 575 Reg = hipe_rtl:mk_new_reg_gcsafe(), 576 577 Length = (End - Start) + 1, 578 579 if 580 Length >= 3 -> 581 Lab1 = hipe_rtl:mk_new_label(), 582 Lab2 = hipe_rtl:mk_new_label(), 583 Lab3 = hipe_rtl:mk_new_label(), 584 Lab4 = hipe_rtl:mk_new_label(), 585 586 Mid = ((End-Start) div 2)+Start, 587 End1 = Mid-1, 588 Start1 = Mid+1, 589 A = [ 590 hipe_rtl:mk_load_word_index(Reg,Block,Mid), 591 hipe_rtl:mk_branch(KeyReg, lt, Reg, 592 hipe_rtl:label_name(Lab2), 593 hipe_rtl:label_name(Lab1), 0.5), 594 Lab1, 595 hipe_rtl:mk_branch(KeyReg, gt, Reg, 596 hipe_rtl:label_name(Lab3), 597 hipe_rtl:label_name(Lab4), 0.5), 598 Lab4, 599 hipe_rtl:mk_goto_index(LBlock, Mid, Labels), 600 Lab2 601 ], 602 B = [inline_atom_search(Start,End1,Block,LBlock,KeyReg,Default,Labels)], 603 C = [inline_atom_search(Start1,End,Block,LBlock,KeyReg,Default,Labels)], 604 A ++ B ++ [Lab3] ++ C; 605 606 Length =:= 2 -> 607 L1 = hipe_rtl:mk_new_label(), 608 L2 = hipe_rtl:mk_new_label(), 609 L3 = hipe_rtl:mk_new_label(), 610 [ 611 hipe_rtl:mk_load_word_index(Reg,Block,Start), 612 hipe_rtl:mk_branch(KeyReg,eq,Reg, 613 hipe_rtl:label_name(L1), 614 hipe_rtl:label_name(L2), 0.5), 615 L1, 616 hipe_rtl:mk_goto_index(LBlock,Start,Labels), 617 618 L2, 619 hipe_rtl:mk_load_word_index(Reg,Block,End), 620 hipe_rtl:mk_branch(KeyReg,eq,Reg, 621 hipe_rtl:label_name(L3), 622 hipe_rtl:label_name(Default), 0.5), 623 L3, 624 hipe_rtl:mk_goto_index(LBlock, End, Labels) 625 ]; 626 627 Length =:= 1 -> 628 NewLab = hipe_rtl:mk_new_label(), 629 [ 630 hipe_rtl:mk_load_word_index(Reg,Block,Start), 631 hipe_rtl:mk_branch(KeyReg, eq, Reg, 632 hipe_rtl:label_name(NewLab), 633 hipe_rtl:label_name(Default), 0.9), 634 NewLab, 635 hipe_rtl:mk_goto_index(LBlock, Start, Labels) 636 ] 637 end. 638 639 640%% Create a jumptable 641gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,Cases,Min) -> 642 %% Map is a rtl mapping of Dense 643 {Max,DenseTbl} = dense_interval(Cases,Min,IcodeFail), 644 {Map,VarMap2} = lbls_from_cases(DenseTbl,VarMap), 645 646 %% Make some labels and registers that we need. 647 BelowLab = hipe_rtl:mk_new_label(), 648 UntaggedR = hipe_rtl:mk_new_reg_gcsafe(), 649 StartR = hipe_rtl:mk_new_reg_gcsafe(), 650 651 %% Generate the code to do the switch... 652 {[ 653 %% Untag the index. 654 hipe_tagscheme:untag_fixnum(UntaggedR, Arg)| 655 %% Check that the index is within Min and Max. 656 case Min of 657 0 -> %% First element is 0 this is simple. 658 [hipe_rtl:mk_branch(UntaggedR, gtu, hipe_rtl:mk_imm(Max), 659 hipe_rtl:label_name(Fail), 660 hipe_rtl:label_name(BelowLab), 0.01), 661 BelowLab, 662 %% StartR contains the index into the jumptable 663 hipe_rtl:mk_switch(UntaggedR, Map)]; 664 _ -> %% First element is not 0 665 [hipe_rtl:mk_alu(StartR, UntaggedR, sub, 666 hipe_rtl:mk_imm(Min)), 667 hipe_rtl:mk_branch(StartR, gtu, hipe_rtl:mk_imm(Max-Min), 668 hipe_rtl:label_name(Fail), 669 hipe_rtl:label_name(BelowLab), 0.01), 670 BelowLab, 671 %% StartR contains the index into the jumptable 672 hipe_rtl:mk_switch(StartR, Map)] 673 end], 674 VarMap2, 675 ConstTab}. 676 677 678%% Generate the jumptable for Cases while filling in unused positions 679%% with the fail label 680 681dense_interval(Cases, Min, IcodeFail) -> 682 dense_interval(Cases, Min, IcodeFail, 0, 0). 683dense_interval([Pair = {Const,_}|Rest], Pos, Fail, Range, NoEntries) -> 684 Val = hipe_icode:const_value(Const), 685 if 686 Pos < Val -> 687 {Max, Res} = 688 dense_interval([Pair|Rest], Pos+1, Fail, Range+1, NoEntries), 689 {Max,[{hipe_icode:mk_const(Pos), Fail}|Res]}; 690 true -> 691 {Max, Res} = dense_interval(Rest, Pos+1, Fail, Range+1, NoEntries+1), 692 {Max, [Pair | Res]} 693 end; 694dense_interval([], Max, _, _, _) -> 695 {Max-1, []}. 696 697 698%%------------------------------------------------------------------------- 699%% switch_val without jumptable 700%% 701 702gen_slow_switch_val(I, VarMap, ConstTab, Options) -> 703 Is = rewrite_switch_val(I), 704 ?IF_DEBUG_LEVEL(3,?msg("Switch: ~w\n", [Is]), no_debug), 705 hipe_icode2rtl:translate_instrs(Is, VarMap, ConstTab, Options). 706 707rewrite_switch_val(I) -> 708 Var = hipe_icode:switch_val_term(I), 709 Fail = hipe_icode:switch_val_fail_label(I), 710 Cases = hipe_icode:switch_val_cases(I), 711 rewrite_switch_val_cases(Cases, Fail, Var). 712 713rewrite_switch_val_cases([{C,L}|Cases], Fail, Arg) -> 714 Tmp = hipe_icode:mk_new_var(), 715 NextLab = hipe_icode:mk_new_label(), 716 [hipe_icode:mk_move(Tmp, C), 717 hipe_icode:mk_if(op_exact_eqeq_2, [Arg, Tmp], L, 718 hipe_icode:label_name(NextLab)), 719 NextLab | 720 rewrite_switch_val_cases(Cases, Fail, Arg)]; 721rewrite_switch_val_cases([], Fail, _Arg) -> 722 [hipe_icode:mk_goto(Fail)]. 723 724 725%%------------------------------------------------------------------------- 726%% switch_val with binary search jumptable 727%% 728 729gen_search_switch_val(Arg, Cases, Default, VarMap, ConstTab, _Options) -> 730 ValTableR = hipe_rtl:mk_new_reg_gcsafe(), 731 732 {Values,_Labels} = split_cases(Cases), 733 {NewConstTab,Id} = hipe_consttab:insert_sorted_block(ConstTab, Values), 734 {LabMap,VarMap1} = lbls_from_cases(Cases,VarMap), 735 736 Code = 737 [hipe_rtl:mk_load_address(ValTableR, Id, constant)| 738 tab(Values,LabMap,Arg,ValTableR,Default)], 739 {Code, VarMap1, NewConstTab}. 740 741 742%%------------------------------------------------------------------------- 743%% 744%% tab/5 creates RTL code for a binary search. 745%% It requires two sorted tables one with the keys to search 746%% through and one with the corresponding labels to jump to. 747%% 748%% The implementation is derived from John Bentlys 749%% Programming Pearls. 750%% 751%% Input: 752%% KeyList - A list of keys to search through. 753%% (Just used to calculate the number of elements.) 754%% LableList - A list of labels to jump to. 755%% KeyReg - A register containing the key to search for. 756%% TablePntrReg - A register containing a pointer to the 757%% tables with keys 758%% Default - A lable to jump to if the key is not found. 759%% 760%% Example: 761%% KeyTbl: < a, b, d, f, h, i, z > 762%% Lbls: < 5, 3, 2, 4, 1, 7, 6 > 763%% Default: 8 764%% KeyReg: v37 765%% TablePntrReg: r41 766%% 767%% should give code like: 768%% r41 <- KeyTbl 769%% r42 <- 0 770%% r43 <- [r41+16] 771%% if (r43 gt v37) then L17 (0.50) else L16 772%% L16: 773%% r42 <- 16 774%% goto L17 775%% L17: 776%% r46 <- r42 add 16 777%% r45 <- [r41+r46] 778%% if (r45 gt v37) then L21 (0.50) else L20 779%% L20: 780%% r42 <- r46 781%% goto L21 782%% L21: 783%% r48 <- r42 add 8 784%% r47 <- [r41+r48] 785%% if (r47 gt v37) then L23 (0.50) else L22 786%% L22: 787%% r42 <- r48 788%% goto L23 789%% L23: 790%% r50 <- r42 add 4 791%% r49 <- [r41+r50] 792%% if (r49 gt v37) then L25 (0.50) else L24 793%% L24: 794%% r42 <- r42 add 4 795%% goto L25 796%% L25: 797%% if (r42 gt 28) then L6 (0.50) else L18 798%% L18: 799%% r44 <- [r41+r42] 800%% if (r44 eq v37) then L19 (0.90) else L8 801%% L19: 802%% r42 <- r42 sra 2 803%% switch (r42) <L5, L3, L2, L4, L1, 804%% L7, L6> 805 806%% 807%% The search is done like a rolled out binary search, 808%% but instead of starting in the middle we start at 809%% the power of two closest above the middle. 810%% 811%% We let IndexReg point to the lower bound of our 812%% search, and then we speculatively look at a 813%% position at IndexReg + I where I is a power of 2. 814%% 815%% Example: Looking for 'h' in 816%% KeyTbl: < a, b, d, f, h, i, z > 817%% 818%% We start with IndexReg=0 and I=4 819%% < a, b, d, f, h, i, z > 820%% ^ ^ 821%% IndexReg + I 822%% 823%% 'f' < 'h' so we add I to IndexReg and divide I with 2 824%% IndexReg=4 and I=2 825%% < a, b, d, f, h, i, z > 826%% ^ ^ 827%% IndexReg + I 828%% 829%% 'i' > 'h' so we keep IndexReg and divide I with 2 830%% IndexReg=4 and I=1 831%% < a, b, d, f, h, i, z > 832%% ^ ^ 833%% IndexReg+ I 834%% Now we have found 'h' so we add I to IndexReg -> 5 835%% And we can load switch to the label at position 5 in 836%% the label table. 837%% 838%% Now since the wordsize is 4 all numbers above are 839%% Multiples of 4. 840 841tab(KeyList, LabelList, KeyReg, TablePntrReg, Default) -> 842 %% Calculate the size of the table: 843 %% the number of keys * wordsize 844 LastOffset = (length(KeyList)-1)*?WORDSIZE, 845 846 %% Calculate the power of two closest to the size of the table. 847 Pow2 = 1 bsl trunc(math:log(LastOffset) / math:log(2)), 848 849 %% Create some registers and lables that we need 850 IndexReg = hipe_rtl:mk_new_reg_gcsafe(), 851 Temp = hipe_rtl:mk_new_reg_gcsafe(), 852 Temp2 = hipe_rtl:mk_new_reg_gcsafe(), 853 Lab1 = hipe_rtl:mk_new_label(), 854 Lab2 = hipe_rtl:mk_new_label(), 855 Lab3 = hipe_rtl:mk_new_label(), 856 Lab4 = hipe_rtl:mk_new_label(), 857 858 %% Calculate the position to start looking at 859 Init = (LastOffset)-Pow2, 860 861 %% Create the code 862 [ 863 hipe_rtl:mk_move(IndexReg,hipe_rtl:mk_imm(0)), 864 hipe_rtl:mk_load(Temp,TablePntrReg,hipe_rtl:mk_imm(Init)), 865 hipe_rtl:mk_branch(Temp, geu, KeyReg, 866 hipe_rtl:label_name(Lab2), 867 hipe_rtl:label_name(Lab1), 0.5), 868 Lab1, 869 hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(Init+?WORDSIZE)), 870 hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), 871 Lab2] ++ 872 873 step(Pow2 div 2, TablePntrReg, IndexReg, KeyReg) ++ 874 875 [hipe_rtl:mk_branch(IndexReg, gt, hipe_rtl:mk_imm(LastOffset), 876 hipe_rtl:label_name(Default), 877 hipe_rtl:label_name(Lab3), 0.5), 878 Lab3, 879 hipe_rtl:mk_load(Temp2,TablePntrReg,IndexReg), 880 hipe_rtl:mk_branch(Temp2, eq, KeyReg, 881 hipe_rtl:label_name(Lab4), 882 hipe_rtl:label_name(Default), 0.9), 883 Lab4, 884 hipe_rtl:mk_alu(IndexReg, IndexReg, sra, 885 hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), 886 hipe_rtl:mk_sorted_switch(IndexReg, LabelList, KeyList) 887 ]. 888 889step(I,TablePntrReg,IndexReg,KeyReg) -> 890 Temp = hipe_rtl:mk_new_reg_gcsafe(), 891 TempIndex = hipe_rtl:mk_new_reg_gcsafe(), 892 Lab1 = hipe_rtl:mk_new_label(), 893 Lab2 = hipe_rtl:mk_new_label(), 894 [hipe_rtl:mk_alu(TempIndex, IndexReg, add, hipe_rtl:mk_imm(I)), 895 hipe_rtl:mk_load(Temp,TablePntrReg,TempIndex), 896 hipe_rtl:mk_branch(Temp, gtu, KeyReg, 897 hipe_rtl:label_name(Lab2), 898 hipe_rtl:label_name(Lab1) , 0.5), 899 Lab1] ++ 900 case ?WORDSIZE of 901 I -> %% Recursive base case 902 [hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(I)), 903 hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), 904 Lab2 905 ]; 906 _ -> %% Recursion case 907 [hipe_rtl:mk_move(IndexReg, TempIndex), 908 hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)), 909 Lab2 910 | step(I div 2, TablePntrReg, IndexReg, KeyReg) 911 ] 912 end. 913 914%%------------------------------------------------------------------------- 915 916lbls_from_cases([{_,L}|Rest], VarMap) -> 917 {Map,VarMap1} = lbls_from_cases(Rest, VarMap), 918 {RtlL, VarMap2} = hipe_rtl_varmap:icode_label2rtl_label(L,VarMap1), 919 %% {[{label,hipe_rtl:label_name(RtlL)}|Map],VarMap2}; 920 {[hipe_rtl:label_name(RtlL)|Map],VarMap2}; 921lbls_from_cases([], VarMap) -> 922 {[], VarMap}. 923 924%%------------------------------------------------------------------------- 925 926split_cases(L) -> 927 split_cases(L, [], []). 928 929split_cases([], Vs, Ls) -> {lists:reverse(Vs),lists:reverse(Ls)}; 930split_cases([{V,L}|Rest], Vs, Ls) -> 931 split_cases(Rest, [hipe_icode:const_value(V)|Vs], [L|Ls]). 932 933%%------------------------------------------------------------------------- 934%% 935%% {switch_tuple_arity,X,Fail,N,[{A1,L1},...,{AN,LN}]} 936%% 937%% if not boxed(X) goto Fail 938%% Hdr := *boxed_val(X) 939%% switch_int(Hdr,Fail,[{H(A1),L1},...,{H(AN),LN}]) 940%% where H(Ai) = make_arityval(Ai) 941%% 942%%------------------------------------------------------------------------- 943 944gen_switch_tuple(I, Map, ConstTab, _Options) -> 945 Var = hipe_icode:switch_tuple_arity_term(I), 946 {X, Map1} = hipe_rtl_varmap:icode_var2rtl_var(Var, Map), 947 Fail0 = hipe_icode:switch_tuple_arity_fail_label(I), 948 {Fail1, Map2} = hipe_rtl_varmap:icode_label2rtl_label(Fail0, Map1), 949 FailLab = hipe_rtl:label_name(Fail1), 950 {Cases, Map3} = 951 lists:foldr(fun({A,L}, {Rest,M}) -> 952 {L1,M1} = hipe_rtl_varmap:icode_label2rtl_label(L, M), 953 L2 = hipe_rtl:label_name(L1), 954 A1 = hipe_icode:const_value(A), 955 H1 = hipe_tagscheme:mk_arityval(A1), 956 {[{H1,L2}|Rest], M1} end, 957 {[], Map2}, 958 hipe_icode:switch_tuple_arity_cases(I)), 959 Hdr = hipe_rtl:mk_new_reg_gcsafe(), 960 IsBoxedLab = hipe_rtl:mk_new_label(), 961 {[hipe_tagscheme:test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), 962 FailLab, 0.9), 963 IsBoxedLab, 964 hipe_tagscheme:get_header(Hdr, X) | 965 gen_switch_int(Hdr, FailLab, Cases)], 966 Map3, ConstTab}. 967 968%% 969%% RTL-level switch-on-int 970%% 971 972gen_switch_int(X, FailLab, [{C,L}|Rest]) -> 973 NextLab = hipe_rtl:mk_new_label(), 974 [hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(C), L, 975 hipe_rtl:label_name(NextLab), 0.5), 976 NextLab | 977 gen_switch_int(X, FailLab, Rest)]; 978gen_switch_int(_, FailLab, []) -> 979 [hipe_rtl:mk_goto(FailLab)]. 980 981