%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2000-2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

%% This program is used to generate a header file with data for
%% normalizing denormalized unicode.

%% The C header is generated from a text file containing tuples in the
%% following format:
%% {RevList,Translation}
%% Where 'RevList' is a reversed list of the denormalized representation of
%% the character 'Translation'. An example would be the Swedish character
%% 'ö', which would be represented in the file as:
%% {[776,111],246}, as the denormalized representation of codepoint 246
%% is [111,776] (i.e. an 'o' followed by the "double dot" accent, character 776),
%% while 'ä' instead is represented as {[776,97],228}, as the denormalized
%% form would be [97,776] (same accent but an 'a' instead).
%% The data file is generated from the table on Apple's developer connection
%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
%% The generation is done whenever new data is present (i.e. dec.dat has
%% to be changed) and not for every build. The product (the C header) is copied
%% to $ERL_TOP/erts/beam after generation and checked in.
%% The program and the data file are included for reference.

-module(dec).

-compile(export_all).

-define(HASH_SIZE_FACTOR,2).
-define(BIG_PREFIX_SIZE,392).

-define(INPUT_FILE_NAME,"dec.dat").
-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").

%% read(FName) -> [{RevList,Translation}]
%%
%% Reads all terms from FName with file:consult/1 and keeps the
%% two-tuples whose first element is a list of more than one element.
%% Crashes with badmatch if the file cannot be consulted.
read(FName) ->
    {ok,L} = file:consult(FName),
    [{A,B} || {A,B} <- L,
              length(A) > 1 % , hd(A) < 769
    ].

%% dec() -> ok
%%
%% Entry point: reads ?INPUT_FILE_NAME, groups the entries into a tree
%% and writes the generated C header to ?OUTPUT_FILE_NAME.
%%
%% The input is read and grouped before the output file is opened, so
%% bad input does not leave a truncated header behind. Once the file is
%% open, all writing happens inside try ... after so that the file is
%% closed even if generation crashes half way through; the exception is
%% still propagated to the caller.
dec() ->
    L = read(?INPUT_FILE_NAME),
    G = group(L),
    {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
    try
        io:format
          (Out,
           "/*~n"
           "* %CopyrightBegin%~n"
           "*~n"
           "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
           "*~n"
           "* Licensed under the Apache License, Version 2.0 (the \"License\");~n"
           "* you may not use this file except in compliance with the License.~n"
           "* You may obtain a copy of the License at~n"
           "*~n"
           "* http://www.apache.org/licenses/LICENSE-2.0~n"
           "*~n"
           "* Unless required by applicable law or agreed to in writing, software~n"
           "* distributed under the License is distributed on an \"AS IS\" BASIS,~n"
           "* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.~n"
           "* See the License for the specific language governing permissions and~n"
           "* limitations under the License.~n"
           "*~n"
           "* %CopyrightEnd%~n"
           "*/~n"
           "/*~n"
           "* This file is automatically generated by ~p.erl, "
           "do not edit manually~n"
           "*/~n",
           [?MODULE]),

        io:format(Out,
                  "#define HASH_SIZE_FACTOR ~w~n"
                  "typedef struct _compose_entry {~n"
                  " Uint16 c;~n"
                  " Uint16 res;~n"
                  " Uint16 num_subs;~n"
                  " struct _compose_entry *subs;~n"
                  " int *hash;~n"
                  "} CompEntry;~n~n"
                  "static int compose_tab_size = ~p;~n",
                  [?HASH_SIZE_FACTOR,length(G)]),
        d(Out,G,[],0),
        %% The candidate bitmap has 102 words of 32 bits each.
        PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
        dump_prefixes(Out,PreTab)
        %% Using this cuts down on the searching in the
        %% actual implementation, but wastes memory with little real gain..
        %% LL = lists:flatten([PartList || {PartList,_} <- L]),
        %% BigPreTab = tuple_to_list(
        %%               make_big_prefixes(LL,
        %%                                 erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
        %% dump_big_prefixes(Out,BigPreTab),
    after
        file:close(Out)
    end,
    ok.

%% d(Out,List,D,C) -> ok
%%
%% Writes one level of the tree to Out. The tables of all sub-levels are
%% written first, then the hash table of this level and finally the
%% CompEntry table of this level (presumably so that the generated C
%% arrays are defined before the table that refers to them).
%%   List - list of {CodePoint,Subs,Result} nodes of this level
%%   D    - list of parent indices, innermost first; names the table
%%   C    - index of the first node in List
d(Out,List,D,C) ->
    d_sub(Out,List,D,C),
    d_top_hash(Out,List,D,C),
    d_top(Out,List,D,C).

%% d_sub(Out,List,D,C) -> ok
%%
%% Walks List and, for every node with a non-empty Subs list, writes
%% the tables of that sub-level via d/4. C is the index of the current
%% node and is pushed onto D to name the sub-level.
d_sub(_Out,[],_D,_C) ->
    ok;
d_sub(Out,[{_CP,[],_Res}|T],D,C) ->
    %% Leaf node: no sub-table to write.
    d_sub(Out,T,D,C+1);
d_sub(Out,[{_CP,Subs,_Res0}|T],D,C) ->
    d(Out,Subs,[C|D],0),
    d_sub(Out,T,D,C+1).

%% d_top(Out,L,D,C) -> ok
%%
%% Writes the "static CompEntry <name>[]" array for the nodes in L,
%% where <name> is format_depth(D).
d_top(Out,L,D,C) ->
    io:format(Out,"static CompEntry ~s[] = {~n",[format_depth(D)]),
    d_top_1(Out,L,D,C),
    io:format(Out,"}; /* ~s */ ~n",[format_depth(D)]).

%% d_top_1(Out,L,D,C) -> ok
%%
%% Writes one initializer per node in L. A node without sub-nodes
%% carries its result and NULL pointers; a node with sub-nodes carries
%% the number of sub-nodes and the names of its sub-table and hash
%% table. Every initializer except the last is followed by a comma.
d_top_1(_Out,[],_D,_C) ->
    ok;
d_top_1(Out,[{CP,[],Res}|T],D,C) ->
    io:format(Out,
              "{~w, ~w, 0, NULL, NULL}",[CP,Res]),
    if
        T =:= [] ->
            io:format(Out,"~n",[]);
        true ->
            io:format(Out,",~n",[])
    end,
    d_top_1(Out,T,D,C+1);
d_top_1(Out,[{CP,Subs,_Res}|T],D,C) ->
    SubName = format_depth([C|D]),
    io:format(Out,
              "{~w, 0, ~w, ~s, ~s}",[CP,length(Subs),
                                     SubName,
                                     "hash_"++SubName]),
    if
        T =:= [] ->
            io:format(Out,"~n",[]);
        true ->
            io:format(Out,",~n",[])
    end,
    d_top_1(Out,T,D,C+1).


%% d_top_hash(Out,List,D,C) -> ok
%%
%% Writes the "static int hash_<name>[]" array for this level. The
%% array has length(List)*?HASH_SIZE_FACTOR slots; each slot holds the
%% index into List of the node hashed there, or -1 if unused. The
%% tuple is printed with ~p, whose {...} syntax doubles as a C array
%% initializer.
d_top_hash(Out,List,D,_C) ->
    HSize = length(List)*?HASH_SIZE_FACTOR,
    Name = "hash_"++format_depth(D),
    io:format(Out,"static int ~s[~p] = ~n",[Name,HSize]),
    Tup = d_top_hash_1(List,0,erlang:make_tuple(HSize,-1),HSize),
    io:format(Out,"~p; /* ~s */ ~n",[Tup,Name]).

%% d_top_hash_1(List,Index,Hash,HSize) -> Hash1
%%
%% Inserts the position (counted from Index) of every node in List
%% into the tuple Hash, starting the search for a free slot at
%% CodePoint rem HSize.
d_top_hash_1([],_,Hash,_HSize) ->
    Hash;
d_top_hash_1([{CP,_,_}|T],Index,Hash,HSize) ->
    Bucket = hash_search(Hash,HSize,CP rem HSize),
    d_top_hash_1(T,Index+1,erlang:setelement(Bucket+1,Hash,Index),HSize).

%% hash_search(Hash,HSize,Bucket) -> FreeBucket
%%
%% Linear probing: returns the first zero-based bucket, starting at
%% Bucket and wrapping around at HSize, whose slot in Hash is -1.
%% Does not terminate if Hash has no free slot.
hash_search(Hash,_HSize,Bucket) when element(Bucket+1,Hash) =:= -1 ->
    Bucket;
hash_search(Hash,HSize,Bucket) ->
    hash_search(Hash,HSize,(Bucket + 1) rem HSize).

%% format_depth(D) -> string()
%%
%% Builds the C identifier of a table from its path D (a list of
%% integer indices, innermost first): "compose_tab" followed by
%% "_<index>" for every index, outermost first.
format_depth(D) ->
    Suffixes = [[$_ | integer_to_list(Ix)] || Ix <- lists:reverse(D)],
    lists:flatten(["compose_tab" | Suffixes]).




%% make_prefix_table(Nodes,Table) -> Table1
%%
%% Sets one bit in the tuple Table for the code point of every
%% {CodePoint,_,_} node with CodePoint =< 4023; all other elements of
%% Nodes are skipped. Each tuple element holds 32 bits and the first
%% element corresponds to word 24 (code point 768), so a code point
%% below 768 makes element/2 fail with badarg.
make_prefix_table(Nodes,Table) ->
    SetBit =
        fun({CodePoint,_,_},Tab) when CodePoint =< 4023 ->
                Word = (CodePoint div 32) + 1 - 24,
                Mask = 1 bsl (CodePoint rem 32),
                setelement(Word,Tab,element(Word,Tab) bor Mask);
           (_Other,Tab) ->
                Tab
        end,
    lists:foldl(SetBit,Table,Nodes).

%% dump_prefixes(Out,L) -> ok
%%
%% Writes the offset define and the comp_candidate_map array, with the
%% integers in L as its elements, to Out.
dump_prefixes(Out,L) ->
    io:format(Out,
              "#define COMP_CANDIDATE_MAP_OFFSET 24~n"
              "static Uint32 comp_candidate_map[] = {~n",
              []),
    dump_prefixes_1(Out,L).

%% dump_prefixes_1(Out,L) -> ok
%%
%% Writes every integer in the non-empty list L as an eight digit,
%% zero padded hexadecimal constant, one per line and comma separated,
%% and then closes the array.
dump_prefixes_1(Out,[Word|More]) ->
    case More of
        [] ->
            io:format(Out,
                      " 0x~8.16.0BU~n"
                      "};~n",
                      [Word]);
        _ ->
            io:format(Out," 0x~8.16.0BU,~n",[Word]),
            dump_prefixes_1(Out,More)
    end.

%% make_big_prefixes([],Table) ->
%%     Table;
%% make_big_prefixes([C|T],Table) ->
%%     Index = (C div 32) + 1,
%%     Pos = C rem 32,
%%     X = element(Index,Table),
%%     Y = X bor (1 bsl Pos),
%%     NewTab = setelement(Index,Table,Y),
%%     make_big_prefixes(T,NewTab).

%% dump_big_prefixes(Out,L) ->
%%     io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
%%     io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
%%     dump_prefixes_1(Out,L).

%% pick(Entries,M,Acc) -> {Picked,Rest}
%%
%% Splits Entries at the first entry whose code list does not start
%% with M. Picked is the reversed Acc followed by the leading entries
%% with M stripped from their code lists; Rest is the untouched
%% remainder of Entries.
pick(Entries,M,Acc) ->
    StartsWithM = fun({[First|_],_}) -> First =:= M end,
    {Leading,Rest} = lists:splitwith(StartsWithM,Entries),
    Stripped = [{Codes,N} || {[_|Codes],N} <- Leading],
    {lists:reverse(Acc,Stripped),Rest}.


%% group(Entries) -> [{CodePoint,SubNodes,Result}]
%%
%% Turns a list of {CodeList,Result} entries into a tree. Consecutive
%% entries sharing the same first code point become one node whose
%% SubNodes are built from the remaining code points. The node gets
%% the Result of the entry whose code list is exactly [CodePoint] when
%% that entry comes first, and 0 otherwise.
group([]) ->
    [];
group([{[Code|More],Res}|Tail]=All) ->
    {NodeRes,Candidates} =
        case More of
            [] -> {Res,Tail};   % the entry is the node itself
            _  -> {0,All}       % node only exists as a prefix
        end,
    {Children,Others} = pick(Candidates,Code,[]),
    [{Code,group(Children),NodeRes}|group(Others)].