1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2000-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21%% This program is used to generate a header file with data for
22%% normalizing denormalized unicode.
23
24%% The C header is generated from a text file containing tuples in the
25%% following format:
26%% {RevList,Translation}
%% Where 'RevList' is a reversed list of the denormalized representation of
%% the character 'Translation'. An example would be the Swedish character
%% 'ö', which would be represented in the file as:
%% {[776,111],246}, as the denormalized representation of codepoint 246
%% is [111,776] (i.e. an 'o' followed by the "double dot" accent character 776),
%% while 'ä' instead is represented as {[776,97],228}, as the denormalized
%% form would be [97,776] (same accent but an 'a' instead).
%% The data file is generated from the table on Apple's developer connection
%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
%% The header is regenerated only when new data is present (i.e. dec.dat has
%% to be changed), not for every build. The product (the C header) is copied
%% to $ERL_TOP/erts/beam after generation and checked in.
%% The program and the data file are included for reference.
40
%% Generator module; dec/0 is the entry point that writes the C header.
-module(dec).

%% All functions in the module are exported.
-compile(export_all).

%% Each generated hash table has this many slots per entry it indexes
%% (see d_top_hash/4). The value is also written to the header as a #define.
-define(HASH_SIZE_FACTOR,2).
%% Only referenced by the commented-out "big prefix" code in this file.
-define(BIG_PREFIX_SIZE,392).

%% Names of the data file read and of the header file written by dec/0.
-define(INPUT_FILE_NAME,"dec.dat").
-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
50
%% Consults the file FName and returns, in file order, every
%% {RevList,Translation} pair whose RevList holds more than one element.
%% Terms of any other shape are dropped. Crashes with a badmatch if the
%% file cannot be consulted.
read(FName) ->
    {ok,Terms} = file:consult(FName),
    [Pair || {RevList,_Translation} = Pair <- Terms, length(RevList) > 1].
56
%% Entry point: reads ?INPUT_FILE_NAME, groups the entries into a tree
%% and writes the C header ?OUTPUT_FILE_NAME. Returns ok.
%%
%% The header consists of, in order: a license/"generated file" banner,
%% the CompEntry type and compose_tab_size, all composition and hash
%% tables (see d/4) and finally the candidate bitmap (see dump_prefixes/2).
%%
%% Crashes with a badmatch if the input cannot be read or the output
%% cannot be opened. The output file is closed even when one of the
%% generation steps crashes.
dec() ->
    L = read(?INPUT_FILE_NAME),
    G = group(L),
    {ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
    try
        io:format
          (Out,
           "/*~n"
           "* %CopyrightBegin%~n"
           "*~n"
           "* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
           "*~n"
           "* Licensed under the Apache License, Version 2.0 (the \"License\");~n"
           "* you may not use this file except in compliance with the License.~n"
           "* You may obtain a copy of the License at~n"
           "*~n"
           "*     http://www.apache.org/licenses/LICENSE-2.0~n"
           "*~n"
           "* Unless required by applicable law or agreed to in writing, software~n"
           "* distributed under the License is distributed on an \"AS IS\" BASIS,~n"
           "* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.~n"
           "* See the License for the specific language governing permissions and~n"
           "* limitations under the License.~n"
           "*~n"
           "* %CopyrightEnd%~n"
           "*/~n"
           "/*~n"
           "* This file is automatically generated by ~p.erl, "
           "do not edit manually~n"
           "*/~n",
           [?MODULE]),

        io:format(Out,
                  "#define HASH_SIZE_FACTOR ~w~n"
                  "typedef struct _compose_entry {~n"
                  "    Uint16 c;~n"
                  "    Uint16 res;~n"
                  "    Uint16 num_subs;~n"
                  "    struct _compose_entry *subs;~n"
                  "    int *hash;~n"
                  "} CompEntry;~n~n"
                  "static int compose_tab_size = ~p;~n",
                  [?HASH_SIZE_FACTOR,length(G)]),
        d(Out,G,[],0),
        %% 102 words: make_prefix_table/2 accepts code points up to 4023
        %% and maps them to word (C div 32) + 1 - 24, so 102 is the
        %% highest index it can address.
        PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
        dump_prefixes(Out,PreTab),
        %% Using this cuts down on the searching in the
        %% actual implementation, but wastes memory with little real gain..
        %%    LL = lists:flatten([PartList || {PartList,_} <- L]),
        %%    BigPreTab = tuple_to_list(
        %%                  make_big_prefixes(LL,
        %%                                    erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
        %%    dump_big_prefixes(Out,BigPreTab),
        ok
    after
        %% Runs on success and on failure alike, so the file handle is
        %% never left open when a generation step crashes.
        file:close(Out)
    end.
111
112
113
%% Writes all C definitions for one level of the composition tree to Out.
%%   Entries - the {CP,Subs,Res} entries of this level
%%   Path    - indices leading to this level, innermost first
%%   First   - index assigned to the first entry of Entries
%% The sub-level tables are written first, then this level's hash table
%% and last this level's CompEntry array, because the array refers to
%% the sub-level tables by name.
d(Out,Entries,Path,First) ->
    d_sub(Out,Entries,Path,First),
    d_top_hash(Out,Entries,Path,First),
    d_top(Out,Entries,Path,First).
%% Walks the entries of one level and, for every entry that has a
%% non-empty sub-level, emits that sub-level through d/4. The entry's
%% index (counted from First) is pushed onto Path to name the sub-level.
%% Entries without subs only advance the index. Returns ok.
d_sub(Out,Entries,Path,First) ->
    lists:foldl(fun({_CP,[],_Res},Index) ->
                        Index+1;
                   ({_CP,Subs,_Res},Index) ->
                        d(Out,Subs,[Index|Path],0),
                        Index+1
                end,First,Entries),
    ok.
%% Writes the CompEntry array for one level: the opening line carrying
%% the table name derived from Path, one initializer per entry (see
%% d_top_1/4) and the closing line, which repeats the name in a comment.
d_top(Out,Entries,Path,First) ->
    Name = format_depth(Path),
    io:format(Out,"static CompEntry ~s[] = {~n",[Name]),
    d_top_1(Out,Entries,Path,First),
    io:format(Out,"}; /* ~s */ ~n",[Name]).
129
%% Writes one C initializer per entry of a level, separated by commas.
%% An entry without subs becomes {CP, Res, 0, NULL, NULL}. An entry with
%% subs becomes {CP, 0, NumSubs, SubTable, SubHash}, where the two names
%% are derived from the entry's index pushed onto Path; its Res is not
%% written. Index is the index of the first entry in the list.
d_top_1(_Out,[],_Path,_Index) ->
    ok;
d_top_1(Out,[{CP,[],Res}|Rest],Path,Index) ->
    io:format(Out,
              "{~w, ~w, 0, NULL, NULL}",[CP,Res]),
    d_top_sep(Out,Rest),
    d_top_1(Out,Rest,Path,Index+1);
d_top_1(Out,[{CP,Subs,_Res}|Rest],Path,Index) ->
    SubName = format_depth([Index|Path]),
    io:format(Out,
              "{~w, 0, ~w, ~s, ~s}",[CP,length(Subs),
                                      SubName,
                                      "hash_"++SubName]),
    d_top_sep(Out,Rest),
    d_top_1(Out,Rest,Path,Index+1).

%% Ends the initializer just written: a bare newline after the last
%% entry, a comma and newline when more entries follow.
d_top_sep(Out,[]) ->
    io:format(Out,"~n",[]);
d_top_sep(Out,_More) ->
    io:format(Out,",~n",[]).
154
155
%% Writes the hash table of one level as a C int array named
%% "hash_" followed by the level's table name. The table has
%% ?HASH_SIZE_FACTOR slots per entry; unused slots hold -1 and the
%% others hold an entry index (see d_top_hash_1/4). The table is built
%% as an Erlang tuple and printed with ~p, so its {...} form is the
%% array initializer in the generated file.
d_top_hash(Out,List,D,_C) ->
    Slots = length(List)*?HASH_SIZE_FACTOR,
    Name = "hash_"++format_depth(D),
    io:format(Out,"static int ~s[~p] = ~n",[Name,Slots]),
    Table = d_top_hash_1(List,0,erlang:make_tuple(Slots,-1),Slots),
    io:format(Out,"~p; /* ~s */ ~n",[Table,Name]).
161
%% Inserts the entries of List into the hash tuple Hash, which has
%% HSize slots. Each entry is stored as its position in the list
%% (counted from Index) in the first free slot found by hash_search/3,
%% starting at CP rem HSize. Returns the filled tuple.
d_top_hash_1(List,Index,Hash,HSize) ->
    {_Next,Filled} =
        lists:foldl(fun({CP,_,_},{Pos,Tab}) ->
                            Slot = hash_search(Tab,HSize,CP rem HSize),
                            {Pos+1,erlang:setelement(Slot+1,Tab,Pos)}
                    end,{Index,Hash},List),
    Filled.
167
%% Linear probing: returns the first zero-based bucket, starting at
%% Bucket and wrapping around at HSize, whose slot in Hash still holds
%% -1. Does not terminate if no slot holds -1.
hash_search(Hash,HSize,Bucket) ->
    if
        element(Bucket+1,Hash) =:= -1 ->
            Bucket;
        true ->
            hash_search(Hash,HSize,(Bucket + 1) rem HSize)
    end.
172
%% Builds the C identifier of the table reached through the index path
%% D (innermost index first): "compose_tab" followed by "_<index>" for
%% every index, outermost first. The empty path yields "compose_tab".
format_depth(D) ->
    Suffixes = [[$_|integer_to_list(Index)] || Index <- lists:reverse(D)],
    lists:append(["compose_tab"|Suffixes]).
175
176
177
178
%% Sets one bit in the bitmap tuple Table for every entry {C,_,_} with
%% C =< 4023: bit (C rem 32) of word (C div 32) + 1 - 24. Entries with a
%% larger C, and list elements of any other shape, are skipped.
%% Returns the updated tuple.
%% NOTE(review): a C below 768 gives a word index below 1 and crashes
%% in element/2 - presumably the data never contains one; confirm.
make_prefix_table(Entries,Table) ->
    lists:foldl(fun({C,_,_},Tab) when C =< 4023 ->
                        Word = (C div 32) + 1 - 24,
                        Bit = 1 bsl (C rem 32),
                        setelement(Word,Tab,element(Word,Tab) bor Bit);
                   (_Skipped,Tab) ->
                        Tab
                end,Table,Entries).
190
%% Writes the candidate bitmap to Out: the offset #define, the opening
%% line of the comp_candidate_map array and then the words of L
%% (see dump_prefixes_1/2, which also closes the array).
dump_prefixes(Out,Words) ->
    io:format(Out,"#define COMP_CANDIDATE_MAP_OFFSET 24~n",[]),
    io:format(Out,"static Uint32 comp_candidate_map[] = {~n",[]),
    dump_prefixes_1(Out,Words).
%% Writes each word of a non-empty list as an 8-digit, zero-padded
%% hexadecimal constant with a U suffix, one per line. All words but
%% the last are followed by a comma; after the last word the closing
%% "};" line is written.
dump_prefixes_1(Out,[Word|More]) ->
    case More of
        [] ->
            io:format(Out,"    0x~8.16.0BU~n",[Word]),
            io:format(Out,"};~n",[]);
        _ ->
            io:format(Out,"    0x~8.16.0BU,~n",[Word]),
            dump_prefixes_1(Out,More)
    end.
201
202%% make_big_prefixes([],Table) ->
203%%     Table;
204%% make_big_prefixes([C|T],Table) ->
205%%     Index = (C div 32) + 1,
206%%     Pos = C rem 32,
207%%     X = element(Index,Table),
208%%     Y = X bor (1 bsl Pos),
209%%     NewTab = setelement(Index,Table,Y),
210%%     make_big_prefixes(T,NewTab).
211
212%% dump_big_prefixes(Out,L) ->
213%%     io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
214%%     io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
215%%     dump_prefixes_1(Out,L).
216
%% Splits L at the first entry whose list does not start with M.
%% Returns {Picked,Rest}: Picked is the reversed Acc followed by the
%% leading entries that start with M, each with that first element
%% removed; Rest is the untouched remainder of L.
pick(L,M,Acc) ->
    {Leading,Rest} = lists:splitwith(fun({[H|_],_}) -> H =:= M end,L),
    Stripped = [{Tail,N} || {[_|Tail],N} <- Leading],
    {lists:reverse(Acc,Stripped),Rest}.
223
224
%% Turns a list of {RevList,Translation} pairs into a tree of
%% {CP,Subs,Res} entries, grouping consecutive pairs whose RevList
%% starts with the same element CP. When the first pair of a run is
%% exactly [CP], its Translation becomes Res and the run is taken from
%% the pairs after it; otherwise Res is 0 and the run includes that pair.
%% Subs is the grouped tree of the run with CP removed from each list.
group([]) ->
    [];
group([{[H|More],N}|T]=L) ->
    {Res,Candidates} =
        case More of
            [] -> {N,T};
            _  -> {0,L}
        end,
    {Part,Rest} = pick(Candidates,H,[]),
    [{H,group(Part),Res}|group(Rest)].
233
234
235
236
237
238