1------------------------------------------------------------------------------
2--                                                                          --
3--                            Matreshka Project                             --
4--                                                                          --
5--         Localization, Internationalization, Globalization for Ada        --
6--                                                                          --
7--                        Runtime Library Component                         --
8--                                                                          --
9------------------------------------------------------------------------------
10--                                                                          --
11-- Copyright © 2014, Vadim Godunko <vgodunko@gmail.com>                     --
12-- All rights reserved.                                                     --
13--                                                                          --
14-- Redistribution and use in source and binary forms, with or without       --
15-- modification, are permitted provided that the following conditions       --
16-- are met:                                                                 --
17--                                                                          --
18--  * Redistributions of source code must retain the above copyright        --
19--    notice, this list of conditions and the following disclaimer.         --
20--                                                                          --
21--  * Redistributions in binary form must reproduce the above copyright     --
22--    notice, this list of conditions and the following disclaimer in the   --
23--    documentation and/or other materials provided with the distribution.  --
24--                                                                          --
25--  * Neither the name of the Vadim Godunko, IE nor the names of its        --
26--    contributors may be used to endorse or promote products derived from  --
27--    this software without specific prior written permission.              --
28--                                                                          --
29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS      --
30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT        --
31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR    --
32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT     --
33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,   --
34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED --
35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR   --
36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF   --
37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     --
38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS       --
39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.             --
40--                                                                          --
41------------------------------------------------------------------------------
42--  $Revision: 4822 $ $Date: 2014-04-08 20:42:04 +0400 (Tue, 08 Apr 2014) $
43------------------------------------------------------------------------------
44with Ada.Unchecked_Deallocation;
45
46with Matreshka.Internals.Unicode.Ucd;
47
48package body Matreshka.CLDR.Collation_Compiler is
49
50   type Collation_Element_Sequence_Access is
51     access all Matreshka.Internals.Unicode.Ucd.Collation_Element_Sequence;
52
53   type Collation_Second_Stage_Access is
54     access Matreshka.Internals.Unicode.Ucd.Collation_Second_Stage;
55
56   type Collation_First_Stage is
57     array (Matreshka.Internals.Unicode.Ucd.First_Stage_Index)
58       of Collation_Second_Stage_Access;
59
60   type Contractor_Array_Access is
61     access all Matreshka.Internals.Unicode.Ucd.Contractor_Array;
62
63   procedure Free is
64     new Ada.Unchecked_Deallocation
65          (Matreshka.Internals.Unicode.Ucd.Collation_Second_Stage,
66           Collation_Second_Stage_Access);
67
68   procedure Free is
69     new Ada.Unchecked_Deallocation
70          (Matreshka.Internals.Unicode.Ucd.Collation_Element_Sequence,
71           Collation_Element_Sequence_Access);
72
73   procedure Free is
74     new Ada.Unchecked_Deallocation
75          (Matreshka.Internals.Unicode.Ucd.Contractor_Array,
76           Contractor_Array_Access);
77
78   -------------------------------------
79   -- Construct_Collation_Information --
80   -------------------------------------
81
82   procedure Construct_Collation_Information
83    (Data   : Matreshka.CLDR.Collation_Data.Collation_Information;
84     Locale : not null access Matreshka.Internals.Locales.Locale_Data)
85   is
86      use type Matreshka.CLDR.Collation_Data.Collation_Record_Access;
87      use type Matreshka.Internals.Unicode.Code_Point;
88      use type Matreshka.Internals.Unicode.Ucd.Sequence_Index;
89
90      Expansion        : Collation_Element_Sequence_Access;
91      Expansion_Last   : Matreshka.Internals.Unicode.Ucd.Sequence_Index;
92      Contraction      : Contractor_Array_Access;
93      Contraction_Last : Matreshka.Internals.Unicode.Ucd.Sequence_Index;
94      Mapping          : Collation_First_Stage := (others => null);
95      Last_Variable    : Matreshka.Internals.Unicode.Ucd.Collation_Weight := 0;
96
97      procedure Append_Expansion
98       (Sequence : Matreshka.CLDR.Collation_Data.Collation_Element_Array;
99        First    : out Matreshka.Internals.Unicode.Ucd.Sequence_Index;
100        Last     : out Matreshka.Internals.Unicode.Ucd.Sequence_Index);
101      --  Appends specified expansion sequence to collectd set of expansion
102      --  sequences.
103
104      procedure Process_Contractors
105       (Start  : Matreshka.CLDR.Collation_Data.Collation_Record_Access;
106        Prefix : Matreshka.CLDR.Collation_Data.Code_Point_Array;
107        First  : out Matreshka.Internals.Unicode.Ucd.Sequence_Count;
108        Last   : out Matreshka.Internals.Unicode.Ucd.Sequence_Count);
109      --  Process contractors recursively.
110
111      ----------------------
112      -- Append_Expansion --
113      ----------------------
114
115      procedure Append_Expansion
116       (Sequence : Matreshka.CLDR.Collation_Data.Collation_Element_Array;
117        First    : out Matreshka.Internals.Unicode.Ucd.Sequence_Index;
118        Last     : out Matreshka.Internals.Unicode.Ucd.Sequence_Index)
119      is
120         use type Matreshka.Internals.Unicode.Ucd.Collation_Element_Sequence;
121
122         Internal        :
123           Matreshka.Internals.Unicode.Ucd.Collation_Element_Sequence
124            (1 .. Sequence'Length);
125         Internal_Last   : Matreshka.Internals.Unicode.Ucd.Sequence_Index
126           := Internal'First;
127         Expansion_First : Matreshka.Internals.Unicode.Ucd.Sequence_Index;
128
129      begin
130         --  Convert sequence of collation elements into format used by
131         --  internal database and update greatest weight of variable
132         --  character.
133
134         for Element of Sequence loop
135            Internal (Internal_Last) :=
136             (Primary   => Element.Primary,
137              Secondary => Element.Secondary,
138              Trinary   => Element.Trinary);
139            Internal_Last := Internal_Last + 1;
140
141            if Element.Is_Variable then
142               Last_Variable :=
143                 Matreshka.Internals.Unicode.Ucd.Collation_Weight'Max
144                  (Last_Variable, Element.Primary);
145            end if;
146         end loop;
147
148         if Expansion = null then
149            Expansion :=
150              new Matreshka.Internals.Unicode.Ucd.Collation_Element_Sequence
151                   (Matreshka.Internals.Unicode.Ucd.Sequence_Index'Range);
152            Expansion_First := Expansion'First;
153            Expansion_Last := Expansion'First + Internal'Length - 1;
154            Expansion (Expansion'First .. Expansion_Last) := Internal;
155
156         else
157            --  Lookup to reuse of existent sequences in the table is not time
158            --  efficient, thus it is not used here.
159
160            Expansion_First := Expansion_Last + 1;
161            Expansion_Last  := Expansion_Last + Internal'Length;
162            Expansion (Expansion_First .. Expansion_Last) := Internal;
163         end if;
164
165         First := Expansion_First;
166         Last  := Expansion_Last;
167      end Append_Expansion;
168
169      ------------------------------
170      -- Process_Code_Point_Chain --
171      ------------------------------
172
173      procedure Process_Code_Point_Chain
174       (Starter : Matreshka.Internals.Unicode.Code_Point)
175      is
176         First  : constant Matreshka.Internals.Unicode.Ucd.First_Stage_Index
177           := Matreshka.Internals.Unicode.Ucd.First_Stage_Index
178               (Starter / Internals.Unicode.Ucd.Second_Stage_Index'Modulus);
179         Second : constant Matreshka.Internals.Unicode.Ucd.Second_Stage_Index
180           := Matreshka.Internals.Unicode.Ucd.Second_Stage_Index
181               (Starter mod Internals.Unicode.Ucd.Second_Stage_Index'Modulus);
182
183         Current_Record : Matreshka.CLDR.Collation_Data.Collation_Record_Access
184           := Data.Collations (Starter);
185
186      begin
187         --  Allocate block when it wasn't allocated.
188
189         if Mapping (First) = null then
190            Mapping (First) :=
191              new Matreshka.Internals.Unicode.Ucd.Collation_Second_Stage'
192                   (others => (0, 0, 0, 0));
193         end if;
194
195         --  Lookup for collation record of code point itself (without
196         --  contractors) and process it.
197
198         while Current_Record /= null loop
199            --  XXX Loop can be removed if collations chain will be sorted in
200            --  contractors order (single character will be first element).
201
202            if Current_Record.Contractors'Length = 1 then
203               Append_Expansion
204                (Current_Record.Collations.all,
205                 Mapping (First) (Second).Expansion_First,
206                 Mapping (First) (Second).Expansion_Last);
207
208               exit;
209            end if;
210
211            Current_Record := Current_Record.Next;
212         end loop;
213
214         Process_Contractors
215          (Data.Collations (Starter),
216           (1 => Starter),
217           Mapping (First) (Second).Contractor_First,
218           Mapping (First) (Second).Contractor_Last);
219      end Process_Code_Point_Chain;
220
221      -------------------------
222      -- Process_Contractors --
223      -------------------------
224
225      procedure Process_Contractors
226       (Start  : Matreshka.CLDR.Collation_Data.Collation_Record_Access;
227        Prefix : Matreshka.CLDR.Collation_Data.Code_Point_Array;
228        First  : out Matreshka.Internals.Unicode.Ucd.Sequence_Count;
229        Last   : out Matreshka.Internals.Unicode.Ucd.Sequence_Count)
230      is
231         use type Matreshka.CLDR.Collation_Data.Code_Point_Array;
232
233         Current_Record : Matreshka.CLDR.Collation_Data.Collation_Record_Access
234           := Start;
235
236      begin
237         First := 0;
238         Last  := 0;
239
240         --  Process all contractors with currently processed length and
241         --  started from given prefix.
242
243         while Current_Record /= null loop
244            if Current_Record.Contractors'Length = Prefix'Length + 1
245              and Current_Record.Contractors
246                   (Current_Record.Contractors'First
247                      .. Current_Record.Contractors'Last - 1) = Prefix
248            then
249               if Contraction = null then
250                  Contraction :=
251                    new Matreshka.Internals.Unicode.Ucd.Contractor_Array
252                         (Matreshka.Internals.Unicode.Ucd.Sequence_Index);
253                  Contraction_Last := Contraction'First;
254
255               else
256                  Contraction_Last := Contraction_Last + 1;
257               end if;
258
259               if First = 0 then
260                  First := Contraction_Last;
261               end if;
262
263               Last := Contraction_Last;
264
265               Contraction (Contraction_Last).Code :=
266                 Current_Record.Contractors (Current_Record.Contractors'Last);
267
268               Append_Expansion
269                (Current_Record.Collations.all,
270                 Contraction (Contraction_Last).Expansion_First,
271                 Contraction (Contraction_Last).Expansion_Last);
272            end if;
273
274            Current_Record := Current_Record.Next;
275         end loop;
276
277         if First /= 0 then
278            for C in First .. Last loop
279               Process_Contractors
280                (Start,
281                 Prefix & Contraction (C).Code,
282                 Contraction (C).Contractor_First,
283                 Contraction (C).Contractor_Last);
284            end loop;
285         end if;
286      end Process_Contractors;
287
288      use type Matreshka.Internals.Unicode.Ucd.First_Stage_Index;
289      use type Matreshka.Internals.Unicode.Ucd.Collation_Second_Stage;
290
291      Replaced :
292        array (Matreshka.Internals.Unicode.Ucd.First_Stage_Index) of Boolean
293          := (others => False);
294
295   begin
296      --  Collect expansion and constraction information.
297
298      for Starting_Code in Data.Collations'Range loop
299         Process_Code_Point_Chain (Starting_Code);
300      end loop;
301
302      --  Remove duplicate tables and share one copy each time it duplicates.
303
304      for J in Mapping'Range loop
305         if not Replaced (J) then
306            for K in J + 1 .. Mapping'Last loop
307               if Mapping (J).all = Mapping (K).all then
308                  Free (Mapping (K));
309                  Mapping (K) := Mapping (J);
310                  Replaced (K) := True;
311               end if;
312           end loop;
313         end if;
314      end loop;
315
316      --  Construct collation information for locale finally.
317
318      Locale.Collation.Expansion :=
319        new Matreshka.Internals.Unicode.Ucd.Collation_Element_Sequence'
320             (Expansion (Expansion'First .. Expansion_Last));
321      Locale.Collation.Contraction :=
322        new Matreshka.Internals.Unicode.Ucd.Contractor_Array'
323             (Contraction (Contraction'First .. Contraction_Last));
324
325      declare
326         Aux : Matreshka.Internals.Unicode.Ucd.Collation_First_Stage
327           := (others =>
328                 Matreshka.Internals.Unicode.Ucd.Collation_Second_Stage_Access
329                  (Mapping (1)));
330
331      begin
332         for J in Mapping'Range loop
333            Aux (J) :=
334              Matreshka.Internals.Unicode.Ucd.Collation_Second_Stage_Access
335               (Mapping (J));
336         end loop;
337
338         Locale.Collation.Mapping :=
339           new Matreshka.Internals.Unicode.Ucd.Collation_First_Stage'(Aux);
340      end;
341
342      Locale.Collation.Last_Variable := Last_Variable;
343      Locale.Collation.Backwards := False;
344      --  XXX 'backward' must be taken from collation data.
345
346      --  Release auxiliary data.
347
348      Free (Expansion);
349      Free (Contraction);
350   end Construct_Collation_Information;
351
352end Matreshka.CLDR.Collation_Compiler;
353