1------------------------------------------------------------------------------
2--                                                                          --
3--                            Matreshka Project                             --
4--                                                                          --
5--         Localization, Internationalization, Globalization for Ada        --
6--                                                                          --
7--                        Runtime Library Component                         --
8--                                                                          --
9------------------------------------------------------------------------------
10--                                                                          --
11-- Copyright © 2009-2012, Vadim Godunko <vgodunko@gmail.com>                --
12-- All rights reserved.                                                     --
13--                                                                          --
14-- Redistribution and use in source and binary forms, with or without       --
15-- modification, are permitted provided that the following conditions       --
16-- are met:                                                                 --
17--                                                                          --
18--  * Redistributions of source code must retain the above copyright        --
19--    notice, this list of conditions and the following disclaimer.         --
20--                                                                          --
21--  * Redistributions in binary form must reproduce the above copyright     --
22--    notice, this list of conditions and the following disclaimer in the   --
23--    documentation and/or other materials provided with the distribution.  --
24--                                                                          --
25--  * Neither the name of the Vadim Godunko, IE nor the names of its        --
26--    contributors may be used to endorse or promote products derived from  --
27--    this software without specific prior written permission.              --
28--                                                                          --
29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS      --
30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT        --
31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR    --
32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT     --
33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,   --
34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED --
35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR   --
36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF   --
37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     --
38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS       --
39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.             --
40--                                                                          --
41------------------------------------------------------------------------------
42--  $Revision: 3178 $ $Date: 2012-10-09 11:57:26 +0400 (Tue, 09 Oct 2012) $
43------------------------------------------------------------------------------
44--  This package containts type declarations for UTF-16 encoded strings and
45--  useful subprograms.
46--
47--  For all subprogram those names starts from Unchecked_ all run time checks
48--  are disabled; but to compensate this, assertions to check ranges and
49--  validity of source data are added. So, developers can enable assertion
50--  checks to simplify debugging of their code.
51------------------------------------------------------------------------------
52pragma Ada_2012;
53
54with Matreshka.Internals.Unicode;
55
56package Matreshka.Internals.Utf16 is
57
58   pragma Pure;
59
60   ---------------------
61   -- Utf16_Code_Unit --
62   ---------------------
63
64   subtype Utf16_Code_Unit is Matreshka.Internals.Unicode.Code_Unit_16;
65
66   subtype High_Surrogate_Utf16_Code_Unit is Utf16_Code_Unit
67     range Matreshka.Internals.Unicode.High_Surrogate_First
68             .. Matreshka.Internals.Unicode.High_Surrogate_Last;
69
70   subtype Low_Surrogate_Utf16_Code_Unit is Utf16_Code_Unit
71     range Matreshka.Internals.Unicode.Low_Surrogate_First
72             .. Matreshka.Internals.Unicode.Low_Surrogate_Last;
73
74   function Is_Less
75    (Left : Utf16_Code_Unit; Right : Utf16_Code_Unit) return Boolean;
76   pragma Inline (Is_Less);
77   --  Compare two Utf16 code units with fixup for surrogate characters for
78   --  less.
79
80   function Is_Greater
81    (Left : Utf16_Code_Unit; Right : Utf16_Code_Unit) return Boolean;
82   pragma Inline (Is_Greater);
83   --  Compare two Utf16 code units with fixup for surrogate characters for
84   --  greater.
85
86   function Compare_Order (Item : Utf16_Code_Unit) return Utf16_Code_Unit
87     with Inline => True;
88   --  Returns fixed value for compare code unit of UTF16 in UCS4 binary order.
89
90   ------------------
91   -- Utf16_String --
92   ------------------
93
94   type Utf16_String_Index is mod 2 ** 32;
95
96   type Utf16_String is
97     array (Utf16_String_Index range <>) of aliased Utf16_Code_Unit;
98   for Utf16_String'Alignment use Standard'Maximum_Alignment;
99   for Utf16_String'Component_Size use Utf16_Code_Unit'Size;
100   pragma Pack (Utf16_String);
101   --  Internal representation of UTF-16 encoded string. Type has aliased
102   --  components to simplify interfacing with Windows API.
103
104   type Unaligned_Utf16_String is
105     array (Utf16_String_Index range <>) of aliased Utf16_Code_Unit;
106   for Unaligned_Utf16_String'Alignment use Utf16_Code_Unit'Alignment;
107   for Unaligned_Utf16_String'Component_Size use Utf16_Code_Unit'Size;
108   pragma Pack (Unaligned_Utf16_String);
109   --  Unaligned UTF-16 encoded string, intended to be used to interface with
110   --  external libraries.
111
112   function Unchecked_To_Code_Point
113    (Item     : Utf16_String;
114     Position : Utf16_String_Index)
115       return Matreshka.Internals.Unicode.Code_Point;
116   pragma Inline (Unchecked_To_Code_Point);
117   --  Convert character or surrogate pair at the cpecified position in the
118   --  the Unicode code point.
119
120   procedure Unchecked_Next
121    (Item     : Utf16_String;
122     Position : in out Utf16_String_Index;
123     Code     : out Matreshka.Internals.Unicode.Code_Point);
124   pragma Inline (Unchecked_Next);
125   --  Convert character or surrogate pair at the specified position in the
126   --  the Unicode code point and moves position to the next character.
127
128   procedure Unchecked_Next
129    (Item     : Unaligned_Utf16_String;
130     Position : in out Utf16_String_Index;
131     Code     : out Matreshka.Internals.Unicode.Code_Point);
132   pragma Inline (Unchecked_Next);
133   --  Convert character or surrogate pair at the specified position in the
134   --  the Unicode code point and moves position to the next character.
135
136   procedure Unchecked_Next
137    (Item     : Utf16_String;
138     Position : in out Utf16_String_Index);
139   pragma Inline (Unchecked_Next);
140   --  Moves position to the next character.
141
142   procedure Unchecked_Previous
143    (Item     : Utf16_String;
144     Position : in out Utf16_String_Index;
145     Code     : out Matreshka.Internals.Unicode.Code_Point);
146   pragma Inline (Unchecked_Previous);
147   --  Convert character or surrogate pair before the specified position in the
148   --  the Unicode code point and moves position to the previous character.
149
150   procedure Unchecked_Previous
151    (Item     : Utf16_String;
152     Position : in out Utf16_String_Index);
153   pragma Inline (Unchecked_Previous);
154   --  Moves position to the previous character.
155
156   procedure Unchecked_Store
157    (Item     : in out Utf16_String;
158     Position : in out Utf16_String_Index;
159     Code     : Matreshka.Internals.Unicode.Code_Point);
160   pragma Inline (Unchecked_Store);
161   --  Store specified character at the specified position and move position to
162   --  the next character.
163
164   function Unchecked_Surrogate_Pair_To_Code_Point
165    (High : Utf16_Code_Unit;
166     Low  : Utf16_Code_Unit) return Matreshka.Internals.Unicode.Code_Point;
167   pragma Inline (Unchecked_Surrogate_Pair_To_Code_Point);
168   --  Converts surrogate pair into the code point.
169
170   procedure Unchecked_Validate_Next
171    (Item     : Utf16_String;
172     Position : in out Utf16_String_Index;
173     Valid    : out Boolean);
174   pragma Inline (Unchecked_Validate_Next);
175   --  Validates next character or surrogate pair at the specified position.
176   --  Sets Valid to True and moves Position if character of surrogate pair is
177   --  valid; otherwise sets Valid to False and don't change Position.
178
179   procedure Unchecked_Validate_Next
180    (Item     : Unaligned_Utf16_String;
181     Position : in out Utf16_String_Index;
182     Code     : out Matreshka.Internals.Unicode.Code_Point;
183     Valid    : out Boolean);
184   pragma Inline (Unchecked_Validate_Next);
185   --  Validates next character or surrogate pair at the specified position.
186   --  Sets Valid to True, moves Position and sets Code to character's code
187   --  if code unit or surrogate pair is valid; otherwise sets Valid to False
188   --  and don't change Position.
189
190end Matreshka.Internals.Utf16;
191