1------------------------------------------------------------------------------ 2-- -- 3-- Matreshka Project -- 4-- -- 5-- Localization, Internationalization, Globalization for Ada -- 6-- -- 7-- Runtime Library Component -- 8-- -- 9------------------------------------------------------------------------------ 10-- -- 11-- Copyright © 2009-2012, Vadim Godunko <vgodunko@gmail.com> -- 12-- All rights reserved. -- 13-- -- 14-- Redistribution and use in source and binary forms, with or without -- 15-- modification, are permitted provided that the following conditions -- 16-- are met: -- 17-- -- 18-- * Redistributions of source code must retain the above copyright -- 19-- notice, this list of conditions and the following disclaimer. -- 20-- -- 21-- * Redistributions in binary form must reproduce the above copyright -- 22-- notice, this list of conditions and the following disclaimer in the -- 23-- documentation and/or other materials provided with the distribution. -- 24-- -- 25-- * Neither the name of the Vadim Godunko, IE nor the names of its -- 26-- contributors may be used to endorse or promote products derived from -- 27-- this software without specific prior written permission. -- 28-- -- 29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -- 30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- 31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -- 32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -- 33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -- 34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -- 35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- 36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- 37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- 38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- 39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- 40-- -- 41------------------------------------------------------------------------------ 42-- $Revision: 3178 $ $Date: 2012-10-09 11:57:26 +0400 (Tue, 09 Oct 2012) $ 43------------------------------------------------------------------------------ 44-- This package containts type declarations for UTF-16 encoded strings and 45-- useful subprograms. 46-- 47-- For all subprogram those names starts from Unchecked_ all run time checks 48-- are disabled; but to compensate this, assertions to check ranges and 49-- validity of source data are added. So, developers can enable assertion 50-- checks to simplify debugging of their code. 51------------------------------------------------------------------------------ 52pragma Ada_2012; 53 54with Matreshka.Internals.Unicode; 55 56package Matreshka.Internals.Utf16 is 57 58 pragma Pure; 59 60 --------------------- 61 -- Utf16_Code_Unit -- 62 --------------------- 63 64 subtype Utf16_Code_Unit is Matreshka.Internals.Unicode.Code_Unit_16; 65 66 subtype High_Surrogate_Utf16_Code_Unit is Utf16_Code_Unit 67 range Matreshka.Internals.Unicode.High_Surrogate_First 68 .. Matreshka.Internals.Unicode.High_Surrogate_Last; 69 70 subtype Low_Surrogate_Utf16_Code_Unit is Utf16_Code_Unit 71 range Matreshka.Internals.Unicode.Low_Surrogate_First 72 .. Matreshka.Internals.Unicode.Low_Surrogate_Last; 73 74 function Is_Less 75 (Left : Utf16_Code_Unit; Right : Utf16_Code_Unit) return Boolean; 76 pragma Inline (Is_Less); 77 -- Compare two Utf16 code units with fixup for surrogate characters for 78 -- less. 79 80 function Is_Greater 81 (Left : Utf16_Code_Unit; Right : Utf16_Code_Unit) return Boolean; 82 pragma Inline (Is_Greater); 83 -- Compare two Utf16 code units with fixup for surrogate characters for 84 -- greater. 85 86 function Compare_Order (Item : Utf16_Code_Unit) return Utf16_Code_Unit 87 with Inline => True; 88 -- Returns fixed value for compare code unit of UTF16 in UCS4 binary order. 89 90 ------------------ 91 -- Utf16_String -- 92 ------------------ 93 94 type Utf16_String_Index is mod 2 ** 32; 95 96 type Utf16_String is 97 array (Utf16_String_Index range <>) of aliased Utf16_Code_Unit; 98 for Utf16_String'Alignment use Standard'Maximum_Alignment; 99 for Utf16_String'Component_Size use Utf16_Code_Unit'Size; 100 pragma Pack (Utf16_String); 101 -- Internal representation of UTF-16 encoded string. Type has aliased 102 -- components to simplify interfacing with Windows API. 103 104 type Unaligned_Utf16_String is 105 array (Utf16_String_Index range <>) of aliased Utf16_Code_Unit; 106 for Unaligned_Utf16_String'Alignment use Utf16_Code_Unit'Alignment; 107 for Unaligned_Utf16_String'Component_Size use Utf16_Code_Unit'Size; 108 pragma Pack (Unaligned_Utf16_String); 109 -- Unaligned UTF-16 encoded string, intended to be used to interface with 110 -- external libraries. 111 112 function Unchecked_To_Code_Point 113 (Item : Utf16_String; 114 Position : Utf16_String_Index) 115 return Matreshka.Internals.Unicode.Code_Point; 116 pragma Inline (Unchecked_To_Code_Point); 117 -- Convert character or surrogate pair at the cpecified position in the 118 -- the Unicode code point. 119 120 procedure Unchecked_Next 121 (Item : Utf16_String; 122 Position : in out Utf16_String_Index; 123 Code : out Matreshka.Internals.Unicode.Code_Point); 124 pragma Inline (Unchecked_Next); 125 -- Convert character or surrogate pair at the specified position in the 126 -- the Unicode code point and moves position to the next character. 127 128 procedure Unchecked_Next 129 (Item : Unaligned_Utf16_String; 130 Position : in out Utf16_String_Index; 131 Code : out Matreshka.Internals.Unicode.Code_Point); 132 pragma Inline (Unchecked_Next); 133 -- Convert character or surrogate pair at the specified position in the 134 -- the Unicode code point and moves position to the next character. 135 136 procedure Unchecked_Next 137 (Item : Utf16_String; 138 Position : in out Utf16_String_Index); 139 pragma Inline (Unchecked_Next); 140 -- Moves position to the next character. 141 142 procedure Unchecked_Previous 143 (Item : Utf16_String; 144 Position : in out Utf16_String_Index; 145 Code : out Matreshka.Internals.Unicode.Code_Point); 146 pragma Inline (Unchecked_Previous); 147 -- Convert character or surrogate pair before the specified position in the 148 -- the Unicode code point and moves position to the previous character. 149 150 procedure Unchecked_Previous 151 (Item : Utf16_String; 152 Position : in out Utf16_String_Index); 153 pragma Inline (Unchecked_Previous); 154 -- Moves position to the previous character. 155 156 procedure Unchecked_Store 157 (Item : in out Utf16_String; 158 Position : in out Utf16_String_Index; 159 Code : Matreshka.Internals.Unicode.Code_Point); 160 pragma Inline (Unchecked_Store); 161 -- Store specified character at the specified position and move position to 162 -- the next character. 163 164 function Unchecked_Surrogate_Pair_To_Code_Point 165 (High : Utf16_Code_Unit; 166 Low : Utf16_Code_Unit) return Matreshka.Internals.Unicode.Code_Point; 167 pragma Inline (Unchecked_Surrogate_Pair_To_Code_Point); 168 -- Converts surrogate pair into the code point. 169 170 procedure Unchecked_Validate_Next 171 (Item : Utf16_String; 172 Position : in out Utf16_String_Index; 173 Valid : out Boolean); 174 pragma Inline (Unchecked_Validate_Next); 175 -- Validates next character or surrogate pair at the specified position. 176 -- Sets Valid to True and moves Position if character of surrogate pair is 177 -- valid; otherwise sets Valid to False and don't change Position. 178 179 procedure Unchecked_Validate_Next 180 (Item : Unaligned_Utf16_String; 181 Position : in out Utf16_String_Index; 182 Code : out Matreshka.Internals.Unicode.Code_Point; 183 Valid : out Boolean); 184 pragma Inline (Unchecked_Validate_Next); 185 -- Validates next character or surrogate pair at the specified position. 186 -- Sets Valid to True, moves Position and sets Code to character's code 187 -- if code unit or surrogate pair is valid; otherwise sets Valid to False 188 -- and don't change Position. 189 190end Matreshka.Internals.Utf16; 191