1------------------------------------------------------------------------------ 2-- -- 3-- GNAT COMPILER COMPONENTS -- 4-- -- 5-- G N A T . B Y T E _ O R D E R _ M A R K -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 2006-2013, AdaCore -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32pragma Compiler_Unit_Warning; 33 34package body GNAT.Byte_Order_Mark is 35 36 -------------- 37 -- Read_BOM -- 38 -------------- 39 40 procedure Read_BOM 41 (Str : String; 42 Len : out Natural; 43 BOM : out BOM_Kind; 44 XML_Support : Boolean := False) 45 is 46 begin 47 -- Note: the order of these tests is important, because in some cases 48 -- one sequence is a prefix of a longer sequence, and we must test for 49 -- the longer sequence first 50 51 -- UTF-32 (big-endian) 52 53 if Str'Length >= 4 54 and then Str (Str'First) = Character'Val (16#00#) 55 and then Str (Str'First + 1) = Character'Val (16#00#) 56 and then Str (Str'First + 2) = Character'Val (16#FE#) 57 and then Str (Str'First + 3) = Character'Val (16#FF#) 58 then 59 Len := 4; 60 BOM := UTF32_BE; 61 62 -- UTF-32 (little-endian) 63 64 elsif Str'Length >= 4 65 and then Str (Str'First) = Character'Val (16#FF#) 66 and then Str (Str'First + 1) = Character'Val (16#FE#) 67 and then Str (Str'First + 2) = Character'Val (16#00#) 68 and then Str (Str'First + 3) = Character'Val (16#00#) 69 then 70 Len := 4; 71 BOM := UTF32_LE; 72 73 -- UTF-16 (big-endian) 74 75 elsif Str'Length >= 2 76 and then Str (Str'First) = Character'Val (16#FE#) 77 and then Str (Str'First + 1) = Character'Val (16#FF#) 78 then 79 Len := 2; 80 BOM := UTF16_BE; 81 82 -- UTF-16 (little-endian) 83 84 elsif Str'Length >= 2 85 and then Str (Str'First) = Character'Val (16#FF#) 86 and then Str (Str'First + 1) = Character'Val (16#FE#) 87 then 88 Len := 2; 89 BOM := UTF16_LE; 90 91 -- UTF-8 (endian-independent) 92 93 elsif Str'Length >= 3 94 and then Str (Str'First) = Character'Val (16#EF#) 95 and then Str (Str'First + 1) = Character'Val (16#BB#) 96 and then Str (Str'First + 2) = Character'Val (16#BF#) 97 then 98 Len := 3; 99 BOM := UTF8_All; 100 101 -- UCS-4 (big-endian) XML only 102 103 elsif XML_Support 104 and then Str'Length >= 4 105 and then Str (Str'First) = Character'Val (16#00#) 106 and then Str (Str'First + 1) = Character'Val (16#00#) 107 and then Str (Str'First + 2) = Character'Val (16#00#) 108 and then Str (Str'First + 3) = Character'Val (16#3C#) 109 then 110 Len := 0; 111 BOM := UCS4_BE; 112 113 -- UCS-4 (little-endian) XML case 114 115 elsif XML_Support 116 and then Str'Length >= 4 117 and then Str (Str'First) = Character'Val (16#3C#) 118 and then Str (Str'First + 1) = Character'Val (16#00#) 119 and then Str (Str'First + 2) = Character'Val (16#00#) 120 and then Str (Str'First + 3) = Character'Val (16#00#) 121 then 122 Len := 0; 123 BOM := UCS4_LE; 124 125 -- UCS-4 (unusual byte order 2143) XML case 126 127 elsif XML_Support 128 and then Str'Length >= 4 129 and then Str (Str'First) = Character'Val (16#00#) 130 and then Str (Str'First + 1) = Character'Val (16#00#) 131 and then Str (Str'First + 2) = Character'Val (16#3C#) 132 and then Str (Str'First + 3) = Character'Val (16#00#) 133 then 134 Len := 0; 135 BOM := UCS4_2143; 136 137 -- UCS-4 (unusual byte order 3412) XML case 138 139 elsif XML_Support 140 and then Str'Length >= 4 141 and then Str (Str'First) = Character'Val (16#00#) 142 and then Str (Str'First + 1) = Character'Val (16#3C#) 143 and then Str (Str'First + 2) = Character'Val (16#00#) 144 and then Str (Str'First + 3) = Character'Val (16#00#) 145 then 146 Len := 0; 147 BOM := UCS4_3412; 148 149 -- UTF-16 (big-endian) XML case 150 151 elsif XML_Support 152 and then Str'Length >= 4 153 and then Str (Str'First) = Character'Val (16#00#) 154 and then Str (Str'First + 1) = Character'Val (16#3C#) 155 and then Str (Str'First + 2) = Character'Val (16#00#) 156 and then Str (Str'First + 3) = Character'Val (16#3F#) 157 then 158 Len := 0; 159 BOM := UTF16_BE; 160 161 -- UTF-32 (little-endian) XML case 162 163 elsif XML_Support 164 and then Str'Length >= 4 165 and then Str (Str'First) = Character'Val (16#3C#) 166 and then Str (Str'First + 1) = Character'Val (16#00#) 167 and then Str (Str'First + 2) = Character'Val (16#3F#) 168 and then Str (Str'First + 3) = Character'Val (16#00#) 169 then 170 Len := 0; 171 BOM := UTF16_LE; 172 173 -- Unrecognized special encodings XML only 174 175 elsif XML_Support 176 and then Str'Length >= 4 177 and then Str (Str'First) = Character'Val (16#3C#) 178 and then Str (Str'First + 1) = Character'Val (16#3F#) 179 and then Str (Str'First + 2) = Character'Val (16#78#) 180 and then Str (Str'First + 3) = Character'Val (16#6D#) 181 then 182 -- UTF-8, ASCII, some part of ISO8859, Shift-JIS, EUC,... 183 184 Len := 0; 185 BOM := Unknown; 186 187 -- No BOM recognized 188 189 else 190 Len := 0; 191 BOM := Unknown; 192 end if; 193 end Read_BOM; 194 195end GNAT.Byte_Order_Mark; 196