1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                 G N A T . B Y T E _ O R D E R _ M A R K                  --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--                     Copyright (C) 2006-2013, AdaCore                     --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32pragma Compiler_Unit_Warning;
33
34package body GNAT.Byte_Order_Mark is
35
36   --------------
37   -- Read_BOM --
38   --------------
39
40   procedure Read_BOM
41     (Str         : String;
42      Len         : out Natural;
43      BOM         : out BOM_Kind;
44      XML_Support : Boolean := False)
45   is
46   begin
47      --  Note: the order of these tests is important, because in some cases
48      --  one sequence is a prefix of a longer sequence, and we must test for
49      --  the longer sequence first
50
51      --  UTF-32 (big-endian)
52
53      if Str'Length >= 4
54        and then Str (Str'First)     = Character'Val (16#00#)
55        and then Str (Str'First + 1) = Character'Val (16#00#)
56        and then Str (Str'First + 2) = Character'Val (16#FE#)
57        and then Str (Str'First + 3) = Character'Val (16#FF#)
58      then
59         Len := 4;
60         BOM := UTF32_BE;
61
62      --  UTF-32 (little-endian)
63
64      elsif Str'Length >= 4
65        and then Str (Str'First)     = Character'Val (16#FF#)
66        and then Str (Str'First + 1) = Character'Val (16#FE#)
67        and then Str (Str'First + 2) = Character'Val (16#00#)
68        and then Str (Str'First + 3) = Character'Val (16#00#)
69      then
70         Len := 4;
71         BOM := UTF32_LE;
72
73      --  UTF-16 (big-endian)
74
75      elsif Str'Length >= 2
76        and then Str (Str'First) = Character'Val (16#FE#)
77        and then Str (Str'First + 1) = Character'Val (16#FF#)
78      then
79         Len := 2;
80         BOM := UTF16_BE;
81
82      --  UTF-16 (little-endian)
83
84      elsif Str'Length >= 2
85        and then Str (Str'First) = Character'Val (16#FF#)
86        and then Str (Str'First + 1) = Character'Val (16#FE#)
87      then
88         Len := 2;
89         BOM := UTF16_LE;
90
91      --  UTF-8 (endian-independent)
92
93      elsif Str'Length >= 3
94        and then Str (Str'First)     = Character'Val (16#EF#)
95        and then Str (Str'First + 1) = Character'Val (16#BB#)
96        and then Str (Str'First + 2) = Character'Val (16#BF#)
97      then
98         Len := 3;
99         BOM := UTF8_All;
100
101      --  UCS-4 (big-endian) XML only
102
103      elsif XML_Support
104        and then Str'Length >= 4
105        and then Str (Str'First)     = Character'Val (16#00#)
106        and then Str (Str'First + 1) = Character'Val (16#00#)
107        and then Str (Str'First + 2) = Character'Val (16#00#)
108        and then Str (Str'First + 3) = Character'Val (16#3C#)
109      then
110         Len := 0;
111         BOM := UCS4_BE;
112
113      --  UCS-4 (little-endian) XML case
114
115      elsif XML_Support
116        and then Str'Length >= 4
117        and then Str (Str'First)     = Character'Val (16#3C#)
118        and then Str (Str'First + 1) = Character'Val (16#00#)
119        and then Str (Str'First + 2) = Character'Val (16#00#)
120        and then Str (Str'First + 3) = Character'Val (16#00#)
121      then
122         Len := 0;
123         BOM := UCS4_LE;
124
125      --  UCS-4 (unusual byte order 2143) XML case
126
127      elsif XML_Support
128        and then Str'Length >= 4
129        and then Str (Str'First)     = Character'Val (16#00#)
130        and then Str (Str'First + 1) = Character'Val (16#00#)
131        and then Str (Str'First + 2) = Character'Val (16#3C#)
132        and then Str (Str'First + 3) = Character'Val (16#00#)
133      then
134         Len := 0;
135         BOM := UCS4_2143;
136
137      --  UCS-4 (unusual byte order 3412) XML case
138
139      elsif XML_Support
140        and then Str'Length >= 4
141        and then Str (Str'First)     = Character'Val (16#00#)
142        and then Str (Str'First + 1) = Character'Val (16#3C#)
143        and then Str (Str'First + 2) = Character'Val (16#00#)
144        and then Str (Str'First + 3) = Character'Val (16#00#)
145      then
146         Len := 0;
147         BOM := UCS4_3412;
148
149      --  UTF-16 (big-endian) XML case
150
151      elsif XML_Support
152        and then Str'Length >= 4
153        and then Str (Str'First)     = Character'Val (16#00#)
154        and then Str (Str'First + 1) = Character'Val (16#3C#)
155        and then Str (Str'First + 2) = Character'Val (16#00#)
156        and then Str (Str'First + 3) = Character'Val (16#3F#)
157      then
158         Len := 0;
159         BOM := UTF16_BE;
160
161      --  UTF-32 (little-endian) XML case
162
163      elsif XML_Support
164        and then Str'Length >= 4
165        and then Str (Str'First)     = Character'Val (16#3C#)
166        and then Str (Str'First + 1) = Character'Val (16#00#)
167        and then Str (Str'First + 2) = Character'Val (16#3F#)
168        and then Str (Str'First + 3) = Character'Val (16#00#)
169      then
170         Len := 0;
171         BOM := UTF16_LE;
172
173      --  Unrecognized special encodings XML only
174
175      elsif XML_Support
176        and then Str'Length >= 4
177        and then Str (Str'First)     = Character'Val (16#3C#)
178        and then Str (Str'First + 1) = Character'Val (16#3F#)
179        and then Str (Str'First + 2) = Character'Val (16#78#)
180        and then Str (Str'First + 3) = Character'Val (16#6D#)
181      then
182         --  UTF-8, ASCII, some part of ISO8859, Shift-JIS, EUC,...
183
184         Len := 0;
185         BOM := Unknown;
186
187      --  No BOM recognized
188
189      else
190         Len := 0;
191         BOM := Unknown;
192      end if;
193   end Read_BOM;
194
195end GNAT.Byte_Order_Mark;
196