1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--              A D A . C H A R A C T E R S . H A N D L I N G               --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17-- for  more details.  You should have  received  a copy of the GNU General --
18-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
19-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
20-- MA 02111-1307, USA.                                                      --
21--                                                                          --
22-- As a special exception,  if other files  instantiate  generics from this --
23-- unit, or you link  this unit with other files  to produce an executable, --
24-- this  unit  does not  by itself cause  the resulting  executable  to  be --
25-- covered  by the  GNU  General  Public  License.  This exception does not --
26-- however invalidate  any other reasons why  the executable file  might be --
27-- covered by the  GNU Public License.                                      --
28--                                                                          --
29-- GNAT was originally developed  by the GNAT team at  New York University. --
30-- Extensive contributions were provided by Ada Core Technologies Inc.      --
31--                                                                          --
32------------------------------------------------------------------------------
33
34with Ada.Characters.Latin_1;      use Ada.Characters.Latin_1;
35with Ada.Strings.Maps;            use Ada.Strings.Maps;
36with Ada.Strings.Maps.Constants;  use Ada.Strings.Maps.Constants;
37
38package body Ada.Characters.Handling is
39
40   ------------------------------------
41   -- Character Classification Table --
42   ------------------------------------
43
44   type Character_Flags is mod 256;
45   for Character_Flags'Size use 8;
46
47   Control    : constant Character_Flags := 1;
48   Lower      : constant Character_Flags := 2;
49   Upper      : constant Character_Flags := 4;
50   Basic      : constant Character_Flags := 8;
51   Hex_Digit  : constant Character_Flags := 16;
52   Digit      : constant Character_Flags := 32;
53   Special    : constant Character_Flags := 64;
54
55   Letter     : constant Character_Flags := Lower or Upper;
56   Alphanum   : constant Character_Flags := Letter or Digit;
57   Graphic    : constant Character_Flags := Alphanum or Special;
58
59   Char_Map : constant array (Character) of Character_Flags :=
60   (
61     NUL                         => Control,
62     SOH                         => Control,
63     STX                         => Control,
64     ETX                         => Control,
65     EOT                         => Control,
66     ENQ                         => Control,
67     ACK                         => Control,
68     BEL                         => Control,
69     BS                          => Control,
70     HT                          => Control,
71     LF                          => Control,
72     VT                          => Control,
73     FF                          => Control,
74     CR                          => Control,
75     SO                          => Control,
76     SI                          => Control,
77
78     DLE                         => Control,
79     DC1                         => Control,
80     DC2                         => Control,
81     DC3                         => Control,
82     DC4                         => Control,
83     NAK                         => Control,
84     SYN                         => Control,
85     ETB                         => Control,
86     CAN                         => Control,
87     EM                          => Control,
88     SUB                         => Control,
89     ESC                         => Control,
90     FS                          => Control,
91     GS                          => Control,
92     RS                          => Control,
93     US                          => Control,
94
95     Space                       => Special,
96     Exclamation                 => Special,
97     Quotation                   => Special,
98     Number_Sign                 => Special,
99     Dollar_Sign                 => Special,
100     Percent_Sign                => Special,
101     Ampersand                   => Special,
102     Apostrophe                  => Special,
103     Left_Parenthesis            => Special,
104     Right_Parenthesis           => Special,
105     Asterisk                    => Special,
106     Plus_Sign                   => Special,
107     Comma                       => Special,
108     Hyphen                      => Special,
109     Full_Stop                   => Special,
110     Solidus                     => Special,
111
112     '0' .. '9'                  => Digit + Hex_Digit,
113
114     Colon                       => Special,
115     Semicolon                   => Special,
116     Less_Than_Sign              => Special,
117     Equals_Sign                 => Special,
118     Greater_Than_Sign           => Special,
119     Question                    => Special,
120     Commercial_At               => Special,
121
122     'A' .. 'F'                  => Upper + Basic + Hex_Digit,
123     'G' .. 'Z'                  => Upper + Basic,
124
125     Left_Square_Bracket         => Special,
126     Reverse_Solidus             => Special,
127     Right_Square_Bracket        => Special,
128     Circumflex                  => Special,
129     Low_Line                    => Special,
130     Grave                       => Special,
131
132     'a' .. 'f'                  => Lower + Basic + Hex_Digit,
133     'g' .. 'z'                  => Lower + Basic,
134
135     Left_Curly_Bracket          => Special,
136     Vertical_Line               => Special,
137     Right_Curly_Bracket         => Special,
138     Tilde                       => Special,
139
140     DEL                         => Control,
141     Reserved_128                => Control,
142     Reserved_129                => Control,
143     BPH                         => Control,
144     NBH                         => Control,
145     Reserved_132                => Control,
146     NEL                         => Control,
147     SSA                         => Control,
148     ESA                         => Control,
149     HTS                         => Control,
150     HTJ                         => Control,
151     VTS                         => Control,
152     PLD                         => Control,
153     PLU                         => Control,
154     RI                          => Control,
155     SS2                         => Control,
156     SS3                         => Control,
157
158     DCS                         => Control,
159     PU1                         => Control,
160     PU2                         => Control,
161     STS                         => Control,
162     CCH                         => Control,
163     MW                          => Control,
164     SPA                         => Control,
165     EPA                         => Control,
166
167     SOS                         => Control,
168     Reserved_153                => Control,
169     SCI                         => Control,
170     CSI                         => Control,
171     ST                          => Control,
172     OSC                         => Control,
173     PM                          => Control,
174     APC                         => Control,
175
176     No_Break_Space              => Special,
177     Inverted_Exclamation        => Special,
178     Cent_Sign                   => Special,
179     Pound_Sign                  => Special,
180     Currency_Sign               => Special,
181     Yen_Sign                    => Special,
182     Broken_Bar                  => Special,
183     Section_Sign                => Special,
184     Diaeresis                   => Special,
185     Copyright_Sign              => Special,
186     Feminine_Ordinal_Indicator  => Special,
187     Left_Angle_Quotation        => Special,
188     Not_Sign                    => Special,
189     Soft_Hyphen                 => Special,
190     Registered_Trade_Mark_Sign  => Special,
191     Macron                      => Special,
192     Degree_Sign                 => Special,
193     Plus_Minus_Sign             => Special,
194     Superscript_Two             => Special,
195     Superscript_Three           => Special,
196     Acute                       => Special,
197     Micro_Sign                  => Special,
198     Pilcrow_Sign                => Special,
199     Middle_Dot                  => Special,
200     Cedilla                     => Special,
201     Superscript_One             => Special,
202     Masculine_Ordinal_Indicator => Special,
203     Right_Angle_Quotation       => Special,
204     Fraction_One_Quarter        => Special,
205     Fraction_One_Half           => Special,
206     Fraction_Three_Quarters     => Special,
207     Inverted_Question           => Special,
208
209     UC_A_Grave                  => Upper,
210     UC_A_Acute                  => Upper,
211     UC_A_Circumflex             => Upper,
212     UC_A_Tilde                  => Upper,
213     UC_A_Diaeresis              => Upper,
214     UC_A_Ring                   => Upper,
215     UC_AE_Diphthong             => Upper + Basic,
216     UC_C_Cedilla                => Upper,
217     UC_E_Grave                  => Upper,
218     UC_E_Acute                  => Upper,
219     UC_E_Circumflex             => Upper,
220     UC_E_Diaeresis              => Upper,
221     UC_I_Grave                  => Upper,
222     UC_I_Acute                  => Upper,
223     UC_I_Circumflex             => Upper,
224     UC_I_Diaeresis              => Upper,
225     UC_Icelandic_Eth            => Upper + Basic,
226     UC_N_Tilde                  => Upper,
227     UC_O_Grave                  => Upper,
228     UC_O_Acute                  => Upper,
229     UC_O_Circumflex             => Upper,
230     UC_O_Tilde                  => Upper,
231     UC_O_Diaeresis              => Upper,
232
233     Multiplication_Sign         => Special,
234
235     UC_O_Oblique_Stroke         => Upper,
236     UC_U_Grave                  => Upper,
237     UC_U_Acute                  => Upper,
238     UC_U_Circumflex             => Upper,
239     UC_U_Diaeresis              => Upper,
240     UC_Y_Acute                  => Upper,
241     UC_Icelandic_Thorn          => Upper + Basic,
242
243     LC_German_Sharp_S           => Lower + Basic,
244     LC_A_Grave                  => Lower,
245     LC_A_Acute                  => Lower,
246     LC_A_Circumflex             => Lower,
247     LC_A_Tilde                  => Lower,
248     LC_A_Diaeresis              => Lower,
249     LC_A_Ring                   => Lower,
250     LC_AE_Diphthong             => Lower + Basic,
251     LC_C_Cedilla                => Lower,
252     LC_E_Grave                  => Lower,
253     LC_E_Acute                  => Lower,
254     LC_E_Circumflex             => Lower,
255     LC_E_Diaeresis              => Lower,
256     LC_I_Grave                  => Lower,
257     LC_I_Acute                  => Lower,
258     LC_I_Circumflex             => Lower,
259     LC_I_Diaeresis              => Lower,
260     LC_Icelandic_Eth            => Lower + Basic,
261     LC_N_Tilde                  => Lower,
262     LC_O_Grave                  => Lower,
263     LC_O_Acute                  => Lower,
264     LC_O_Circumflex             => Lower,
265     LC_O_Tilde                  => Lower,
266     LC_O_Diaeresis              => Lower,
267
268     Division_Sign               => Special,
269
270     LC_O_Oblique_Stroke         => Lower,
271     LC_U_Grave                  => Lower,
272     LC_U_Acute                  => Lower,
273     LC_U_Circumflex             => Lower,
274     LC_U_Diaeresis              => Lower,
275     LC_Y_Acute                  => Lower,
276     LC_Icelandic_Thorn          => Lower + Basic,
277     LC_Y_Diaeresis              => Lower
278   );
279
280   ---------------------
281   -- Is_Alphanumeric --
282   ---------------------
283
284   function Is_Alphanumeric (Item : in Character) return Boolean is
285   begin
286      return (Char_Map (Item) and Alphanum) /= 0;
287   end Is_Alphanumeric;
288
289   --------------
290   -- Is_Basic --
291   --------------
292
293   function Is_Basic (Item : in Character) return Boolean is
294   begin
295      return (Char_Map (Item) and Basic) /= 0;
296   end Is_Basic;
297
298   ------------------
299   -- Is_Character --
300   ------------------
301
302   function Is_Character (Item : in Wide_Character) return Boolean is
303   begin
304      return Wide_Character'Pos (Item) < 256;
305   end Is_Character;
306
307   ----------------
308   -- Is_Control --
309   ----------------
310
311   function Is_Control (Item : in Character) return Boolean is
312   begin
313      return (Char_Map (Item) and Control) /= 0;
314   end Is_Control;
315
316   --------------
317   -- Is_Digit --
318   --------------
319
320   function Is_Digit (Item : in Character) return Boolean is
321   begin
322      return Item in '0' .. '9';
323   end Is_Digit;
324
325   ----------------
326   -- Is_Graphic --
327   ----------------
328
329   function Is_Graphic (Item : in Character) return Boolean is
330   begin
331      return (Char_Map (Item) and Graphic) /= 0;
332   end Is_Graphic;
333
334   --------------------------
335   -- Is_Hexadecimal_Digit --
336   --------------------------
337
338   function Is_Hexadecimal_Digit (Item : in Character) return Boolean is
339   begin
340      return (Char_Map (Item) and Hex_Digit) /= 0;
341   end Is_Hexadecimal_Digit;
342
343   ----------------
344   -- Is_ISO_646 --
345   ----------------
346
347   function Is_ISO_646 (Item : in Character) return Boolean is
348   begin
349      return Item in ISO_646;
350   end Is_ISO_646;
351
352   --  Note: much more efficient coding of the following function is possible
353   --  by testing several 16#80# bits in a complete word in a single operation
354
355   function Is_ISO_646 (Item : in String) return Boolean is
356   begin
357      for J in Item'Range loop
358         if Item (J) not in ISO_646 then
359            return False;
360         end if;
361      end loop;
362
363      return True;
364   end Is_ISO_646;
365
366   ---------------
367   -- Is_Letter --
368   ---------------
369
370   function Is_Letter (Item : in Character) return Boolean is
371   begin
372      return (Char_Map (Item) and Letter) /= 0;
373   end Is_Letter;
374
375   --------------
376   -- Is_Lower --
377   --------------
378
379   function Is_Lower (Item : in Character) return Boolean is
380   begin
381      return (Char_Map (Item) and Lower) /= 0;
382   end Is_Lower;
383
384   ----------------
385   -- Is_Special --
386   ----------------
387
388   function Is_Special (Item : in Character) return Boolean is
389   begin
390      return (Char_Map (Item) and Special) /= 0;
391   end Is_Special;
392
393   ---------------
394   -- Is_String --
395   ---------------
396
397   function Is_String (Item : in Wide_String) return Boolean is
398   begin
399      for J in Item'Range loop
400         if Wide_Character'Pos (Item (J)) >= 256 then
401            return False;
402         end if;
403      end loop;
404
405      return True;
406   end Is_String;
407
408   --------------
409   -- Is_Upper --
410   --------------
411
412   function Is_Upper (Item : in Character) return Boolean is
413   begin
414      return (Char_Map (Item) and Upper) /= 0;
415   end Is_Upper;
416
417   --------------
418   -- To_Basic --
419   --------------
420
421   function To_Basic (Item : in Character) return Character is
422   begin
423      return Value (Basic_Map, Item);
424   end To_Basic;
425
426   function To_Basic (Item : in String) return String is
427      Result : String (1 .. Item'Length);
428
429   begin
430      for J in Item'Range loop
431         Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
432      end loop;
433
434      return Result;
435   end To_Basic;
436
437   ------------------
438   -- To_Character --
439   ------------------
440
441   function To_Character
442     (Item       : in Wide_Character;
443      Substitute : in Character := ' ')
444      return       Character
445   is
446   begin
447      if Is_Character (Item) then
448         return Character'Val (Wide_Character'Pos (Item));
449      else
450         return Substitute;
451      end if;
452   end To_Character;
453
454   ----------------
455   -- To_ISO_646 --
456   ----------------
457
458   function To_ISO_646
459     (Item       : in Character;
460      Substitute : in ISO_646 := ' ')
461      return       ISO_646
462   is
463   begin
464      if Item in ISO_646 then
465         return Item;
466      else
467         return Substitute;
468      end if;
469   end To_ISO_646;
470
471   function To_ISO_646
472     (Item       : in String;
473      Substitute : in ISO_646 := ' ')
474      return       String
475   is
476      Result : String (1 .. Item'Length);
477
478   begin
479      for J in Item'Range loop
480         if Item (J) in ISO_646 then
481            Result (J - (Item'First - 1)) := Item (J);
482         else
483            Result (J - (Item'First - 1)) := Substitute;
484         end if;
485      end loop;
486
487      return Result;
488   end To_ISO_646;
489
490   --------------
491   -- To_Lower --
492   --------------
493
494   function To_Lower (Item : in Character) return Character is
495   begin
496      return Value (Lower_Case_Map, Item);
497   end To_Lower;
498
499   function To_Lower (Item : in String) return String is
500      Result : String (1 .. Item'Length);
501
502   begin
503      for J in Item'Range loop
504         Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
505      end loop;
506
507      return Result;
508   end To_Lower;
509
510   ---------------
511   -- To_String --
512   ---------------
513
514   function To_String
515     (Item       : in Wide_String;
516      Substitute : in Character := ' ')
517     return        String
518   is
519      Result : String (1 .. Item'Length);
520
521   begin
522      for J in Item'Range loop
523         Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
524      end loop;
525      return Result;
526   end To_String;
527
528   --------------
529   -- To_Upper --
530   --------------
531
532   function To_Upper
533     (Item : in Character)
534     return  Character
535   is
536   begin
537      return Value (Upper_Case_Map, Item);
538   end To_Upper;
539
540   function To_Upper
541     (Item : in String)
542      return String
543   is
544      Result : String (1 .. Item'Length);
545
546   begin
547      for J in Item'Range loop
548         Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
549      end loop;
550
551      return Result;
552   end To_Upper;
553
554   -----------------------
555   -- To_Wide_Character --
556   -----------------------
557
558   function To_Wide_Character
559     (Item : in Character)
560      return Wide_Character
561   is
562   begin
563      return Wide_Character'Val (Character'Pos (Item));
564   end To_Wide_Character;
565
566   --------------------
567   -- To_Wide_String --
568   --------------------
569
570   function To_Wide_String
571     (Item : in String)
572      return Wide_String
573   is
574      Result : Wide_String (1 .. Item'Length);
575
576   begin
577      for J in Item'Range loop
578         Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
579      end loop;
580
581      return Result;
582   end To_Wide_String;
583end Ada.Characters.Handling;
584