1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUN-TIME COMPONENTS -- 4-- -- 5-- A D A . C H A R A C T E R S . H A N D L I N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2001 Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 2, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 17-- for more details. You should have received a copy of the GNU General -- 18-- Public License distributed with GNAT; see file COPYING. If not, write -- 19-- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -- 20-- MA 02111-1307, USA. -- 21-- -- 22-- As a special exception, if other files instantiate generics from this -- 23-- unit, or you link this unit with other files to produce an executable, -- 24-- this unit does not by itself cause the resulting executable to be -- 25-- covered by the GNU General Public License. This exception does not -- 26-- however invalidate any other reasons why the executable file might be -- 27-- covered by the GNU Public License. -- 28-- -- 29-- GNAT was originally developed by the GNAT team at New York University. -- 30-- Extensive contributions were provided by Ada Core Technologies Inc. -- 31-- -- 32------------------------------------------------------------------------------ 33 34with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; 35with Ada.Strings.Maps; use Ada.Strings.Maps; 36with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants; 37 38package body Ada.Characters.Handling is 39 40 ------------------------------------ 41 -- Character Classification Table -- 42 ------------------------------------ 43 44 type Character_Flags is mod 256; 45 for Character_Flags'Size use 8; 46 47 Control : constant Character_Flags := 1; 48 Lower : constant Character_Flags := 2; 49 Upper : constant Character_Flags := 4; 50 Basic : constant Character_Flags := 8; 51 Hex_Digit : constant Character_Flags := 16; 52 Digit : constant Character_Flags := 32; 53 Special : constant Character_Flags := 64; 54 55 Letter : constant Character_Flags := Lower or Upper; 56 Alphanum : constant Character_Flags := Letter or Digit; 57 Graphic : constant Character_Flags := Alphanum or Special; 58 59 Char_Map : constant array (Character) of Character_Flags := 60 ( 61 NUL => Control, 62 SOH => Control, 63 STX => Control, 64 ETX => Control, 65 EOT => Control, 66 ENQ => Control, 67 ACK => Control, 68 BEL => Control, 69 BS => Control, 70 HT => Control, 71 LF => Control, 72 VT => Control, 73 FF => Control, 74 CR => Control, 75 SO => Control, 76 SI => Control, 77 78 DLE => Control, 79 DC1 => Control, 80 DC2 => Control, 81 DC3 => Control, 82 DC4 => Control, 83 NAK => Control, 84 SYN => Control, 85 ETB => Control, 86 CAN => Control, 87 EM => Control, 88 SUB => Control, 89 ESC => Control, 90 FS => Control, 91 GS => Control, 92 RS => Control, 93 US => Control, 94 95 Space => Special, 96 Exclamation => Special, 97 Quotation => Special, 98 Number_Sign => Special, 99 Dollar_Sign => Special, 100 Percent_Sign => Special, 101 Ampersand => Special, 102 Apostrophe => Special, 103 Left_Parenthesis => Special, 104 Right_Parenthesis => Special, 105 Asterisk => Special, 106 Plus_Sign => Special, 107 Comma => Special, 108 Hyphen => Special, 109 Full_Stop => Special, 110 Solidus => Special, 111 112 '0' .. '9' => Digit + Hex_Digit, 113 114 Colon => Special, 115 Semicolon => Special, 116 Less_Than_Sign => Special, 117 Equals_Sign => Special, 118 Greater_Than_Sign => Special, 119 Question => Special, 120 Commercial_At => Special, 121 122 'A' .. 'F' => Upper + Basic + Hex_Digit, 123 'G' .. 'Z' => Upper + Basic, 124 125 Left_Square_Bracket => Special, 126 Reverse_Solidus => Special, 127 Right_Square_Bracket => Special, 128 Circumflex => Special, 129 Low_Line => Special, 130 Grave => Special, 131 132 'a' .. 'f' => Lower + Basic + Hex_Digit, 133 'g' .. 'z' => Lower + Basic, 134 135 Left_Curly_Bracket => Special, 136 Vertical_Line => Special, 137 Right_Curly_Bracket => Special, 138 Tilde => Special, 139 140 DEL => Control, 141 Reserved_128 => Control, 142 Reserved_129 => Control, 143 BPH => Control, 144 NBH => Control, 145 Reserved_132 => Control, 146 NEL => Control, 147 SSA => Control, 148 ESA => Control, 149 HTS => Control, 150 HTJ => Control, 151 VTS => Control, 152 PLD => Control, 153 PLU => Control, 154 RI => Control, 155 SS2 => Control, 156 SS3 => Control, 157 158 DCS => Control, 159 PU1 => Control, 160 PU2 => Control, 161 STS => Control, 162 CCH => Control, 163 MW => Control, 164 SPA => Control, 165 EPA => Control, 166 167 SOS => Control, 168 Reserved_153 => Control, 169 SCI => Control, 170 CSI => Control, 171 ST => Control, 172 OSC => Control, 173 PM => Control, 174 APC => Control, 175 176 No_Break_Space => Special, 177 Inverted_Exclamation => Special, 178 Cent_Sign => Special, 179 Pound_Sign => Special, 180 Currency_Sign => Special, 181 Yen_Sign => Special, 182 Broken_Bar => Special, 183 Section_Sign => Special, 184 Diaeresis => Special, 185 Copyright_Sign => Special, 186 Feminine_Ordinal_Indicator => Special, 187 Left_Angle_Quotation => Special, 188 Not_Sign => Special, 189 Soft_Hyphen => Special, 190 Registered_Trade_Mark_Sign => Special, 191 Macron => Special, 192 Degree_Sign => Special, 193 Plus_Minus_Sign => Special, 194 Superscript_Two => Special, 195 Superscript_Three => Special, 196 Acute => Special, 197 Micro_Sign => Special, 198 Pilcrow_Sign => Special, 199 Middle_Dot => Special, 200 Cedilla => Special, 201 Superscript_One => Special, 202 Masculine_Ordinal_Indicator => Special, 203 Right_Angle_Quotation => Special, 204 Fraction_One_Quarter => Special, 205 Fraction_One_Half => Special, 206 Fraction_Three_Quarters => Special, 207 Inverted_Question => Special, 208 209 UC_A_Grave => Upper, 210 UC_A_Acute => Upper, 211 UC_A_Circumflex => Upper, 212 UC_A_Tilde => Upper, 213 UC_A_Diaeresis => Upper, 214 UC_A_Ring => Upper, 215 UC_AE_Diphthong => Upper + Basic, 216 UC_C_Cedilla => Upper, 217 UC_E_Grave => Upper, 218 UC_E_Acute => Upper, 219 UC_E_Circumflex => Upper, 220 UC_E_Diaeresis => Upper, 221 UC_I_Grave => Upper, 222 UC_I_Acute => Upper, 223 UC_I_Circumflex => Upper, 224 UC_I_Diaeresis => Upper, 225 UC_Icelandic_Eth => Upper + Basic, 226 UC_N_Tilde => Upper, 227 UC_O_Grave => Upper, 228 UC_O_Acute => Upper, 229 UC_O_Circumflex => Upper, 230 UC_O_Tilde => Upper, 231 UC_O_Diaeresis => Upper, 232 233 Multiplication_Sign => Special, 234 235 UC_O_Oblique_Stroke => Upper, 236 UC_U_Grave => Upper, 237 UC_U_Acute => Upper, 238 UC_U_Circumflex => Upper, 239 UC_U_Diaeresis => Upper, 240 UC_Y_Acute => Upper, 241 UC_Icelandic_Thorn => Upper + Basic, 242 243 LC_German_Sharp_S => Lower + Basic, 244 LC_A_Grave => Lower, 245 LC_A_Acute => Lower, 246 LC_A_Circumflex => Lower, 247 LC_A_Tilde => Lower, 248 LC_A_Diaeresis => Lower, 249 LC_A_Ring => Lower, 250 LC_AE_Diphthong => Lower + Basic, 251 LC_C_Cedilla => Lower, 252 LC_E_Grave => Lower, 253 LC_E_Acute => Lower, 254 LC_E_Circumflex => Lower, 255 LC_E_Diaeresis => Lower, 256 LC_I_Grave => Lower, 257 LC_I_Acute => Lower, 258 LC_I_Circumflex => Lower, 259 LC_I_Diaeresis => Lower, 260 LC_Icelandic_Eth => Lower + Basic, 261 LC_N_Tilde => Lower, 262 LC_O_Grave => Lower, 263 LC_O_Acute => Lower, 264 LC_O_Circumflex => Lower, 265 LC_O_Tilde => Lower, 266 LC_O_Diaeresis => Lower, 267 268 Division_Sign => Special, 269 270 LC_O_Oblique_Stroke => Lower, 271 LC_U_Grave => Lower, 272 LC_U_Acute => Lower, 273 LC_U_Circumflex => Lower, 274 LC_U_Diaeresis => Lower, 275 LC_Y_Acute => Lower, 276 LC_Icelandic_Thorn => Lower + Basic, 277 LC_Y_Diaeresis => Lower 278 ); 279 280 --------------------- 281 -- Is_Alphanumeric -- 282 --------------------- 283 284 function Is_Alphanumeric (Item : in Character) return Boolean is 285 begin 286 return (Char_Map (Item) and Alphanum) /= 0; 287 end Is_Alphanumeric; 288 289 -------------- 290 -- Is_Basic -- 291 -------------- 292 293 function Is_Basic (Item : in Character) return Boolean is 294 begin 295 return (Char_Map (Item) and Basic) /= 0; 296 end Is_Basic; 297 298 ------------------ 299 -- Is_Character -- 300 ------------------ 301 302 function Is_Character (Item : in Wide_Character) return Boolean is 303 begin 304 return Wide_Character'Pos (Item) < 256; 305 end Is_Character; 306 307 ---------------- 308 -- Is_Control -- 309 ---------------- 310 311 function Is_Control (Item : in Character) return Boolean is 312 begin 313 return (Char_Map (Item) and Control) /= 0; 314 end Is_Control; 315 316 -------------- 317 -- Is_Digit -- 318 -------------- 319 320 function Is_Digit (Item : in Character) return Boolean is 321 begin 322 return Item in '0' .. '9'; 323 end Is_Digit; 324 325 ---------------- 326 -- Is_Graphic -- 327 ---------------- 328 329 function Is_Graphic (Item : in Character) return Boolean is 330 begin 331 return (Char_Map (Item) and Graphic) /= 0; 332 end Is_Graphic; 333 334 -------------------------- 335 -- Is_Hexadecimal_Digit -- 336 -------------------------- 337 338 function Is_Hexadecimal_Digit (Item : in Character) return Boolean is 339 begin 340 return (Char_Map (Item) and Hex_Digit) /= 0; 341 end Is_Hexadecimal_Digit; 342 343 ---------------- 344 -- Is_ISO_646 -- 345 ---------------- 346 347 function Is_ISO_646 (Item : in Character) return Boolean is 348 begin 349 return Item in ISO_646; 350 end Is_ISO_646; 351 352 -- Note: much more efficient coding of the following function is possible 353 -- by testing several 16#80# bits in a complete word in a single operation 354 355 function Is_ISO_646 (Item : in String) return Boolean is 356 begin 357 for J in Item'Range loop 358 if Item (J) not in ISO_646 then 359 return False; 360 end if; 361 end loop; 362 363 return True; 364 end Is_ISO_646; 365 366 --------------- 367 -- Is_Letter -- 368 --------------- 369 370 function Is_Letter (Item : in Character) return Boolean is 371 begin 372 return (Char_Map (Item) and Letter) /= 0; 373 end Is_Letter; 374 375 -------------- 376 -- Is_Lower -- 377 -------------- 378 379 function Is_Lower (Item : in Character) return Boolean is 380 begin 381 return (Char_Map (Item) and Lower) /= 0; 382 end Is_Lower; 383 384 ---------------- 385 -- Is_Special -- 386 ---------------- 387 388 function Is_Special (Item : in Character) return Boolean is 389 begin 390 return (Char_Map (Item) and Special) /= 0; 391 end Is_Special; 392 393 --------------- 394 -- Is_String -- 395 --------------- 396 397 function Is_String (Item : in Wide_String) return Boolean is 398 begin 399 for J in Item'Range loop 400 if Wide_Character'Pos (Item (J)) >= 256 then 401 return False; 402 end if; 403 end loop; 404 405 return True; 406 end Is_String; 407 408 -------------- 409 -- Is_Upper -- 410 -------------- 411 412 function Is_Upper (Item : in Character) return Boolean is 413 begin 414 return (Char_Map (Item) and Upper) /= 0; 415 end Is_Upper; 416 417 -------------- 418 -- To_Basic -- 419 -------------- 420 421 function To_Basic (Item : in Character) return Character is 422 begin 423 return Value (Basic_Map, Item); 424 end To_Basic; 425 426 function To_Basic (Item : in String) return String is 427 Result : String (1 .. Item'Length); 428 429 begin 430 for J in Item'Range loop 431 Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J)); 432 end loop; 433 434 return Result; 435 end To_Basic; 436 437 ------------------ 438 -- To_Character -- 439 ------------------ 440 441 function To_Character 442 (Item : in Wide_Character; 443 Substitute : in Character := ' ') 444 return Character 445 is 446 begin 447 if Is_Character (Item) then 448 return Character'Val (Wide_Character'Pos (Item)); 449 else 450 return Substitute; 451 end if; 452 end To_Character; 453 454 ---------------- 455 -- To_ISO_646 -- 456 ---------------- 457 458 function To_ISO_646 459 (Item : in Character; 460 Substitute : in ISO_646 := ' ') 461 return ISO_646 462 is 463 begin 464 if Item in ISO_646 then 465 return Item; 466 else 467 return Substitute; 468 end if; 469 end To_ISO_646; 470 471 function To_ISO_646 472 (Item : in String; 473 Substitute : in ISO_646 := ' ') 474 return String 475 is 476 Result : String (1 .. Item'Length); 477 478 begin 479 for J in Item'Range loop 480 if Item (J) in ISO_646 then 481 Result (J - (Item'First - 1)) := Item (J); 482 else 483 Result (J - (Item'First - 1)) := Substitute; 484 end if; 485 end loop; 486 487 return Result; 488 end To_ISO_646; 489 490 -------------- 491 -- To_Lower -- 492 -------------- 493 494 function To_Lower (Item : in Character) return Character is 495 begin 496 return Value (Lower_Case_Map, Item); 497 end To_Lower; 498 499 function To_Lower (Item : in String) return String is 500 Result : String (1 .. Item'Length); 501 502 begin 503 for J in Item'Range loop 504 Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J)); 505 end loop; 506 507 return Result; 508 end To_Lower; 509 510 --------------- 511 -- To_String -- 512 --------------- 513 514 function To_String 515 (Item : in Wide_String; 516 Substitute : in Character := ' ') 517 return String 518 is 519 Result : String (1 .. Item'Length); 520 521 begin 522 for J in Item'Range loop 523 Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); 524 end loop; 525 return Result; 526 end To_String; 527 528 -------------- 529 -- To_Upper -- 530 -------------- 531 532 function To_Upper 533 (Item : in Character) 534 return Character 535 is 536 begin 537 return Value (Upper_Case_Map, Item); 538 end To_Upper; 539 540 function To_Upper 541 (Item : in String) 542 return String 543 is 544 Result : String (1 .. Item'Length); 545 546 begin 547 for J in Item'Range loop 548 Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J)); 549 end loop; 550 551 return Result; 552 end To_Upper; 553 554 ----------------------- 555 -- To_Wide_Character -- 556 ----------------------- 557 558 function To_Wide_Character 559 (Item : in Character) 560 return Wide_Character 561 is 562 begin 563 return Wide_Character'Val (Character'Pos (Item)); 564 end To_Wide_Character; 565 566 -------------------- 567 -- To_Wide_String -- 568 -------------------- 569 570 function To_Wide_String 571 (Item : in String) 572 return Wide_String 573 is 574 Result : Wide_String (1 .. Item'Length); 575 576 begin 577 for J in Item'Range loop 578 Result (J - (Item'First - 1)) := To_Wide_Character (Item (J)); 579 end loop; 580 581 return Result; 582 end To_Wide_String; 583end Ada.Characters.Handling; 584