1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUN-TIME COMPONENTS -- 4-- -- 5-- G N A T . D E C O D E _ S T R I N G -- 6-- -- 7-- S p e c -- 8-- -- 9-- Copyright (C) 2007-2010, AdaCore -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32-- This package provides a utility routine for converting from an encoded 33-- string to a corresponding Wide_String or Wide_Wide_String value. 34 35with Interfaces; use Interfaces; 36 37with System.WCh_Cnv; use System.WCh_Cnv; 38with System.WCh_Con; use System.WCh_Con; 39 40package body GNAT.Decode_String is 41 42 ----------------------- 43 -- Local Subprograms -- 44 ----------------------- 45 46 procedure Bad; 47 pragma No_Return (Bad); 48 -- Raise error for bad encoding 49 50 procedure Past_End; 51 pragma No_Return (Past_End); 52 -- Raise error for off end of string 53 54 --------- 55 -- Bad -- 56 --------- 57 58 procedure Bad is 59 begin 60 raise Constraint_Error with 61 "bad encoding or character out of range"; 62 end Bad; 63 64 --------------------------- 65 -- Decode_Wide_Character -- 66 --------------------------- 67 68 procedure Decode_Wide_Character 69 (Input : String; 70 Ptr : in out Natural; 71 Result : out Wide_Character) 72 is 73 Char : Wide_Wide_Character; 74 begin 75 Decode_Wide_Wide_Character (Input, Ptr, Char); 76 77 if Wide_Wide_Character'Pos (Char) > 16#FFFF# then 78 Bad; 79 else 80 Result := Wide_Character'Val (Wide_Wide_Character'Pos (Char)); 81 end if; 82 end Decode_Wide_Character; 83 84 ------------------------ 85 -- Decode_Wide_String -- 86 ------------------------ 87 88 function Decode_Wide_String (S : String) return Wide_String is 89 Result : Wide_String (1 .. S'Length); 90 Length : Natural; 91 begin 92 Decode_Wide_String (S, Result, Length); 93 return Result (1 .. Length); 94 end Decode_Wide_String; 95 96 procedure Decode_Wide_String 97 (S : String; 98 Result : out Wide_String; 99 Length : out Natural) 100 is 101 Ptr : Natural; 102 103 begin 104 Ptr := S'First; 105 Length := 0; 106 while Ptr <= S'Last loop 107 if Length >= Result'Last then 108 Past_End; 109 end if; 110 111 Length := Length + 1; 112 Decode_Wide_Character (S, Ptr, Result (Length)); 113 end loop; 114 end Decode_Wide_String; 115 116 -------------------------------- 117 -- Decode_Wide_Wide_Character -- 118 -------------------------------- 119 120 procedure Decode_Wide_Wide_Character 121 (Input : String; 122 Ptr : in out Natural; 123 Result : out Wide_Wide_Character) 124 is 125 C : Character; 126 127 function In_Char return Character; 128 pragma Inline (In_Char); 129 -- Function to get one input character 130 131 ------------- 132 -- In_Char -- 133 ------------- 134 135 function In_Char return Character is 136 begin 137 if Ptr <= Input'Last then 138 Ptr := Ptr + 1; 139 return Input (Ptr - 1); 140 else 141 Past_End; 142 end if; 143 end In_Char; 144 145 -- Start of processing for Decode_Wide_Wide_Character 146 147 begin 148 C := In_Char; 149 150 -- Special fast processing for UTF-8 case 151 152 if Encoding_Method = WCEM_UTF8 then 153 UTF8 : declare 154 U : Unsigned_32; 155 W : Unsigned_32; 156 157 procedure Get_UTF_Byte; 158 pragma Inline (Get_UTF_Byte); 159 -- Used to interpret 2#10xxxxxx# continuation byte in UTF-8 mode. 160 -- Reads a byte, and raises CE if the first two bits are not 10. 161 -- Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits. 162 163 ------------------ 164 -- Get_UTF_Byte -- 165 ------------------ 166 167 procedure Get_UTF_Byte is 168 begin 169 U := Unsigned_32 (Character'Pos (In_Char)); 170 171 if (U and 2#11000000#) /= 2#10_000000# then 172 Bad; 173 end if; 174 175 W := Shift_Left (W, 6) or (U and 2#00111111#); 176 end Get_UTF_Byte; 177 178 -- Start of processing for UTF8 case 179 180 begin 181 -- Note: for details of UTF8 encoding see RFC 3629 182 183 U := Unsigned_32 (Character'Pos (C)); 184 185 -- 16#00_0000#-16#00_007F#: 0xxxxxxx 186 187 if (U and 2#10000000#) = 2#00000000# then 188 Result := Wide_Wide_Character'Val (Character'Pos (C)); 189 190 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx 191 192 elsif (U and 2#11100000#) = 2#110_00000# then 193 W := U and 2#00011111#; 194 Get_UTF_Byte; 195 Result := Wide_Wide_Character'Val (W); 196 197 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx 198 199 elsif (U and 2#11110000#) = 2#1110_0000# then 200 W := U and 2#00001111#; 201 Get_UTF_Byte; 202 Get_UTF_Byte; 203 Result := Wide_Wide_Character'Val (W); 204 205 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 206 207 elsif (U and 2#11111000#) = 2#11110_000# then 208 W := U and 2#00000111#; 209 210 for K in 1 .. 3 loop 211 Get_UTF_Byte; 212 end loop; 213 214 Result := Wide_Wide_Character'Val (W); 215 216 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx 217 -- 10xxxxxx 10xxxxxx 218 219 elsif (U and 2#11111100#) = 2#111110_00# then 220 W := U and 2#00000011#; 221 222 for K in 1 .. 4 loop 223 Get_UTF_Byte; 224 end loop; 225 226 Result := Wide_Wide_Character'Val (W); 227 228 -- All other cases are invalid, note that this includes: 229 230 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx 231 -- 10xxxxxx 10xxxxxx 10xxxxxx 232 233 -- since Wide_Wide_Character does not include code values 234 -- greater than 16#03FF_FFFF#. 235 236 else 237 Bad; 238 end if; 239 end UTF8; 240 241 -- All encoding functions other than UTF-8 242 243 else 244 Non_UTF8 : declare 245 function Char_Sequence_To_UTF is 246 new Char_Sequence_To_UTF_32 (In_Char); 247 248 begin 249 -- For brackets, must test for specific case of [ not followed by 250 -- quotation, where we must not call Char_Sequence_To_UTF, but 251 -- instead just return the bracket unchanged. 252 253 if Encoding_Method = WCEM_Brackets 254 and then C = '[' 255 and then (Ptr > Input'Last or else Input (Ptr) /= '"') 256 then 257 Result := '['; 258 259 -- All other cases including [" with Brackets 260 261 else 262 Result := 263 Wide_Wide_Character'Val 264 (Char_Sequence_To_UTF (C, Encoding_Method)); 265 end if; 266 end Non_UTF8; 267 end if; 268 end Decode_Wide_Wide_Character; 269 270 ----------------------------- 271 -- Decode_Wide_Wide_String -- 272 ----------------------------- 273 274 function Decode_Wide_Wide_String (S : String) return Wide_Wide_String is 275 Result : Wide_Wide_String (1 .. S'Length); 276 Length : Natural; 277 begin 278 Decode_Wide_Wide_String (S, Result, Length); 279 return Result (1 .. Length); 280 end Decode_Wide_Wide_String; 281 282 procedure Decode_Wide_Wide_String 283 (S : String; 284 Result : out Wide_Wide_String; 285 Length : out Natural) 286 is 287 Ptr : Natural; 288 289 begin 290 Ptr := S'First; 291 Length := 0; 292 while Ptr <= S'Last loop 293 if Length >= Result'Last then 294 Past_End; 295 end if; 296 297 Length := Length + 1; 298 Decode_Wide_Wide_Character (S, Ptr, Result (Length)); 299 end loop; 300 end Decode_Wide_Wide_String; 301 302 ------------------------- 303 -- Next_Wide_Character -- 304 ------------------------- 305 306 procedure Next_Wide_Character (Input : String; Ptr : in out Natural) is 307 begin 308 if Ptr < Input'First then 309 Past_End; 310 end if; 311 312 -- Special efficient encoding for UTF-8 case 313 314 if Encoding_Method = WCEM_UTF8 then 315 UTF8 : declare 316 U : Unsigned_32; 317 318 procedure Getc; 319 pragma Inline (Getc); 320 -- Gets the character at Input (Ptr) and returns code in U as 321 -- Unsigned_32 value. On return Ptr is bumped past the character. 322 323 procedure Skip_UTF_Byte; 324 pragma Inline (Skip_UTF_Byte); 325 -- Skips past one encoded byte which must be 2#10xxxxxx# 326 327 ---------- 328 -- Getc -- 329 ---------- 330 331 procedure Getc is 332 begin 333 if Ptr > Input'Last then 334 Past_End; 335 else 336 U := Unsigned_32 (Character'Pos (Input (Ptr))); 337 Ptr := Ptr + 1; 338 end if; 339 end Getc; 340 341 ------------------- 342 -- Skip_UTF_Byte -- 343 ------------------- 344 345 procedure Skip_UTF_Byte is 346 begin 347 Getc; 348 349 if (U and 2#11000000#) /= 2#10_000000# then 350 Bad; 351 end if; 352 end Skip_UTF_Byte; 353 354 -- Start of processing for UTF-8 case 355 356 begin 357 -- 16#00_0000#-16#00_007F#: 0xxxxxxx 358 359 Getc; 360 361 if (U and 2#10000000#) = 2#00000000# then 362 return; 363 364 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx 365 366 elsif (U and 2#11100000#) = 2#110_00000# then 367 Skip_UTF_Byte; 368 369 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx 370 371 elsif (U and 2#11110000#) = 2#1110_0000# then 372 Skip_UTF_Byte; 373 Skip_UTF_Byte; 374 375 -- Any other code is invalid, note that this includes: 376 377 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 378 379 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx 380 -- 10xxxxxx 10xxxxxx 381 382 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx 383 -- 10xxxxxx 10xxxxxx 10xxxxxx 384 385 -- since Wide_Character does not allow codes > 16#FFFF# 386 387 else 388 Bad; 389 end if; 390 end UTF8; 391 392 -- Non-UTF-8 case 393 394 else 395 declare 396 Discard : Wide_Character; 397 begin 398 Decode_Wide_Character (Input, Ptr, Discard); 399 end; 400 end if; 401 end Next_Wide_Character; 402 403 ------------------------------ 404 -- Next_Wide_Wide_Character -- 405 ------------------------------ 406 407 procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural) is 408 begin 409 -- Special efficient encoding for UTF-8 case 410 411 if Encoding_Method = WCEM_UTF8 then 412 UTF8 : declare 413 U : Unsigned_32; 414 415 procedure Getc; 416 pragma Inline (Getc); 417 -- Gets the character at Input (Ptr) and returns code in U as 418 -- Unsigned_32 value. On return Ptr is bumped past the character. 419 420 procedure Skip_UTF_Byte; 421 pragma Inline (Skip_UTF_Byte); 422 -- Skips past one encoded byte which must be 2#10xxxxxx# 423 424 ---------- 425 -- Getc -- 426 ---------- 427 428 procedure Getc is 429 begin 430 if Ptr > Input'Last then 431 Past_End; 432 else 433 U := Unsigned_32 (Character'Pos (Input (Ptr))); 434 Ptr := Ptr + 1; 435 end if; 436 end Getc; 437 438 ------------------- 439 -- Skip_UTF_Byte -- 440 ------------------- 441 442 procedure Skip_UTF_Byte is 443 begin 444 Getc; 445 446 if (U and 2#11000000#) /= 2#10_000000# then 447 Bad; 448 end if; 449 end Skip_UTF_Byte; 450 451 -- Start of processing for UTF-8 case 452 453 begin 454 if Ptr < Input'First then 455 Past_End; 456 end if; 457 458 -- 16#00_0000#-16#00_007F#: 0xxxxxxx 459 460 Getc; 461 462 if (U and 2#10000000#) = 2#00000000# then 463 null; 464 465 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx 466 467 elsif (U and 2#11100000#) = 2#110_00000# then 468 Skip_UTF_Byte; 469 470 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx 471 472 elsif (U and 2#11110000#) = 2#1110_0000# then 473 Skip_UTF_Byte; 474 Skip_UTF_Byte; 475 476 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 477 478 elsif (U and 2#11111000#) = 2#11110_000# then 479 for K in 1 .. 3 loop 480 Skip_UTF_Byte; 481 end loop; 482 483 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx 484 -- 10xxxxxx 10xxxxxx 485 486 elsif (U and 2#11111100#) = 2#111110_00# then 487 for K in 1 .. 4 loop 488 Skip_UTF_Byte; 489 end loop; 490 491 -- Any other code is invalid, note that this includes: 492 493 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx 494 -- 10xxxxxx 10xxxxxx 10xxxxxx 495 496 -- since Wide_Wide_Character does not allow codes > 16#03FF_FFFF# 497 498 else 499 Bad; 500 end if; 501 end UTF8; 502 503 -- Non-UTF-8 case 504 505 else 506 declare 507 Discard : Wide_Wide_Character; 508 begin 509 Decode_Wide_Wide_Character (Input, Ptr, Discard); 510 end; 511 end if; 512 end Next_Wide_Wide_Character; 513 514 -------------- 515 -- Past_End -- 516 -------------- 517 518 procedure Past_End is 519 begin 520 raise Constraint_Error with "past end of string"; 521 end Past_End; 522 523 ------------------------- 524 -- Prev_Wide_Character -- 525 ------------------------- 526 527 procedure Prev_Wide_Character (Input : String; Ptr : in out Natural) is 528 begin 529 if Ptr > Input'Last + 1 then 530 Past_End; 531 end if; 532 533 -- Special efficient encoding for UTF-8 case 534 535 if Encoding_Method = WCEM_UTF8 then 536 UTF8 : declare 537 U : Unsigned_32; 538 539 procedure Getc; 540 pragma Inline (Getc); 541 -- Gets the character at Input (Ptr - 1) and returns code in U as 542 -- Unsigned_32 value. On return Ptr is decremented by one. 543 544 procedure Skip_UTF_Byte; 545 pragma Inline (Skip_UTF_Byte); 546 -- Checks that U is 2#10xxxxxx# and then calls Get 547 548 ---------- 549 -- Getc -- 550 ---------- 551 552 procedure Getc is 553 begin 554 if Ptr <= Input'First then 555 Past_End; 556 else 557 Ptr := Ptr - 1; 558 U := Unsigned_32 (Character'Pos (Input (Ptr))); 559 end if; 560 end Getc; 561 562 ------------------- 563 -- Skip_UTF_Byte -- 564 ------------------- 565 566 procedure Skip_UTF_Byte is 567 begin 568 if (U and 2#11000000#) = 2#10_000000# then 569 Getc; 570 else 571 Bad; 572 end if; 573 end Skip_UTF_Byte; 574 575 -- Start of processing for UTF-8 case 576 577 begin 578 -- 16#00_0000#-16#00_007F#: 0xxxxxxx 579 580 Getc; 581 582 if (U and 2#10000000#) = 2#00000000# then 583 return; 584 585 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx 586 587 else 588 Skip_UTF_Byte; 589 590 if (U and 2#11100000#) = 2#110_00000# then 591 return; 592 593 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx 594 595 else 596 Skip_UTF_Byte; 597 598 if (U and 2#11110000#) = 2#1110_0000# then 599 return; 600 601 -- Any other code is invalid, note that this includes: 602 603 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 604 -- 10xxxxxx 605 606 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 607 -- 10xxxxxx 10xxxxxx 608 -- 10xxxxxx 609 610 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 611 -- 10xxxxxx 10xxxxxx 612 -- 10xxxxxx 10xxxxxx 613 614 -- since Wide_Character does not allow codes > 16#FFFF# 615 616 else 617 Bad; 618 end if; 619 end if; 620 end if; 621 end UTF8; 622 623 -- Special efficient encoding for brackets case 624 625 elsif Encoding_Method = WCEM_Brackets then 626 Brackets : declare 627 P : Natural; 628 S : Natural; 629 630 begin 631 -- See if we have "] at end positions 632 633 if Ptr > Input'First + 1 634 and then Input (Ptr - 1) = ']' 635 and then Input (Ptr - 2) = '"' 636 then 637 P := Ptr - 2; 638 639 -- Loop back looking for [" at start 640 641 while P >= Ptr - 10 loop 642 if P <= Input'First + 1 then 643 Bad; 644 645 elsif Input (P - 1) = '"' 646 and then Input (P - 2) = '[' 647 then 648 -- Found ["..."], scan forward to check it 649 650 S := P - 2; 651 P := S; 652 Next_Wide_Character (Input, P); 653 654 -- OK if at original pointer, else error 655 656 if P = Ptr then 657 Ptr := S; 658 return; 659 else 660 Bad; 661 end if; 662 end if; 663 664 P := P - 1; 665 end loop; 666 667 -- Falling through loop means more than 8 chars between the 668 -- enclosing brackets (or simply a missing left bracket) 669 670 Bad; 671 672 -- Here if no bracket sequence present 673 674 else 675 if Ptr = Input'First then 676 Past_End; 677 else 678 Ptr := Ptr - 1; 679 end if; 680 end if; 681 end Brackets; 682 683 -- Non-UTF-8/Brackets. These are the inefficient cases where we have to 684 -- go to the start of the string and skip forwards till Ptr matches. 685 686 else 687 Non_UTF_Brackets : declare 688 Discard : Wide_Character; 689 PtrS : Natural; 690 PtrP : Natural; 691 692 begin 693 PtrS := Input'First; 694 695 if Ptr <= PtrS then 696 Past_End; 697 end if; 698 699 loop 700 PtrP := PtrS; 701 Decode_Wide_Character (Input, PtrS, Discard); 702 703 if PtrS = Ptr then 704 Ptr := PtrP; 705 return; 706 707 elsif PtrS > Ptr then 708 Bad; 709 end if; 710 end loop; 711 712 exception 713 when Constraint_Error => 714 Bad; 715 end Non_UTF_Brackets; 716 end if; 717 end Prev_Wide_Character; 718 719 ------------------------------ 720 -- Prev_Wide_Wide_Character -- 721 ------------------------------ 722 723 procedure Prev_Wide_Wide_Character (Input : String; Ptr : in out Natural) is 724 begin 725 if Ptr > Input'Last + 1 then 726 Past_End; 727 end if; 728 729 -- Special efficient encoding for UTF-8 case 730 731 if Encoding_Method = WCEM_UTF8 then 732 UTF8 : declare 733 U : Unsigned_32; 734 735 procedure Getc; 736 pragma Inline (Getc); 737 -- Gets the character at Input (Ptr - 1) and returns code in U as 738 -- Unsigned_32 value. On return Ptr is decremented by one. 739 740 procedure Skip_UTF_Byte; 741 pragma Inline (Skip_UTF_Byte); 742 -- Checks that U is 2#10xxxxxx# and then calls Get 743 744 ---------- 745 -- Getc -- 746 ---------- 747 748 procedure Getc is 749 begin 750 if Ptr <= Input'First then 751 Past_End; 752 else 753 Ptr := Ptr - 1; 754 U := Unsigned_32 (Character'Pos (Input (Ptr))); 755 end if; 756 end Getc; 757 758 ------------------- 759 -- Skip_UTF_Byte -- 760 ------------------- 761 762 procedure Skip_UTF_Byte is 763 begin 764 if (U and 2#11000000#) = 2#10_000000# then 765 Getc; 766 else 767 Bad; 768 end if; 769 end Skip_UTF_Byte; 770 771 -- Start of processing for UTF-8 case 772 773 begin 774 -- 16#00_0000#-16#00_007F#: 0xxxxxxx 775 776 Getc; 777 778 if (U and 2#10000000#) = 2#00000000# then 779 return; 780 781 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx 782 783 else 784 Skip_UTF_Byte; 785 786 if (U and 2#11100000#) = 2#110_00000# then 787 return; 788 789 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx 790 791 else 792 Skip_UTF_Byte; 793 794 if (U and 2#11110000#) = 2#1110_0000# then 795 return; 796 797 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 798 -- 10xxxxxx 799 800 else 801 Skip_UTF_Byte; 802 803 if (U and 2#11111000#) = 2#11110_000# then 804 return; 805 806 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 807 -- 10xxxxxx 10xxxxxx 808 -- 10xxxxxx 809 810 else 811 Skip_UTF_Byte; 812 813 if (U and 2#11111100#) = 2#111110_00# then 814 return; 815 816 -- Any other code is invalid, note that this includes: 817 818 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 819 -- 10xxxxxx 10xxxxxx 820 -- 10xxxxxx 10xxxxxx 821 822 -- since Wide_Wide_Character does not allow codes 823 -- greater than 16#03FF_FFFF# 824 825 else 826 Bad; 827 end if; 828 end if; 829 end if; 830 end if; 831 end if; 832 end UTF8; 833 834 -- Special efficient encoding for brackets case 835 836 elsif Encoding_Method = WCEM_Brackets then 837 Brackets : declare 838 P : Natural; 839 S : Natural; 840 841 begin 842 -- See if we have "] at end positions 843 844 if Ptr > Input'First + 1 845 and then Input (Ptr - 1) = ']' 846 and then Input (Ptr - 2) = '"' 847 then 848 P := Ptr - 2; 849 850 -- Loop back looking for [" at start 851 852 while P >= Ptr - 10 loop 853 if P <= Input'First + 1 then 854 Bad; 855 856 elsif Input (P - 1) = '"' 857 and then Input (P - 2) = '[' 858 then 859 -- Found ["..."], scan forward to check it 860 861 S := P - 2; 862 P := S; 863 Next_Wide_Wide_Character (Input, P); 864 865 -- OK if at original pointer, else error 866 867 if P = Ptr then 868 Ptr := S; 869 return; 870 else 871 Bad; 872 end if; 873 end if; 874 875 P := P - 1; 876 end loop; 877 878 -- Falling through loop means more than 8 chars between the 879 -- enclosing brackets (or simply a missing left bracket) 880 881 Bad; 882 883 -- Here if no bracket sequence present 884 885 else 886 if Ptr = Input'First then 887 Past_End; 888 else 889 Ptr := Ptr - 1; 890 end if; 891 end if; 892 end Brackets; 893 894 -- Non-UTF-8/Brackets. These are the inefficient cases where we have to 895 -- go to the start of the string and skip forwards till Ptr matches. 896 897 else 898 Non_UTF8_Brackets : declare 899 Discard : Wide_Wide_Character; 900 PtrS : Natural; 901 PtrP : Natural; 902 903 begin 904 PtrS := Input'First; 905 906 if Ptr <= PtrS then 907 Past_End; 908 end if; 909 910 loop 911 PtrP := PtrS; 912 Decode_Wide_Wide_Character (Input, PtrS, Discard); 913 914 if PtrS = Ptr then 915 Ptr := PtrP; 916 return; 917 918 elsif PtrS > Ptr then 919 Bad; 920 end if; 921 end loop; 922 923 exception 924 when Constraint_Error => 925 Bad; 926 end Non_UTF8_Brackets; 927 end if; 928 end Prev_Wide_Wide_Character; 929 930 -------------------------- 931 -- Validate_Wide_String -- 932 -------------------------- 933 934 function Validate_Wide_String (S : String) return Boolean is 935 Ptr : Natural; 936 937 begin 938 Ptr := S'First; 939 while Ptr <= S'Last loop 940 Next_Wide_Character (S, Ptr); 941 end loop; 942 943 return True; 944 945 exception 946 when Constraint_Error => 947 return False; 948 end Validate_Wide_String; 949 950 ------------------------------- 951 -- Validate_Wide_Wide_String -- 952 ------------------------------- 953 954 function Validate_Wide_Wide_String (S : String) return Boolean is 955 Ptr : Natural; 956 957 begin 958 Ptr := S'First; 959 while Ptr <= S'Last loop 960 Next_Wide_Wide_Character (S, Ptr); 961 end loop; 962 963 return True; 964 965 exception 966 when Constraint_Error => 967 return False; 968 end Validate_Wide_Wide_String; 969 970end GNAT.Decode_String; 971