1------------------------------------------------------------------------------ 2-- -- 3-- GNAT2XML COMPONENTS -- 4-- -- 5-- G N A T 2 X M L . B U F F E R S -- 6-- -- 7-- S p e c -- 8-- -- 9-- Copyright (C) 2013-2016, AdaCore -- 10-- -- 11-- Gnat2xml is free software; you can redistribute it and/or modify it -- 12-- under terms of the GNU General Public License as published by the Free -- 13-- Software Foundation; either version 2, or (at your option) any later -- 14-- version. Gnat2xml is distributed in the hope that it will be useful, -- 15-- but WITHOUT ANY WARRANTY; without even the implied warranty of MER- -- 16-- CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -- 17-- Public License for more details. You should have received a copy of the -- 18-- GNU General Public License distributed with GNAT; see file COPYING. If -- 19-- not, write to the Free Software Foundation, 59 Temple Place Suite 330, -- 20-- Boston, MA 02111-1307, USA. -- 21-- The gnat2xml tool was derived from the Avatox sources. -- 22------------------------------------------------------------------------------ 23 24pragma Ada_2012; 25 26with ASIS_UL.Vectors; 27with ASIS_UL.Char_Vectors; use ASIS_UL.Char_Vectors; 28use ASIS_UL.Char_Vectors.Char_Vectors; 29-- use all type ASIS_UL.Char_Vectors.Char_Vector; 30 31private with Ada.Finalization; 32 33package Ada_Trees.Buffers is 34 35 -- This package supports a character Buffer type, and a Marker type that 36 -- can be used to point at particular characters. The main feature of 37 -- this abstraction is that Markers are automatically kept up to date 38 -- as modifications are made. For example, suppose the buffer Buf contains: 39 -- 40 -- "Hello, world." 41 -- 123456789 123 42 -- ^ ^ 43 -- and we have two Markers, one pointing at Buf(6) = ',' and the other 44 -- pointing at Buf(10) = 'r'. Suppose the insertion point is at Buf(8) = 45 -- 'w', meaning insertions will occur before that character. If we insert 46 -- "***", we will then have: 47 -- 48 -- "Hello, ***world." 49 -- 123456789 123456 50 -- ^ ^ 51 -- Note that the second Mark has "moved" so that it still points at the 52 -- 'r'. 53 -- 54 -- The basic idea is to move through the buffer (see procedure 55 -- Move_Forward), inserting text at various places (see procedure Insert). 56 -- When we get to the end, we can call Reset to move back to the beginning. 57 58 type Buffer is limited private; 59 -- Initially empty. A buffer has a current insertion point, called 'point', 60 -- which is initially at position 1. 61 62 type Marker is new Positive; 63 -- A Marker is a pointer to a particular character in the buffer. Note that 64 -- a Marker is valid only for a particular Buffer; it cannot be used to 65 -- refer to a different Buffer, nor can it be used after Clear. 66 -- 67 -- Type Marker should really be private, but Ada has an annoying 68 -- restriction that would disallow package Marker_Vectors below, so we 69 -- expose the fact that it's an integer. You shouldn't be doing arithmetic 70 -- and the like on it outside this package. "=" comparisons are OK. 71 72 type Marker_Index is new Positive; 73 74 type Marker_Array is array (Marker_Index range <>) of Marker; 75 package Marker_Vectors is new ASIS_UL.Vectors 76 (Marker_Index, 77 Marker, 78 Marker_Array); 79 subtype Marker_Vector is Marker_Vectors.Vector; 80 81 type Marker_Vector_Ptr is access all Marker_Vector; 82 use Marker_Vectors; 83 -- use all type Marker_Vector; 84 85 function At_End (Buf : Buffer) return Boolean; 86 pragma Inline (At_End); 87 -- True if 'point' is past the last character 88 89 function At_Beginning (Buf : Buffer) return Boolean; 90 pragma Inline (At_Beginning); 91 -- True if 'point' points to the first character (i.e. position 1) 92 93 function Cur (Buf : Buffer) return W_Char with 94 Post => Cur'Result /= W_HT; -- ???For now 95 pragma Inline (Cur); 96 -- Return the character at 'point'. NUL if At_End. 97 98 function Cur_Column (Buf : Buffer) return Positive; 99 pragma Inline (Cur_Column); 100 -- Column number in which the Cur character appears 101 102 function Lookahead (Buf : Buffer; Offset : Positive := 1) return W_Char; 103 -- Return the character at 'point' + Offset. NUL if out of range. 104 105 function Lookback (Buf : Buffer; Offset : Positive := 1) return W_Char; 106 -- Return the character at 'point' - Offset. NUL if out of range. 107 108 function To_String (Buf : Buffer) return String with 109 Post => To_String'Result'First = 1; 110 function To_W_Str (Buf : Buffer) return W_Str with 111 Post => To_W_Str'Result'First = 1; 112 -- Returns the current logical string of the buffer 113 114 function To_Vector (Buf : Buffer) return Char_Vector with 115 Pre => At_Beginning (Buf); 116 -- 'point' must be at the beginning of the buffer (e.g. after Reset). 117 -- Returns the content of the buffer. 118 119 function Elements 120 (Buf : Buffer) 121 return ASIS_UL.Char_Vectors.Char_Vectors.Big_Ptr with 122 Pre => At_Beginning (Buf); 123 -- 'point' must be at the beginning of the buffer (e.g. after Reset). 124 -- Returns the content of the buffer. 125 126 function Char_At (Buf : Buffer; Mark : Marker) return W_Char; 127 function Char_At (Buf : Buffer; Position : Positive) return W_Char; 128 pragma Inline (Char_At); 129 -- Return the character at the given Mark/Position 130 131 function Slice (Buf : Buffer; First, Last : Marker) return W_Str with 132 Post => Slice'Result'First = 1; 133 -- Return the string from First up to just before Last 134 135 function Slice 136 (Buf : Buffer; 137 First : Positive; 138 Last : Natural; 139 Lines : Boolean := False) 140 return W_Str with 141 Post => Slice'Result'First = 1; 142 -- Return the string from First up to and including Last. 143 -- If Lines is True, we expand the slice to include whole lines. 144 145 procedure Insert (Buf : in out Buffer; C : W_Char); 146 procedure Insert (Buf : in out Buffer; S : W_Str); 147 procedure Insert_Any (Buf : in out Buffer; C : W_Char); 148 procedure Insert_Any (Buf : in out Buffer; S : W_Str); 149 -- Insert C/S at 'point', leaving 'point' after the insertion. It is an 150 -- error for NL to follow ' '. Insert disallows NLs; Insert_Any allows 151 -- them. 152 153 procedure Insert_NL (Buf : in out Buffer); 154 -- Same as Insert_Any (Buf, NL) 155 156 procedure Append (Buf : in out Buffer; C : W_Char); 157 procedure Append (Buf : in out Buffer; S : W_Str); 158 procedure Append_Any (Buf : in out Buffer; C : W_Char); 159 procedure Append_Any (Buf : in out Buffer; S : W_Str); 160 procedure Append_NL (Buf : in out Buffer); 161 -- Above are the same as Insert*, except that they may be slightly more 162 -- efficient, but they only work when inserting at the end of the buffer. 163 -- ???Actually, we should probably get rid of these; they probably don't 164 -- do any good. 165 166 procedure Insert_Keeping_Mark 167 (Buf : in out Buffer; 168 Mark : Marker; 169 C : W_Char) with 170 Pre => At_Point (Buf, Mark); 171 -- Mark must be at 'point'. This does the same as Insert (Buf, C), 172 -- except that Mark is not adjusted to point to the character after 173 -- 'point'; it ends up pointing at the newly-inserted C. 174 175 procedure Replace_Cur (Buf : in out Buffer; C : W_Char); 176 -- Replace character at 'point' with C 177 178 procedure Replace_Previous (Buf : in out Buffer; C : W_Char); 179 -- Replace character just before 'point' with C 180 181 function String_To_Buffer (S : W_Str) return Buffer; 182 -- Return a buffer containing S, with 'point' set to the beginning 183 184 procedure Move_Forward (Buf : in out Buffer); 185 function Move_Forward (Buf : in out Buffer) return W_Char; 186 -- Move 'point' forward one character position. 'point' must not be at the 187 -- end. The function version returns the new current character. 188 189 procedure Delete_Char (Buf : in out Buffer) with 190 Pre => not At_End (Buf) and then False; 191 -- Delete the character at 'point', leaving 'point' at the following one. 192 -- ???This causes "duplicate marker" errors; currently not used. 193 194 procedure Clear (Buf : in out Buffer); 195 -- Set the buffer to its initial empty state. All existing Markers become 196 -- invalid. 197 198 function Is_Empty (Buf : Buffer) return Boolean; 199 200 procedure Reset (Buf : in out Buffer) with 201 Pre => At_End (Buf), 202 Post => At_Beginning (Buf); 203 -- 'point' must be at the end of the buffer. Move 'point' back to the 204 -- beginning. The buffer contents and markers are not changed. 205 206 function Mark (Buf : in out Buffer; Name : W_Char) return Marker; 207 -- Return a Marker that points to the current 'point'. Name is for 208 -- debugging; it is printed by debugging printouts, and may be used to keep 209 -- track of different kinds of Markers. Note that if you call Mark twice at 210 -- the same position, only the first Name will be used. 211 212 function Mark_Previous (Buf : in out Buffer; Name : W_Char) return Marker; 213 -- Similar to Mark, but the Marker points to the character just before the 214 -- current 'point'. 215 216 function At_Point (Buf : Buffer; Mark : Marker) return Boolean; 217 pragma Inline (At_Point); 218 -- True if Mark = the current 'point' 219 220 function Point (Buf : Buffer) return Positive; 221 -- Returns the position of 'point' in the logical string 222 223 function Position (Buf : Buffer; Mark : Marker) return Positive; 224 -- Returns the position of the mark in the logical string 225 226 function Last_Position (Buf : Buffer) return Natural; 227 -- Returns the last position in the buffer 228 229 function Mark_LT (Buf : Buffer; M, N : Marker) return Boolean; 230 -- less than 231 232 function Mark_LE (Buf : Buffer; M, N : Marker) return Boolean; 233 -- less than or equal 234 235 procedure Insert_Ada_Source 236 (Buf : in out Buffer; 237 Input : String; 238 Expand_Tabs : Boolean := False); 239 procedure Read_Ada_File 240 (Buf : in out Buffer; 241 File_Name : String; 242 BOM_Seen : out Boolean; 243 Expand_Tabs : Boolean := False); 244 -- Read an Ada source file into Buf. BOM is set to UTF8_All if that BOM was 245 -- found, Unknown otherwise. 246 247 procedure Move (Target, Source : in out Buffer); 248 249 function To_Debug_String (Buf : Buffer) return String with 250 Post => To_Debug_String'Result'First = 1; 251 -- For debugging. Returns the current logical string of the buffer, with 252 -- the Name of each Marker interspersed. 253 254 procedure Dump_Buf (Buf : Buffer); -- less verbose 255 procedure Dump_Buffer (Buf : Buffer); -- more verbose 256 procedure Dump_Marker (Buf : Buffer; Mark : Marker); 257 -- For debugging 258 259 function Name (Buf : Buffer; Mark : Marker) return W_Char; 260 261 procedure Validate (Buf : Buffer; Message : String); 262 263private 264 265 -- The concept of markers that automatically track buffer changes comes 266 -- from Emacs. The implementation here is not based on Emacs. 267 268 -- Markers are logically updated on every insertion. The "obvious" 269 -- implementation, which we don't use because it's grossly inefficient, 270 -- is as follows: Store the characters in a Vector, and store the markers 271 -- in another vector, each marker having the index of the corresponding 272 -- character. When inserting a character, we would have to shove all the 273 -- characters to the right of 'point' one position to the right. We would 274 -- also have to shove all the markers to the right of 'point' one position 275 -- to the right, and increment their index. 276 -- 277 -- So we don't do that. 278 -- 279 -- The actual implementation is set up so we don't have to loop through 280 -- all the characters and Markers to the right of the 'point' every time we 281 -- do an insertion. Instead, we update each Marker just once on each pass 282 -- through the buffer. These updates are done when moving forward, not 283 -- when inserting text (except in one minor case: insertion just before 284 -- a marker). 285 286 type Marker_Rec is record 287 Position : Positive; 288 -- Position in either From or To 289 290 Flag : Boolean; 291 -- Determines whether Position points into From or To. In particular, 292 -- if Flag = Buf.To_Flag, Position points into To, and if Flag = not 293 -- Buf.To_Flag, Position points into From. This trick allows us to 294 -- switch all the Markers from To to From by flipping the To_Flag 295 -- (see Reset). 296 297 Name : W_Char; 298 end record; 299 300 type Marker_Rec_Array is array (Marker range <>) of Marker_Rec; 301 package Marker_Rec_Vectors is new ASIS_UL.Vectors 302 (Marker, 303 Marker_Rec, 304 Marker_Rec_Array); 305 subtype Marker_Rec_Vector is Marker_Rec_Vectors.Vector; 306 use Marker_Rec_Vectors; 307 -- use all type Marker_Rec_Vector; 308 309 type Buffer is new Ada.Finalization.Limited_Controlled with record 310 To, From : Char_Vector; 311 -- The current characters of the buffer are: 312 -- 313 -- To & From(From_First..From'Last) 314 -- 315 -- This is what To_String returns, and what we call the "logical 316 -- string". As we move the 'point' forward, we copy characters from From 317 -- to To, and adjust From_First. Inserted characters are simply appended 318 -- to To. 319 320 From_First : Positive; 321 -- First in-use character in From. Characters before that have already 322 -- been copied to To. 323 324 Markers : Marker_Rec_Vector; 325 -- Positions of all the Markers. A Marker is represented as an index 326 -- into this array, offset by Unique_Id. The order in which Markers 327 -- are stored in this array is not significant. 328 329 To_Markers, From_Markers : Marker_Vector; 330 -- To_Markers point into To, From_Markers into From. To_Markers includes 331 -- all markers up to and including 'point', so the last To_Marker can be 332 -- At_Point, in which case its Position is one past the end of To. That 333 -- is, a marker pointing to the first character of From is the last 334 -- element of To_Markers, rather than the first element of From_Markers 335 -- as you might expect. This is necessary because we can only append to 336 -- To_Markers, not prepend to From_Markers. Both arrays are stored in 337 -- increasing order of Position. 338 339 From_Markers_First : Marker_Index; 340 -- First in-use Marker in From_Markers. Markers before that have already 341 -- been copied to To. Thus, all valid Markers are: 342 -- To_Markers & From_Markers(From_Markers_First..From_Markers'Last). 343 344 To_Flag : Boolean := False; -- Initial value doesn't matter 345 346 Cur_Char : W_Char; 347 -- This is the result of the Cur function. It is equal to the first 348 -- character in the valid portion of From: 349 -- Buf.From (Buf.From_First) 350 -- unless we're at the end, in which case it is NUL. 351 352 Cur_Column : Positive; 353 end record; 354 355 procedure Initialize (Buf : in out Buffer); 356 357end Ada_Trees.Buffers; 358