1------------------------------------------------------------------------------
2--                                                                          --
3--                            GNAT2XML COMPONENTS                           --
4--                                                                          --
5--                      G N A T 2 X M L . B U F F E R S                     --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--                      Copyright (C) 2013-2016, AdaCore                    --
10--                                                                          --
11-- Gnat2xml is free software; you can redistribute it and/or modify it      --
12-- under terms of the  GNU General Public License  as published by the Free --
13-- Software Foundation;  either version 2,  or  (at your option)  any later --
14-- version. Gnat2xml is distributed  in the hope  that it will be useful,   --
15-- but WITHOUT ANY WARRANTY; without even the implied warranty of MER-      --
16-- CHANTABILITY or  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General  --
17-- Public License for more details. You should have received a copy of the  --
18-- GNU General Public License distributed with GNAT; see file COPYING. If   --
19-- not, write to the Free Software Foundation, 59 Temple Place Suite 330,   --
20-- Boston, MA 02111-1307, USA.                                              --
21-- The gnat2xml tool was derived from the Avatox sources.                   --
22------------------------------------------------------------------------------
23
24pragma Ada_2012;
25
26with ASIS_UL.Vectors;
27with ASIS_UL.Char_Vectors; use ASIS_UL.Char_Vectors;
28use ASIS_UL.Char_Vectors.Char_Vectors;
29--  use all type ASIS_UL.Char_Vectors.Char_Vector;
30
31private with Ada.Finalization;
32
33package Ada_Trees.Buffers is
34
35   --  This package supports a character Buffer type, and a Marker type that
36   --  can be used to point at particular characters. The main feature of
37   --  this abstraction is that Markers are automatically kept up to date
38   --  as modifications are made. For example, suppose the buffer Buf contains:
39   --
40   --     "Hello, world."
41   --      123456789 123
42   --           ^   ^
43   --  and we have two Markers, one pointing at Buf(6) = ',' and the other
44   --  pointing at Buf(10) = 'r'. Suppose the insertion point is at Buf(8) =
45   --  'w', meaning insertions will occur before that character. If we insert
46   --  "***", we will then have:
47   --
48   --     "Hello, ***world."
49   --      123456789 123456
50   --           ^      ^
51   --  Note that the second Mark has "moved" so that it still points at the
52   --  'r'.
53   --
54   --  The basic idea is to move through the buffer (see procedure
55   --  Move_Forward), inserting text at various places (see procedure Insert).
56   --  When we get to the end, we can call Reset to move back to the beginning.
57
58   type Buffer is limited private;
59   --  Initially empty. A buffer has a current insertion point, called 'point',
60   --  which is initially at position 1.
61
62   type Marker is new Positive;
63   --  A Marker is a pointer to a particular character in the buffer. Note that
64   --  a Marker is valid only for a particular Buffer; it cannot be used to
65   --  refer to a different Buffer, nor can it be used after Clear.
66   --
67   --  Type Marker should really be private, but Ada has an annoying
68   --  restriction that would disallow package Marker_Vectors below, so we
69   --  expose the fact that it's an integer. You shouldn't be doing arithmetic
70   --  and the like on it outside this package. "=" comparisons are OK.
71
72   type Marker_Index is new Positive;
73
74   type Marker_Array is array (Marker_Index range <>) of Marker;
75   package Marker_Vectors is new ASIS_UL.Vectors
76     (Marker_Index,
77      Marker,
78      Marker_Array);
79   subtype Marker_Vector is Marker_Vectors.Vector;
80
81   type Marker_Vector_Ptr is access all Marker_Vector;
82   use Marker_Vectors;
83   --  use all type Marker_Vector;
84
85   function At_End (Buf : Buffer) return Boolean;
86   pragma Inline (At_End);
87   --  True if 'point' is past the last character
88
89   function At_Beginning (Buf : Buffer) return Boolean;
90   pragma Inline (At_Beginning);
91   --  True if 'point' points to the first character (i.e. position 1)
92
93   function Cur (Buf : Buffer) return W_Char with
94      Post => Cur'Result /= W_HT; -- ???For now
95   pragma Inline (Cur);
96   --  Return the character at 'point'. NUL if At_End.
97
98   function Cur_Column (Buf : Buffer) return Positive;
99   pragma Inline (Cur_Column);
100   --  Column number in which the Cur character appears
101
102   function Lookahead (Buf : Buffer; Offset : Positive := 1) return W_Char;
103   --  Return the character at 'point' + Offset. NUL if out of range.
104
105   function Lookback (Buf : Buffer; Offset : Positive := 1) return W_Char;
106   --  Return the character at 'point' - Offset. NUL if out of range.
107
108   function To_String (Buf : Buffer) return String with
109      Post => To_String'Result'First = 1;
110   function To_W_Str (Buf : Buffer) return W_Str with
111      Post => To_W_Str'Result'First = 1;
112      --  Returns the current logical string of the buffer
113
114   function To_Vector (Buf : Buffer) return Char_Vector with
115      Pre => At_Beginning (Buf);
116      --  'point' must be at the beginning of the buffer (e.g. after Reset).
117      --  Returns the content of the buffer.
118
119   function Elements
120     (Buf  : Buffer)
121      return ASIS_UL.Char_Vectors.Char_Vectors.Big_Ptr with
122      Pre => At_Beginning (Buf);
123      --  'point' must be at the beginning of the buffer (e.g. after Reset).
124      --  Returns the content of the buffer.
125
126   function Char_At (Buf : Buffer; Mark : Marker) return W_Char;
127   function Char_At (Buf : Buffer; Position : Positive) return W_Char;
128   pragma Inline (Char_At);
129   --  Return the character at the given Mark/Position
130
131   function Slice (Buf : Buffer; First, Last : Marker) return W_Str with
132      Post => Slice'Result'First = 1;
133      --  Return the string from First up to just before Last
134
135   function Slice
136     (Buf   : Buffer;
137      First : Positive;
138      Last  : Natural;
139      Lines : Boolean := False)
140      return  W_Str with
141      Post => Slice'Result'First = 1;
142   --  Return the string from First up to and including Last.
143   --  If Lines is True, we expand the slice to include whole lines.
144
145   procedure Insert (Buf : in out Buffer; C : W_Char);
146   procedure Insert (Buf : in out Buffer; S : W_Str);
147   procedure Insert_Any (Buf : in out Buffer; C : W_Char);
148   procedure Insert_Any (Buf : in out Buffer; S : W_Str);
149   --  Insert C/S at 'point', leaving 'point' after the insertion. It is an
150   --  error for NL to follow ' '. Insert disallows NLs; Insert_Any allows
151   --  them.
152
153   procedure Insert_NL (Buf : in out Buffer);
154   --  Same as Insert_Any (Buf, NL)
155
156   procedure Append (Buf : in out Buffer; C : W_Char);
157   procedure Append (Buf : in out Buffer; S : W_Str);
158   procedure Append_Any (Buf : in out Buffer; C : W_Char);
159   procedure Append_Any (Buf : in out Buffer; S : W_Str);
160   procedure Append_NL (Buf : in out Buffer);
161   --  Above are the same as Insert*, except that they may be slightly more
162   --  efficient, but they only work when inserting at the end of the buffer.
163   --  ???Actually, we should probably get rid of these; they probably don't
164   --  do any good.
165
166   procedure Insert_Keeping_Mark
167     (Buf  : in out Buffer;
168      Mark : Marker;
169      C    : W_Char) with
170      Pre => At_Point (Buf, Mark);
171      --  Mark must be at 'point'. This does the same as Insert (Buf, C),
172      --  except that Mark is not adjusted to point to the character after
173      --  'point'; it ends up pointing at the newly-inserted C.
174
175   procedure Replace_Cur (Buf : in out Buffer; C : W_Char);
176   --  Replace character at 'point' with C
177
178   procedure Replace_Previous (Buf : in out Buffer; C : W_Char);
179   --  Replace character just before 'point' with C
180
181   function String_To_Buffer (S : W_Str) return Buffer;
182   --  Return a buffer containing S, with 'point' set to the beginning
183
184   procedure Move_Forward (Buf : in out Buffer);
185   function Move_Forward (Buf : in out Buffer) return W_Char;
186   --  Move 'point' forward one character position. 'point' must not be at the
187   --  end. The function version returns the new current character.
188
189   procedure Delete_Char (Buf : in out Buffer) with
190     Pre => not At_End (Buf) and then False;
191   --  Delete the character at 'point', leaving 'point' at the following one.
192   --  ???This causes "duplicate marker" errors; currently not used.
193
194   procedure Clear (Buf : in out Buffer);
195   --  Set the buffer to its initial empty state. All existing Markers become
196   --  invalid.
197
198   function Is_Empty (Buf : Buffer) return Boolean;
199
200   procedure Reset (Buf : in out Buffer) with
201      Pre  => At_End (Buf),
202      Post => At_Beginning (Buf);
203      --  'point' must be at the end of the buffer. Move 'point' back to the
204      --  beginning. The buffer contents and markers are not changed.
205
206   function Mark (Buf : in out Buffer; Name : W_Char) return Marker;
207   --  Return a Marker that points to the current 'point'. Name is for
208   --  debugging; it is printed by debugging printouts, and may be used to keep
209   --  track of different kinds of Markers. Note that if you call Mark twice at
210   --  the same position, only the first Name will be used.
211
212   function Mark_Previous (Buf : in out Buffer; Name : W_Char) return Marker;
213   --  Similar to Mark, but the Marker points to the character just before the
214   --  current 'point'.
215
216   function At_Point (Buf : Buffer; Mark : Marker) return Boolean;
217   pragma Inline (At_Point);
218   --  True if Mark = the current 'point'
219
220   function Point (Buf : Buffer) return Positive;
221   --  Returns the position of 'point' in the logical string
222
223   function Position (Buf : Buffer; Mark : Marker) return Positive;
224   --  Returns the position of the mark in the logical string
225
226   function Last_Position (Buf : Buffer) return Natural;
227   --  Returns the last position in the buffer
228
229   function Mark_LT (Buf : Buffer; M, N : Marker) return Boolean;
230   --  less than
231
232   function Mark_LE (Buf : Buffer; M, N : Marker) return Boolean;
233   --  less than or equal
234
235   procedure Insert_Ada_Source
236     (Buf         : in out Buffer;
237      Input       : String;
238      Expand_Tabs : Boolean := False);
239   procedure Read_Ada_File
240     (Buf         : in out Buffer;
241      File_Name   : String;
242      BOM_Seen    : out Boolean;
243      Expand_Tabs : Boolean := False);
244   --  Read an Ada source file into Buf. BOM is set to UTF8_All if that BOM was
245   --  found, Unknown otherwise.
246
247   procedure Move (Target, Source : in out Buffer);
248
249   function To_Debug_String (Buf : Buffer) return String with
250      Post => To_Debug_String'Result'First = 1;
251      --  For debugging. Returns the current logical string of the buffer, with
252      --  the Name of each Marker interspersed.
253
254   procedure Dump_Buf (Buf : Buffer); -- less verbose
255   procedure Dump_Buffer (Buf : Buffer); -- more verbose
256   procedure Dump_Marker (Buf : Buffer; Mark : Marker);
257   --  For debugging
258
259   function Name (Buf : Buffer; Mark : Marker) return W_Char;
260
261   procedure Validate (Buf : Buffer; Message : String);
262
263private
264
265   --  The concept of markers that automatically track buffer changes comes
266   --  from Emacs. The implementation here is not based on Emacs.
267
268   --  Markers are logically updated on every insertion. The "obvious"
269   --  implementation, which we don't use because it's grossly inefficient,
270   --  is as follows: Store the characters in a Vector, and store the markers
271   --  in another vector, each marker having the index of the corresponding
272   --  character. When inserting a character, we would have to shove all the
273   --  characters to the right of 'point' one position to the right. We would
274   --  also have to shove all the markers to the right of 'point' one position
275   --  to the right, and increment their index.
276   --
277   --  So we don't do that.
278   --
279   --  The actual implementation is set up so we don't have to loop through
280   --  all the characters and Markers to the right of the 'point' every time we
281   --  do an insertion. Instead, we update each Marker just once on each pass
282   --  through the buffer. These updates are done when moving forward, not
283   --  when inserting text (except in one minor case: insertion just before
284   --  a marker).
285
286   type Marker_Rec is record
287      Position : Positive;
288      --  Position in either From or To
289
290      Flag : Boolean;
291      --  Determines whether Position points into From or To. In particular,
292      --  if Flag = Buf.To_Flag, Position points into To, and if Flag = not
293      --  Buf.To_Flag, Position points into From. This trick allows us to
294      --  switch all the Markers from To to From by flipping the To_Flag
295      --  (see Reset).
296
297      Name : W_Char;
298   end record;
299
300   type Marker_Rec_Array is array (Marker range <>) of Marker_Rec;
301   package Marker_Rec_Vectors is new ASIS_UL.Vectors
302     (Marker,
303      Marker_Rec,
304      Marker_Rec_Array);
305   subtype Marker_Rec_Vector is Marker_Rec_Vectors.Vector;
306   use Marker_Rec_Vectors;
307   --  use all type Marker_Rec_Vector;
308
309   type Buffer is new Ada.Finalization.Limited_Controlled with record
310      To, From : Char_Vector;
311      --  The current characters of the buffer are:
312      --
313      --    To & From(From_First..From'Last)
314      --
315      --  This is what To_String returns, and what we call the "logical
316      --  string". As we move the 'point' forward, we copy characters from From
317      --  to To, and adjust From_First. Inserted characters are simply appended
318      --  to To.
319
320      From_First : Positive;
321      --  First in-use character in From. Characters before that have already
322      --  been copied to To.
323
324      Markers : Marker_Rec_Vector;
325      --  Positions of all the Markers. A Marker is represented as an index
326      --  into this array, offset by Unique_Id. The order in which Markers
327      --  are stored in this array is not significant.
328
329      To_Markers, From_Markers : Marker_Vector;
330      --  To_Markers point into To, From_Markers into From. To_Markers includes
331      --  all markers up to and including 'point', so the last To_Marker can be
332      --  At_Point, in which case its Position is one past the end of To. That
333      --  is, a marker pointing to the first character of From is the last
334      --  element of To_Markers, rather than the first element of From_Markers
335      --  as you might expect. This is necessary because we can only append to
336      --  To_Markers, not prepend to From_Markers. Both arrays are stored in
337      --  increasing order of Position.
338
339      From_Markers_First : Marker_Index;
340      --  First in-use Marker in From_Markers. Markers before that have already
341      --  been copied to To. Thus, all valid Markers are:
342      --     To_Markers & From_Markers(From_Markers_First..From_Markers'Last).
343
344      To_Flag : Boolean := False; -- Initial value doesn't matter
345
346      Cur_Char : W_Char;
347      --  This is the result of the Cur function. It is equal to the first
348      --  character in the valid portion of From:
349      --     Buf.From (Buf.From_First)
350      --  unless we're at the end, in which case it is NUL.
351
352      Cur_Column : Positive;
353   end record;
354
355   procedure Initialize (Buf : in out Buffer);
356
357end Ada_Trees.Buffers;
358