1 {
2   This module collects the various tables used by the Lex program:
3   - the symbol table
4   - the position table
5   - the DFA states and transition tables
6   Note: All tables are allocated dynamically (at initialization time)
7   because of the 64KB static data limit.
8 
9 
10   Copyright (c) 1990-92  Albert Graef <ag@muwiinfa.geschichte.uni-mainz.de>
11   Copyright (C) 1996     Berend de Boer <berend@pobox.com>
12 
13   This program is free software; you can redistribute it and/or modify
14   it under the terms of the GNU General Public License as published by
15   the Free Software Foundation; either version 2 of the License, or
16   (at your option) any later version.
17 
18   This program is distributed in the hope that it will be useful,
19   but WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21   GNU General Public License for more details.
22 
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 
27 
28 $Revision: 1.3 $
29 $Modtime: 96-08-01 10:23 $
30 
31 $History: LEXTABLE.PAS $
32  *
33  * *****************  Version 2  *****************
34  * User: Berend       Date: 96-10-10   Time: 21:16
35  * Updated in $/Lex and Yacc/tply
36  * Updated for protected mode, windows and Delphi 1.X and 2.X.
37 
38 }
39 
40 
41 unit LexTable;
42 
43 interface
44 
45 uses LexBase;
46 
47 const
48 
49 (* Maximum table sizes: *)
50 
51 max_keys           =  997;  (* size of hash symbol table (prime number!)   *)
52 {$IFDEF MsDos}
53 max_pos            =  600;  (* maximum number of positions                 *)
54 max_states         =  300;  (* number of DFA states                        *)
55 max_trans          =  600;  (* number of transitions                       *)
56 max_start_states   =   50;  (* maximum number of user-defined start states *)
57 {$ELSE}
58 max_pos            = 1200;  (* maximum number of positions                 *)
59 max_states         =  600;  (* number of DFA states                        *)
60 max_trans          = 1200;  (* number of transitions                       *)
61 max_start_states   =  100;  (* maximum number of user-defined start states *)
62 {$ENDIF}
63 
64 var
65 
66 (* Actual table sizes: *)
67 
68 n_pos            : Integer;
69 n_states         : Integer;
70 n_trans          : Integer;
71 n_start_states   : Integer;
72 
73 type
74 
75 (* Table data structures: *)
76 
77 SymTable = array [1..max_keys] of record
78              pname  : StrPtr;
79                (* print name; empty entries are denoted by pname=nil *)
80              case sym_type : ( none_sym, macro_sym, start_state_sym ) of
81              macro_sym : ( subst : StrPtr );
82                (* macro substitution *)
83              start_state_sym : ( start_state : Integer );
84                (* start state *)
85            end;
86 
87 PosTableEntry = record
88                   follow_pos    : IntSetPtr;
89                     (* set of follow positions *)
90                   case pos_type : ( char_pos, cclass_pos, mark_pos ) of
91                   char_pos      : ( c   : Char );
92                     (* character position *)
93                   cclass_pos    : ( cc  : CClassPtr );
94                     (* character class position *)
95                   mark_pos      : ( rule, pos : Integer );
96                     (* mark position *)
97                 end;
98 
99 PosTable = array [1..max_pos] of PosTableEntry;
100 
101 FirstPosTable  = array [0..2*max_start_states+1] of IntSetPtr;
102                    (* first positions for start states (even states
103                       are entered anywhere on the line, odd states only
104                       at the beginning of the line; states 0 and 1 denote
105                       default, states 2..2*n_start_states+1 user-defined
106                       start states) *)
107 
108 StartStateExclusive = array[0..max_start_states] of Boolean;
109 
110 StateTableEntry = record
111                     state_pos : IntSetPtr;
112                       (* positions covered by state *)
113                     final     : Boolean;
114                       (* final state? *)
115                     trans_lo,
116                     trans_hi  : Integer;
117                       (* transitions *)
118                   end;
119 
120 StateTable = array [0..max_states-1] of StateTableEntry;
121 
122 TransTableEntry = record
123                     cc              : CClassPtr;
124                       (* characters of transition *)
125                     follow_pos      : IntSetPtr;
126                       (* follow positions (positions of next state) *)
127                     next_state      : Integer;
128                       (* next state *)
129                   end;
130 
131 TransTable = array [1..max_trans] of TransTableEntry;
132 
133 
134 var
135 
136 verbose           : Boolean;          (* status of the verbose option *)
137 optimize          : Boolean;          (* status of the optimization option *)
138 
139 sym_table         : ^SymTable;        (* symbol table *)
140 pos_table         : ^PosTable;        (* position table *)
141 first_pos_table   : ^FirstPosTable;   (* first positions table *)
142 start_excl        : ^StartStateExclusive; (* user-defined start state type *)
143 state_table       : ^StateTable;      (* DFA state table *)
144 trans_table       : ^TransTable;      (* DFA transition table *)
145 
146 
147 (* Operations: *)
148 
149 (* Hash symbol table:
150    The following routines are supplied to be used with the generic hash table
151    routines in LexBase. *)
152 
lookupnull153 function lookup(k : Integer) : String;
154   (* print name of symbol no. k *)
155 procedure entry(k : Integer; symbol : String);
156   (* enter symbol into table *)
157 
158 (* Routines to build the position table: *)
159 
160 procedure addCharPos(c : Char);
161 procedure addCClassPos(cc : CClassPtr);
162 procedure addMarkPos(rule, pos : Integer);
163   (* Positions are allocated in the order of calls to addCharPos, addCClassPos
164      and addMarkPos, starting at position 1. These routines also initialize
165      the corresponding follow sets. *)
166 
167 (* Routines to build the state table: *)
168 
169 var act_state : Integer; (* state currently considered *)
170 
newStatenull171 function newState(POS : IntSetPtr) : Integer;
172   (* Add a new state with the given position set; initialize the state's
173      position set to POS (the offsets into the transition table are
174      initialized when the state becomes active, see startStateTrans, below).
175      Returns: the new state number *)
176 
addStatenull177 function addState(POS : IntSetPtr) : Integer;
178   (* add a new state, but only if there is not already a state with the
179      same position set *)
180 
181 procedure startStateTrans;
182 procedure endStateTrans;
183   (* initializes act_state's first and last offsets into the transition
184      table *)
185 
n_state_transnull186 function n_state_trans(i : Integer) : Integer;
187   (* return number of transitions in state i *)
188 
189 procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
190   (* adds a transition to the table *)
191 
192 procedure mergeTrans;
193   (* sorts transitions w.r.t. next states and merges transitions for the
194      same next state in the active state *)
195 
196 procedure sortTrans;
197   (* sort transitions in act_state lexicographically *)
198 
199 implementation
200 
201 uses LexMsgs;
202 
203 (* Hash table routines: *)
204 
lookupnull205 function lookup(k : Integer) : String;
206   begin
207     with sym_table^[k] do
208       if pname=nil then
209         lookup := ''
210       else
211         lookup := copy(pname^, 1, length(pname^))
212   end(*lookup*);
213 procedure entry(k : Integer; symbol : String);
214   begin
215     with sym_table^[k] do
216       begin
217         pname    := newStr(symbol);
218         sym_type := none_sym;
219       end
220   end(*entry*);
221 
222 (* Routines to build the position table: *)
223 
224 procedure addCharPos(c : Char);
225   begin
226     inc(n_pos);
227     if n_pos>max_pos then fatal(pos_table_overflow);
228     pos_table^[n_pos].follow_pos     := newIntSet;
229     pos_table^[n_pos].pos_type       := char_pos;
230     pos_table^[n_pos].c              := c;
231   end(*addCharPos*);
232 
233 procedure addCClassPos(cc : CClassPtr);
234   begin
235     inc(n_pos);
236     if n_pos>max_pos then fatal(pos_table_overflow);
237     pos_table^[n_pos].follow_pos     := newIntSet;
238     pos_table^[n_pos].pos_type       := cclass_pos;
239     pos_table^[n_pos].cc             := cc;
240   end(*addCClassPos*);
241 
242 procedure addMarkPos(rule, pos : Integer);
243   begin
244     inc(n_pos);
245     if n_pos>max_pos then fatal(pos_table_overflow);
246     pos_table^[n_pos].follow_pos     := newIntSet;
247     pos_table^[n_pos].pos_type       := mark_pos;
248     pos_table^[n_pos].rule           := rule;
249     pos_table^[n_pos].pos            := pos;
250   end(*addMarkPos*);
251 
252 (* Routines to build the state table: *)
253 
newStatenull254 function newState(POS : IntSetPtr) : Integer;
255   begin
256     if n_states>=max_states then fatal(state_table_overflow);
257     newState := n_states;
258     with state_table^[n_states] do
259       begin
260         state_pos := POS;
261         final     := false;
262       end;
263     inc(n_states);
264   end(*newState*);
265 
addStatenull266 function addState(POS : IntSetPtr) : Integer;
267   var i : Integer;
268   begin
269     for i := 0 to pred(n_states) do
270       if equal(POS^, state_table^[i].state_pos^) then
271         begin
272           addState := i;
273           exit;
274         end;
275     addState := newState(POS);
276   end(*addState*);
277 
278 procedure startStateTrans;
279   begin
280     state_table^[act_state].trans_lo := succ(n_trans);
281   end(*startStateTrans*);
282 
283 procedure endStateTrans;
284   begin
285     state_table^[act_state].trans_hi := n_trans;
286   end(*endStateTrans*);
287 
n_state_transnull288 function n_state_trans(i : Integer) : Integer;
289   begin
290     with state_table^[i] do
291       n_state_trans := trans_hi-trans_lo+1
292   end(*n_state_trans*);
293 
294 (* Construction of the transition table:
295    This implementation here uses a simple optimization which tries to avoid
296    the construction of different transitions for each individual character
297    in large character classes by MERGING transitions whenever possible. The
298    transitions, at any time, will be partitioned into transitions on disjoint
299    character classes. When adding a new transition on character class cc, we
300    repartition the transitions as follows:
301    1. If the current character class cc equals an existing one, we can
302       simply add the new follow set to the existing one.
303    2. Otherwise, for some existing transition on some character class
304       cc1 with cc*cc1<>[], we replace the existing transition by a new
305       transition on cc*cc1 with follow set = cc1's follow set + cc's follow
306       set, and, if necessary (i.e. if cc1-cc is nonempty), a transition on
307       cc1-cc with follow set = cc1's follow set. We then remove the elements
308       of cc1 from cc, and proceed again with step 1.
309    We may stop this process as soon as cc becomes empty (then all characters
310    in cc have been distributed among the existing partitions). If cc does
311    NOT become empty, we have to construct a new transition for the remaining
312    character class (which then will be disjoint from all other character
313    classes in the transition table). *)
314 
315 procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
316   var
317     i : Integer;
318     cc0, cc1, cc2 : CClass;
319   begin
320     for i := state_table^[act_state].trans_lo to n_trans do
321       if trans_table^[i].cc^=cc then
322         begin
323           setunion(trans_table^[i].follow_pos^, FOLLOW^);
324           exit
325         end
326       else
327         begin
328           cc0 := cc*trans_table^[i].cc^;
329           if cc0<>[] then
330             begin
331               cc1 := trans_table^[i].cc^-cc;
332               cc2 := cc-trans_table^[i].cc^;
333               if cc1<>[] then
334                 begin
335                   trans_table^[i].cc^ := cc1;
336                   inc(n_trans);
337                   if n_trans>max_trans then fatal(trans_table_overflow);
338                   trans_table^[n_trans].cc := newCClass(cc0);
339                   trans_table^[n_trans].follow_pos := newIntSet;
340                   trans_table^[n_trans].follow_pos^ :=
341                     trans_table^[i].follow_pos^;
342                   setunion(trans_table^[n_trans].follow_pos^, FOLLOW^);
343                 end
344               else
345                 begin
346                   trans_table^[i].cc^ := cc0;
347                   setunion(trans_table^[i].follow_pos^, FOLLOW^);
348                 end;
349               cc := cc2;
350               if cc=[] then exit;
351             end
352         end;
353     inc(n_trans);
354     if n_trans>max_trans then fatal(trans_table_overflow);
355     trans_table^[n_trans].cc          := newCClass(cc);
356     trans_table^[n_trans].follow_pos  := newIntSet;
357     trans_table^[n_trans].follow_pos^ := FOLLOW^;
358   end(*addCharTrans*);
359 
360 (* comparison and swap procedures for sorting transitions: *)
361 {$ifndef fpc}{$F+}{$endif}
transLessNextStatenull362 function transLessNextState(i, j : Integer) : Boolean;
363 {$ifndef fpc}{$F-}{$endif}
364   (* compare transitions based on next states (used in mergeCharTrans) *)
365   begin
366     transLessNextState := trans_table^[i].next_state<
367                           trans_table^[j].next_state
368   end(*transLessNextState*);
369 {$ifndef fpc}{$F+}{$endif}
transLessnull370 function transLess(i, j : Integer) : Boolean;
371 {$ifndef fpc}{$F-}{$endif}
372   (* lexical order on transitions *)
373   var c : Char; xi, xj : Boolean;
374   begin
375     for c := #0 to #255 do
376       begin
377         xi := c in trans_table^[i].cc^;
378         xj := c in trans_table^[j].cc^;
379         if xi<>xj then
380           begin
381             transLess := ord(xi)>ord(xj);
382             exit
383           end;
384       end;
385     transLess := false
386   end(*transLess*);
387 {$ifndef fpc}{$F+}{$endif}
388 procedure transSwap(i, j : Integer);
389 {$ifndef fpc}{$F-}{$endif}
390   (* swap transitions i and j *)
391   var x : TransTableEntry;
392   begin
393     x := trans_table^[i];
394     trans_table^[i] := trans_table^[j];
395     trans_table^[j] := x;
396   end(*transSwap*);
397 
398 procedure mergeTrans;
399   var
400     i, j, n_deleted : Integer;
401   begin
402     (* sort transitions w.r.t. next states: *)
403     quicksort(state_table^[act_state].trans_lo,
404               n_trans,
405               {$ifdef fpc}@{$endif}transLessNextState,
406               {$ifdef fpc}@{$endif}transSwap);
407     (* merge transitions for the same next state: *)
408     n_deleted := 0;
409     for i := state_table^[act_state].trans_lo to n_trans do
410     if trans_table^[i].cc<>nil then
411       begin
412         j := succ(i);
413         while (j<=n_trans) and
414               (trans_table^[i].next_state =
415                trans_table^[j].next_state) do
416           begin
417             (* merge cclasses of transitions i and j, then mark
418                transition j as deleted *)
419             trans_table^[i].cc^ := trans_table^[i].cc^+
420                                    trans_table^[j].cc^;
421             trans_table^[j].cc  := nil;
422             inc(n_deleted);
423             inc(j);
424           end;
425       end;
426     (* remove deleted transitions: *)
427     j := state_table^[act_state].trans_lo;
428     for i := state_table^[act_state].trans_lo to n_trans do
429       if trans_table^[i].cc<>nil then
430         if i<>j then
431           begin
432             trans_table^[j] := trans_table^[i];
433             inc(j);
434           end
435         else
436           inc(j);
437     (* update transition count: *)
438     dec(n_trans, n_deleted);
439   end(*mergeTrans*);
440 
441 procedure sortTrans;
442   begin
443     quicksort(state_table^[act_state].trans_lo,
444               n_trans,
445               {$ifdef fpc}@{$endif}transLess,
446               {$ifdef fpc}@{$endif}transSwap);
447   end(*sortTrans*);
448 
449 var i : Integer;
450 
451 begin
452 
453   verbose          := false;
454   optimize         := false;
455 
456   n_pos            := 0;
457   n_states         := 0;
458   n_trans          := 0;
459   n_start_states   := 0;
460 
461   (* allocate tables: *)
462 
463   new(sym_table);
464   new(pos_table);
465   new(first_pos_table);
466   new(start_excl);
467   new(state_table);
468   new(trans_table);
469 
470   (* initialize symbol table: *)
471 
472   for i := 1 to max_keys do sym_table^[i].pname := nil;
473 
474 end(*LexTables*).
475