1 {
2 This module collects the various tables used by the Lex program:
3 - the symbol table
4 - the position table
5 - the DFA states and transition tables
6 Note: All tables are allocated dynamically (at initialization time)
7 because of the 64KB static data limit.
8
9
10 Copyright (c) 1990-92 Albert Graef <ag@muwiinfa.geschichte.uni-mainz.de>
11 Copyright (C) 1996 Berend de Boer <berend@pobox.com>
12
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26
27
28 $Revision: 1.3 $
29 $Modtime: 96-08-01 10:23 $
30
31 $History: LEXTABLE.PAS $
32 *
33 * ***************** Version 2 *****************
34 * User: Berend Date: 96-10-10 Time: 21:16
35 * Updated in $/Lex and Yacc/tply
36 * Updated for protected mode, windows and Delphi 1.X and 2.X.
37
38 }
39
40
41 unit LexTable;
42
43 interface
44
45 uses LexBase;
46
47 const
48
49 (* Maximum table sizes: *)
50
51 max_keys = 997; (* size of hash symbol table (prime number!) *)
52 {$IFDEF MsDos}
53 max_pos = 600; (* maximum number of positions *)
54 max_states = 300; (* number of DFA states *)
55 max_trans = 600; (* number of transitions *)
56 max_start_states = 50; (* maximum number of user-defined start states *)
57 {$ELSE}
58 max_pos = 1200; (* maximum number of positions *)
59 max_states = 600; (* number of DFA states *)
60 max_trans = 1200; (* number of transitions *)
61 max_start_states = 100; (* maximum number of user-defined start states *)
62 {$ENDIF}
63
64 var
65
66 (* Actual table sizes: *)
67
68 n_pos : Integer;
69 n_states : Integer;
70 n_trans : Integer;
71 n_start_states : Integer;
72
73 type
74
75 (* Table data structures: *)
76
77 SymTable = array [1..max_keys] of record
78 pname : StrPtr;
79 (* print name; empty entries are denoted by pname=nil *)
80 case sym_type : ( none_sym, macro_sym, start_state_sym ) of
81 macro_sym : ( subst : StrPtr );
82 (* macro substitution *)
83 start_state_sym : ( start_state : Integer );
84 (* start state *)
85 end;
86
87 PosTableEntry = record
88 follow_pos : IntSetPtr;
89 (* set of follow positions *)
90 case pos_type : ( char_pos, cclass_pos, mark_pos ) of
91 char_pos : ( c : Char );
92 (* character position *)
93 cclass_pos : ( cc : CClassPtr );
94 (* character class position *)
95 mark_pos : ( rule, pos : Integer );
96 (* mark position *)
97 end;
98
99 PosTable = array [1..max_pos] of PosTableEntry;
100
101 FirstPosTable = array [0..2*max_start_states+1] of IntSetPtr;
102 (* first positions for start states (even states
103 are entered anywhere on the line, odd states only
104 at the beginning of the line; states 0 and 1 denote
105 default, states 2..2*n_start_states+1 user-defined
106 start states) *)
107
108 StartStateExclusive = array[0..max_start_states] of Boolean;
109
110 StateTableEntry = record
111 state_pos : IntSetPtr;
112 (* positions covered by state *)
113 final : Boolean;
114 (* final state? *)
115 trans_lo,
116 trans_hi : Integer;
117 (* transitions *)
118 end;
119
120 StateTable = array [0..max_states-1] of StateTableEntry;
121
122 TransTableEntry = record
123 cc : CClassPtr;
124 (* characters of transition *)
125 follow_pos : IntSetPtr;
126 (* follow positions (positions of next state) *)
127 next_state : Integer;
128 (* next state *)
129 end;
130
131 TransTable = array [1..max_trans] of TransTableEntry;
132
133
134 var
135
136 verbose : Boolean; (* status of the verbose option *)
137 optimize : Boolean; (* status of the optimization option *)
138
139 sym_table : ^SymTable; (* symbol table *)
140 pos_table : ^PosTable; (* position table *)
141 first_pos_table : ^FirstPosTable; (* first positions table *)
142 start_excl : ^StartStateExclusive; (* user-defined start state type *)
143 state_table : ^StateTable; (* DFA state table *)
144 trans_table : ^TransTable; (* DFA transition table *)
145
146
147 (* Operations: *)
148
149 (* Hash symbol table:
150 The following routines are supplied to be used with the generic hash table
151 routines in LexBase. *)
152
lookupnull153 function lookup(k : Integer) : String;
154 (* print name of symbol no. k *)
155 procedure entry(k : Integer; symbol : String);
156 (* enter symbol into table *)
157
158 (* Routines to build the position table: *)
159
160 procedure addCharPos(c : Char);
161 procedure addCClassPos(cc : CClassPtr);
162 procedure addMarkPos(rule, pos : Integer);
163 (* Positions are allocated in the order of calls to addCharPos, addCClassPos
164 and addMarkPos, starting at position 1. These routines also initialize
165 the corresponding follow sets. *)
166
167 (* Routines to build the state table: *)
168
169 var act_state : Integer; (* state currently considered *)
170
newStatenull171 function newState(POS : IntSetPtr) : Integer;
172 (* Add a new state with the given position set; initialize the state's
173 position set to POS (the offsets into the transition table are
174 initialized when the state becomes active, see startStateTrans, below).
175 Returns: the new state number *)
176
addStatenull177 function addState(POS : IntSetPtr) : Integer;
178 (* add a new state, but only if there is not already a state with the
179 same position set *)
180
181 procedure startStateTrans;
182 procedure endStateTrans;
183 (* initializes act_state's first and last offsets into the transition
184 table *)
185
n_state_transnull186 function n_state_trans(i : Integer) : Integer;
187 (* return number of transitions in state i *)
188
189 procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
190 (* adds a transition to the table *)
191
192 procedure mergeTrans;
193 (* sorts transitions w.r.t. next states and merges transitions for the
194 same next state in the active state *)
195
196 procedure sortTrans;
197 (* sort transitions in act_state lexicographically *)
198
199 implementation
200
201 uses LexMsgs;
202
203 (* Hash table routines: *)
204
lookupnull205 function lookup(k : Integer) : String;
206 begin
207 with sym_table^[k] do
208 if pname=nil then
209 lookup := ''
210 else
211 lookup := copy(pname^, 1, length(pname^))
212 end(*lookup*);
213 procedure entry(k : Integer; symbol : String);
214 begin
215 with sym_table^[k] do
216 begin
217 pname := newStr(symbol);
218 sym_type := none_sym;
219 end
220 end(*entry*);
221
222 (* Routines to build the position table: *)
223
224 procedure addCharPos(c : Char);
225 begin
226 inc(n_pos);
227 if n_pos>max_pos then fatal(pos_table_overflow);
228 pos_table^[n_pos].follow_pos := newIntSet;
229 pos_table^[n_pos].pos_type := char_pos;
230 pos_table^[n_pos].c := c;
231 end(*addCharPos*);
232
233 procedure addCClassPos(cc : CClassPtr);
234 begin
235 inc(n_pos);
236 if n_pos>max_pos then fatal(pos_table_overflow);
237 pos_table^[n_pos].follow_pos := newIntSet;
238 pos_table^[n_pos].pos_type := cclass_pos;
239 pos_table^[n_pos].cc := cc;
240 end(*addCClassPos*);
241
242 procedure addMarkPos(rule, pos : Integer);
243 begin
244 inc(n_pos);
245 if n_pos>max_pos then fatal(pos_table_overflow);
246 pos_table^[n_pos].follow_pos := newIntSet;
247 pos_table^[n_pos].pos_type := mark_pos;
248 pos_table^[n_pos].rule := rule;
249 pos_table^[n_pos].pos := pos;
250 end(*addMarkPos*);
251
252 (* Routines to build the state table: *)
253
newStatenull254 function newState(POS : IntSetPtr) : Integer;
255 begin
256 if n_states>=max_states then fatal(state_table_overflow);
257 newState := n_states;
258 with state_table^[n_states] do
259 begin
260 state_pos := POS;
261 final := false;
262 end;
263 inc(n_states);
264 end(*newState*);
265
addStatenull266 function addState(POS : IntSetPtr) : Integer;
267 var i : Integer;
268 begin
269 for i := 0 to pred(n_states) do
270 if equal(POS^, state_table^[i].state_pos^) then
271 begin
272 addState := i;
273 exit;
274 end;
275 addState := newState(POS);
276 end(*addState*);
277
278 procedure startStateTrans;
279 begin
280 state_table^[act_state].trans_lo := succ(n_trans);
281 end(*startStateTrans*);
282
283 procedure endStateTrans;
284 begin
285 state_table^[act_state].trans_hi := n_trans;
286 end(*endStateTrans*);
287
n_state_transnull288 function n_state_trans(i : Integer) : Integer;
289 begin
290 with state_table^[i] do
291 n_state_trans := trans_hi-trans_lo+1
292 end(*n_state_trans*);
293
294 (* Construction of the transition table:
295 This implementation here uses a simple optimization which tries to avoid
296 the construction of different transitions for each individual character
297 in large character classes by MERGING transitions whenever possible. The
298 transitions, at any time, will be partitioned into transitions on disjoint
299 character classes. When adding a new transition on character class cc, we
300 repartition the transitions as follows:
301 1. If the current character class cc equals an existing one, we can
302 simply add the new follow set to the existing one.
303 2. Otherwise, for some existing transition on some character class
304 cc1 with cc*cc1<>[], we replace the existing transition by a new
305 transition on cc*cc1 with follow set = cc1's follow set + cc's follow
306 set, and, if necessary (i.e. if cc1-cc is nonempty), a transition on
307 cc1-cc with follow set = cc1's follow set. We then remove the elements
308 of cc1 from cc, and proceed again with step 1.
309 We may stop this process as soon as cc becomes empty (then all characters
310 in cc have been distributed among the existing partitions). If cc does
311 NOT become empty, we have to construct a new transition for the remaining
312 character class (which then will be disjoint from all other character
313 classes in the transition table). *)
314
315 procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
316 var
317 i : Integer;
318 cc0, cc1, cc2 : CClass;
319 begin
320 for i := state_table^[act_state].trans_lo to n_trans do
321 if trans_table^[i].cc^=cc then
322 begin
323 setunion(trans_table^[i].follow_pos^, FOLLOW^);
324 exit
325 end
326 else
327 begin
328 cc0 := cc*trans_table^[i].cc^;
329 if cc0<>[] then
330 begin
331 cc1 := trans_table^[i].cc^-cc;
332 cc2 := cc-trans_table^[i].cc^;
333 if cc1<>[] then
334 begin
335 trans_table^[i].cc^ := cc1;
336 inc(n_trans);
337 if n_trans>max_trans then fatal(trans_table_overflow);
338 trans_table^[n_trans].cc := newCClass(cc0);
339 trans_table^[n_trans].follow_pos := newIntSet;
340 trans_table^[n_trans].follow_pos^ :=
341 trans_table^[i].follow_pos^;
342 setunion(trans_table^[n_trans].follow_pos^, FOLLOW^);
343 end
344 else
345 begin
346 trans_table^[i].cc^ := cc0;
347 setunion(trans_table^[i].follow_pos^, FOLLOW^);
348 end;
349 cc := cc2;
350 if cc=[] then exit;
351 end
352 end;
353 inc(n_trans);
354 if n_trans>max_trans then fatal(trans_table_overflow);
355 trans_table^[n_trans].cc := newCClass(cc);
356 trans_table^[n_trans].follow_pos := newIntSet;
357 trans_table^[n_trans].follow_pos^ := FOLLOW^;
358 end(*addCharTrans*);
359
360 (* comparison and swap procedures for sorting transitions: *)
361 {$ifndef fpc}{$F+}{$endif}
transLessNextStatenull362 function transLessNextState(i, j : Integer) : Boolean;
363 {$ifndef fpc}{$F-}{$endif}
364 (* compare transitions based on next states (used in mergeCharTrans) *)
365 begin
366 transLessNextState := trans_table^[i].next_state<
367 trans_table^[j].next_state
368 end(*transLessNextState*);
369 {$ifndef fpc}{$F+}{$endif}
transLessnull370 function transLess(i, j : Integer) : Boolean;
371 {$ifndef fpc}{$F-}{$endif}
372 (* lexical order on transitions *)
373 var c : Char; xi, xj : Boolean;
374 begin
375 for c := #0 to #255 do
376 begin
377 xi := c in trans_table^[i].cc^;
378 xj := c in trans_table^[j].cc^;
379 if xi<>xj then
380 begin
381 transLess := ord(xi)>ord(xj);
382 exit
383 end;
384 end;
385 transLess := false
386 end(*transLess*);
387 {$ifndef fpc}{$F+}{$endif}
388 procedure transSwap(i, j : Integer);
389 {$ifndef fpc}{$F-}{$endif}
390 (* swap transitions i and j *)
391 var x : TransTableEntry;
392 begin
393 x := trans_table^[i];
394 trans_table^[i] := trans_table^[j];
395 trans_table^[j] := x;
396 end(*transSwap*);
397
398 procedure mergeTrans;
399 var
400 i, j, n_deleted : Integer;
401 begin
402 (* sort transitions w.r.t. next states: *)
403 quicksort(state_table^[act_state].trans_lo,
404 n_trans,
405 {$ifdef fpc}@{$endif}transLessNextState,
406 {$ifdef fpc}@{$endif}transSwap);
407 (* merge transitions for the same next state: *)
408 n_deleted := 0;
409 for i := state_table^[act_state].trans_lo to n_trans do
410 if trans_table^[i].cc<>nil then
411 begin
412 j := succ(i);
413 while (j<=n_trans) and
414 (trans_table^[i].next_state =
415 trans_table^[j].next_state) do
416 begin
417 (* merge cclasses of transitions i and j, then mark
418 transition j as deleted *)
419 trans_table^[i].cc^ := trans_table^[i].cc^+
420 trans_table^[j].cc^;
421 trans_table^[j].cc := nil;
422 inc(n_deleted);
423 inc(j);
424 end;
425 end;
426 (* remove deleted transitions: *)
427 j := state_table^[act_state].trans_lo;
428 for i := state_table^[act_state].trans_lo to n_trans do
429 if trans_table^[i].cc<>nil then
430 if i<>j then
431 begin
432 trans_table^[j] := trans_table^[i];
433 inc(j);
434 end
435 else
436 inc(j);
437 (* update transition count: *)
438 dec(n_trans, n_deleted);
439 end(*mergeTrans*);
440
441 procedure sortTrans;
442 begin
443 quicksort(state_table^[act_state].trans_lo,
444 n_trans,
445 {$ifdef fpc}@{$endif}transLess,
446 {$ifdef fpc}@{$endif}transSwap);
447 end(*sortTrans*);
448
449 var i : Integer;
450
451 begin
452
453 verbose := false;
454 optimize := false;
455
456 n_pos := 0;
457 n_states := 0;
458 n_trans := 0;
459 n_start_states := 0;
460
461 (* allocate tables: *)
462
463 new(sym_table);
464 new(pos_table);
465 new(first_pos_table);
466 new(start_excl);
467 new(state_table);
468 new(trans_table);
469
470 (* initialize symbol table: *)
471
472 for i := 1 to max_keys do sym_table^[i].pname := nil;
473
474 end(*LexTables*).
475