1
2 unit LexLib;
3
4 (* Standard Lex library unit for TP Lex Version 3.0.
5 2-11-91 AG *)
6
7 interface
8
9 (* The Lex library unit supplies a collection of variables and routines
10 needed by the lexical analyzer routine yylex and application programs
11 using Lex-generated lexical analyzers. It also provides access to the
12 input/output streams used by the lexical analyzer and the text of the
13 matched string, and provides some utility functions which may be used
14 in actions.
15
16 This `standard' version of the LexLib unit is used to implement lexical
17 analyzers which read from and write to MS-DOS files (using standard input
18 and output, by default). It is suitable for many standard applications
19 for lexical analyzers, such as text conversion tools or compilers.
20
21 However, you may create your own version of the LexLib unit, tailored to
22 your target applications. In particular, you may wish to provide another
23 set of I/O functions, e.g., if you want to read from or write to memory
24 instead to files, or want to use different file types. *)
25
26 (* Variables:
27
28 The variable yytext contains the current match, yyleng its length.
29 The variable yyline contains the current input line, and yylineno and
30 yycolno denote the current input position (line, column). These values
31 are often used in giving error diagnostics (however, they will only be
32 meaningful if there is no rescanning across line ends).
33
34 The variables yyinput and yyoutput are the text files which are used
35 by the lexical analyzer. By default, they are assigned to standard
36 input and output, but you may change these assignments to fit your
37 target application (use the Turbo Pascal standard routines assign,
38 reset, and rewrite for this purpose). *)
39
40 var
41
42 yyinput, yyoutput : Text; (* input and output file *)
43 yyline : String; (* current input line *)
44 yylineno, yycolno : Integer; (* current input position *)
45 yytext : String; (* matched text (should be considered r/o) *)
46 yyleng : Byte (* length of matched text *)
47 absolute yytext;
48
49 (* I/O routines:
50
51 The following routines get_char, unget_char and put_char are used to
52 implement access to the input and output files. Since \n (newline) for
53 Lex means line end, the I/O routines have to translate MS-DOS line ends
54 (carriage-return/line-feed) into newline characters and vice versa. Input
55 is buffered to allow rescanning text (via unput_char).
56
57 The input buffer holds the text of the line to be scanned. When the input
58 buffer empties, a new line is obtained from the input stream. Characters
59 can be returned to the input buffer by calls to unget_char. At end-of-
60 file a null character is returned.
61
62 The input routines also keep track of the input position and set the
63 yyline, yylineno, yycolno variables accordingly.
64
65 Since the rest of the Lex library only depends on these three routines
66 (there are no direct references to the yyinput and yyoutput files or
67 to the input buffer), you can easily replace get_char, unget_char and
68 put_char by another suitable set of routines, e.g. if you want to read
69 from/write to memory, etc. *)
70
get_charnull71 function get_char : Char;
72 (* obtain one character from the input file (null character at end-of-
73 file) *)
74
75 procedure unget_char ( c : Char );
76 (* return one character to the input file to be reread in subsequent calls
77 to get_char *)
78
79 procedure put_char ( c : Char );
80 (* write one character to the output file *)
81
82 (* Utility routines: *)
83
84 procedure echo;
85 (* echoes the current match to the output stream *)
86
87 procedure yymore;
88 (* append the next match to the current one *)
89
90 procedure yyless ( n : Integer );
91 (* truncate yytext to size n and return the remaining characters to the
92 input stream *)
93
94 procedure reject;
95 (* reject the current match and execute the next one *)
96
97 (* reject does not actually cause the input to be rescanned; instead,
98 internal state information is used to find the next match. Hence
99 you should not try to modify the input stream or the yytext variable
100 when rejecting a match. *)
101
102 procedure return ( n : Integer );
103 procedure returnc ( c : Char );
104 (* sets the return value of yylex *)
105
106 procedure start ( state : Integer );
107 (* puts the lexical analyzer in the given start state; state=0 denotes
108 the default start state, other values are user-defined *)
109
110 (* yywrap:
111
112 The yywrap function is called by yylex at end-of-file (unless you have
113 specified a rule matching end-of-file). You may redefine this routine
114 in your Lex program to do application-dependent processing at end of
115 file. In particular, yywrap may arrange for more input and return false
116 in which case the yylex routine resumes lexical analysis. *)
117
yywrapnull118 function yywrap : Boolean;
119 (* The default yywrap routine supplied here closes input and output files
120 and returns true (causing yylex to terminate). *)
121
122 (* The following are the internal data structures and routines used by the
123 lexical analyzer routine yylex; they should not be used directly. *)
124
125 var
126
127 yystate : Integer; (* current state of lexical analyzer *)
128 yyactchar : Char; (* current character *)
129 yylastchar : Char; (* last matched character (#0 if none) *)
130 yyrule : Integer; (* matched rule *)
131 yyreject : Boolean; (* current match rejected? *)
132 yydone : Boolean; (* yylex return value set? *)
133 yyretval : Integer; (* yylex return value *)
134
135 procedure yynew;
136 (* starts next match; initializes state information of the lexical
137 analyzer *)
138
139 procedure yyscan;
140 (* gets next character from the input stream and updates yytext and
141 yyactchar accordingly *)
142
143 procedure yymark ( n : Integer );
144 (* marks position for rule no. n *)
145
146 procedure yymatch ( n : Integer );
147 (* declares a match for rule number n *)
148
yyfindnull149 function yyfind ( var n : Integer ) : Boolean;
150 (* finds the last match and the corresponding marked position and adjusts
151 the matched string accordingly; returns:
152 - true if a rule has been matched, false otherwise
153 - n: the number of the matched rule *)
154
yydefaultnull155 function yydefault : Boolean;
156 (* executes the default action (copy character); returns true unless
157 at end-of-file *)
158
159 procedure yyclear;
160 (* reinitializes state information after lexical analysis has been
161 finished *)
162
163 implementation
164
165 procedure fatal ( msg : String );
166 (* writes a fatal error message and halts program *)
167 begin
168 writeln('LexLib: ', msg);
169 halt(1);
170 end(*fatal*);
171
172 (* I/O routines: *)
173
174 const nl = #10; (* newline character *)
175
176 const max_chars = 2048;
177
178 var
179
180 bufptr : Integer;
181 buf : array [1..max_chars] of Char;
182
get_charnull183 function get_char : Char;
184 var i : Integer;
185 begin
186 if (bufptr=0) and not eof(yyinput) then
187 begin
188 readln(yyinput, yyline);
189 inc(yylineno); yycolno := 1;
190 buf[1] := nl;
191 for i := 1 to length(yyline) do
192 buf[i+1] := yyline[length(yyline)-i+1];
193 inc(bufptr, length(yyline)+1);
194 end;
195 if bufptr>0 then
196 begin
197 get_char := buf[bufptr];
198 dec(bufptr);
199 inc(yycolno);
200 end
201 else
202 get_char := #0;
203 end(*get_char*);
204
205 procedure unget_char ( c : Char );
206 begin
207 if bufptr=max_chars then fatal('input buffer overflow');
208 inc(bufptr);
209 dec(yycolno);
210 buf[bufptr] := c;
211 end(*unget_char*);
212
213 procedure put_char ( c : Char );
214 begin
215 if c=#0 then
216 { ignore }
217 else if c=nl then
218 writeln(yyoutput)
219 else
220 write(yyoutput, c)
221 end(*put_char*);
222
223 (* Variables:
224
225 Some state information is maintained to keep track with calls to yymore,
226 yyless, reject, start and yymatch/yymark, and to initialize state
227 information used by the lexical analyzer.
228 - yystext: contains the initial contents of the yytext variable; this
229 will be the empty string, unless yymore is called which sets yystext
230 to the current yytext
231 - yysstate: start state of lexical analyzer (set to 0 during
232 initialization, and modified in calls to the start routine)
233 - yylstate: line state information (1 if at beginning of line, 0
234 otherwise)
235 - yystack: stack containing matched rules; yymatches contains the number of
236 matches
237 - yypos: for each rule the last marked position (yymark); zeroed when rule
238 has already been considered
239 - yysleng: copy of the original yyleng used to restore state information
240 when reject is used *)
241
242 const
243
244 max_matches = 1024;
245 max_rules = 256;
246
247 var
248
249 yystext : String;
250 yysstate, yylstate : Integer;
251 yymatches : Integer;
252 yystack : array [1..max_matches] of Integer;
253 yypos : array [1..max_rules] of Integer;
254 yysleng : Byte;
255
256 (* Utilities: *)
257
258 procedure echo;
259 var i : Integer;
260 begin
261 for i := 1 to yyleng do
262 put_char(yytext[i])
263 end(*echo*);
264
265 procedure yymore;
266 begin
267 yystext := yytext;
268 end(*yymore*);
269
270 procedure yyless ( n : Integer );
271 var i : Integer;
272 begin
273 for i := yyleng downto n+1 do
274 unget_char(yytext[i]);
275 yyleng := n;
276 end(*yyless*);
277
278 procedure reject;
279 var i : Integer;
280 begin
281 yyreject := true;
282 for i := yyleng+1 to yysleng do
283 yytext := yytext+get_char;
284 dec(yymatches);
285 end(*reject*);
286
287 procedure return ( n : Integer );
288 begin
289 yyretval := n;
290 yydone := true;
291 end(*return*);
292
293 procedure returnc ( c : Char );
294 begin
295 yyretval := ord(c);
296 yydone := true;
297 end(*returnc*);
298
299 procedure start ( state : Integer );
300 begin
301 yysstate := state;
302 end(*start*);
303
304 (* yywrap: *)
305
yywrapnull306 function yywrap : Boolean;
307 begin
308 close(yyinput); close(yyoutput);
309 yywrap := true;
310 end(*yywrap*);
311
312 (* Internal routines: *)
313
314 procedure yynew;
315 begin
316 if yylastchar<>#0 then
317 if yylastchar=nl then
318 yylstate := 1
319 else
320 yylstate := 0;
321 yystate := yysstate+yylstate;
322 yytext := yystext;
323 yystext := '';
324 yymatches := 0;
325 yydone := false;
326 end(*yynew*);
327
328 procedure yyscan;
329 begin
330 if yyleng=255 then fatal('yytext overflow');
331 yyactchar := get_char;
332 inc(yyleng);
333 yytext[yyleng] := yyactchar;
334 end(*yyscan*);
335
336 procedure yymark ( n : Integer );
337 begin
338 if n>max_rules then fatal('too many rules');
339 yypos[n] := yyleng;
340 end(*yymark*);
341
342 procedure yymatch ( n : Integer );
343 begin
344 inc(yymatches);
345 if yymatches>max_matches then fatal('match stack overflow');
346 yystack[yymatches] := n;
347 end(*yymatch*);
348
yyfindnull349 function yyfind ( var n : Integer ) : Boolean;
350 begin
351 yyreject := false;
352 while (yymatches>0) and (yypos[yystack[yymatches]]=0) do
353 dec(yymatches);
354 if yymatches>0 then
355 begin
356 yysleng := yyleng;
357 n := yystack[yymatches];
358 yyless(yypos[n]);
359 yypos[n] := 0;
360 if yyleng>0 then
361 yylastchar := yytext[yyleng]
362 else
363 yylastchar := #0;
364 yyfind := true;
365 end
366 else
367 begin
368 yyless(0);
369 yylastchar := #0;
370 yyfind := false;
371 end
372 end(*yyfind*);
373
yydefaultnull374 function yydefault : Boolean;
375 begin
376 yyreject := false;
377 yyactchar := get_char;
378 if yyactchar<>#0 then
379 begin
380 put_char(yyactchar);
381 yydefault := true;
382 end
383 else
384 begin
385 yylstate := 1;
386 yydefault := false;
387 end;
388 yylastchar := yyactchar;
389 end(*yydefault*);
390
391 procedure yyclear;
392 begin
393 bufptr := 0;
394 yysstate := 0;
395 yylstate := 1;
396 yylastchar := #0;
397 yytext := '';
398 yystext := '';
399 end(*yyclear*);
400
401 begin
402 assign(yyinput, '');
403 assign(yyoutput, '');
404 reset(yyinput); rewrite(yyoutput);
405 yylineno := 0;
406 yyclear;
407 end(*LexLib*).
408