1% Test cases for the parser generator. This all runs in
2% symbolic mode...
3
4
5%
6% This is where (for now) I will put documentation of the syntax I
7% will use when creating a grammer. There is a main function called
8% lalr_construct_parser and that is passed a list that describes
9% a grammar. It is in the form of a sequence of productions, and the first
10% one given is taken to be the top-level target.
11%
12% Each production is in the form
13%     (LHS   ((rhs1.1 rhs1.2 ...) a1.1 a1.2 ...)
14%            ((rhs2.1 rhs2.1 ...) a2.1 a2.2 ...)
15%            ...)
16% which in regular publication style for grammars might be interpreted
17% as meaning
18%      LHS ::= rhs1.1 rhs1.2 ... { a1.1 a1.2 ... }
19%          |   rhs2.1 rhs2.2 ... { a2.1 a2.2 ... }
20%          ...
21%          ;
22%
23% Each LHS is treated as a non-terminal symbol and is specified as a simple
24% name. Note that by default the Reduce parser will be folding characters
25% within names to lower case and so it will be best to choose names for
26% non-terminals that are unambiguous even when case-folded, but I would like
27% to establish a convention that in source code they are written in capitals.
28%
29% The rhs items may be either non-terminals (identified because they are
30% present in the left hand side of some production) or terminals. Terminal
31% symbols can be specified in two different ways.
32% The lexer has built-in recipies that decode certain sequences of characters
33% and return the special markers for !:symbol, !:number, !:string, !:list for
34% commonly used cases. In these cases the variable yylval gets left set
35% to associated data, so for instance in the case of !:symbol it gets set
36% to the particular symbol concerned.
37% The token type :list is used for Lisp or rlisp-like notation where the
38% input contains
39%     'expression
40% or  `expression
41% so for instance the input `(a b c) leads to the lexer returning !:list and
42% yylvel being set to (backquote (a b c)). This treatment is specialised for
43% handling rlisp-like syntax.
44%
45% Other terminals are indicated by writing a string. That may either
46% consist of characters that would otherwise form a symbol (ie a letter
47% followed by letters, digits and underscores) or a sequence of
48% non-alphanumeric characters. In the latter case if a sequence of three or
49% more punctuation marks make up a terminal then all the shorter prefixes
50% of it will also be grouped to form single entities. So if "<-->" is a
51% terminal then '<', '<-' and '<--' will each by parsed as single tokens, and
52% any of them that are not used as terminals will be classified as !:symbol.
53%
54% When the lexer processes input it will return a numeric code that identifies
55% the type of the item seen, so in a production one might write
56%     (!:symbol ":=" EXPRESSION)
57% and as it recognises the first two tokens the lexer will return a numeric
58% code for !:symbol (and set yylval to the actual symbol as seen) and then
59% a numeric code that it allocates for ":=". In the latter case it will
60% also set yylval to the symbol !:!= in case that is useful.
61
62
63symbolic;
64
65% Before testing parser generation I will demonstrate the lexer..
66% If I was julpy about the exact behaviour of the lexer I could go
67%               on tracelex;
68% to get some more tracing.
69
70lex_cleanup();
71
72lex_keywords '("begin" "<=>" "<==");
73
74% The output from this is expected to be
75
76%  Result: (2 symbol)
77%  Result: (4 200)
78%  Result: (4 3.542)
79%  Result: (3 "a string")
80%  Result: (2 nil)
81%  Result: (5 (quote (quoted lisp)))
82%  Result: (5 (backquote (backquoted (!, comma) (!,!@ comma_at))))
83%  Result: (2 !+)
84%  Result: (7 !<!=!>)
85%  Result: (2 !-)
86%  Result: (2 !=)
87%  Result: (2 !>)
88%  Result: (9 !<)
89%  Result: (8 !<!=)
90%  Result: (5 begin)
91%  Result: (2 !;)
92%  Result: (2 !;)
93%  Result: (2 !;)
94%
95%  nil
96
97% The row of "; ; ;" at the end provides some protection so that
98% if faults in the lexer were to cause it to read more or less than it ought
99% to then what is left over is reasonably likely to remain as valid rlisp
100% syntax and so the rest of this test file will be able to continue happily.
101
102
103<< off echo;
104   lex_init();
105   for i := 1:18 do <<
106     tt := yylex();
107     if not zerop posn() then terpri();
108     princ "Result: ";
109     print list(tt, yylval) >>;
110   on echo >>;
111
112symbol
113200
1143.542
115"a string"
116'(quoted lisp)
117`(backquoted ,comma ,@comma_at)
118+
119<=>
120-
121=>
122<
123<=
124begin
125; ; ; ;
126
127
128on lalr_verbose;
129
130% Here I set up a sample grammar
131%    S' -> S
132%    S  -> C C        { }
133%    C  -> "c" C      { }
134%        | "d"        { }
135% Example 4.42 from Aho, Sethi and Ullman's Red Dragon book, with
136% some simple semantic actions added. Note that I do not need to insert
137% the production S' -> S for myself since the analysis code will
138% augment my grammar with it for me anyway.
139% Example 4.54 in the more recent Purple book.
140
141% Note that this grammar will introduce "c" and "d" as keywords rather than
142% being general symbols. When I construct a subsequent grammar that will
143% undo that setting. I will omit semantic actions here so that the default
144% action of building a form of tree is used.
145
146% Limitations are
147% (1) I will need a way to specify precedence if this is to be feasibly
148%     useful. I have some planning for this but have not implemented it yet.
149% (2) At present the parser generator will not cope with large grammars
150%     because it does not merge rules promptly enough.
151% (3) The lexer is hand-written and can not readily be reconfigured for
152%     use with languages other than rlisp. For instance it has use of "!"
153%     as a character escape built into it.
154%
155%
156
157
158grammar := '(
159  (S  ((C C)    )   % One production for S, no semantic actions
160  )
161  (C  (("c" C)  )   % First production for C
162      (("d")    )   % Second production for C
163  ));
164
165lalr_construct_parser grammar;
166
167printc yyparse()$
168
169c c c d c d ;
170
171printc yyparse()$
172
173d d ;
174
175
176% Example 4.46 from the Red Dragon (4.61 in Aho, Lam, Sethi and Ullman,
177% "Compilers: principles, techniques and tools", second edition 2007).
178
179% The semantic actions here contain print statements that will
180% print some sort of trace as the parsing progresses.
181
182symbolic procedure neatprintc x;
183 << if not zerop posn() then terpri();
184    printc x >>;
185
186g4_46 := '((S   ((L "=" R)   (neatprintc "## S => L = R")
187                             (list 'equal !$1 !$3))
188                ((R)         (neatprintc "## S => R")
189                             !$1))
190           (L   (("*" R)     (neatprintc "## L => * R")
191                             (list 'star !$2))
192                ((!:symbol)  (neatprintc "## L => symbol")
193                             !$1))
194           (R   ((L)         (neatprintc "## R => L")
195                             !$1)));
196
197lalr_construct_parser g4_46;
198
199printc yyparse()$
200
201leftsym = rightsym ;
202
203
204printc yyparse()$
205
206****abc = *def ;
207
208#if nil  % Skip the rest of this test file...
209
210
211% The next example will not work until I have precedence rules imlemented
212% but is expected to be reasonably representative of natural small grammars.
213
214gtest := '((S  ((P) !$1)
215               ((S op P) (list !$2 !$1 !$3))
216               (("-" P) (list 'minus !$2))
217               (("+" P) !$2))
218           (op (("+") 'plus)
219               (("-") 'difference)
220               (("*") 'times)
221               (("/") 'quotient)
222               (("**") 'expt)
223               (("^") 'expt))
224           (P  (("(" S ")") !$2)
225               ((!:symbol) !$1)
226               ((!:string) !$1)
227               ((!:number) !$1)));
228
229lalr_construct_parser gtest;
230
231printc yyparse()$
232
233a * (b/c + d/e) ^ 2 ^ g - "stringdata" ;
234
235
236% Now a much more complicated grammar - one that recognizes the syntax of
237% RLISP. In order to survive this grammar my paser generator will need to
238% be extended to deal with ambiguous grammars both to cope with the
239% standard problem of "dangling else" clauses and to use precedence
240% declarations to disambiguate the uses of infix operators. Well at
241% present the grammar is written in a grossly bloated form so that
242% operator predcedence is hard wired into it... that too will need changing.
243
244% Note that a naive implementaion of LALR parser table generation via
245% initial construction of a full LR(1) table leads to ridiculous expense
246% when processing a grammar of this scale.
247
248rlisp_grammar := '(
249
250(command         ((  cmnd sep ))
251                 ((  end sep ))
252                 ((  command cmnd sep ))
253                 ((  command end sep ))
254)
255
256
257(sep             ((  ";" ))
258                 ((  "$" ))
259)
260
261
262(proc_type       ((  "symbolic" ))
263                 ((  "algebraic" ))
264)
265
266
267(proc_qual       ((  "expr" ))
268                 ((  "macro" ))
269                 ((  "smacro" ))
270)
271
272
273(sym_list        ((  ")" ))
274                 ((  "," !:symbol sym_list ))
275)
276
277
278(infix           ((  "setq" ))
279                 ((  "or" ))
280                 ((  "and" ))
281                 ((  "member" ))
282                 ((  "memq" ))
283                 ((  "=" ))
284                 ((  "neq" ))
285                 ((  "eq" ))
286                 ((  ">=" ))
287                 ((  ">" ))
288                 ((  "<=" ))
289                 ((  "<" ))
290                 ((  "freeof" ))
291                 ((  "+" ))
292                 ((  "-" ))
293                 ((  "*" ))
294                 ((  "/" ))
295                 ((  "^" ))
296                 ((  "**" ))
297                 ((  "." ))
298)
299
300(prefix          ((  "not" ))
301                 ((  "+" ))
302                 ((  "-" ))
303)
304
305
306(proc_head       ((  !:symbol ))
307                 ((  !:symbol !:symbol ))
308                 ((  !:symbol "(" ")" ))
309                 ((  !:symbol "(" !:symbol sym_list ))
310                 ((  prefix !:symbol ))
311                 ((  !:symbol infix !:symbol ))
312)
313
314
315(proc_def        ((  "procedure" proc_head sep cmnd ))
316                 ((  proc_type "procedure" proc_head sep cmnd ))
317                 ((  proc_qual "procedure" proc_head sep cmnd ))
318                 ((  proc_type proc_qual "procedure" proc_head sep cmnd ))
319)
320
321
322% The type "!:rlistat" is dodgy here... it doe snot (yet) exist!
323
324(rlistat         ((  !:rlistat ))
325                 ((  "in" ))
326                 ((  "on" ))
327)
328
329
330(rltail          ((  expr ))
331                 ((  expr "," rltail ))
332)
333
334
335(cmnd            ((  expr ))
336                 ((  rlistat rltail ))
337)
338
339
340(if_stmt         ((  "if" expr "then" cmnd "else" cmnd ))
341                 ((  "if" expr "then" cmnd ))
342)
343
344
345(for_update      ((  ":" expr ))
346                 ((  "step" expr "until" expr ))
347)
348
349
350(for_action      ((  "do" ))
351                 ((  "sum" ))
352                 ((  "collect" ))
353)
354
355
356(for_inon        ((  "in" ))
357                 ((  "on" ))
358)
359
360
361(for_stmt        ((  "for" !:symbol !:setq expr for_update for_action cmnd ))
362                 ((  "for" "each" !:symbol for_inon expr for_action cmnd ))
363                 ((  "foreach" !:symbol for_inon expr for_action cmnd ))
364)
365
366
367(while_stmt      ((  "while" expr "do" cmnd ))
368)
369
370
371(repeat_stmt     ((  "repeat" cmnd "until" expr ))
372)
373
374
375(return_stmt     ((  "return" ))
376                 ((  "return" expr ))
377)
378
379
380(goto_stmt       ((  "goto" !:symbol ))
381                 ((  "go" !:symbol ))
382                 ((  "go" "to" !:symbol ))
383)
384
385
386(group_tail      ((  ">>" ))
387                 ((  sep ">>" ))
388                 ((  sep cmnd group_tail ))
389)
390
391
392(group_expr      ((  "<<" cmnd group_tail ))
393)
394
395
396(scalar_tail     ((  sep ))
397                 ((  "," !:symbol scalar_tail ))
398                 ((  "," integer scalar_tail ))
399)
400
401
402(scalar_def      ((  "scalar" !:symbol scalar_tail ))
403                 ((  "integer" !:symbol scalar_tail ))
404)
405
406
407(scalar_defs     ((  scalar_def ))
408                 ((  scalar_defs scalar_def ))
409)
410
411
412(block_tail      ((  "end" ))
413                 ((  cmnd "end" ))
414                 ((  !:symbol ":" block_tail ))
415                 ((  cmnd sep block_tail ))
416                 ((  sep block_tail ))
417)
418
419(block_expr      ((  "begin" scalar_defs block_tail ))
420                 ((  "begin" block_tail ))
421)
422
423
424(lambda_vars     ((  sep ))
425                 ((  "," !:symbol lambda_vars ))
426)
427
428
429(lambda_expr     ((  "lambda" !:symbol lambda_vars cmnd ))
430                 ((  "lambda" "(" ")" sep cmnd ))
431                 ((  "lambda" "(" !:symbol sym_list sep cmnd ))
432)
433
434
435(expr            ((  rx0 ))
436                 ((  lx0 ))
437)
438
439
440(rx0             ((  lx0 "where" !:symbol "=" rx1 ))
441                 ((  rx1 ))
442)
443
444
445(lx0             ((  lx0 "where" !:symbol "=" lx1 ))
446                 ((  lx1 ))
447)
448
449
450(rx1             ((  lx2 ":=" rx1 ))
451                 ((  rx2 ))
452)
453
454
455(lx1             ((  lx2 ":=" lx1 ))
456                 ((  lx2 ))
457)
458
459
460(rx2tail         ((  rx3 ))
461                 ((  lx3 "or" rx2tail ))
462)
463
464(rx2             ((  lx3 "or" rx2tail ))
465                 ((  rx3 ))
466)
467
468
469(lx2tail         ((  lx3 ))
470                 ((  lx3 "or" lx2tail ))
471)
472
473(lx2             ((  lx3 "or" lx2tail ))
474                 ((  lx3 ))
475)
476
477
478(rx3tail         ((  rx4 ))
479                 ((  lx4 "and" rx3tail ))
480)
481
482(rx3             ((  lx4 "and" rx3tail ))
483                 ((  rx4 ))
484)
485
486
487(lx3tail         ((  lx4 ))
488                 ((  lx4 "and" lx3tail ))
489)
490
491(lx3             ((  lx4 "and" lx3tail ))
492                 ((  lx4 ))
493)
494
495
496(rx4             ((  "not" rx4 ))
497                 ((  rx5 ))
498)
499
500
501(lx4             ((  "not" lx4 ))
502                 ((  lx5 ))
503)
504
505% The fact that this lists "member" and "memq" (etc) here makes those names
506% keywords, and so possibly disables use as function names as in
507%    member(a, b)
508
509(rx5             ((  lx6 "member" ry6 ))
510                 ((  lx6 "memq" ry6 ))
511                 ((  lx6 "=" ry6 ))
512                 ((  lx6 "neq" ry6 ))
513                 ((  lx6 "eq" ry6 ))
514                 ((  lx6 ">=" ry6 ))
515                 ((  lx6 ">" ry6 ))
516                 ((  lx6 "<=" ry6 ))
517                 ((  lx6 "<" ry6 ))
518                 ((  lx6 "freeof" ry6 ))
519                 ((  rx6 ))
520)
521
522
523(lx5             ((  lx6 "member" ly6 ))
524                 ((  lx6 "memq" ly6 ))
525                 ((  lx6 "=" ly6 ))
526                 ((  lx6 "neq" ly6 ))
527                 ((  lx6 "eq" ly6 ))
528                 ((  lx6 ">=" ly6 ))
529                 ((  lx6 ">" ly6 ))
530                 ((  lx6 "<=" ly6 ))
531                 ((  lx6 "<" ly6 ))
532                 ((  lx6 "freeof" ly6 ))
533                 ((  lx6 ))
534)
535
536
537(ry6             ((  "not" ry6 ))
538                 ((  rx6 ))
539)
540
541
542(ly6             ((  "not" ly6 ))
543                 ((  lx6 ))
544)
545
546
547(rx6tail         ((  ry6a ))
548                 ((  ly6a "+" rx6tail ))
549)
550
551(rx6             ((  lx6a "+" rx6tail ))
552                 ((  rx6a ))
553)
554
555
556(lx6tail         ((  ly6a ))
557                 ((  ly6a "+" lx6tail ))
558)
559
560(lx6             ((  lx6a "+" lx6tail ))
561                 ((  lx6a ))
562)
563
564
565(ry6a            ((  not ry6a ))
566                 ((  rx6a ))
567)
568
569
570(rx6a            ((  lx6a "-" ry7 ))
571                 ((  rx7 ))
572)
573
574
575(ly6a            ((  not ly6a ))
576                 ((  lx6a ))
577)
578
579
580(lx6a            ((  lx6a "-" ly7 ))
581                 ((  lx7 ))
582)
583
584
585(ry7             ((  not ry7 ))
586                 ((  rx7 ))
587)
588
589
590(rx7             ((  "+" ry7 ))
591                 ((  "-" ry7 ))
592                 ((  rx8 ))
593)
594
595
596(ly7             ((  not ly7 ))
597                 ((  lx7 ))
598)
599
600
601(lx7             ((  "+" ly7 ))
602                 ((  "-" ly7 ))
603                 ((  lx8 ))
604)
605
606
607(rx8tail         ((  ry9 ))
608                 ((  ly9 "*" rx8tail ))
609)
610
611(rx8             ((  lx9 "*" rx8tail ))
612                 ((  rx9 ))
613)
614
615
616(lx8tail         ((  ly9 ))
617                 ((  ly9 "*" lx8tail ))
618)
619
620(lx8             ((  lx9 "*" lx8tail ))
621                 ((  lx9 ))
622)
623
624
625(ry9             ((  "not" ry9 ))
626                 ((  "+" ry9 ))
627                 ((  "-" ry9 ))
628                 ((  rx9 ))
629)
630
631
632(rx9             ((  lx9 "/" ry10 ))
633                 ((  rx10 ))
634)
635
636
637(ly9             ((  "not" ly9 ))
638                 ((  "+" ly9 ))
639                 ((  "-" ly9 ))
640                 ((  lx9 ))
641)
642
643
644(lx9             ((  lx9 "/" ly10 ))
645                 ((  lx10 ))
646)
647
648
649(ly10            ((  "not" ly10 ))
650                 ((  "+" ly10 ))
651                 ((  "-" ly10 ))
652                 ((  lx10 ))
653)
654
655
656(lx10            ((  lx11 "^" ly10 ))
657                 ((  lx11 ))
658)
659
660
661(ry10            ((  "not" ry10 ))
662                 ((  "+" ry10 ))
663                 ((  "-" ry10 ))
664                 ((  rx10 ))
665)
666
667
668(rx10            ((  lx11 "^" ry10 ))
669                 ((  rx11 ))
670)
671
672
673(ry11            ((  "not" ry11 ))
674                 ((  "+" ry11 ))
675                 ((  "-" ry11 ))
676                 ((  rx11 ))
677)
678
679
680(rx11            ((  x12 "." ry11 ))
681                 ((  if_stmt ))
682                 ((  for_stmt ))
683                 ((  while_stmt ))
684                 ((  repeat_stmt ))
685                 ((  return_stmt ))
686                 ((  goto_stmt ))
687                 ((  lambda_expr ))
688                 ((  proc_type ))
689                 ((  proc_def ))
690                 ((  endstat ))
691)
692
693
694(ly11            ((  "not" ly11 ))
695                 ((  "+" ly11 ))
696                 ((  "-" ly11 ))
697                 ((  lx11 ))
698)
699
700
701(lx11            ((  x12 "." ly11 ))
702                 ((  x12 ))
703)
704
705
706(arg_list        ((  expr ")" ))
707                 ((  expr "," arg_list ))
708)
709
710
711(x12             ((  x13 "[" expr "]" ))
712                 ((  x13 "(" ")" ))
713                 ((  x13 "(" expr "," arg_list ))
714                 ((  x13 x12 ))
715                 ((  x13 ))
716)
717
718
719(x13             ((  !:symbol ))
720                 ((  !:number ))
721                 ((  !:string ))
722                 ((  !:list ))     % Both 'xxx and `xxx here
723                 ((  group_expr ))
724                 ((  block_expr ))
725                 ((  "(" expr ")" ))
726)
727)$
728
729
730% lalr_construct_parser rlisp_grammar;
731
732#endif
733
734end;
735
736