1;;;; matchable.scm -- portable hygienic pattern matcher
2;;
3;; This code is written by Alex Shinn and placed in the
4;; Public Domain.  All warranties are disclaimed.
5
6;; Written in fully portable SYNTAX-RULES, with a few non-portable
7;; bits at the end of the file conditioned out with COND-EXPAND.
8
9;; This is a simple generative pattern matcher - each pattern is
;; expanded into the required tests, calling a failure continuation if
;; the tests fail.  This makes the logic easy to follow and extend,
12;; but produces sub-optimal code in cases where you have many similar
13;; clauses due to repeating the same tests.  Nonetheless a smart
14;; compiler should be able to remove the redundant tests.  For
15;; MATCH-LET and DESTRUCTURING-BIND type uses there is no performance
16;; hit.
17
18;; 2008/03/20 - fixing bug where (a ...) matched non-lists
19;; 2008/03/15 - removing redundant check in vector patterns
20;; 2007/09/04 - fixing quasiquote patterns
21;; 2007/07/21 - allowing ellipse patterns in non-final list positions
22;; 2007/04/10 - fixing potential hygiene issue in match-check-ellipse
23;;              (thanks to Taylor Campbell)
24;; 2007/04/08 - clean up, commenting
25;; 2006/12/24 - bugfixes
26;; 2006/12/01 - non-linear patterns, shared variables in OR, get!/set!
27
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30;; This is always passed a message, yet won't match the message, and
31;; thus always results in a compile-time error.
32
;; Compile-time error reporter.  The only rule accepts a call with no
;; arguments, so any use that carries a message fails to match and the
;; expander reports the offending form (message included) as a syntax
;; error.  A bare call re-invokes itself with a message, which then fails
;; the same way.
(define-syntax match-syntax-error
  (syntax-rules ()
    ((match-syntax-error)
     (match-syntax-error "invalid match-syntax-error usage"))))
37
38;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39
40;; The basic interface.  MATCH just performs some basic syntax
41;; validation, binds the match expression to a temporary variable, and
42;; passes it on to MATCH-NEXT.
43
;; (match expr (pattern . body) ...)
;;
;; Entry point.  Rejects the two obviously malformed shapes, then hands
;; MATCH-NEXT a variable to test, a getter expression and a partial
;; setter form for the value being matched.
(define-syntax match
  (syntax-rules ()
    ;; No expression at all.
    ((match)
     (match-syntax-error "missing match expression"))
    ;; An expression but no clauses.
    ((match subject)
     (match-syntax-error "missing match clause"))
    ;; Compound expression: evaluate it once into a temporary for the
    ;; tests, while the original form serves as the getter and as the
    ;; target of the SET! setter.
    ((match (form ...) (pattern . actions) ...)
     (let ((val (form ...)))
       (match-next val (form ...) (set! (form ...)) (pattern . actions) ...)))
    ;; Vector literal: bind it to a temporary, which is also used as the
    ;; getter and setter target.
    ((match #(elt ...) (pattern . actions) ...)
     (let ((val #(elt ...)))
       (match-next val val (set! val) (pattern . actions) ...)))
    ;; Anything else is used directly, with no temporary.
    ((match subject (pattern . actions) ...)
     (match-next subject subject (set! subject) (pattern . actions) ...))))
59
60;; MATCH-NEXT passes each clause to MATCH-ONE in turn with its failure
61;; thunk, which is expanded by recursing MATCH-NEXT on the remaining
62;; clauses.
63
;; (match-next var getter setter clause ...)
;;
;; Tries the clauses in order.  Each clause gets a failure thunk whose
;; body is MATCH-NEXT applied to the clauses that follow it.
(define-syntax match-next
  (syntax-rules (=>)
    ;; Ran out of clauses: signal a run-time error.
    ((match-next subject getter setter)
     (error 'match "no matching pattern"))
    ;; Clause whose body begins with (=> name): NAME is bound to the
    ;; failure thunk so the body can call it explicitly.
    ((match-next subject getter setter (pattern (=> on-fail) . actions) . more)
     (let ((on-fail (lambda () (match-next subject getter setter . more))))
       ;; MATCH-ONE does the actual pattern analysis.  The success
       ;; continuation discards the accumulated ids and runs the body;
       ;; the initial id list is empty.
       (match-one subject pattern getter setter
                  (match-drop-ids (begin . actions))
                  (on-fail)
                  ())))
    ;; Clause without an explicit name: re-dispatch with a placeholder
    ;; name so the rule above handles it.
    ((match-next subject getter setter (pattern . actions) . more)
     (match-next subject getter setter (pattern (=> failure) . actions) . more))))
77
78;; MATCH-ONE first checks for ellipse patterns, otherwise passes on to
79;; MATCH-TWO.
80
;; (match-one var pattern getter setter success-k fail-k (ids ...))
;;
;; Front end for MATCH-TWO that detects an ellipsis in second position.
(define-syntax match-one
  (syntax-rules ()
    ;; Pattern has at least two elements: if the second is an ellipsis,
    ;; collect the variables of the first element and generate the
    ;; repetition loop; otherwise fall back to MATCH-TWO unchanged.
    ((match-one v (head next . tail) g s sk fk i)
     (match-check-ellipse
      next
      (match-extract-vars head (match-gen-ellipses v head tail g s sk fk i) i ())
      (match-two v (head next . tail) g s sk fk i)))
    ;; Every other shape goes straight to MATCH-TWO.
    ((match-one . args)
     (match-two . args))))
93
94;; This is the guts of the pattern matcher.  We are passed a lot of
95;; information in the form:
96;;
97;;   (match-two var pattern getter setter success-k fail-k (ids ...))
98;;
99;; where VAR is the symbol name of the current variable we are
100;; matching, PATTERN is the current pattern, getter and setter are the
101;; corresponding accessors (e.g. CAR and SET-CAR! of the pair holding
102;; VAR), SUCCESS-K is the success continuation, FAIL-K is the failure
103;; continuation (which is just a thunk call and is thus safe to expand
104;; multiple times) and IDS are the list of identifiers bound in the
105;; pattern so far.
106
;; Main pattern dispatcher.  Clauses are tried in order, so the special
;; forms must precede the generic pair/vector/atom clauses.  SK is a
;; partial form: success is expressed by appending the current id list
;; as its last argument, i.e. (sk ... i).
;;
;; NOTE(review): `$' is listed as a literal but no clause below matches
;; a ($ ...) pattern, so such a pattern falls through to the generic
;; pair clauses.
(define-syntax match-two
  (syntax-rules (_ ___ quote quasiquote ? $ = and or not set! get!)
    ;; Empty list pattern.
    ((match-two v () g s (sk ...) fk i)
     (if (null? v) (sk ... i) fk))
    ;; Quoted datum: compared with EQUAL?.
    ((match-two v (quote p) g s (sk ...) fk i)
     (if (equal? v 'p) (sk ... i) fk))
    ;; Quasiquote patterns are delegated.
    ((match-two v (quasiquote p) g s sk fk i)
     (match-quasiquote v p g s sk fk i))
    ;; (and) always succeeds; (and p q ...) matches p and then, as its
    ;; success continuation, the remaining conjuncts against the same v.
    ((match-two v (and) g s (sk ...) fk i) (sk ... i))
    ((match-two v (and p q ...) g s sk fk i)
     (match-one v p g s (match-one v (and q ...) g s sk fk) fk i))
    ;; (or) always fails; a single alternative is matched directly.
    ((match-two v (or) g s sk fk i) fk)
    ((match-two v (or p) g s sk fk i)
     (match-one v p g s sk fk i))
    ;; Several alternatives: extract the pattern's variables first and
    ;; let MATCH-GEN-OR wrap the success continuation in a lambda.
    ((match-two v (or p ...) g s sk fk i)
     (match-extract-vars (or p ...)
                         (match-gen-or v (p ...) g s sk fk i)
                         i
                         ()))
    ;; Negation: success and failure swap roles.  Ids bound inside p
    ;; are dropped, and the outer success receives the original i.
    ((match-two v (not p) g s (sk ...) fk i)
     (match-one v p g s (match-drop-ids fk) (sk ... i) i))
    ;; (get! name) binds NAME to a thunk evaluating the getter form;
    ;; the id list is passed on unchanged.
    ((match-two v (get! getter) g s (sk ...) fk i)
     (let ((getter (lambda () g))) (sk ... i)))
    ;; (set! name) binds NAME to a one-argument procedure that completes
    ;; the partial setter form with the new value.
    ((match-two v (set! setter) g (s ...) (sk ...) fk i)
     (let ((setter (lambda (x) (s ... x)))) (sk ... i)))
    ;; Predicate test, then any sub-patterns as an implicit AND.
    ((match-two v (? pred p ...) g s sk fk i)
     (if (pred v) (match-one v (and p ...) g s sk fk i) fk))
    ;; Apply PROC to v and match the result against p.
    ((match-two v (= proc p) g s sk fk i)
     (let ((w (proc v)))
       (match-one w p g s sk fk i)))
    ;; Repetition written with the `___' spelling of the ellipsis.
    ((match-two v (p ___ . r) g s sk fk i)
     (match-extract-vars p (match-gen-ellipses v p r g s sk fk i) i ()))
    ;; One-element list: a pair whose cdr is null.
    ((match-two v (p) g s sk fk i)
     (if (and (pair? v) (null? (cdr v)))
       (let ((w (car v)))
         (match-one w p (car v) (set-car! v) sk fk i))
       fk))
    ;; General pair: match the car, then the cdr as its continuation,
    ;; with CAR/SET-CAR! and CDR/SET-CDR! as the accessors.
    ((match-two v (p . q) g s sk fk i)
     (if (pair? v)
       (let ((w (car v)) (x (cdr v)))
         (match-one w p (car v) (set-car! v)
                    (match-one x q (cdr v) (set-cdr! v) sk fk)
                    fk
                    i))
       fk))
    ;; Vector patterns start at index 0 with no elements collected yet.
    ((match-two v #(p ...) g s sk fk i)
     (match-vector v 0 () (p ...) sk fk i))
    ;; Wildcard: matches anything and binds nothing.
    ((match-two v _ g s (sk ...) fk i) (sk ... i))
    ;; Not a pair or vector or special literal, test to see if it's a
    ;; new symbol, in which case we just bind it, or if it's an
    ;; already bound symbol or some other literal, in which case we
    ;; compare it with EQUAL?.
    ;;
    ;; The test is done by a local macro whose first rule uses x itself
    ;; as a pattern and whose literal list is the ids bound so far: a
    ;; fresh symbol acts as a pattern variable and matches the probe,
    ;; while a listed id or a non-symbol datum does not.
    ((match-two v x g s (sk ...) fk (id ...))
     (let-syntax
         ((new-sym?
           (syntax-rules (id ...)
             ((new-sym? x sk2 fk2) sk2)
             ((new-sym? y sk2 fk2) fk2))))
       (new-sym? abracadabra  ; thanks Oleg
             (let ((x v)) (sk ... (id ... x)))
             (if (equal? v x) (sk ... (id ...)) fk))))
    ))
169
170;; QUASIQUOTE patterns
171
;; (match-quasiquote var qq-pattern getter setter sk fk ids . depth)
;;
;; Matches VAR against the body of a quasiquote pattern.  DEPTH holds
;; one marker per enclosing nested quasiquote; with no markers an
;; unquote switches back to ordinary pattern matching via MATCH-ONE.
(define-syntax match-quasiquote
  (syntax-rules (unquote unquote-splicing quasiquote)
    ;; ,p at the outermost level: p is an ordinary pattern.
    ((_ v (unquote p) g s sk fk i)
     (match-one v p g s sk fk i))
    ;; (,@p . rest) at the outermost level.  MATCH-ONE takes
    ;; (var pattern getter setter sk fk ids); the getter and setter were
    ;; missing from this call, so it matched no rule and every such
    ;; pattern was a syntax error.  They are now passed through.
    ;; NOTE(review): as written, p is matched against the first element
    ;; of v and `rest' against the remainder via the temporary `tmp';
    ;; this does not splice p over several elements -- confirm that this
    ;; is the intended meaning of ,@ here.
    ((_ v ((unquote-splicing p) . rest) g s sk fk i)
     (if (pair? v)
       (match-one v
                  (p . tmp)
                  g s
                  (match-quasiquote tmp rest g s sk fk)
                  fk
                  i)
       fk))
    ;; A nested quasiquote pushes a depth marker.
    ((_ v (quasiquote p) g s sk fk i . depth)
     (match-quasiquote v p g s sk fk i #f . depth))
    ;; Inside a nested quasiquote, unquote and unquote-splicing pop one
    ;; depth marker and keep matching in quasiquote mode.
    ((_ v (unquote p) g s sk fk i x . depth)
     (match-quasiquote v p g s sk fk i . depth))
    ((_ v (unquote-splicing p) g s sk fk i x . depth)
     (match-quasiquote v p g s sk fk i . depth))
    ;; Pair: match the car, then the cdr through MATCH-QUASIQUOTE-STEP,
    ;; which re-attaches the depth after the id list has been appended.
    ((_ v (p . q) g s sk fk i . depth)
     (if (pair? v)
       (let ((w (car v)) (x (cdr v)))
         (match-quasiquote
          w p g s
          (match-quasiquote-step x q g s sk fk depth)
          fk i . depth))
       fk))
    ;; Vector: convert to a list and match the elements as a list.
    ((_ v #(elt ...) g s sk fk i . depth)
     (if (vector? v)
       (let ((ls (vector->list v)))
         (match-quasiquote ls (elt ...) g s sk fk i . depth))
       fk))
    ;; Anything else is literal data, matched as a quoted pattern.
    ((_ v x g s sk fk i . depth)
     (match-one v 'x g s sk fk i))))
205
;; Continuation adapter for MATCH-QUASIQUOTE.  It is used as a partial
;; success continuation, so the id list arrives as the last argument;
;; this moves it in front of the depth markers that MATCH-QUASIQUOTE
;; expects as trailing arguments.
(define-syntax match-quasiquote-step
  (syntax-rules ()
    ((_ subject pattern g s sk fk level ids)
     (match-quasiquote subject pattern g s sk fk ids . level))))
211
212;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
213;; Utilities
214
;; A CPS utility that takes an expression followed by any number of
;; extra forms (the accumulated ids) and just expands into the
;; expression.
;; Expands into its first argument and discards everything after it.
;; Used as a success continuation when the accumulated ids are not
;; needed.
(define-syntax match-drop-ids
  (syntax-rules ()
    ((match-drop-ids result discarded ...) result)))
220
221;; Generating OR clauses just involves binding the success
222;; continuation into a thunk which takes the identifiers common to
223;; each OR clause, and trying each clause, calling the thunk as soon
224;; as we succeed.
225
;; Receives the variables of an OR pattern as a trailing list of
;; (var accumulator) pairs.  The success continuation is wrapped once in
;; a lambda over those variables, so each alternative only expands to a
;; call of that lambda rather than a copy of the continuation.
(define-syntax match-gen-or
  (syntax-rules ()
    ((_ v alternatives g s (sk ...) fk (bound ...) ((var var-ls) ...))
     (let ((shared-sk (lambda (var ...) (sk ... (bound ... var ...)))))
       (match-gen-or-step
        v alternatives g s
        (match-drop-ids (shared-sk var ...))
        fk
        (bound ...))))))
232
;; Walks the alternatives of an OR pattern, chaining each one's failure
;; continuation to the attempt on the next.
(define-syntax match-gen-or-step
  (syntax-rules ()
    ;; Alternatives exhausted: the whole OR fails.
    ((_ v () g s sk fk i)
     fk)
    ;; Final (or sole) alternative: its failure is the OR's failure.
    ((_ v (only) g s sk fk i)
     (match-one v only g s sk fk i))
    ;; Try the first alternative; on failure continue with the others.
    ((_ v (first . others) g s sk fk i)
     (match-one v first g s sk (match-gen-or-step v others g s sk fk i) i))))
245
;; We match a pattern (p ...) by matching the pattern p in a loop on
;; each element of the variable, accumulating the bound ids into lists.
248
249;; Look at the body - it's just a named let loop, matching each
250;; element in turn to the same pattern.  This illustrates the
251;; simplicity of this generative-style pattern matching.  It would be
252;; just as easy to implement a tree searching pattern.
253
;; (match-gen-ellipses var p tail getter setter sk fk ids ((id id-ls) ...))
;;
;; Generates the loop for a repeated pattern `p ...' followed by the
;; fixed patterns in TAIL.  The trailing ((id id-ls) ...) list is
;; supplied by MATCH-EXTRACT-VARS: each ID is a variable of p and each
;; ID-LS the name of the accumulator that collects its values.
(define-syntax match-gen-ellipses
  (syntax-rules ()
    ;; Nothing after the ellipsis.
    ((_ v p () g s (sk ...) fk i ((id id-ls) ...))
     (match-check-identifier p
       ;; p is a plain identifier: bind it to the whole value, which
       ;; must be a proper list.
       (let ((p v))
         (if (list? p)
             (sk ... i)
             fk))
       ;; General case: walk the list, matching p against each element
       ;; and consing each variable's value onto its accumulator.
       (let loop ((ls v) (id-ls '()) ...)
         (cond
           ((null? ls)
            ;; Accumulators were built in reverse order.
            (let ((id (reverse id-ls)) ...) (sk ... i)))
           ((pair? ls)
            (let ((w (car ls)))
              (match-one w p (car ls) (set-car! ls)
                         (match-drop-ids (loop (cdr ls) (cons id id-ls) ...))
                         fk i)))
           (else
            ;; Improper list or non-list.
            fk)))))
    ;; Fixed patterns follow the ellipsis: the last TAIL-LEN elements
    ;; are reserved for them and the loop consumes the rest.
    ((_ v p (r ...) g s (sk ...) fk i ((id id-ls) ...))
     (match-verify-no-ellipses
      (r ...)
      (let* ((tail-len (length '(r ...)))
             (ls v)
             ;; LENGTH is only defined on proper lists; anything else
             ;; must fail the match rather than raise an error.
             (len (and (list? ls) (length ls))))
        (if (or (not len) (< len tail-len))
            fk
            (let loop ((ls ls) (n len) (id-ls '()) ...)
              (cond
                ((= n tail-len)
                 (let ((id (reverse id-ls)) ...)
                   ;; The success continuation is passed in its partial
                   ;; form: MATCH-ONE appends the final id list itself.
                   ;; Pre-applying it to i would hand two id lists to
                   ;; any continuation other than MATCH-DROP-IDS.
                   (match-one ls (r ...) #f #f (sk ...) fk i)))
                ((pair? ls)
                 (let ((w (car ls)))
                   (match-one w p (car ls) (set-car! ls)
                              (match-drop-ids
                               (loop (cdr ls) (- n 1) (cons id id-ls) ...))
                              fk
                              i)))
                (else
                 fk)))))))
    ))
296
;; (match-verify-no-ellipses tail-patterns expr)
;;
;; Expands into EXPR unless one of the tail patterns is an ellipsis, in
;; which case it expands into a MATCH-SYNTAX-ERROR use instead.
(define-syntax match-verify-no-ellipses
  (syntax-rules ()
    ;; Inspect the first pattern, then recurse on the remainder.
    ((_ (first . remaining) sk)
     (match-check-ellipse
      first
      (match-syntax-error
       "multiple ellipse patterns not allowed at same level")
      (match-verify-no-ellipses remaining sk)))
    ;; Nothing left to inspect.
    ((_ end sk) sk)))
307
308;; Vector patterns are just more of the same, with the slight
309;; exception that we pass around the current vector index being
310;; matched.
311
;; (match-vector var index collected remaining-patterns sk fk ids)
;;
;; Walks the element patterns of a vector pattern.  COLLECTED holds the
;; (pattern index) pairs seen so far.  When exactly two patterns remain
;; and the second is an ellipsis, the first is a repeated tail pattern.
(define-syntax match-vector
  (syntax-rules (___)
    ;; The `___' spelling must be tested before the generic two-element
    ;; clause below: that clause matches any two remaining patterns,
    ;; and MATCH-CHECK-ELLIPSE only recognises `...', so with the
    ;; clauses in the other order this one could never be selected and
    ;; `___' was treated as an ordinary element pattern.
    ((_ v n pats (p ___) sk fk i)
     (match-vector-ellipses v n pats p sk fk i))
    ((_ v n pats (p q) sk fk i)
     (match-check-ellipse q
                          (match-vector-ellipses v n pats p sk fk i)
                          (match-vector-two v n pats (p q) sk fk i)))
    ;; No trailing ellipsis here: keep collecting elements.
    ((_ . x)
     (match-vector-two . x))))
322
323;; Check the exact vector length, then check each element in turn.
324
;; Collects the element patterns of a fixed-length vector pattern, then
;; emits the type and length test followed by the per-element matches.
(define-syntax match-vector-two
  (syntax-rules ()
    ;; All patterns collected: the value must be a vector of exactly
    ;; COUNT elements before any element is inspected.
    ((_ v count ((elt idx) ...) () sk fk i)
     (if (and (vector? v) (= (vector-length v) count))
       (match-vector-step v ((elt idx) ...) sk fk i)
       fk))
    ;; Record the next pattern with its index and go back through
    ;; MATCH-VECTOR so a trailing ellipsis can still be detected.
    ((_ v count (done ...) (elt . remaining) sk fk i)
     (match-vector v (+ count 1) (done ... (elt count)) remaining sk fk i))))
337
;; Matches the collected (pattern index) pairs one at a time.  Each
;; element's success continuation is the step for the remaining pairs.
(define-syntax match-vector-step
  (syntax-rules ()
    ;; No elements left: succeed with the current ids.
    ((_ v () (sk ...) fk i) (sk ... i))
    ;; Fetch the element, match it, then continue with the others.
    ;; VECTOR-REF and VECTOR-SET! on the same slot are its accessors.
    ((_ v ((elt idx) . remaining) sk fk i)
     (let ((item (vector-ref v idx)))
       (match-one item elt (vector-ref v idx) (vector-set! v idx)
                  (match-vector-step v remaining sk fk)
                  fk i)))))
346
347;; With a vector ellipse pattern we first check to see if the vector
348;; length is at least the required length.
349
;; Vector pattern ending in a repeated element.  The vector must hold at
;; least N elements; the fixed prefix is matched first, and the tail
;; loop runs as the prefix's success continuation.
(define-syntax match-vector-ellipses
  (syntax-rules ()
    ((_ v n ((elt idx) ...) p sk fk i)
     (if (vector? v)
       (let ((size (vector-length v)))
         (if (>= size n)
           (match-vector-step v ((elt idx) ...)
                              (match-vector-tail v p n size sk fk)
                              fk i)
           fk))
       fk))))
361
;; Extracts the variables of the repeated element pattern, then hands
;; them to MATCH-VECTOR-TAIL-TWO, which generates the actual loop.
(define-syntax match-vector-tail
  (syntax-rules ()
    ((_ v elt start size sk fk i)
     (match-extract-vars elt
                         (match-vector-tail-two v elt start size sk fk i)
                         i
                         ()))))
366
;; (match-vector-tail-two v p n len sk fk ids ((id id-ls) ...))
;;
;; Loops over vector indices N up to (but excluding) LEN, matching P
;; against each element and consing every variable's value onto its
;; accumulator.  At the end each variable is bound to the reversed
;; accumulator, i.e. to its values in element order.
(define-syntax match-vector-tail-two
  (syntax-rules ()
    ((_ v p n len (sk ...) fk i ((id id-ls) ...))
     (let loop ((j n) (id-ls '()) ...)
       (if (>= j len)
         (let ((id (reverse id-ls)) ...) (sk ... i))
         (let ((w (vector-ref v j)))
           ;; The setter was misspelled `vetor-set!', an undefined
           ;; name that only surfaced when a (set! x) pattern was used
           ;; in the repeated position.
           (match-one w p (vector-ref v j) (vector-set! v j)
                      (match-drop-ids (loop (+ j 1) (cons id id-ls) ...))
                      fk i)))))))
377
378;; Extract all identifiers in a pattern.  A little more complicated
379;; than just looking for symbols, we need to ignore special keywords
380;; and not pattern forms (such as the predicate expression in ?
381;; patterns).
382;;
383;; (match-extract-vars pattern continuation (ids ...) (new-vars ...))
384
;; Walks PATTERN and calls the partial continuation K with the list of
;; newly found variables appended as its last argument.  Each entry has
;; the form (var accumulator-name).  I is the list of ids already bound;
;; V is the result list built so far.
(define-syntax match-extract-vars
  (syntax-rules (_ ___ ? $ = quote quasiquote and or not get! set!)
    ;; Skip the predicate / record name / procedure position and scan
    ;; only the sub-pattern part.
    ((match-extract-vars (? pred . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars ($ rec . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars (= proc p) k i v)
     (match-extract-vars p k i v))
    ;; Quoted data contains no variables.
    ((match-extract-vars (quote x) (k ...) i v)
     (k ... v))
    ;; Quasiquoted data: only unquoted parts can hold variables; start
    ;; at nesting depth one.
    ((match-extract-vars (quasiquote x) k i v)
     (match-extract-quasiquote-vars x k i v (#t)))
    ;; Logical operators: drop the keyword and scan the operands as a
    ;; list of patterns.
    ((match-extract-vars (and . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars (or . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars (not . p) k i v)
     (match-extract-vars p k i v))
    ;; A non-keyword pair, expand the CAR with a continuation to
    ;; expand the CDR.
    ;; If the second element is an ellipsis it is dropped from the scan.
    ((match-extract-vars (p q . r) k i v)
     (match-check-ellipse
      q
      (match-extract-vars (p . r) k i v)
      (match-extract-vars p (match-extract-vars-step (q . r) k i v) i ())))
    ((match-extract-vars (p . q) k i v)
     (match-extract-vars p (match-extract-vars-step q k i v) i ()))
    ;; Vectors are scanned as the list of their elements.
    ((match-extract-vars #(p ...) k i v)
     (match-extract-vars (p ...) k i v))
    ;; The wildcard and the `___' ellipsis are never variables.
    ((match-extract-vars _ (k ...) i v)    (k ... v))
    ((match-extract-vars ___ (k ...) i v)  (k ... v))
    ;; This is the main part, the only place where we might add a new
    ;; var if it's an unbound symbol.
    ;; The local macro uses p itself as a pattern with the already
    ;; bound ids as literals: only a symbol that is not among them
    ;; matches the probe.  A new variable is recorded as (p p-ls),
    ;; where p-ls is an identifier introduced by this template.
    ((match-extract-vars p (k ...) (i ...) v)
     (let-syntax
         ((new-sym?
           (syntax-rules (i ...)
             ((new-sym? p sk fk) sk)
             ((new-sym? x sk fk) fk))))
       (new-sym? random-sym-to-match
                 (k ... ((p p-ls) . v))
                 (k ... v))))
    ))
428
429;; Stepper used in the above so it can expand the CAR and CDR
430;; separately.
431
;; Continuation used by MATCH-EXTRACT-VARS after scanning the CAR of a
;; pair.  The variables found there arrive as the last argument; they
;; are added both to the bound-id list and to the result list before the
;; rest of the pattern is scanned.
(define-syntax match-extract-vars-step
  (syntax-rules ()
    ((_ remaining k bound found ((var var-ls) ...))
     (match-extract-vars remaining
                         k
                         (var ... . bound)
                         ((var var-ls) ... . found)))))
437
;; (match-extract-quasiquote-vars qq-pattern k ids vars depth)
;;
;; Variable extraction inside a quasiquote pattern.  DEPTH is a list
;; with one #t per enclosing quasiquote.  Only an unquote at depth one
;; re-enters MATCH-EXTRACT-VARS; everything else is literal data.
(define-syntax match-extract-quasiquote-vars
  (syntax-rules (quasiquote unquote unquote-splicing)
    ;; A nested quasiquote goes one level deeper.
    ((match-extract-quasiquote-vars (quasiquote x) k i v d)
     (match-extract-quasiquote-vars x k i v (#t . d)))
    ;; For extraction, unquote-splicing is treated like unquote.
    ((match-extract-quasiquote-vars (unquote-splicing x) k i v d)
     (match-extract-quasiquote-vars (unquote x) k i v d))
    ;; Unquote at depth one: x is a real pattern.
    ((match-extract-quasiquote-vars (unquote x) k i v (#t))
     (match-extract-vars x k i v))
    ;; Unquote at a deeper level only pops one level.
    ((match-extract-quasiquote-vars (unquote x) k i v (#t . d))
     (match-extract-quasiquote-vars x k i v d))
    ;; Pair: scan the car, then the cdr through the step macro, both at
    ;; the current depth.  The recursive call used to omit the depth
    ;; argument, and the step was given a depth one level too shallow,
    ;; so any pair inside a quasi-pattern matched no rule.
    ((match-extract-quasiquote-vars (x . y) k i v d)
     (match-extract-quasiquote-vars
      x
      (match-extract-quasiquote-vars-step y k i v d) i () d))
    ;; Vector: scan the elements as a list at the same depth.  This
    ;; used to drop a level as well.
    ((match-extract-quasiquote-vars #(x ...) k i v d)
     (match-extract-quasiquote-vars (x ...) k i v d))
    ;; Any other datum holds no variables.
    ((match-extract-quasiquote-vars x (k ...) i v d)
     (k ... v))
    ))
457
;; Continuation used by MATCH-EXTRACT-QUASIQUOTE-VARS after scanning the
;; CAR of a pair.  The variables found there are merged into the bound-id
;; and result lists before the CDR is scanned at the given depth.
(define-syntax match-extract-quasiquote-vars-step
  (syntax-rules ()
    ((_ remaining k bound found depth ((var var-ls) ...))
     (match-extract-quasiquote-vars remaining
                                    k
                                    (var ... . bound)
                                    ((var var-ls) ... . found)
                                    depth))))
463
464
465;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
466;; Gimme some sugar baby.
467
;; One-argument procedure that matches its argument against the clauses.
(define-syntax match-lambda
  (syntax-rules ()
    ((match-lambda clause ...)
     (lambda (arg)
       (match arg clause ...)))))
471
;; Variadic procedure that matches the list of all its arguments against
;; the clauses.
(define-syntax match-lambda*
  (syntax-rules ()
    ((match-lambda* clause ...)
     (lambda args
       (match args clause ...)))))
475
;; LET with patterns in binding position.  A parenthesised first operand
;; is a binding list; anything else is taken as the loop name of a named
;; let.
(define-syntax match-let
  (syntax-rules ()
    ((match-let (binding ...) . body)
     (match-let/helper let () () (binding ...) . body))
    ((match-let name . rest)
     (match-named-let name () . rest))))
482
;; Same as MATCH-LET, except that the underlying binding form is LETREC.
(define-syntax match-letrec
  (syntax-rules ()
    ((match-letrec bindings . body)
     (match-let/helper letrec () () bindings . body))))
486
;; (match-let/helper binder simple destructured pending . body)
;;
;; Sorts the PENDING bindings into SIMPLE ones, emitted directly with
;; BINDER (LET or LETREC), and DESTRUCTURED ones, whose expression is
;; bound to a temporary that MATCH-LET* takes apart afterwards.
(define-syntax match-let/helper
  (syntax-rules ()
    ;; Done, and nothing needed destructuring: an ordinary binding form.
    ((_ binder ((name init) ...) () () . body)
     (binder ((name init) ...) . body))
    ;; Done: bind everything, then destructure the temporaries.
    ((_ binder ((name init) ...) ((pattern temp) ...) () . body)
     (binder ((name init) ...)
       (match-let* ((pattern temp) ...)
         . body)))
    ;; A pair pattern: route its value through a temporary.
    ((_ binder (simple ...) (destructured ...) (((a . b) init) . more) . body)
     (match-let/helper
      binder
      (simple ... (tmp init))
      (destructured ... ((a . b) tmp))
      more . body))
    ;; A vector pattern: likewise.
    ((_ binder (simple ...) (destructured ...) ((#(a ...) init) . more) . body)
     (match-let/helper
      binder
      (simple ... (tmp init))
      (destructured ... (#(a ...) tmp))
      more . body))
    ;; Anything else is kept as a plain binding.
    ((_ binder (simple ...) (destructured ...) ((a init) . more) . body)
     (match-let/helper
      binder
      (simple ... (a init))
      (destructured ...)
      more . body))))
504
;; (match-named-let name collected pending . body)
;;
;; Named LET with patterns.  Every binding gets a temporary loop
;; variable; the patterns are applied to those variables inside the loop
;; body with MATCH-LET.
(define-syntax match-named-let
  (syntax-rules ()
    ;; All bindings processed: emit the loop.
    ((_ name ((pattern init arg) ...) () . body)
     (let name ((arg init) ...)
       (match-let ((pattern arg) ...)
         . body)))
    ;; Attach a fresh temporary to the next binding.
    ((_ name (done ...) ((pattern init) . pending) . body)
     (match-named-let name (done ... (pattern init tmp)) pending . body))))
513
;; Sequential pattern binding: each binding becomes a single-clause
;; MATCH whose body is the MATCH-LET* of the remaining bindings.
(define-syntax match-let*
  (syntax-rules ()
    ;; No bindings left: just the body.
    ((match-let* () . body)
     (begin . body))
    ((match-let* ((pattern init) . more) . body)
     (match init
       (pattern (match-let* more . body))))))
520
521
522;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
523;; Not quite portable bits.
524
525;; Matching ellipses `...' is tricky.  A strict interpretation of R5RS
526;; would suggest that `...' in the literals list would treat it as a
527;; literal in pattern, however no SYNTAX-RULES implementation I'm
;; aware of currently supports this.  SRFI-46 support would make this
;; easy, but SRFI-46 is also widely unsupported.
530
531;; In the meantime we conditionally implement this in whatever
532;; low-level macro system is available, defaulting to an
533;; implementation which doesn't support `...' and requires the user to
534;; match with `___'.
535
;; (match-check-ellipse form if-ellipsis otherwise)
;;
;; Expands into IF-ELLIPSIS when FORM is the identifier `...', and into
;; OTHERWISE for anything else.  The macro declares `___' as its own
;; ellipsis so that `...' can appear in the literals list.
(define-syntax match-check-ellipse
  (syntax-rules ___ (...)
    ((match-check-ellipse ... if-ellipsis otherwise) if-ellipsis)
    ((match-check-ellipse anything if-ellipsis otherwise) otherwise)))
540
;; (match-check-identifier form if-identifier otherwise)
;;
;; Expands into IF-IDENTIFIER when FORM is an identifier and into
;; OTHERWISE when it is a pair, a vector or any other datum.
(define-syntax match-check-identifier
  (syntax-rules ()
    ;; Pairs and vectors are rejected structurally.
    ((_ (head . tail) sk fk) fk)
    ((_ #(elt ...) sk fk) fk)
    ;; For an atom, use it as the pattern of a local macro: an
    ;; identifier acts as a pattern variable and matches the probe
    ;; symbol, whereas a literal datum does not.
    ((_ form sk fk)
     (let-syntax
         ((sym?
           (syntax-rules ()
             ((sym? form yes no) yes)
             ((sym? other yes no) no))))
       (sym? abracadabra sk fk)))))
552