1#lang racket/base
2(require "../parse/ast.rkt"
3         "../common/range.rkt"
4         "../common/error.rkt")
5
6(provide validate)
7
8;; Returns max-lookbehind or reports an error
9(define (validate rx num-groups)
10  (define group-sizes #hasheqv())
11  (define depends-sizes #hasheqv())
12  (define must-sizes #hasheqv())
13  (define (might-be-empty-error)
14    (regexp-error "`*`, `+`, or `{...}` operand could be empty"))
15  (define-values (min-len max-len max-lookbehind)
16    (let validate ([rx rx])
17      (cond
18       [(eq? rx rx:never)
19        (values 1 1 0)]
20       [(or (eq? rx rx:any)
21            (exact-integer? rx)
22            (rx:range? rx))
23        (values 1 1 0)]
24       [(bytes? rx)
25        (define len (bytes-length rx))
26        (values len len 0)]
27       [(or (eq? rx rx:empty)
28            (eq? rx rx:end)
29            (eq? rx rx:line-end))
30        (values 0 0 0)]
31       [(or (eq? rx rx:start)
32            (eq? rx rx:line-start))
33        (values 0 0 1)]
34       [(or (eq? rx rx:word-boundary)
35            (eq? rx rx:not-word-boundary))
36        (values 0 0 1)]
37       [(rx:alts? rx)
38        (define-values (min1 max1 lb1) (validate (rx:alts-rx1 rx)))
39        (define-values (min2 max2 lb2) (validate (rx:alts-rx2 rx)))
40        (values (min min1 min2) (max max1 max2) (max lb1 lb2))]
41       [(rx:sequence? rx)
42        (for/fold ([min-len 0] [max-len 0] [max-lb 0]) ([rx (in-list (rx:sequence-rxs rx))])
43          (define-values (min1 max1 lb1) (validate rx))
44          (values (+ min-len min1) (+ max-len max1) (max max-lb lb1)))]
45       [(rx:group? rx)
46        (define-values (min1 max1 lb1) (validate (rx:group-rx rx)))
47        (set! group-sizes (hash-set group-sizes (rx:group-number rx) min1))
48        (values min1 max1 lb1)]
49       [(rx:repeat? rx)
50        (define old-depends-sizes depends-sizes)
51        (set! depends-sizes #hasheqv())
52        (define-values (min1 max1 lb1) (validate (rx:repeat-rx rx)))
53        (when (zero? min1)
54          (might-be-empty-error))
55        (set! must-sizes (merge-depends-sizes must-sizes depends-sizes))
56        (set! depends-sizes (merge-depends-sizes old-depends-sizes depends-sizes))
57        (values (* min1 (rx:repeat-min rx))
58                (* max1 (rx:repeat-max rx))
59                lb1)]
60       [(rx:maybe? rx)
61        (define-values (min1 max1 lb1) (validate (rx:maybe-rx rx)))
62        (values 0 max1 lb1)]
63       [(rx:conditional? rx)
64        (define-values (min0 max0 lb0) (validate (rx:conditional-tst rx)))
65        (define-values (min1 max1 lb1) (validate (rx:conditional-rx1 rx)))
66        (define-values (min2 max2 lb2) (validate (rx:conditional-rx2 rx)))
67        (values (min min1 min2) (max max1 max2) (max lb0 lb1 lb2))]
68       [(rx:lookahead? rx)
69        (define-values (min1 max1 lb1) (validate (rx:lookahead-rx rx)))
70        (values 0 0 lb1)]
71       [(rx:lookbehind? rx)
72        (define-values (min1 max1 lb1) (validate (rx:lookbehind-rx rx)))
73        (when (= +inf.0 max1)
74          (regexp-error "lookbehind pattern does not match a bounded length"))
75        (set-rx:lookbehind-lb-min! rx min1)
76        (set-rx:lookbehind-lb-max! rx max1)
77        (values 0 0 (max max1 lb1))]
78       [(rx:cut? rx)
79        (validate (rx:cut-rx rx))]
80       [(rx:reference? rx)
81        (define n (rx:reference-n rx))
82        (unless (n . <= . num-groups)
83          (regexp-error "backreference number is larger than the highest-numbered cluster"))
84        (define min-size (hash-ref group-sizes n #f))
85        (cond
86         [min-size
87          ;; known minimum:
88          (values min-size +inf.0 0)]
89         [else
90          ;; assume at least one, but check:
91          (set! depends-sizes (hash-set depends-sizes (sub1 n) #t))
92          (values 1 +inf.0 0)])]
93       [(rx:unicode-categories? rx)
94        (values 1 4 0)]
95       [else (error 'validate "internal error: ~s" rx)])))
96  (for ([n (in-hash-keys must-sizes)])
97    (unless (positive? (hash-ref group-sizes n 0))
98      (might-be-empty-error)))
99  max-lookbehind)
100
101(define (merge-depends-sizes ht1 ht2)
102  (cond
103   [(zero? (hash-count ht1)) ht2]
104   [((hash-count ht2) . < . (hash-count ht1))
105    (merge-depends-sizes ht2 ht1)]
106   [else
107    (for/fold ([ht2 ht2]) ([k (in-hash-keys ht1)])
108      (hash-set ht2 k #t))]))
109
110(define (range-utf-8-encoding-lengths range)
111  (for/fold ([min1 4] [max1 0]) ([seg (in-list '((0 127 1)
112                                                 (128 #x7FF 2)
113                                                 (#x800 #xFFFF 3)
114                                                 (#x10000 #x10FFFF 4)))])
115    (if (range-overlaps? range (car seg) (cadr seg))
116        (values (min min1 (caddr seg))
117                (max max1 (caddr seg)))
118        (values min1 max1))))
119