1#lang racket/base 2(require "../parse/ast.rkt" 3 "../common/range.rkt" 4 "../common/error.rkt") 5 6(provide validate) 7 8;; Returns max-lookbehind or reports an error 9(define (validate rx num-groups) 10 (define group-sizes #hasheqv()) 11 (define depends-sizes #hasheqv()) 12 (define must-sizes #hasheqv()) 13 (define (might-be-empty-error) 14 (regexp-error "`*`, `+`, or `{...}` operand could be empty")) 15 (define-values (min-len max-len max-lookbehind) 16 (let validate ([rx rx]) 17 (cond 18 [(eq? rx rx:never) 19 (values 1 1 0)] 20 [(or (eq? rx rx:any) 21 (exact-integer? rx) 22 (rx:range? rx)) 23 (values 1 1 0)] 24 [(bytes? rx) 25 (define len (bytes-length rx)) 26 (values len len 0)] 27 [(or (eq? rx rx:empty) 28 (eq? rx rx:end) 29 (eq? rx rx:line-end)) 30 (values 0 0 0)] 31 [(or (eq? rx rx:start) 32 (eq? rx rx:line-start)) 33 (values 0 0 1)] 34 [(or (eq? rx rx:word-boundary) 35 (eq? rx rx:not-word-boundary)) 36 (values 0 0 1)] 37 [(rx:alts? rx) 38 (define-values (min1 max1 lb1) (validate (rx:alts-rx1 rx))) 39 (define-values (min2 max2 lb2) (validate (rx:alts-rx2 rx))) 40 (values (min min1 min2) (max max1 max2) (max lb1 lb2))] 41 [(rx:sequence? rx) 42 (for/fold ([min-len 0] [max-len 0] [max-lb 0]) ([rx (in-list (rx:sequence-rxs rx))]) 43 (define-values (min1 max1 lb1) (validate rx)) 44 (values (+ min-len min1) (+ max-len max1) (max max-lb lb1)))] 45 [(rx:group? rx) 46 (define-values (min1 max1 lb1) (validate (rx:group-rx rx))) 47 (set! group-sizes (hash-set group-sizes (rx:group-number rx) min1)) 48 (values min1 max1 lb1)] 49 [(rx:repeat? rx) 50 (define old-depends-sizes depends-sizes) 51 (set! depends-sizes #hasheqv()) 52 (define-values (min1 max1 lb1) (validate (rx:repeat-rx rx))) 53 (when (zero? min1) 54 (might-be-empty-error)) 55 (set! must-sizes (merge-depends-sizes must-sizes depends-sizes)) 56 (set! depends-sizes (merge-depends-sizes old-depends-sizes depends-sizes)) 57 (values (* min1 (rx:repeat-min rx)) 58 (* max1 (rx:repeat-max rx)) 59 lb1)] 60 [(rx:maybe? rx) 61 (define-values (min1 max1 lb1) (validate (rx:maybe-rx rx))) 62 (values 0 max1 lb1)] 63 [(rx:conditional? rx) 64 (define-values (min0 max0 lb0) (validate (rx:conditional-tst rx))) 65 (define-values (min1 max1 lb1) (validate (rx:conditional-rx1 rx))) 66 (define-values (min2 max2 lb2) (validate (rx:conditional-rx2 rx))) 67 (values (min min1 min2) (max max1 max2) (max lb0 lb1 lb2))] 68 [(rx:lookahead? rx) 69 (define-values (min1 max1 lb1) (validate (rx:lookahead-rx rx))) 70 (values 0 0 lb1)] 71 [(rx:lookbehind? rx) 72 (define-values (min1 max1 lb1) (validate (rx:lookbehind-rx rx))) 73 (when (= +inf.0 max1) 74 (regexp-error "lookbehind pattern does not match a bounded length")) 75 (set-rx:lookbehind-lb-min! rx min1) 76 (set-rx:lookbehind-lb-max! rx max1) 77 (values 0 0 (max max1 lb1))] 78 [(rx:cut? rx) 79 (validate (rx:cut-rx rx))] 80 [(rx:reference? rx) 81 (define n (rx:reference-n rx)) 82 (unless (n . <= . num-groups) 83 (regexp-error "backreference number is larger than the highest-numbered cluster")) 84 (define min-size (hash-ref group-sizes n #f)) 85 (cond 86 [min-size 87 ;; known minimum: 88 (values min-size +inf.0 0)] 89 [else 90 ;; assume at least one, but check: 91 (set! depends-sizes (hash-set depends-sizes (sub1 n) #t)) 92 (values 1 +inf.0 0)])] 93 [(rx:unicode-categories? rx) 94 (values 1 4 0)] 95 [else (error 'validate "internal error: ~s" rx)]))) 96 (for ([n (in-hash-keys must-sizes)]) 97 (unless (positive? (hash-ref group-sizes n 0)) 98 (might-be-empty-error))) 99 max-lookbehind) 100 101(define (merge-depends-sizes ht1 ht2) 102 (cond 103 [(zero? (hash-count ht1)) ht2] 104 [((hash-count ht2) . < . (hash-count ht1)) 105 (merge-depends-sizes ht2 ht1)] 106 [else 107 (for/fold ([ht2 ht2]) ([k (in-hash-keys ht1)]) 108 (hash-set ht2 k #t))])) 109 110(define (range-utf-8-encoding-lengths range) 111 (for/fold ([min1 4] [max1 0]) ([seg (in-list '((0 127 1) 112 (128 #x7FF 2) 113 (#x800 #xFFFF 3) 114 (#x10000 #x10FFFF 4)))]) 115 (if (range-overlaps? range (car seg) (cadr seg)) 116 (values (min min1 (caddr seg)) 117 (max max1 (caddr seg))) 118 (values min1 max1)))) 119