1#lang racket/base
2
3;; A compiled matcher accepts a byte string or a `lazy-bytes` object,
4;; where the later is used to pull bytes on demand from a port or a
5;; long character string.
6
7(provide make-lazy-bytes
8         lazy-bytes-before-end?
9         lazy-bytes-ref
10         lazy-bytes-bstr
11         lazy-bytes-failed?
12         lazy-bytes-discarded-count
13         lazy-bytes-advance!)
14
15(struct lazy-bytes ([bstr #:mutable] ; buffered bytes
16                    [end #:mutable]  ; number of available bytes --- plus discarded bytes
17                    in               ; input port
18                    skip-amt         ; offset into the port; 0 if `(not peek?)`
19                    prefix-len       ; length of prefix (not from port)
20                    peek?            ; peeking mode
21                    immediate-only?  ; non-blocking mode; implies `peek?`
22                    progress-evt     ; stop peeking if ready
23                    out              ; output hold discarded bytes; implies `(not peek?)`
24                    max-lookbehind   ; bytes before current counter to preserve, if `out`
25                    [failed? #:mutable] ; set to #t if `progress-evt` fires or read blocks
26                    [discarded-count #:mutable] ; bytes discarded, if not `peek?`
27                    max-peek))       ; maximum number of bytes to peek or #f
28
29(define (make-lazy-bytes in skip-amt prefix
30                         peek? immediate-only? progress-evt
31                         out max-lookbehind
32                         max-peek)
33  (define len (bytes-length prefix))
34  (lazy-bytes prefix len in skip-amt len
35              peek? immediate-only? progress-evt
36              out max-lookbehind
37              #f 0
38              max-peek))
39
40(define (lazy-bytes-before-end? s pos end)
41  (and (or (not (exact-integer? end))
42           (pos . < . end))
43       (cond
44        [(pos . < . (lazy-bytes-end s))
45         #t]
46        [else
47         (and (get-more-bytes! s)
48              (lazy-bytes-before-end? s pos end))])))
49
50(define (lazy-bytes-ref s pos)
51  ;; Assume a preceding `lazy-bytes-before-end?` call, so
52  ;; we have the byte
53  (bytes-ref (lazy-bytes-bstr s) (- pos (lazy-bytes-discarded-count s))))
54
55(define (lazy-bytes-advance! s given-pos force?)
56  ;; If we advance far enough and not peeking,
57  ;; then flush unneeded bytes...
58  ;; The promise is that we won't ask for bytes before
59  ;; `pos` minus the `max-lookbehind`
60  (when force?
61    (lazy-bytes-before-end? s given-pos 'eof))
62  (define pos (min given-pos (lazy-bytes-end s)))
63  (when (and (lazy-bytes? s)
64             (not (lazy-bytes-peek? s)))
65    (define discarded-count (lazy-bytes-discarded-count s))
66    (define unneeded (- pos
67                        discarded-count
68                        (lazy-bytes-max-lookbehind s)))
69    (when (or force? (unneeded . > . 4096))
70      (define amt (if force?
71                      (- pos (lazy-bytes-discarded-count s))
72                      4096))
73      (define bstr (lazy-bytes-bstr s))
74      (define out (lazy-bytes-out s))
75      (when out
76        ;; Discard bytes to `out`
77        (define prefix-len (lazy-bytes-prefix-len s))
78        (write-bytes bstr
79                     out
80                     ;; Skip over bytes that are part of the prefix:
81                     (cond
82                      [(discarded-count . > . prefix-len) 0]
83                      [else (min amt (- prefix-len discarded-count))])
84                     ;; To amount to discard:
85                     amt))
86      (define copy-end (- (lazy-bytes-end s) discarded-count))
87      (unless (= amt copy-end)
88        (bytes-copy! bstr 0 bstr amt copy-end))
89      (set-lazy-bytes-discarded-count! s (+ amt discarded-count)))))
90
91;; ----------------------------------------
92
93;; Result reports whether new bytes were read
94(define (get-more-bytes! s)
95  (cond
96   [(lazy-bytes? s)
97    (define discarded-count (lazy-bytes-discarded-count s))
98    (define len (- (lazy-bytes-end s) discarded-count))
99    (define bstr (lazy-bytes-bstr s))
100    (cond
101     [(lazy-bytes-failed? s) #f]
102     [(len . < . (bytes-length bstr))
103      ;; Room in current byte string
104      (define n ((if (lazy-bytes-immediate-only? s)
105                     peek-bytes-avail!*
106                     peek-bytes-avail!)
107                 bstr
108                 (+ (- len (lazy-bytes-prefix-len s))
109                    (lazy-bytes-skip-amt s)
110                    discarded-count)
111                 (lazy-bytes-progress-evt s)
112                 (lazy-bytes-in s)
113                 len))
114      (cond
115       [(eof-object? n) #f]
116       [(not (fixnum? n))
117        (raise-arguments-error 'regexp-match
118                               "non-character in an unsupported context"
119                               "port" (lazy-bytes-in s))]
120       [(zero? n)
121        ;; would block or progress evt became ready
122        (set-lazy-bytes-failed?! s #t)
123        #f]
124       [else
125        (set-lazy-bytes-end! s (+ n len discarded-count))
126        #t])]
127     [else
128      (define max-peek (lazy-bytes-max-peek s))
129      (define prefix-len (and max-peek (lazy-bytes-prefix-len s)))
130      (cond
131        [(and max-peek
132              (len . >= . (- (+ max-peek prefix-len) discarded-count)))
133         ;; Not allowed to read any more
134         #f]
135        [else
136         ;; We're going to need a bigger byte string
137         (define bstr2 (make-bytes (let ([sz (max 32 (* 2 (bytes-length bstr)))])
138                                     (if max-peek
139                                         (min sz (- (+ prefix-len max-peek) discarded-count))
140                                         sz))))
141         (bytes-copy! bstr2 0 bstr 0 len)
142         (set-lazy-bytes-bstr! s bstr2)
143         (get-more-bytes! s)])])]
144   [else #f]))
145