1;;
2;; Taken from srfi-181 reference implementation by Shiro Kawai
3;; MIT License.
4;;
5
6(define-module srfi-181
7  (use gauche.record)
8  (use gauche.vport)
9  (use gauche.uvector)
10  (use gauche.charconv)
11  (use srfi-42)
12  (export make-custom-binary-input-port
13          make-custom-textual-input-port
14          make-custom-binary-output-port
15          make-custom-textual-output-port
16          make-custom-binary-input/output-port
17          make-file-error
18
19          make-codec latin-1-codec utf-8-codec utf-16-codec
20          native-eol-style
21          unknown-encoding-error? unknown-encoding-error-name
22          i/o-decoding-error? i/o-encoding-error?
23          i/o-encoding-error-char
24          make-transcoder native-transcoder
25          transcoded-port
26          bytevector->string string->bytevector))
27(select-module srfi-181)
28
29;;;
30;;; Adapters for gauche.vport
31;;;
32
33(define-constant *buffer-size* 1024)
34
35;; If set-position! method raises a condition <io-invalid-position-error>
36;; using srfi-192's make-i/o-invalid-position-error, we intercept it
37;; to add port info.
38(define-syntax wrap-setpos
39  (syntax-rules ()
40    [(_ setpos portvar)
41     (and setpos
42          (^[pos] (guard (e [(and (<io-invalid-position-error> e)
43                                  (not (~ e'port)))
44                             (errorf <io-invalid-position-error>
45                                     :port portvar
46                                     :position (~ e'position)
47                                     "Invalid position object: ~s"
48                                     (~ e'position))]
49                            [else (raise e)])
50                    (setpos pos))))]))
51
52(define (make-custom-binary-input-port id read!
53                                       get-position set-position!
54                                       close)
55  (define (filler buf)
56    (read! buf 0 (u8vector-length buf)))
57  (letrec ([p (make <buffered-input-port>
58                :name id
59                :fill filler
60                :getpos get-position
61                :setpos (wrap-setpos set-position! p)
62                :close close)])
63    p))
64
65(define (make-custom-textual-input-port id read!
66                                        get-position set-position!
67                                        close)
68  (if (or get-position set-position!)
69    ;; If positioning is required, we can't buffer characters.
70    (let ([buf (make-vector 1)])
71      (letrec ([p (make <virtual-input-port>
72                    :getc (^[] (let1 n (read! buf 0 1)
73                                 (if (zero? n)
74                                   (eof-object)
75                                   (vector-ref buf 0))))
76                    :getpos get-position
77                    :setpos (wrap-setpos set-position! p)
78                    :close close)])
79        p))
80    ;; <buffered-input-port> uses u8vector for the buffer, so we have
81    ;; to convert it.
82    (let ([cbuf #f])                     ;vector, allocated on demand
83      (define (filler buf)
84        (unless cbuf
85          (set! cbuf (make-vector (quotient (u8vector-length buf) 4))))
86        (let1 n (read! cbuf 0 (vector-length cbuf))
87          (if (zero? n)
88            n
89            (let* ([s (vector->string cbuf 0 n)]
90                   [size (string-size s)])
91              (assume (<= size (u8vector-length buf)))
92              (string->u8vector! buf 0 s)
93              size))))
94      (make <buffered-input-port>
95        :name id
96        :fill filler
97        :close close))))
98
99(define (make-custom-binary-output-port id write!
100                                        get-position set-position!
101                                        close :optional (flush #f))
102  (define (flusher buf complete?)
103    (if (not complete?)
104      (write! buf 0 (u8vector-length buf))
105      ;; this is a buffer-flush operation
106      (let1 len (u8vector-length buf)
107        (let loop ([pos 0])
108          (if (= pos len)
109            (begin
110              (when flush (flush))
111              len)
112            (let1 n (write! buf pos (- len pos))
113              (loop (+ pos n))))))))
114  (letrec ([p (make <buffered-output-port>
115                :name id
116                :flush flusher
117                :getpos get-position
118                :setpos (wrap-setpos set-position! p)
119                :close close)])
120    p))
121
122;; For textual output, <buffered-output-port> is inconvenient,
123;; for the passed u8vector may end in the middle of multibyte character.
124(define (make-custom-textual-output-port id write!
125                                         get-position set-position!
126                                         close :optional (flush #f))
127  (define cbuf (make-vector 1))
128  (letrec ([p (make <virtual-output-port>
129                :name id
130                :putc (^c (vector-set! cbuf 0 c) (write! cbuf 0 1))
131                :puts (^s (let1 siz (string-length s)
132                            (when (< (vector-length cbuf) siz)
133                              (set! cbuf (make-vector siz)))
134                            (do-ec (: c (index i) s)
135                                   (vector-set! cbuf i c))
136                            (write! cbuf 0 siz)))
137                :getpos get-position
138                :setpos (wrap-setpos set-position! p)
139                :flush (^[] (and flush (flush)))
140                :close close)])
141    p))
142
143(define (make-custom-binary-input/output-port id read! write!
144                                              get-position set-position!
145                                              close :optional (flush #f))
146  ;; We don't have buffered bidirectional port.  Using virtual i/o port
147  ;; is less efficient, for I/O is done one byte at a time.
148  (define buf (make-u8vector 1))
149
150  (letrec ([p (make <virtual-io-port>
151                :name id
152                :getb (^[] (let1 r (read! buf 0 1)
153                             (if (zero? r)
154                               (eof-object)
155                               (u8vector-ref buf 0))))
156                :putb (^b (u8vector-set! buf 0 b)
157                          (write! buf 0 1))
158                :getpos get-position
159                :setpos (wrap-setpos set-position! p)
160                :close close
161                :flush flush)])
162    p))
163
164(define (make-file-error . objs)
165  ;; As of 0.9.9, Gauche uses ad-hoc way to determine file-error--
166  ;; that is, a <system-error> with certain errnos is a file error.
167  ;; It is impossible to translate arbitrary objs into meaningful
168  ;; <system-error>.  This is just a crude emulation.
169  (condition
170   (<system-error> (errno ENXIO) (message (write-to-string objs)))))
171
172;;;
173;;; Transcoders
174;;;
175
176;; Since we already have gauche.charconv, we don't use srfi-181 layer
177;; to implement transcoded ports.
178
179;; TODO: newline style and decoding/encoding error is temporarily
180;; unsupported.  We'll support them in gauche.charconv and propagate it
181;; to this layer.
182
183(define-condition-type <unknown-encoding-error> <error>
184  unknown-encoding-error?
185  (name unknown-encoding-error-name))
186
187(define (i/o-decoding-error? obj)
188  (condition-has-type? obj <io-decoding-error>))
189
190(define (i/o-encoding-error? obj)
191  (condition-has-type? obj <io-encoding-error>))
192
193(define (i/o-encoding-error-char obj) 'writeme)
194
195(define-record-type codec
196  (%make-codec name)
197  codec?
198  (name codec-name))
199(define-method write-object ((c codec) port)
200  (format port "#<codec ~s>" (codec-name c)))
201
202;; CES 'none' is kind of special---you can treat octet stream as any
203;; single-byte encoding.  However, srfi-181 transcoder needs to assume
204;; specific internal encoding, so we treat 'none' as Latin1.
205(define *native-codec-name*
206  (cond-expand
207   [gauche.ces.none 'latin1]
208   [else (gauche-character-encoding)]))
209
210(define (make-codec name)
211  (if (and (ces-conversion-supported? *native-codec-name* name)
212           (ces-conversion-supported? name *native-codec-name*))
213    (%make-codec name)
214    (error <unknown-encoding-error>
215           :name name
216           "Unknown encoding:" name)))
217
218(define *native-codec* (make-codec *native-codec-name*))
219(define *ascii-codec* (make-codec "ascii"))
220(define *latin-1-codec* (make-codec "latin1"))
221(define (latin-1-codec) *latin-1-codec*)
222
223(cond-expand
224 [gauche.ces.none]
225 [else
226  (define *utf-8-codec* (make-codec "utf-8"))
227  (define *utf-16-codec* (make-codec "utf-16"))
228  (define (utf-8-codec) *utf-8-codec*)
229  (define (utf-16-codec) *utf-16-codec*)])
230
231(define-record-type <transcoder>
232  (%make-transcoder codec eol-style handling-mode)
233  transcoder?
234  (codec transcoder-codec)
235  (eol-style transcoder-eol-style)
236  (handling-mode transcoder-handling-mode))
237(define-method write-object ((obj <transcoder>) port)
238  (format port "#<transcoder ~a ~a ~a>"
239          (~ obj'codec) (~ obj'eol-style) (~ obj'handling-mode)))
240
241(define (make-transcoder codec eol-style handling-mode)
242  (unless (codec? codec)
243    (error "codec required, but got" codec))
244  (unless (memq eol-style '(none lf crlf))
245    (error "unsupported eol-style, must be one of (none lf crlf), but got"
246           eol-style))
247  (unless (memq handling-mode '(replace raise))
248    (error "unsupported handling-mode, must be either replace or raise, but got"
249           handling-mode))
250  (%make-transcoder codec eol-style handling-mode))
251
252(define (native-eol-style) 'none)
253
254(define (native-transcoder)
255  (make-transcoder *native-codec* (native-eol-style) 'replace))
256
257;; wrapper for eol translation
258;; Ideally eol-translation should be handled directly in the port layer
259;; to be efficient.  For the time being, though, we use a vport wrapper
260;; to fulfill the requirement.
261(define (eol-iport iport)
262  (define (getb)
263    (let1 b (read-byte iport)
264      (cond [(eof-object? b) b]
265            [(eqv? b #x0d)
266             (let1 b2 (peek-byte iport)
267               (if (eqv? b2 #x0a)
268                 (read-byte iport) ;just return #\newline
269                 #x0a))]
270            [else b])))
271  (define (getc)
272    (let1 c (read-char iport)
273      (cond [(eof-object? c) c]
274            [(eqv? c #\return)
275             (let1 c2 (peek-char iport)
276               (if (eqv? c2 #\newline)
277                 (read-char iport) ;just return #\newline
278                 #\newline))]
279            [else c])))
280  (make <virtual-input-port> :getb getb :getc getc))
281
282(define (eol-lf-oport oport)
283  (define got-return #f)
284  (define (putx ch-or-byte)
285    (case ch-or-byte
286      [(#\return #x0d)
287       (when got-return (write-char #\newline oport))
288       (set! got-return #t)]
289      [(#\newline #x0a) (write-char #\newline oport)
290       (set! got-return #f)]
291      [else (when got-return (write-char #\newline oport))
292            (if (integer? ch-or-byte)
293              (write-byte ch-or-byte oport)
294              (write-char ch-or-byte oport))
295            (set! got-return #f)]))
296  (make <virtual-output-port> :putb putx :putc putx))
297
298(define (eol-crlf-oport oport)
299  (define got-return #f)
300  (define (putx ch-or-byte)
301    (case ch-or-byte
302      [(#\return #x0d)
303       (when got-return (display "\r\n" oport))
304       (set! got-return #t)]
305      [(#\newline #x0a) (display "\r\n" oport)
306       (set! got-return #f)]
307      [else
308       (when got-return (display "\r\n" oport))
309       (if (integer? ch-or-byte)
310         (write-byte ch-or-byte oport)
311         (write-char ch-or-byte oport))
312       (set! got-return #f)]))
313  (make <virtual-output-port> :putb putx :putc putx))
314
315;; API
316(define (transcoded-port inner transcoder)
317  (assume-type transcoder <transcoder>)
318  (cond
319   [(input-port? inner)
320    (let1 p (case (~ transcoder'eol-style)
321              [(lf crlf)   (eol-iport inner)]
322              [else inner])
323      (parameterize ((external-conversion-library #f))
324        (open-input-conversion-port p
325                                    (~ transcoder'codec'name)
326                                    :owner? #t
327                                    :illegal-output (~ transcoder'handling-mode))))]
328   [(output-port? inner)
329    (let1 p (case (~ transcoder'eol-style)
330              [(lf)   (eol-lf-oport inner)]
331              [(crlf) (eol-crlf-oport inner)]
332              [else inner])
333      (parameterize ((external-conversion-library #f))
334        (open-output-conversion-port p
335                                     (~ transcoder'codec'name)
336                                     :owner? #t
337                                     :illegal-output (~ transcoder'handling-mode))))]
338   [else
339    (error "port required, but got:" inner)]))
340
341(define (bytevector->string bytevector transcoder)
342  (assume-type bytevector <u8vector>)
343  (assume-type transcoder <transcoder>)
344  (if (eq? (~ transcoder'eol-style) 'none)
345    (parameterize ((external-conversion-library #f))
346      (ces-convert-to <string> bytevector
347                      (codec-name (transcoder-codec transcoder))
348                      *native-codec-name*
349                      :illegal-output (transcoder-handling-mode transcoder)))
350    (port->string
351     (transcoded-port (open-input-bytevector bytevector) transcoder))))
352
353(define (string->bytevector string transcoder)
354  (assume-type string <string>)
355  (assume-type transcoder <transcoder>)
356  (if (eq? (~ transcoder'eol-style) 'none)
357    ;; ces-convert-to doesn't distinguish 'decoding' and 'encoding', so
358    ;; it always raises <io-decoding-error> if input is invalid.
359    ;; In srfi-181, we should treat it as encoding error.
360    (guard (e [(<io-decoding-error> e)
361               (error <io-encoding-error>
362                      :port (~ e'port)
363                      :message (~ e'message))]
364              [else (raise e)])
365      (parameterize ((external-conversion-library #f))
366        (ces-convert-to <u8vector> string
367                        *native-codec-name*
368                        (codec-name (transcoder-codec transcoder))
369                        :illegal-output (transcoder-handling-mode transcoder))))
370    (let* ([sink (open-output-bytevector)]
371           [p (transcoded-port sink transcoder)])
372      (display string p)
373      (flush p)
374      (begin0 (get-output-bytevector sink)
375        (close-output-port p)))))
376