1;; 2;; Taken from srfi-181 reference implementation by Shiro Kawai 3;; MIT License. 4;; 5 6(define-module srfi-181 7 (use gauche.record) 8 (use gauche.vport) 9 (use gauche.uvector) 10 (use gauche.charconv) 11 (use srfi-42) 12 (export make-custom-binary-input-port 13 make-custom-textual-input-port 14 make-custom-binary-output-port 15 make-custom-textual-output-port 16 make-custom-binary-input/output-port 17 make-file-error 18 19 make-codec latin-1-codec utf-8-codec utf-16-codec 20 native-eol-style 21 unknown-encoding-error? unknown-encoding-error-name 22 i/o-decoding-error? i/o-encoding-error? 23 i/o-encoding-error-char 24 make-transcoder native-transcoder 25 transcoded-port 26 bytevector->string string->bytevector)) 27(select-module srfi-181) 28 29;;; 30;;; Adapters for gauche.vport 31;;; 32 33(define-constant *buffer-size* 1024) 34 35;; If set-position! method raises a condition <io-invalid-position-error> 36;; using srfi-192's make-i/o-invalid-position-error, we intercept it 37;; to add port info. 38(define-syntax wrap-setpos 39 (syntax-rules () 40 [(_ setpos portvar) 41 (and setpos 42 (^[pos] (guard (e [(and (<io-invalid-position-error> e) 43 (not (~ e'port))) 44 (errorf <io-invalid-position-error> 45 :port portvar 46 :position (~ e'position) 47 "Invalid position object: ~s" 48 (~ e'position))] 49 [else (raise e)]) 50 (setpos pos))))])) 51 52(define (make-custom-binary-input-port id read! 53 get-position set-position! 54 close) 55 (define (filler buf) 56 (read! buf 0 (u8vector-length buf))) 57 (letrec ([p (make <buffered-input-port> 58 :name id 59 :fill filler 60 :getpos get-position 61 :setpos (wrap-setpos set-position! p) 62 :close close)]) 63 p)) 64 65(define (make-custom-textual-input-port id read! 66 get-position set-position! 67 close) 68 (if (or get-position set-position!) 69 ;; If positioning is required, we can't buffer characters. 70 (let ([buf (make-vector 1)]) 71 (letrec ([p (make <virtual-input-port> 72 :getc (^[] (let1 n (read! buf 0 1) 73 (if (zero? n) 74 (eof-object) 75 (vector-ref buf 0)))) 76 :getpos get-position 77 :setpos (wrap-setpos set-position! p) 78 :close close)]) 79 p)) 80 ;; <buffered-input-port> uses u8vector for the buffer, so we have 81 ;; to convert it. 82 (let ([cbuf #f]) ;vector, allocated on demand 83 (define (filler buf) 84 (unless cbuf 85 (set! cbuf (make-vector (quotient (u8vector-length buf) 4)))) 86 (let1 n (read! cbuf 0 (vector-length cbuf)) 87 (if (zero? n) 88 n 89 (let* ([s (vector->string cbuf 0 n)] 90 [size (string-size s)]) 91 (assume (<= size (u8vector-length buf))) 92 (string->u8vector! buf 0 s) 93 size)))) 94 (make <buffered-input-port> 95 :name id 96 :fill filler 97 :close close)))) 98 99(define (make-custom-binary-output-port id write! 100 get-position set-position! 101 close :optional (flush #f)) 102 (define (flusher buf complete?) 103 (if (not complete?) 104 (write! buf 0 (u8vector-length buf)) 105 ;; this is a buffer-flush operation 106 (let1 len (u8vector-length buf) 107 (let loop ([pos 0]) 108 (if (= pos len) 109 (begin 110 (when flush (flush)) 111 len) 112 (let1 n (write! buf pos (- len pos)) 113 (loop (+ pos n)))))))) 114 (letrec ([p (make <buffered-output-port> 115 :name id 116 :flush flusher 117 :getpos get-position 118 :setpos (wrap-setpos set-position! p) 119 :close close)]) 120 p)) 121 122;; For textual output, <buffered-output-port> is inconvenient, 123;; for the passed u8vector may end in the middle of multibyte character. 124(define (make-custom-textual-output-port id write! 125 get-position set-position! 126 close :optional (flush #f)) 127 (define cbuf (make-vector 1)) 128 (letrec ([p (make <virtual-output-port> 129 :name id 130 :putc (^c (vector-set! cbuf 0 c) (write! cbuf 0 1)) 131 :puts (^s (let1 siz (string-length s) 132 (when (< (vector-length cbuf) siz) 133 (set! cbuf (make-vector siz))) 134 (do-ec (: c (index i) s) 135 (vector-set! cbuf i c)) 136 (write! cbuf 0 siz))) 137 :getpos get-position 138 :setpos (wrap-setpos set-position! p) 139 :flush (^[] (and flush (flush))) 140 :close close)]) 141 p)) 142 143(define (make-custom-binary-input/output-port id read! write! 144 get-position set-position! 145 close :optional (flush #f)) 146 ;; We don't have buffered bidirectional port. Using virtual i/o port 147 ;; is less efficient, for I/O is done one byte at a time. 148 (define buf (make-u8vector 1)) 149 150 (letrec ([p (make <virtual-io-port> 151 :name id 152 :getb (^[] (let1 r (read! buf 0 1) 153 (if (zero? r) 154 (eof-object) 155 (u8vector-ref buf 0)))) 156 :putb (^b (u8vector-set! buf 0 b) 157 (write! buf 0 1)) 158 :getpos get-position 159 :setpos (wrap-setpos set-position! p) 160 :close close 161 :flush flush)]) 162 p)) 163 164(define (make-file-error . objs) 165 ;; As of 0.9.9, Gauche uses ad-hoc way to determine file-error-- 166 ;; that is, a <system-error> with certain errnos is a file error. 167 ;; It is impossible to translate arbitrary objs into meaningful 168 ;; <system-error>. This is just a crude emulation. 169 (condition 170 (<system-error> (errno ENXIO) (message (write-to-string objs))))) 171 172;;; 173;;; Transcoders 174;;; 175 176;; Since we already have gauche.charconv, we don't use srfi-181 layer 177;; to implement transcoded ports. 178 179;; TODO: newline style and decoding/encoding error is temporarily 180;; unsupported. We'll support them in gauche.charconv and propagate it 181;; to this layer. 182 183(define-condition-type <unknown-encoding-error> <error> 184 unknown-encoding-error? 185 (name unknown-encoding-error-name)) 186 187(define (i/o-decoding-error? obj) 188 (condition-has-type? obj <io-decoding-error>)) 189 190(define (i/o-encoding-error? obj) 191 (condition-has-type? obj <io-encoding-error>)) 192 193(define (i/o-encoding-error-char obj) 'writeme) 194 195(define-record-type codec 196 (%make-codec name) 197 codec? 198 (name codec-name)) 199(define-method write-object ((c codec) port) 200 (format port "#<codec ~s>" (codec-name c))) 201 202;; CES 'none' is kind of special---you can treat octet stream as any 203;; single-byte encoding. However, srfi-181 transcoder needs to assume 204;; specific internal encoding, so we treat 'none' as Latin1. 205(define *native-codec-name* 206 (cond-expand 207 [gauche.ces.none 'latin1] 208 [else (gauche-character-encoding)])) 209 210(define (make-codec name) 211 (if (and (ces-conversion-supported? *native-codec-name* name) 212 (ces-conversion-supported? name *native-codec-name*)) 213 (%make-codec name) 214 (error <unknown-encoding-error> 215 :name name 216 "Unknown encoding:" name))) 217 218(define *native-codec* (make-codec *native-codec-name*)) 219(define *ascii-codec* (make-codec "ascii")) 220(define *latin-1-codec* (make-codec "latin1")) 221(define (latin-1-codec) *latin-1-codec*) 222 223(cond-expand 224 [gauche.ces.none] 225 [else 226 (define *utf-8-codec* (make-codec "utf-8")) 227 (define *utf-16-codec* (make-codec "utf-16")) 228 (define (utf-8-codec) *utf-8-codec*) 229 (define (utf-16-codec) *utf-16-codec*)]) 230 231(define-record-type <transcoder> 232 (%make-transcoder codec eol-style handling-mode) 233 transcoder? 234 (codec transcoder-codec) 235 (eol-style transcoder-eol-style) 236 (handling-mode transcoder-handling-mode)) 237(define-method write-object ((obj <transcoder>) port) 238 (format port "#<transcoder ~a ~a ~a>" 239 (~ obj'codec) (~ obj'eol-style) (~ obj'handling-mode))) 240 241(define (make-transcoder codec eol-style handling-mode) 242 (unless (codec? codec) 243 (error "codec required, but got" codec)) 244 (unless (memq eol-style '(none lf crlf)) 245 (error "unsupported eol-style, must be one of (none lf crlf), but got" 246 eol-style)) 247 (unless (memq handling-mode '(replace raise)) 248 (error "unsupported handling-mode, must be either replace or raise, but got" 249 handling-mode)) 250 (%make-transcoder codec eol-style handling-mode)) 251 252(define (native-eol-style) 'none) 253 254(define (native-transcoder) 255 (make-transcoder *native-codec* (native-eol-style) 'replace)) 256 257;; wrapper for eol translation 258;; Ideally eol-translation should be handled directly in the port layer 259;; to be efficient. For the time being, though, we use a vport wrapper 260;; to fulfill the requirement. 261(define (eol-iport iport) 262 (define (getb) 263 (let1 b (read-byte iport) 264 (cond [(eof-object? b) b] 265 [(eqv? b #x0d) 266 (let1 b2 (peek-byte iport) 267 (if (eqv? b2 #x0a) 268 (read-byte iport) ;just return #\newline 269 #x0a))] 270 [else b]))) 271 (define (getc) 272 (let1 c (read-char iport) 273 (cond [(eof-object? c) c] 274 [(eqv? c #\return) 275 (let1 c2 (peek-char iport) 276 (if (eqv? c2 #\newline) 277 (read-char iport) ;just return #\newline 278 #\newline))] 279 [else c]))) 280 (make <virtual-input-port> :getb getb :getc getc)) 281 282(define (eol-lf-oport oport) 283 (define got-return #f) 284 (define (putx ch-or-byte) 285 (case ch-or-byte 286 [(#\return #x0d) 287 (when got-return (write-char #\newline oport)) 288 (set! got-return #t)] 289 [(#\newline #x0a) (write-char #\newline oport) 290 (set! got-return #f)] 291 [else (when got-return (write-char #\newline oport)) 292 (if (integer? ch-or-byte) 293 (write-byte ch-or-byte oport) 294 (write-char ch-or-byte oport)) 295 (set! got-return #f)])) 296 (make <virtual-output-port> :putb putx :putc putx)) 297 298(define (eol-crlf-oport oport) 299 (define got-return #f) 300 (define (putx ch-or-byte) 301 (case ch-or-byte 302 [(#\return #x0d) 303 (when got-return (display "\r\n" oport)) 304 (set! got-return #t)] 305 [(#\newline #x0a) (display "\r\n" oport) 306 (set! got-return #f)] 307 [else 308 (when got-return (display "\r\n" oport)) 309 (if (integer? ch-or-byte) 310 (write-byte ch-or-byte oport) 311 (write-char ch-or-byte oport)) 312 (set! got-return #f)])) 313 (make <virtual-output-port> :putb putx :putc putx)) 314 315;; API 316(define (transcoded-port inner transcoder) 317 (assume-type transcoder <transcoder>) 318 (cond 319 [(input-port? inner) 320 (let1 p (case (~ transcoder'eol-style) 321 [(lf crlf) (eol-iport inner)] 322 [else inner]) 323 (parameterize ((external-conversion-library #f)) 324 (open-input-conversion-port p 325 (~ transcoder'codec'name) 326 :owner? #t 327 :illegal-output (~ transcoder'handling-mode))))] 328 [(output-port? inner) 329 (let1 p (case (~ transcoder'eol-style) 330 [(lf) (eol-lf-oport inner)] 331 [(crlf) (eol-crlf-oport inner)] 332 [else inner]) 333 (parameterize ((external-conversion-library #f)) 334 (open-output-conversion-port p 335 (~ transcoder'codec'name) 336 :owner? #t 337 :illegal-output (~ transcoder'handling-mode))))] 338 [else 339 (error "port required, but got:" inner)])) 340 341(define (bytevector->string bytevector transcoder) 342 (assume-type bytevector <u8vector>) 343 (assume-type transcoder <transcoder>) 344 (if (eq? (~ transcoder'eol-style) 'none) 345 (parameterize ((external-conversion-library #f)) 346 (ces-convert-to <string> bytevector 347 (codec-name (transcoder-codec transcoder)) 348 *native-codec-name* 349 :illegal-output (transcoder-handling-mode transcoder))) 350 (port->string 351 (transcoded-port (open-input-bytevector bytevector) transcoder)))) 352 353(define (string->bytevector string transcoder) 354 (assume-type string <string>) 355 (assume-type transcoder <transcoder>) 356 (if (eq? (~ transcoder'eol-style) 'none) 357 ;; ces-convert-to doesn't distinguish 'decoding' and 'encoding', so 358 ;; it always raises <io-decoding-error> if input is invalid. 359 ;; In srfi-181, we should treat it as encoding error. 360 (guard (e [(<io-decoding-error> e) 361 (error <io-encoding-error> 362 :port (~ e'port) 363 :message (~ e'message))] 364 [else (raise e)]) 365 (parameterize ((external-conversion-library #f)) 366 (ces-convert-to <u8vector> string 367 *native-codec-name* 368 (codec-name (transcoder-codec transcoder)) 369 :illegal-output (transcoder-handling-mode transcoder)))) 370 (let* ([sink (open-output-bytevector)] 371 [p (transcoded-port sink transcoder)]) 372 (display string p) 373 (flush p) 374 (begin0 (get-output-bytevector sink) 375 (close-output-port p))))) 376