1#lang racket/base 2(require "config.rkt" 3 "special.rkt" 4 "wrap.rkt" 5 "readtable.rkt" 6 "consume.rkt" 7 "accum-string.rkt" 8 "error.rkt" 9 "digit.rkt") 10 11(provide read-string 12 read-here-string) 13 14(define (read-string in config #:mode [mode 'string]) 15 (define source (read-config-source config)) 16 (define-values (open-end-line open-end-col open-end-pos) (port-next-location in)) 17 (define accum-str (accum-string-init! config)) 18 (define (bad-end c) 19 (cond 20 [(eof-object? c) 21 (reader-error in config #:due-to c #:end-pos open-end-pos "expected a closing `\"`")] 22 [else 23 (reader-error in config #:due-to c 24 "found non-character while reading a ~a" 25 mode)])) 26 (let loop () 27 (define c (read-char/special in config source)) 28 ;; Note: readtable is not used for a closing " or other string decisions 29 (cond 30 [(not (char? c)) 31 (bad-end c)] 32 [(char=? #\\ c) 33 (define escaping-c c) 34 (define escaped-c (read-char/special in config source)) 35 (when (not (char? escaped-c)) 36 (bad-end escaped-c)) 37 (define (unknown-error) 38 (reader-error in config 39 "unknown escape sequence `~a~a` in ~a" 40 escaping-c escaped-c 41 mode)) 42 (case escaped-c 43 [(#\\ #\" #\') 44 (accum-string-add! accum-str escaped-c)] 45 [(#\a) (accum-string-add! accum-str #\u7)] 46 [(#\b) (accum-string-add! accum-str #\backspace)] 47 [(#\t) (accum-string-add! accum-str #\tab)] 48 [(#\n) (accum-string-add! accum-str #\newline)] 49 [(#\v) (accum-string-add! accum-str #\vtab)] 50 [(#\f) (accum-string-add! accum-str #\page)] 51 [(#\r) (accum-string-add! accum-str #\return)] 52 [(#\e) (accum-string-add! accum-str #\u1B)] 53 [(#\newline) (void)] 54 [(#\return) 55 (define maybe-newline-c (peek-char/special in config 0 source)) 56 (when (eqv? maybe-newline-c #\newline) 57 (consume-char in maybe-newline-c)) 58 (void)] 59 [(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7) 60 ;; Octal (valid if <= 255) 61 (define pos (accum-string-count accum-str)) 62 (accum-string-add! accum-str escaped-c) 63 (define init-v (digit->number escaped-c)) 64 (define v (read-digits in config accum-str #:base 8 #:max-count 2 65 #:init init-v 66 #:zero-digits-result init-v)) 67 (unless (v . <= . 255) 68 (reader-error in config 69 "escape sequence `~a~a` is out of range in ~a" 70 escaping-c (accum-string-get! accum-str config #:start-pos pos) 71 mode)) 72 (set-accum-string-count! accum-str pos) 73 (accum-string-add! accum-str (integer->char v))] 74 [(#\x) 75 ;; Hex, two characters (always valid) 76 (define pos (accum-string-count accum-str)) 77 (define v (read-digits in config accum-str #:base 16 #:max-count 2)) 78 (unless (integer? v) (no-hex-digits in config v escaping-c escaped-c)) 79 (set-accum-string-count! accum-str pos) 80 (accum-string-add! accum-str (integer->char v))] 81 [(#\u) 82 ;; Hex, four characters (valid if not surrogate or if surrogate pair) 83 (unless (eq? mode 'string) (unknown-error)) 84 (define pos (accum-string-count accum-str)) 85 (define v (read-digits in config accum-str #:base 16 #:max-count 4)) 86 (unless (integer? v) (no-hex-digits in config v escaping-c escaped-c)) 87 (cond 88 [(or (v . < . #xD800) (v . > . #xDFFF)) 89 ;; Normal \u escape 90 (set-accum-string-count! accum-str pos) 91 (accum-string-add! accum-str (integer->char v))] 92 [else 93 ;; Maybe a surrogate-pair encoding 94 (define (next!) 95 (define next-c (read-char/special in config source)) 96 (when (char? next-c) 97 (accum-string-add! accum-str next-c)) 98 next-c) 99 (define v2 100 (let ([next-c (next!)]) 101 (cond 102 [(char=? next-c #\\) 103 (define next-c (next!)) 104 (cond 105 [(char=? next-c #\u) 106 (define v2 (read-digits in config accum-str #:base 16 #:max-count 4)) 107 (cond 108 [(integer? v2) 109 (and (v2 . >= . #xDC00) 110 (v2 . <= . #xDFFF) 111 v2)] 112 [else v2])] ; maybe EOF 113 [else next-c])] ; maybe EOF 114 [else next-c]))) ; maybe EOF 115 (cond 116 [(integer? v2) 117 (define combined-v (+ (arithmetic-shift (- v #xD800) 10) 118 (- v2 #xDC00) 119 #x10000)) 120 (cond 121 [(combined-v . > . #x10FFFF) 122 (reader-error in config 123 "escape sequence `~au~a` is out of range in string" 124 escaping-c (accum-string-get! accum-str config #:start-pos pos))] 125 [else 126 (set-accum-string-count! accum-str pos) 127 (accum-string-add! accum-str (integer->char combined-v))])] 128 [else 129 (reader-error in config 130 #:due-to v2 131 "bad or incomplete surrogate-style encoding at `~au~a`" 132 escaping-c (accum-string-get! accum-str config #:start-pos pos))])])] 133 [(#\U) 134 (unless (eq? mode 'string) (unknown-error)) 135 (define pos (accum-string-count accum-str)) 136 (define v (read-digits in config accum-str #:base 16 #:max-count 8)) 137 (unless (integer? v) (no-hex-digits in config v escaping-c escaped-c)) 138 (cond 139 [(and (or (v . < . #xD800) (v . > . #xDFFF)) 140 (v . <= . #x10FFFF)) 141 (set-accum-string-count! accum-str pos) 142 (accum-string-add! accum-str (integer->char v))] 143 [else 144 (reader-error in config 145 "escape sequence `~aU~a` is out of range in string" 146 escaping-c (accum-string-get! accum-str config #:start-pos pos))])] 147 [else (unknown-error)]) 148 (loop)] 149 [(char=? #\" c) 150 null] 151 [else 152 (when (eq? mode '|byte string|) 153 (unless (byte? (char->integer c)) 154 (reader-error in config 155 "character `~a` is out of range in byte string" 156 c))) 157 (accum-string-add! accum-str c) 158 (loop)])) 159 (define str (if (eq? mode '|byte string|) 160 (accum-string-get-bytes! accum-str config) 161 (accum-string-get! accum-str config))) 162 (wrap str 163 in 164 config 165 str)) 166 167;; ---------------------------------------- 168 169(define (read-here-string in config) 170 (define source (read-config-source config)) 171 (define-values (open-end-line open-end-col open-end-pos) (port-next-location in)) 172 (define accum-str (accum-string-init! config)) 173 174 ;; Parse terminator 175 (define full-terminator 176 (cons 177 #\newline ;; assumption below that this character is first 178 (let loop () 179 (define c (read-char/special in config source)) 180 (cond 181 [(eof-object? c) 182 (reader-error in config #:due-to c 183 "found end-of-file after `#<<` and before a newline")] 184 [(not (char? c)) 185 (reader-error in config #:due-to c 186 "found non-character while reading `#<<`")] 187 [(char=? c #\newline) null] 188 [else (cons c (loop))])))) 189 190 ;; Get string content. 191 ;; We just saw a newline that could be considered the start of an 192 ;; immediate `full-terminator`. 193 (let loop ([terminator (cdr full-terminator)] [terminator-accum null]) 194 (define c (read-char/special in config source)) 195 (cond 196 [(eof-object? c) 197 (unless (null? terminator) 198 (reader-error in config #:due-to c #:end-pos open-end-pos 199 "found end-of-file before terminating `~a`" 200 (list->string (cdr full-terminator))))] 201 [(not (char? c)) 202 (reader-error in config #:due-to c 203 "found non-character while reading `#<<`")] 204 [(and (pair? terminator) 205 (char=? c (car terminator))) 206 (loop (cdr terminator) (cons (car terminator) terminator-accum))] 207 [(and (null? terminator) 208 (char=? c #\newline)) 209 (void)] 210 [else 211 (unless (null? terminator-accum) 212 (for ([c (in-list (reverse terminator-accum))]) 213 (accum-string-add! accum-str c))) 214 (cond 215 [(char=? c #\newline) 216 ;; assume `full-terminator` starts with #\newline 217 (loop (cdr full-terminator) (list #\newline))] 218 [else 219 (accum-string-add! accum-str c) 220 (loop full-terminator null)])])) 221 222 ;; Done 223 (define str (accum-string-get! accum-str config)) 224 (wrap str 225 in 226 config 227 str)) 228 229;; ---------------------------------------- 230 231(define (no-hex-digits in config c escaping-c escaped-c) 232 (reader-error in config 233 #:due-to c 234 "no hex digit following `~a~a`" 235 escaping-c escaped-c)) 236