1;;;
2;;; srfi-175 - ASCII character library
3;;;
4;;;   Copyright (c) 2020  Shiro Kawai  <shiro@acm.org>
5;;;
6;;;   Redistribution and use in source and binary forms, with or without
7;;;   modification, are permitted provided that the following conditions
8;;;   are met:
9;;;
10;;;   1. Redistributions of source code must retain the above copyright
11;;;      notice, this list of conditions and the following disclaimer.
12;;;
13;;;   2. Redistributions in binary form must reproduce the above copyright
14;;;      notice, this list of conditions and the following disclaimer in the
15;;;      documentation and/or other materials provided with the distribution.
16;;;
17;;;   3. Neither the name of the authors nor the names of its contributors
18;;;      may be used to endorse or promote products derived from this
19;;;      software without specific prior written permission.
20;;;
21;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24;;;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25;;;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26;;;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27;;;   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28;;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32;;;
33
34(define-module srfi-175
35  (use gauche.uvector)
36  (use gauche.generator)
37  (use srfi-13)
38  (use srfi-14)
39  (use srfi-42)
40  (export ascii-codepoint? ascii-bytevector?
41          ascii-char? ascii-string?
42          ascii-control? ascii-non-control?
43          ascii-space-or-tab? ascii-other-graphic?
44          ascii-alphanumeric? ascii-alphabetic?
45          ascii-numeric? ascii-whitespace?
46          ascii-upper-case? ascii-lower-case?
47          ascii-ci=? ascii-ci<? ascii-ci<=? ascii-ci>? ascii-ci>=?
48          ascii-string-ci=? ascii-string-ci<? ascii-string-ci<=?
49          ascii-string-ci>? ascii-string-ci>=?
50          ascii-upcase ascii-downcase
51          ascii-control->graphic ascii-graphic->control
52          ascii-mirror-bracket
53          ascii-nth-digit ascii-nth-upper-case ascii-nth-lower-case
54          ascii-digit-value ascii-upper-case-value ascii-lower-case-value
55          ))
56(select-module srfi-175)
57
58(define (ascii-codepoint? x) (and (integer? x) (<= #x00 x #x7f)))
59
60(define (ascii-bytevector? x)
61  (and (u8vector? x)
62       (every?-ec [: b x]
63                  (ascii-codepoint? b))))
64
65(define (ascii-char? x) (and (char? x) (char-set-contains? #[[:ascii:]] x)))
66
67(define (ascii-string? x)
68  (and (string? x)
69       (string-every ascii-char? x)))
70
71;; some infrastructure
72
73(define (%char x)
74  (cond [(char? x) x]
75        [(integer? x) (integer->char x)]
76        [else (type-error 'argument "character or an integer" x)]))
77(define (%code x)
78  (cond [(char? x) (char->integer x)]
79        [(integer? x) x]
80        [else (type-error 'argument "character or an integer" x)]))
81
82(define (ascii-control? c)
83  (char-set-contains? char-set:ascii-control (%char c)))
84
85(define *non-control*
86  (char-set-intersection char-set:ascii
87                         (char-set-complement char-set:ascii-control)))
88(define (ascii-non-control? c)
89  (char-set-contains? *non-control* (%char c)))
90
91(define (ascii-space-or-tab? c)
92  (boolean (memv (%char c) '(#\space #\tab))))
93
94(define *other-graphic*
95  (char-set-difference char-set:ascii-graphic
96                       char-set:ascii-letter+digit))
97(define (ascii-other-graphic? c)
98  (char-set-contains? *other-graphic* (%char c)))
99(define (ascii-alphanumeric? c)
100  (char-set-contains? char-set:ascii-letter+digit (%char c)))
101(define (ascii-alphabetic? c)
102  (char-set-contains? char-set:ascii-letter (%char c)))
103(define (ascii-numeric? c )
104  (char-set-contains? char-set:ascii-digit (%char c)))
105(define (ascii-whitespace? c)
106  (char-set-contains? char-set:ascii-whitespace (%char c)))
107(define (ascii-upper-case? c)
108  (char-set-contains? char-set:ascii-upper-case (%char c)))
109(define (ascii-lower-case? c)
110  (char-set-contains? char-set:ascii-lower-case (%char c)))
111
112(define (%foldcase-code c)
113  (if (ascii-upper-case? c)
114    (+ (%code c) #x20)
115    (%code c)))
116
117(define (ascii-ci=? c1 c2)  (= (%foldcase-code c1) (%foldcase-code c2)))
118(define (ascii-ci<? c1 c2)  (< (%foldcase-code c1) (%foldcase-code c2)))
119(define (ascii-ci<=? c1 c2) (<= (%foldcase-code c1) (%foldcase-code c2)))
120(define (ascii-ci>? c1 c2)  (> (%foldcase-code c1) (%foldcase-code c2)))
121(define (ascii-ci>=? c1 c2) (>= (%foldcase-code c1) (%foldcase-code c2)))
122
123(define (ascii-string-ci=? s1 s2)
124  (let ([g1 (string->generator s1)]
125        [g2 (string->generator s2)])
126    (let loop ([c1 (g1)] [c2 (g2)])
127      (cond [(eof-object? c1) (eof-object? c2)]
128            [(eof-object? c2) #f]
129            [else (let ([cc1 (%foldcase-code c1)]
130                        [cc2 (%foldcase-code c2)])
131                    (and (= cc1 cc2)
132                         (loop (g1) (g2))))]))))
133
134(define (ascii-string-ci<? s1 s2)
135  (let ([g1 (string->generator s1)]
136        [g2 (string->generator s2)])
137    (let loop ([c1 (g1)] [c2 (g2)])
138      (cond [(eof-object? c2) #f]
139            [(eof-object? c1) #t]
140            [else (let ([cc1 (%foldcase-code c1)]
141                        [cc2 (%foldcase-code c2)])
142                    (cond [(= cc1 cc2) (loop (g1) (g2))]
143                          [(< cc1 cc2)]
144                          [else #f]))]))))
145
146(define (ascii-string-ci<=? s1 s2)
147  (let ([g1 (string->generator s1)]
148        [g2 (string->generator s2)])
149    (let loop ([c1 (g1)] [c2 (g2)])
150      (cond [(eof-object? c2) (eof-object? c1)]
151            [(eof-object? c1) #t]
152            [else (let ([cc1 (%foldcase-code c1)]
153                        [cc2 (%foldcase-code c2)])
154                    (cond [(= cc1 cc2) (loop (g1) (g2))]
155                          [(< cc1 cc2)]
156                          [else #f]))]))))
157
158(define (ascii-string-ci>? s1 s2)
159  (let ([g1 (string->generator s1)]
160        [g2 (string->generator s2)])
161    (let loop ([c1 (g1)] [c2 (g2)])
162      (cond [(eof-object? c1) #f]
163            [(eof-object? c2) #t]
164            [else
165             (let ([cc1 (%foldcase-code c1)]
166                   [cc2 (%foldcase-code c2)])
167               (cond [(= cc1 cc2) (loop (g1) (g2))]
168                     [(> cc1 cc2) #t]
169                     [else #f]))]))))
170
171(define (ascii-string-ci>=? s1 s2)
172  (let ([g1 (string->generator s1)]
173        [g2 (string->generator s2)])
174    (let loop ([c1 (g1)] [c2 (g2)])
175      (cond [(eof-object? c1) (eof-object? c2)]
176            [(eof-object? c2) #t]
177            [else (let ([cc1 (%foldcase-code c1)]
178                        [cc2 (%foldcase-code c2)])
179                    (cond [(= cc1 cc2) (loop (g1) (g2))]
180                          [(> cc1 cc2)]
181                          [else #f]))]))))
182
183(define (ascii-upcase c)
184  (cond [(char? c)
185         (if (char-set-contains? char-set:ascii-lower-case c)
186           (char-upcase c)
187           c)]
188        [(integer? c)
189         (if (char-set-contains? char-set:ascii-lower-case (integer->char c))
190           (- c #x20)
191           c)]
192        [else (type-error 'c "char or integer" c)]))
193
194(define (ascii-downcase c)
195  (cond [(char? c)
196         (if (char-set-contains? char-set:ascii-upper-case c)
197           (char-downcase c)
198           c)]
199        [(integer? c)
200         (if (char-set-contains? char-set:ascii-upper-case (integer->char c))
201           (+ c #x20)
202           c)]
203        [else (type-error 'c "char or integer" c)]))
204
205(define (ascii-control->graphic c)
206  (cond [(char? c)
207         (and (char-set-contains? char-set:ascii-control c)
208              (if (eqv? c #\x7f)
209                #\?
210                (integer->char (+ (char->integer c) #x40))))]
211        [(integer? c)
212         (and (char-set-contains? char-set:ascii-control (integer->char c))
213              (if (eqv? c #x7f)
214                (char->integer #\?)
215                (+ c #x40)))]
216        [else (type-error 'c "char or integer" c)]))
217
218(define (ascii-graphic->control c)
219  (cond [(char? c)
220         (if (eqv? c #\?)
221           #\x7f
222           (let1 cc (char->integer c)
223             (and (<= #x40 cc #x5f)
224                  (integer->char (- cc #x40)))))]
225        [(integer? c)
226         (cond [(= c (char->integer #\?)) #x7f]
227               [(<= #x40 c #x5f) (- c #x40)]
228               [else #f])]
229        [else (type-error 'c "char or integer" c)]))
230
231(define (ascii-mirror-bracket c)
232  (cond[(char? c)
233        (and-let1 p (assv c '((#\( . #\)) (#\[ . #\]) (#\{ . #\}) (#\< . #\>)
234                              (#\) . #\() (#\] . #\[) (#\} . #\{) (#\> . #\<)))
235          (cdr p))]
236       [(integer? c)
237        (and-let1 p (assv c `((,(char->integer #\() . ,(char->integer #\)))
238                              (,(char->integer #\[) . ,(char->integer #\]))
239                              (,(char->integer #\{) . ,(char->integer #\}))
240                              (,(char->integer #\<) . ,(char->integer #\>))
241                              (,(char->integer #\)) . ,(char->integer #\())
242                              (,(char->integer #\]) . ,(char->integer #\[))
243                              (,(char->integer #\}) . ,(char->integer #\{))
244                              (,(char->integer #\>) . ,(char->integer #\<))))
245          (cdr p))]
246        [else (type-error 'c "char or integer" c)]))
247
248(define (ascii-nth-digit n)
249  (and (exact-integer? n) (integer->digit n)))
250
251(define (ascii-nth-upper-case n)
252  (integer->char (+ (modulo n 26) (char->integer #\A))))
253(define (ascii-nth-lower-case n)
254  (integer->char (+ (modulo n 26) (char->integer #\a))))
255
256(define (ascii-digit-value c limit)
257  (assume (real? limit))
258  (let1 lim (floor->exact limit)
259    (cond
260     [(= lim 1) (and (or (eqv? c #\0) (eqv? c (char->integer #\0))) 0)] ;special
261     [(<= 2 lim) (digit->integer (%char c) (min lim 10))]
262     [else #f])))
263
264(define (ascii-upper-case-value c offset limit)
265  (let1 cc (- (%code c) (char->integer #\A))
266    (and (< -1 cc limit)
267         (+ offset cc))))
268
269(define (ascii-lower-case-value c offset limit)
270  (let1 cc (- (%code c) (char->integer #\a))
271    (and (< -1 cc limit)
272         (+ offset cc))))
273