1;;; Included only if the tested implementation claims to support
2;;; the unicode feature and also claims to be very slow.
3
4;;; Given a vector of unary predicates on characters,
5;;; returns a vector of sorted lists of all characters
6;;; that satisfy the corresponding predicate.
7
8(define (filter-all-chars-by-predicates pvec)
9
10  (define (loop i charlists)
11    (cond ((= i #x110000)
12           charlists)
13          ((<= #xd800 i #xdfff)
14           (loop #xe000 charlists))
15          (else
16           (let ((c (integer->char i)))
17             (update! c charlists)
18             (loop (+ i 1)
19                   charlists)))))
20
21  (define (update! c charlists)
22    (do ((n (vector-length pvec))
23         (j 0 (+ j 1)))
24        ((= j n))
25      (if ((vector-ref pvec j) c)
26          (vector-set! charlists j (cons c (vector-ref charlists j))))))
27
28  (loop 0 (make-vector (vector-length pvec) '())))
29
30(define (run-char-tests-for-unicode)
31
32  (test (char-upcase #\xDF) #\xDF)
33  (test (char-downcase #\xDF) #\xDF)
34  (test (char-foldcase #\xDF) #\xDF)
35
36  (test (char-upcase #\x3A3) #\x3A3)
37  (test (char-downcase #\x3A3) #\x3C3)
38  (test (char-foldcase #\x3A3) #\x3C3)
39
40  (test (char-upcase #\x3C2) #\x3A3)
41  (test (char-downcase #\x3C2) #\x3C2)
42  (test (char-foldcase #\x3C2) #\x3C3)
43
44  (test (char-ci=? #\x3C2 #\x3C3) #t)
45
46  (test (char-whitespace? #\x00A0) #t)
47  (test (char-upper-case? #\x3A3) #t)
48  (test (char-lower-case? #\x3C3) #t)
49  (test (char-lower-case? #\x00AA) #t)
50
51  (test (string-upcase "Stra\xDF;e") "STRASSE")
52  (test (string-downcase "Stra\xDF;e") "stra\xDF;e")
53  (test (string-foldcase "Stra\xDF;e") "strasse")
54  (test (string-downcase "\x3A3;") "\x3C3;")
55
56  (test (string-upcase "\x39E;\x391;\x39F;\x3A3;")
57        "\x39E;\x391;\x39F;\x3A3;")
58  (test (string-downcase "\x39E;\x391;\x39F;\x3A3;")
59        "\x3BE;\x3B1;\x3BF;\x3C2;")
60  (test (string-downcase "\x39E;\x391;\x39F;\x3A3;\x3A3;")
61        "\x3BE;\x3B1;\x3BF;\x3C3;\x3C2;")
62  (test (string-downcase "\x39E;\x391;\x39F;\x3A3; \x3A3;")
63        "\x3BE;\x3B1;\x3BF;\x3C2; \x3C3;")
64  (test (string-foldcase "\x39E;\x391;\x39F;\x3A3;")
65        "\x3BE;\x3B1;\x3BF;\x3C3;")
66  (test (string-upcase "\x3BE;\x3B1;\x3BF;\x3C3;")
67        "\x39E;\x391;\x39F;\x3A3;")
68  (test (string-upcase "\x3BE;\x3B1;\x3BF;\x3C2;")
69        "\x39E;\x391;\x39F;\x3A3;")
70
71  (test (string-downcase "A\x3A3;'x") "a\x3C3;'x") ; ' is a MidLetter
72
73  (test (string-ci=? "Stra\xDF;e" "Strasse") #t)
74  (test (string-ci=? "Stra\xDF;e" "STRASSE") #t)
75  (test (string-ci=? "\x39E;\x391;\x39F;\x3A3;" "\x3BE;\x3B1;\x3BF;\x3C2;")
76        #t)
77  (test (string-ci=? "\x39E;\x391;\x39F;\x3A3;" "\x3BE;\x3B1;\x3BF;\x3C3;")
78        #t)
79
80  ;; Systematic testing on every Unicode character.
81  ;; The counts are believed to be correct for Unicode 5.0,
82  ;; except for char-whitespace? (which has dropped to 25 in Unicode 7.0).
83  ;; The counts are likely to increase monotonically (if at all) in later
84  ;; versions, but that's not a given.
85
86  (let* ((is-a-char? (lambda (c)
87                       (and (char? c)
88                            (char? (char-upcase c))
89                            (char? (char-downcase c))
90                            (char? (char-foldcase c))
91                            (char=? c
92                                    (integer->char
93                                     (char->integer c))))))
94
95         (is-bad-numeric? (lambda (c)
96                            (and (char-numeric? c)
97                                 (let ((n (digit-value c)))
98                                   (not (and (exact-integer? n)
99                                             (<= 0 n 9)))))))
100
101         (is-bad-non-numeric? (lambda (c)
102                                (and (not (char-numeric? c))
103                                     (digit-value c))))
104
105         (pvec (vector is-a-char?
106                       char-alphabetic?
107                       char-numeric?
108                       char-whitespace?
109                       char-upper-case?
110                       char-lower-case?
111                       is-bad-numeric?
112                       is-bad-non-numeric?))
113
114         (nvec (vector 1112064 ; is-a-char?
115                       93217   ; char-alphabetic?
116                       282     ; char-numeric?
117                       25      ; char-whitespace?
118                       1362    ; char-upper-case?
119                       1791    ; char-lower-case?
120                       0       ; is-bad-numeric?
121                       0))     ; is-bad-non-numeric?
122
123         (cvec (filter-all-chars-by-predicates pvec)))
124
125    (test (= (length (vector-ref cvec 0))  ; is-a-char?
126             (vector-ref nvec 0))
127          #t)
128
129    (test (>= (length (vector-ref cvec 1)) ; char-alphabetic?
130              (vector-ref nvec 1))
131          #t)
132
133    (test (>= (length (vector-ref cvec 2)) ; char-numeric?
134              (vector-ref nvec 2))
135          #t)
136
137    (test (>= (length (vector-ref cvec 3)) ; char-whitespace?
138              (vector-ref nvec 3))
139          #t)
140
141    (test (>= (length (vector-ref cvec 4)) ; char-upper-case?
142              (vector-ref nvec 4))
143          #t)
144
145    (test (>= (length (vector-ref cvec 5)) ; char-lower-case?
146              (vector-ref nvec 5))
147          #t)
148
149    (test (= (length (vector-ref cvec 6))  ; is-bad-numeric?
150             (vector-ref nvec 6))
151          #t)
152
153    (test (= (length (vector-ref cvec 7))  ; is-bad-non-numeric?
154             (vector-ref nvec 7))
155          #t)
156
157    ))
158
159