1;;; Included only if the tested implementation claims to support 2;;; the unicode feature and also claims to be very slow. 3 4;;; Given a vector of unary predicates on characters, 5;;; returns a vector of sorted lists of all characters 6;;; that satisfy the corresponding predicate. 7 8(define (filter-all-chars-by-predicates pvec) 9 10 (define (loop i charlists) 11 (cond ((= i #x110000) 12 charlists) 13 ((<= #xd800 i #xdfff) 14 (loop #xe000 charlists)) 15 (else 16 (let ((c (integer->char i))) 17 (update! c charlists) 18 (loop (+ i 1) 19 charlists))))) 20 21 (define (update! c charlists) 22 (do ((n (vector-length pvec)) 23 (j 0 (+ j 1))) 24 ((= j n)) 25 (if ((vector-ref pvec j) c) 26 (vector-set! charlists j (cons c (vector-ref charlists j)))))) 27 28 (loop 0 (make-vector (vector-length pvec) '()))) 29 30(define (run-char-tests-for-unicode) 31 32 (test (char-upcase #\xDF) #\xDF) 33 (test (char-downcase #\xDF) #\xDF) 34 (test (char-foldcase #\xDF) #\xDF) 35 36 (test (char-upcase #\x3A3) #\x3A3) 37 (test (char-downcase #\x3A3) #\x3C3) 38 (test (char-foldcase #\x3A3) #\x3C3) 39 40 (test (char-upcase #\x3C2) #\x3A3) 41 (test (char-downcase #\x3C2) #\x3C2) 42 (test (char-foldcase #\x3C2) #\x3C3) 43 44 (test (char-ci=? #\x3C2 #\x3C3) #t) 45 46 (test (char-whitespace? #\x00A0) #t) 47 (test (char-upper-case? #\x3A3) #t) 48 (test (char-lower-case? #\x3C3) #t) 49 (test (char-lower-case? #\x00AA) #t) 50 51 (test (string-upcase "Stra\xDF;e") "STRASSE") 52 (test (string-downcase "Stra\xDF;e") "stra\xDF;e") 53 (test (string-foldcase "Stra\xDF;e") "strasse") 54 (test (string-downcase "\x3A3;") "\x3C3;") 55 56 (test (string-upcase "\x39E;\x391;\x39F;\x3A3;") 57 "\x39E;\x391;\x39F;\x3A3;") 58 (test (string-downcase "\x39E;\x391;\x39F;\x3A3;") 59 "\x3BE;\x3B1;\x3BF;\x3C2;") 60 (test (string-downcase "\x39E;\x391;\x39F;\x3A3;\x3A3;") 61 "\x3BE;\x3B1;\x3BF;\x3C3;\x3C2;") 62 (test (string-downcase "\x39E;\x391;\x39F;\x3A3; \x3A3;") 63 "\x3BE;\x3B1;\x3BF;\x3C2; \x3C3;") 64 (test (string-foldcase "\x39E;\x391;\x39F;\x3A3;") 65 "\x3BE;\x3B1;\x3BF;\x3C3;") 66 (test (string-upcase "\x3BE;\x3B1;\x3BF;\x3C3;") 67 "\x39E;\x391;\x39F;\x3A3;") 68 (test (string-upcase "\x3BE;\x3B1;\x3BF;\x3C2;") 69 "\x39E;\x391;\x39F;\x3A3;") 70 71 (test (string-downcase "A\x3A3;'x") "a\x3C3;'x") ; ' is a MidLetter 72 73 (test (string-ci=? "Stra\xDF;e" "Strasse") #t) 74 (test (string-ci=? "Stra\xDF;e" "STRASSE") #t) 75 (test (string-ci=? "\x39E;\x391;\x39F;\x3A3;" "\x3BE;\x3B1;\x3BF;\x3C2;") 76 #t) 77 (test (string-ci=? "\x39E;\x391;\x39F;\x3A3;" "\x3BE;\x3B1;\x3BF;\x3C3;") 78 #t) 79 80 ;; Systematic testing on every Unicode character. 81 ;; The counts are believed to be correct for Unicode 5.0, 82 ;; except for char-whitespace? (which has dropped to 25 in Unicode 7.0). 83 ;; The counts are likely to increase monotonically (if at all) in later 84 ;; versions, but that's not a given. 85 86 (let* ((is-a-char? (lambda (c) 87 (and (char? c) 88 (char? (char-upcase c)) 89 (char? (char-downcase c)) 90 (char? (char-foldcase c)) 91 (char=? c 92 (integer->char 93 (char->integer c)))))) 94 95 (is-bad-numeric? (lambda (c) 96 (and (char-numeric? c) 97 (let ((n (digit-value c))) 98 (not (and (exact-integer? n) 99 (<= 0 n 9))))))) 100 101 (is-bad-non-numeric? (lambda (c) 102 (and (not (char-numeric? c)) 103 (digit-value c)))) 104 105 (pvec (vector is-a-char? 106 char-alphabetic? 107 char-numeric? 108 char-whitespace? 109 char-upper-case? 110 char-lower-case? 111 is-bad-numeric? 112 is-bad-non-numeric?)) 113 114 (nvec (vector 1112064 ; is-a-char? 115 93217 ; char-alphabetic? 116 282 ; char-numeric? 117 25 ; char-whitespace? 118 1362 ; char-upper-case? 119 1791 ; char-lower-case? 120 0 ; is-bad-numeric? 121 0)) ; is-bad-non-numeric? 122 123 (cvec (filter-all-chars-by-predicates pvec))) 124 125 (test (= (length (vector-ref cvec 0)) ; is-a-char? 126 (vector-ref nvec 0)) 127 #t) 128 129 (test (>= (length (vector-ref cvec 1)) ; char-alphabetic? 130 (vector-ref nvec 1)) 131 #t) 132 133 (test (>= (length (vector-ref cvec 2)) ; char-numeric? 134 (vector-ref nvec 2)) 135 #t) 136 137 (test (>= (length (vector-ref cvec 3)) ; char-whitespace? 138 (vector-ref nvec 3)) 139 #t) 140 141 (test (>= (length (vector-ref cvec 4)) ; char-upper-case? 142 (vector-ref nvec 4)) 143 #t) 144 145 (test (>= (length (vector-ref cvec 5)) ; char-lower-case? 146 (vector-ref nvec 5)) 147 #t) 148 149 (test (= (length (vector-ref cvec 6)) ; is-bad-numeric? 150 (vector-ref nvec 6)) 151 #t) 152 153 (test (= (length (vector-ref cvec 7)) ; is-bad-non-numeric? 154 (vector-ref nvec 7)) 155 #t) 156 157 )) 158 159