1; Part of Scheme 48 1.9. See file COPYING for notices and license. 2 3; Authors: Mike Sperber 4; Copyright (c) 2005-2006 by Basis Technology Corporation. 5 6; Code-point classification 7 8(define-enumerated-type primary-category :primary-category 9 primary-category? 10 primary-categories 11 primary-category-name 12 primary-category-index 13 (letter 14 number 15 punctuation 16 symbol 17 mark 18 separator 19 miscellaneous)) 20 21(define-finite-type general-category :general-category 22 (primary-category id symbol) 23 general-category? 24 general-categories 25 general-category-name 26 general-category-index 27 (primary-category general-category-primary-category) 28 (id general-category-id) 29 (symbol general-category-symbol) 30 ((uppercase-letter (primary-category letter) "Lu" 'Lu) 31 (lowercase-letter (primary-category letter) "Ll" 'Ll) 32 (titlecase-letter (primary-category letter) "Lt" 'Lt) 33 (modified-letter (primary-category letter) "Lm" 'Lm) 34 (other-letter (primary-category letter) "Lo" 'Lo) 35 36 (non-spacing-mark (primary-category mark) "Mn" 'Mn) 37 (combining-spacing-mark (primary-category mark) "Mc" 'Mc) 38 (enclosing-mark (primary-category mark) "Me" 'Me) 39 40 (decimal-digit-number (primary-category number) "Nd" 'Nd) 41 (letter-number (primary-category number) "Nl" 'Nl) 42 (other-number (primary-category number) "No" 'No) 43 44 (opening-punctuation (primary-category punctuation) "Ps" 'Ps) 45 (closing-punctuation (primary-category punctuation) "Pe" 'Pe) 46 (initial-quote-punctuation (primary-category punctuation) "Pi" 'Pi) 47 (final-quote-punctuation (primary-category punctuation) "Pf" 'Pf) 48 (dash-punctuation (primary-category punctuation) "Pd" 'Pd) 49 (connector-punctuation (primary-category punctuation) "Pc" 'Pc) 50 (other-punctuation (primary-category punctuation) "Po" 'Po) 51 52 (currency-symbol (primary-category symbol) "Sc" 'Sc) 53 (mathematical-symbol (primary-category symbol) "Sm" 'Sm) 54 (modifier-symbol (primary-category symbol) "Sk" 'Sk) 55 (other-symbol (primary-category symbol) "So" 'So) 56 57 (space-separator (primary-category separator) "Zs" 'Zs) 58 (paragraph-separator (primary-category separator) "Zp" 'Zp) 59 (line-separator (primary-category separator) "Zl" 'Zl) 60 61 (control-character (primary-category miscellaneous) "Cc" 'Cc) 62 (formatting-character (primary-category miscellaneous) "Cf" 'Cf) 63 (surrogate (primary-category miscellaneous) "Cs" 'Cs) 64 (private-use-character (primary-category miscellaneous) "Co" 'Co) 65 (unassigned (primary-category miscellaneous) "Cn" 'Cn))) 66 67(define (bits-necessary count) 68 (let loop ((e 0) 69 (reached 1)) 70 (if (>= reached count) 71 e 72 (loop (+ e 1) (* 2 reached))))) 73 74(define *general-category-bits* 75 (bits-necessary (vector-length general-categories))) 76 77(define (id->general-category id) 78 (let ((count (vector-length general-categories))) 79 (let loop ((i 0)) 80 (cond 81 ((>= i count) #f) 82 ((string=? (general-category-id (vector-ref general-categories i)) 83 id) 84 (vector-ref general-categories i)) 85 (else 86 (loop (+ 1 i))))))) 87