1; Part of Scheme 48 1.9.  See file COPYING for notices and license.
2
3; Authors: Mike Sperber
4; Copyright (c) 2005-2006 by Basis Technology Corporation.
5
6; Code-point classification
7
8(define-enumerated-type primary-category :primary-category
9  primary-category?
10  primary-categories
11  primary-category-name
12  primary-category-index
13  (letter
14   number
15   punctuation
16   symbol
17   mark
18   separator
19   miscellaneous))
20
21(define-finite-type general-category :general-category
22  (primary-category id symbol)
23  general-category?
24  general-categories
25  general-category-name
26  general-category-index
27  (primary-category general-category-primary-category)
28  (id general-category-id)
29  (symbol general-category-symbol)
30  ((uppercase-letter (primary-category letter) "Lu" 'Lu)
31   (lowercase-letter (primary-category letter) "Ll" 'Ll)
32   (titlecase-letter (primary-category letter) "Lt" 'Lt)
33   (modified-letter (primary-category letter) "Lm" 'Lm)
34   (other-letter (primary-category letter) "Lo" 'Lo)
35
36   (non-spacing-mark (primary-category mark) "Mn" 'Mn)
37   (combining-spacing-mark (primary-category mark) "Mc" 'Mc)
38   (enclosing-mark (primary-category mark) "Me" 'Me)
39
40   (decimal-digit-number (primary-category number) "Nd" 'Nd)
41   (letter-number (primary-category number) "Nl" 'Nl)
42   (other-number (primary-category number) "No" 'No)
43
44   (opening-punctuation (primary-category punctuation) "Ps" 'Ps)
45   (closing-punctuation (primary-category punctuation) "Pe" 'Pe)
46   (initial-quote-punctuation (primary-category punctuation) "Pi" 'Pi)
47   (final-quote-punctuation (primary-category punctuation) "Pf" 'Pf)
48   (dash-punctuation (primary-category punctuation) "Pd" 'Pd)
49   (connector-punctuation (primary-category punctuation) "Pc" 'Pc)
50   (other-punctuation (primary-category punctuation) "Po" 'Po)
51
52   (currency-symbol (primary-category symbol) "Sc" 'Sc)
53   (mathematical-symbol (primary-category symbol) "Sm" 'Sm)
54   (modifier-symbol (primary-category symbol) "Sk" 'Sk)
55   (other-symbol (primary-category symbol) "So" 'So)
56
57   (space-separator (primary-category separator) "Zs" 'Zs)
58   (paragraph-separator (primary-category separator) "Zp" 'Zp)
59   (line-separator (primary-category separator) "Zl" 'Zl)
60
61   (control-character (primary-category miscellaneous) "Cc" 'Cc)
62   (formatting-character (primary-category miscellaneous) "Cf" 'Cf)
63   (surrogate (primary-category miscellaneous) "Cs" 'Cs)
64   (private-use-character (primary-category miscellaneous) "Co" 'Co)
65   (unassigned (primary-category miscellaneous) "Cn" 'Cn)))
66
67(define (bits-necessary count)
68  (let loop ((e 0)
69	     (reached 1))
70    (if (>= reached count)
71	e
72	(loop (+ e 1) (* 2 reached)))))
73
74(define *general-category-bits*
75  (bits-necessary (vector-length general-categories)))
76
77(define (id->general-category id)
78  (let ((count (vector-length general-categories)))
79    (let loop ((i 0))
80      (cond
81       ((>= i count) #f)
82       ((string=? (general-category-id (vector-ref general-categories i))
83		  id)
84	(vector-ref general-categories i))
85       (else
86	(loop (+ 1 i)))))))
87