1 /* prop.c - Character properties.
2
3 Copyright (C) 1999 Tom Tromey
4
5 The Gnome Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The Gnome Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the Gnome Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #include <config.h>
21
22 #include <stddef.h>
23
24 #include "unicode.h"
25 #include "chartables.h"
26
/* Number of elements in the array X.  Only meaningful when X is a real
   array, not a pointer.  */
#define asize(x) ((sizeof (x)) / sizeof ((x)[0]))

/* Attribute value stored for character Char on page Page.  A page that
   has no attribute table yields 0.  */
#define ATTTABLE(Page, Char) \
  ((attr_table[Page] == 0) ? 0 : (attr_table[Page][Char]))

/* We cheat a bit and cast type values to (char *).  We detect these
   using the &0xff trick: an entry whose value fits in the low 8 bits
   is a type shared by the whole page, anything else is a pointer to a
   per-character table.  The test is done in size_t rather than int so
   that a genuine pointer is not truncated before the comparison on
   platforms where pointers are wider than int.  */
#define TTYPE(Page, Char) \
  (((((size_t) type_table[Page]) & 0xff) == ((size_t) type_table[Page])) \
   ? ((int) (size_t) (type_table[Page])) \
   : (type_table[Page][Char]))

/* Type of character Char; anything beyond UNICODE_LAST_CHAR is reported
   as UNICODE_UNASSIGNED.  Char is evaluated more than once, so it must
   not have side effects.  */
#define TYPE(Char) \
  (((Char) > (UNICODE_LAST_CHAR)) \
   ? UNICODE_UNASSIGNED \
   : TTYPE ((Char) >> 8, (Char) & 0xff))

/* True if Type is any of the three number types.  */
#define ISDIGIT(Type) ((Type) == UNICODE_DECIMAL_NUMBER \
                       || (Type) == UNICODE_LETTER_NUMBER \
                       || (Type) == UNICODE_OTHER_NUMBER)

/* True if Type is any of the five letter types.  */
#define ISALPHA(Type) ((Type) == UNICODE_LOWERCASE_LETTER \
                       || (Type) == UNICODE_UPPERCASE_LETTER \
                       || (Type) == UNICODE_TITLECASE_LETTER \
                       || (Type) == UNICODE_MODIFIER_LETTER \
                       || (Type) == UNICODE_OTHER_LETTER)
50
51 int
unicode_isalnum(unicode_char_t c)52 unicode_isalnum (unicode_char_t c)
53 {
54 int t = TYPE (c);
55 return ISDIGIT (t) || ISALPHA (t);
56 }
57
58 int
unicode_isalpha(unicode_char_t c)59 unicode_isalpha (unicode_char_t c)
60 {
61 int t = TYPE (c);
62 return ISALPHA (t);
63 }
64
65 int
unicode_iscntrl(unicode_char_t c)66 unicode_iscntrl (unicode_char_t c)
67 {
68 return TYPE (c) == UNICODE_CONTROL;
69 }
70
71 int
unicode_isdigit(unicode_char_t c)72 unicode_isdigit (unicode_char_t c)
73 {
74 return TYPE (c) == UNICODE_DECIMAL_NUMBER;
75 }
76
77 int
unicode_isgraph(unicode_char_t c)78 unicode_isgraph (unicode_char_t c)
79 {
80 int t = TYPE (c);
81 return (t != UNICODE_CONTROL
82 && t != UNICODE_FORMAT
83 && t != UNICODE_UNASSIGNED
84 && t != UNICODE_PRIVATE_USE
85 && t != UNICODE_SURROGATE
86 && t != UNICODE_SPACE_SEPARATOR);
87 }
88
89 int
unicode_islower(unicode_char_t c)90 unicode_islower (unicode_char_t c)
91 {
92 return TYPE (c) == UNICODE_LOWERCASE_LETTER;
93 }
94
95 int
unicode_isprint(unicode_char_t c)96 unicode_isprint (unicode_char_t c)
97 {
98 int t = TYPE (c);
99 return (t != UNICODE_CONTROL
100 && t != UNICODE_FORMAT
101 && t != UNICODE_UNASSIGNED
102 && t != UNICODE_PRIVATE_USE
103 && t != UNICODE_SURROGATE);
104 }
105
106 int
unicode_ispunct(unicode_char_t c)107 unicode_ispunct (unicode_char_t c)
108 {
109 int t = TYPE (c);
110 return (t == UNICODE_CONNECT_PUNCTUATION || t == UNICODE_DASH_PUNCTUATION
111 || t == UNICODE_CLOSE_PUNCTUATION || t == UNICODE_FINAL_PUNCTUATION
112 || t == UNICODE_INITIAL_PUNCTUATION || t == UNICODE_OTHER_PUNCTUATION
113 || t == UNICODE_OPEN_PUNCTUATION);
114 }
115
116 int
unicode_isspace(unicode_char_t c)117 unicode_isspace (unicode_char_t c)
118 {
119 int t = TYPE (c);
120 return (t == UNICODE_SPACE_SEPARATOR || t == UNICODE_LINE_SEPARATOR
121 || t == UNICODE_PARAGRAPH_SEPARATOR);
122 }
123
124 int
unicode_isupper(unicode_char_t c)125 unicode_isupper (unicode_char_t c)
126 {
127 return TYPE (c) == UNICODE_UPPERCASE_LETTER;
128 }
129
130 int
unicode_istitle(unicode_char_t c)131 unicode_istitle (unicode_char_t c)
132 {
133 unsigned int i;
134 for (i = 0; i < asize (title_table); ++i)
135 if (title_table[i][0] == c)
136 return 1;
137 return 0;
138 }
139
140 int
unicode_isxdigit(unicode_char_t c)141 unicode_isxdigit (unicode_char_t c)
142 {
143 int t = TYPE (c);
144 return ((c >= 'a' && c <= 'f')
145 || (c >= 'A' && c <= 'F')
146 || ISDIGIT (t));
147 }
148
149 int
unicode_isdefined(unicode_char_t c)150 unicode_isdefined (unicode_char_t c)
151 {
152 int t = TYPE (c);
153 return t != UNICODE_UNASSIGNED;
154 }
155
156 /* This function stolen from Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>. */
157 int
unicode_iswide(unicode_char_t c)158 unicode_iswide (unicode_char_t c)
159 {
160 if (c < 0x1100)
161 return 0;
162
163 return ((c >= 0x1100 && c <= 0x115f) /* Hangul Jamo */
164 || (c >= 0x2e80 && c <= 0xa4cf && (c & ~0x0011) != 0x300a &&
165 c != 0x303f) /* CJK ... Yi */
166 || (c >= 0xac00 && c <= 0xd7a3) /* Hangul Syllables */
167 || (c >= 0xf900 && c <= 0xfaff) /* CJK Compatibility Ideographs */
168 || (c >= 0xfe30 && c <= 0xfe6f) /* CJK Compatibility Forms */
169 || (c >= 0xff00 && c <= 0xff5f) /* Fullwidth Forms */
170 || (c >= 0xffe0 && c <= 0xffe6));
171 }
172
173 unicode_char_t
unicode_toupper(unicode_char_t c)174 unicode_toupper (unicode_char_t c)
175 {
176 int t = TYPE (c);
177 if (t == UNICODE_LOWERCASE_LETTER)
178 return ATTTABLE (c >> 8, c & 0xff);
179 else if (t == UNICODE_TITLECASE_LETTER)
180 {
181 unsigned int i;
182 for (i = 0; i < asize (title_table); ++i)
183 {
184 if (title_table[i][0] == c)
185 return title_table[i][1];
186 }
187 }
188 return c;
189 }
190
191 unicode_char_t
unicode_tolower(unicode_char_t c)192 unicode_tolower (unicode_char_t c)
193 {
194 int t = TYPE (c);
195 if (t == UNICODE_UPPERCASE_LETTER)
196 return ATTTABLE (c >> 8, c & 0xff);
197 else if (t == UNICODE_TITLECASE_LETTER)
198 {
199 unsigned int i;
200 for (i = 0; i < asize (title_table); ++i)
201 {
202 if (title_table[i][0] == c)
203 return title_table[i][2];
204 }
205 }
206 return c;
207 }
208
209 unicode_char_t
unicode_totitle(unicode_char_t c)210 unicode_totitle (unicode_char_t c)
211 {
212 unsigned int i;
213 for (i = 0; i < asize (title_table); ++i)
214 {
215 if (title_table[i][0] == c || title_table[i][1] == c
216 || title_table[i][2] == c)
217 return title_table[i][0];
218 }
219 return (TYPE (c) == UNICODE_LOWERCASE_LETTER
220 ? ATTTABLE (c >> 8, c & 0xff)
221 : c);
222 }
223
224 int
unicode_digit_value(unicode_char_t c)225 unicode_digit_value (unicode_char_t c)
226 {
227 if (TYPE (c) == UNICODE_DECIMAL_NUMBER)
228 return ATTTABLE (c >> 8, c & 0xff);
229 return -1;
230 }
231
232 int
unicode_xdigit_value(unicode_char_t c)233 unicode_xdigit_value (unicode_char_t c)
234 {
235 if (c >= 'A' && c <= 'F')
236 return c - 'A' + 1;
237 if (c >= 'a' && c <= 'f')
238 return c - 'a' + 1;
239 if (TYPE (c) == UNICODE_DECIMAL_NUMBER)
240 return ATTTABLE (c >> 8, c & 0xff);
241 return -1;
242 }
243
244 int
unicode_type(unicode_char_t c)245 unicode_type (unicode_char_t c)
246 {
247 return TYPE (c);
248 }
249