1 /* prop.c - Character properties.
2 
3    Copyright (C) 1999 Tom Tromey
4 
5    The Gnome Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public License as
7    published by the Free Software Foundation; either version 2 of the
8    License, or (at your option) any later version.
9 
10    The Gnome Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with the Gnome Library; see the file COPYING.LIB.  If not,
17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18    Boston, MA 02111-1307, USA.  */
19 
20 #include <config.h>
21 
22 #include <stddef.h>
23 
24 #include "unicode.h"
25 #include "chartables.h"
26 
/* Number of elements in the array X.  X must be a real array, not a
   pointer.  */
#define asize(x)  (sizeof (x) / sizeof ((x)[0]))

/* Attribute value stored for character CHAR of page PAGE.  A null page
   pointer stands for a page without attributes and yields 0.  */
#define ATTTABLE(Page, Char) \
  ((attr_table[Page] == 0) ? 0 : (attr_table[Page][Char]))

/* We cheat a bit and cast type values to (char *): an entry of
   type_table is either a pointer to a per-character table or a small
   type value stored in the pointer itself.  We detect the latter using
   the &0xff trick.  The comparison is done in a pointer-sized integer
   (size_t) rather than int, so that a genuine pointer is never
   truncated into something that looks like a small type value.  */
#define TTYPE(Page, Char) \
  ((((size_t) type_table[Page]) & 0xff) == ((size_t) type_table[Page]) \
   ? ((int) (size_t) type_table[Page]) \
   : (type_table[Page][Char]))

/* Type of character CHAR.  Anything above UNICODE_LAST_CHAR is
   reported as UNICODE_UNASSIGNED without consulting the tables.  */
#define TYPE(Char) (((Char) > (UNICODE_LAST_CHAR)) ? UNICODE_UNASSIGNED : TTYPE ((Char) >> 8, (Char) & 0xff))

/* True if TYPE is any of the three number types.  Note that this is
   wider than "decimal digit".  */
#define ISDIGIT(Type) ((Type) == UNICODE_DECIMAL_NUMBER \
		       || (Type) == UNICODE_LETTER_NUMBER \
		       || (Type) == UNICODE_OTHER_NUMBER)

/* True if TYPE is any of the five letter types.  */
#define ISALPHA(Type) ((Type) == UNICODE_LOWERCASE_LETTER \
		       || (Type) == UNICODE_UPPERCASE_LETTER \
		       || (Type) == UNICODE_TITLECASE_LETTER \
		       || (Type) == UNICODE_MODIFIER_LETTER \
		       || (Type) == UNICODE_OTHER_LETTER)
50 
51 int
unicode_isalnum(unicode_char_t c)52 unicode_isalnum (unicode_char_t c)
53 {
54   int t = TYPE (c);
55   return ISDIGIT (t) || ISALPHA (t);
56 }
57 
58 int
unicode_isalpha(unicode_char_t c)59 unicode_isalpha (unicode_char_t c)
60 {
61   int t = TYPE (c);
62   return ISALPHA (t);
63 }
64 
65 int
unicode_iscntrl(unicode_char_t c)66 unicode_iscntrl (unicode_char_t c)
67 {
68   return TYPE (c) == UNICODE_CONTROL;
69 }
70 
71 int
unicode_isdigit(unicode_char_t c)72 unicode_isdigit (unicode_char_t c)
73 {
74   return TYPE (c) == UNICODE_DECIMAL_NUMBER;
75 }
76 
77 int
unicode_isgraph(unicode_char_t c)78 unicode_isgraph (unicode_char_t c)
79 {
80   int t = TYPE (c);
81   return (t != UNICODE_CONTROL
82 	  && t != UNICODE_FORMAT
83 	  && t != UNICODE_UNASSIGNED
84 	  && t != UNICODE_PRIVATE_USE
85 	  && t != UNICODE_SURROGATE
86 	  && t != UNICODE_SPACE_SEPARATOR);
87 }
88 
89 int
unicode_islower(unicode_char_t c)90 unicode_islower (unicode_char_t c)
91 {
92   return TYPE (c) == UNICODE_LOWERCASE_LETTER;
93 }
94 
95 int
unicode_isprint(unicode_char_t c)96 unicode_isprint (unicode_char_t c)
97 {
98   int t = TYPE (c);
99   return (t != UNICODE_CONTROL
100 	  && t != UNICODE_FORMAT
101 	  && t != UNICODE_UNASSIGNED
102 	  && t != UNICODE_PRIVATE_USE
103 	  && t != UNICODE_SURROGATE);
104 }
105 
106 int
unicode_ispunct(unicode_char_t c)107 unicode_ispunct (unicode_char_t c)
108 {
109   int t = TYPE (c);
110   return (t == UNICODE_CONNECT_PUNCTUATION || t == UNICODE_DASH_PUNCTUATION
111 	  || t == UNICODE_CLOSE_PUNCTUATION || t == UNICODE_FINAL_PUNCTUATION
112 	  || t == UNICODE_INITIAL_PUNCTUATION || t == UNICODE_OTHER_PUNCTUATION
113 	  || t == UNICODE_OPEN_PUNCTUATION);
114 }
115 
116 int
unicode_isspace(unicode_char_t c)117 unicode_isspace (unicode_char_t c)
118 {
119   int t = TYPE (c);
120   return (t == UNICODE_SPACE_SEPARATOR || t == UNICODE_LINE_SEPARATOR
121 	  || t == UNICODE_PARAGRAPH_SEPARATOR);
122 }
123 
124 int
unicode_isupper(unicode_char_t c)125 unicode_isupper (unicode_char_t c)
126 {
127   return TYPE (c) == UNICODE_UPPERCASE_LETTER;
128 }
129 
130 int
unicode_istitle(unicode_char_t c)131 unicode_istitle (unicode_char_t c)
132 {
133   unsigned int i;
134   for (i = 0; i < asize (title_table); ++i)
135     if (title_table[i][0] == c)
136       return 1;
137   return 0;
138 }
139 
140 int
unicode_isxdigit(unicode_char_t c)141 unicode_isxdigit (unicode_char_t c)
142 {
143   int t = TYPE (c);
144   return ((c >= 'a' && c <= 'f')
145 	  || (c >= 'A' && c <= 'F')
146 	  || ISDIGIT (t));
147 }
148 
149 int
unicode_isdefined(unicode_char_t c)150 unicode_isdefined (unicode_char_t c)
151 {
152   int t = TYPE (c);
153   return t != UNICODE_UNASSIGNED;
154 }
155 
156 /* This function stolen from Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>.  */
157 int
unicode_iswide(unicode_char_t c)158 unicode_iswide (unicode_char_t c)
159 {
160   if (c < 0x1100)
161     return 0;
162 
163   return ((c >= 0x1100 && c <= 0x115f)	   /* Hangul Jamo */
164 	  || (c >= 0x2e80 && c <= 0xa4cf && (c & ~0x0011) != 0x300a &&
165 	      c != 0x303f)		   /* CJK ... Yi */
166 	  || (c >= 0xac00 && c <= 0xd7a3)  /* Hangul Syllables */
167 	  || (c >= 0xf900 && c <= 0xfaff)  /* CJK Compatibility Ideographs */
168 	  || (c >= 0xfe30 && c <= 0xfe6f)  /* CJK Compatibility Forms */
169 	  || (c >= 0xff00 && c <= 0xff5f)  /* Fullwidth Forms */
170 	  || (c >= 0xffe0 && c <= 0xffe6));
171 }
172 
173 unicode_char_t
unicode_toupper(unicode_char_t c)174 unicode_toupper (unicode_char_t c)
175 {
176   int t = TYPE (c);
177   if (t == UNICODE_LOWERCASE_LETTER)
178     return ATTTABLE (c >> 8, c & 0xff);
179   else if (t == UNICODE_TITLECASE_LETTER)
180     {
181       unsigned int i;
182       for (i = 0; i < asize (title_table); ++i)
183 	{
184 	  if (title_table[i][0] == c)
185 	    return title_table[i][1];
186 	}
187     }
188   return c;
189 }
190 
191 unicode_char_t
unicode_tolower(unicode_char_t c)192 unicode_tolower (unicode_char_t c)
193 {
194   int t = TYPE (c);
195   if (t == UNICODE_UPPERCASE_LETTER)
196     return ATTTABLE (c >> 8, c & 0xff);
197   else if (t == UNICODE_TITLECASE_LETTER)
198     {
199       unsigned int i;
200       for (i = 0; i < asize (title_table); ++i)
201 	{
202 	  if (title_table[i][0] == c)
203 	    return title_table[i][2];
204 	}
205     }
206   return c;
207 }
208 
209 unicode_char_t
unicode_totitle(unicode_char_t c)210 unicode_totitle (unicode_char_t c)
211 {
212   unsigned int i;
213   for (i = 0; i < asize (title_table); ++i)
214     {
215       if (title_table[i][0] == c || title_table[i][1] == c
216 	  || title_table[i][2] == c)
217 	return title_table[i][0];
218     }
219   return (TYPE (c) == UNICODE_LOWERCASE_LETTER
220 	  ? ATTTABLE (c >> 8, c & 0xff)
221 	  : c);
222 }
223 
224 int
unicode_digit_value(unicode_char_t c)225 unicode_digit_value (unicode_char_t c)
226 {
227   if (TYPE (c) == UNICODE_DECIMAL_NUMBER)
228     return ATTTABLE (c >> 8, c & 0xff);
229   return -1;
230 }
231 
232 int
unicode_xdigit_value(unicode_char_t c)233 unicode_xdigit_value (unicode_char_t c)
234 {
235   if (c >= 'A' && c <= 'F')
236     return c - 'A' + 1;
237   if (c >= 'a' && c <= 'f')
238     return c - 'a' + 1;
239   if (TYPE (c) == UNICODE_DECIMAL_NUMBER)
240     return ATTTABLE (c >> 8, c & 0xff);
241   return -1;
242 }
243 
244 int
unicode_type(unicode_char_t c)245 unicode_type (unicode_char_t c)
246 {
247   return TYPE (c);
248 }
249