1 /********************************************************************/
2 /* */
3 /* chr_rtl.c Primitive actions for the char type. */
4 /* Copyright (C) 1989 - 2016 Thomas Mertes */
5 /* 2015 Arkadiy Kuleshov */
6 /* */
7 /* This file is part of the Seed7 Runtime Library. */
8 /* */
9 /* The Seed7 Runtime Library is free software; you can */
10 /* redistribute it and/or modify it under the terms of the GNU */
11 /* Lesser General Public License as published by the Free Software */
12 /* Foundation; either version 2.1 of the License, or (at your */
13 /* option) any later version. */
14 /* */
15 /* The Seed7 Runtime Library is distributed in the hope that it */
16 /* will be useful, but WITHOUT ANY WARRANTY; without even the */
17 /* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR */
18 /* PURPOSE. See the GNU Lesser General Public License for more */
19 /* details. */
20 /* */
21 /* You should have received a copy of the GNU Lesser General */
22 /* Public License along with this program; if not, write to the */
23 /* Free Software Foundation, Inc., 51 Franklin Street, */
24 /* Fifth Floor, Boston, MA 02110-1301, USA. */
25 /* */
26 /* Module: Seed7 Runtime Library */
27 /* File: seed7/src/chr_rtl.c */
28 /* Changes: 1992, 1993, 1994, 2005, 2010 Thomas Mertes */
29 /* Content: Primitive actions for the char type. */
30 /* */
31 /********************************************************************/
32
33 #define LOG_FUNCTIONS 0
34 #define VERBOSE_EXCEPTIONS 0
35
36 #include "version.h"
37
38 #include "stdlib.h"
39 #include "stdio.h"
40 #include "string.h"
41
42 #include "common.h"
43 #include "data_rtl.h"
44 #include "heaputl.h"
45 #include "striutl.h"
46 #include "int_rtl.h"
47 #include "str_rtl.h"
48 #include "rtl_err.h"
49
50 #undef EXTERN
51 #define EXTERN
52 #include "chr_rtl.h"
53
54
/**
 * Bitmaps of the code points that chrIsLetter() reports as letters.
 * The table is organized in blocks of eight 64-bit words. A block
 * covers 512 consecutive code points (the range is named in the
 * comment that precedes the block). Inside a block the word with
 * the index (ch >> 6) & 7 holds the code point 'ch' at the bit
 * position ch & 63. A set bit means that chrIsLetter() returns TRUE.
 * The number of the block that belongs to a code point is taken
 * from unicode_letters_ind[ch >> 9]. Ranges without a block of
 * their own are decided by the negative entries of
 * unicode_letters_ind.
 */
55 static const uint64Type unicode_letters_data[] = {
56 /* 0x000000-0x0001ff | number of bits: 373 */
57 0x0000000000000000, 0x07fffffe07fffffe, 0x0420040000000000, 0xff7fffffff7fffff,
58 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
59 /* 0x000200-0x0003ff | number of bits: 301 */
60 0x007fffffffffffff, 0xffffffffffff0000, 0xffffffffffffffff, 0x0000401f0003ffc3,
61 0x0000000000000000, 0x0400000000000020, 0xfffffffbffffd740, 0x0fbfffffffff7fff,
62 /* 0x000400-0x0005ff | number of bits: 380 */
63 0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffc03, 0x033fffffffff7fff,
64 0xfffe00000000ffff, 0xfffffffe027fffff, 0xbbff0000000000ff, 0x000707ffffff0016,
65 /* 0x000600-0x0007ff | number of bits: 282 */
66 0x07fffffe003f0000, 0xffffc00000ffffff, 0xffffffffffffffff, 0x9c00e1fe1fefffff,
67 0xffffffffffff0000, 0x000000000000e000, 0x0003ffffffffffff, 0x0000000000000000,
68 /* 0x000800-0x0009ff | number of bits: 155 */
69 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
70 0xe3fffffffffffffe, 0x0000000fff011fff, 0xe3c5fdfffff99fee, 0x0003000fb080199f,
71 /* 0x000a00-0x000bff | number of bits: 251 */
72 0xc36dfdfffff987ee, 0x001f00005e001987, 0xe3edfdfffffbbfee, 0x0000000f00011bbf,
73 0xe3edfdfffff99fee, 0x00020003b0c0198f, 0xc3bfc718d63dc7ec, 0x0000000000801dc7,
74 /* 0x000c00-0x000dff | number of bits: 284 */
75 0xc3effdfffffddfee, 0x0000000300601ddf, 0xe3effdfffffddfec, 0x0000000340601ddf,
76 0xc3fffdfffffddfec, 0x0000000300801dcf, 0x2ffbfffffc7fffec, 0x000c0000ff5f807f,
77 /* 0x000e00-0x000fff | number of bits: 224 */
78 0x07fffffffffffffe, 0x000000000000207f, 0x3bffecaefef02596, 0x000000003000205f,
79 0x0000000000000001, 0xfffe07fffffffeff, 0x1ffffffffeff0f03, 0x0000000000000000,
80 /* 0x001000-0x0011ff | number of bits: 379 */
81 0x0147f6fbffffffff, 0x0000000003ff0000, 0xffffffff00000000, 0x01ffffffffff003f,
82 0xffffffffffffffff, 0xffffffff83ffffff, 0xffffff07ffffffff, 0x03ffffffffffffff,
83 /* 0x001200-0x0013ff | number of bits: 402 */
84 0xffffffffffffff7f, 0xffffffff3d7f3d7f, 0x7f3d7fffffff3d7f, 0xffff7fffff7f7f3d,
85 0xffffffff7f3d7fff, 0x0000000007ffff7f, 0xffffffff00000000, 0x001fffffffffffff,
86 /* 0x001400-0x0015ff | number of bits: 511 */
87 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
88 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
89 /* 0x001600-0x0017ff | number of bits: 371 */
90 0xffffffffffffffff, 0x007f9fffffffffff, 0xffffffff07fffffe, 0x0001c7ffffffffff,
91 0x000fffff000fdfff, 0x000ddfff000fffff, 0xffcfffffffffffff, 0x00000000108001ff,
92 /* 0x001800-0x0019ff | number of bits: 215 */
93 0xffffffff00000000, 0x00ffffffffffffff, 0x000003ffffffffff, 0x0000000000000000,
94 0x01ff0fff1fffffff, 0x001f3fffffff0000, 0x0000000000000000, 0x0000000000000000,
95 /* 0x001c00-0x001dff | number of bits: 108 */
96 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
97 0xffffffffffffffff, 0x00000fffffffffff, 0x0000000000000000, 0x0000000000000000,
98 /* 0x001e00-0x001fff | number of bits: 464 */
99 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffff0fffffff, 0x03ffffffffffffff,
100 0xffffffff3f3fffff, 0x3fffffffaaff3f3f, 0x5fdfffffffffffff, 0x1fdc1fff0fcf1fdc,
101 /* 0x002000-0x0021ff | number of bits: 81 */
102 0x0000000000000000, 0x8002000000000000, 0x0000000000000000, 0x0000000000000000,
103 0xe3fbbd503e2ffc84, 0xffffffff000003e0, 0x000000000000000f, 0x0000000000000000,
104 /* 0x003000-0x0031ff | number of bits: 379 */
105 0x1f3e03fe000000e0, 0xfffffffffffffffe, 0xfffffffee07fffff, 0xf7ffffffffffffff,
106 0xfffe1fffffffffe0, 0xffffffffffffffff, 0x00ffffff00007fff, 0xffff000000000000,
107 /* 0x004c00-0x004dff | number of bits: 438 */
108 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
109 0xffffffffffffffff, 0xffffffffffffffff, 0x003fffffffffffff, 0x0000000000000000,
110 /* 0x009e00-0x009fff | number of bits: 422 */
111 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
112 0xffffffffffffffff, 0xffffffffffffffff, 0x0000003fffffffff, 0x0000000000000000,
113 /* 0x00a400-0x00a5ff | number of bits: 141 */
114 0xffffffffffffffff, 0xffffffffffffffff, 0x0000000000001fff, 0x0000000000000000,
115 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
116 /* 0x00d600-0x00d7ff | number of bits: 420 */
117 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
118 0xffffffffffffffff, 0xffffffffffffffff, 0x0000000fffffffff, 0x0000000000000000,
119 /* 0x00f800-0x00f9ff | number of bits: 256 */
120 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
121 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
122 /* 0x00fa00-0x00fbff | number of bits: 305 */
123 0xffff3fffffffffff, 0x000007ffffffffff, 0x0000000000000000, 0x0000000000000000,
124 0x5f7ffdffe0f8007f, 0xffffffffffffffdb, 0x0003ffffffffffff, 0xfffffffffff80000,
125 /* 0x00fc00-0x00fdff | number of bits: 448 */
126 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
127 0x3fffffffffffffff, 0xffffffffffff0000, 0xfffffffffffcffff, 0x0fff0000000000ff,
128 /* 0x00fe00-0x00ffff | number of bits: 302 */
129 0x0000000000000000, 0xffdf000000000000, 0xffffffffffffffff, 0x1fffffffffffffff,
130 0x07fffffe00000000, 0xffffffc007fffffe, 0x7fffffffffffffff, 0x000000001cfcfcfc,
131 /* 0x010000-0x0101ff | number of bits: 211 */
132 0xb7ffff7fffffefff, 0x000000003fff3fff, 0xffffffffffffffff, 0x07ffffffffffffff,
133 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
134 /* 0x010200-0x0103ff | number of bits: 88 */
135 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
136 0xffff00007fffffff, 0x00000000000007ff, 0x000000003fffffff, 0x0000000000000000,
137 /* 0x010400-0x0105ff | number of bits: 158 */
138 0xffffffffffffffff, 0xffffffffffffffff, 0x000000003fffffff, 0x0000000000000000,
139 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
140 /* 0x010800-0x0109ff | number of bits: 55 */
141 0x91bffffffffffd3f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
142 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
143 /* 0x01d400-0x01d5ff | number of bits: 488 */
144 0xffffffffffffffff, 0xffffffffffdfffff, 0xebffde64dfffffff, 0xffffffffffffffef,
145 0x7bffffffdfdfe7bf, 0xfffffffffffdfc5f, 0xffffffffffffffff, 0xffffffffffffffff,
146 /* 0x01d600-0x01d7ff | number of bits: 444 */
147 0xffffffffffffffff, 0xffffffffffffffff, 0xffffff0fffffffff, 0xf7fffffff7fffffd,
148 0xffdfffffffdfffff, 0xffff7fffffff7fff, 0xfffffdfffffffdff, 0x00000000000003f7,
149 /* 0x02a600-0x02a7ff | number of bits: 215 */
150 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x00000000007fffff,
151 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
152 /* 0x02fa00-0x02fbff | number of bits: 30 */
153 0x000000003fffffff, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
154 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
155 };
156
/**
 * Block index used by chrIsLetter(), subscripted with ch >> 9.
 * Each entry stands for 512 consecutive code points. The comment at
 * the end of a row names the range covered by the entries of the row.
 * A non-negative entry is the number of the eight-word block in
 * unicode_letters_data that holds the bitmap of the range.
 * A negative entry decides the whole range at once:
 * -1 means that chrIsLetter() returns FALSE for the whole range and
 * -2 means that chrIsLetter() returns TRUE for the whole range.
 * The table ends at 0x02fbff. The function chrIsLetter() does not
 * consult it for larger code points.
 */
157 static const signed char unicode_letters_ind[] = {
158 0, 1, 2, 3, 4, 5, 6, 7, /* 0x000000-0x000fff */
159 8, 9, 10, 11, 12, -1, 13, 14, /* 0x001000-0x001fff */
160 15, -1, -1, -1, -1, -1, -1, -1, /* 0x002000-0x002fff */
161 16, -1, -2, -2, -2, -2, -2, -2, /* 0x003000-0x003fff */
162 -2, -2, -2, -2, -2, -2, 17, -2, /* 0x004000-0x004fff */
163 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x005000-0x005fff */
164 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x006000-0x006fff */
165 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x007000-0x007fff */
166 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x008000-0x008fff */
167 -2, -2, -2, -2, -2, -2, -2, 18, /* 0x009000-0x009fff */
168 -2, -2, 19, -1, -1, -1, -2, -2, /* 0x00a000-0x00afff */
169 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x00b000-0x00bfff */
170 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x00c000-0x00cfff */
171 -2, -2, -2, 20, -1, -1, -1, -1, /* 0x00d000-0x00dfff */
172 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x00e000-0x00efff */
173 -1, -1, -1, -1, 21, 22, 23, 24, /* 0x00f000-0x00ffff */
174 25, 26, 27, -1, 28, -1, -1, -1, /* 0x010000-0x010fff */
175 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x011000-0x011fff */
176 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x012000-0x012fff */
177 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x013000-0x013fff */
178 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x014000-0x014fff */
179 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x015000-0x015fff */
180 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x016000-0x016fff */
181 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x017000-0x017fff */
182 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x018000-0x018fff */
183 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x019000-0x019fff */
184 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x01a000-0x01afff */
185 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x01b000-0x01bfff */
186 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x01c000-0x01cfff */
187 -1, -1, 29, 30, -1, -1, -1, -1, /* 0x01d000-0x01dfff */
188 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x01e000-0x01efff */
189 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x01f000-0x01ffff */
190 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x020000-0x020fff */
191 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x021000-0x021fff */
192 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x022000-0x022fff */
193 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x023000-0x023fff */
194 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x024000-0x024fff */
195 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x025000-0x025fff */
196 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x026000-0x026fff */
197 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x027000-0x027fff */
198 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x028000-0x028fff */
199 -2, -2, -2, -2, -2, -2, -2, -2, /* 0x029000-0x029fff */
200 -2, -2, -2, 31, -1, -1, -1, -1, /* 0x02a000-0x02afff */
201 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x02b000-0x02bfff */
202 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x02c000-0x02cfff */
203 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x02d000-0x02dfff */
204 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x02e000-0x02efff */
205 -1, -1, -1, -1, -2, 32 /* 0x02f000-0x02fbff */
206 };
207
208 /**
209 * Non-spacing attribute table.
210 * See PropList.txt, or grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt
211 * Control characters are also marked non-spacing here, because they are not
212 * printable.
 *
 * Layout, as it is read by is_nonspacing(): The table is organized
 * in blocks of eight 64-bit words. A block covers 512 consecutive
 * code points (the range is named in the comment that precedes the
 * block). Inside a block the word with the index (ch >> 6) & 7 holds
 * the code point 'ch' at the bit position ch & 63. A set bit marks
 * the code point as non-spacing. The number of the block that belongs
 * to a code point is taken from nonspacing_table_ind[ch >> 9].
213 */
214 static const uint64Type nonspacing_table_data[] = {
215 /* 0x000000-0x0001ff | number of bits: 65 */
216 0x00000000ffffffff, 0x8000000000000000, 0x00000000ffffffff, 0x0000000000000000,
217 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
218 /* 0x000200-0x0003ff | number of bits: 82 */
219 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
220 0xffffffffffffffff, 0x0000000700007fff, 0x0000000000000000, 0x0000000000000000,
221 /* 0x000400-0x0005ff | number of bits: 53 */
222 0x0000000000000000, 0x0000000000000000, 0x0000000000000378, 0x0000000000000000,
223 0x0000000000000000, 0x0000000000000000, 0xbbfffffbfffe0000, 0x0000000000000016,
224 /* 0x000600-0x0007ff | number of bits: 72 */
225 0x0000000000000000, 0x00010000003ff800, 0x0000000000000000, 0x00003d9fffc00000,
226 0xffff000000020000, 0x00000000000007ff, 0x0001ffc000000000, 0x0000000000000000,
227 /* 0x000800-0x0009ff | number of bits: 27 */
228 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
229 0x1000000000000006, 0x0000000c001e21fe, 0x1000000000000002, 0x0000000c0000201e,
230 /* 0x000a00-0x000bff | number of bits: 33 */
231 0x1000000000000004, 0x0003000000003986, 0x1000000000000006, 0x00000000000021be,
232 0x9000000000000002, 0x000000000040200e, 0x0000000000000004, 0x0000000000002001,
233 /* 0x000c00-0x000dff | number of bits: 25 */
234 0xc000000000000000, 0x0000000000603dc1, 0x8000000000000000, 0x0000000000003040,
235 0x0000000000000000, 0x000000000000200e, 0x0000000000000000, 0x00000000005c0400,
236 /* 0x000e00-0x000fff | number of bits: 102 */
237 0x07f2000000000000, 0x0000000000007f80, 0x1bf2000000000000, 0x0000000000003f00,
238 0x02a0000003000000, 0x7ffe000000000000, 0x1ffffffffeff00df, 0x0000000000000040,
239 /* 0x001000-0x0011ff | number of bits: 10 */
240 0x02c5e00000000000, 0x0000000003000000, 0x0000000000000000, 0x0000000000000000,
241 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
242 /* 0x001600-0x0017ff | number of bits: 19 */
243 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
244 0x0000000000000000, 0x0000000000000000, 0x3f80000000000000, 0x00000000000ffe40,
245 /* 0x001800-0x0019ff | number of bits: 1 */
246 0x0000000000000000, 0x0000000000000000, 0x0000020000000000, 0x0000000000000000,
247 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
248 /* 0x002000-0x0021ff | number of bits: 20 */
249 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000fffff0000,
250 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
251 /* 0x003000-0x0031ff | number of bits: 8 */
252 0x0000fc0000000000, 0x0000000000000000, 0x0000000006000000, 0x0000000000000000,
253 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
254 /* 0x00fa00-0x00fbff | number of bits: 1 */
255 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
256 0x0000000040000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
257 /* 0x00fe00-0x00ffff | number of bits: 4 */
258 0x0000000f00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
259 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
260 };
261
/**
 * Block index used by is_nonspacing(), subscripted with ch >> 9.
 * Each entry stands for 512 consecutive code points. The comment at
 * the end of a row names the range covered by the eight entries of
 * the row. A non-negative entry is the number of the eight-word block
 * in nonspacing_table_data that holds the bitmap of the range.
 * For an entry of -1 the function is_nonspacing() returns FALSE for
 * the whole range. The table covers the code points up to 0xffff.
 */
262 static const signed char nonspacing_table_ind[] = {
263 0, 1, 2, 3, 4, 5, 6, 7, /* 0x0000-0x0fff */
264 8, -1, -1, 9, 10, -1, -1, -1, /* 0x1000-0x1fff */
265 11, -1, -1, -1, -1, -1, -1, -1, /* 0x2000-0x2fff */
266 12, -1, -1, -1, -1, -1, -1, -1, /* 0x3000-0x3fff */
267 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x4000-0x4fff */
268 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x5000-0x5fff */
269 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x6000-0x6fff */
270 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x7000-0x7fff */
271 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x8000-0x8fff */
272 -1, -1, -1, -1, -1, -1, -1, -1, /* 0x9000-0x9fff */
273 -1, -1, -1, -1, -1, -1, -1, -1, /* 0xa000-0xafff */
274 -1, -1, -1, -1, -1, -1, -1, -1, /* 0xb000-0xbfff */
275 -1, -1, -1, -1, -1, -1, -1, -1, /* 0xc000-0xcfff */
276 -1, -1, -1, -1, -1, -1, -1, -1, /* 0xd000-0xdfff */
277 -1, -1, -1, -1, -1, -1, -1, -1, /* 0xe000-0xefff */
278 -1, -1, -1, -1, -1, 13, -1, 14 /* 0xf000-0xffff */
279 };
280
281 /**
282 * Sorted array of character indices marking transitions
283 * between East Asian single- or neutral width and double-width
 *
 * Every element is the first code point of a range that extends up
 * to the code point before the next element. The function
 * is_doublewidth() searches the last element that is less than or
 * equal to a character and uses the parity of its index: A range
 * that starts at an even index (0, 2, 4, ...) has single or neutral
 * width and a range that starts at an odd index has double width.
 * The elements must stay in ascending order, since a binary search
 * is done over them.
284 */
285 static const charType east_asian_width[] = {
286 0x000000, 0x001101, 0x00115b, 0x001160, 0x001161, 0x00232a, 0x00232c, 0x002e81,
287 0x002e9b, 0x002e9c, 0x002ef5, 0x002f01, 0x002fd7, 0x002ff1, 0x002ffd, 0x003001,
288 0x003040, 0x003042, 0x003098, 0x00309a, 0x003101, 0x003106, 0x00312e, 0x003132,
289 0x003190, 0x003191, 0x0031b9, 0x0031f1, 0x003220, 0x003221, 0x003245, 0x003251,
290 0x00327f, 0x003280, 0x003300, 0x003301, 0x004db7, 0x004e01, 0x009fa7, 0x00a001,
291 0x00a48e, 0x00a491, 0x00a4c8, 0x00ac01, 0x00d7a5, 0x00f901, 0x00fa2f, 0x00fa31,
292 0x00fa6c, 0x00fe31, 0x00fe54, 0x00fe55, 0x00fe68, 0x00fe69, 0x00fe6d, 0x00ff02,
293 0x00ff62, 0x00ffe1, 0x00ffe8, 0x020001, 0x02ffff, 0x030001, 0x03ffff
294 };
295
296
297
is_nonspacing(charType ch)298 static inline boolType is_nonspacing (charType ch)
299
300 {
301 int ind;
302
303 /* is_nonspacing */
304 if (ch <= 0x00ffff) {
305 ind = nonspacing_table_ind[ch >> 9];
306 if (ind >= 0) {
307 return (boolType) (
308 (nonspacing_table_data[8 * (unsigned int) ind + ((ch >> 6) & 7)] >> (ch & 63)) & 1);
309 } /* if */
310 } else if (ch >= 0x10ffff) {
311 return TRUE;
312 } /* if */
313 return FALSE;
314 } /* is_nonspacing */
315
316
317
318 /**
319 * Do a binary search of the character index
320 * and assume double width is true for each odd block
321 * since first block has single or neutral width
322 * and the block properties are alternating
323 */
is_doublewidth(charType ch)324 static inline boolType is_doublewidth (charType ch)
325
326 {
327 int min = 0;
328 int mid = 0;
329 int max = sizeof(east_asian_width) / sizeof(charType) - 1;
330
331 /* is_doublewidth */
332 while (min <= max) {
333 mid = min + (max - min) / 2;
334 if (ch < east_asian_width[mid]) {
335 max = mid - 1;
336 } else if (ch > east_asian_width[mid]) {
337 min = mid + 1;
338 } else {
339 /* printf("mid for 0x%06x is %d\n", ch, mid); */
340 return (mid % 2 == 1) ? TRUE : FALSE;
341 } /* if */
342 } /* while */
343 /* printf("min for 0x%06x is %d\n", ch, min - 1); */
344 return ((min - 1) % 2 == 1) ? TRUE : FALSE;
345 } /* is_doublewidth */
346
347
348
chrCLit(charType character)349 striType chrCLit (charType character)
350
351 {
352 /* A string literal starts and ends with apostrophe ('): */
353 const memSizeType numOfApostrophes = 2;
354 memSizeType len;
355 striType result;
356
357 /* chrCLit */
358 logFunction(printf("chrCLit('\\" FMT_U32 ";')\n", character););
359 if (character < 127) {
360 if (character < ' ') {
361 len = strlen(cstri_escape_sequence[character]);
362 if (unlikely(!ALLOC_STRI_SIZE_OK(result, len + numOfApostrophes))) {
363 raise_error(MEMORY_ERROR);
364 } else {
365 result->size = len + numOfApostrophes;
366 result->mem[0] = '\'';
367 memcpy_to_strelem(&result->mem[1],
368 (const_ustriType) cstri_escape_sequence[character], len);
369 result->mem[len + 1] = '\'';
370 } /* if */
371 } else if (character == '\\' || character == '\'') {
372 if (unlikely(!ALLOC_STRI_SIZE_OK(result, 4))) {
373 raise_error(MEMORY_ERROR);
374 } else {
375 result->size = 4;
376 result->mem[0] = '\'';
377 result->mem[1] = (strElemType) '\\';
378 result->mem[2] = (strElemType) character;
379 result->mem[3] = '\'';
380 } /* if */
381 } else {
382 if (unlikely(!ALLOC_STRI_SIZE_OK(result, 3))) {
383 raise_error(MEMORY_ERROR);
384 } else {
385 result->size = 3;
386 result->mem[0] = '\'';
387 result->mem[1] = (strElemType) character;
388 result->mem[2] = '\'';
389 } /* if */
390 } /* if */
391 } else {
392 result = intStr((intType) character);
393 } /* if */
394 return result;
395 } /* chrCLit */
396
397
398
399 #if ALLOW_STRITYPE_SLICES
/**
 *  Write the C literal of a character into 'buffer'.
 *  For characters below 127 the literal is stored in buffer->mem1
 *  and buffer->mem is set to refer to buffer->mem1. At most six
 *  elements of buffer->mem1 are written. Control characters with
 *  an escape sequence that starts with \0 are written with three
 *  octal digits. For characters from 127 upwards the work is
 *  delegated to intStrToBuffer().
 *  @return the 'buffer' that was given as parameter.
 */
striType chrCLitToBuffer (charType character, striType buffer)

  {
    strElemType *literal;

  /* chrCLitToBuffer */
    logFunction(printf("chrCLitToBuffer('\\" FMT_U32 ";')\n", character););
    if (character >= 127) {
      (void) intStrToBuffer((intType) character, buffer);
    } else {
      buffer->mem = buffer->mem1;
      literal = buffer->mem1;
      literal[0] = (strElemType) '\'';
      if (character >= ' ') {
        if (character == '\'' || character == '\\') {
          /* Apostrophe and backslash need to be escaped. */
          literal[1] = (strElemType) '\\';
          literal[2] = (strElemType) character;
          literal[3] = (strElemType) '\'';
          buffer->size = 4;
        } else {
          literal[1] = (strElemType) character;
          literal[2] = (strElemType) '\'';
          buffer->size = 3;
        } /* if */
      } else {
        literal[1] = (strElemType) '\\';
        if (cstri_escape_sequence[character][1] != '0') {
          /* Escape sequence that consists of backslash and one character. */
          literal[2] = (strElemType) cstri_escape_sequence[character][1];
          literal[3] = (strElemType) '\'';
          buffer->size = 4;
        } else {
          /* Always write three octal digits as strCLit does.  */
          /* A character below space needs two octal digits,   */
          /* so the first digit is always zero. Computing the  */
          /* digits this way is much faster than sprintf().    */
          literal[2] = (strElemType) '0';
          literal[3] = (strElemType) ((character >> 3 & 0x7) + '0');
          literal[4] = (strElemType) ((character & 0x7) + '0');
          literal[5] = (strElemType) '\'';
          buffer->size = 6;
        } /* if */
      } /* if */
    } /* if */
    return buffer;
  } /* chrCLitToBuffer */
438 #endif
439
440
441
442 /**
443 * Compare two characters.
444 * @return -1, 0 or 1 if the first argument is considered to be
445 * respectively less than, equal to, or greater than the
446 * second.
447 */
chrCmp(charType char1,charType char2)448 intType chrCmp (charType char1, charType char2)
449
450 {
451 intType signumValue;
452
453 /* chrCmp */
454 if (char1 < char2) {
455 signumValue = -1;
456 } else {
457 signumValue = char1 > char2;
458 } /* if */
459 return signumValue;
460 } /* chrCmp */
461
462
463
464 /**
465 * Reinterpret the generic parameters as charType and call chrCmp.
466 * Function pointers in C programs generated by the Seed7 compiler
467 * may point to this function. This assures correct behaviour even
468 * if sizeof(genericType) != sizeof(charType).
469 */
chrCmpGeneric(const genericType value1,const genericType value2)470 intType chrCmpGeneric (const genericType value1, const genericType value2)
471
472 { /* chrCmpGeneric */
473 return chrCmp(((const_rtlObjectType *) &value1)->value.charValue,
474 ((const_rtlObjectType *) &value2)->value.charValue);
475 } /* chrCmpGeneric */
476
477
478
479 /**
480 * Reinterpret the generic parameters as charType and assign source to dest.
481 * Function pointers in C programs generated by the Seed7 compiler
482 * may point to this function. This assures correct behaviour even
483 * if sizeof(genericType) != sizeof(charType).
484 */
chrCpyGeneric(genericType * const dest,const genericType source)485 void chrCpyGeneric (genericType *const dest, const genericType source)
486
487 { /* chrCpyGeneric */
488 ((rtlObjectType *) dest)->value.charValue =
489 ((const_rtlObjectType *) &source)->value.charValue;
490 } /* chrCpyGeneric */
491
492
493
494 /**
495 * Check whether 'ch' is an alphabetic Unicode character.
496 * Uses identifier data table to look up a particular code point.
497 * The table includes many Unicode ranges listed below.
498 * @return TRUE if 'ch' is an alphabetic symbol,
499 * FALSE otherwise
500 */
chrIsLetter(charType ch)501 boolType chrIsLetter (charType ch)
502
503 {
504 int ind;
505
506 /* chrIsLetter */
507 if (ch <= 0x02fbff) {
508 ind = unicode_letters_ind[ch >> 9];
509 if (ind >= 0) {
510 return (boolType) (
511 (unicode_letters_data[8 * (unsigned int) ind + ((ch >> 6) & 7)] >> (ch & 63)) & 1);
512 } else {
513 return ~ind; /* -1 -> FALSE, -2 -> TRUE */
514 } /* if */
515 } /* if */
516 return FALSE;
517 } /* chrIsLetter */
518
519
520
521 /**
522 * Convert a character to lower case.
523 * The conversion uses the default Unicode case mapping,
524 * where each character is considered in isolation.
525 * Characters without case mapping are left unchanged.
526 * The mapping is independent from the locale. Individual
527 * character case mappings cannot be reversed, because some
528 * characters have multiple characters that map to them.
529 * @return the character converted to lower case.
530 */
chrLow(charType ch)531 charType chrLow (charType ch)
532
533 { /* chrLow */
534 toLower(&ch, 1, &ch);
535 return ch;
536 } /* chrLow */
537
538
539
540 /**
541 * Create a string with one character.
542 * @return a string with the character 'ch'.
543 */
chrStr(charType ch)544 striType chrStr (charType ch)
545
546 {
547 striType result;
548
549 /* chrStr */
550 if (unlikely(!ALLOC_STRI_SIZE_OK(result, (memSizeType) 1))) {
551 raise_error(MEMORY_ERROR);
552 return NULL;
553 } else {
554 result->size = 1;
555 result->mem[0] = (strElemType) ch;
556 return result;
557 } /* if */
558 } /* chrStr */
559
560
561
562 /**
563 * Convert a character to upper case.
564 * The conversion uses the default Unicode case mapping,
565 * where each character is considered in isolation.
566 * Characters without case mapping are left unchanged.
567 * The mapping is independent from the locale. Individual
568 * character case mappings cannot be reversed, because some
569 * characters have multiple characters that map to them.
570 * @return the character converted to upper case.
571 */
chrUp(charType ch)572 charType chrUp (charType ch)
573
574 { /* chrUp */
575 toUpper(&ch, 1, &ch);
576 return ch;
577 } /* chrUp */
578
579
580
581 /**
582 * Number of screen columns occupied by the Unicode character 'ch'.
583 * Non-spacing characters and control characters have width of 0.
584 * @return 0,1 or 2 depending on the width occupied on a terminal.
585 */
chrWidth(charType ch)586 intType chrWidth (charType ch)
587
588 { /* chrWidth */
589 if (is_nonspacing(ch)) {
590 return 0;
591 } else if (is_doublewidth(ch)) {
592 return 2;
593 } else {
594 return 1;
595 } /* if */
596 } /* chrWidth */
597