1 /********************************************************************/
2 /*                                                                  */
3 /*  chr_rtl.c     Primitive actions for the char type.              */
4 /*  Copyright (C) 1989 - 2016  Thomas Mertes                        */
5 /*                2015 Arkadiy Kuleshov                             */
6 /*                                                                  */
7 /*  This file is part of the Seed7 Runtime Library.                 */
8 /*                                                                  */
9 /*  The Seed7 Runtime Library is free software; you can             */
10 /*  redistribute it and/or modify it under the terms of the GNU     */
11 /*  Lesser General Public License as published by the Free Software */
12 /*  Foundation; either version 2.1 of the License, or (at your      */
13 /*  option) any later version.                                      */
14 /*                                                                  */
15 /*  The Seed7 Runtime Library is distributed in the hope that it    */
16 /*  will be useful, but WITHOUT ANY WARRANTY; without even the      */
17 /*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR */
18 /*  PURPOSE.  See the GNU Lesser General Public License for more    */
19 /*  details.                                                        */
20 /*                                                                  */
21 /*  You should have received a copy of the GNU Lesser General       */
22 /*  Public License along with this program; if not, write to the    */
23 /*  Free Software Foundation, Inc., 51 Franklin Street,             */
24 /*  Fifth Floor, Boston, MA  02110-1301, USA.                       */
25 /*                                                                  */
26 /*  Module: Seed7 Runtime Library                                   */
27 /*  File: seed7/src/chr_rtl.c                                       */
28 /*  Changes: 1992, 1993, 1994, 2005, 2010  Thomas Mertes            */
29 /*  Content: Primitive actions for the char type.                   */
30 /*                                                                  */
31 /********************************************************************/
32 
33 #define LOG_FUNCTIONS 0
34 #define VERBOSE_EXCEPTIONS 0
35 
36 #include "version.h"
37 
38 #include "stdlib.h"
39 #include "stdio.h"
40 #include "string.h"
41 
42 #include "common.h"
43 #include "data_rtl.h"
44 #include "heaputl.h"
45 #include "striutl.h"
46 #include "int_rtl.h"
47 #include "str_rtl.h"
48 #include "rtl_err.h"
49 
50 #undef EXTERN
51 #define EXTERN
52 #include "chr_rtl.h"
53 
54 
/**
 *  Bit sets that tell which code points are letters.
 *  The table is used by chrIsLetter. It consists of groups of
 *  8 words with 64 bits each. A group describes 512 consecutive
 *  code points. The comment in front of a group states the range
 *  of code points it covers and the number of bits that are set
 *  in the group. Inside a group the code point 'ch' corresponds to
 *  bit (ch & 63) of the word with the number ((ch >> 6) & 7).
 *  The group that belongs to a code point is determined with
 *  unicode_letters_ind. Only blocks with a mixture of letters
 *  and non-letters have a group in this table.
 */
55 static const uint64Type unicode_letters_data[] = {
56   /* 0x000000-0x0001ff | number of bits: 373 */
57   0x0000000000000000, 0x07fffffe07fffffe, 0x0420040000000000, 0xff7fffffff7fffff,
58   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
59   /* 0x000200-0x0003ff | number of bits: 301 */
60   0x007fffffffffffff, 0xffffffffffff0000, 0xffffffffffffffff, 0x0000401f0003ffc3,
61   0x0000000000000000, 0x0400000000000020, 0xfffffffbffffd740, 0x0fbfffffffff7fff,
62   /* 0x000400-0x0005ff | number of bits: 380 */
63   0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffc03, 0x033fffffffff7fff,
64   0xfffe00000000ffff, 0xfffffffe027fffff, 0xbbff0000000000ff, 0x000707ffffff0016,
65   /* 0x000600-0x0007ff | number of bits: 282 */
66   0x07fffffe003f0000, 0xffffc00000ffffff, 0xffffffffffffffff, 0x9c00e1fe1fefffff,
67   0xffffffffffff0000, 0x000000000000e000, 0x0003ffffffffffff, 0x0000000000000000,
68   /* 0x000800-0x0009ff | number of bits: 155 */
69   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
70   0xe3fffffffffffffe, 0x0000000fff011fff, 0xe3c5fdfffff99fee, 0x0003000fb080199f,
71   /* 0x000a00-0x000bff | number of bits: 251 */
72   0xc36dfdfffff987ee, 0x001f00005e001987, 0xe3edfdfffffbbfee, 0x0000000f00011bbf,
73   0xe3edfdfffff99fee, 0x00020003b0c0198f, 0xc3bfc718d63dc7ec, 0x0000000000801dc7,
74   /* 0x000c00-0x000dff | number of bits: 284 */
75   0xc3effdfffffddfee, 0x0000000300601ddf, 0xe3effdfffffddfec, 0x0000000340601ddf,
76   0xc3fffdfffffddfec, 0x0000000300801dcf, 0x2ffbfffffc7fffec, 0x000c0000ff5f807f,
77   /* 0x000e00-0x000fff | number of bits: 224 */
78   0x07fffffffffffffe, 0x000000000000207f, 0x3bffecaefef02596, 0x000000003000205f,
79   0x0000000000000001, 0xfffe07fffffffeff, 0x1ffffffffeff0f03, 0x0000000000000000,
80   /* 0x001000-0x0011ff | number of bits: 379 */
81   0x0147f6fbffffffff, 0x0000000003ff0000, 0xffffffff00000000, 0x01ffffffffff003f,
82   0xffffffffffffffff, 0xffffffff83ffffff, 0xffffff07ffffffff, 0x03ffffffffffffff,
83   /* 0x001200-0x0013ff | number of bits: 402 */
84   0xffffffffffffff7f, 0xffffffff3d7f3d7f, 0x7f3d7fffffff3d7f, 0xffff7fffff7f7f3d,
85   0xffffffff7f3d7fff, 0x0000000007ffff7f, 0xffffffff00000000, 0x001fffffffffffff,
86   /* 0x001400-0x0015ff | number of bits: 511 */
87   0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
88   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
89   /* 0x001600-0x0017ff | number of bits: 371 */
90   0xffffffffffffffff, 0x007f9fffffffffff, 0xffffffff07fffffe, 0x0001c7ffffffffff,
91   0x000fffff000fdfff, 0x000ddfff000fffff, 0xffcfffffffffffff, 0x00000000108001ff,
92   /* 0x001800-0x0019ff | number of bits: 215 */
93   0xffffffff00000000, 0x00ffffffffffffff, 0x000003ffffffffff, 0x0000000000000000,
94   0x01ff0fff1fffffff, 0x001f3fffffff0000, 0x0000000000000000, 0x0000000000000000,
95   /* 0x001c00-0x001dff | number of bits: 108 */
96   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
97   0xffffffffffffffff, 0x00000fffffffffff, 0x0000000000000000, 0x0000000000000000,
98   /* 0x001e00-0x001fff | number of bits: 464 */
99   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffff0fffffff, 0x03ffffffffffffff,
100   0xffffffff3f3fffff, 0x3fffffffaaff3f3f, 0x5fdfffffffffffff, 0x1fdc1fff0fcf1fdc,
101   /* 0x002000-0x0021ff | number of bits: 81 */
102   0x0000000000000000, 0x8002000000000000, 0x0000000000000000, 0x0000000000000000,
103   0xe3fbbd503e2ffc84, 0xffffffff000003e0, 0x000000000000000f, 0x0000000000000000,
104   /* 0x003000-0x0031ff | number of bits: 379 */
105   0x1f3e03fe000000e0, 0xfffffffffffffffe, 0xfffffffee07fffff, 0xf7ffffffffffffff,
106   0xfffe1fffffffffe0, 0xffffffffffffffff, 0x00ffffff00007fff, 0xffff000000000000,
107   /* 0x004c00-0x004dff | number of bits: 438 */
108   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
109   0xffffffffffffffff, 0xffffffffffffffff, 0x003fffffffffffff, 0x0000000000000000,
110   /* 0x009e00-0x009fff | number of bits: 422 */
111   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
112   0xffffffffffffffff, 0xffffffffffffffff, 0x0000003fffffffff, 0x0000000000000000,
113   /* 0x00a400-0x00a5ff | number of bits: 141 */
114   0xffffffffffffffff, 0xffffffffffffffff, 0x0000000000001fff, 0x0000000000000000,
115   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
116   /* 0x00d600-0x00d7ff | number of bits: 420 */
117   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
118   0xffffffffffffffff, 0xffffffffffffffff, 0x0000000fffffffff, 0x0000000000000000,
119   /* 0x00f800-0x00f9ff | number of bits: 256 */
120   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
121   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
122   /* 0x00fa00-0x00fbff | number of bits: 305 */
123   0xffff3fffffffffff, 0x000007ffffffffff, 0x0000000000000000, 0x0000000000000000,
124   0x5f7ffdffe0f8007f, 0xffffffffffffffdb, 0x0003ffffffffffff, 0xfffffffffff80000,
125   /* 0x00fc00-0x00fdff | number of bits: 448 */
126   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff,
127   0x3fffffffffffffff, 0xffffffffffff0000, 0xfffffffffffcffff, 0x0fff0000000000ff,
128   /* 0x00fe00-0x00ffff | number of bits: 302 */
129   0x0000000000000000, 0xffdf000000000000, 0xffffffffffffffff, 0x1fffffffffffffff,
130   0x07fffffe00000000, 0xffffffc007fffffe, 0x7fffffffffffffff, 0x000000001cfcfcfc,
131   /* 0x010000-0x0101ff | number of bits: 211 */
132   0xb7ffff7fffffefff, 0x000000003fff3fff, 0xffffffffffffffff, 0x07ffffffffffffff,
133   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
134   /* 0x010200-0x0103ff | number of bits: 88 */
135   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
136   0xffff00007fffffff, 0x00000000000007ff, 0x000000003fffffff, 0x0000000000000000,
137   /* 0x010400-0x0105ff | number of bits: 158 */
138   0xffffffffffffffff, 0xffffffffffffffff, 0x000000003fffffff, 0x0000000000000000,
139   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
140   /* 0x010800-0x0109ff | number of bits: 55 */
141   0x91bffffffffffd3f, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
142   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
143   /* 0x01d400-0x01d5ff | number of bits: 488 */
144   0xffffffffffffffff, 0xffffffffffdfffff, 0xebffde64dfffffff, 0xffffffffffffffef,
145   0x7bffffffdfdfe7bf, 0xfffffffffffdfc5f, 0xffffffffffffffff, 0xffffffffffffffff,
146   /* 0x01d600-0x01d7ff | number of bits: 444 */
147   0xffffffffffffffff, 0xffffffffffffffff, 0xffffff0fffffffff, 0xf7fffffff7fffffd,
148   0xffdfffffffdfffff, 0xffff7fffffff7fff, 0xfffffdfffffffdff, 0x00000000000003f7,
149   /* 0x02a600-0x02a7ff | number of bits: 215 */
150   0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x00000000007fffff,
151   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
152   /* 0x02fa00-0x02fbff | number of bits: 30 */
153   0x000000003fffffff, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
154   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
155 };
156 
/**
 *  Index table for unicode_letters_data.
 *  There is one entry for every block of 512 code points. The entry
 *  of the code point 'ch' has the index (ch >> 9). The table covers
 *  the code points from 0x000000 to 0x02fbff. Every line below holds
 *  the entries of 8 blocks (4096 code points).
 *  Meaning of an entry, as used by chrIsLetter:
 *   - A value >= 0 is the number of the group (of 8 words) in
 *     unicode_letters_data, that describes the block.
 *   - The value -1 means: No code point of the block is a letter.
 *   - The value -2 means: All code points of the block are letters.
 */
157 static const signed char unicode_letters_ind[] = {
158     0,   1,   2,   3,   4,   5,   6,   7, /* 0x000000-0x000fff */
159     8,   9,  10,  11,  12,  -1,  13,  14, /* 0x001000-0x001fff */
160    15,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x002000-0x002fff */
161    16,  -1,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x003000-0x003fff */
162    -2,  -2,  -2,  -2,  -2,  -2,  17,  -2, /* 0x004000-0x004fff */
163    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x005000-0x005fff */
164    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x006000-0x006fff */
165    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x007000-0x007fff */
166    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x008000-0x008fff */
167    -2,  -2,  -2,  -2,  -2,  -2,  -2,  18, /* 0x009000-0x009fff */
168    -2,  -2,  19,  -1,  -1,  -1,  -2,  -2, /* 0x00a000-0x00afff */
169    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x00b000-0x00bfff */
170    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x00c000-0x00cfff */
171    -2,  -2,  -2,  20,  -1,  -1,  -1,  -1, /* 0x00d000-0x00dfff */
172    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x00e000-0x00efff */
173    -1,  -1,  -1,  -1,  21,  22,  23,  24, /* 0x00f000-0x00ffff */
174    25,  26,  27,  -1,  28,  -1,  -1,  -1, /* 0x010000-0x010fff */
175    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x011000-0x011fff */
176    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x012000-0x012fff */
177    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x013000-0x013fff */
178    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x014000-0x014fff */
179    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x015000-0x015fff */
180    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x016000-0x016fff */
181    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x017000-0x017fff */
182    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x018000-0x018fff */
183    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x019000-0x019fff */
184    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x01a000-0x01afff */
185    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x01b000-0x01bfff */
186    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x01c000-0x01cfff */
187    -1,  -1,  29,  30,  -1,  -1,  -1,  -1, /* 0x01d000-0x01dfff */
188    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x01e000-0x01efff */
189    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x01f000-0x01ffff */
190    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x020000-0x020fff */
191    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x021000-0x021fff */
192    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x022000-0x022fff */
193    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x023000-0x023fff */
194    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x024000-0x024fff */
195    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x025000-0x025fff */
196    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x026000-0x026fff */
197    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x027000-0x027fff */
198    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x028000-0x028fff */
199    -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, /* 0x029000-0x029fff */
200    -2,  -2,  -2,  31,  -1,  -1,  -1,  -1, /* 0x02a000-0x02afff */
201    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x02b000-0x02bfff */
202    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x02c000-0x02cfff */
203    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x02d000-0x02dfff */
204    -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1, /* 0x02e000-0x02efff */
205    -1,  -1,  -1,  -1,  -2,  32            /* 0x02f000-0x02fbff */
206 };
207 
208 /**
 *  Non-spacing attribute table.
 *  See PropList.txt, or grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt
 *  Control characters are also marked non-spacing here, because they are not
 *  printable.
 *  The table is used by is_nonspacing. It consists of groups of
 *  8 words with 64 bits each. A group describes 512 consecutive
 *  code points. The comment in front of a group states the range
 *  of code points it covers and the number of bits that are set
 *  in the group. Inside a group the code point 'ch' corresponds to
 *  bit (ch & 63) of the word with the number ((ch >> 6) & 7).
 *  The group that belongs to a code point is determined with
 *  nonspacing_table_ind.
 */
214 static const uint64Type nonspacing_table_data[] = {
215   /* 0x000000-0x0001ff | number of bits: 65 */
216   0x00000000ffffffff, 0x8000000000000000, 0x00000000ffffffff, 0x0000000000000000,
217   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
218   /* 0x000200-0x0003ff | number of bits: 82 */
219   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
220   0xffffffffffffffff, 0x0000000700007fff, 0x0000000000000000, 0x0000000000000000,
221   /* 0x000400-0x0005ff | number of bits: 53 */
222   0x0000000000000000, 0x0000000000000000, 0x0000000000000378, 0x0000000000000000,
223   0x0000000000000000, 0x0000000000000000, 0xbbfffffbfffe0000, 0x0000000000000016,
224   /* 0x000600-0x0007ff | number of bits: 72 */
225   0x0000000000000000, 0x00010000003ff800, 0x0000000000000000, 0x00003d9fffc00000,
226   0xffff000000020000, 0x00000000000007ff, 0x0001ffc000000000, 0x0000000000000000,
227   /* 0x000800-0x0009ff | number of bits: 27 */
228   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
229   0x1000000000000006, 0x0000000c001e21fe, 0x1000000000000002, 0x0000000c0000201e,
230   /* 0x000a00-0x000bff | number of bits: 33 */
231   0x1000000000000004, 0x0003000000003986, 0x1000000000000006, 0x00000000000021be,
232   0x9000000000000002, 0x000000000040200e, 0x0000000000000004, 0x0000000000002001,
233   /* 0x000c00-0x000dff | number of bits: 25 */
234   0xc000000000000000, 0x0000000000603dc1, 0x8000000000000000, 0x0000000000003040,
235   0x0000000000000000, 0x000000000000200e, 0x0000000000000000, 0x00000000005c0400,
236   /* 0x000e00-0x000fff | number of bits: 102 */
237   0x07f2000000000000, 0x0000000000007f80, 0x1bf2000000000000, 0x0000000000003f00,
238   0x02a0000003000000, 0x7ffe000000000000, 0x1ffffffffeff00df, 0x0000000000000040,
239   /* 0x001000-0x0011ff | number of bits: 10 */
240   0x02c5e00000000000, 0x0000000003000000, 0x0000000000000000, 0x0000000000000000,
241   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
242   /* 0x001600-0x0017ff | number of bits: 19 */
243   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
244   0x0000000000000000, 0x0000000000000000, 0x3f80000000000000, 0x00000000000ffe40,
245   /* 0x001800-0x0019ff | number of bits: 1 */
246   0x0000000000000000, 0x0000000000000000, 0x0000020000000000, 0x0000000000000000,
247   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
248   /* 0x002000-0x0021ff | number of bits: 20 */
249   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000fffff0000,
250   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
251   /* 0x003000-0x0031ff | number of bits: 8 */
252   0x0000fc0000000000, 0x0000000000000000, 0x0000000006000000, 0x0000000000000000,
253   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
254   /* 0x00fa00-0x00fbff | number of bits: 1 */
255   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
256   0x0000000040000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
257   /* 0x00fe00-0x00ffff | number of bits: 4 */
258   0x0000000f00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
259   0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
260 };
261 
/**
 *  Index table for nonspacing_table_data.
 *  There is one entry for every block of 512 code points. The entry
 *  of the code point 'ch' has the index (ch >> 9). The table covers
 *  the code points from 0x0000 to 0xffff. Every line below holds
 *  the entries of 8 blocks (4096 code points).
 *  Meaning of an entry, as used by is_nonspacing:
 *   - A value >= 0 is the number of the group (of 8 words) in
 *     nonspacing_table_data, that describes the block.
 *   - A negative value means: No code point of the block is
 *     non-spacing.
 */
262 static const signed char nonspacing_table_ind[] = {
263    0,  1,  2,  3,  4,  5,  6,  7, /* 0x0000-0x0fff */
264    8, -1, -1,  9, 10, -1, -1, -1, /* 0x1000-0x1fff */
265   11, -1, -1, -1, -1, -1, -1, -1, /* 0x2000-0x2fff */
266   12, -1, -1, -1, -1, -1, -1, -1, /* 0x3000-0x3fff */
267   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x4000-0x4fff */
268   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x5000-0x5fff */
269   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x6000-0x6fff */
270   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x7000-0x7fff */
271   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x8000-0x8fff */
272   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x9000-0x9fff */
273   -1, -1, -1, -1, -1, -1, -1, -1, /* 0xa000-0xafff */
274   -1, -1, -1, -1, -1, -1, -1, -1, /* 0xb000-0xbfff */
275   -1, -1, -1, -1, -1, -1, -1, -1, /* 0xc000-0xcfff */
276   -1, -1, -1, -1, -1, -1, -1, -1, /* 0xd000-0xdfff */
277   -1, -1, -1, -1, -1, -1, -1, -1, /* 0xe000-0xefff */
278   -1, -1, -1, -1, -1, 13, -1, 14  /* 0xf000-0xffff */
279 };
280 
281 /**
282  *  Sorted array of character indices marking transitions
283  *  between East Asian single- or neutral width and double-width
284  */
285 static const charType east_asian_width[] = {
286     0x000000, 0x001101, 0x00115b, 0x001160, 0x001161, 0x00232a, 0x00232c, 0x002e81,
287     0x002e9b, 0x002e9c, 0x002ef5, 0x002f01, 0x002fd7, 0x002ff1, 0x002ffd, 0x003001,
288     0x003040, 0x003042, 0x003098, 0x00309a, 0x003101, 0x003106, 0x00312e, 0x003132,
289     0x003190, 0x003191, 0x0031b9, 0x0031f1, 0x003220, 0x003221, 0x003245, 0x003251,
290     0x00327f, 0x003280, 0x003300, 0x003301, 0x004db7, 0x004e01, 0x009fa7, 0x00a001,
291     0x00a48e, 0x00a491, 0x00a4c8, 0x00ac01, 0x00d7a5, 0x00f901, 0x00fa2f, 0x00fa31,
292     0x00fa6c, 0x00fe31, 0x00fe54, 0x00fe55, 0x00fe68, 0x00fe69, 0x00fe6d, 0x00ff02,
293     0x00ff62, 0x00ffe1, 0x00ffe8, 0x020001, 0x02ffff, 0x030001, 0x03ffff
294 };
295 
296 
297 
is_nonspacing(charType ch)298 static inline boolType is_nonspacing (charType ch)
299 
300   {
301     int ind;
302 
303   /* is_nonspacing */
304     if (ch <= 0x00ffff) {
305       ind = nonspacing_table_ind[ch >> 9];
306       if (ind >= 0) {
307         return (boolType) (
308             (nonspacing_table_data[8 * (unsigned int) ind + ((ch >> 6) & 7)] >> (ch & 63)) & 1);
309       } /* if */
310     } else if (ch >= 0x10ffff) {
311       return TRUE;
312     } /* if */
313     return FALSE;
314   } /* is_nonspacing */
315 
316 
317 
318 /**
319  * Do a binary search of the character index
320  * and assume double width is true for each odd block
321  * since first block has single or neutral width
322  * and the block properties are alternating
323  */
is_doublewidth(charType ch)324 static inline boolType is_doublewidth (charType ch)
325 
326   {
327     int min = 0;
328     int mid = 0;
329     int max = sizeof(east_asian_width) / sizeof(charType) - 1;
330 
331   /* is_doublewidth */
332     while (min <= max) {
333       mid = min + (max - min) / 2;
334       if (ch < east_asian_width[mid]) {
335         max = mid - 1;
336       } else if (ch > east_asian_width[mid]) {
337         min = mid + 1;
338       } else {
339         /* printf("mid for 0x%06x is %d\n", ch, mid); */
340         return (mid % 2 == 1) ? TRUE : FALSE;
341       } /* if */
342     } /* while */
343     /* printf("min for 0x%06x is %d\n", ch, min - 1); */
344     return ((min - 1) % 2 == 1) ? TRUE : FALSE;
345   } /* is_doublewidth */
346 
347 
348 
chrCLit(charType character)349 striType chrCLit (charType character)
350 
351   {
352     /* A string literal starts and ends with apostrophe ('): */
353     const memSizeType numOfApostrophes = 2;
354     memSizeType len;
355     striType result;
356 
357   /* chrCLit */
358     logFunction(printf("chrCLit('\\" FMT_U32 ";')\n", character););
359     if (character < 127) {
360       if (character < ' ') {
361         len = strlen(cstri_escape_sequence[character]);
362         if (unlikely(!ALLOC_STRI_SIZE_OK(result, len + numOfApostrophes))) {
363           raise_error(MEMORY_ERROR);
364         } else {
365           result->size = len + numOfApostrophes;
366           result->mem[0] = '\'';
367           memcpy_to_strelem(&result->mem[1],
368               (const_ustriType) cstri_escape_sequence[character], len);
369           result->mem[len + 1] = '\'';
370         } /* if */
371       } else if (character == '\\' || character == '\'') {
372         if (unlikely(!ALLOC_STRI_SIZE_OK(result, 4))) {
373           raise_error(MEMORY_ERROR);
374         } else {
375           result->size = 4;
376           result->mem[0] = '\'';
377           result->mem[1] = (strElemType) '\\';
378           result->mem[2] = (strElemType) character;
379           result->mem[3] = '\'';
380         } /* if */
381       } else {
382         if (unlikely(!ALLOC_STRI_SIZE_OK(result, 3))) {
383           raise_error(MEMORY_ERROR);
384         } else {
385           result->size = 3;
386           result->mem[0] = '\'';
387           result->mem[1] = (strElemType) character;
388           result->mem[2] = '\'';
389         } /* if */
390       } /* if */
391     } else {
392       result = intStr((intType) character);
393     } /* if */
394     return result;
395   } /* chrCLit */
396 
397 
398 
#if ALLOW_STRITYPE_SLICES
/**
 *  Write the C literal that corresponds to 'character' to 'buffer'.
 *  For characters below 127 the literal is written to buffer->mem1
 *  and buffer->mem is set to refer to buffer->mem1. At most 6
 *  elements are written in this case. For characters from 127
 *  upwards the work is done by intStrToBuffer().
 *  @return the 'buffer' that has been provided as parameter.
 */
striType chrCLitToBuffer (charType character, striType buffer)

  {
    strElemType *dest;
    memSizeType pos = 0;

  /* chrCLitToBuffer */
    logFunction(printf("chrCLitToBuffer('\\" FMT_U32 ";')\n", character););
    if (character >= 127) {
      (void) intStrToBuffer((intType) character, buffer);
      return buffer;
    } /* if */
    buffer->mem = buffer->mem1;
    dest = buffer->mem1;
    dest[pos++] = (strElemType) '\'';
    if (character < ' ') {
      dest[pos++] = (strElemType) '\\';
      if (cstri_escape_sequence[character][1] == '0') {
        /* Always write three octal digits as strCLit does. */
        /* The first digit is 0, since the character is below ' '. */
        dest[pos++] = (strElemType) '0';
        /* The remaining two octal digits are computed directly. */
        /* This code is much faster than sprintf(). */
        dest[pos++] = (strElemType) ((character >> 3 & 0x7) + '0');
        dest[pos++] = (strElemType) ((character      & 0x7) + '0');
      } else {
        dest[pos++] = (strElemType) cstri_escape_sequence[character][1];
      } /* if */
    } else {
      if (character == '\\' || character == '\'') {
        dest[pos++] = (strElemType) '\\';
      } /* if */
      dest[pos++] = (strElemType) character;
    } /* if */
    dest[pos++] = (strElemType) '\'';
    buffer->size = pos;
    return buffer;
  } /* chrCLitToBuffer */
#endif
439 
440 
441 
442 /**
443  *  Compare two characters.
444  *  @return -1, 0 or 1 if the first argument is considered to be
445  *          respectively less than, equal to, or greater than the
446  *          second.
447  */
chrCmp(charType char1,charType char2)448 intType chrCmp (charType char1, charType char2)
449 
450   {
451     intType signumValue;
452 
453   /* chrCmp */
454     if (char1 < char2) {
455       signumValue = -1;
456     } else {
457       signumValue = char1 > char2;
458     } /* if */
459     return signumValue;
460   } /* chrCmp */
461 
462 
463 
464 /**
465  *  Reinterpret the generic parameters as charType and call chrCmp.
466  *  Function pointers in C programs generated by the Seed7 compiler
467  *  may point to this function. This assures correct behaviour even
468  *  if sizeof(genericType) != sizeof(charType).
469  */
chrCmpGeneric(const genericType value1,const genericType value2)470 intType chrCmpGeneric (const genericType value1, const genericType value2)
471 
472   { /* chrCmpGeneric */
473     return chrCmp(((const_rtlObjectType *) &value1)->value.charValue,
474                   ((const_rtlObjectType *) &value2)->value.charValue);
475   } /* chrCmpGeneric */
476 
477 
478 
479 /**
480  *  Reinterpret the generic parameters as charType and assign source to dest.
481  *  Function pointers in C programs generated by the Seed7 compiler
482  *  may point to this function. This assures correct behaviour even
483  *  if sizeof(genericType) != sizeof(charType).
484  */
chrCpyGeneric(genericType * const dest,const genericType source)485 void chrCpyGeneric (genericType *const dest, const genericType source)
486 
487   { /* chrCpyGeneric */
488     ((rtlObjectType *) dest)->value.charValue =
489         ((const_rtlObjectType *) &source)->value.charValue;
490   } /* chrCpyGeneric */
491 
492 
493 
494 /**
495  *  Check whether 'ch' is an alphabetic Unicode character.
496  *  Uses identifier data table to look up a particular code point.
497  *  The table includes many Unicode ranges listed below.
498  *  @return TRUE if 'ch' is an alphabetic symbol,
499  *          FALSE otherwise
500  */
chrIsLetter(charType ch)501 boolType chrIsLetter (charType ch)
502 
503   {
504     int ind;
505 
506   /* chrIsLetter */
507     if (ch <= 0x02fbff) {
508       ind = unicode_letters_ind[ch >> 9];
509       if (ind >= 0) {
510         return (boolType) (
511             (unicode_letters_data[8 * (unsigned int) ind + ((ch >> 6) & 7)] >> (ch & 63)) & 1);
512       } else {
513         return ~ind; /* -1 -> FALSE, -2 -> TRUE */
514       } /* if */
515     } /* if */
516     return FALSE;
517   } /* chrIsLetter */
518 
519 
520 
521 /**
522  *  Convert a character to lower case.
523  *  The conversion uses the default Unicode case mapping,
524  *  where each character is considered in isolation.
525  *  Characters without case mapping are left unchanged.
526  *  The mapping is independent from the locale. Individual
527  *  character case mappings cannot be reversed, because some
528  *  characters have multiple characters that map to them.
529  *  @return the character converted to lower case.
530  */
chrLow(charType ch)531 charType chrLow (charType ch)
532 
533   { /* chrLow */
534     toLower(&ch, 1, &ch);
535     return ch;
536   } /* chrLow */
537 
538 
539 
540 /**
541  *  Create a string with one character.
542  *  @return a string with the character 'ch'.
543  */
chrStr(charType ch)544 striType chrStr (charType ch)
545 
546   {
547     striType result;
548 
549   /* chrStr */
550     if (unlikely(!ALLOC_STRI_SIZE_OK(result, (memSizeType) 1))) {
551       raise_error(MEMORY_ERROR);
552       return NULL;
553     } else {
554       result->size = 1;
555       result->mem[0] = (strElemType) ch;
556       return result;
557     } /* if */
558   } /* chrStr */
559 
560 
561 
562 /**
563  *  Convert a character to upper case.
564  *  The conversion uses the default Unicode case mapping,
565  *  where each character is considered in isolation.
566  *  Characters without case mapping are left unchanged.
567  *  The mapping is independent from the locale. Individual
568  *  character case mappings cannot be reversed, because some
569  *  characters have multiple characters that map to them.
570  *  @return the character converted to upper case.
571  */
chrUp(charType ch)572 charType chrUp (charType ch)
573 
574   { /* chrUp */
575     toUpper(&ch, 1, &ch);
576     return ch;
577   } /* chrUp */
578 
579 
580 
581 /**
582  *  Number of screen columns occupied by the Unicode character 'ch'.
583  *  Non-spacing characters and control characters have width of 0.
584  *  @return 0,1 or 2 depending on the width occupied on a terminal.
585  */
chrWidth(charType ch)586 intType chrWidth (charType ch)
587 
588   { /* chrWidth */
589     if (is_nonspacing(ch)) {
590       return 0;
591     } else if (is_doublewidth(ch)) {
592       return 2;
593     } else {
594       return 1;
595     } /* if */
596   } /* chrWidth */
597