// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org/>

// standard library facilities the tests call directly (malloc/free,
// memcpy/memset/strlen/strcmp)
#include <stdlib.h>
#include <string.h>

// include the unit testing framework
#include "utest.h"

// include the header we are testing
#include "utf8.h"

// Greek sample sentence shared by most tests, stored as raw UTF-8 bytes:
// 104 bytes of text (53 code points, ending in a line feed) followed by the
// NUL terminator, i.e. 105 bytes in total. Each literal below is one word or
// one separator; the trailing comment gives the byte offset it starts at.
const char data[] =
    "\xce\x93\xce\xb1\xce\xb6\xce\xad\xce\xb5\xcf\x82"             // 0
    "\x20"                                                         // 12
    "\xce\xba\xce\xb1\xe1\xbd\xb6"                                 // 13
    "\x20"                                                         // 20
    "\xce\xbc\xcf\x85\xcf\x81\xcf\x84\xce\xb9\xe1\xbd\xb2\xcf\x82" // 21
    "\x20"                                                         // 36
    "\xce\xb4\xe1\xbd\xb2\xce\xbd"                                 // 37
    "\x20"                                                         // 44
    "\xce\xb8\xe1\xbd\xb0"                                         // 45
    "\x20"                                                         // 50
    "\xce\xb2\xcf\x81\xe1\xbf\xb6"                                 // 51
    "\x20"                                                         // 58
    "\xcf\x80\xce\xb9\xe1\xbd\xb0"                                 // 59
    "\x20"                                                         // 66
    "\xcf\x83\xcf\x84\xe1\xbd\xb8"                                 // 67
    "\x20"                                                         // 74
    "\xcf\x87\xcf\x81\xcf\x85\xcf\x83\xce\xb1\xcf\x86\xe1\xbd\xb6" // 75
    "\x20"                                                         // 90
    "\xce\xbe\xce\xad\xcf\x86\xcf\x89\xcf\x84\xce\xbf"             // 91
    "\x0a";                                                        // 103

// Two-code-point needle (U+03BC U+03C5); the same four bytes start at byte
// offset 21 of `data`.
const char cmp[] = "\xce\xbc\xcf\x85";

// Shares the first seven bytes with `data`; its eighth byte (0xac) is one
// lower than the corresponding byte of `data` (0xad).
const char lt[] = "\xce\x93\xce\xb1\xce\xb6\xce\xac";

// Shares the first seven bytes with `data`; its eighth byte (0xae) is one
// higher than the corresponding byte of `data` (0xad).
const char gt[] = "\xce\x93\xce\xb1\xce\xb6\xce\xae";

// Accept/reject set for the span tests: the first 13 bytes of `data` (its
// first word and the following space) plus the code point U+03BA.
const char spn[] =
    "\xce\x93\xce\xb1\xce\xb6\xce\xad\xce\xb5\xcf\x82"
    "\x20"
    "\xce\xba";

// Break set for utf8pbrk: U+03C2, a space and U+03B5.
const char pbrk[] = "\xcf\x82\x20\xce\xb5";

// The same ASCII sentence written with two different letter-case patterns.
const char ascii1[] = "I lIke GOATS YARHAR.";
const char ascii2[] = "i LIKE goats yarHAR.";

// The complete ASCII alphabet in both cases, once lower-then-upper and once
// upper-then-lower, so a case-insensitive compare must treat them as equal.
// (The letter 'x'/'X' used to be missing from both strings.)
const char allascii1[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char allascii2[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

// Plain ASCII substring-search fixtures: `needle` occurs inside `haystack`
// at offset 2, while `endfailneedle` only matches its first byte at the very
// end of `haystack`.
const char haystack[] = "foobar";
const char needle[] = "oba";
const char endfailneedle[] = "ra";

// One row of the case table below: a lower-case code point together with
// the upper-case code point it is paired with.
struct LowerUpperPair {
  int lower; // code point of the lower-case form
  int upper; // code point of the upper-case form
};

73 const struct LowerUpperPair lowupPairs[] = {
74 /* ascii */
75 {0x0061, 0x0041},
76 {0x0062, 0x0042},
77 {0x0063, 0x0043},
78 {0x0064, 0x0044},
79 {0x0065, 0x0045},
80 {0x0066, 0x0046},
81 {0x0067, 0x0047},
82 {0x0068, 0x0048},
83 {0x0069, 0x0049},
84 {0x006a, 0x004a},
85 {0x006b, 0x004b},
86 {0x006c, 0x004c},
87 {0x006d, 0x004d},
88 {0x006e, 0x004e},
89 {0x006f, 0x004f},
90 {0x0070, 0x0050},
91 {0x0071, 0x0051},
92 {0x0072, 0x0052},
93 {0x0073, 0x0053},
94 {0x0074, 0x0054},
95 {0x0075, 0x0055},
96 {0x0076, 0x0056},
97 {0x0077, 0x0057},
98 {0x0078, 0x0058},
99 {0x0079, 0x0059},
100 {0x007a, 0x005a},
101
102 /* Latin-1 Supplement */
103 {0x00e0, 0x00c0},
104 {0x00e1, 0x00c1},
105 {0x00e2, 0x00c2},
106 {0x00e3, 0x00c3},
107 {0x00e4, 0x00c4},
108 {0x00e5, 0x00c5},
109 {0x00e6, 0x00c6},
110 {0x00e7, 0x00c7},
111 {0x00e8, 0x00c8},
112 {0x00e9, 0x00c9},
113 {0x00ea, 0x00ca},
114 {0x00eb, 0x00cb},
115 {0x00ec, 0x00cc},
116 {0x00ed, 0x00cd},
117 {0x00ee, 0x00ce},
118 {0x00ef, 0x00cf},
119 {0x00f0, 0x00d0},
120 {0x00f1, 0x00d1},
121 {0x00f2, 0x00d2},
122 {0x00f3, 0x00d3},
123 {0x00f4, 0x00d4},
124 {0x00f5, 0x00d5},
125 {0x00f6, 0x00d6},
126 {0x00f8, 0x00d8},
127 {0x00f9, 0x00d9},
128 {0x00fa, 0x00da},
129 {0x00fb, 0x00db},
130 {0x00fc, 0x00dc},
131 {0x00fd, 0x00dd},
132 {0x00fe, 0x00de},
133 {0x00ff, 0x0178},
134
135 /* Latin Extended-A */
136 {0x0101, 0x0100},
137 {0x0103, 0x0102},
138 {0x0105, 0x0104},
139 {0x0107, 0x0106},
140 {0x0109, 0x0108},
141 {0x010b, 0x010a},
142 {0x010d, 0x010c},
143 {0x010f, 0x010e},
144 {0x0111, 0x0110},
145 {0x0113, 0x0112},
146 {0x0115, 0x0114},
147 {0x0117, 0x0116},
148 {0x0119, 0x0118},
149 {0x011b, 0x011a},
150 {0x011d, 0x011c},
151 {0x011f, 0x011e},
152 {0x0121, 0x0120},
153 {0x0123, 0x0122},
154 {0x0125, 0x0124},
155 {0x0127, 0x0126},
156 {0x0129, 0x0128},
157 {0x012b, 0x012a},
158 {0x012d, 0x012c},
159 {0x012f, 0x012e},
160 {0x0133, 0x0132},
161 {0x0135, 0x0134},
162 {0x0137, 0x0136},
163 {0x013a, 0x0139},
164 {0x013c, 0x013b},
165 {0x013e, 0x013d},
166 {0x0140, 0x013f},
167 {0x0142, 0x0141},
168 {0x0144, 0x0143},
169 {0x0146, 0x0145},
170 {0x0148, 0x0147},
171 {0x014b, 0x014a},
172 {0x014d, 0x014c},
173 {0x014f, 0x014e},
174 {0x0151, 0x0150},
175 {0x0153, 0x0152},
176 {0x0155, 0x0154},
177 {0x0157, 0x0156},
178 {0x0159, 0x0158},
179 {0x015b, 0x015a},
180 {0x015d, 0x015c},
181 {0x015f, 0x015e},
182 {0x0161, 0x0160},
183 {0x0163, 0x0162},
184 {0x0165, 0x0164},
185 {0x0167, 0x0166},
186 {0x0169, 0x0168},
187 {0x016b, 0x016a},
188 {0x016d, 0x016c},
189 {0x016f, 0x016e},
190 {0x0171, 0x0170},
191 {0x0173, 0x0172},
192 {0x0175, 0x0174},
193 {0x0177, 0x0176},
194 {0x017a, 0x0179},
195 {0x017c, 0x017b},
196 {0x017e, 0x017d},
197
198 /* Latin Extended-B */
199 {0x0180, 0x0243},
200 {0x01dd, 0x018e},
201 {0x019a, 0x023d},
202 {0x019e, 0x0220},
203 {0x0292, 0x01b7},
204 {0x01c6, 0x01c4},
205 {0x01c9, 0x01c7},
206 {0x01cc, 0x01ca},
207 {0x01f3, 0x01f1},
208 {0x01bf, 0x01f7},
209 {0x0183, 0x0182},
210 {0x0185, 0x0184},
211 {0x0188, 0x0187},
212 {0x018c, 0x018b},
213 {0x0192, 0x0191},
214 {0x0199, 0x0198},
215 {0x01a1, 0x01a0},
216 {0x01a3, 0x01a2},
217 {0x01a5, 0x01a4},
218 {0x01a8, 0x01a7},
219 {0x01ad, 0x01ac},
220 {0x01b0, 0x01af},
221 {0x01b4, 0x01b3},
222 {0x01b6, 0x01b5},
223 {0x01b9, 0x01b8},
224 {0x01bd, 0x01bc},
225 {0x01ce, 0x01cd},
226 {0x01d0, 0x01cf},
227 {0x01d2, 0x01d1},
228 {0x01d4, 0x01d3},
229 {0x01d6, 0x01d5},
230 {0x01d8, 0x01d7},
231 {0x01da, 0x01d9},
232 {0x01dc, 0x01db},
233 {0x01df, 0x01de},
234 {0x01e1, 0x01e0},
235 {0x01e3, 0x01e2},
236 {0x01e5, 0x01e4},
237 {0x01e7, 0x01e6},
238 {0x01e9, 0x01e8},
239 {0x01eb, 0x01ea},
240 {0x01ed, 0x01ec},
241 {0x01ef, 0x01ee},
242 {0x01f5, 0x01f4},
243 {0x01f9, 0x01f8},
244 {0x01fb, 0x01fa},
245 {0x01fd, 0x01fc},
246 {0x01ff, 0x01fe},
247 {0x0201, 0x0200},
248 {0x0203, 0x0202},
249 {0x0205, 0x0204},
250 {0x0207, 0x0206},
251 {0x0209, 0x0208},
252 {0x020b, 0x020a},
253 {0x020d, 0x020c},
254 {0x020f, 0x020e},
255 {0x0211, 0x0210},
256 {0x0213, 0x0212},
257 {0x0215, 0x0214},
258 {0x0217, 0x0216},
259 {0x0219, 0x0218},
260 {0x021b, 0x021a},
261 {0x021d, 0x021c},
262 {0x021f, 0x021e},
263 {0x0223, 0x0222},
264 {0x0225, 0x0224},
265 {0x0227, 0x0226},
266 {0x0229, 0x0228},
267 {0x022b, 0x022a},
268 {0x022d, 0x022c},
269 {0x022f, 0x022e},
270 {0x0231, 0x0230},
271 {0x0233, 0x0232},
272 {0x023c, 0x023b},
273 {0x0242, 0x0241},
274 {0x0247, 0x0246},
275 {0x0249, 0x0248},
276 {0x024b, 0x024a},
277 {0x024d, 0x024c},
278 {0x024f, 0x024e},
279
280 /* Greek and Coptic */
281 {0x037b, 0x03fd},
282 {0x037c, 0x03fe},
283 {0x037d, 0x03ff},
284
285 {0x03f3, 0x037f},
286 {0x03ac, 0x0386},
287
288 {0x03ad, 0x0388},
289 {0x03ae, 0x0389},
290 {0x03af, 0x038a},
291
292 {0x03cc, 0x038c},
293
294 {0x03cd, 0x038e},
295 {0x03ce, 0x038f},
296
297 {0x0371, 0x0370},
298 {0x0373, 0x0372},
299 {0x0377, 0x0376},
300
301 {0x03B1, 0x0391},
302 {0x03B2, 0x0392},
303 {0x03B3, 0x0393},
304 {0x03B4, 0x0394},
305 {0x03B5, 0x0395},
306 {0x03B6, 0x0396},
307 {0x03B7, 0x0397},
308 {0x03B8, 0x0398},
309 {0x03B9, 0x0399},
310 {0x03BA, 0x039A},
311 {0x03BB, 0x039B},
312 {0x03BC, 0x039C},
313 {0x03BD, 0x039D},
314 {0x03BE, 0x039E},
315 {0x03BF, 0x039F},
316 {0x03C0, 0x03A0},
317 {0x03C1, 0x03A1},
318
319 {0x03C3, 0x03A3},
320 {0x03C4, 0x03A4},
321 {0x03C5, 0x03A5},
322 {0x03C6, 0x03A6},
323 {0x03C7, 0x03A7},
324 {0x03C8, 0x03A8},
325 {0x03C9, 0x03A9},
326 {0x03ca, 0x03aa},
327 {0x03cb, 0x03ab},
328
329 {0x03d1, 0x03f4},
330
331 {0x03d7, 0x03cf},
332
333 {0x03d9, 0x03d8},
334 {0x03db, 0x03da},
335 {0x03dd, 0x03dc},
336 {0x03df, 0x03de},
337 {0x03e1, 0x03e0},
338 {0x03e3, 0x03e2},
339 {0x03e5, 0x03e4},
340 {0x03e7, 0x03e6},
341 {0x03e9, 0x03e8},
342 {0x03eb, 0x03ea},
343 {0x03ed, 0x03ec},
344 {0x03ef, 0x03ee},
345
346 {0x03f2, 0x03f9},
347
348 {0x03f8, 0x03f7},
349
350 {0x03fb, 0x03fa},
351
352 // End of array marker
353 {0, 0}};
354
// UTF-8 encoding of every `lower` code point of lowupPairs, in table order:
// 26 one-byte and 229 two-byte code points (484 bytes) plus the NUL
// terminator. Every byte is written as a hex escape.
const char lowersStr[] =
    // ascii
    "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d"
    "\x6e\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a"
    // Latin-1 Supplement
    "\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4\xc3\xa5\xc3\xa6\xc3\xa7"
    "\xc3\xa8\xc3\xa9\xc3\xaa\xc3\xab\xc3\xac\xc3\xad\xc3\xae\xc3\xaf"
    "\xc3\xb0\xc3\xb1\xc3\xb2\xc3\xb3\xc3\xb4\xc3\xb5\xc3\xb6\xc3\xb8"
    "\xc3\xb9\xc3\xba\xc3\xbb\xc3\xbc\xc3\xbd\xc3\xbe\xc3\xbf"
    // Latin Extended-A
    "\xc4\x81\xc4\x83\xc4\x85\xc4\x87\xc4\x89\xc4\x8b\xc4\x8d\xc4\x8f"
    "\xc4\x91\xc4\x93\xc4\x95\xc4\x97\xc4\x99\xc4\x9b\xc4\x9d\xc4\x9f"
    "\xc4\xa1\xc4\xa3\xc4\xa5\xc4\xa7\xc4\xa9\xc4\xab\xc4\xad\xc4\xaf"
    "\xc4\xb3\xc4\xb5\xc4\xb7\xc4\xba\xc4\xbc\xc4\xbe\xc5\x80\xc5\x82"
    "\xc5\x84\xc5\x86\xc5\x88\xc5\x8b\xc5\x8d\xc5\x8f\xc5\x91\xc5\x93"
    "\xc5\x95\xc5\x97\xc5\x99\xc5\x9b\xc5\x9d\xc5\x9f\xc5\xa1\xc5\xa3"
    "\xc5\xa5\xc5\xa7\xc5\xa9\xc5\xab\xc5\xad\xc5\xaf\xc5\xb1\xc5\xb3"
    "\xc5\xb5\xc5\xb7\xc5\xba\xc5\xbc\xc5\xbe"
    // Latin Extended-B
    "\xc6\x80\xc7\x9d\xc6\x9a\xc6\x9e\xca\x92\xc7\x86\xc7\x89\xc7\x8c"
    "\xc7\xb3\xc6\xbf\xc6\x83\xc6\x85\xc6\x88\xc6\x8c\xc6\x92\xc6\x99"
    "\xc6\xa1\xc6\xa3\xc6\xa5\xc6\xa8\xc6\xad\xc6\xb0\xc6\xb4\xc6\xb6"
    "\xc6\xb9\xc6\xbd\xc7\x8e\xc7\x90\xc7\x92\xc7\x94\xc7\x96\xc7\x98"
    "\xc7\x9a\xc7\x9c\xc7\x9f\xc7\xa1\xc7\xa3\xc7\xa5\xc7\xa7\xc7\xa9"
    "\xc7\xab\xc7\xad\xc7\xaf\xc7\xb5\xc7\xb9\xc7\xbb\xc7\xbd\xc7\xbf"
    "\xc8\x81\xc8\x83\xc8\x85\xc8\x87\xc8\x89\xc8\x8b\xc8\x8d\xc8\x8f"
    "\xc8\x91\xc8\x93\xc8\x95\xc8\x97\xc8\x99\xc8\x9b\xc8\x9d\xc8\x9f"
    "\xc8\xa3\xc8\xa5\xc8\xa7\xc8\xa9\xc8\xab\xc8\xad\xc8\xaf\xc8\xb1"
    "\xc8\xb3\xc8\xbc\xc9\x82\xc9\x87\xc9\x89\xc9\x8b\xc9\x8d\xc9\x8f"
    // Greek and Coptic
    "\xcd\xbb\xcd\xbc\xcd\xbd\xcf\xb3\xce\xac\xce\xad\xce\xae\xce\xaf"
    "\xcf\x8c\xcf\x8d\xcf\x8e\xcd\xb1\xcd\xb3\xcd\xb7\xce\xb1\xce\xb2"
    "\xce\xb3\xce\xb4\xce\xb5\xce\xb6\xce\xb7\xce\xb8\xce\xb9\xce\xba"
    "\xce\xbb\xce\xbc\xce\xbd\xce\xbe\xce\xbf\xcf\x80\xcf\x81\xcf\x83"
    "\xcf\x84\xcf\x85\xcf\x86\xcf\x87\xcf\x88\xcf\x89\xcf\x8a\xcf\x8b"
    "\xcf\x91\xcf\x97\xcf\x99\xcf\x9b\xcf\x9d\xcf\x9f\xcf\xa1\xcf\xa3"
    "\xcf\xa5\xcf\xa7\xcf\xa9\xcf\xab\xcf\xad\xcf\xaf\xcf\xb2\xcf\xb8"
    "\xcf\xbb";

// UTF-8 encoding of every `upper` code point of lowupPairs, in table order:
// 26 one-byte and 229 two-byte code points (484 bytes) plus the NUL
// terminator. Every byte is written as a hex escape.
const char uppersStr[] =
    // ascii
    "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d"
    "\x4e\x4f\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a"
    // Latin-1 Supplement (the last entry, U+0178, lies outside that block)
    "\xc3\x80\xc3\x81\xc3\x82\xc3\x83\xc3\x84\xc3\x85\xc3\x86\xc3\x87"
    "\xc3\x88\xc3\x89\xc3\x8a\xc3\x8b\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f"
    "\xc3\x90\xc3\x91\xc3\x92\xc3\x93\xc3\x94\xc3\x95\xc3\x96\xc3\x98"
    "\xc3\x99\xc3\x9a\xc3\x9b\xc3\x9c\xc3\x9d\xc3\x9e\xc5\xb8"
    // Latin Extended-A
    "\xc4\x80\xc4\x82\xc4\x84\xc4\x86\xc4\x88\xc4\x8a\xc4\x8c\xc4\x8e"
    "\xc4\x90\xc4\x92\xc4\x94\xc4\x96\xc4\x98\xc4\x9a\xc4\x9c\xc4\x9e"
    "\xc4\xa0\xc4\xa2\xc4\xa4\xc4\xa6\xc4\xa8\xc4\xaa\xc4\xac\xc4\xae"
    "\xc4\xb2\xc4\xb4\xc4\xb6\xc4\xb9\xc4\xbb\xc4\xbd\xc4\xbf\xc5\x81"
    "\xc5\x83\xc5\x85\xc5\x87\xc5\x8a\xc5\x8c\xc5\x8e\xc5\x90\xc5\x92"
    "\xc5\x94\xc5\x96\xc5\x98\xc5\x9a\xc5\x9c\xc5\x9e\xc5\xa0\xc5\xa2"
    "\xc5\xa4\xc5\xa6\xc5\xa8\xc5\xaa\xc5\xac\xc5\xae\xc5\xb0\xc5\xb2"
    "\xc5\xb4\xc5\xb6\xc5\xb9\xc5\xbb\xc5\xbd"
    // Latin Extended-B
    "\xc9\x83\xc6\x8e\xc8\xbd\xc8\xa0\xc6\xb7\xc7\x84\xc7\x87\xc7\x8a"
    "\xc7\xb1\xc7\xb7\xc6\x82\xc6\x84\xc6\x87\xc6\x8b\xc6\x91\xc6\x98"
    "\xc6\xa0\xc6\xa2\xc6\xa4\xc6\xa7\xc6\xac\xc6\xaf\xc6\xb3\xc6\xb5"
    "\xc6\xb8\xc6\xbc\xc7\x8d\xc7\x8f\xc7\x91\xc7\x93\xc7\x95\xc7\x97"
    "\xc7\x99\xc7\x9b\xc7\x9e\xc7\xa0\xc7\xa2\xc7\xa4\xc7\xa6\xc7\xa8"
    "\xc7\xaa\xc7\xac\xc7\xae\xc7\xb4\xc7\xb8\xc7\xba\xc7\xbc\xc7\xbe"
    "\xc8\x80\xc8\x82\xc8\x84\xc8\x86\xc8\x88\xc8\x8a\xc8\x8c\xc8\x8e"
    "\xc8\x90\xc8\x92\xc8\x94\xc8\x96\xc8\x98\xc8\x9a\xc8\x9c\xc8\x9e"
    "\xc8\xa2\xc8\xa4\xc8\xa6\xc8\xa8\xc8\xaa\xc8\xac\xc8\xae\xc8\xb0"
    "\xc8\xb2\xc8\xbb\xc9\x81\xc9\x86\xc9\x88\xc9\x8a\xc9\x8c\xc9\x8e"
    // Greek and Coptic
    "\xcf\xbd\xcf\xbe\xcf\xbf\xcd\xbf\xce\x86\xce\x88\xce\x89\xce\x8a"
    "\xce\x8c\xce\x8e\xce\x8f\xcd\xb0\xcd\xb2\xcd\xb6\xce\x91\xce\x92"
    "\xce\x93\xce\x94\xce\x95\xce\x96\xce\x97\xce\x98\xce\x99\xce\x9a"
    "\xce\x9b\xce\x9c\xce\x9d\xce\x9e\xce\x9f\xce\xa0\xce\xa1\xce\xa3"
    "\xce\xa4\xce\xa5\xce\xa6\xce\xa7\xce\xa8\xce\xa9\xce\xaa\xce\xab"
    "\xcf\xb4\xcf\x8f\xcf\x98\xcf\x9a\xcf\x9c\xcf\x9e\xcf\xa0\xcf\xa2"
    "\xcf\xa4\xcf\xa6\xcf\xa8\xcf\xaa\xcf\xac\xcf\xae\xcf\xb9\xcf\xb7"
    "\xcf\xba";

UTEST(utf8len,data)467 UTEST(utf8len, data) { ASSERT_EQ(53, utf8len(data)); }
468
UTEST(utf8cat,empty_cat_data)469 UTEST(utf8cat, empty_cat_data) {
470 char cat[512] = {'\0'};
471
472 ASSERT_EQ(0, utf8len(cat));
473
474 ASSERT_EQ(53, utf8len(utf8cat(cat, data)));
475 }
476
UTEST(utf8cat,one_byte_cat_data)477 UTEST(utf8cat, one_byte_cat_data) {
478 char cat[512];
479
480 cat[0] = 'a';
481 cat[1] = '\0';
482
483 ASSERT_EQ(1, utf8len(cat));
484
485 ASSERT_EQ(54, utf8len(utf8cat(cat, data)));
486 }
487
UTEST(utf8cat,two_bytes_cat_data)488 UTEST(utf8cat, two_bytes_cat_data) {
489 char cat[512];
490
491 cat[0] = '\xce';
492 cat[1] = '\x93';
493 cat[2] = '\0';
494
495 ASSERT_EQ(1, utf8len(cat));
496
497 ASSERT_EQ(54, utf8len(utf8cat(cat, data)));
498 }
499
UTEST(utf8cat,three_bytes_cat_data)500 UTEST(utf8cat, three_bytes_cat_data) {
501 char cat[512];
502
503 cat[0] = '\xe1';
504 cat[1] = '\xbd';
505 cat[2] = '\xb6';
506 cat[3] = '\0';
507
508 ASSERT_EQ(1, utf8len(cat));
509
510 ASSERT_EQ(54, utf8len(utf8cat(cat, data)));
511 }
512
UTEST(utf8cat,four_bytes_cat_data)513 UTEST(utf8cat, four_bytes_cat_data) {
514 char cat[512];
515
516 cat[0] = '\xf0';
517 cat[1] = '\x90';
518 cat[2] = '\x8d';
519 cat[3] = '\x88';
520 cat[4] = '\0';
521
522 ASSERT_EQ(1, utf8len(cat));
523
524 ASSERT_EQ(54, utf8len(utf8cat(cat, data)));
525 }
526
UTEST(utf8cat,cat_data_data)527 UTEST(utf8cat, cat_data_data) {
528 char cat[512] = {'\0'};
529
530 ASSERT_EQ(0, utf8len(cat));
531
532 ASSERT_EQ(106, utf8len(utf8cat(utf8cat(cat, data), data)));
533 }
534
UTEST(utf8str,cmp)535 UTEST(utf8str, cmp) { ASSERT_EQ(data + 21, utf8str(data, cmp)); }
536
UTEST(utf8str,test)537 UTEST(utf8str, test) { ASSERT_EQ((void *)0, utf8str(data, "test")); }
538
UTEST(utf8str,empty)539 UTEST(utf8str, empty) { ASSERT_EQ(data, utf8str(data, "")); }
540
UTEST(utf8str,partial)541 UTEST(utf8str, partial) { ASSERT_EQ(haystack + 2, utf8str(haystack, needle)); }
542
UTEST(utf8str,endfail)543 UTEST(utf8str, endfail) { ASSERT_EQ((void *)0, utf8str(haystack, endfailneedle)); }
544
UTEST(utf8casestr,cmp)545 UTEST(utf8casestr, cmp) { ASSERT_EQ(data + 21, utf8casestr(data, cmp)); }
546
UTEST(utf8casestr,test)547 UTEST(utf8casestr, test) { ASSERT_EQ((void *)0, utf8casestr(data, "test")); }
548
UTEST(utf8casestr,empty)549 UTEST(utf8casestr, empty) { ASSERT_EQ(data, utf8casestr(data, "")); }
550
UTEST(utf8casestr,partial)551 UTEST(utf8casestr, partial) { ASSERT_EQ(haystack + 2, utf8casestr(haystack, needle)); }
552
UTEST(utf8casestr,endfail)553 UTEST(utf8casestr, endfail) { ASSERT_EQ((void *)0, utf8casestr(haystack, endfailneedle)); }
554
UTEST(utf8casestr,latin)555 UTEST(utf8casestr, latin) {
556 ASSERT_EQ(lowersStr, utf8casestr(lowersStr, uppersStr));
557 }
558
UTEST(utf8chr,a)559 UTEST(utf8chr, a) { ASSERT_EQ(data + 21, utf8chr(data, 0x3bc)); }
560
UTEST(utf8chr,b)561 UTEST(utf8chr, b) { ASSERT_EQ(0, utf8chr(data, 0x20ac)); }
562
UTEST(utf8chr,null_terminator)563 UTEST(utf8chr, null_terminator) { ASSERT_EQ(data + 104, utf8chr(data, '\0')); }
564
565 UTEST(utf8chr, 0x20) { ASSERT_EQ(data + 12, utf8chr(data, 0x20)); }
566
UTEST(utf8cmp,lt)567 UTEST(utf8cmp, lt) { ASSERT_LT(0, utf8cmp(data, lt)); }
568
UTEST(utf8cmp,eq)569 UTEST(utf8cmp, eq) { ASSERT_EQ(0, utf8cmp(data, data)); }
570
UTEST(utf8cmp,gt)571 UTEST(utf8cmp, gt) { ASSERT_GT(0, utf8cmp(data, gt)); }
572
UTEST(utf8cpy,data)573 UTEST(utf8cpy, data) {
574 char cpy[512] = {'\0'};
575
576 ASSERT_EQ(53, utf8len(utf8cpy(cpy, data)));
577 }
578
UTEST(utf8spn,spn)579 UTEST(utf8spn, spn) { ASSERT_EQ(7, utf8spn(data, spn)); }
580
UTEST(utf8spn,data)581 UTEST(utf8spn, data) { ASSERT_EQ(52, utf8spn(data, data)); }
582
UTEST(utf8spn,ascii)583 UTEST(utf8spn, ascii) { ASSERT_EQ(0, utf8spn(data, "ab")); }
584
UTEST(utf8cspn,spn)585 UTEST(utf8cspn, spn) { ASSERT_EQ(0, utf8cspn(data, spn)); }
586
UTEST(utf8cspn,data)587 UTEST(utf8cspn, data) { ASSERT_EQ(0, utf8cspn(data, data)); }
588
UTEST(utf8cspn,ascii)589 UTEST(utf8cspn, ascii) { ASSERT_EQ(53, utf8cspn(data, "ab")); }
590
UTEST(utf8rchr,a)591 UTEST(utf8rchr, a) { ASSERT_EQ(data + 21, utf8rchr(data, 0x3bc)); }
592
UTEST(utf8rchr,b)593 UTEST(utf8rchr, b) { ASSERT_EQ(0, utf8rchr(data, 0x20ac)); }
594
UTEST(utf8rchr,null_terminator)595 UTEST(utf8rchr, null_terminator) {
596 ASSERT_EQ(data + 104, utf8rchr(data, '\0'));
597 }
598
599 UTEST(utf8rchr, 0x20) { ASSERT_EQ(data + 90, utf8rchr(data, 0x20)); }
600
UTEST(utf8dup,data)601 UTEST(utf8dup, data) {
602 void *const dup = utf8dup(data);
603 ASSERT_TRUE(dup);
604 ASSERT_EQ(53, utf8len(dup));
605 free(dup);
606 }
607
UTEST(utf8dup,ascii)608 UTEST(utf8dup, ascii) {
609 void *const dup = utf8dup("ab");
610 ASSERT_TRUE(dup);
611 ASSERT_EQ(2, utf8len(dup));
612 free(dup);
613 }
614
UTEST(utf8dup,empty)615 UTEST(utf8dup, empty) {
616 void *const dup = utf8dup("");
617 ASSERT_TRUE(dup);
618 ASSERT_EQ(0, utf8len(dup));
619 free(dup);
620 }
621
UTEST(utf8ndup,ascii)622 UTEST(utf8ndup, ascii) {
623 void *const dup = utf8ndup("1234567890", 4);
624 ASSERT_TRUE(dup);
625 ASSERT_EQ(4, utf8len(dup));
626 free(dup);
627 }
628
UTEST(utf8ndup,ascii_larger)629 UTEST(utf8ndup, ascii_larger) {
630 void *const dup = utf8ndup("1234567890", 100);
631 ASSERT_TRUE(dup);
632 ASSERT_EQ(10, utf8len(dup));
633 free(dup);
634 }
635
UTEST(utf8size,data)636 UTEST(utf8size, data) { ASSERT_EQ(105, utf8size(data)); }
637
UTEST(utf8size,ascii)638 UTEST(utf8size, ascii) { ASSERT_EQ(3, utf8size("ab")); }
639
UTEST(utf8size,empty)640 UTEST(utf8size, empty) { ASSERT_EQ(1, utf8size("")); }
641
UTEST(utf8valid,a)642 UTEST(utf8valid, a) {
643 char invalid[6];
644
645 invalid[0] = '\xf0';
646 invalid[1] = '\x8f';
647 invalid[2] = '\xbf';
648 invalid[3] = '\xbf';
649 invalid[4] = '\0';
650
651 ASSERT_EQ(invalid, utf8valid(invalid));
652 }
653
UTEST(utf8valid,b)654 UTEST(utf8valid, b) {
655 char invalid[6];
656
657 invalid[0] = '\xf1';
658 invalid[1] = '\x3f';
659 invalid[2] = '\xbf';
660 invalid[3] = '\xbf';
661 invalid[4] = '\0';
662
663 ASSERT_EQ(invalid, utf8valid(invalid));
664 }
665
UTEST(utf8valid,c)666 UTEST(utf8valid, c) {
667 char invalid[6];
668
669 invalid[0] = '\xf1';
670 invalid[1] = '\xbf';
671 invalid[2] = '\x3f';
672 invalid[3] = '\xbf';
673 invalid[4] = '\0';
674
675 ASSERT_EQ(invalid, utf8valid(invalid));
676 }
677
UTEST(utf8valid,d)678 UTEST(utf8valid, d) {
679 char invalid[6];
680
681 invalid[0] = '\xf1';
682 invalid[1] = '\xbf';
683 invalid[2] = '\xbf';
684 invalid[3] = '\x3f';
685 invalid[4] = '\0';
686
687 ASSERT_EQ(invalid, utf8valid(invalid));
688 }
689
UTEST(utf8valid,e)690 UTEST(utf8valid, e) {
691 char invalid[6];
692
693 invalid[0] = '\xe0';
694 invalid[1] = '\x9f';
695 invalid[2] = '\xbf';
696 invalid[3] = '\0';
697
698 ASSERT_EQ(invalid, utf8valid(invalid));
699 }
700
UTEST(utf8valid,f)701 UTEST(utf8valid, f) {
702 char invalid[6];
703
704 invalid[0] = '\xef';
705 invalid[1] = '\x3f';
706 invalid[2] = '\xbf';
707 invalid[3] = '\0';
708
709 ASSERT_EQ(invalid, utf8valid(invalid));
710 }
711
UTEST(utf8valid,g)712 UTEST(utf8valid, g) {
713 char invalid[6];
714
715 invalid[0] = '\xef';
716 invalid[1] = '\xbf';
717 invalid[2] = '\x3f';
718 invalid[3] = '\0';
719
720 ASSERT_EQ(invalid, utf8valid(invalid));
721 }
722
UTEST(utf8valid,h)723 UTEST(utf8valid, h) {
724 char invalid[6];
725
726 invalid[0] = '\xc1';
727 invalid[1] = '\xbf';
728 invalid[2] = '\0';
729
730 ASSERT_EQ(invalid, utf8valid(invalid));
731 }
732
UTEST(utf8valid,i)733 UTEST(utf8valid, i) {
734 char invalid[6];
735
736 invalid[0] = '\xdf';
737 invalid[1] = '\x3f';
738 invalid[2] = '\0';
739
740 ASSERT_EQ(invalid, utf8valid(invalid));
741 }
742
UTEST(utf8valid,j)743 UTEST(utf8valid, j) {
744 char invalid[6];
745
746 invalid[0] = '\x80';
747 invalid[1] = '\0';
748
749 ASSERT_EQ(invalid, utf8valid(invalid));
750 }
751
UTEST(utf8valid,k)752 UTEST(utf8valid, k) {
753 char invalid[6];
754
755 invalid[0] = '\xf8';
756 invalid[1] = '\0';
757
758 ASSERT_EQ(invalid, utf8valid(invalid));
759 }
760
UTEST(utf8valid,l)761 UTEST(utf8valid, l) {
762 char invalid[6];
763
764 invalid[0] = '\xf1';
765 invalid[1] = '\xbf';
766 invalid[2] = '\xbf';
767 invalid[3] = '\xbf';
768 invalid[4] = '\xbf';
769 invalid[5] = '\0';
770
771 ASSERT_EQ(invalid, utf8valid(invalid));
772 }
773
UTEST(utf8valid,m)774 UTEST(utf8valid, m) {
775 char invalid[6];
776
777 invalid[0] = '\xef';
778 invalid[1] = '\xbf';
779 invalid[2] = '\xbf';
780 invalid[3] = '\xbf';
781 invalid[4] = '\0';
782
783 ASSERT_EQ(invalid, utf8valid(invalid));
784 }
785
UTEST(utf8valid,n)786 UTEST(utf8valid, n) {
787 char invalid[6];
788
789 invalid[0] = '\xdf';
790 invalid[1] = '\xbf';
791 invalid[2] = '\xbf';
792 invalid[3] = '\0';
793
794 ASSERT_EQ(invalid, utf8valid(invalid));
795 }
796
UTEST(utf8valid,data)797 UTEST(utf8valid, data) { ASSERT_EQ(0, utf8valid(data)); }
798
UTEST(utf8valid,ascii)799 UTEST(utf8valid, ascii) { ASSERT_EQ(0, utf8valid("ab")); }
800
UTEST(utf8valid,empty)801 UTEST(utf8valid, empty) { ASSERT_EQ(0, utf8valid("")); }
802
UTEST(utf8ncat,ascii_cat_data)803 UTEST(utf8ncat, ascii_cat_data) {
804 char cat[512] = {'\0'};
805 cat[0] = 'a';
806 cat[1] = '\0';
807 ASSERT_EQ(2, utf8len(utf8ncat(cat, data, 2)));
808 }
809
UTEST(utf8ncat,cat_data)810 UTEST(utf8ncat, cat_data) {
811 char cat[512] = {'\0'};
812 ASSERT_EQ(53, utf8len(utf8ncat(cat, data, 40000)));
813 }
814
UTEST(utf8ncat,bad_cat)815 UTEST(utf8ncat, bad_cat) {
816 char cat[512] = {'\0'};
817 ASSERT_EQ(cat, utf8valid(utf8ncat(cat, data, 1)));
818 }
819
UTEST(utf8ncmp,lt_large)820 UTEST(utf8ncmp, lt_large) { ASSERT_LT(0, utf8ncmp(data, lt, 4000)); }
821
UTEST(utf8ncmp,lt_small)822 UTEST(utf8ncmp, lt_small) { ASSERT_EQ(0, utf8ncmp(data, lt, 7)); }
823
UTEST(utf8ncmp,eq_large)824 UTEST(utf8ncmp, eq_large) { ASSERT_EQ(0, utf8ncmp(data, data, 4000)); }
825
UTEST(utf8ncmp,eq_small)826 UTEST(utf8ncmp, eq_small) { ASSERT_EQ(0, utf8ncmp(data, data, 7)); }
827
UTEST(utf8ncmp,gt_large)828 UTEST(utf8ncmp, gt_large) { ASSERT_GT(0, utf8ncmp(data, gt, 4000)); }
829
UTEST(utf8ncmp,gt_small)830 UTEST(utf8ncmp, gt_small) { ASSERT_EQ(0, utf8ncmp(data, gt, 7)); }
831
UTEST(utf8ncpy,data_null_terminated)832 UTEST(utf8ncpy, data_null_terminated) {
833 char cpy[512] = {'\0'};
834 ASSERT_EQ('\0', *((char *)utf8ncpy(cpy, data, 106) + 105));
835 }
836
UTEST(utf8ncpy,data)837 UTEST(utf8ncpy, data) {
838 char cpy[512] = {'\0'};
839 ASSERT_EQ(53, utf8len(utf8ncpy(cpy, data, 105)));
840 }
841
UTEST(utf8ncpy,check_no_buffer_overflow)842 UTEST(utf8ncpy, check_no_buffer_overflow) {
843 utf8_int32_t i;
844 char buffer[11] = {0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
845 0xdd, 0xdd, 0xdd, 0xdd, 0xdd};
846 ASSERT_EQ(buffer, utf8ncpy(buffer, "foo", 10));
847
848 ASSERT_EQ('f', buffer[0]);
849 ASSERT_EQ('o', buffer[1]);
850 ASSERT_EQ('o', buffer[2]);
851
852 for (i = 3; 10 != i; i++) {
853 ASSERT_EQ(0, buffer[i]);
854 }
855
856 ASSERT_EQ((char)0xdd, buffer[10]);
857 }
858
UTEST(utf8ncpy,check_no_n_overflow)859 UTEST(utf8ncpy, check_no_n_overflow) {
860 char buffer[4] = {1, 2, 3, 4};
861 ASSERT_EQ(buffer, utf8ncpy(buffer, "foo", 2));
862
863 ASSERT_EQ('f', buffer[0]);
864 ASSERT_EQ('o', buffer[1]);
865 ASSERT_EQ(3, buffer[2]);
866 ASSERT_EQ(4, buffer[3]);
867 }
868
UTEST(utf8pbrk,pbrk)869 UTEST(utf8pbrk, pbrk) { ASSERT_EQ(data + 8, utf8pbrk(data, pbrk)); }
870
UTEST(utf8pbrk,data)871 UTEST(utf8pbrk, data) { ASSERT_EQ(data, utf8pbrk(data, data)); }
872
UTEST(utf8casecmp,ascii)873 UTEST(utf8casecmp, ascii) { ASSERT_EQ(0, utf8casecmp(ascii1, ascii2)); }
UTEST(utf8casecmp,latin_upvslow)874 UTEST(utf8casecmp, latin_upvslow) {
875 ASSERT_EQ(0, utf8casecmp(lowersStr, uppersStr));
876 }
UTEST(utf8casecmp,latin_lowvsup)877 UTEST(utf8casecmp, latin_lowvsup) {
878 ASSERT_EQ(0, utf8casecmp(uppersStr, lowersStr));
879 }
880
UTEST(utf8casecmp,allascii)881 UTEST(utf8casecmp, allascii) {
882 ASSERT_EQ(0, utf8casecmp(allascii1, allascii2));
883 }
884
UTEST(utf8casecmp,data_lt)885 UTEST(utf8casecmp, data_lt) { ASSERT_LT(0, utf8casecmp(data, lt)); }
886
UTEST(utf8casecmp,data_eq)887 UTEST(utf8casecmp, data_eq) { ASSERT_EQ(0, utf8casecmp(data, data)); }
888
UTEST(utf8casecmp,data_gt)889 UTEST(utf8casecmp, data_gt) { ASSERT_GT(0, utf8casecmp(data, gt)); }
890
UTEST(utf8ncasecmp,lt_large)891 UTEST(utf8ncasecmp, lt_large) { ASSERT_LT(0, utf8ncasecmp(data, lt, 4000)); }
892
UTEST(utf8ncasecmp,lt_small)893 UTEST(utf8ncasecmp, lt_small) { ASSERT_EQ(0, utf8ncasecmp(data, lt, 7)); }
894
UTEST(utf8ncasecmp,eq_large)895 UTEST(utf8ncasecmp, eq_large) { ASSERT_EQ(0, utf8ncasecmp(data, data, 4000)); }
896
UTEST(utf8ncasecmp,eq_small)897 UTEST(utf8ncasecmp, eq_small) { ASSERT_EQ(0, utf8ncasecmp(data, data, 7)); }
898
UTEST(utf8ncasecmp,gt_large)899 UTEST(utf8ncasecmp, gt_large) { ASSERT_GT(0, utf8ncasecmp(data, gt, 4000)); }
900
UTEST(utf8ncasecmp,gt_small)901 UTEST(utf8ncasecmp, gt_small) { ASSERT_EQ(0, utf8ncasecmp(data, gt, 7)); }
902
UTEST(utf8ncasecmp,ascii)903 UTEST(utf8ncasecmp, ascii) { ASSERT_EQ(0, utf8ncasecmp(ascii1, ascii2, 4)); }
UTEST(utf8ncasecmp,latin_upvslow)904 UTEST(utf8ncasecmp, latin_upvslow) {
905 ASSERT_EQ(0, utf8ncasecmp(lowersStr, uppersStr, 120));
906 }
UTEST(utf8ncasecmp,latin_lowvsup)907 UTEST(utf8ncasecmp, latin_lowvsup) {
908 ASSERT_EQ(0, utf8ncasecmp(uppersStr, lowersStr, 120));
909 }
910
UTEST(utf8codepoint,data)911 UTEST(utf8codepoint, data) {
912 utf8_int32_t codepoint;
913 void *v;
914 size_t expected_length = utf8len(data) - 1;
915 for (v = utf8codepoint(data, &codepoint); codepoint;
916 v = utf8codepoint(v, &codepoint)) {
917 ASSERT_EQ(expected_length, utf8len(v));
918 expected_length -= 1;
919 }
920 }
921
UTEST(utf8codepointsize,size_1)922 UTEST(utf8codepointsize, size_1) { ASSERT_EQ(1, utf8codepointsize('A')); }
923
UTEST(utf8codepointsize,size_4)924 UTEST(utf8codepointsize, size_4) { ASSERT_EQ(4, utf8codepointsize(0x20C78)); }
925
UTEST(utf8catcodepoint,data)926 UTEST(utf8catcodepoint, data) {
927 char buffer[129];
928 char *p = buffer;
929 long cp;
930 int i;
931 memset(buffer, 0, 129);
932 for (i = 0; i < 128; i++) {
933 cp = (i % 2 == 0 ? 'A' : 0x20C78);
934 p = utf8catcodepoint(p, cp, 128 - (p - buffer));
935 if (!p) {
936 break;
937 }
938 }
939 ASSERT_EQ(51, utf8len(buffer));
940 }
941
UTEST(utf8islower,upper)942 UTEST(utf8islower, upper) {
943 utf8_int32_t i;
944
945 for (i = 0; 0 != lowupPairs[i].lower; i++) {
946 ASSERT_EQ(0, utf8islower(lowupPairs[i].upper));
947 }
948 }
949
UTEST(utf8islower,lower)950 UTEST(utf8islower, lower) {
951 utf8_int32_t i;
952
953 for (i = 0; 0 != lowupPairs[i].lower; i++) {
954 ASSERT_EQ(1, utf8islower(lowupPairs[i].lower));
955 }
956 }
957
UTEST(utf8isupper,upper)958 UTEST(utf8isupper, upper) {
959 utf8_int32_t i;
960
961 for (i = 0; 0 != lowupPairs[i].lower; i++) {
962 ASSERT_EQ(1, utf8isupper(lowupPairs[i].upper));
963 }
964 }
965
UTEST(utf8isupper,lower)966 UTEST(utf8isupper, lower) {
967 utf8_int32_t i;
968
969 for (i = 0; 0 != lowupPairs[i].lower; i++) {
970 ASSERT_EQ(0, utf8isupper(lowupPairs[i].lower));
971 }
972 }
973
UTEST(utf8lwr,ascii)974 UTEST(utf8lwr, ascii) {
975 size_t sz;
976 char *str;
977 sz = strlen(ascii1);
978 str = (char *)malloc(sz + 1);
979 memcpy(str, ascii1, sz + 1);
980 utf8lwr(str);
981 ASSERT_EQ(0, strcmp(str, "i like goats yarhar."));
982 free(str);
983 }
984
UTEST(utf8lwr,latin_lower)985 UTEST(utf8lwr, latin_lower) {
986 size_t sz;
987 void *str;
988 sz = utf8size(lowersStr);
989 str = malloc(sz);
990 memcpy(str, lowersStr, sz);
991 utf8lwr(str);
992 ASSERT_EQ(0, utf8cmp(str, lowersStr));
993 free(str);
994 }
995
UTEST(utf8lwr,latin_upper)996 UTEST(utf8lwr, latin_upper) {
997 size_t sz;
998 void *str;
999 sz = utf8size(uppersStr);
1000 str = malloc(sz);
1001 memcpy(str, uppersStr, sz);
1002 utf8lwr(str);
1003 ASSERT_EQ(0, utf8cmp(str, lowersStr));
1004 free(str);
1005 }
1006
UTEST(utf8upr,ascii)1007 UTEST(utf8upr, ascii) {
1008 size_t sz;
1009 char *str;
1010 sz = strlen(ascii1);
1011 str = (char *)malloc(sz + 1);
1012 memcpy(str, ascii1, sz + 1);
1013 utf8upr(str);
1014 ASSERT_EQ(0, strcmp(str, "I LIKE GOATS YARHAR."));
1015 free(str);
1016 }
1017
UTEST(utf8upr,latin_lower)1018 UTEST(utf8upr, latin_lower) {
1019 size_t sz;
1020 void *str;
1021 sz = utf8size(lowersStr);
1022 str = malloc(sz);
1023 memcpy(str, lowersStr, sz);
1024 utf8upr(str);
1025 ASSERT_EQ(0, utf8cmp(str, uppersStr));
1026 free(str);
1027 }
1028
UTEST(utf8upr,latin_upper)1029 UTEST(utf8upr, latin_upper) {
1030 size_t sz;
1031 void *str;
1032 sz = utf8size(uppersStr);
1033 str = malloc(sz);
1034 memcpy(str, uppersStr, sz);
1035 utf8upr(str);
1036 ASSERT_EQ(0, utf8cmp(str, uppersStr));
1037 free(str);
1038 }
1039
// Macro supplied by utest.h; per that framework's documentation it provides
// the program entry point that runs the UTEST cases declared above.
UTEST_MAIN();