1# This  software is Copyright (c) 2012-2018 magnum, and it is hereby
2# released to the general public under the following terms:
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted.
5#
6# Generic implementation of "dumb" exhaustive search of FULL Unicode.
7# Default is to try *all* allocated characters in Unicode v11 (there are
8# 137,046 of them). Even if a fast format can exhaust two characters in one
9# hour, three characters would take over 15 years (137,046 hours)...
10#
11# Note that these modes handle --max-len differently than normal: they count
12# characters rather than bytes. This means you can simply use e.g.
13# --max-len=3 to generate all three-character candidates (which may be up
14# to 12 bytes each).
15#
16# Also note that for UTF-16 formats, each character may take up to four
17# bytes (two 16-bit words) within the format, because characters above U+FFFF
18# are encoded as surrogate pairs. This means a format which normally handles
19# up to 27 characters may be limited to only 13 characters, worst case.
20[List.External:Dumb32]
21int maxlength;            // Maximum password length to try, in characters; init() sets it from req_maxlen, defaulting to 2
22int last;                 // Zero-based index of the last character position in the current candidate
23int lastid;               // Index into charset[] of the character in the last position
24int id[0x7f];             // Indices into charset[] for the other positions (room for 0x7f = 127 entries)
25int charset[0x22000], c0; // charset[]: code points filled in by init() (0x22000 = 139,264 slots); c0: presumably the first charset entry -- TODO confirm
26
27void init()
28{
29	int minlength;
30	int i, c;
31
32	# Trigger UTF-32 handling in External mode
33	utf32 = 1;
34
35	if (req_minlen)
36		minlength = req_minlen;
37	else
38		minlength = 1;
39	if (req_maxlen)
40		maxlength = req_maxlen;
41	else
42		maxlength = 2;
43
44/*
45 * This defines the character set. This is auto-generated from UnicodeData.txt
46 * and we skip control characters.
47 */
48	i = 0;
49// 0000..007F; Basic Latin
50	c = 0x20;		// from SPACE
51	while (c <= 0x7e)	// ..to TILDE
52		charset[i++] = c++;
53// 0080..00FF; Latin-1 Supplement
54	c = 0xa0;		// from NO-BREAK SPACE
55	while (c <= 0xff)	// ..to LATIN SMALL LETTER Y WITH DIAERESIS
56		charset[i++] = c++;
57// 0100..017F; Latin Extended-A
58	c = 0x100;		// from LATIN CAPITAL LETTER A WITH MACRON
59	while (c <= 0x17f)	// ..to LATIN SMALL LETTER LONG S
60		charset[i++] = c++;
61// 0180..024F; Latin Extended-B
62	c = 0x180;		// from LATIN SMALL LETTER B WITH STROKE
63	while (c <= 0x24f)	// ..to LATIN SMALL LETTER Y WITH STROKE
64		charset[i++] = c++;
65// 0250..02AF; IPA Extensions
66	c = 0x250;		// from LATIN SMALL LETTER TURNED A
67	while (c <= 0x2af)	// ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
68		charset[i++] = c++;
69// 02B0..02FF; Spacing Modifier Letters
70	c = 0x2b0;		// from MODIFIER LETTER SMALL H
71	while (c <= 0x2ff)	// ..to MODIFIER LETTER LOW LEFT ARROW
72		charset[i++] = c++;
73// 0300..036F; Combining Diacritical Marks
74	c = 0x300;		// from COMBINING GRAVE ACCENT
75	while (c <= 0x36f)	// ..to COMBINING LATIN SMALL LETTER X
76		charset[i++] = c++;
77// 0370..03FF; Greek and Coptic
78	c = 0x370;		// from GREEK CAPITAL LETTER HETA
79	while (c <= 0x377)	// ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
80		charset[i++] = c++;
81	c = 0x37a;		// from GREEK YPOGEGRAMMENI
82	while (c <= 0x37f)	// ..to GREEK CAPITAL LETTER YOT
83		charset[i++] = c++;
84	c = 0x384;		// from GREEK TONOS
85	while (c <= 0x38a)	// ..to GREEK CAPITAL LETTER IOTA WITH TONOS
86		charset[i++] = c++;
87	c = 0x38e;		// from GREEK CAPITAL LETTER UPSILON WITH TONOS
88	while (c <= 0x3a1)	// ..to GREEK CAPITAL LETTER RHO
89		charset[i++] = c++;
90	c = 0x3a3;		// from GREEK CAPITAL LETTER SIGMA
91	while (c <= 0x3ff)	// ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
92		charset[i++] = c++;
93// 0400..04FF; Cyrillic
94	c = 0x400;		// from CYRILLIC CAPITAL LETTER IE WITH GRAVE
95	while (c <= 0x4ff)	// ..to CYRILLIC SMALL LETTER HA WITH STROKE
96		charset[i++] = c++;
97// 0500..052F; Cyrillic Supplement
98	c = 0x500;		// from CYRILLIC CAPITAL LETTER KOMI DE
99	while (c <= 0x52f)	// ..to CYRILLIC SMALL LETTER EL WITH DESCENDER
100		charset[i++] = c++;
101// 0530..058F; Armenian
102	c = 0x531;		// from ARMENIAN CAPITAL LETTER AYB
103	while (c <= 0x556)	// ..to ARMENIAN CAPITAL LETTER FEH
104		charset[i++] = c++;
105	c = 0x559;		// from ARMENIAN MODIFIER LETTER LEFT HALF RING
106	while (c <= 0x58a)	// ..to ARMENIAN HYPHEN
107		charset[i++] = c++;
108	charset[i++] = 0x58d;	// RIGHT-FACING ARMENIAN ETERNITY SIGN
109	charset[i++] = 0x58f;	// ARMENIAN DRAM SIGN
110// 0590..05FF; Hebrew
111	c = 0x591;		// from HEBREW ACCENT ETNAHTA
112	while (c <= 0x5c7)	// ..to HEBREW POINT QAMATS QATAN
113		charset[i++] = c++;
114	c = 0x5d0;		// from HEBREW LETTER ALEF
115	while (c <= 0x5ea)	// ..to HEBREW LETTER TAV
116		charset[i++] = c++;
117	c = 0x5ef;		// from HEBREW YOD TRIANGLE
118	while (c <= 0x5f4)	// ..to HEBREW PUNCTUATION GERSHAYIM
119		charset[i++] = c++;
120// 0600..06FF; Arabic
121	c = 0x600;		// from ARABIC NUMBER SIGN
122	while (c <= 0x61c)	// ..to ARABIC LETTER MARK
123		charset[i++] = c++;
124	c = 0x61e;		// from ARABIC TRIPLE DOT PUNCTUATION MARK
125	while (c <= 0x6ff)	// ..to ARABIC LETTER HEH WITH INVERTED V
126		charset[i++] = c++;
127// 0700..074F; Syriac
128	c = 0x700;		// from SYRIAC END OF PARAGRAPH
129	while (c <= 0x70d)	// ..to SYRIAC HARKLEAN ASTERISCUS
130		charset[i++] = c++;
131	c = 0x70f;		// from SYRIAC ABBREVIATION MARK
132	while (c <= 0x74a)	// ..to SYRIAC BARREKH
133		charset[i++] = c++;
134	charset[i++] = 0x74d;	// SYRIAC LETTER SOGDIAN ZHAIN
135	charset[i++] = 0x74f;	// SYRIAC LETTER SOGDIAN FE
136// 0750..077F; Arabic Supplement
137	c = 0x750;		// from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW
138	while (c <= 0x77f)	// ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE
139		charset[i++] = c++;
140// 0780..07BF; Thaana
141	c = 0x780;		// from THAANA LETTER HAA
142	while (c <= 0x7b1)	// ..to THAANA LETTER NAA
143		charset[i++] = c++;
144// 07C0..07FF; NKo
145	c = 0x7c0;		// from NKO DIGIT ZERO
146	while (c <= 0x7fa)	// ..to NKO LAJANYALAN
147		charset[i++] = c++;
148	charset[i++] = 0x7fd;	// NKO DANTAYALAN
149	charset[i++] = 0x7ff;	// NKO TAMAN SIGN
150// 0800..083F; Samaritan
151	c = 0x800;		// from SAMARITAN LETTER ALAF
152	while (c <= 0x82d)	// ..to SAMARITAN MARK NEQUDAA
153		charset[i++] = c++;
154	c = 0x830;		// from SAMARITAN PUNCTUATION NEQUDAA
155	while (c <= 0x83e)	// ..to SAMARITAN PUNCTUATION ANNAAU
156		charset[i++] = c++;
157// 0840..085F; Mandaic
158	c = 0x840;		// from MANDAIC LETTER HALQA
159	while (c <= 0x85b)	// ..to MANDAIC GEMINATION MARK
160		charset[i++] = c++;
161	charset[i++] = 0x85e;	// MANDAIC PUNCTUATION
162// 0860..086F; Syriac Supplement
163	c = 0x860;		// from SYRIAC LETTER MALAYALAM NGA
164	while (c <= 0x86a)	// ..to SYRIAC LETTER MALAYALAM SSA
165		charset[i++] = c++;
166// 08A0..08FF; Arabic Extended-A
167	c = 0x8a0;		// from ARABIC LETTER BEH WITH SMALL V BELOW
168	while (c <= 0x8b4)	// ..to ARABIC LETTER KAF WITH DOT BELOW
169		charset[i++] = c++;
170	c = 0x8b6;		// from ARABIC LETTER BEH WITH SMALL MEEM ABOVE
171	while (c <= 0x8bd)	// ..to ARABIC LETTER AFRICAN NOON
172		charset[i++] = c++;
173	c = 0x8d3;		// from ARABIC SMALL LOW WAW
174	while (c <= 0x8ff)	// ..to ARABIC MARK SIDEWAYS NOON GHUNNA
175		charset[i++] = c++;
176// 0900..097F; Devanagari
177	c = 0x900;		// from DEVANAGARI SIGN INVERTED CANDRABINDU
178	while (c <= 0x97f)	// ..to DEVANAGARI LETTER BBA
179		charset[i++] = c++;
180// 0980..09FF; Bengali
181	c = 0x980;		// from BENGALI ANJI
182	while (c <= 0x983)	// ..to BENGALI SIGN VISARGA
183		charset[i++] = c++;
184	c = 0x985;		// from BENGALI LETTER A
185	while (c <= 0x98c)	// ..to BENGALI LETTER VOCALIC L
186		charset[i++] = c++;
187	charset[i++] = 0x98f;	// BENGALI LETTER E
188	charset[i++] = 0x990;	// BENGALI LETTER AI
189	c = 0x993;		// from BENGALI LETTER O
190	while (c <= 0x9a8)	// ..to BENGALI LETTER NA
191		charset[i++] = c++;
192	c = 0x9aa;		// from BENGALI LETTER PA
193	while (c <= 0x9b0)	// ..to BENGALI LETTER RA
194		charset[i++] = c++;
195	c = 0x9b6;		// from BENGALI LETTER SHA
196	while (c <= 0x9b9)	// ..to BENGALI LETTER HA
197		charset[i++] = c++;
198	c = 0x9bc;		// from BENGALI SIGN NUKTA
199	while (c <= 0x9c4)	// ..to BENGALI VOWEL SIGN VOCALIC RR
200		charset[i++] = c++;
201	charset[i++] = 0x9c7;	// BENGALI VOWEL SIGN E
202	charset[i++] = 0x9c8;	// BENGALI VOWEL SIGN AI
203	c = 0x9cb;		// from BENGALI VOWEL SIGN O
204	while (c <= 0x9ce)	// ..to BENGALI LETTER KHANDA TA
205		charset[i++] = c++;
206	charset[i++] = 0x9dc;	// BENGALI LETTER RRA
207	charset[i++] = 0x9dd;	// BENGALI LETTER RHA
208	c = 0x9df;		// from BENGALI LETTER YYA
209	while (c <= 0x9e3)	// ..to BENGALI VOWEL SIGN VOCALIC LL
210		charset[i++] = c++;
211	c = 0x9e6;		// from BENGALI DIGIT ZERO
212	while (c <= 0x9fe)	// ..to BENGALI SANDHI MARK
213		charset[i++] = c++;
214// 0A00..0A7F; Gurmukhi
215	charset[i++] = 0xa01;	// GURMUKHI SIGN ADAK BINDI
216	charset[i++] = 0xa03;	// GURMUKHI SIGN VISARGA
217	c = 0xa05;		// from GURMUKHI LETTER A
218	while (c <= 0xa0a)	// ..to GURMUKHI LETTER UU
219		charset[i++] = c++;
220	charset[i++] = 0xa0f;	// GURMUKHI LETTER EE
221	charset[i++] = 0xa10;	// GURMUKHI LETTER AI
222	c = 0xa13;		// from GURMUKHI LETTER OO
223	while (c <= 0xa28)	// ..to GURMUKHI LETTER NA
224		charset[i++] = c++;
225	c = 0xa2a;		// from GURMUKHI LETTER PA
226	while (c <= 0xa30)	// ..to GURMUKHI LETTER RA
227		charset[i++] = c++;
228	charset[i++] = 0xa32;	// GURMUKHI LETTER LA
229	charset[i++] = 0xa33;	// GURMUKHI LETTER LLA
230	charset[i++] = 0xa35;	// GURMUKHI LETTER VA
231	charset[i++] = 0xa36;	// GURMUKHI LETTER SHA
232	charset[i++] = 0xa38;	// GURMUKHI LETTER SA
233	charset[i++] = 0xa39;	// GURMUKHI LETTER HA
234	c = 0xa3e;		// from GURMUKHI VOWEL SIGN AA
235	while (c <= 0xa42)	// ..to GURMUKHI VOWEL SIGN UU
236		charset[i++] = c++;
237	charset[i++] = 0xa47;	// GURMUKHI VOWEL SIGN EE
238	charset[i++] = 0xa48;	// GURMUKHI VOWEL SIGN AI
239	charset[i++] = 0xa4b;	// GURMUKHI VOWEL SIGN OO
240	charset[i++] = 0xa4d;	// GURMUKHI SIGN VIRAMA
241	c = 0xa59;		// from GURMUKHI LETTER KHHA
242	while (c <= 0xa5c)	// ..to GURMUKHI LETTER RRA
243		charset[i++] = c++;
244	c = 0xa66;		// from GURMUKHI DIGIT ZERO
245	while (c <= 0xa76)	// ..to GURMUKHI ABBREVIATION SIGN
246		charset[i++] = c++;
247// 0A80..0AFF; Gujarati
248	charset[i++] = 0xa81;	// GUJARATI SIGN CANDRABINDU
249	charset[i++] = 0xa83;	// GUJARATI SIGN VISARGA
250	c = 0xa85;		// from GUJARATI LETTER A
251	while (c <= 0xa8d)	// ..to GUJARATI VOWEL CANDRA E
252		charset[i++] = c++;
253	charset[i++] = 0xa8f;	// GUJARATI LETTER E
254	charset[i++] = 0xa91;	// GUJARATI VOWEL CANDRA O
255	c = 0xa93;		// from GUJARATI LETTER O
256	while (c <= 0xaa8)	// ..to GUJARATI LETTER NA
257		charset[i++] = c++;
258	c = 0xaaa;		// from GUJARATI LETTER PA
259	while (c <= 0xab0)	// ..to GUJARATI LETTER RA
260		charset[i++] = c++;
261	charset[i++] = 0xab2;	// GUJARATI LETTER LA
262	charset[i++] = 0xab3;	// GUJARATI LETTER LLA
263	c = 0xab5;		// from GUJARATI LETTER VA
264	while (c <= 0xab9)	// ..to GUJARATI LETTER HA
265		charset[i++] = c++;
266	c = 0xabc;		// from GUJARATI SIGN NUKTA
267	while (c <= 0xac5)	// ..to GUJARATI VOWEL SIGN CANDRA E
268		charset[i++] = c++;
269	charset[i++] = 0xac7;	// GUJARATI VOWEL SIGN E
270	charset[i++] = 0xac9;	// GUJARATI VOWEL SIGN CANDRA O
271	charset[i++] = 0xacb;	// GUJARATI VOWEL SIGN O
272	charset[i++] = 0xacd;	// GUJARATI SIGN VIRAMA
273	c = 0xae0;		// from GUJARATI LETTER VOCALIC RR
274	while (c <= 0xae3)	// ..to GUJARATI VOWEL SIGN VOCALIC LL
275		charset[i++] = c++;
276	c = 0xae6;		// from GUJARATI DIGIT ZERO
277	while (c <= 0xaf1)	// ..to GUJARATI RUPEE SIGN
278		charset[i++] = c++;
279	c = 0xaf9;		// from GUJARATI LETTER ZHA
280	while (c <= 0xaff)	// ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
281		charset[i++] = c++;
282// 0B00..0B7F; Oriya
283	charset[i++] = 0xb01;	// ORIYA SIGN CANDRABINDU
284	charset[i++] = 0xb03;	// ORIYA SIGN VISARGA
285	c = 0xb05;		// from ORIYA LETTER A
286	while (c <= 0xb0c)	// ..to ORIYA LETTER VOCALIC L
287		charset[i++] = c++;
288	charset[i++] = 0xb0f;	// ORIYA LETTER E
289	charset[i++] = 0xb10;	// ORIYA LETTER AI
290	c = 0xb13;		// from ORIYA LETTER O
291	while (c <= 0xb28)	// ..to ORIYA LETTER NA
292		charset[i++] = c++;
293	c = 0xb2a;		// from ORIYA LETTER PA
294	while (c <= 0xb30)	// ..to ORIYA LETTER RA
295		charset[i++] = c++;
296	charset[i++] = 0xb32;	// ORIYA LETTER LA
297	charset[i++] = 0xb33;	// ORIYA LETTER LLA
298	c = 0xb35;		// from ORIYA LETTER VA
299	while (c <= 0xb39)	// ..to ORIYA LETTER HA
300		charset[i++] = c++;
301	c = 0xb3c;		// from ORIYA SIGN NUKTA
302	while (c <= 0xb44)	// ..to ORIYA VOWEL SIGN VOCALIC RR
303		charset[i++] = c++;
304	charset[i++] = 0xb47;	// ORIYA VOWEL SIGN E
305	charset[i++] = 0xb48;	// ORIYA VOWEL SIGN AI
306	charset[i++] = 0xb4b;	// ORIYA VOWEL SIGN O
307	charset[i++] = 0xb4d;	// ORIYA SIGN VIRAMA
308	charset[i++] = 0xb56;	// ORIYA AI LENGTH MARK
309	charset[i++] = 0xb57;	// ORIYA AU LENGTH MARK
310	charset[i++] = 0xb5c;	// ORIYA LETTER RRA
311	charset[i++] = 0xb5d;	// ORIYA LETTER RHA
312	c = 0xb5f;		// from ORIYA LETTER YYA
313	while (c <= 0xb63)	// ..to ORIYA VOWEL SIGN VOCALIC LL
314		charset[i++] = c++;
315	c = 0xb66;		// from ORIYA DIGIT ZERO
316	while (c <= 0xb77)	// ..to ORIYA FRACTION THREE SIXTEENTHS
317		charset[i++] = c++;
318// 0B80..0BFF; Tamil
319	charset[i++] = 0xb82;	// TAMIL SIGN ANUSVARA
320	charset[i++] = 0xb83;	// TAMIL SIGN VISARGA
321	c = 0xb85;		// from TAMIL LETTER A
322	while (c <= 0xb8a)	// ..to TAMIL LETTER UU
323		charset[i++] = c++;
324	charset[i++] = 0xb8e;	// TAMIL LETTER E
325	charset[i++] = 0xb90;	// TAMIL LETTER AI
326	c = 0xb92;		// from TAMIL LETTER O
327	while (c <= 0xb95)	// ..to TAMIL LETTER KA
328		charset[i++] = c++;
329	charset[i++] = 0xb99;	// TAMIL LETTER NGA
330	charset[i++] = 0xb9a;	// TAMIL LETTER CA
331	charset[i++] = 0xb9e;	// TAMIL LETTER NYA
332	charset[i++] = 0xb9f;	// TAMIL LETTER TTA
333	charset[i++] = 0xba3;	// TAMIL LETTER NNA
334	charset[i++] = 0xba4;	// TAMIL LETTER TA
335	charset[i++] = 0xba8;	// TAMIL LETTER NA
336	charset[i++] = 0xbaa;	// TAMIL LETTER PA
337	c = 0xbae;		// from TAMIL LETTER MA
338	while (c <= 0xbb9)	// ..to TAMIL LETTER HA
339		charset[i++] = c++;
340	c = 0xbbe;		// from TAMIL VOWEL SIGN AA
341	while (c <= 0xbc2)	// ..to TAMIL VOWEL SIGN UU
342		charset[i++] = c++;
343	charset[i++] = 0xbc6;	// TAMIL VOWEL SIGN E
344	charset[i++] = 0xbc8;	// TAMIL VOWEL SIGN AI
345	c = 0xbca;		// from TAMIL VOWEL SIGN O
346	while (c <= 0xbcd)	// ..to TAMIL SIGN VIRAMA
347		charset[i++] = c++;
348	c = 0xbe6;		// from TAMIL DIGIT ZERO
349	while (c <= 0xbfa)	// ..to TAMIL NUMBER SIGN
350		charset[i++] = c++;
351// 0C00..0C7F; Telugu
352	c = 0xc00;		// from TELUGU SIGN COMBINING CANDRABINDU ABOVE
353	while (c <= 0xc0c)	// ..to TELUGU LETTER VOCALIC L
354		charset[i++] = c++;
355	charset[i++] = 0xc0e;	// TELUGU LETTER E
356	charset[i++] = 0xc10;	// TELUGU LETTER AI
357	c = 0xc12;		// from TELUGU LETTER O
358	while (c <= 0xc28)	// ..to TELUGU LETTER NA
359		charset[i++] = c++;
360	c = 0xc2a;		// from TELUGU LETTER PA
361	while (c <= 0xc39)	// ..to TELUGU LETTER HA
362		charset[i++] = c++;
363	c = 0xc3d;		// from TELUGU SIGN AVAGRAHA
364	while (c <= 0xc44)	// ..to TELUGU VOWEL SIGN VOCALIC RR
365		charset[i++] = c++;
366	charset[i++] = 0xc46;	// TELUGU VOWEL SIGN E
367	charset[i++] = 0xc48;	// TELUGU VOWEL SIGN AI
368	c = 0xc4a;		// from TELUGU VOWEL SIGN O
369	while (c <= 0xc4d)	// ..to TELUGU SIGN VIRAMA
370		charset[i++] = c++;
371	charset[i++] = 0xc55;	// TELUGU LENGTH MARK
372	charset[i++] = 0xc56;	// TELUGU AI LENGTH MARK
373	charset[i++] = 0xc58;	// TELUGU LETTER TSA
374	charset[i++] = 0xc5a;	// TELUGU LETTER RRRA
375	c = 0xc60;		// from TELUGU LETTER VOCALIC RR
376	while (c <= 0xc63)	// ..to TELUGU VOWEL SIGN VOCALIC LL
377		charset[i++] = c++;
378	c = 0xc66;		// from TELUGU DIGIT ZERO
379	while (c <= 0xc6f)	// ..to TELUGU DIGIT NINE
380		charset[i++] = c++;
381	c = 0xc78;		// from TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR
382	while (c <= 0xc7f)	// ..to TELUGU SIGN TUUMU
383		charset[i++] = c++;
384// 0C80..0CFF; Kannada
385	c = 0xc80;		// from KANNADA SIGN SPACING CANDRABINDU
386	while (c <= 0xc8c)	// ..to KANNADA LETTER VOCALIC L
387		charset[i++] = c++;
388	charset[i++] = 0xc8e;	// KANNADA LETTER E
389	charset[i++] = 0xc90;	// KANNADA LETTER AI
390	c = 0xc92;		// from KANNADA LETTER O
391	while (c <= 0xca8)	// ..to KANNADA LETTER NA
392		charset[i++] = c++;
393	c = 0xcaa;		// from KANNADA LETTER PA
394	while (c <= 0xcb3)	// ..to KANNADA LETTER LLA
395		charset[i++] = c++;
396	c = 0xcb5;		// from KANNADA LETTER VA
397	while (c <= 0xcb9)	// ..to KANNADA LETTER HA
398		charset[i++] = c++;
399	c = 0xcbc;		// from KANNADA SIGN NUKTA
400	while (c <= 0xcc4)	// ..to KANNADA VOWEL SIGN VOCALIC RR
401		charset[i++] = c++;
402	charset[i++] = 0xcc6;	// KANNADA VOWEL SIGN E
403	charset[i++] = 0xcc8;	// KANNADA VOWEL SIGN AI
404	c = 0xcca;		// from KANNADA VOWEL SIGN O
405	while (c <= 0xccd)	// ..to KANNADA SIGN VIRAMA
406		charset[i++] = c++;
407	charset[i++] = 0xcd5;	// KANNADA LENGTH MARK
408	charset[i++] = 0xcd6;	// KANNADA AI LENGTH MARK
409	c = 0xce0;		// from KANNADA LETTER VOCALIC RR
410	while (c <= 0xce3)	// ..to KANNADA VOWEL SIGN VOCALIC LL
411		charset[i++] = c++;
412	c = 0xce6;		// from KANNADA DIGIT ZERO
413	while (c <= 0xcef)	// ..to KANNADA DIGIT NINE
414		charset[i++] = c++;
415	charset[i++] = 0xcf1;	// KANNADA SIGN JIHVAMULIYA
416	charset[i++] = 0xcf2;	// KANNADA SIGN UPADHMANIYA
417// 0D00..0D7F; Malayalam
418	c = 0xd00;		// from MALAYALAM SIGN COMBINING ANUSVARA ABOVE
419	while (c <= 0xd03)	// ..to MALAYALAM SIGN VISARGA
420		charset[i++] = c++;
421	c = 0xd05;		// from MALAYALAM LETTER A
422	while (c <= 0xd0c)	// ..to MALAYALAM LETTER VOCALIC L
423		charset[i++] = c++;
424	charset[i++] = 0xd0e;	// MALAYALAM LETTER E
425	charset[i++] = 0xd10;	// MALAYALAM LETTER AI
426	c = 0xd12;		// from MALAYALAM LETTER O
427	while (c <= 0xd44)	// ..to MALAYALAM VOWEL SIGN VOCALIC RR
428		charset[i++] = c++;
429	charset[i++] = 0xd46;	// MALAYALAM VOWEL SIGN E
430	charset[i++] = 0xd48;	// MALAYALAM VOWEL SIGN AI
431	c = 0xd4a;		// from MALAYALAM VOWEL SIGN O
432	while (c <= 0xd4f)	// ..to MALAYALAM SIGN PARA
433		charset[i++] = c++;
434	c = 0xd54;		// from MALAYALAM LETTER CHILLU M
435	while (c <= 0xd63)	// ..to MALAYALAM VOWEL SIGN VOCALIC LL
436		charset[i++] = c++;
437	c = 0xd66;		// from MALAYALAM DIGIT ZERO
438	while (c <= 0xd7f)	// ..to MALAYALAM LETTER CHILLU K
439		charset[i++] = c++;
440// 0D80..0DFF; Sinhala
441	charset[i++] = 0xd82;	// SINHALA SIGN ANUSVARAYA
442	charset[i++] = 0xd83;	// SINHALA SIGN VISARGAYA
443	c = 0xd85;		// from SINHALA LETTER AYANNA
444	while (c <= 0xd96)	// ..to SINHALA LETTER AUYANNA
445		charset[i++] = c++;
446	c = 0xd9a;		// from SINHALA LETTER ALPAPRAANA KAYANNA
447	while (c <= 0xdb1)	// ..to SINHALA LETTER DANTAJA NAYANNA
448		charset[i++] = c++;
449	c = 0xdb3;		// from SINHALA LETTER SANYAKA DAYANNA
450	while (c <= 0xdbb)	// ..to SINHALA LETTER RAYANNA
451		charset[i++] = c++;
452	c = 0xdc0;		// from SINHALA LETTER VAYANNA
453	while (c <= 0xdc6)	// ..to SINHALA LETTER FAYANNA
454		charset[i++] = c++;
455	c = 0xdcf;		// from SINHALA VOWEL SIGN AELA-PILLA
456	while (c <= 0xdd4)	// ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
457		charset[i++] = c++;
458	c = 0xdd8;		// from SINHALA VOWEL SIGN GAETTA-PILLA
459	while (c <= 0xddf)	// ..to SINHALA VOWEL SIGN GAYANUKITTA
460		charset[i++] = c++;
461	c = 0xde6;		// from SINHALA LITH DIGIT ZERO
462	while (c <= 0xdef)	// ..to SINHALA LITH DIGIT NINE
463		charset[i++] = c++;
464	charset[i++] = 0xdf2;	// SINHALA VOWEL SIGN DIGA GAETTA-PILLA
465	charset[i++] = 0xdf4;	// SINHALA PUNCTUATION KUNDDALIYA
466// 0E00..0E7F; Thai
467	c = 0xe01;		// from THAI CHARACTER KO KAI
468	while (c <= 0xe3a)	// ..to THAI CHARACTER PHINTHU
469		charset[i++] = c++;
470	c = 0xe3f;		// from THAI CURRENCY SYMBOL BAHT
471	while (c <= 0xe5b)	// ..to THAI CHARACTER KHOMUT
472		charset[i++] = c++;
473// 0E80..0EFF; Lao
474	charset[i++] = 0xe81;	// LAO LETTER KO
475	charset[i++] = 0xe82;	// LAO LETTER KHO SUNG
476	charset[i++] = 0xe87;	// LAO LETTER NGO
477	charset[i++] = 0xe88;	// LAO LETTER CO
478	c = 0xe94;		// from LAO LETTER DO
479	while (c <= 0xe97)	// ..to LAO LETTER THO TAM
480		charset[i++] = c++;
481	c = 0xe99;		// from LAO LETTER NO
482	while (c <= 0xe9f)	// ..to LAO LETTER FO SUNG
483		charset[i++] = c++;
484	charset[i++] = 0xea1;	// LAO LETTER MO
485	charset[i++] = 0xea3;	// LAO LETTER LO LING
486	charset[i++] = 0xeaa;	// LAO LETTER SO SUNG
487	charset[i++] = 0xeab;	// LAO LETTER HO SUNG
488	c = 0xead;		// from LAO LETTER O
489	while (c <= 0xeb9)	// ..to LAO VOWEL SIGN UU
490		charset[i++] = c++;
491	charset[i++] = 0xebb;	// LAO VOWEL SIGN MAI KON
492	charset[i++] = 0xebd;	// LAO SEMIVOWEL SIGN NYO
493	c = 0xec0;		// from LAO VOWEL SIGN E
494	while (c <= 0xec4)	// ..to LAO VOWEL SIGN AI
495		charset[i++] = c++;
496	c = 0xec8;		// from LAO TONE MAI EK
497	while (c <= 0xecd)	// ..to LAO NIGGAHITA
498		charset[i++] = c++;
499	c = 0xed0;		// from LAO DIGIT ZERO
500	while (c <= 0xed9)	// ..to LAO DIGIT NINE
501		charset[i++] = c++;
502	c = 0xedc;		// from LAO HO NO
503	while (c <= 0xedf)	// ..to LAO LETTER KHMU NYO
504		charset[i++] = c++;
505// 0F00..0FFF; Tibetan
506	c = 0xf00;		// from TIBETAN SYLLABLE OM
507	while (c <= 0xf47)	// ..to TIBETAN LETTER JA
508		charset[i++] = c++;
509	c = 0xf49;		// from TIBETAN LETTER NYA
510	while (c <= 0xf6c)	// ..to TIBETAN LETTER RRA
511		charset[i++] = c++;
512	c = 0xf71;		// from TIBETAN VOWEL SIGN AA
513	while (c <= 0xf97)	// ..to TIBETAN SUBJOINED LETTER JA
514		charset[i++] = c++;
515	c = 0xf99;		// from TIBETAN SUBJOINED LETTER NYA
516	while (c <= 0xfbc)	// ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
517		charset[i++] = c++;
518	c = 0xfbe;		// from TIBETAN KU RU KHA
519	while (c <= 0xfcc)	// ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
520		charset[i++] = c++;
521	c = 0xfce;		// from TIBETAN SIGN RDEL NAG RDEL DKAR
522	while (c <= 0xfda)	// ..to TIBETAN MARK TRAILING MCHAN RTAGS
523		charset[i++] = c++;
524// 1000..109F; Myanmar
525	c = 0x1000;		// from MYANMAR LETTER KA
526	while (c <= 0x109f)	// ..to MYANMAR SYMBOL SHAN EXCLAMATION
527		charset[i++] = c++;
528// 10A0..10FF; Georgian
529	c = 0x10a0;		// from GEORGIAN CAPITAL LETTER AN
530	while (c <= 0x10c5)	// ..to GEORGIAN CAPITAL LETTER HOE
531		charset[i++] = c++;
532	c = 0x10d0;		// from GEORGIAN LETTER AN
533	while (c <= 0x10ff)	// ..to GEORGIAN LETTER LABIAL SIGN
534		charset[i++] = c++;
535// 1100..11FF; Hangul Jamo
536	c = 0x1100;		// from HANGUL CHOSEONG KIYEOK
537	while (c <= 0x11ff)	// ..to HANGUL JONGSEONG SSANGNIEUN
538		charset[i++] = c++;
539// 1200..137F; Ethiopic
540	c = 0x1200;		// from ETHIOPIC SYLLABLE HA
541	while (c <= 0x1248)	// ..to ETHIOPIC SYLLABLE QWA
542		charset[i++] = c++;
543	c = 0x124a;		// from ETHIOPIC SYLLABLE QWI
544	while (c <= 0x124d)	// ..to ETHIOPIC SYLLABLE QWE
545		charset[i++] = c++;
546	c = 0x1250;		// from ETHIOPIC SYLLABLE QHA
547	while (c <= 0x1256)	// ..to ETHIOPIC SYLLABLE QHO
548		charset[i++] = c++;
549	c = 0x125a;		// from ETHIOPIC SYLLABLE QHWI
550	while (c <= 0x125d)	// ..to ETHIOPIC SYLLABLE QHWE
551		charset[i++] = c++;
552	c = 0x1260;		// from ETHIOPIC SYLLABLE BA
553	while (c <= 0x1288)	// ..to ETHIOPIC SYLLABLE XWA
554		charset[i++] = c++;
555	c = 0x128a;		// from ETHIOPIC SYLLABLE XWI
556	while (c <= 0x128d)	// ..to ETHIOPIC SYLLABLE XWE
557		charset[i++] = c++;
558	c = 0x1290;		// from ETHIOPIC SYLLABLE NA
559	while (c <= 0x12b0)	// ..to ETHIOPIC SYLLABLE KWA
560		charset[i++] = c++;
561	c = 0x12b2;		// from ETHIOPIC SYLLABLE KWI
562	while (c <= 0x12b5)	// ..to ETHIOPIC SYLLABLE KWE
563		charset[i++] = c++;
564	c = 0x12b8;		// from ETHIOPIC SYLLABLE KXA
565	while (c <= 0x12be)	// ..to ETHIOPIC SYLLABLE KXO
566		charset[i++] = c++;
567	c = 0x12c2;		// from ETHIOPIC SYLLABLE KXWI
568	while (c <= 0x12c5)	// ..to ETHIOPIC SYLLABLE KXWE
569		charset[i++] = c++;
570	c = 0x12c8;		// from ETHIOPIC SYLLABLE WA
571	while (c <= 0x12d6)	// ..to ETHIOPIC SYLLABLE PHARYNGEAL O
572		charset[i++] = c++;
573	c = 0x12d8;		// from ETHIOPIC SYLLABLE ZA
574	while (c <= 0x1310)	// ..to ETHIOPIC SYLLABLE GWA
575		charset[i++] = c++;
576	c = 0x1312;		// from ETHIOPIC SYLLABLE GWI
577	while (c <= 0x1315)	// ..to ETHIOPIC SYLLABLE GWE
578		charset[i++] = c++;
579	c = 0x1318;		// from ETHIOPIC SYLLABLE GGA
580	while (c <= 0x135a)	// ..to ETHIOPIC SYLLABLE FYA
581		charset[i++] = c++;
582	c = 0x135d;		// from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
583	while (c <= 0x137c)	// ..to ETHIOPIC NUMBER TEN THOUSAND
584		charset[i++] = c++;
585// 1380..139F; Ethiopic Supplement
586	c = 0x1380;		// from ETHIOPIC SYLLABLE SEBATBEIT MWA
587	while (c <= 0x1399)	// ..to ETHIOPIC TONAL MARK KURT
588		charset[i++] = c++;
589// 13A0..13FF; Cherokee
590	c = 0x13a0;		// from CHEROKEE LETTER A
591	while (c <= 0x13f5)	// ..to CHEROKEE LETTER MV
592		charset[i++] = c++;
593	c = 0x13f8;		// from CHEROKEE SMALL LETTER YE
594	while (c <= 0x13fd)	// ..to CHEROKEE SMALL LETTER MV
595		charset[i++] = c++;
596// 1400..167F; Unified Canadian Aboriginal Syllabics
597	c = 0x1400;		// from CANADIAN SYLLABICS HYPHEN
598	while (c <= 0x167f)	// ..to CANADIAN SYLLABICS BLACKFOOT W
599		charset[i++] = c++;
600// 1680..169F; Ogham
601	c = 0x1680;		// from OGHAM SPACE MARK
602	while (c <= 0x169c)	// ..to OGHAM REVERSED FEATHER MARK
603		charset[i++] = c++;
604// 16A0..16FF; Runic
605	c = 0x16a0;		// from RUNIC LETTER FEHU FEOH FE F
606	while (c <= 0x16f8)	// ..to RUNIC LETTER FRANKS CASKET AESC
607		charset[i++] = c++;
608// 1700..171F; Tagalog
609	c = 0x1700;		// from TAGALOG LETTER A
610	while (c <= 0x170c)	// ..to TAGALOG LETTER YA
611		charset[i++] = c++;
612	c = 0x170e;		// from TAGALOG LETTER LA
613	while (c <= 0x1714)	// ..to TAGALOG SIGN VIRAMA
614		charset[i++] = c++;
615// 1720..173F; Hanunoo
616	c = 0x1720;		// from HANUNOO LETTER A
617	while (c <= 0x1736)	// ..to PHILIPPINE DOUBLE PUNCTUATION
618		charset[i++] = c++;
619// 1740..175F; Buhid
620	c = 0x1740;		// from BUHID LETTER A
621	while (c <= 0x1753)	// ..to BUHID VOWEL SIGN U
622		charset[i++] = c++;
623// 1760..177F; Tagbanwa
624	c = 0x1760;		// from TAGBANWA LETTER A
625	while (c <= 0x176c)	// ..to TAGBANWA LETTER YA
626		charset[i++] = c++;
627	charset[i++] = 0x176e;	// TAGBANWA LETTER LA
628	charset[i++] = 0x1770;	// TAGBANWA LETTER SA
629	charset[i++] = 0x1772;	// TAGBANWA VOWEL SIGN I
630	charset[i++] = 0x1773;	// TAGBANWA VOWEL SIGN U
631// 1780..17FF; Khmer
632	c = 0x1780;		// from KHMER LETTER KA
633	while (c <= 0x17dd)	// ..to KHMER SIGN ATTHACAN
634		charset[i++] = c++;
635	c = 0x17e0;		// from KHMER DIGIT ZERO
636	while (c <= 0x17e9)	// ..to KHMER DIGIT NINE
637		charset[i++] = c++;
638	c = 0x17f0;		// from KHMER SYMBOL LEK ATTAK SON
639	while (c <= 0x17f9)	// ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
640		charset[i++] = c++;
641// 1800..18AF; Mongolian
642	c = 0x1800;		// from MONGOLIAN BIRGA
643	while (c <= 0x180e)	// ..to MONGOLIAN VOWEL SEPARATOR
644		charset[i++] = c++;
645	c = 0x1810;		// from MONGOLIAN DIGIT ZERO
646	while (c <= 0x1819)	// ..to MONGOLIAN DIGIT NINE
647		charset[i++] = c++;
648	c = 0x1820;		// from MONGOLIAN LETTER A
649	while (c <= 0x1878)	// ..to MONGOLIAN LETTER CHA WITH TWO DOTS
650		charset[i++] = c++;
651	c = 0x1880;		// from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
652	while (c <= 0x18aa)	// ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
653		charset[i++] = c++;
654// 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
655	c = 0x18b0;		// from CANADIAN SYLLABICS OY
656	while (c <= 0x18f5)	// ..to CANADIAN SYLLABICS CARRIER DENTAL S
657		charset[i++] = c++;
658// 1900..194F; Limbu
659	c = 0x1900;		// from LIMBU VOWEL-CARRIER LETTER
660	while (c <= 0x191e)	// ..to LIMBU LETTER TRA
661		charset[i++] = c++;
662	c = 0x1920;		// from LIMBU VOWEL SIGN A
663	while (c <= 0x192b)	// ..to LIMBU SUBJOINED LETTER WA
664		charset[i++] = c++;
665	c = 0x1930;		// from LIMBU SMALL LETTER KA
666	while (c <= 0x193b)	// ..to LIMBU SIGN SA-I
667		charset[i++] = c++;
668	c = 0x1944;		// from LIMBU EXCLAMATION MARK
669	while (c <= 0x194f)	// ..to LIMBU DIGIT NINE
670		charset[i++] = c++;
671// 1950..197F; Tai Le
672	c = 0x1950;		// from TAI LE LETTER KA
673	while (c <= 0x196d)	// ..to TAI LE LETTER AI
674		charset[i++] = c++;
675	c = 0x1970;		// from TAI LE LETTER TONE-2
676	while (c <= 0x1974)	// ..to TAI LE LETTER TONE-6
677		charset[i++] = c++;
678// 1980..19DF; New Tai Lue
679	c = 0x1980;		// from NEW TAI LUE LETTER HIGH QA
680	while (c <= 0x19ab)	// ..to NEW TAI LUE LETTER LOW SUA
681		charset[i++] = c++;
682	c = 0x19b0;		// from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
683	while (c <= 0x19c9)	// ..to NEW TAI LUE TONE MARK-2
684		charset[i++] = c++;
685	c = 0x19d0;		// from NEW TAI LUE DIGIT ZERO
686	while (c <= 0x19da)	// ..to NEW TAI LUE THAM DIGIT ONE
687		charset[i++] = c++;
688	charset[i++] = 0x19de;	// NEW TAI LUE SIGN LAE
689	charset[i++] = 0x19df;	// NEW TAI LUE SIGN LAEV
690// 19E0..19FF; Khmer Symbols
691	c = 0x19e0;		// from KHMER SYMBOL PATHAMASAT
692	while (c <= 0x19ff)	// ..to KHMER SYMBOL DAP-PRAM ROC
693		charset[i++] = c++;
694// 1A00..1A1F; Buginese
695	c = 0x1a00;		// from BUGINESE LETTER KA
696	while (c <= 0x1a1b)	// ..to BUGINESE VOWEL SIGN AE
697		charset[i++] = c++;
698	charset[i++] = 0x1a1e;	// BUGINESE PALLAWA
699	charset[i++] = 0x1a1f;	// BUGINESE END OF SECTION
700// 1A20..1AAF; Tai Tham
701	c = 0x1a20;		// from TAI THAM LETTER HIGH KA
702	while (c <= 0x1a5e)	// ..to TAI THAM CONSONANT SIGN SA
703		charset[i++] = c++;
704	c = 0x1a60;		// from TAI THAM SIGN SAKOT
705	while (c <= 0x1a7c)	// ..to TAI THAM SIGN KHUEN-LUE KARAN
706		charset[i++] = c++;
707	c = 0x1a7f;		// from TAI THAM COMBINING CRYPTOGRAMMIC DOT
708	while (c <= 0x1a89)	// ..to TAI THAM HORA DIGIT NINE
709		charset[i++] = c++;
710	c = 0x1a90;		// from TAI THAM THAM DIGIT ZERO
711	while (c <= 0x1a99)	// ..to TAI THAM THAM DIGIT NINE
712		charset[i++] = c++;
713	c = 0x1aa0;		// from TAI THAM SIGN WIANG
714	while (c <= 0x1aad)	// ..to TAI THAM SIGN CAANG
715		charset[i++] = c++;
716// 1AB0..1AFF; Combining Diacritical Marks Extended
717	c = 0x1ab0;		// from COMBINING DOUBLED CIRCUMFLEX ACCENT
718	while (c <= 0x1abe)	// ..to COMBINING PARENTHESES OVERLAY
719		charset[i++] = c++;
720// 1B00..1B7F; Balinese
721	c = 0x1b00;		// from BALINESE SIGN ULU RICEM
722	while (c <= 0x1b4b)	// ..to BALINESE LETTER ASYURA SASAK
723		charset[i++] = c++;
724	c = 0x1b50;		// from BALINESE DIGIT ZERO
725	while (c <= 0x1b7c)	// ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
726		charset[i++] = c++;
727// 1B80..1BBF; Sundanese
728	c = 0x1b80;		// from SUNDANESE SIGN PANYECEK
729	while (c <= 0x1bbf)	// ..to SUNDANESE LETTER FINAL M
730		charset[i++] = c++;
731// 1BC0..1BFF; Batak
732	c = 0x1bc0;		// from BATAK LETTER A
733	while (c <= 0x1bf3)	// ..to BATAK PANONGONAN
734		charset[i++] = c++;
735	c = 0x1bfc;		// from BATAK SYMBOL BINDU NA METEK
736	while (c <= 0x1bff)	// ..to BATAK SYMBOL BINDU PANGOLAT
737		charset[i++] = c++;
738// 1C00..1C4F; Lepcha
739	c = 0x1c00;		// from LEPCHA LETTER KA
740	while (c <= 0x1c37)	// ..to LEPCHA SIGN NUKTA
741		charset[i++] = c++;
742	c = 0x1c3b;		// from LEPCHA PUNCTUATION TA-ROL
743	while (c <= 0x1c49)	// ..to LEPCHA DIGIT NINE
744		charset[i++] = c++;
745	charset[i++] = 0x1c4d;	// LEPCHA LETTER TTA
746	charset[i++] = 0x1c4f;	// LEPCHA LETTER DDA
747// 1C50..1C7F; Ol Chiki
748	c = 0x1c50;		// from OL CHIKI DIGIT ZERO
749	while (c <= 0x1c7f)	// ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
750		charset[i++] = c++;
751// 1C80..1C8F; Cyrillic Extended-C
752	c = 0x1c80;		// from CYRILLIC SMALL LETTER ROUNDED VE
753	while (c <= 0x1c88)	// ..to CYRILLIC SMALL LETTER UNBLENDED UK
754		charset[i++] = c++;
755// 1C90..1CBF; Georgian Extended
756	c = 0x1c90;		// from GEORGIAN MTAVRULI CAPITAL LETTER AN
757	while (c <= 0x1cba)	// ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN
758		charset[i++] = c++;
759	charset[i++] = 0x1cbd;	// GEORGIAN MTAVRULI CAPITAL LETTER AEN
760	charset[i++] = 0x1cbf;	// GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
761// 1CC0..1CCF; Sundanese Supplement
762	c = 0x1cc0;		// from SUNDANESE PUNCTUATION BINDU SURYA
763	while (c <= 0x1cc7)	// ..to SUNDANESE PUNCTUATION BINDU BA SATANGA
764		charset[i++] = c++;
765// 1CD0..1CFF; Vedic Extensions
766	c = 0x1cd0;		// from VEDIC TONE KARSHANA
767	while (c <= 0x1cf9)	// ..to VEDIC TONE DOUBLE RING ABOVE
768		charset[i++] = c++;
769// 1D00..1D7F; Phonetic Extensions
770	c = 0x1d00;		// from LATIN LETTER SMALL CAPITAL A
771	while (c <= 0x1d7f)	// ..to LATIN SMALL LETTER UPSILON WITH STROKE
772		charset[i++] = c++;
773// 1D80..1DBF; Phonetic Extensions Supplement
774	c = 0x1d80;		// from LATIN SMALL LETTER B WITH PALATAL HOOK
775	while (c <= 0x1dbf)	// ..to MODIFIER LETTER SMALL THETA
776		charset[i++] = c++;
777// 1DC0..1DFF; Combining Diacritical Marks Supplement
778	c = 0x1dc0;		// from COMBINING DOTTED GRAVE ACCENT
779	while (c <= 0x1df9)	// ..to COMBINING WIDE INVERTED BRIDGE BELOW
780		charset[i++] = c++;
781	c = 0x1dfb;		// from COMBINING DELETION MARK
782	while (c <= 0x1dff)	// ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
783		charset[i++] = c++;
784// 1E00..1EFF; Latin Extended Additional
785	c = 0x1e00;		// from LATIN CAPITAL LETTER A WITH RING BELOW
786	while (c <= 0x1eff)	// ..to LATIN SMALL LETTER Y WITH LOOP
787		charset[i++] = c++;
788// 1F00..1FFF; Greek Extended
789	c = 0x1f00;		// from GREEK SMALL LETTER ALPHA WITH PSILI
790	while (c <= 0x1f15)	// ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
791		charset[i++] = c++;
792	c = 0x1f18;		// from GREEK CAPITAL LETTER EPSILON WITH PSILI
793	while (c <= 0x1f1d)	// ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
794		charset[i++] = c++;
795	c = 0x1f20;		// from GREEK SMALL LETTER ETA WITH PSILI
796	while (c <= 0x1f45)	// ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
797		charset[i++] = c++;
798	c = 0x1f48;		// from GREEK CAPITAL LETTER OMICRON WITH PSILI
799	while (c <= 0x1f4d)	// ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
800		charset[i++] = c++;
801	c = 0x1f50;		// from GREEK SMALL LETTER UPSILON WITH PSILI
802	while (c <= 0x1f57)	// ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
803		charset[i++] = c++;
804	c = 0x1f5f;		// from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
805	while (c <= 0x1f7d)	// ..to GREEK SMALL LETTER OMEGA WITH OXIA
806		charset[i++] = c++;
807	c = 0x1f80;		// from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
808	while (c <= 0x1fb4)	// ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
809		charset[i++] = c++;
810	c = 0x1fb6;		// from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
811	while (c <= 0x1fc4)	// ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
812		charset[i++] = c++;
813	c = 0x1fc6;		// from GREEK SMALL LETTER ETA WITH PERISPOMENI
814	while (c <= 0x1fd3)	// ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
815		charset[i++] = c++;
816	c = 0x1fd6;		// from GREEK SMALL LETTER IOTA WITH PERISPOMENI
817	while (c <= 0x1fdb)	// ..to GREEK CAPITAL LETTER IOTA WITH OXIA
818		charset[i++] = c++;
819	c = 0x1fdd;		// from GREEK DASIA AND VARIA
820	while (c <= 0x1fef)	// ..to GREEK VARIA
821		charset[i++] = c++;
822	charset[i++] = 0x1ff2;	// GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
823	charset[i++] = 0x1ff4;	// GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
824	c = 0x1ff6;		// from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
825	while (c <= 0x1ffe)	// ..to GREEK DASIA
826		charset[i++] = c++;
827// 2000..206F; General Punctuation
828	c = 0x2000;		// from EN QUAD
829	while (c <= 0x2064)	// ..to INVISIBLE PLUS
830		charset[i++] = c++;
831	c = 0x2066;		// from LEFT-TO-RIGHT ISOLATE
832	while (c <= 0x206f)	// ..to NOMINAL DIGIT SHAPES
833		charset[i++] = c++;
834// 2070..209F; Superscripts and Subscripts
835	charset[i++] = 0x2070;	// SUPERSCRIPT ZERO
836	charset[i++] = 0x2071;	// SUPERSCRIPT LATIN SMALL LETTER I
837	c = 0x2074;		// from SUPERSCRIPT FOUR
838	while (c <= 0x208e)	// ..to SUBSCRIPT RIGHT PARENTHESIS
839		charset[i++] = c++;
840	c = 0x2090;		// from LATIN SUBSCRIPT SMALL LETTER A
841	while (c <= 0x209c)	// ..to LATIN SUBSCRIPT SMALL LETTER T
842		charset[i++] = c++;
843// 20A0..20CF; Currency Symbols
844	c = 0x20a0;		// from EURO-CURRENCY SIGN
845	while (c <= 0x20bf)	// ..to BITCOIN SIGN
846		charset[i++] = c++;
847// 20D0..20FF; Combining Diacritical Marks for Symbols
848	c = 0x20d0;		// from COMBINING LEFT HARPOON ABOVE
849	while (c <= 0x20f0)	// ..to COMBINING ASTERISK ABOVE
850		charset[i++] = c++;
851// 2100..214F; Letterlike Symbols
852	c = 0x2100;		// from ACCOUNT OF
853	while (c <= 0x214f)	// ..to SYMBOL FOR SAMARITAN SOURCE
854		charset[i++] = c++;
855// 2150..218F; Number Forms
856	c = 0x2150;		// from VULGAR FRACTION ONE SEVENTH
857	while (c <= 0x218b)	// ..to TURNED DIGIT THREE
858		charset[i++] = c++;
859// 2190..21FF; Arrows
860	c = 0x2190;		// from LEFTWARDS ARROW
861	while (c <= 0x21ff)	// ..to LEFT RIGHT OPEN-HEADED ARROW
862		charset[i++] = c++;
863// 2200..22FF; Mathematical Operators
864	c = 0x2200;		// from FOR ALL
865	while (c <= 0x22ff)	// ..to Z NOTATION BAG MEMBERSHIP
866		charset[i++] = c++;
867// 2300..23FF; Miscellaneous Technical
868	c = 0x2300;		// from DIAMETER SIGN
869	while (c <= 0x23ff)	// ..to OBSERVER EYE SYMBOL
870		charset[i++] = c++;
871// 2400..243F; Control Pictures
872	c = 0x2400;		// from SYMBOL FOR NULL
873	while (c <= 0x2426)	// ..to SYMBOL FOR SUBSTITUTE FORM TWO
874		charset[i++] = c++;
875// 2440..245F; Optical Character Recognition
876	c = 0x2440;		// from OCR HOOK
877	while (c <= 0x244a)	// ..to OCR DOUBLE BACKSLASH
878		charset[i++] = c++;
879// 2460..24FF; Enclosed Alphanumerics
880	c = 0x2460;		// from CIRCLED DIGIT ONE
881	while (c <= 0x24ff)	// ..to NEGATIVE CIRCLED DIGIT ZERO
882		charset[i++] = c++;
883// 2500..257F; Box Drawing
884	c = 0x2500;		// from BOX DRAWINGS LIGHT HORIZONTAL
885	while (c <= 0x257f)	// ..to BOX DRAWINGS HEAVY UP AND LIGHT DOWN
886		charset[i++] = c++;
887// 2580..259F; Block Elements
888	c = 0x2580;		// from UPPER HALF BLOCK
889	while (c <= 0x259f)	// ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
890		charset[i++] = c++;
891// 25A0..25FF; Geometric Shapes
892	c = 0x25a0;		// from BLACK SQUARE
893	while (c <= 0x25ff)	// ..to LOWER RIGHT TRIANGLE
894		charset[i++] = c++;
895// 2600..26FF; Miscellaneous Symbols
896	c = 0x2600;		// from BLACK SUN WITH RAYS
897	while (c <= 0x26ff)	// ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
898		charset[i++] = c++;
899// 2700..27BF; Dingbats
900	c = 0x2700;		// from BLACK SAFETY SCISSORS
901	while (c <= 0x27bf)	// ..to DOUBLE CURLY LOOP
902		charset[i++] = c++;
903// 27C0..27EF; Miscellaneous Mathematical Symbols-A
904	c = 0x27c0;		// from THREE DIMENSIONAL ANGLE
905	while (c <= 0x27ef)	// ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS
906		charset[i++] = c++;
907// 27F0..27FF; Supplemental Arrows-A
908	c = 0x27f0;		// from UPWARDS QUADRUPLE ARROW
909	while (c <= 0x27ff)	// ..to LONG RIGHTWARDS SQUIGGLE ARROW
910		charset[i++] = c++;
911// 2800..28FF; Braille Patterns
912	c = 0x2800;		// from BRAILLE PATTERN BLANK
913	while (c <= 0x28ff)	// ..to BRAILLE PATTERN DOTS-12345678
914		charset[i++] = c++;
915// 2900..297F; Supplemental Arrows-B
916	c = 0x2900;		// from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE
917	while (c <= 0x297f)	// ..to DOWN FISH TAIL
918		charset[i++] = c++;
919// 2980..29FF; Miscellaneous Mathematical Symbols-B
920	c = 0x2980;		// from TRIPLE VERTICAL BAR DELIMITER
921	while (c <= 0x29ff)	// ..to MINY
922		charset[i++] = c++;
923// 2A00..2AFF; Supplemental Mathematical Operators
924	c = 0x2a00;		// from N-ARY CIRCLED DOT OPERATOR
925	while (c <= 0x2aff)	// ..to N-ARY WHITE VERTICAL BAR
926		charset[i++] = c++;
927// 2B00..2BFF; Miscellaneous Symbols and Arrows
928	c = 0x2b00;		// from NORTH EAST WHITE ARROW
929	while (c <= 0x2b73)	// ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
930		charset[i++] = c++;
931	c = 0x2b76;		// from NORTH WEST TRIANGLE-HEADED ARROW TO BAR
932	while (c <= 0x2b95)	// ..to RIGHTWARDS BLACK ARROW
933		charset[i++] = c++;
934	c = 0x2b98;		// from THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD
935	while (c <= 0x2bc8)	// ..to BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
936		charset[i++] = c++;
937	c = 0x2bca;		// from TOP HALF BLACK CIRCLE
938	while (c <= 0x2bfe)	// ..to REVERSED RIGHT ANGLE
939		charset[i++] = c++;
940// 2C00..2C5F; Glagolitic
941	c = 0x2c00;		// from GLAGOLITIC CAPITAL LETTER AZU
942	while (c <= 0x2c2e)	// ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
943		charset[i++] = c++;
944	c = 0x2c30;		// from GLAGOLITIC SMALL LETTER AZU
945	while (c <= 0x2c5e)	// ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
946		charset[i++] = c++;
947// 2C60..2C7F; Latin Extended-C
948	c = 0x2c60;		// from LATIN CAPITAL LETTER L WITH DOUBLE BAR
949	while (c <= 0x2c7f)	// ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL
950		charset[i++] = c++;
951// 2C80..2CFF; Coptic
952	c = 0x2c80;		// from COPTIC CAPITAL LETTER ALFA
953	while (c <= 0x2cf3)	// ..to COPTIC SMALL LETTER BOHAIRIC KHEI
954		charset[i++] = c++;
955	c = 0x2cf9;		// from COPTIC OLD NUBIAN FULL STOP
956	while (c <= 0x2cff)	// ..to COPTIC MORPHOLOGICAL DIVIDER
957		charset[i++] = c++;
958// 2D00..2D2F; Georgian Supplement
959	c = 0x2d00;		// from GEORGIAN SMALL LETTER AN
960	while (c <= 0x2d25)	// ..to GEORGIAN SMALL LETTER HOE
961		charset[i++] = c++;
962	c = 0x2d27;		// from GEORGIAN SMALL LETTER YN
963	while (c <= 0x2d2d)	// ..to GEORGIAN SMALL LETTER AEN
964		charset[i++] = c++;
965// 2D30..2D7F; Tifinagh
966	c = 0x2d30;		// from TIFINAGH LETTER YA
967	while (c <= 0x2d67)	// ..to TIFINAGH LETTER YO
968		charset[i++] = c++;
969	charset[i++] = 0x2d6f;	// TIFINAGH MODIFIER LETTER LABIALIZATION MARK
970	charset[i++] = 0x2d70;	// TIFINAGH SEPARATOR MARK
971	charset[i++] = 0x2d7f;	// TIFINAGH CONSONANT JOINER
972// 2D80..2DDF; Ethiopic Extended
973	c = 0x2d80;		// from ETHIOPIC SYLLABLE LOA
974	while (c <= 0x2d96)	// ..to ETHIOPIC SYLLABLE GGWE
975		charset[i++] = c++;
976	c = 0x2da0;		// from ETHIOPIC SYLLABLE SSA
977	while (c <= 0x2da6)	// ..to ETHIOPIC SYLLABLE SSO
978		charset[i++] = c++;
979	c = 0x2da8;		// from ETHIOPIC SYLLABLE CCA
980	while (c <= 0x2dae)	// ..to ETHIOPIC SYLLABLE CCO
981		charset[i++] = c++;
982	c = 0x2db0;		// from ETHIOPIC SYLLABLE ZZA
983	while (c <= 0x2db6)	// ..to ETHIOPIC SYLLABLE ZZO
984		charset[i++] = c++;
985	c = 0x2db8;		// from ETHIOPIC SYLLABLE CCHA
986	while (c <= 0x2dbe)	// ..to ETHIOPIC SYLLABLE CCHO
987		charset[i++] = c++;
988	c = 0x2dc0;		// from ETHIOPIC SYLLABLE QYA
989	while (c <= 0x2dc6)	// ..to ETHIOPIC SYLLABLE QYO
990		charset[i++] = c++;
991	c = 0x2dc8;		// from ETHIOPIC SYLLABLE KYA
992	while (c <= 0x2dce)	// ..to ETHIOPIC SYLLABLE KYO
993		charset[i++] = c++;
994	c = 0x2dd0;		// from ETHIOPIC SYLLABLE XYA
995	while (c <= 0x2dd6)	// ..to ETHIOPIC SYLLABLE XYO
996		charset[i++] = c++;
997	c = 0x2dd8;		// from ETHIOPIC SYLLABLE GYA
998	while (c <= 0x2dde)	// ..to ETHIOPIC SYLLABLE GYO
999		charset[i++] = c++;
1000// 2DE0..2DFF; Cyrillic Extended-A
1001	c = 0x2de0;		// from COMBINING CYRILLIC LETTER BE
1002	while (c <= 0x2dff)	// ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
1003		charset[i++] = c++;
1004// 2E00..2E7F; Supplemental Punctuation
1005	c = 0x2e00;		// from RIGHT ANGLE SUBSTITUTION MARKER
1006	while (c <= 0x2e4e)	// ..to PUNCTUS ELEVATUS MARK
1007		charset[i++] = c++;
1008// 2E80..2EFF; CJK Radicals Supplement
1009	c = 0x2e80;		// from CJK RADICAL REPEAT
1010	while (c <= 0x2e99)	// ..to CJK RADICAL RAP
1011		charset[i++] = c++;
1012	c = 0x2e9b;		// from CJK RADICAL CHOKE
1013	while (c <= 0x2ef3)	// ..to CJK RADICAL C-SIMPLIFIED TURTLE
1014		charset[i++] = c++;
1015// 2F00..2FDF; Kangxi Radicals
1016	c = 0x2f00;		// from KANGXI RADICAL ONE
1017	while (c <= 0x2fd5)	// ..to KANGXI RADICAL FLUTE
1018		charset[i++] = c++;
1019// 2FF0..2FFF; Ideographic Description Characters
1020	c = 0x2ff0;		// from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
1021	while (c <= 0x2ffb)	// ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
1022		charset[i++] = c++;
1023// 3000..303F; CJK Symbols and Punctuation
1024	c = 0x3000;		// from IDEOGRAPHIC SPACE
1025	while (c <= 0x303f)	// ..to IDEOGRAPHIC HALF FILL SPACE
1026		charset[i++] = c++;
1027// 3040..309F; Hiragana
1028	c = 0x3041;		// from HIRAGANA LETTER SMALL A
1029	while (c <= 0x3096)	// ..to HIRAGANA LETTER SMALL KE
1030		charset[i++] = c++;
1031	c = 0x3099;		// from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
1032	while (c <= 0x309f)	// ..to HIRAGANA DIGRAPH YORI
1033		charset[i++] = c++;
1034// 30A0..30FF; Katakana
1035	c = 0x30a0;		// from KATAKANA-HIRAGANA DOUBLE HYPHEN
1036	while (c <= 0x30ff)	// ..to KATAKANA DIGRAPH KOTO
1037		charset[i++] = c++;
1038// 3100..312F; Bopomofo
1039	c = 0x3105;		// from BOPOMOFO LETTER B
1040	while (c <= 0x312f)	// ..to BOPOMOFO LETTER NN
1041		charset[i++] = c++;
1042// 3130..318F; Hangul Compatibility Jamo
1043	c = 0x3131;		// from HANGUL LETTER KIYEOK
1044	while (c <= 0x318e)	// ..to HANGUL LETTER ARAEAE
1045		charset[i++] = c++;
1046// 3190..319F; Kanbun
1047	c = 0x3190;		// from IDEOGRAPHIC ANNOTATION LINKING MARK
1048	while (c <= 0x319f)	// ..to IDEOGRAPHIC ANNOTATION MAN MARK
1049		charset[i++] = c++;
1050// 31A0..31BF; Bopomofo Extended
1051	c = 0x31a0;		// from BOPOMOFO LETTER BU
1052	while (c <= 0x31ba)	// ..to BOPOMOFO LETTER ZY
1053		charset[i++] = c++;
1054// 31C0..31EF; CJK Strokes
1055	c = 0x31c0;		// from CJK STROKE T
1056	while (c <= 0x31e3)	// ..to CJK STROKE Q
1057		charset[i++] = c++;
1058// 31F0..31FF; Katakana Phonetic Extensions
1059	c = 0x31f0;		// from KATAKANA LETTER SMALL KU
1060	while (c <= 0x31ff)	// ..to KATAKANA LETTER SMALL RO
1061		charset[i++] = c++;
1062// 3200..32FF; Enclosed CJK Letters and Months
1063	c = 0x3200;		// from PARENTHESIZED HANGUL KIYEOK
1064	while (c <= 0x321e)	// ..to PARENTHESIZED KOREAN CHARACTER O HU
1065		charset[i++] = c++;
1066	c = 0x3220;		// from PARENTHESIZED IDEOGRAPH ONE
1067	while (c <= 0x32fe)	// ..to CIRCLED KATAKANA WO
1068		charset[i++] = c++;
1069// 3300..33FF; CJK Compatibility
1070	c = 0x3300;		// from SQUARE APAATO
1071	while (c <= 0x33ff)	// ..to SQUARE GAL
1072		charset[i++] = c++;
1073// 3400..4DBF; CJK Unified Ideographs Extension A
1074	c = 0x3400;		// from <CJK Ideograph Extension A, First>
1075	while (c <= 0x4db5)	// ..to <CJK Ideograph Extension A, Last>
1076		charset[i++] = c++;
1077// 4DC0..4DFF; Yijing Hexagram Symbols
1078	c = 0x4dc0;		// from HEXAGRAM FOR THE CREATIVE HEAVEN
1079	while (c <= 0x4dff)	// ..to HEXAGRAM FOR BEFORE COMPLETION
1080		charset[i++] = c++;
1081// 4E00..9FFF; CJK Unified Ideographs
1082	c = 0x4e00;		// from <CJK Ideograph, First>
1083	while (c <= 0x9fef)	// ..to <CJK Ideograph, Last>
1084		charset[i++] = c++;
1085// A000..A48F; Yi Syllables
1086	c = 0xa000;		// from YI SYLLABLE IT
1087	while (c <= 0xa48c)	// ..to YI SYLLABLE YYR
1088		charset[i++] = c++;
1089// A490..A4CF; Yi Radicals
1090	c = 0xa490;		// from YI RADICAL QOT
1091	while (c <= 0xa4c6)	// ..to YI RADICAL KE
1092		charset[i++] = c++;
1093// A4D0..A4FF; Lisu
1094	c = 0xa4d0;		// from LISU LETTER BA
1095	while (c <= 0xa4ff)	// ..to LISU PUNCTUATION FULL STOP
1096		charset[i++] = c++;
1097// A500..A63F; Vai
1098	c = 0xa500;		// from VAI SYLLABLE EE
1099	while (c <= 0xa62b)	// ..to VAI SYLLABLE NDOLE DO
1100		charset[i++] = c++;
1101// A640..A69F; Cyrillic Extended-B
1102	c = 0xa640;		// from CYRILLIC CAPITAL LETTER ZEMLYA
1103	while (c <= 0xa69f)	// ..to COMBINING CYRILLIC LETTER IOTIFIED E
1104		charset[i++] = c++;
1105// A6A0..A6FF; Bamum
1106	c = 0xa6a0;		// from BAMUM LETTER A
1107	while (c <= 0xa6f7)	// ..to BAMUM QUESTION MARK
1108		charset[i++] = c++;
1109// A700..A71F; Modifier Tone Letters
1110	c = 0xa700;		// from MODIFIER LETTER CHINESE TONE YIN PING
1111	while (c <= 0xa71f)	// ..to MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
1112		charset[i++] = c++;
1113// A720..A7FF; Latin Extended-D
1114	c = 0xa720;		// from MODIFIER LETTER STRESS AND HIGH TONE
1115	while (c <= 0xa7b9)	// ..to LATIN SMALL LETTER U WITH STROKE
1116		charset[i++] = c++;
1117	c = 0xa7f7;		// from LATIN EPIGRAPHIC LETTER SIDEWAYS I
1118	while (c <= 0xa7ff)	// ..to LATIN EPIGRAPHIC LETTER ARCHAIC M
1119		charset[i++] = c++;
1120// A800..A82F; Syloti Nagri
1121	c = 0xa800;		// from SYLOTI NAGRI LETTER A
1122	while (c <= 0xa82b)	// ..to SYLOTI NAGRI POETRY MARK-4
1123		charset[i++] = c++;
1124// A830..A83F; Common Indic Number Forms
1125	c = 0xa830;		// from NORTH INDIC FRACTION ONE QUARTER
1126	while (c <= 0xa839)	// ..to NORTH INDIC QUANTITY MARK
1127		charset[i++] = c++;
1128// A840..A87F; Phags-pa
1129	c = 0xa840;		// from PHAGS-PA LETTER KA
1130	while (c <= 0xa877)	// ..to PHAGS-PA MARK DOUBLE SHAD
1131		charset[i++] = c++;
1132// A880..A8DF; Saurashtra
1133	c = 0xa880;		// from SAURASHTRA SIGN ANUSVARA
1134	while (c <= 0xa8c5)	// ..to SAURASHTRA SIGN CANDRABINDU
1135		charset[i++] = c++;
1136	c = 0xa8ce;		// from SAURASHTRA DANDA
1137	while (c <= 0xa8d9)	// ..to SAURASHTRA DIGIT NINE
1138		charset[i++] = c++;
1139// A8E0..A8FF; Devanagari Extended
1140	c = 0xa8e0;		// from COMBINING DEVANAGARI DIGIT ZERO
1141	while (c <= 0xa8ff)	// ..to DEVANAGARI VOWEL SIGN AY
1142		charset[i++] = c++;
1143// A900..A92F; Kayah Li
1144	c = 0xa900;		// from KAYAH LI DIGIT ZERO
1145	while (c <= 0xa92f)	// ..to KAYAH LI SIGN SHYA
1146		charset[i++] = c++;
1147// A930..A95F; Rejang
1148	c = 0xa930;		// from REJANG LETTER KA
1149	while (c <= 0xa953)	// ..to REJANG VIRAMA
1150		charset[i++] = c++;
1151	charset[i++] = 0xa95f;	// REJANG SECTION MARK
1152// A960..A97F; Hangul Jamo Extended-A
1153	c = 0xa960;		// from HANGUL CHOSEONG TIKEUT-MIEUM
1154	while (c <= 0xa97c)	// ..to HANGUL CHOSEONG SSANGYEORINHIEUH
1155		charset[i++] = c++;
1156// A980..A9DF; Javanese
1157	c = 0xa980;		// from JAVANESE SIGN PANYANGGA
1158	while (c <= 0xa9cd)	// ..to JAVANESE TURNED PADA PISELEH
1159		charset[i++] = c++;
1160	c = 0xa9cf;		// from JAVANESE PANGRANGKEP
1161	while (c <= 0xa9d9)	// ..to JAVANESE DIGIT NINE
1162		charset[i++] = c++;
1163	charset[i++] = 0xa9de;	// JAVANESE PADA TIRTA TUMETES
1164	charset[i++] = 0xa9df;	// JAVANESE PADA ISEN-ISEN
1165// A9E0..A9FF; Myanmar Extended-B
1166	c = 0xa9e0;		// from MYANMAR LETTER SHAN GHA
1167	while (c <= 0xa9fe)	// ..to MYANMAR LETTER TAI LAING BHA
1168		charset[i++] = c++;
1169// AA00..AA5F; Cham
1170	c = 0xaa00;		// from CHAM LETTER A
1171	while (c <= 0xaa36)	// ..to CHAM CONSONANT SIGN WA
1172		charset[i++] = c++;
1173	c = 0xaa40;		// from CHAM LETTER FINAL K
1174	while (c <= 0xaa4d)	// ..to CHAM CONSONANT SIGN FINAL H
1175		charset[i++] = c++;
1176	c = 0xaa50;		// from CHAM DIGIT ZERO
1177	while (c <= 0xaa59)	// ..to CHAM DIGIT NINE
1178		charset[i++] = c++;
1179	c = 0xaa5c;		// from CHAM PUNCTUATION SPIRAL
1180	while (c <= 0xaa5f)	// ..to CHAM PUNCTUATION TRIPLE DANDA
1181		charset[i++] = c++;
1182// AA60..AA7F; Myanmar Extended-A
1183	c = 0xaa60;		// from MYANMAR LETTER KHAMTI GA
1184	while (c <= 0xaa7f)	// ..to MYANMAR LETTER SHWE PALAUNG SHA
1185		charset[i++] = c++;
1186// AA80..AADF; Tai Viet
1187	c = 0xaa80;		// from TAI VIET LETTER LOW KO
1188	while (c <= 0xaac2)	// ..to TAI VIET TONE MAI SONG
1189		charset[i++] = c++;
1190	c = 0xaadb;		// from TAI VIET SYMBOL KON
1191	while (c <= 0xaadf)	// ..to TAI VIET SYMBOL KOI KOI
1192		charset[i++] = c++;
1193// AAE0..AAFF; Meetei Mayek Extensions
1194	c = 0xaae0;		// from MEETEI MAYEK LETTER E
1195	while (c <= 0xaaf6)	// ..to MEETEI MAYEK VIRAMA
1196		charset[i++] = c++;
1197// AB00..AB2F; Ethiopic Extended-A
1198	c = 0xab01;		// from ETHIOPIC SYLLABLE TTHU
1199	while (c <= 0xab06)	// ..to ETHIOPIC SYLLABLE TTHO
1200		charset[i++] = c++;
1201	c = 0xab09;		// from ETHIOPIC SYLLABLE DDHU
1202	while (c <= 0xab0e)	// ..to ETHIOPIC SYLLABLE DDHO
1203		charset[i++] = c++;
1204	c = 0xab11;		// from ETHIOPIC SYLLABLE DZU
1205	while (c <= 0xab16)	// ..to ETHIOPIC SYLLABLE DZO
1206		charset[i++] = c++;
1207	c = 0xab20;		// from ETHIOPIC SYLLABLE CCHHA
1208	while (c <= 0xab26)	// ..to ETHIOPIC SYLLABLE CCHHO
1209		charset[i++] = c++;
1210	c = 0xab28;		// from ETHIOPIC SYLLABLE BBA
1211	while (c <= 0xab2e)	// ..to ETHIOPIC SYLLABLE BBO
1212		charset[i++] = c++;
1213// AB30..AB6F; Latin Extended-E
1214	c = 0xab30;		// from LATIN SMALL LETTER BARRED ALPHA
1215	while (c <= 0xab65)	// ..to GREEK LETTER SMALL CAPITAL OMEGA
1216		charset[i++] = c++;
1217// AB70..ABBF; Cherokee Supplement
1218	c = 0xab70;		// from CHEROKEE SMALL LETTER A
1219	while (c <= 0xabbf)	// ..to CHEROKEE SMALL LETTER YA
1220		charset[i++] = c++;
1221// ABC0..ABFF; Meetei Mayek
1222	c = 0xabc0;		// from MEETEI MAYEK LETTER KOK
1223	while (c <= 0xabed)	// ..to MEETEI MAYEK APUN IYEK
1224		charset[i++] = c++;
1225	c = 0xabf0;		// from MEETEI MAYEK DIGIT ZERO
1226	while (c <= 0xabf9)	// ..to MEETEI MAYEK DIGIT NINE
1227		charset[i++] = c++;
1228// AC00..D7AF; Hangul Syllables
1229	c = 0xac00;		// from <Hangul Syllable, First>
1230	while (c <= 0xd7a3)	// ..to <Hangul Syllable, Last>
1231		charset[i++] = c++;
1232// D7B0..D7FF; Hangul Jamo Extended-B
1233	c = 0xd7b0;		// from HANGUL JUNGSEONG O-YEO
1234	while (c <= 0xd7c6)	// ..to HANGUL JUNGSEONG ARAEA-E
1235		charset[i++] = c++;
1236	c = 0xd7cb;		// from HANGUL JONGSEONG NIEUN-RIEUL
1237	while (c <= 0xd7fb)	// ..to HANGUL JONGSEONG PHIEUPH-THIEUTH
1238		charset[i++] = c++;
1239// D800..DB7F; High Surrogates
1240// DB80..DBFF; High Private Use Surrogates
1241// DC00..DFFF; Low Surrogates
1242// E000..F8FF; Private Use Area
1243// F900..FAFF; CJK Compatibility Ideographs
1244	c = 0xf900;		// from CJK COMPATIBILITY IDEOGRAPH-F900
1245	while (c <= 0xfa6d)	// ..to CJK COMPATIBILITY IDEOGRAPH-FA6D
1246		charset[i++] = c++;
1247	c = 0xfa70;		// from CJK COMPATIBILITY IDEOGRAPH-FA70
1248	while (c <= 0xfad9)	// ..to CJK COMPATIBILITY IDEOGRAPH-FAD9
1249		charset[i++] = c++;
1250// FB00..FB4F; Alphabetic Presentation Forms
1251	c = 0xfb00;		// from LATIN SMALL LIGATURE FF
1252	while (c <= 0xfb06)	// ..to LATIN SMALL LIGATURE ST
1253		charset[i++] = c++;
1254	c = 0xfb13;		// from ARMENIAN SMALL LIGATURE MEN NOW
1255	while (c <= 0xfb17)	// ..to ARMENIAN SMALL LIGATURE MEN XEH
1256		charset[i++] = c++;
1257	c = 0xfb1d;		// from HEBREW LETTER YOD WITH HIRIQ
1258	while (c <= 0xfb36)	// ..to HEBREW LETTER ZAYIN WITH DAGESH
1259		charset[i++] = c++;
1260	c = 0xfb38;		// from HEBREW LETTER TET WITH DAGESH
1261	while (c <= 0xfb3c)	// ..to HEBREW LETTER LAMED WITH DAGESH
1262		charset[i++] = c++;
1263	charset[i++] = 0xfb40;	// HEBREW LETTER NUN WITH DAGESH
1264	charset[i++] = 0xfb41;	// HEBREW LETTER SAMEKH WITH DAGESH
1265	charset[i++] = 0xfb43;	// HEBREW LETTER FINAL PE WITH DAGESH
1266	charset[i++] = 0xfb44;	// HEBREW LETTER PE WITH DAGESH
1267	c = 0xfb46;		// from HEBREW LETTER TSADI WITH DAGESH
1268	while (c <= 0xfb4f)	// ..to HEBREW LIGATURE ALEF LAMED
1269		charset[i++] = c++;
1270// FB50..FDFF; Arabic Presentation Forms-A
1271	c = 0xfb50;		// from ARABIC LETTER ALEF WASLA ISOLATED FORM
1272	while (c <= 0xfbc1)	// ..to ARABIC SYMBOL SMALL TAH BELOW
1273		charset[i++] = c++;
1274	c = 0xfbd3;		// from ARABIC LETTER NG ISOLATED FORM
1275	while (c <= 0xfd3f)	// ..to ORNATE RIGHT PARENTHESIS
1276		charset[i++] = c++;
1277	c = 0xfd50;		// from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
1278	while (c <= 0xfd8f)	// ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
1279		charset[i++] = c++;
1280	c = 0xfd92;		// from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
1281	while (c <= 0xfdc7)	// ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
1282		charset[i++] = c++;
1283	c = 0xfdf0;		// from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
1284	while (c <= 0xfdfd)	// ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
1285		charset[i++] = c++;
1286// FE00..FE0F; Variation Selectors
1287	c = 0xfe00;		// from VARIATION SELECTOR-1
1288	while (c <= 0xfe0f)	// ..to VARIATION SELECTOR-16
1289		charset[i++] = c++;
1290// FE10..FE1F; Vertical Forms
1291	c = 0xfe10;		// from PRESENTATION FORM FOR VERTICAL COMMA
1292	while (c <= 0xfe19)	// ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
1293		charset[i++] = c++;
1294// FE20..FE2F; Combining Half Marks
1295	c = 0xfe20;		// from COMBINING LIGATURE LEFT HALF
1296	while (c <= 0xfe2f)	// ..to COMBINING CYRILLIC TITLO RIGHT HALF
1297		charset[i++] = c++;
1298// FE30..FE4F; CJK Compatibility Forms
1299	c = 0xfe30;		// from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
1300	while (c <= 0xfe4f)	// ..to WAVY LOW LINE
1301		charset[i++] = c++;
1302// FE50..FE6F; Small Form Variants
1303	charset[i++] = 0xfe50;	// SMALL COMMA
1304	charset[i++] = 0xfe52;	// SMALL FULL STOP
1305	c = 0xfe54;		// from SMALL SEMICOLON
1306	while (c <= 0xfe66)	// ..to SMALL EQUALS SIGN
1307		charset[i++] = c++;
1308	c = 0xfe68;		// from SMALL REVERSE SOLIDUS
1309	while (c <= 0xfe6b)	// ..to SMALL COMMERCIAL AT
1310		charset[i++] = c++;
1311// FE70..FEFF; Arabic Presentation Forms-B
1312	c = 0xfe70;		// from ARABIC FATHATAN ISOLATED FORM
1313	while (c <= 0xfe74)	// ..to ARABIC KASRATAN ISOLATED FORM
1314		charset[i++] = c++;
1315	c = 0xfe76;		// from ARABIC FATHA ISOLATED FORM
1316	while (c <= 0xfefc)	// ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM
1317		charset[i++] = c++;
1318	charset[i++] = 0xfeff;	// ZERO WIDTH NO-BREAK SPACE
1319// FF00..FFEF; Halfwidth and Fullwidth Forms
1320	c = 0xff01;		// from FULLWIDTH EXCLAMATION MARK
1321	while (c <= 0xffbe)	// ..to HALFWIDTH HANGUL LETTER HIEUH
1322		charset[i++] = c++;
1323	c = 0xffc2;		// from HALFWIDTH HANGUL LETTER A
1324	while (c <= 0xffc7)	// ..to HALFWIDTH HANGUL LETTER E
1325		charset[i++] = c++;
1326	c = 0xffca;		// from HALFWIDTH HANGUL LETTER YEO
1327	while (c <= 0xffcf)	// ..to HALFWIDTH HANGUL LETTER OE
1328		charset[i++] = c++;
1329	c = 0xffd2;		// from HALFWIDTH HANGUL LETTER YO
1330	while (c <= 0xffd7)	// ..to HALFWIDTH HANGUL LETTER YU
1331		charset[i++] = c++;
1332	charset[i++] = 0xffda;	// HALFWIDTH HANGUL LETTER EU
1333	charset[i++] = 0xffdc;	// HALFWIDTH HANGUL LETTER I
1334	c = 0xffe0;		// from FULLWIDTH CENT SIGN
1335	while (c <= 0xffe6)	// ..to FULLWIDTH WON SIGN
1336		charset[i++] = c++;
1337	c = 0xffe8;		// from HALFWIDTH FORMS LIGHT VERTICAL
1338	while (c <= 0xffee)	// ..to HALFWIDTH WHITE CIRCLE
1339		charset[i++] = c++;
1340// FFF0..FFFF; Specials
1341	c = 0xfff9;		// from INTERLINEAR ANNOTATION ANCHOR
1342	while (c <= 0xfffd)	// ..to REPLACEMENT CHARACTER
1343		charset[i++] = c++;
1344// 10000..1007F; Linear B Syllabary
1345	c = 0x10000;		// from LINEAR B SYLLABLE B008 A
1346	while (c <= 0x1000b)	// ..to LINEAR B SYLLABLE B046 JE
1347		charset[i++] = c++;
1348	c = 0x1000d;		// from LINEAR B SYLLABLE B036 JO
1349	while (c <= 0x10026)	// ..to LINEAR B SYLLABLE B032 QO
1350		charset[i++] = c++;
1351	c = 0x10028;		// from LINEAR B SYLLABLE B060 RA
1352	while (c <= 0x1003a)	// ..to LINEAR B SYLLABLE B042 WO
1353		charset[i++] = c++;
1354	charset[i++] = 0x1003c;	// LINEAR B SYLLABLE B017 ZA
1355	charset[i++] = 0x1003d;	// LINEAR B SYLLABLE B074 ZE
1356	c = 0x1003f;		// from LINEAR B SYLLABLE B020 ZO
1357	while (c <= 0x1004d)	// ..to LINEAR B SYLLABLE B091 TWO
1358		charset[i++] = c++;
1359	c = 0x10050;		// from LINEAR B SYMBOL B018
1360	while (c <= 0x1005d)	// ..to LINEAR B SYMBOL B089
1361		charset[i++] = c++;
1362// 10080..100FF; Linear B Ideograms
1363	c = 0x10080;		// from LINEAR B IDEOGRAM B100 MAN
1364	while (c <= 0x100fa)	// ..to LINEAR B IDEOGRAM VESSEL B305
1365		charset[i++] = c++;
1366// 10100..1013F; Aegean Numbers
1367	charset[i++] = 0x10100;	// AEGEAN WORD SEPARATOR LINE
1368	charset[i++] = 0x10102;	// AEGEAN CHECK MARK
1369	c = 0x10107;		// from AEGEAN NUMBER ONE
1370	while (c <= 0x10133)	// ..to AEGEAN NUMBER NINETY THOUSAND
1371		charset[i++] = c++;
1372	c = 0x10137;		// from AEGEAN WEIGHT BASE UNIT
1373	while (c <= 0x1013f)	// ..to AEGEAN MEASURE THIRD SUBUNIT
1374		charset[i++] = c++;
1375// 10140..1018F; Ancient Greek Numbers
1376	c = 0x10140;		// from GREEK ACROPHONIC ATTIC ONE QUARTER
1377	while (c <= 0x1018e)	// ..to NOMISMA SIGN
1378		charset[i++] = c++;
1379// 10190..101CF; Ancient Symbols
1380	c = 0x10190;		// from ROMAN SEXTANS SIGN
1381	while (c <= 0x1019b)	// ..to ROMAN CENTURIAL SIGN
1382		charset[i++] = c++;
1383	charset[i++] = 0x101a0;	// GREEK SYMBOL TAU RHO
1384// 101D0..101FF; Phaistos Disc
1385	c = 0x101d0;		// from PHAISTOS DISC SIGN PEDESTRIAN
1386	while (c <= 0x101fd)	// ..to PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
1387		charset[i++] = c++;
1388// 10280..1029F; Lycian
1389	c = 0x10280;		// from LYCIAN LETTER A
1390	while (c <= 0x1029c)	// ..to LYCIAN LETTER X
1391		charset[i++] = c++;
1392// 102A0..102DF; Carian
1393	c = 0x102a0;		// from CARIAN LETTER A
1394	while (c <= 0x102d0)	// ..to CARIAN LETTER UUU3
1395		charset[i++] = c++;
1396// 102E0..102FF; Coptic Epact Numbers
1397	c = 0x102e0;		// from COPTIC EPACT THOUSANDS MARK
1398	while (c <= 0x102fb)	// ..to COPTIC EPACT NUMBER NINE HUNDRED
1399		charset[i++] = c++;
1400// 10300..1032F; Old Italic
1401	c = 0x10300;		// from OLD ITALIC LETTER A
1402	while (c <= 0x10323)	// ..to OLD ITALIC NUMERAL FIFTY
1403		charset[i++] = c++;
1404	charset[i++] = 0x1032d;	// OLD ITALIC LETTER YE
1405	charset[i++] = 0x1032f;	// OLD ITALIC LETTER SOUTHERN TSE
1406// 10330..1034F; Gothic
1407	c = 0x10330;		// from GOTHIC LETTER AHSA
1408	while (c <= 0x1034a)	// ..to GOTHIC LETTER NINE HUNDRED
1409		charset[i++] = c++;
1410// 10350..1037F; Old Permic
1411	c = 0x10350;		// from OLD PERMIC LETTER AN
1412	while (c <= 0x1037a)	// ..to COMBINING OLD PERMIC LETTER SII
1413		charset[i++] = c++;
1414// 10380..1039F; Ugaritic
1415	c = 0x10380;		// from UGARITIC LETTER ALPA
1416	while (c <= 0x1039d)	// ..to UGARITIC LETTER SSU
1417		charset[i++] = c++;
1418	charset[i++] = 0x1039f;	// UGARITIC WORD DIVIDER
1419// 103A0..103DF; Old Persian
1420	c = 0x103a0;		// from OLD PERSIAN SIGN A
1421	while (c <= 0x103c3)	// ..to OLD PERSIAN SIGN HA
1422		charset[i++] = c++;
1423	c = 0x103c8;		// from OLD PERSIAN SIGN AURAMAZDAA
1424	while (c <= 0x103d5)	// ..to OLD PERSIAN NUMBER HUNDRED
1425		charset[i++] = c++;
1426// 10400..1044F; Deseret
1427	c = 0x10400;		// from DESERET CAPITAL LETTER LONG I
1428	while (c <= 0x1044f)	// ..to DESERET SMALL LETTER EW
1429		charset[i++] = c++;
1430// 10450..1047F; Shavian
1431	c = 0x10450;		// from SHAVIAN LETTER PEEP
1432	while (c <= 0x1047f)	// ..to SHAVIAN LETTER YEW
1433		charset[i++] = c++;
1434// 10480..104AF; Osmanya
1435	c = 0x10480;		// from OSMANYA LETTER ALEF
1436	while (c <= 0x1049d)	// ..to OSMANYA LETTER OO
1437		charset[i++] = c++;
1438	c = 0x104a0;		// from OSMANYA DIGIT ZERO
1439	while (c <= 0x104a9)	// ..to OSMANYA DIGIT NINE
1440		charset[i++] = c++;
1441// 104B0..104FF; Osage
1442	c = 0x104b0;		// from OSAGE CAPITAL LETTER A
1443	while (c <= 0x104d3)	// ..to OSAGE CAPITAL LETTER ZHA
1444		charset[i++] = c++;
1445	c = 0x104d8;		// from OSAGE SMALL LETTER A
1446	while (c <= 0x104fb)	// ..to OSAGE SMALL LETTER ZHA
1447		charset[i++] = c++;
1448// 10500..1052F; Elbasan
1449	c = 0x10500;		// from ELBASAN LETTER A
1450	while (c <= 0x10527)	// ..to ELBASAN LETTER KHE
1451		charset[i++] = c++;
1452// 10530..1056F; Caucasian Albanian
1453	c = 0x10530;		// from CAUCASIAN ALBANIAN LETTER ALT
1454	while (c <= 0x10563)	// ..to CAUCASIAN ALBANIAN LETTER KIW
1455		charset[i++] = c++;
1456	charset[i++] = 0x1056f;	// CAUCASIAN ALBANIAN CITATION MARK
1457// 10600..1077F; Linear A
1458	c = 0x10600;		// from LINEAR A SIGN AB001
1459	while (c <= 0x10736)	// ..to LINEAR A SIGN A664
1460		charset[i++] = c++;
1461	c = 0x10740;		// from LINEAR A SIGN A701 A
1462	while (c <= 0x10755)	// ..to LINEAR A SIGN A732 JE
1463		charset[i++] = c++;
1464	c = 0x10760;		// from LINEAR A SIGN A800
1465	while (c <= 0x10767)	// ..to LINEAR A SIGN A807
1466		charset[i++] = c++;
1467// 10800..1083F; Cypriot Syllabary
1468	c = 0x10800;		// from CYPRIOT SYLLABLE A
1469	while (c <= 0x10805)	// ..to CYPRIOT SYLLABLE JA
1470		charset[i++] = c++;
1471	c = 0x1080a;		// from CYPRIOT SYLLABLE KA
1472	while (c <= 0x10835)	// ..to CYPRIOT SYLLABLE WO
1473		charset[i++] = c++;
1474	charset[i++] = 0x10837;	// CYPRIOT SYLLABLE XA
1475	charset[i++] = 0x10838;	// CYPRIOT SYLLABLE XE
1476	c = 0x1083c;		// from CYPRIOT SYLLABLE ZA
1477	while (c <= 0x1083f)	// ..to CYPRIOT SYLLABLE ZO
1478		charset[i++] = c++;
1479// 10840..1085F; Imperial Aramaic
1480	c = 0x10840;		// from IMPERIAL ARAMAIC LETTER ALEPH
1481	while (c <= 0x10855)	// ..to IMPERIAL ARAMAIC LETTER TAW
1482		charset[i++] = c++;
1483	c = 0x10857;		// from IMPERIAL ARAMAIC SECTION SIGN
1484	while (c <= 0x1085f)	// ..to IMPERIAL ARAMAIC NUMBER TEN THOUSAND
1485		charset[i++] = c++;
1486// 10860..1087F; Palmyrene
1487	c = 0x10860;		// from PALMYRENE LETTER ALEPH
1488	while (c <= 0x1087f)	// ..to PALMYRENE NUMBER TWENTY
1489		charset[i++] = c++;
1490// 10880..108AF; Nabataean
1491	c = 0x10880;		// from NABATAEAN LETTER FINAL ALEPH
1492	while (c <= 0x1089e)	// ..to NABATAEAN LETTER TAW
1493		charset[i++] = c++;
1494	c = 0x108a7;		// from NABATAEAN NUMBER ONE
1495	while (c <= 0x108af)	// ..to NABATAEAN NUMBER ONE HUNDRED
1496		charset[i++] = c++;
1497// 108E0..108FF; Hatran
1498	c = 0x108e0;		// from HATRAN LETTER ALEPH
1499	while (c <= 0x108f2)	// ..to HATRAN LETTER QOPH
1500		charset[i++] = c++;
1501	charset[i++] = 0x108f4;	// HATRAN LETTER SHIN
1502	charset[i++] = 0x108f5;	// HATRAN LETTER TAW
1503	c = 0x108fb;		// from HATRAN NUMBER ONE
1504	while (c <= 0x108ff)	// ..to HATRAN NUMBER ONE HUNDRED
1505		charset[i++] = c++;
1506// 10900..1091F; Phoenician
1507	c = 0x10900;		// from PHOENICIAN LETTER ALF
1508	while (c <= 0x1091b)	// ..to PHOENICIAN NUMBER THREE
1509		charset[i++] = c++;
1510	charset[i++] = 0x1091f;	// PHOENICIAN WORD SEPARATOR
1511// 10920..1093F; Lydian
1512	c = 0x10920;		// from LYDIAN LETTER A
1513	while (c <= 0x10939)	// ..to LYDIAN LETTER C
1514		charset[i++] = c++;
1515	charset[i++] = 0x1093f;	// LYDIAN TRIANGULAR MARK
1516// 10980..1099F; Meroitic Hieroglyphs
1517	c = 0x10980;		// from MEROITIC HIEROGLYPHIC LETTER A
1518	while (c <= 0x1099f)	// ..to MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2
1519		charset[i++] = c++;
1520// 109A0..109FF; Meroitic Cursive
1521	c = 0x109a0;		// from MEROITIC CURSIVE LETTER A
1522	while (c <= 0x109b7)	// ..to MEROITIC CURSIVE LETTER DA
1523		charset[i++] = c++;
1524	c = 0x109bc;		// from MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS
1525	while (c <= 0x109cf)	// ..to MEROITIC CURSIVE NUMBER SEVENTY
1526		charset[i++] = c++;
1527	c = 0x109d2;		// from MEROITIC CURSIVE NUMBER ONE HUNDRED
1528	while (c <= 0x109ff)	// ..to MEROITIC CURSIVE FRACTION TEN TWELFTHS
1529		charset[i++] = c++;
1530// 10A00..10A5F; Kharoshthi
1531	c = 0x10a00;		// from KHAROSHTHI LETTER A
1532	while (c <= 0x10a03)	// ..to KHAROSHTHI VOWEL SIGN VOCALIC R
1533		charset[i++] = c++;
1534	charset[i++] = 0x10a05;	// KHAROSHTHI VOWEL SIGN E
1535	charset[i++] = 0x10a06;	// KHAROSHTHI VOWEL SIGN O
1536	c = 0x10a0c;		// from KHAROSHTHI VOWEL LENGTH MARK
1537	while (c <= 0x10a13)	// ..to KHAROSHTHI LETTER GHA
1538		charset[i++] = c++;
1539	charset[i++] = 0x10a15;	// KHAROSHTHI LETTER CA
1540	charset[i++] = 0x10a17;	// KHAROSHTHI LETTER JA
1541	c = 0x10a19;		// from KHAROSHTHI LETTER NYA
1542	while (c <= 0x10a35)	// ..to KHAROSHTHI LETTER VHA
1543		charset[i++] = c++;
1544	charset[i++] = 0x10a38;	// KHAROSHTHI SIGN BAR ABOVE
1545	charset[i++] = 0x10a3a;	// KHAROSHTHI SIGN DOT BELOW
1546	c = 0x10a3f;		// from KHAROSHTHI VIRAMA
1547	while (c <= 0x10a48)	// ..to KHAROSHTHI FRACTION ONE HALF
1548		charset[i++] = c++;
1549	c = 0x10a50;		// from KHAROSHTHI PUNCTUATION DOT
1550	while (c <= 0x10a58)	// ..to KHAROSHTHI PUNCTUATION LINES
1551		charset[i++] = c++;
1552// 10A60..10A7F; Old South Arabian
1553	c = 0x10a60;		// from OLD SOUTH ARABIAN LETTER HE
1554	while (c <= 0x10a7f)	// ..to OLD SOUTH ARABIAN NUMERIC INDICATOR
1555		charset[i++] = c++;
1556// 10A80..10A9F; Old North Arabian
1557	c = 0x10a80;		// from OLD NORTH ARABIAN LETTER HEH
1558	while (c <= 0x10a9f)	// ..to OLD NORTH ARABIAN NUMBER TWENTY
1559		charset[i++] = c++;
1560// 10AC0..10AFF; Manichaean
1561	c = 0x10ac0;		// from MANICHAEAN LETTER ALEPH
1562	while (c <= 0x10ae6)	// ..to MANICHAEAN ABBREVIATION MARK BELOW
1563		charset[i++] = c++;
1564	c = 0x10aeb;		// from MANICHAEAN NUMBER ONE
1565	while (c <= 0x10af6)	// ..to MANICHAEAN PUNCTUATION LINE FILLER
1566		charset[i++] = c++;
1567// 10B00..10B3F; Avestan
1568	c = 0x10b00;		// from AVESTAN LETTER A
1569	while (c <= 0x10b35)	// ..to AVESTAN LETTER HE
1570		charset[i++] = c++;
1571	c = 0x10b39;		// from AVESTAN ABBREVIATION MARK
1572	while (c <= 0x10b3f)	// ..to LARGE ONE RING OVER TWO RINGS PUNCTUATION
1573		charset[i++] = c++;
1574// 10B40..10B5F; Inscriptional Parthian
1575	c = 0x10b40;		// from INSCRIPTIONAL PARTHIAN LETTER ALEPH
1576	while (c <= 0x10b55)	// ..to INSCRIPTIONAL PARTHIAN LETTER TAW
1577		charset[i++] = c++;
1578	c = 0x10b58;		// from INSCRIPTIONAL PARTHIAN NUMBER ONE
1579	while (c <= 0x10b5f)	// ..to INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
1580		charset[i++] = c++;
1581// 10B60..10B7F; Inscriptional Pahlavi
1582	c = 0x10b60;		// from INSCRIPTIONAL PAHLAVI LETTER ALEPH
1583	while (c <= 0x10b72)	// ..to INSCRIPTIONAL PAHLAVI LETTER TAW
1584		charset[i++] = c++;
1585	c = 0x10b78;		// from INSCRIPTIONAL PAHLAVI NUMBER ONE
1586	while (c <= 0x10b7f)	// ..to INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
1587		charset[i++] = c++;
1588// 10B80..10BAF; Psalter Pahlavi
1589	c = 0x10b80;		// from PSALTER PAHLAVI LETTER ALEPH
1590	while (c <= 0x10b91)	// ..to PSALTER PAHLAVI LETTER TAW
1591		charset[i++] = c++;
1592	c = 0x10b99;		// from PSALTER PAHLAVI SECTION MARK
1593	while (c <= 0x10b9c)	// ..to PSALTER PAHLAVI FOUR DOTS WITH DOT
1594		charset[i++] = c++;
1595	c = 0x10ba9;		// from PSALTER PAHLAVI NUMBER ONE
1596	while (c <= 0x10baf)	// ..to PSALTER PAHLAVI NUMBER ONE HUNDRED
1597		charset[i++] = c++;
1598// 10C00..10C4F; Old Turkic
1599	c = 0x10c00;		// from OLD TURKIC LETTER ORKHON A
1600	while (c <= 0x10c48)	// ..to OLD TURKIC LETTER ORKHON BASH
1601		charset[i++] = c++;
1602// 10C80..10CFF; Old Hungarian
1603	c = 0x10c80;		// from OLD HUNGARIAN CAPITAL LETTER A
1604	while (c <= 0x10cb2)	// ..to OLD HUNGARIAN CAPITAL LETTER US
1605		charset[i++] = c++;
1606	c = 0x10cc0;		// from OLD HUNGARIAN SMALL LETTER A
1607	while (c <= 0x10cf2)	// ..to OLD HUNGARIAN SMALL LETTER US
1608		charset[i++] = c++;
1609	c = 0x10cfa;		// from OLD HUNGARIAN NUMBER ONE
1610	while (c <= 0x10cff)	// ..to OLD HUNGARIAN NUMBER ONE THOUSAND
1611		charset[i++] = c++;
1612// 10D00..10D3F; Hanifi Rohingya
1613	c = 0x10d00;		// from HANIFI ROHINGYA LETTER A
1614	while (c <= 0x10d27)	// ..to HANIFI ROHINGYA SIGN TASSI
1615		charset[i++] = c++;
1616	c = 0x10d30;		// from HANIFI ROHINGYA DIGIT ZERO
1617	while (c <= 0x10d39)	// ..to HANIFI ROHINGYA DIGIT NINE
1618		charset[i++] = c++;
1619// 10E60..10E7F; Rumi Numeral Symbols
1620	c = 0x10e60;		// from RUMI DIGIT ONE
1621	while (c <= 0x10e7e)	// ..to RUMI FRACTION TWO THIRDS
1622		charset[i++] = c++;
1623// 10F00..10F2F; Old Sogdian
1624	c = 0x10f00;		// from OLD SOGDIAN LETTER ALEPH
1625	while (c <= 0x10f27)	// ..to OLD SOGDIAN LIGATURE AYIN-DALETH
1626		charset[i++] = c++;
1627// 10F30..10F6F; Sogdian
1628	c = 0x10f30;		// from SOGDIAN LETTER ALEPH
1629	while (c <= 0x10f59)	// ..to SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
1630		charset[i++] = c++;
1631// 11000..1107F; Brahmi
1632	c = 0x11000;		// from BRAHMI SIGN CANDRABINDU
1633	while (c <= 0x1104d)	// ..to BRAHMI PUNCTUATION LOTUS
1634		charset[i++] = c++;
1635	c = 0x11052;		// from BRAHMI NUMBER ONE
1636	while (c <= 0x1106f)	// ..to BRAHMI DIGIT NINE
1637		charset[i++] = c++;
1638	charset[i++] = 0x1107f;	// BRAHMI NUMBER JOINER
1639// 11080..110CF; Kaithi
1640	c = 0x11080;		// from KAITHI SIGN CANDRABINDU
1641	while (c <= 0x110c1)	// ..to KAITHI DOUBLE DANDA
1642		charset[i++] = c++;
1643	charset[i++] = 0x110cd;	// KAITHI NUMBER SIGN ABOVE
1644// 110D0..110FF; Sora Sompeng
1645	c = 0x110d0;		// from SORA SOMPENG LETTER SAH
1646	while (c <= 0x110e8)	// ..to SORA SOMPENG LETTER MAE
1647		charset[i++] = c++;
1648	c = 0x110f0;		// from SORA SOMPENG DIGIT ZERO
1649	while (c <= 0x110f9)	// ..to SORA SOMPENG DIGIT NINE
1650		charset[i++] = c++;
1651// 11100..1114F; Chakma
1652	c = 0x11100;		// from CHAKMA SIGN CANDRABINDU
1653	while (c <= 0x11134)	// ..to CHAKMA MAAYYAA
1654		charset[i++] = c++;
1655	c = 0x11136;		// from CHAKMA DIGIT ZERO
1656	while (c <= 0x11146)	// ..to CHAKMA VOWEL SIGN EI
1657		charset[i++] = c++;
1658// 11150..1117F; Mahajani
1659	c = 0x11150;		// from MAHAJANI LETTER A
1660	while (c <= 0x11176)	// ..to MAHAJANI LIGATURE SHRI
1661		charset[i++] = c++;
1662// 11180..111DF; Sharada
1663	c = 0x11180;		// from SHARADA SIGN CANDRABINDU
1664	while (c <= 0x111cd)	// ..to SHARADA SUTRA MARK
1665		charset[i++] = c++;
1666	c = 0x111d0;		// from SHARADA DIGIT ZERO
1667	while (c <= 0x111df)	// ..to SHARADA SECTION MARK-2
1668		charset[i++] = c++;
1669// 111E0..111FF; Sinhala Archaic Numbers
1670	c = 0x111e1;		// from SINHALA ARCHAIC DIGIT ONE
1671	while (c <= 0x111f4)	// ..to SINHALA ARCHAIC NUMBER ONE THOUSAND
1672		charset[i++] = c++;
1673// 11200..1124F; Khojki
1674	c = 0x11200;		// from KHOJKI LETTER A
1675	while (c <= 0x11211)	// ..to KHOJKI LETTER JJA
1676		charset[i++] = c++;
1677	c = 0x11213;		// from KHOJKI LETTER NYA
1678	while (c <= 0x1123e)	// ..to KHOJKI SIGN SUKUN
1679		charset[i++] = c++;
1680// 11280..112AF; Multani
1681	c = 0x11280;		// from MULTANI LETTER A
1682	while (c <= 0x11286)	// ..to MULTANI LETTER GA
1683		charset[i++] = c++;
1684	c = 0x1128a;		// from MULTANI LETTER CA
1685	while (c <= 0x1128d)	// ..to MULTANI LETTER JJA
1686		charset[i++] = c++;
1687	c = 0x1128f;		// from MULTANI LETTER NYA
1688	while (c <= 0x1129d)	// ..to MULTANI LETTER BA
1689		charset[i++] = c++;
1690	c = 0x1129f;		// from MULTANI LETTER BHA
1691	while (c <= 0x112a9)	// ..to MULTANI SECTION MARK
1692		charset[i++] = c++;
1693// 112B0..112FF; Khudawadi
1694	c = 0x112b0;		// from KHUDAWADI LETTER A
1695	while (c <= 0x112ea)	// ..to KHUDAWADI SIGN VIRAMA
1696		charset[i++] = c++;
1697	c = 0x112f0;		// from KHUDAWADI DIGIT ZERO
1698	while (c <= 0x112f9)	// ..to KHUDAWADI DIGIT NINE
1699		charset[i++] = c++;
1700// 11300..1137F; Grantha
1701	c = 0x11300;		// from GRANTHA SIGN COMBINING ANUSVARA ABOVE
1702	while (c <= 0x11303)	// ..to GRANTHA SIGN VISARGA
1703		charset[i++] = c++;
1704	c = 0x11305;		// from GRANTHA LETTER A
1705	while (c <= 0x1130c)	// ..to GRANTHA LETTER VOCALIC L
1706		charset[i++] = c++;
1707	charset[i++] = 0x1130f;	// GRANTHA LETTER EE
1708	charset[i++] = 0x11310;	// GRANTHA LETTER AI
1709	c = 0x11313;		// from GRANTHA LETTER OO
1710	while (c <= 0x11328)	// ..to GRANTHA LETTER NA
1711		charset[i++] = c++;
1712	c = 0x1132a;		// from GRANTHA LETTER PA
1713	while (c <= 0x11330)	// ..to GRANTHA LETTER RA
1714		charset[i++] = c++;
1715	charset[i++] = 0x11332;	// GRANTHA LETTER LA
1716	charset[i++] = 0x11333;	// GRANTHA LETTER LLA
1717	c = 0x11335;		// from GRANTHA LETTER VA
1718	while (c <= 0x11339)	// ..to GRANTHA LETTER HA
1719		charset[i++] = c++;
1720	c = 0x1133b;		// from COMBINING BINDU BELOW
1721	while (c <= 0x11344)	// ..to GRANTHA VOWEL SIGN VOCALIC RR
1722		charset[i++] = c++;
1723	charset[i++] = 0x11347;	// GRANTHA VOWEL SIGN EE
1724	charset[i++] = 0x11348;	// GRANTHA VOWEL SIGN AI
1725	charset[i++] = 0x1134b;	// GRANTHA VOWEL SIGN OO
1726	charset[i++] = 0x1134d;	// GRANTHA SIGN VIRAMA
1727	c = 0x1135d;		// from GRANTHA SIGN PLUTA
1728	while (c <= 0x11363)	// ..to GRANTHA VOWEL SIGN VOCALIC LL
1729		charset[i++] = c++;
1730	c = 0x11366;		// from COMBINING GRANTHA DIGIT ZERO
1731	while (c <= 0x1136c)	// ..to COMBINING GRANTHA DIGIT SIX
1732		charset[i++] = c++;
1733	c = 0x11370;		// from COMBINING GRANTHA LETTER A
1734	while (c <= 0x11374)	// ..to COMBINING GRANTHA LETTER PA
1735		charset[i++] = c++;
1736// 11400..1147F; Newa
1737	c = 0x11400;		// from NEWA LETTER A
1738	while (c <= 0x11459)	// ..to NEWA DIGIT NINE
1739		charset[i++] = c++;
1740	charset[i++] = 0x1145d;	// NEWA INSERTION SIGN
1741	charset[i++] = 0x1145e;	// NEWA SANDHI MARK
1742// 11480..114DF; Tirhuta
1743	c = 0x11480;		// from TIRHUTA ANJI
1744	while (c <= 0x114c7)	// ..to TIRHUTA OM
1745		charset[i++] = c++;
1746	c = 0x114d0;		// from TIRHUTA DIGIT ZERO
1747	while (c <= 0x114d9)	// ..to TIRHUTA DIGIT NINE
1748		charset[i++] = c++;
1749// 11580..115FF; Siddham
1750	c = 0x11580;		// from SIDDHAM LETTER A
1751	while (c <= 0x115b5)	// ..to SIDDHAM VOWEL SIGN VOCALIC RR
1752		charset[i++] = c++;
1753	c = 0x115b8;		// from SIDDHAM VOWEL SIGN E
1754	while (c <= 0x115dd)	// ..to SIDDHAM VOWEL SIGN ALTERNATE UU
1755		charset[i++] = c++;
1756// 11600..1165F; Modi
1757	c = 0x11600;		// from MODI LETTER A
1758	while (c <= 0x11644)	// ..to MODI SIGN HUVA
1759		charset[i++] = c++;
1760	c = 0x11650;		// from MODI DIGIT ZERO
1761	while (c <= 0x11659)	// ..to MODI DIGIT NINE
1762		charset[i++] = c++;
1763// 11660..1167F; Mongolian Supplement
1764	c = 0x11660;		// from MONGOLIAN BIRGA WITH ORNAMENT
1765	while (c <= 0x1166c)	// ..to MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
1766		charset[i++] = c++;
1767// 11680..116CF; Takri
1768	c = 0x11680;		// from TAKRI LETTER A
1769	while (c <= 0x116b7)	// ..to TAKRI SIGN NUKTA
1770		charset[i++] = c++;
1771	c = 0x116c0;		// from TAKRI DIGIT ZERO
1772	while (c <= 0x116c9)	// ..to TAKRI DIGIT NINE
1773		charset[i++] = c++;
1774// 11700..1173F; Ahom
1775	c = 0x11700;		// from AHOM LETTER KA
1776	while (c <= 0x1171a)	// ..to AHOM LETTER ALTERNATE BA
1777		charset[i++] = c++;
1778	c = 0x1171d;		// from AHOM CONSONANT SIGN MEDIAL LA
1779	while (c <= 0x1172b)	// ..to AHOM SIGN KILLER
1780		charset[i++] = c++;
1781	c = 0x11730;		// from AHOM DIGIT ZERO
1782	while (c <= 0x1173f)	// ..to AHOM SYMBOL VI
1783		charset[i++] = c++;
1784// 11800..1184F; Dogra
1785	c = 0x11800;		// from DOGRA LETTER A
1786	while (c <= 0x1183b)	// ..to DOGRA ABBREVIATION SIGN
1787		charset[i++] = c++;
1788// 118A0..118FF; Warang Citi
1789	c = 0x118a0;		// from WARANG CITI CAPITAL LETTER NGAA
1790	while (c <= 0x118f2)	// ..to WARANG CITI NUMBER NINETY
1791		charset[i++] = c++;
1792	charset[i++] = 0x118ff;	// WARANG CITI OM
1793// 11A00..11A4F; Zanabazar Square
1794	c = 0x11a00;		// from ZANABAZAR SQUARE LETTER A
1795	while (c <= 0x11a47)	// ..to ZANABAZAR SQUARE SUBJOINER
1796		charset[i++] = c++;
1797// 11A50..11AAF; Soyombo
1798	c = 0x11a50;		// from SOYOMBO LETTER A
1799	while (c <= 0x11a83)	// ..to SOYOMBO LETTER KSSA
1800		charset[i++] = c++;
1801	c = 0x11a86;		// from SOYOMBO CLUSTER-INITIAL LETTER RA
1802	while (c <= 0x11aa2)	// ..to SOYOMBO TERMINAL MARK-2
1803		charset[i++] = c++;
1804// 11AC0..11AFF; Pau Cin Hau
1805	c = 0x11ac0;		// from PAU CIN HAU LETTER PA
1806	while (c <= 0x11af8)	// ..to PAU CIN HAU GLOTTAL STOP FINAL
1807		charset[i++] = c++;
1808// 11C00..11C6F; Bhaiksuki
1809	c = 0x11c00;		// from BHAIKSUKI LETTER A
1810	while (c <= 0x11c08)	// ..to BHAIKSUKI LETTER VOCALIC L
1811		charset[i++] = c++;
1812	c = 0x11c0a;		// from BHAIKSUKI LETTER E
1813	while (c <= 0x11c36)	// ..to BHAIKSUKI VOWEL SIGN VOCALIC L
1814		charset[i++] = c++;
1815	c = 0x11c38;		// from BHAIKSUKI VOWEL SIGN E
1816	while (c <= 0x11c45)	// ..to BHAIKSUKI GAP FILLER-2
1817		charset[i++] = c++;
1818	c = 0x11c50;		// from BHAIKSUKI DIGIT ZERO
1819	while (c <= 0x11c6c)	// ..to BHAIKSUKI HUNDREDS UNIT MARK
1820		charset[i++] = c++;
1821// 11C70..11CBF; Marchen
1822	c = 0x11c70;		// from MARCHEN HEAD MARK
1823	while (c <= 0x11c8f)	// ..to MARCHEN LETTER A
1824		charset[i++] = c++;
1825	c = 0x11c92;		// from MARCHEN SUBJOINED LETTER KA
1826	while (c <= 0x11ca7)	// ..to MARCHEN SUBJOINED LETTER ZA
1827		charset[i++] = c++;
1828	c = 0x11ca9;		// from MARCHEN SUBJOINED LETTER YA
1829	while (c <= 0x11cb6)	// ..to MARCHEN SIGN CANDRABINDU
1830		charset[i++] = c++;
1831// 11D00..11D5F; Masaram Gondi
1832	c = 0x11d00;		// from MASARAM GONDI LETTER A
1833	while (c <= 0x11d06)	// ..to MASARAM GONDI LETTER E
1834		charset[i++] = c++;
1835	charset[i++] = 0x11d08;	// MASARAM GONDI LETTER AI
1836	charset[i++] = 0x11d09;	// MASARAM GONDI LETTER O
1837	c = 0x11d0b;		// from MASARAM GONDI LETTER AU
1838	while (c <= 0x11d36)	// ..to MASARAM GONDI VOWEL SIGN VOCALIC R
1839		charset[i++] = c++;
1840	charset[i++] = 0x11d3c;	// MASARAM GONDI VOWEL SIGN AI
1841	charset[i++] = 0x11d3d;	// MASARAM GONDI VOWEL SIGN O
1842	c = 0x11d3f;		// from MASARAM GONDI VOWEL SIGN AU
1843	while (c <= 0x11d47)	// ..to MASARAM GONDI RA-KARA
1844		charset[i++] = c++;
1845	c = 0x11d50;		// from MASARAM GONDI DIGIT ZERO
1846	while (c <= 0x11d59)	// ..to MASARAM GONDI DIGIT NINE
1847		charset[i++] = c++;
1848// 11D60..11DAF; Gunjala Gondi
1849	c = 0x11d60;		// from GUNJALA GONDI LETTER A
1850	while (c <= 0x11d65)	// ..to GUNJALA GONDI LETTER UU
1851		charset[i++] = c++;
1852	charset[i++] = 0x11d67;	// GUNJALA GONDI LETTER EE
1853	charset[i++] = 0x11d68;	// GUNJALA GONDI LETTER AI
1854	c = 0x11d6a;		// from GUNJALA GONDI LETTER OO
1855	while (c <= 0x11d8e)	// ..to GUNJALA GONDI VOWEL SIGN UU
1856		charset[i++] = c++;
1857	charset[i++] = 0x11d90;	// GUNJALA GONDI VOWEL SIGN EE
1858	charset[i++] = 0x11d91;	// GUNJALA GONDI VOWEL SIGN AI
1859	c = 0x11d93;		// from GUNJALA GONDI VOWEL SIGN OO
1860	while (c <= 0x11d98)	// ..to GUNJALA GONDI OM
1861		charset[i++] = c++;
1862	c = 0x11da0;		// from GUNJALA GONDI DIGIT ZERO
1863	while (c <= 0x11da9)	// ..to GUNJALA GONDI DIGIT NINE
1864		charset[i++] = c++;
1865// 11EE0..11EFF; Makasar
1866	c = 0x11ee0;		// from MAKASAR LETTER KA
1867	while (c <= 0x11ef8)	// ..to MAKASAR END OF SECTION
1868		charset[i++] = c++;
1869// 12000..123FF; Cuneiform
1870	c = 0x12000;		// from CUNEIFORM SIGN A
1871	while (c <= 0x12399)	// ..to CUNEIFORM SIGN U U
1872		charset[i++] = c++;
1873// 12400..1247F; Cuneiform Numbers and Punctuation
1874	c = 0x12400;		// from CUNEIFORM NUMERIC SIGN TWO ASH
1875	while (c <= 0x1246e)	// ..to CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
1876		charset[i++] = c++;
1877	c = 0x12470;		// from CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER
1878	while (c <= 0x12474)	// ..to CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
1879		charset[i++] = c++;
1880// 12480..1254F; Early Dynastic Cuneiform
1881	c = 0x12480;		// from CUNEIFORM SIGN AB TIMES NUN TENU
1882	while (c <= 0x12543)	// ..to CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
1883		charset[i++] = c++;
1884// 13000..1342F; Egyptian Hieroglyphs
1885	c = 0x13000;		// from EGYPTIAN HIEROGLYPH A001
1886	while (c <= 0x1342e)	// ..to EGYPTIAN HIEROGLYPH AA032
1887		charset[i++] = c++;
1888// 14400..1467F; Anatolian Hieroglyphs
1889	c = 0x14400;		// from ANATOLIAN HIEROGLYPH A001
1890	while (c <= 0x14646)	// ..to ANATOLIAN HIEROGLYPH A530
1891		charset[i++] = c++;
1892// 16800..16A3F; Bamum Supplement
1893	c = 0x16800;		// from BAMUM LETTER PHASE-A NGKUE MFON
1894	while (c <= 0x16a38)	// ..to BAMUM LETTER PHASE-F VUEQ
1895		charset[i++] = c++;
1896// 16A40..16A6F; Mro
1897	c = 0x16a40;		// from MRO LETTER TA
1898	while (c <= 0x16a5e)	// ..to MRO LETTER TEK
1899		charset[i++] = c++;
1900	c = 0x16a60;		// from MRO DIGIT ZERO
1901	while (c <= 0x16a69)	// ..to MRO DIGIT NINE
1902		charset[i++] = c++;
1903	charset[i++] = 0x16a6e;	// MRO DANDA
1904	charset[i++] = 0x16a6f;	// MRO DOUBLE DANDA
1905// 16AD0..16AFF; Bassa Vah
1906	c = 0x16ad0;		// from BASSA VAH LETTER ENNI
1907	while (c <= 0x16aed)	// ..to BASSA VAH LETTER I
1908		charset[i++] = c++;
1909	c = 0x16af0;		// from BASSA VAH COMBINING HIGH TONE
1910	while (c <= 0x16af5)	// ..to BASSA VAH FULL STOP
1911		charset[i++] = c++;
1912// 16B00..16B8F; Pahawh Hmong
1913	c = 0x16b00;		// from PAHAWH HMONG VOWEL KEEB
1914	while (c <= 0x16b45)	// ..to PAHAWH HMONG SIGN CIM TSOV ROG
1915		charset[i++] = c++;
1916	c = 0x16b50;		// from PAHAWH HMONG DIGIT ZERO
1917	while (c <= 0x16b59)	// ..to PAHAWH HMONG DIGIT NINE
1918		charset[i++] = c++;
1919	c = 0x16b5b;		// from PAHAWH HMONG NUMBER TENS
1920	while (c <= 0x16b61)	// ..to PAHAWH HMONG NUMBER TRILLIONS
1921		charset[i++] = c++;
1922	c = 0x16b63;		// from PAHAWH HMONG SIGN VOS LUB
1923	while (c <= 0x16b77)	// ..to PAHAWH HMONG SIGN CIM NRES TOS
1924		charset[i++] = c++;
1925	c = 0x16b7d;		// from PAHAWH HMONG CLAN SIGN TSHEEJ
1926	while (c <= 0x16b8f)	// ..to PAHAWH HMONG CLAN SIGN VWJ
1927		charset[i++] = c++;
1928// 16E40..16E9F; Medefaidrin
1929	c = 0x16e40;		// from MEDEFAIDRIN CAPITAL LETTER M
1930	while (c <= 0x16e9a)	// ..to MEDEFAIDRIN EXCLAMATION OH
1931		charset[i++] = c++;
1932// 16F00..16F9F; Miao
1933	c = 0x16f00;		// from MIAO LETTER PA
1934	while (c <= 0x16f44)	// ..to MIAO LETTER HHA
1935		charset[i++] = c++;
1936	c = 0x16f50;		// from MIAO LETTER NASALIZATION
1937	while (c <= 0x16f7e)	// ..to MIAO VOWEL SIGN NG
1938		charset[i++] = c++;
1939	c = 0x16f8f;		// from MIAO TONE RIGHT
1940	while (c <= 0x16f9f)	// ..to MIAO LETTER REFORMED TONE-8
1941		charset[i++] = c++;
1942// 16FE0..16FFF; Ideographic Symbols and Punctuation
1943	charset[i++] = 0x16fe0;	// TANGUT ITERATION MARK
1944	charset[i++] = 0x16fe1;	// NUSHU ITERATION MARK
1945// 17000..187FF; Tangut
1946	c = 0x17000;		// from <Tangut Ideograph, First>
1947	while (c <= 0x187f1)	// ..to <Tangut Ideograph, Last>
1948		charset[i++] = c++;
1949// 18800..18AFF; Tangut Components
1950	c = 0x18800;		// from TANGUT COMPONENT-001
1951	while (c <= 0x18af2)	// ..to TANGUT COMPONENT-755
1952		charset[i++] = c++;
1953// 1B000..1B0FF; Kana Supplement
1954	c = 0x1b000;		// from KATAKANA LETTER ARCHAIC E
1955	while (c <= 0x1b0ff)	// ..to HENTAIGANA LETTER RE-2
1956		charset[i++] = c++;
1957// 1B100..1B12F; Kana Extended-A
1958	c = 0x1b100;		// from HENTAIGANA LETTER RE-3
1959	while (c <= 0x1b11e)	// ..to HENTAIGANA LETTER N-MU-MO-2
1960		charset[i++] = c++;
1961// 1B170..1B2FF; Nushu
1962	c = 0x1b170;		// from NUSHU CHARACTER-1B170
1963	while (c <= 0x1b2fb)	// ..to NUSHU CHARACTER-1B2FB
1964		charset[i++] = c++;
1965// 1BC00..1BC9F; Duployan
1966	c = 0x1bc00;		// from DUPLOYAN LETTER H
1967	while (c <= 0x1bc6a)	// ..to DUPLOYAN LETTER VOCALIC M
1968		charset[i++] = c++;
1969	c = 0x1bc70;		// from DUPLOYAN AFFIX LEFT HORIZONTAL SECANT
1970	while (c <= 0x1bc7c)	// ..to DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1971		charset[i++] = c++;
1972	c = 0x1bc80;		// from DUPLOYAN AFFIX HIGH ACUTE
1973	while (c <= 0x1bc88)	// ..to DUPLOYAN AFFIX HIGH VERTICAL
1974		charset[i++] = c++;
1975	c = 0x1bc90;		// from DUPLOYAN AFFIX LOW ACUTE
1976	while (c <= 0x1bc99)	// ..to DUPLOYAN AFFIX LOW ARROW
1977		charset[i++] = c++;
1978	c = 0x1bc9c;		// from DUPLOYAN SIGN O WITH CROSS
1979	while (c <= 0x1bc9f)	// ..to DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1980		charset[i++] = c++;
1981// 1BCA0..1BCAF; Shorthand Format Controls
1982	c = 0x1bca0;		// from SHORTHAND FORMAT LETTER OVERLAP
1983	while (c <= 0x1bca3)	// ..to SHORTHAND FORMAT UP STEP
1984		charset[i++] = c++;
1985// 1D000..1D0FF; Byzantine Musical Symbols
1986	c = 0x1d000;		// from BYZANTINE MUSICAL SYMBOL PSILI
1987	while (c <= 0x1d0f5)	// ..to BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1988		charset[i++] = c++;
1989// 1D100..1D1FF; Musical Symbols
1990	c = 0x1d100;		// from MUSICAL SYMBOL SINGLE BARLINE
1991	while (c <= 0x1d126)	// ..to MUSICAL SYMBOL DRUM CLEF-2
1992		charset[i++] = c++;
1993	c = 0x1d129;		// from MUSICAL SYMBOL MULTIPLE MEASURE REST
1994	while (c <= 0x1d1e8)	// ..to MUSICAL SYMBOL KIEVAN FLAT SIGN
1995		charset[i++] = c++;
1996// 1D200..1D24F; Ancient Greek Musical Notation
1997	c = 0x1d200;		// from GREEK VOCAL NOTATION SYMBOL-1
1998	while (c <= 0x1d245)	// ..to GREEK MUSICAL LEIMMA
1999		charset[i++] = c++;
2000// 1D2E0..1D2FF; Mayan Numerals
2001	c = 0x1d2e0;		// from MAYAN NUMERAL ZERO
2002	while (c <= 0x1d2f3)	// ..to MAYAN NUMERAL NINETEEN
2003		charset[i++] = c++;
2004// 1D300..1D35F; Tai Xuan Jing Symbols
2005	c = 0x1d300;		// from MONOGRAM FOR EARTH
2006	while (c <= 0x1d356)	// ..to TETRAGRAM FOR FOSTERING
2007		charset[i++] = c++;
2008// 1D360..1D37F; Counting Rod Numerals
2009	c = 0x1d360;		// from COUNTING ROD UNIT DIGIT ONE
2010	while (c <= 0x1d378)	// ..to TALLY MARK FIVE
2011		charset[i++] = c++;
2012// 1D400..1D7FF; Mathematical Alphanumeric Symbols
2013	c = 0x1d400;		// from MATHEMATICAL BOLD CAPITAL A
2014	while (c <= 0x1d454)	// ..to MATHEMATICAL ITALIC SMALL G
2015		charset[i++] = c++;
2016	c = 0x1d456;		// from MATHEMATICAL ITALIC SMALL I
2017	while (c <= 0x1d49c)	// ..to MATHEMATICAL SCRIPT CAPITAL A
2018		charset[i++] = c++;
2019	charset[i++] = 0x1d49e;	// MATHEMATICAL SCRIPT CAPITAL C
2020	charset[i++] = 0x1d49f;	// MATHEMATICAL SCRIPT CAPITAL D
2021	charset[i++] = 0x1d4a5;	// MATHEMATICAL SCRIPT CAPITAL J
2022	charset[i++] = 0x1d4a6;	// MATHEMATICAL SCRIPT CAPITAL K
2023	c = 0x1d4a9;		// from MATHEMATICAL SCRIPT CAPITAL N
2024	while (c <= 0x1d4ac)	// ..to MATHEMATICAL SCRIPT CAPITAL Q
2025		charset[i++] = c++;
2026	c = 0x1d4ae;		// from MATHEMATICAL SCRIPT CAPITAL S
2027	while (c <= 0x1d4b9)	// ..to MATHEMATICAL SCRIPT SMALL D
2028		charset[i++] = c++;
2029	c = 0x1d4bd;		// from MATHEMATICAL SCRIPT SMALL H
2030	while (c <= 0x1d4c3)	// ..to MATHEMATICAL SCRIPT SMALL N
2031		charset[i++] = c++;
2032	c = 0x1d4c5;		// from MATHEMATICAL SCRIPT SMALL P
2033	while (c <= 0x1d505)	// ..to MATHEMATICAL FRAKTUR CAPITAL B
2034		charset[i++] = c++;
2035	c = 0x1d507;		// from MATHEMATICAL FRAKTUR CAPITAL D
2036	while (c <= 0x1d50a)	// ..to MATHEMATICAL FRAKTUR CAPITAL G
2037		charset[i++] = c++;
2038	c = 0x1d50d;		// from MATHEMATICAL FRAKTUR CAPITAL J
2039	while (c <= 0x1d514)	// ..to MATHEMATICAL FRAKTUR CAPITAL Q
2040		charset[i++] = c++;
2041	c = 0x1d516;		// from MATHEMATICAL FRAKTUR CAPITAL S
2042	while (c <= 0x1d51c)	// ..to MATHEMATICAL FRAKTUR CAPITAL Y
2043		charset[i++] = c++;
2044	c = 0x1d51e;		// from MATHEMATICAL FRAKTUR SMALL A
2045	while (c <= 0x1d539)	// ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL B
2046		charset[i++] = c++;
2047	c = 0x1d53b;		// from MATHEMATICAL DOUBLE-STRUCK CAPITAL D
2048	while (c <= 0x1d53e)	// ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL G
2049		charset[i++] = c++;
2050	c = 0x1d540;		// from MATHEMATICAL DOUBLE-STRUCK CAPITAL I
2051	while (c <= 0x1d544)	// ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL M
2052		charset[i++] = c++;
2053	c = 0x1d54a;		// from MATHEMATICAL DOUBLE-STRUCK CAPITAL S
2054	while (c <= 0x1d550)	// ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
2055		charset[i++] = c++;
2056	c = 0x1d552;		// from MATHEMATICAL DOUBLE-STRUCK SMALL A
2057	while (c <= 0x1d6a5)	// ..to MATHEMATICAL ITALIC SMALL DOTLESS J
2058		charset[i++] = c++;
2059	c = 0x1d6a8;		// from MATHEMATICAL BOLD CAPITAL ALPHA
2060	while (c <= 0x1d7cb)	// ..to MATHEMATICAL BOLD SMALL DIGAMMA
2061		charset[i++] = c++;
2062	c = 0x1d7ce;		// from MATHEMATICAL BOLD DIGIT ZERO
2063	while (c <= 0x1d7ff)	// ..to MATHEMATICAL MONOSPACE DIGIT NINE
2064		charset[i++] = c++;
2065// 1D800..1DAAF; Sutton SignWriting
2066	c = 0x1d800;		// from SIGNWRITING HAND-FIST INDEX
2067	while (c <= 0x1da8b)	// ..to SIGNWRITING PARENTHESIS
2068		charset[i++] = c++;
2069	c = 0x1da9b;		// from SIGNWRITING FILL MODIFIER-2
2070	while (c <= 0x1da9f)	// ..to SIGNWRITING FILL MODIFIER-6
2071		charset[i++] = c++;
2072	c = 0x1daa1;		// from SIGNWRITING ROTATION MODIFIER-2
2073	while (c <= 0x1daaf)	// ..to SIGNWRITING ROTATION MODIFIER-16
2074		charset[i++] = c++;
2075// 1E000..1E02F; Glagolitic Supplement
2076	c = 0x1e000;		// from COMBINING GLAGOLITIC LETTER AZU
2077	while (c <= 0x1e006)	// ..to COMBINING GLAGOLITIC LETTER ZHIVETE
2078		charset[i++] = c++;
2079	c = 0x1e008;		// from COMBINING GLAGOLITIC LETTER ZEMLJA
2080	while (c <= 0x1e018)	// ..to COMBINING GLAGOLITIC LETTER HERU
2081		charset[i++] = c++;
2082	c = 0x1e01b;		// from COMBINING GLAGOLITIC LETTER SHTA
2083	while (c <= 0x1e021)	// ..to COMBINING GLAGOLITIC LETTER YATI
2084		charset[i++] = c++;
2085	charset[i++] = 0x1e023;	// COMBINING GLAGOLITIC LETTER YU
2086	charset[i++] = 0x1e024;	// COMBINING GLAGOLITIC LETTER SMALL YUS
2087	c = 0x1e026;		// from COMBINING GLAGOLITIC LETTER YO
2088	while (c <= 0x1e02a)	// ..to COMBINING GLAGOLITIC LETTER FITA
2089		charset[i++] = c++;
2090// 1E800..1E8DF; Mende Kikakui
2091	c = 0x1e800;		// from MENDE KIKAKUI SYLLABLE M001 KI
2092	while (c <= 0x1e8c4)	// ..to MENDE KIKAKUI SYLLABLE M060 NYON
2093		charset[i++] = c++;
2094	c = 0x1e8c7;		// from MENDE KIKAKUI DIGIT ONE
2095	while (c <= 0x1e8d6)	// ..to MENDE KIKAKUI COMBINING NUMBER MILLIONS
2096		charset[i++] = c++;
2097// 1E900..1E95F; Adlam
2098	c = 0x1e900;		// from ADLAM CAPITAL LETTER ALIF
2099	while (c <= 0x1e94a)	// ..to ADLAM NUKTA
2100		charset[i++] = c++;
2101	c = 0x1e950;		// from ADLAM DIGIT ZERO
2102	while (c <= 0x1e959)	// ..to ADLAM DIGIT NINE
2103		charset[i++] = c++;
2104	charset[i++] = 0x1e95e;	// ADLAM INITIAL EXCLAMATION MARK
2105	charset[i++] = 0x1e95f;	// ADLAM INITIAL QUESTION MARK
2106// 1EC70..1ECBF; Indic Siyaq Numbers
2107	c = 0x1ec71;		// from INDIC SIYAQ NUMBER ONE
2108	while (c <= 0x1ecb4)	// ..to INDIC SIYAQ ALTERNATE LAKH MARK
2109		charset[i++] = c++;
2110// 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2111	c = 0x1ee00;		// from ARABIC MATHEMATICAL ALEF
2112	while (c <= 0x1ee03)	// ..to ARABIC MATHEMATICAL DAL
2113		charset[i++] = c++;
2114	c = 0x1ee05;		// from ARABIC MATHEMATICAL WAW
2115	while (c <= 0x1ee1f)	// ..to ARABIC MATHEMATICAL DOTLESS QAF
2116		charset[i++] = c++;
2117	charset[i++] = 0x1ee21;	// ARABIC MATHEMATICAL INITIAL BEH
2118	charset[i++] = 0x1ee22;	// ARABIC MATHEMATICAL INITIAL JEEM
2119	c = 0x1ee29;		// from ARABIC MATHEMATICAL INITIAL YEH
2120	while (c <= 0x1ee32)	// ..to ARABIC MATHEMATICAL INITIAL QAF
2121		charset[i++] = c++;
2122	c = 0x1ee34;		// from ARABIC MATHEMATICAL INITIAL SHEEN
2123	while (c <= 0x1ee37)	// ..to ARABIC MATHEMATICAL INITIAL KHAH
2124		charset[i++] = c++;
2125	charset[i++] = 0x1ee4d;	// ARABIC MATHEMATICAL TAILED NOON
2126	charset[i++] = 0x1ee4f;	// ARABIC MATHEMATICAL TAILED AIN
2127	charset[i++] = 0x1ee51;	// ARABIC MATHEMATICAL TAILED SAD
2128	charset[i++] = 0x1ee52;	// ARABIC MATHEMATICAL TAILED QAF
2129	charset[i++] = 0x1ee61;	// ARABIC MATHEMATICAL STRETCHED BEH
2130	charset[i++] = 0x1ee62;	// ARABIC MATHEMATICAL STRETCHED JEEM
2131	c = 0x1ee67;		// from ARABIC MATHEMATICAL STRETCHED HAH
2132	while (c <= 0x1ee6a)	// ..to ARABIC MATHEMATICAL STRETCHED KAF
2133		charset[i++] = c++;
2134	c = 0x1ee6c;		// from ARABIC MATHEMATICAL STRETCHED MEEM
2135	while (c <= 0x1ee72)	// ..to ARABIC MATHEMATICAL STRETCHED QAF
2136		charset[i++] = c++;
2137	c = 0x1ee74;		// from ARABIC MATHEMATICAL STRETCHED SHEEN
2138	while (c <= 0x1ee77)	// ..to ARABIC MATHEMATICAL STRETCHED KHAH
2139		charset[i++] = c++;
2140	c = 0x1ee79;		// from ARABIC MATHEMATICAL STRETCHED DAD
2141	while (c <= 0x1ee7c)	// ..to ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
2142		charset[i++] = c++;
2143	c = 0x1ee80;		// from ARABIC MATHEMATICAL LOOPED ALEF
2144	while (c <= 0x1ee89)	// ..to ARABIC MATHEMATICAL LOOPED YEH
2145		charset[i++] = c++;
2146	c = 0x1ee8b;		// from ARABIC MATHEMATICAL LOOPED LAM
2147	while (c <= 0x1ee9b)	// ..to ARABIC MATHEMATICAL LOOPED GHAIN
2148		charset[i++] = c++;
2149	charset[i++] = 0x1eea1;	// ARABIC MATHEMATICAL DOUBLE-STRUCK BEH
2150	charset[i++] = 0x1eea3;	// ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
2151	c = 0x1eea5;		// from ARABIC MATHEMATICAL DOUBLE-STRUCK WAW
2152	while (c <= 0x1eea9)	// ..to ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
2153		charset[i++] = c++;
2154	c = 0x1eeab;		// from ARABIC MATHEMATICAL DOUBLE-STRUCK LAM
2155	while (c <= 0x1eebb)	// ..to ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
2156		charset[i++] = c++;
2157	charset[i++] = 0x1eef0;	// ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL
2158	charset[i++] = 0x1eef1;	// ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
2159// 1F000..1F02F; Mahjong Tiles
2160	c = 0x1f000;		// from MAHJONG TILE EAST WIND
2161	while (c <= 0x1f02b)	// ..to MAHJONG TILE BACK
2162		charset[i++] = c++;
2163// 1F030..1F09F; Domino Tiles
2164	c = 0x1f030;		// from DOMINO TILE HORIZONTAL BACK
2165	while (c <= 0x1f093)	// ..to DOMINO TILE VERTICAL-06-06
2166		charset[i++] = c++;
2167// 1F0A0..1F0FF; Playing Cards
2168	c = 0x1f0a0;		// from PLAYING CARD BACK
2169	while (c <= 0x1f0ae)	// ..to PLAYING CARD KING OF SPADES
2170		charset[i++] = c++;
2171	c = 0x1f0b1;		// from PLAYING CARD ACE OF HEARTS
2172	while (c <= 0x1f0bf)	// ..to PLAYING CARD RED JOKER
2173		charset[i++] = c++;
2174	c = 0x1f0c1;		// from PLAYING CARD ACE OF DIAMONDS
2175	while (c <= 0x1f0cf)	// ..to PLAYING CARD BLACK JOKER
2176		charset[i++] = c++;
2177	c = 0x1f0d1;		// from PLAYING CARD ACE OF CLUBS
2178	while (c <= 0x1f0f5)	// ..to PLAYING CARD TRUMP-21
2179		charset[i++] = c++;
2180// 1F100..1F1FF; Enclosed Alphanumeric Supplement
2181	c = 0x1f100;		// from DIGIT ZERO FULL STOP
2182	while (c <= 0x1f10c)	// ..to DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
2183		charset[i++] = c++;
2184	c = 0x1f110;		// from PARENTHESIZED LATIN CAPITAL LETTER A
2185	while (c <= 0x1f16b)	// ..to RAISED MD SIGN
2186		charset[i++] = c++;
2187	c = 0x1f170;		// from NEGATIVE SQUARED LATIN CAPITAL LETTER A
2188	while (c <= 0x1f1ac)	// ..to SQUARED VOD
2189		charset[i++] = c++;
2190	c = 0x1f1e6;		// from REGIONAL INDICATOR SYMBOL LETTER A
2191	while (c <= 0x1f1ff)	// ..to REGIONAL INDICATOR SYMBOL LETTER Z
2192		charset[i++] = c++;
2193// 1F200..1F2FF; Enclosed Ideographic Supplement
2194	charset[i++] = 0x1f200;	// SQUARE HIRAGANA HOKA
2195	charset[i++] = 0x1f202;	// SQUARED KATAKANA SA
2196	c = 0x1f210;		// from SQUARED CJK UNIFIED IDEOGRAPH-624B
2197	while (c <= 0x1f23b)	// ..to SQUARED CJK UNIFIED IDEOGRAPH-914D
2198		charset[i++] = c++;
2199	c = 0x1f240;		// from TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C
2200	while (c <= 0x1f248)	// ..to TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
2201		charset[i++] = c++;
2202	charset[i++] = 0x1f250;	// CIRCLED IDEOGRAPH ADVANTAGE
2203	charset[i++] = 0x1f251;	// CIRCLED IDEOGRAPH ACCEPT
2204	c = 0x1f260;		// from ROUNDED SYMBOL FOR FU
2205	while (c <= 0x1f265)	// ..to ROUNDED SYMBOL FOR CAI
2206		charset[i++] = c++;
2207// 1F300..1F5FF; Miscellaneous Symbols and Pictographs
2208	c = 0x1f300;		// from CYCLONE
2209	while (c <= 0x1f5ff)	// ..to MOYAI
2210		charset[i++] = c++;
2211// 1F600..1F64F; Emoticons
2212	c = 0x1f600;		// from GRINNING FACE
2213	while (c <= 0x1f64f)	// ..to PERSON WITH FOLDED HANDS
2214		charset[i++] = c++;
2215// 1F650..1F67F; Ornamental Dingbats
2216	c = 0x1f650;		// from NORTH WEST POINTING LEAF
2217	while (c <= 0x1f67f)	// ..to REVERSE CHECKER BOARD
2218		charset[i++] = c++;
2219// 1F680..1F6FF; Transport and Map Symbols
2220	c = 0x1f680;		// from ROCKET
2221	while (c <= 0x1f6d4)	// ..to PAGODA
2222		charset[i++] = c++;
2223	c = 0x1f6e0;		// from HAMMER AND WRENCH
2224	while (c <= 0x1f6ec)	// ..to AIRPLANE ARRIVING
2225		charset[i++] = c++;
2226	c = 0x1f6f0;		// from SATELLITE
2227	while (c <= 0x1f6f9)	// ..to SKATEBOARD
2228		charset[i++] = c++;
2229// 1F700..1F77F; Alchemical Symbols
2230	c = 0x1f700;		// from ALCHEMICAL SYMBOL FOR QUINTESSENCE
2231	while (c <= 0x1f773)	// ..to ALCHEMICAL SYMBOL FOR HALF OUNCE
2232		charset[i++] = c++;
2233// 1F780..1F7FF; Geometric Shapes Extended
2234	c = 0x1f780;		// from BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE
2235	while (c <= 0x1f7d8)	// ..to NEGATIVE CIRCLED SQUARE
2236		charset[i++] = c++;
2237// 1F800..1F8FF; Supplemental Arrows-C
2238	c = 0x1f800;		// from LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD
2239	while (c <= 0x1f80b)	// ..to DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
2240		charset[i++] = c++;
2241	c = 0x1f810;		// from LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD
2242	while (c <= 0x1f847)	// ..to DOWNWARDS HEAVY ARROW
2243		charset[i++] = c++;
2244	c = 0x1f850;		// from LEFTWARDS SANS-SERIF ARROW
2245	while (c <= 0x1f859)	// ..to UP DOWN SANS-SERIF ARROW
2246		charset[i++] = c++;
2247	c = 0x1f860;		// from WIDE-HEADED LEFTWARDS LIGHT BARB ARROW
2248	while (c <= 0x1f887)	// ..to WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
2249		charset[i++] = c++;
2250	c = 0x1f890;		// from LEFTWARDS TRIANGLE ARROWHEAD
2251	while (c <= 0x1f8ad)	// ..to WHITE ARROW SHAFT WIDTH TWO THIRDS
2252		charset[i++] = c++;
2253// 1F900..1F9FF; Supplemental Symbols and Pictographs
2254	c = 0x1f900;		// from CIRCLED CROSS FORMEE WITH FOUR DOTS
2255	while (c <= 0x1f90b)	// ..to DOWNWARD FACING NOTCHED HOOK WITH DOT
2256		charset[i++] = c++;
2257	c = 0x1f910;		// from ZIPPER-MOUTH FACE
2258	while (c <= 0x1f93e)	// ..to HANDBALL
2259		charset[i++] = c++;
2260	c = 0x1f940;		// from WILTED FLOWER
2261	while (c <= 0x1f970)	// ..to SMILING FACE WITH SMILING EYES AND THREE HEARTS
2262		charset[i++] = c++;
2263	c = 0x1f973;		// from FACE WITH PARTY HORN AND PARTY HAT
2264	while (c <= 0x1f976)	// ..to FREEZING FACE
2265		charset[i++] = c++;
2266	c = 0x1f97c;		// from LAB COAT
2267	while (c <= 0x1f9a2)	// ..to SWAN
2268		charset[i++] = c++;
2269	c = 0x1f9b0;		// from EMOJI COMPONENT RED HAIR
2270	while (c <= 0x1f9b9)	// ..to SUPERVILLAIN
2271		charset[i++] = c++;
2272	charset[i++] = 0x1f9c0;	// CHEESE WEDGE
2273	charset[i++] = 0x1f9c2;	// SALT SHAKER
2274	c = 0x1f9d0;		// from FACE WITH MONOCLE
2275	while (c <= 0x1f9ff)	// ..to NAZAR AMULET
2276		charset[i++] = c++;
2277// 1FA00..1FA6F; Chess Symbols
2278	c = 0x1fa60;		// from XIANGQI RED GENERAL
2279	while (c <= 0x1fa6d)	// ..to XIANGQI BLACK SOLDIER
2280		charset[i++] = c++;
2281// 20000..2A6DF; CJK Unified Ideographs Extension B
2282	c = 0x20000;		// from <CJK Ideograph Extension B, First>
2283	while (c <= 0x2a6d6)	// ..to <CJK Ideograph Extension B, Last>
2284		charset[i++] = c++;
2285// 2A700..2B73F; CJK Unified Ideographs Extension C
2286	c = 0x2a700;		// from <CJK Ideograph Extension C, First>
2287	while (c <= 0x2b734)	// ..to <CJK Ideograph Extension C, Last>
2288		charset[i++] = c++;
2289// 2B740..2B81F; CJK Unified Ideographs Extension D
2290	c = 0x2b740;		// from <CJK Ideograph Extension D, First>
2291	while (c <= 0x2b81d)	// ..to <CJK Ideograph Extension D, Last>
2292		charset[i++] = c++;
2293// 2B820..2CEAF; CJK Unified Ideographs Extension E
2294	c = 0x2b820;		// from <CJK Ideograph Extension E, First>
2295	while (c <= 0x2cea1)	// ..to <CJK Ideograph Extension E, Last>
2296		charset[i++] = c++;
2297// 2CEB0..2EBEF; CJK Unified Ideographs Extension F
2298	c = 0x2ceb0;		// from <CJK Ideograph Extension F, First>
2299	while (c <= 0x2ebe0)	// ..to <CJK Ideograph Extension F, Last>
2300		charset[i++] = c++;
2301// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2302	c = 0x2f800;		// from CJK COMPATIBILITY IDEOGRAPH-2F800
2303	while (c <= 0x2fa1d)	// ..to CJK COMPATIBILITY IDEOGRAPH-2FA1D
2304		charset[i++] = c++;
2305// E0000..E007F; Tags
2306	c = 0xe0020;		// from TAG SPACE
2307	while (c <= 0xe007f)	// ..to CANCEL TAG
2308		charset[i++] = c++;
2309// E0100..E01EF; Variation Selectors Supplement
2310// F0000..FFFFF; Supplementary Private Use Area-A
2311// 100000..10FFFF; Supplementary Private Use Area-B
2312
2313/* Zero-terminate it, and cache the first character */
2314	charset[i] = 0;
2315	c0 = charset[0];
2316
2317	last = minlength - 1;
2318	i = 0;
2319	while (i <= last) {
2320		id[i] = 0;
2321		word[i++] = c0;
2322	}
2323	lastid = -1;
2324	word[i] = 0;
2325
2326/* We must init word with dummy data, it doesn't get set until filter() */
2327	word = 1;
2328}
2329
2330void generate()
2331{
2332	int i;
2333
2334/* Handle the typical case specially */
2335	if (word[last] = charset[++lastid]) return;
2336
2337	lastid = 0;
2338	word[i = last] = c0;
2339	while (i--) {			// Have a preceding position?
2340		if (word[i] = charset[++id[i]]) return;
2341		id[i] = 0;
2342		word[i] = c0;
2343	}
2344
2345	if (++last < maxlength) {	// Next length?
2346		id[last] = lastid = 0;
2347		word[last] = c0;
2348		word[last + 1] = 0;
2349	} else				// We're done
2350		word = 0;
2351}
2352
2353void restore()
2354{
2355	int i, c;
2356
2357	/* Calculate the current length and infer the character indices */
2358	last = 0;
2359	while (c = word[last]) {
2360		i = 0; while (charset[i] != c && charset[i]) i++;
2361		if (!charset[i]) i = 0;	// Not found
2362		id[last++] = i;
2363	}
2364	lastid = id[--last];
2365}
2366