1 /* Conversion of files between different charsets and surfaces.
   Copyright © 1996, 97, 98, 99, 00 Free Software Foundation, Inc.
   Contributed by François Pinard <pinard@iro.umontreal.ca>, 1996.
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public License
7 as published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be
11 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
12 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the `recode' Library; see the file `COPYING.LIB'.
17 If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 Suite 330, Boston, MA 02111-1307, USA. */
19
20 #include "common.h"
21
22 /* Description of some UCS-2 combinings. */
23
24 #define DONE NOT_A_CHARACTER
25 #define ELSE BYTE_ORDER_MARK_SWAPPED
26
27 static const unsigned short combining_data [] =
28 {
29 /* Diacriticized letters. */
30
31 0x00C0, 0x0041, 0x0300, DONE,
32 0x00C1, 0x0041, 0x0301, DONE,
33 0x00C2, 0x0041, 0x0302, DONE,
34 0x00C3, 0x0041, 0x0303, DONE,
35 0x00C4, 0x0041, 0x0308, DONE,
36 0x00C5, 0x0041, 0x030A, DONE,
37 0x00C7, 0x0043, 0x0327, DONE,
38 0x00C8, 0x0045, 0x0300, DONE,
39 0x00C9, 0x0045, 0x0301, DONE,
40 0x00CA, 0x0045, 0x0302, DONE,
41 0x00CB, 0x0045, 0x0308, DONE,
42 0x00CC, 0x0049, 0x0300, DONE,
43 0x00CD, 0x0049, 0x0301, DONE,
44 0x00CE, 0x0049, 0x0302, DONE,
45 0x00CF, 0x0049, 0x0308, DONE,
46 0x00D1, 0x004E, 0x0303, DONE,
47 0x00D2, 0x004F, 0x0300, DONE,
48 0x00D3, 0x004F, 0x0301, DONE,
49 0x00D4, 0x004F, 0x0302, DONE,
50 0x00D5, 0x004F, 0x0303, DONE,
51 0x00D6, 0x004F, 0x0308, DONE,
52 0x00D8, 0x004F, 0x0338, DONE,
53 0x00D9, 0x0055, 0x0300, DONE,
54 0x00DA, 0x0055, 0x0301, DONE,
55 0x00DB, 0x0055, 0x0302, DONE,
56 0x00DC, 0x0055, 0x0308, DONE,
57 0x00DD, 0x0059, 0x0301, DONE,
58 0x00E0, 0x0061, 0x0300, DONE,
59 0x00E1, 0x0061, 0x0301, DONE,
60 0x00E2, 0x0061, 0x0302, DONE,
61 0x00E3, 0x0061, 0x0303, DONE,
62 0x00E4, 0x0061, 0x0308, DONE,
63 0x00E5, 0x0061, 0x030A, DONE,
64 0x00E7, 0x0063, 0x0327, DONE,
65 0x00E8, 0x0065, 0x0300, DONE,
66 0x00E9, 0x0065, 0x0301, DONE,
67 0x00EA, 0x0065, 0x0302, DONE,
68 0x00EB, 0x0065, 0x0308, DONE,
69 0x00EC, 0x0069, 0x0300, DONE,
70 0x00ED, 0x0069, 0x0301, DONE,
71 0x00EE, 0x0069, 0x0302, DONE,
72 0x00EF, 0x0069, 0x0308, DONE,
73 0x00F1, 0x006E, 0x0303, DONE,
74 0x00F2, 0x006F, 0x0300, DONE,
75 0x00F3, 0x006F, 0x0301, DONE,
76 0x00F4, 0x006F, 0x0302, DONE,
77 0x00F5, 0x006F, 0x0303, DONE,
78 0x00F6, 0x006F, 0x0308, DONE,
79 0x00F8, 0x006F, 0x0338, DONE,
80 0x00F9, 0x0075, 0x0300, DONE,
81 0x00FA, 0x0075, 0x0301, DONE,
82 0x00FB, 0x0075, 0x0302, DONE,
83 0x00FC, 0x0075, 0x0308, DONE,
84 0x00FD, 0x0079, 0x0301, DONE,
85 0x00FF, 0x0079, 0x0308, DONE,
86 0x0100, 0x0041, 0x0304, DONE,
87 0x0101, 0x0061, 0x0304, DONE,
88 0x0102, 0x0041, 0x0306, DONE,
89 0x0103, 0x0061, 0x0306, DONE,
90 0x0104, 0x0041, 0x0328, DONE,
91 0x0105, 0x0061, 0x0328, DONE,
92 0x0106, 0x0043, 0x0301, DONE,
93 0x0107, 0x0063, 0x0301, DONE,
94 0x0108, 0x0043, 0x0302, DONE,
95 0x0109, 0x0063, 0x0302, DONE,
96 0x010A, 0x0043, 0x0307, DONE,
97 0x010B, 0x0063, 0x0307, DONE,
98 0x010C, 0x0043, 0x030C, DONE,
99 0x010D, 0x0063, 0x030C, DONE,
100 0x010E, 0x0044, 0x030C, DONE,
101 0x010F, 0x0064, 0x030C, DONE,
102 0x0110, 0x0044, 0x0335, DONE,
103 0x0111, 0x0064, 0x0335, DONE,
104 0x0112, 0x0045, 0x0304, DONE,
105 0x0113, 0x0065, 0x0304, DONE,
106 0x0114, 0x0045, 0x0306, DONE,
107 0x0115, 0x0065, 0x0306, DONE,
108 0x0116, 0x0045, 0x0307, DONE,
109 0x0117, 0x0065, 0x0307, DONE,
110 0x0118, 0x0045, 0x0328, DONE,
111 0x0119, 0x0065, 0x0328, DONE,
112 0x011A, 0x0045, 0x030C, DONE,
113 0x011B, 0x0065, 0x030C, DONE,
114 0x011C, 0x0047, 0x0302, DONE,
115 0x011D, 0x0067, 0x0302, DONE,
116 0x011E, 0x0047, 0x0306, DONE,
117 0x011F, 0x0067, 0x0306, DONE,
118 0x0120, 0x0047, 0x0307, DONE,
119 0x0121, 0x0067, 0x0307, DONE,
120 0x0122, 0x0047, 0x0327, DONE,
121 0x0123, 0x0067, 0x0327, DONE,
122 0x0124, 0x0048, 0x0302, DONE,
123 0x0125, 0x0068, 0x0302, DONE,
124 0x0126, 0x0048, 0x0335, DONE,
125 0x0127, 0x0068, 0x0335, DONE,
126 0x0128, 0x0049, 0x0303, DONE,
127 0x0129, 0x0069, 0x0303, DONE,
128 0x012A, 0x0049, 0x0304, DONE,
129 0x012B, 0x0069, 0x0304, DONE,
130 0x012C, 0x0049, 0x0306, DONE,
131 0x012D, 0x0069, 0x0306, DONE,
132 0x012E, 0x0049, 0x0328, DONE,
133 0x012F, 0x0069, 0x0328, DONE,
134 0x0130, 0x0049, 0x0307, DONE,
135 0x0134, 0x004A, 0x0302, DONE,
136 0x0135, 0x006A, 0x0302, DONE,
137 0x0136, 0x004B, 0x0327, DONE,
138 0x0137, 0x006B, 0x0327, DONE,
139 0x0139, 0x004C, 0x0301, DONE,
140 0x013A, 0x006C, 0x0301, DONE,
141 0x013B, 0x004C, 0x0327, DONE,
142 0x013C, 0x006C, 0x0327, DONE,
143 0x013D, 0x004C, 0x030C, DONE,
144 0x013E, 0x006C, 0x030C, DONE,
145 0x0141, 0x004C, 0x0337, DONE,
146 0x0142, 0x006C, 0x0337, DONE,
147 0x0143, 0x004E, 0x0301, DONE,
148 0x0144, 0x006E, 0x0301, DONE,
149 0x0145, 0x004E, 0x0327, DONE,
150 0x0146, 0x006E, 0x0327, DONE,
151 0x0147, 0x004E, 0x030C, DONE,
152 0x0148, 0x006E, 0x030C, DONE,
153 0x014C, 0x004F, 0x0304, DONE,
154 0x014D, 0x006F, 0x0304, DONE,
155 0x014E, 0x004F, 0x0306, DONE,
156 0x014F, 0x006F, 0x0306, DONE,
157 0x0150, 0x004F, 0x030B, DONE,
158 0x0151, 0x006F, 0x030B, DONE,
159 0x0154, 0x0052, 0x0301, DONE,
160 0x0155, 0x0072, 0x0301, DONE,
161 0x0156, 0x0052, 0x0327, DONE,
162 0x0157, 0x0072, 0x0327, DONE,
163 0x0158, 0x0052, 0x030C, DONE,
164 0x0159, 0x0072, 0x030C, DONE,
165 0x015A, 0x0053, 0x0301, DONE,
166 0x015B, 0x0073, 0x0301, DONE,
167 0x015C, 0x0053, 0x0302, DONE,
168 0x015D, 0x0073, 0x0302, DONE,
169 0x015E, 0x0053, 0x0327, DONE,
170 0x015F, 0x0073, 0x0327, DONE,
171 0x0160, 0x0053, 0x030C, DONE,
172 0x0161, 0x0073, 0x030C, DONE,
173 0x0162, 0x0054, 0x0327, DONE,
174 0x0163, 0x0074, 0x0327, DONE,
175 0x0164, 0x0054, 0x030C, DONE,
176 0x0165, 0x0074, 0x030C, DONE,
177 0x0166, 0x0054, 0x0335, DONE,
178 0x0167, 0x0074, 0x0335, DONE,
179 0x0168, 0x0055, 0x0303, DONE,
180 0x0169, 0x0075, 0x0303, DONE,
181 0x016A, 0x0055, 0x0304, DONE,
182 0x016B, 0x0075, 0x0304, DONE,
183 0x016C, 0x0055, 0x0306, DONE,
184 0x016D, 0x0075, 0x0306, DONE,
185 0x016E, 0x0055, 0x030A, DONE,
186 0x016F, 0x0075, 0x030A, DONE,
187 0x0170, 0x0055, 0x030B, DONE,
188 0x0171, 0x0075, 0x030B, DONE,
189 0x0172, 0x0055, 0x0328, DONE,
190 0x0173, 0x0075, 0x0328, DONE,
191 0x0174, 0x0057, 0x0302, DONE,
192 0x0175, 0x0077, 0x0302, DONE,
193 0x0176, 0x0059, 0x0302, DONE,
194 0x0177, 0x0079, 0x0302, DONE,
195 0x0178, 0x0059, 0x0308, DONE,
196 0x0179, 0x005A, 0x0301, DONE,
197 0x017A, 0x007A, 0x0301, DONE,
198 0x017B, 0x005A, 0x0307, DONE,
199 0x017C, 0x007A, 0x0307, DONE,
200 0x017D, 0x005A, 0x030C, DONE,
201 0x017E, 0x007A, 0x030C, DONE,
202 0x0180, 0x0062, 0x0335, DONE,
203 0x0197, 0x0049, 0x0335, DONE,
204 0x019A, 0x006C, 0x0335, DONE,
205 0x019B, 0x03BB, 0x0335, DONE,
206 0x019F, 0x004F, 0x0335, DONE,
207 0x01A0, 0x004F, 0x031B, DONE,
208 0x01A1, 0x006F, 0x031B, DONE,
209 0x01AB, 0x0074, 0x0321, DONE,
210 0x01AE, 0x0054, 0x0322, DONE,
211 0x01AF, 0x0055, 0x031B, DONE,
212 0x01B0, 0x0075, 0x031B, DONE,
213 0x01CD, 0x0041, 0x030C, DONE,
214 0x01CE, 0x0061, 0x030C, DONE,
215 0x01CF, 0x0049, 0x030C, DONE,
216 0x01D0, 0x0069, 0x030C, DONE,
217 0x01D1, 0x004F, 0x030C, DONE,
218 0x01D2, 0x006F, 0x030C, DONE,
219 0x01D3, 0x0055, 0x030C, DONE,
220 0x01D4, 0x0075, 0x030C, DONE,
221 0x01D5, 0x0055, 0x0308, 0x0304, ELSE, 0x00DC, 0x0304, DONE,
222 0x01D6, 0x0075, 0x0308, 0x0304, ELSE, 0x00FC, 0x0304, DONE,
223 0x01D7, 0x0055, 0x0308, 0x0301, ELSE, 0x00DC, 0x0301, DONE,
224 0x01D8, 0x0075, 0x0308, 0x0301, ELSE, 0x00FC, 0x0301, DONE,
225 0x01D9, 0x0055, 0x0308, 0x030C, ELSE, 0x00DC, 0x030C, DONE,
226 0x01DA, 0x0075, 0x0308, 0x030C, ELSE, 0x00FC, 0x030C, DONE,
227 0x01DB, 0x0055, 0x0308, 0x0300, ELSE, 0x00DC, 0x0300, DONE,
228 0x01DC, 0x0075, 0x0308, 0x0300, ELSE, 0x00FC, 0x0300, DONE,
229 0x01DE, 0x0041, 0x0308, 0x0304, ELSE, 0x00C4, 0x0304, DONE,
230 0x01DF, 0x0061, 0x0308, 0x0304, ELSE, 0x00E4, 0x0304, DONE,
231 0x01E0, 0x0041, 0x0307, 0x0304, DONE,
232 0x01E1, 0x0061, 0x0307, 0x0304, DONE,
233 0x01E2, 0x00C6, 0x0304, DONE,
234 0x01E3, 0x00E6, 0x0304, DONE,
235 0x01E4, 0x0047, 0x0335, DONE,
236 0x01E5, 0x0067, 0x0335, DONE,
237 0x01E6, 0x0047, 0x030C, DONE,
238 0x01E7, 0x0067, 0x030C, DONE,
239 0x01E8, 0x004B, 0x030C, DONE,
240 0x01E9, 0x006B, 0x030C, DONE,
241 0x01EA, 0x004F, 0x0328, DONE,
242 0x01EB, 0x006F, 0x0328, DONE,
243 0x01EC, 0x004F, 0x0328, 0x0304, ELSE, 0x01EA, 0x0304, DONE,
244 0x01ED, 0x006F, 0x0328, 0x0304, ELSE, 0x01EB, 0x0304, DONE,
245 0x01EE, 0x01B7, 0x030C, DONE,
246 0x01EF, 0x0292, 0x030C, DONE,
247 0x01F0, 0x006A, 0x030C, DONE,
248 0x0386, 0x0391, 0x0384, DONE,
249 0x0388, 0x0395, 0x0384, DONE,
250 0x0389, 0x0397, 0x0384, DONE,
251 0x038A, 0x0399, 0x0384, DONE,
252 0x038C, 0x039F, 0x0384, DONE,
253 0x038E, 0x03A5, 0x0384, DONE,
254 0x038F, 0x03A9, 0x0384, DONE,
255 0x0390, 0x03B9, 0x0385, DONE,
256 0x03AA, 0x0399, 0x0308, DONE,
257 0x03AB, 0x03A5, 0x0308, DONE,
258 0x03AC, 0x03B1, 0x0384, DONE,
259 0x03AD, 0x03B5, 0x0384, DONE,
260 0x03AE, 0x03B7, 0x0384, DONE,
261 0x03AF, 0x03B9, 0x0384, DONE,
262 0x03B0, 0x03C5, 0x0385, DONE,
263 0x03CA, 0x03B9, 0x0308, DONE,
264 0x03CB, 0x03C5, 0x0308, DONE,
265 0x03CC, 0x03BF, 0x0384, DONE,
266 0x03CD, 0x03C5, 0x0384, DONE,
267 0x03CE, 0x03C9, 0x0384, DONE,
268 0x03D3, 0x03D2, 0x0384, DONE,
269 0x03D4, 0x03D2, 0x0308, DONE,
270 0x0401, 0x0415, 0x0308, DONE,
271 0x0403, 0x0413, 0x0301, DONE,
272 0x0407, 0x0406, 0x0308, DONE,
273 0x040C, 0x041A, 0x0301, DONE,
274 0x040E, 0x0423, 0x0306, DONE,
275 0x0419, 0x0418, 0x0306, DONE,
276 0x0439, 0x0438, 0x0306, DONE,
277 0x0451, 0x0435, 0x0308, DONE,
278 0x0453, 0x0433, 0x0301, DONE,
279 0x0457, 0x0456, 0x0308, DONE,
280 0x045C, 0x043A, 0x0301, DONE,
281 0x045E, 0x0443, 0x0306, DONE,
282 0x0476, 0x0474, 0x030F, DONE,
283 0x0477, 0x0475, 0x030F, DONE,
284 0x0492, 0x0413, 0x0335, DONE,
285 0x0493, 0x0433, 0x0335, DONE,
286 0x0498, 0x0417, 0x0327, DONE,
287 0x0499, 0x0437, 0x0327, DONE,
288 0x04AA, 0x0421, 0x0327, DONE,
289 0x04AB, 0x0441, 0x0327, DONE,
290 0x04B0, 0x04AE, 0x0335, DONE,
291 0x04B1, 0x04AF, 0x0335, DONE,
292 0x04BE, 0x04BC, 0x0328, DONE,
293 0x04BF, 0x04BD, 0x0328, DONE,
294 0x04C1, 0x0416, 0x0306, DONE,
295 0x04C2, 0x0436, 0x0306, DONE,
296 0x04C5, 0x041A, 0x0328, DONE,
297 0x04C6, 0x043A, 0x0328, DONE,
298 0x04C9, 0x0425, 0x0328, DONE,
299 0x04CA, 0x0445, 0x0328, DONE,
300 0x0958, 0x0915, 0x093C, DONE,
301 0x0959, 0x0916, 0x093C, DONE,
302 0x095A, 0x0917, 0x093C, DONE,
303 0x095B, 0x091C, 0x093C, DONE,
304 0x095C, 0x0921, 0x093C, DONE,
305 0x095D, 0x0922, 0x093C, DONE,
306 0x095E, 0x092B, 0x093C, DONE,
307 0x095F, 0x092F, 0x093C, DONE,
308 0x09DC, 0x09A1, 0x09BC, DONE,
309 0x09DD, 0x09A2, 0x09BC, DONE,
310 0x09DF, 0x09AF, 0x09BC, DONE,
311 0x0A59, 0x0A16, 0x0A3C, DONE,
312 0x0A5A, 0x0A17, 0x0A3C, DONE,
313 0x0A5B, 0x0A1C, 0x0A3C, DONE,
314 0x0A5C, 0x0A21, 0x0A3C, DONE,
315 0x0A5E, 0x0A2B, 0x0A3C, DONE,
316 0x0B5C, 0x0B21, 0x0B3C, DONE,
317 0x0B5D, 0x0B22, 0x0B3C, DONE,
318 0x0B5F, 0x0B2F, 0x0B3C, DONE,
319 0x1014, 0x1004, 0x104C, DONE,
320 0x1015, 0x1005, 0x104C, DONE,
321 0x1016, 0x1006, 0x104C, DONE,
322
323 /* Ligatures, digraphs. */
324
325 0x00C6, 0x0041, 0x0045, DONE,
326 0x00E6, 0x0061, 0x0065, DONE,
327 0x0132, 0x0049, 0x004A, DONE,
328 0x0133, 0x0069, 0x006A, DONE,
329 0x013F, 0x004C, 0x00B7, DONE,
330 0x0140, 0x006C, 0x00B7, DONE,
331 0x0149, 0x0027, 0x006E, DONE,
332 0x0152, 0x004F, 0x0045, DONE,
333 0x0153, 0x006F, 0x0065, DONE,
334 0x01C4, 0x0044, 0x005A, 0x030C, ELSE, 0x0044, 0x017D, DONE,
335 0x01C5, 0x0044, 0x007A, 0x030C, ELSE, 0x0044, 0x017E, DONE,
336 0x01C6, 0x0064, 0x007A, 0x030C, ELSE, 0x0064, 0x017E, DONE,
337 0x01C7, 0x004C, 0x004A, DONE,
338 0x01C8, 0x004C, 0x006A, DONE,
339 0x01C9, 0x006C, 0x006A, DONE,
340 0x01CA, 0x004E, 0x004A, DONE,
341 0x01CB, 0x004E, 0x006A, DONE,
342 0x01CC, 0x006E, 0x006A, DONE,
343 0x02A3, 0x0064, 0x007A, DONE,
344 0x02A4, 0x0064, 0x0292, DONE,
345 0x02A5, 0x0064, 0x0291, DONE,
346 0x02A6, 0x0074, 0x0073, DONE,
347 0x02A7, 0x0074, 0x0283, DONE,
348 0x02A8, 0x0074, 0x0255, DONE,
349 0x0409, 0x041B, 0x042C, DONE,
350 0x040A, 0x041D, 0x042C, DONE,
351 0x0459, 0x043B, 0x044C, DONE,
352 0x045A, 0x043D, 0x044C, DONE,
353 0x0EDC, 0x0EAB, 0x0E99, DONE,
354 0x0EDD, 0x0EAB, 0x0EA1, DONE,
355 0x203C, 0x0021, 0x0021, DONE,
356 0x203D, 0x003F, 0x0021, DONE,
357 DONE
358 };
359
360 /* UCS-2 input and output. */
361
362 /*-------------------------------------------------------------------------.
363 | Get one UCS-2 VALUE for TASK, maybe swapping pair of bytes as we go. |
364 | Whenever a byte order mark is seen, either straight or swapped, always |
365 | use it to decide whether itself and subsequent UCS-2 values should be |
366 | swapped, or not. At the very beginning of the text stream, a byte order |
367 | mark is merely swallowed and never returned. Everywhere else, it is |
368 | transmitted as a zero-width non-breaking space. |
369 `-------------------------------------------------------------------------*/
370
371 /* An UCS-2 file canonically has a byte order mark at its very beginning.
372 So, concatenating many UCS-2 files might produce some spurious, but valid
373 zero-width non-breaking spaces. Those are produced for each file, after
374 the first, starting with a byte order mark, regardless of byte order. */
375
376 bool
get_ucs2(unsigned * value,RECODE_SUBTASK subtask)377 get_ucs2 (unsigned *value, RECODE_SUBTASK subtask)
378 {
379 while (true)
380 {
381 int character1;
382 int character2;
383 unsigned chunk;
384
385 character1 = get_byte (subtask);
386 if (character1 == EOF)
387 return false;
388 character2 = get_byte (subtask);
389 if (character2 == EOF)
390 {
391 SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
392 return false;
393 }
394
395 switch (subtask->task->swap_input)
396 {
397 case RECODE_SWAP_UNDECIDED:
398 chunk = ((MASK (8) & character1) << 8) | (MASK (8) & character2);
399 switch (chunk)
400 {
401 case BYTE_ORDER_MARK:
402 subtask->task->swap_input = RECODE_SWAP_NO;
403 break;
404
405 case BYTE_ORDER_MARK_SWAPPED:
406 subtask->task->swap_input = RECODE_SWAP_YES;
407 break;
408
409 default:
410 *value = chunk;
411 subtask->task->swap_input = RECODE_SWAP_NO;
412 if (subtask->task->byte_order_mark)
413 RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
414 return true;
415 }
416 break;
417
418 case RECODE_SWAP_NO:
419 chunk = ((MASK (8) & character1) << 8) | (MASK (8) & character2);
420 switch (chunk)
421 {
422 case BYTE_ORDER_MARK:
423 RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
424 break;
425
426 case BYTE_ORDER_MARK_SWAPPED:
427 subtask->task->swap_input = RECODE_SWAP_YES;
428 RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
429 break;
430
431 default:
432 *value = chunk;
433 return true;
434 }
435 break;
436
437 case RECODE_SWAP_YES:
438 chunk = ((MASK (8) & character2) << 8) | (MASK (8) & character1);
439 switch (chunk)
440 {
441 case BYTE_ORDER_MARK:
442 RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
443 break;
444
445 case BYTE_ORDER_MARK_SWAPPED:
446 subtask->task->swap_input = RECODE_SWAP_NO;
447 RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
448 break;
449
450 default:
451 *value = chunk;
452 return true;
453 }
454 break;
455 }
456 }
457 }
458
459 /*-------------------------------.
460 | Put one UCS-2 VALUE for TASK. |
461 `-------------------------------*/
462
463 bool
put_ucs2(unsigned value,RECODE_SUBTASK subtask)464 put_ucs2 (unsigned value, RECODE_SUBTASK subtask)
465 {
466 put_byte (MASK (8) & value >> 8, subtask);
467 put_byte (MASK (8) & value, subtask);
468 return true;
469 }
470
471 /* UCS-4 input and output. */
472
473 /*-------------------------------.
474 | Get one UCS-4 VALUE for TASK. |
475 `-------------------------------*/
476
477 bool
get_ucs4(unsigned * value,RECODE_SUBTASK subtask)478 get_ucs4 (unsigned *value, RECODE_SUBTASK subtask)
479 {
480 int character;
481 unsigned chunk;
482
483 character = get_byte (subtask);
484 if (character == EOF)
485 return false;
486 chunk = (MASK (8) & character) << 24;
487
488 character = get_byte (subtask);
489 if (character == EOF)
490 {
491 SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
492 return false;
493 }
494 chunk |= (MASK (8) & character) << 16;
495
496 character = get_byte (subtask);
497 if (character == EOF)
498 {
499 SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
500 return false;
501 }
502 chunk |= (MASK (8) & character) << 8;
503
504 character = get_byte (subtask);
505 if (character == EOF)
506 {
507 SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
508 return false;
509 }
510 chunk |= MASK (8) & character;
511
512 *value = chunk;
513 return true;
514 }
515
516 /*-------------------------------.
517 | Put one UCS-4 VALUE for TASK. |
518 `-------------------------------*/
519
520 bool
put_ucs4(unsigned value,RECODE_SUBTASK subtask)521 put_ucs4 (unsigned value, RECODE_SUBTASK subtask)
522 {
523 put_byte (MASK (8) & value >> 24, subtask);
524 put_byte (MASK (8) & value >> 16, subtask);
525 put_byte (MASK (8) & value >> 8, subtask);
526 put_byte (MASK (8) & value, subtask);
527 return true;
528 }
529
530 /* Provided steps. */
531
532 /*-----------.
533 | Combined. |
534 `-----------*/
535
536 static bool
init_combined_ucs2(RECODE_STEP step,RECODE_CONST_REQUEST request,RECODE_CONST_OPTION_LIST before_options,RECODE_CONST_OPTION_LIST after_options)537 init_combined_ucs2 (RECODE_STEP step,
538 RECODE_CONST_REQUEST request,
539 RECODE_CONST_OPTION_LIST before_options,
540 RECODE_CONST_OPTION_LIST after_options)
541 {
542 step->before->data_type = RECODE_EXPLODE_DATA;
543 step->before->data = (void *) combining_data;
544 return init_explode (step, request, before_options, after_options);
545 }
546
547 static bool
init_ucs2_combined(RECODE_STEP step,RECODE_CONST_REQUEST request,RECODE_CONST_OPTION_LIST before_options,RECODE_CONST_OPTION_LIST after_options)548 init_ucs2_combined (RECODE_STEP step,
549 RECODE_CONST_REQUEST request,
550 RECODE_CONST_OPTION_LIST before_options,
551 RECODE_CONST_OPTION_LIST after_options)
552 {
553 step->after->data_type = RECODE_EXPLODE_DATA;
554 step->after->data = (void *) combining_data;
555 return init_combine (step, request, before_options, after_options);
556 }
557
558 /*-----------------------------.
559 | Transform Latin-1 to UCS-4. |
560 `-----------------------------*/
561
562 static bool
transform_latin1_ucs4(RECODE_SUBTASK subtask)563 transform_latin1_ucs4 (RECODE_SUBTASK subtask)
564 {
565 int character;
566
567 while (character = get_byte (subtask), character != EOF)
568 put_ucs4 (MASK (8) & character, subtask);
569
570 SUBTASK_RETURN (subtask);
571 }
572
573 /*---------------------------.
574 | Transform UCS-2 to UCS-4. |
575 `---------------------------*/
576
577 static bool
transform_ucs2_ucs4(RECODE_SUBTASK subtask)578 transform_ucs2_ucs4 (RECODE_SUBTASK subtask)
579 {
580 unsigned value;
581
582 while (get_ucs2 (&value, subtask))
583 put_ucs4 (value, subtask);
584
585 SUBTASK_RETURN (subtask);
586 }
587
588 /*-----------------------------------------------------------------.
589 | Declare the basic UCS-2 and UCS-4 charsets and transformations. |
590 `-----------------------------------------------------------------*/
591
592 bool
module_ucs(RECODE_OUTER outer)593 module_ucs (RECODE_OUTER outer)
594 {
595 return
596 declare_single (outer, "combined-UCS-2", "ISO-10646-UCS-2",
597 outer->quality_ucs2_to_variable,
598 init_combined_ucs2, explode_ucs2_ucs2)
599 && declare_single (outer, "ISO-10646-UCS-2", "combined-UCS-2",
600 outer->quality_variable_to_ucs2,
601 init_ucs2_combined, combine_ucs2_ucs2)
602 && declare_single (outer, "latin1", "ISO-10646-UCS-4",
603 outer->quality_byte_to_variable,
604 NULL, transform_latin1_ucs4)
605 && declare_single (outer, "ISO-10646-UCS-2", "ISO-10646-UCS-4",
606 outer->quality_variable_to_variable,
607 NULL, transform_ucs2_ucs4)
608
609 && declare_alias (outer, "UCS", "ISO-10646-UCS-4")
610 && declare_alias (outer, "UCS-4", "ISO-10646-UCS-4")
611 && declare_alias (outer, "ISO_10646", "ISO-10646-UCS-4")
612 && declare_alias (outer, "10646", "ISO-10646-UCS-4")
613 && declare_alias (outer, "u4", "ISO-10646-UCS-4")
614
615 && declare_alias (outer, "UCS-2", "ISO-10646-UCS-2")
616 && declare_alias (outer, "UNICODE-1-1", "ISO-10646-UCS-2") /* RFC1641 */
617 && declare_alias (outer, "BMP", "ISO-10646-UCS-2")
618 && declare_alias (outer, "u2", "ISO-10646-UCS-2")
619 && declare_alias (outer, "rune", "ISO-10646-UCS-2")
620
621 && declare_alias (outer, "co", "combined-UCS-2");
622 }
623
624 void
delmodule_ucs(RECODE_OUTER outer)625 delmodule_ucs (RECODE_OUTER outer)
626 {
627 }
628