1 /* Conversion of files between different charsets and surfaces.
   Copyright © 1996, 97, 98, 99, 00 Free Software Foundation, Inc.
   Contributed by François Pinard <pinard@iro.umontreal.ca>, 1996.
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public License
7    as published by the Free Software Foundation; either version 2 of the
8    License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be
11    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
12    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the `recode' Library; see the file `COPYING.LIB'.
17    If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18    Suite 330, Boston, MA 02111-1307, USA.  */
19 
20 #include "common.h"
21 
22 /* Description of some UCS-2 combinings.  */
23 
/* Markers used inside combining_data below.  Both reuse constants that are
   defined elsewhere; presumably those values were picked because they never
   appear as real characters in the table -- to be confirmed.  */

/* Ends one entry of the table; standing alone, ends the table itself.  */
#define DONE NOT_A_CHARACTER
/* Separates two alternative sequences given for the same entry.  */
#define ELSE BYTE_ORDER_MARK_SWAPPED
26 
/* Table handed to init_explode and init_combine (through init_combined_ucs2
   and init_ucs2_combined) as RECODE_EXPLODE_DATA.  It is a flat list of
   UCS-2 codes.  Each entry begins with one code, continues with a sequence
   of codes paired with it, and stops at DONE; some entries give a second
   sequence after ELSE.  A DONE standing where an entry would begin closes
   the list.  How the entries are interpreted is decided by the consumers,
   which are defined elsewhere; judging by the values, the first code looks
   like a precomposed character and the sequences like its decompositions
   -- to be confirmed there.  */
static const unsigned short combining_data [] =
  {
    /* Diacriticized letters.  */

    0x00C0, 0x0041, 0x0300, DONE,
    0x00C1, 0x0041, 0x0301, DONE,
    0x00C2, 0x0041, 0x0302, DONE,
    0x00C3, 0x0041, 0x0303, DONE,
    0x00C4, 0x0041, 0x0308, DONE,
    0x00C5, 0x0041, 0x030A, DONE,
    0x00C7, 0x0043, 0x0327, DONE,
    0x00C8, 0x0045, 0x0300, DONE,
    0x00C9, 0x0045, 0x0301, DONE,
    0x00CA, 0x0045, 0x0302, DONE,
    0x00CB, 0x0045, 0x0308, DONE,
    0x00CC, 0x0049, 0x0300, DONE,
    0x00CD, 0x0049, 0x0301, DONE,
    0x00CE, 0x0049, 0x0302, DONE,
    0x00CF, 0x0049, 0x0308, DONE,
    0x00D1, 0x004E, 0x0303, DONE,
    0x00D2, 0x004F, 0x0300, DONE,
    0x00D3, 0x004F, 0x0301, DONE,
    0x00D4, 0x004F, 0x0302, DONE,
    0x00D5, 0x004F, 0x0303, DONE,
    0x00D6, 0x004F, 0x0308, DONE,
    0x00D8, 0x004F, 0x0338, DONE,
    0x00D9, 0x0055, 0x0300, DONE,
    0x00DA, 0x0055, 0x0301, DONE,
    0x00DB, 0x0055, 0x0302, DONE,
    0x00DC, 0x0055, 0x0308, DONE,
    0x00DD, 0x0059, 0x0301, DONE,
    0x00E0, 0x0061, 0x0300, DONE,
    0x00E1, 0x0061, 0x0301, DONE,
    0x00E2, 0x0061, 0x0302, DONE,
    0x00E3, 0x0061, 0x0303, DONE,
    0x00E4, 0x0061, 0x0308, DONE,
    0x00E5, 0x0061, 0x030A, DONE,
    0x00E7, 0x0063, 0x0327, DONE,
    0x00E8, 0x0065, 0x0300, DONE,
    0x00E9, 0x0065, 0x0301, DONE,
    0x00EA, 0x0065, 0x0302, DONE,
    0x00EB, 0x0065, 0x0308, DONE,
    0x00EC, 0x0069, 0x0300, DONE,
    0x00ED, 0x0069, 0x0301, DONE,
    0x00EE, 0x0069, 0x0302, DONE,
    0x00EF, 0x0069, 0x0308, DONE,
    0x00F1, 0x006E, 0x0303, DONE,
    0x00F2, 0x006F, 0x0300, DONE,
    0x00F3, 0x006F, 0x0301, DONE,
    0x00F4, 0x006F, 0x0302, DONE,
    0x00F5, 0x006F, 0x0303, DONE,
    0x00F6, 0x006F, 0x0308, DONE,
    0x00F8, 0x006F, 0x0338, DONE,
    0x00F9, 0x0075, 0x0300, DONE,
    0x00FA, 0x0075, 0x0301, DONE,
    0x00FB, 0x0075, 0x0302, DONE,
    0x00FC, 0x0075, 0x0308, DONE,
    0x00FD, 0x0079, 0x0301, DONE,
    0x00FF, 0x0079, 0x0308, DONE,
    0x0100, 0x0041, 0x0304, DONE,
    0x0101, 0x0061, 0x0304, DONE,
    0x0102, 0x0041, 0x0306, DONE,
    0x0103, 0x0061, 0x0306, DONE,
    0x0104, 0x0041, 0x0328, DONE,
    0x0105, 0x0061, 0x0328, DONE,
    0x0106, 0x0043, 0x0301, DONE,
    0x0107, 0x0063, 0x0301, DONE,
    0x0108, 0x0043, 0x0302, DONE,
    0x0109, 0x0063, 0x0302, DONE,
    0x010A, 0x0043, 0x0307, DONE,
    0x010B, 0x0063, 0x0307, DONE,
    0x010C, 0x0043, 0x030C, DONE,
    0x010D, 0x0063, 0x030C, DONE,
    0x010E, 0x0044, 0x030C, DONE,
    0x010F, 0x0064, 0x030C, DONE,
    0x0110, 0x0044, 0x0335, DONE,
    0x0111, 0x0064, 0x0335, DONE,
    0x0112, 0x0045, 0x0304, DONE,
    0x0113, 0x0065, 0x0304, DONE,
    0x0114, 0x0045, 0x0306, DONE,
    0x0115, 0x0065, 0x0306, DONE,
    0x0116, 0x0045, 0x0307, DONE,
    0x0117, 0x0065, 0x0307, DONE,
    0x0118, 0x0045, 0x0328, DONE,
    0x0119, 0x0065, 0x0328, DONE,
    0x011A, 0x0045, 0x030C, DONE,
    0x011B, 0x0065, 0x030C, DONE,
    0x011C, 0x0047, 0x0302, DONE,
    0x011D, 0x0067, 0x0302, DONE,
    0x011E, 0x0047, 0x0306, DONE,
    0x011F, 0x0067, 0x0306, DONE,
    0x0120, 0x0047, 0x0307, DONE,
    0x0121, 0x0067, 0x0307, DONE,
    0x0122, 0x0047, 0x0327, DONE,
    0x0123, 0x0067, 0x0327, DONE,
    0x0124, 0x0048, 0x0302, DONE,
    0x0125, 0x0068, 0x0302, DONE,
    0x0126, 0x0048, 0x0335, DONE,
    0x0127, 0x0068, 0x0335, DONE,
    0x0128, 0x0049, 0x0303, DONE,
    0x0129, 0x0069, 0x0303, DONE,
    0x012A, 0x0049, 0x0304, DONE,
    0x012B, 0x0069, 0x0304, DONE,
    0x012C, 0x0049, 0x0306, DONE,
    0x012D, 0x0069, 0x0306, DONE,
    0x012E, 0x0049, 0x0328, DONE,
    0x012F, 0x0069, 0x0328, DONE,
    0x0130, 0x0049, 0x0307, DONE,
    0x0134, 0x004A, 0x0302, DONE,
    0x0135, 0x006A, 0x0302, DONE,
    0x0136, 0x004B, 0x0327, DONE,
    0x0137, 0x006B, 0x0327, DONE,
    0x0139, 0x004C, 0x0301, DONE,
    0x013A, 0x006C, 0x0301, DONE,
    0x013B, 0x004C, 0x0327, DONE,
    0x013C, 0x006C, 0x0327, DONE,
    0x013D, 0x004C, 0x030C, DONE,
    0x013E, 0x006C, 0x030C, DONE,
    0x0141, 0x004C, 0x0337, DONE,
    0x0142, 0x006C, 0x0337, DONE,
    0x0143, 0x004E, 0x0301, DONE,
    0x0144, 0x006E, 0x0301, DONE,
    0x0145, 0x004E, 0x0327, DONE,
    0x0146, 0x006E, 0x0327, DONE,
    0x0147, 0x004E, 0x030C, DONE,
    0x0148, 0x006E, 0x030C, DONE,
    0x014C, 0x004F, 0x0304, DONE,
    0x014D, 0x006F, 0x0304, DONE,
    0x014E, 0x004F, 0x0306, DONE,
    0x014F, 0x006F, 0x0306, DONE,
    0x0150, 0x004F, 0x030B, DONE,
    0x0151, 0x006F, 0x030B, DONE,
    0x0154, 0x0052, 0x0301, DONE,
    0x0155, 0x0072, 0x0301, DONE,
    0x0156, 0x0052, 0x0327, DONE,
    0x0157, 0x0072, 0x0327, DONE,
    0x0158, 0x0052, 0x030C, DONE,
    0x0159, 0x0072, 0x030C, DONE,
    0x015A, 0x0053, 0x0301, DONE,
    0x015B, 0x0073, 0x0301, DONE,
    0x015C, 0x0053, 0x0302, DONE,
    0x015D, 0x0073, 0x0302, DONE,
    0x015E, 0x0053, 0x0327, DONE,
    0x015F, 0x0073, 0x0327, DONE,
    0x0160, 0x0053, 0x030C, DONE,
    0x0161, 0x0073, 0x030C, DONE,
    0x0162, 0x0054, 0x0327, DONE,
    0x0163, 0x0074, 0x0327, DONE,
    0x0164, 0x0054, 0x030C, DONE,
    0x0165, 0x0074, 0x030C, DONE,
    0x0166, 0x0054, 0x0335, DONE,
    0x0167, 0x0074, 0x0335, DONE,
    0x0168, 0x0055, 0x0303, DONE,
    0x0169, 0x0075, 0x0303, DONE,
    0x016A, 0x0055, 0x0304, DONE,
    0x016B, 0x0075, 0x0304, DONE,
    0x016C, 0x0055, 0x0306, DONE,
    0x016D, 0x0075, 0x0306, DONE,
    0x016E, 0x0055, 0x030A, DONE,
    0x016F, 0x0075, 0x030A, DONE,
    0x0170, 0x0055, 0x030B, DONE,
    0x0171, 0x0075, 0x030B, DONE,
    0x0172, 0x0055, 0x0328, DONE,
    0x0173, 0x0075, 0x0328, DONE,
    0x0174, 0x0057, 0x0302, DONE,
    0x0175, 0x0077, 0x0302, DONE,
    0x0176, 0x0059, 0x0302, DONE,
    0x0177, 0x0079, 0x0302, DONE,
    0x0178, 0x0059, 0x0308, DONE,
    0x0179, 0x005A, 0x0301, DONE,
    0x017A, 0x007A, 0x0301, DONE,
    0x017B, 0x005A, 0x0307, DONE,
    0x017C, 0x007A, 0x0307, DONE,
    0x017D, 0x005A, 0x030C, DONE,
    0x017E, 0x007A, 0x030C, DONE,
    0x0180, 0x0062, 0x0335, DONE,
    0x0197, 0x0049, 0x0335, DONE,
    0x019A, 0x006C, 0x0335, DONE,
    0x019B, 0x03BB, 0x0335, DONE,
    0x019F, 0x004F, 0x0335, DONE,
    0x01A0, 0x004F, 0x031B, DONE,
    0x01A1, 0x006F, 0x031B, DONE,
    0x01AB, 0x0074, 0x0321, DONE,
    0x01AE, 0x0054, 0x0322, DONE,
    0x01AF, 0x0055, 0x031B, DONE,
    0x01B0, 0x0075, 0x031B, DONE,
    0x01CD, 0x0041, 0x030C, DONE,
    0x01CE, 0x0061, 0x030C, DONE,
    0x01CF, 0x0049, 0x030C, DONE,
    0x01D0, 0x0069, 0x030C, DONE,
    0x01D1, 0x004F, 0x030C, DONE,
    0x01D2, 0x006F, 0x030C, DONE,
    0x01D3, 0x0055, 0x030C, DONE,
    0x01D4, 0x0075, 0x030C, DONE,
    /* The entries below with ELSE carry two sequences for the same code:
       a longer one, then a shorter one that starts from a code which has
       an entry of its own earlier in this table.  */
    0x01D5, 0x0055, 0x0308, 0x0304, ELSE, 0x00DC, 0x0304, DONE,
    0x01D6, 0x0075, 0x0308, 0x0304, ELSE, 0x00FC, 0x0304, DONE,
    0x01D7, 0x0055, 0x0308, 0x0301, ELSE, 0x00DC, 0x0301, DONE,
    0x01D8, 0x0075, 0x0308, 0x0301, ELSE, 0x00FC, 0x0301, DONE,
    0x01D9, 0x0055, 0x0308, 0x030C, ELSE, 0x00DC, 0x030C, DONE,
    0x01DA, 0x0075, 0x0308, 0x030C, ELSE, 0x00FC, 0x030C, DONE,
    0x01DB, 0x0055, 0x0308, 0x0300, ELSE, 0x00DC, 0x0300, DONE,
    0x01DC, 0x0075, 0x0308, 0x0300, ELSE, 0x00FC, 0x0300, DONE,
    0x01DE, 0x0041, 0x0308, 0x0304, ELSE, 0x00C4, 0x0304, DONE,
    0x01DF, 0x0061, 0x0308, 0x0304, ELSE, 0x00E4, 0x0304, DONE,
    0x01E0, 0x0041, 0x0307, 0x0304, DONE,
    0x01E1, 0x0061, 0x0307, 0x0304, DONE,
    0x01E2, 0x00C6, 0x0304, DONE,
    0x01E3, 0x00E6, 0x0304, DONE,
    0x01E4, 0x0047, 0x0335, DONE,
    0x01E5, 0x0067, 0x0335, DONE,
    0x01E6, 0x0047, 0x030C, DONE,
    0x01E7, 0x0067, 0x030C, DONE,
    0x01E8, 0x004B, 0x030C, DONE,
    0x01E9, 0x006B, 0x030C, DONE,
    0x01EA, 0x004F, 0x0328, DONE,
    0x01EB, 0x006F, 0x0328, DONE,
    0x01EC, 0x004F, 0x0328, 0x0304, ELSE, 0x01EA, 0x0304, DONE,
    0x01ED, 0x006F, 0x0328, 0x0304, ELSE, 0x01EB, 0x0304, DONE,
    0x01EE, 0x01B7, 0x030C, DONE,
    0x01EF, 0x0292, 0x030C, DONE,
    0x01F0, 0x006A, 0x030C, DONE,
    0x0386, 0x0391, 0x0384, DONE,
    0x0388, 0x0395, 0x0384, DONE,
    0x0389, 0x0397, 0x0384, DONE,
    0x038A, 0x0399, 0x0384, DONE,
    0x038C, 0x039F, 0x0384, DONE,
    0x038E, 0x03A5, 0x0384, DONE,
    0x038F, 0x03A9, 0x0384, DONE,
    0x0390, 0x03B9, 0x0385, DONE,
    0x03AA, 0x0399, 0x0308, DONE,
    0x03AB, 0x03A5, 0x0308, DONE,
    0x03AC, 0x03B1, 0x0384, DONE,
    0x03AD, 0x03B5, 0x0384, DONE,
    0x03AE, 0x03B7, 0x0384, DONE,
    0x03AF, 0x03B9, 0x0384, DONE,
    0x03B0, 0x03C5, 0x0385, DONE,
    0x03CA, 0x03B9, 0x0308, DONE,
    0x03CB, 0x03C5, 0x0308, DONE,
    0x03CC, 0x03BF, 0x0384, DONE,
    0x03CD, 0x03C5, 0x0384, DONE,
    0x03CE, 0x03C9, 0x0384, DONE,
    0x03D3, 0x03D2, 0x0384, DONE,
    0x03D4, 0x03D2, 0x0308, DONE,
    0x0401, 0x0415, 0x0308, DONE,
    0x0403, 0x0413, 0x0301, DONE,
    0x0407, 0x0406, 0x0308, DONE,
    0x040C, 0x041A, 0x0301, DONE,
    0x040E, 0x0423, 0x0306, DONE,
    0x0419, 0x0418, 0x0306, DONE,
    0x0439, 0x0438, 0x0306, DONE,
    0x0451, 0x0435, 0x0308, DONE,
    0x0453, 0x0433, 0x0301, DONE,
    0x0457, 0x0456, 0x0308, DONE,
    0x045C, 0x043A, 0x0301, DONE,
    0x045E, 0x0443, 0x0306, DONE,
    0x0476, 0x0474, 0x030F, DONE,
    0x0477, 0x0475, 0x030F, DONE,
    0x0492, 0x0413, 0x0335, DONE,
    0x0493, 0x0433, 0x0335, DONE,
    0x0498, 0x0417, 0x0327, DONE,
    0x0499, 0x0437, 0x0327, DONE,
    0x04AA, 0x0421, 0x0327, DONE,
    0x04AB, 0x0441, 0x0327, DONE,
    0x04B0, 0x04AE, 0x0335, DONE,
    0x04B1, 0x04AF, 0x0335, DONE,
    0x04BE, 0x04BC, 0x0328, DONE,
    0x04BF, 0x04BD, 0x0328, DONE,
    0x04C1, 0x0416, 0x0306, DONE,
    0x04C2, 0x0436, 0x0306, DONE,
    0x04C5, 0x041A, 0x0328, DONE,
    0x04C6, 0x043A, 0x0328, DONE,
    0x04C9, 0x0425, 0x0328, DONE,
    0x04CA, 0x0445, 0x0328, DONE,
    0x0958, 0x0915, 0x093C, DONE,
    0x0959, 0x0916, 0x093C, DONE,
    0x095A, 0x0917, 0x093C, DONE,
    0x095B, 0x091C, 0x093C, DONE,
    0x095C, 0x0921, 0x093C, DONE,
    0x095D, 0x0922, 0x093C, DONE,
    0x095E, 0x092B, 0x093C, DONE,
    0x095F, 0x092F, 0x093C, DONE,
    0x09DC, 0x09A1, 0x09BC, DONE,
    0x09DD, 0x09A2, 0x09BC, DONE,
    0x09DF, 0x09AF, 0x09BC, DONE,
    0x0A59, 0x0A16, 0x0A3C, DONE,
    0x0A5A, 0x0A17, 0x0A3C, DONE,
    0x0A5B, 0x0A1C, 0x0A3C, DONE,
    0x0A5C, 0x0A21, 0x0A3C, DONE,
    0x0A5E, 0x0A2B, 0x0A3C, DONE,
    0x0B5C, 0x0B21, 0x0B3C, DONE,
    0x0B5D, 0x0B22, 0x0B3C, DONE,
    0x0B5F, 0x0B2F, 0x0B3C, DONE,
    0x1014, 0x1004, 0x104C, DONE,
    0x1015, 0x1005, 0x104C, DONE,
    0x1016, 0x1006, 0x104C, DONE,

    /* Ligatures, digraphs.  */

    0x00C6, 0x0041, 0x0045, DONE,
    0x00E6, 0x0061, 0x0065, DONE,
    0x0132, 0x0049, 0x004A, DONE,
    0x0133, 0x0069, 0x006A, DONE,
    0x013F, 0x004C, 0x00B7, DONE,
    0x0140, 0x006C, 0x00B7, DONE,
    0x0149, 0x0027, 0x006E, DONE,
    0x0152, 0x004F, 0x0045, DONE,
    0x0153, 0x006F, 0x0065, DONE,
    0x01C4, 0x0044, 0x005A, 0x030C, ELSE, 0x0044, 0x017D, DONE,
    0x01C5, 0x0044, 0x007A, 0x030C, ELSE, 0x0044, 0x017E, DONE,
    0x01C6, 0x0064, 0x007A, 0x030C, ELSE, 0x0064, 0x017E, DONE,
    0x01C7, 0x004C, 0x004A, DONE,
    0x01C8, 0x004C, 0x006A, DONE,
    0x01C9, 0x006C, 0x006A, DONE,
    0x01CA, 0x004E, 0x004A, DONE,
    0x01CB, 0x004E, 0x006A, DONE,
    0x01CC, 0x006E, 0x006A, DONE,
    0x02A3, 0x0064, 0x007A, DONE,
    0x02A4, 0x0064, 0x0292, DONE,
    0x02A5, 0x0064, 0x0291, DONE,
    0x02A6, 0x0074, 0x0073, DONE,
    0x02A7, 0x0074, 0x0283, DONE,
    0x02A8, 0x0074, 0x0255, DONE,
    0x0409, 0x041B, 0x042C, DONE,
    0x040A, 0x041D, 0x042C, DONE,
    0x0459, 0x043B, 0x044C, DONE,
    0x045A, 0x043D, 0x044C, DONE,
    0x0EDC, 0x0EAB, 0x0E99, DONE,
    0x0EDD, 0x0EAB, 0x0EA1, DONE,
    0x203C, 0x0021, 0x0021, DONE,
    0x203D, 0x003F, 0x0021, DONE,
    /* A DONE with no entry in front of it: presumably the end-of-table
       mark looked for by the consumers of this data.  */
    DONE
  };
359 
360 /* UCS-2 input and output.  */
361 
362 /*-------------------------------------------------------------------------.
363 | Get one UCS-2 VALUE for TASK, maybe swapping pair of bytes as we go.     |
364 | Whenever a byte order mark is seen, either straight or swapped, always   |
365 | use it to decide whether itself and subsequent UCS-2 values should be    |
366 | swapped, or not.  At the very beginning of the text stream, a byte order |
367 | mark is merely swallowed and never returned.  Everywhere else, it is     |
368 | transmitted as a zero-width non-breaking space.                          |
369 `-------------------------------------------------------------------------*/
370 
371 /* An UCS-2 file canonically has a byte order mark at its very beginning.
372    So, concatenating many UCS-2 files might produce some spurious, but valid
373    zero-width non-breaking spaces.  Those are produced for each file, after
374    the first, starting with a byte order mark, regardless of byte order.  */
375 
376 bool
get_ucs2(unsigned * value,RECODE_SUBTASK subtask)377 get_ucs2 (unsigned *value, RECODE_SUBTASK subtask)
378 {
379   while (true)
380     {
381       int character1;
382       int character2;
383       unsigned chunk;
384 
385       character1 = get_byte (subtask);
386       if (character1 == EOF)
387 	return false;
388       character2 = get_byte (subtask);
389       if (character2 == EOF)
390 	{
391 	  SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
392 	  return false;
393 	}
394 
395       switch (subtask->task->swap_input)
396 	{
397 	case RECODE_SWAP_UNDECIDED:
398 	  chunk = ((MASK (8) & character1) << 8) | (MASK (8) & character2);
399 	  switch (chunk)
400 	    {
401 	    case BYTE_ORDER_MARK:
402 	      subtask->task->swap_input = RECODE_SWAP_NO;
403 	      break;
404 
405 	    case BYTE_ORDER_MARK_SWAPPED:
406 	      subtask->task->swap_input = RECODE_SWAP_YES;
407 	      break;
408 
409 	    default:
410 	      *value = chunk;
411 	      subtask->task->swap_input = RECODE_SWAP_NO;
412 	      if (subtask->task->byte_order_mark)
413 		RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
414 	      return true;
415 	    }
416 	  break;
417 
418 	case RECODE_SWAP_NO:
419 	  chunk = ((MASK (8) & character1) << 8) | (MASK (8) & character2);
420 	  switch (chunk)
421 	    {
422 	    case BYTE_ORDER_MARK:
423 	      RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
424 	      break;
425 
426 	    case BYTE_ORDER_MARK_SWAPPED:
427 	      subtask->task->swap_input = RECODE_SWAP_YES;
428 	      RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
429 	      break;
430 
431 	    default:
432 	      *value = chunk;
433 	      return true;
434 	    }
435 	  break;
436 
437 	case RECODE_SWAP_YES:
438 	  chunk = ((MASK (8) & character2) << 8) | (MASK (8) & character1);
439 	  switch (chunk)
440 	    {
441 	    case BYTE_ORDER_MARK:
442 	      RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
443 	      break;
444 
445 	    case BYTE_ORDER_MARK_SWAPPED:
446 	      subtask->task->swap_input = RECODE_SWAP_NO;
447 	      RETURN_IF_NOGO (RECODE_NOT_CANONICAL, subtask);
448 	      break;
449 
450 	    default:
451 	      *value = chunk;
452 	      return true;
453 	    }
454 	  break;
455 	}
456     }
457 }
458 
459 /*-------------------------------.
460 | Put one UCS-2 VALUE for TASK.  |
461 `-------------------------------*/
462 
463 bool
put_ucs2(unsigned value,RECODE_SUBTASK subtask)464 put_ucs2 (unsigned value, RECODE_SUBTASK subtask)
465 {
466   put_byte (MASK (8) & value >> 8, subtask);
467   put_byte (MASK (8) & value, subtask);
468   return true;
469 }
470 
471 /* UCS-4 input and output.  */
472 
473 /*-------------------------------.
474 | Get one UCS-4 VALUE for TASK.  |
475 `-------------------------------*/
476 
477 bool
get_ucs4(unsigned * value,RECODE_SUBTASK subtask)478 get_ucs4 (unsigned *value, RECODE_SUBTASK subtask)
479 {
480   int character;
481   unsigned chunk;
482 
483   character = get_byte (subtask);
484   if (character == EOF)
485     return false;
486   chunk = (MASK (8) & character) << 24;
487 
488   character = get_byte (subtask);
489   if (character == EOF)
490     {
491       SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
492       return false;
493     }
494   chunk |= (MASK (8) & character) << 16;
495 
496   character = get_byte (subtask);
497   if (character == EOF)
498     {
499       SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
500       return false;
501     }
502   chunk |= (MASK (8) & character) << 8;
503 
504   character = get_byte (subtask);
505   if (character == EOF)
506     {
507       SET_SUBTASK_ERROR (RECODE_INVALID_INPUT, subtask);
508       return false;
509     }
510   chunk |= MASK (8) & character;
511 
512   *value = chunk;
513   return true;
514 }
515 
516 /*-------------------------------.
517 | Put one UCS-4 VALUE for TASK.  |
518 `-------------------------------*/
519 
520 bool
put_ucs4(unsigned value,RECODE_SUBTASK subtask)521 put_ucs4 (unsigned value, RECODE_SUBTASK subtask)
522 {
523   put_byte (MASK (8) & value >> 24, subtask);
524   put_byte (MASK (8) & value >> 16, subtask);
525   put_byte (MASK (8) & value >> 8, subtask);
526   put_byte (MASK (8) & value, subtask);
527   return true;
528 }
529 
530 /* Provided steps.  */
531 
532 /*-----------.
533 | Combined.  |
534 `-----------*/
535 
536 static bool
init_combined_ucs2(RECODE_STEP step,RECODE_CONST_REQUEST request,RECODE_CONST_OPTION_LIST before_options,RECODE_CONST_OPTION_LIST after_options)537 init_combined_ucs2 (RECODE_STEP step,
538 		    RECODE_CONST_REQUEST request,
539 		    RECODE_CONST_OPTION_LIST before_options,
540 		    RECODE_CONST_OPTION_LIST after_options)
541 {
542   step->before->data_type = RECODE_EXPLODE_DATA;
543   step->before->data = (void *) combining_data;
544   return init_explode (step, request, before_options, after_options);
545 }
546 
547 static bool
init_ucs2_combined(RECODE_STEP step,RECODE_CONST_REQUEST request,RECODE_CONST_OPTION_LIST before_options,RECODE_CONST_OPTION_LIST after_options)548 init_ucs2_combined (RECODE_STEP step,
549 		    RECODE_CONST_REQUEST request,
550 		    RECODE_CONST_OPTION_LIST before_options,
551 		    RECODE_CONST_OPTION_LIST after_options)
552 {
553   step->after->data_type = RECODE_EXPLODE_DATA;
554   step->after->data = (void *) combining_data;
555   return init_combine (step, request, before_options, after_options);
556 }
557 
558 /*-----------------------------.
559 | Transform Latin-1 to UCS-4.  |
560 `-----------------------------*/
561 
562 static bool
transform_latin1_ucs4(RECODE_SUBTASK subtask)563 transform_latin1_ucs4 (RECODE_SUBTASK subtask)
564 {
565   int character;
566 
567   while (character = get_byte (subtask), character != EOF)
568     put_ucs4 (MASK (8) & character, subtask);
569 
570   SUBTASK_RETURN (subtask);
571 }
572 
573 /*---------------------------.
574 | Transform UCS-2 to UCS-4.  |
575 `---------------------------*/
576 
577 static bool
transform_ucs2_ucs4(RECODE_SUBTASK subtask)578 transform_ucs2_ucs4 (RECODE_SUBTASK subtask)
579 {
580   unsigned value;
581 
582   while (get_ucs2 (&value, subtask))
583     put_ucs4 (value, subtask);
584 
585   SUBTASK_RETURN (subtask);
586 }
587 
588 /*-----------------------------------------------------------------.
589 | Declare the basic UCS-2 and UCS-4 charsets and transformations.  |
590 `-----------------------------------------------------------------*/
591 
592 bool
module_ucs(RECODE_OUTER outer)593 module_ucs (RECODE_OUTER outer)
594 {
595   return
596     declare_single (outer, "combined-UCS-2", "ISO-10646-UCS-2",
597 		    outer->quality_ucs2_to_variable,
598 		    init_combined_ucs2, explode_ucs2_ucs2)
599     && declare_single (outer, "ISO-10646-UCS-2", "combined-UCS-2",
600 		       outer->quality_variable_to_ucs2,
601 		       init_ucs2_combined, combine_ucs2_ucs2)
602     && declare_single (outer, "latin1", "ISO-10646-UCS-4",
603 		       outer->quality_byte_to_variable,
604 		       NULL, transform_latin1_ucs4)
605     && declare_single (outer, "ISO-10646-UCS-2", "ISO-10646-UCS-4",
606 		       outer->quality_variable_to_variable,
607 		       NULL, transform_ucs2_ucs4)
608 
609     && declare_alias (outer, "UCS", "ISO-10646-UCS-4")
610     && declare_alias (outer, "UCS-4", "ISO-10646-UCS-4")
611     && declare_alias (outer, "ISO_10646", "ISO-10646-UCS-4")
612     && declare_alias (outer, "10646", "ISO-10646-UCS-4")
613     && declare_alias (outer, "u4", "ISO-10646-UCS-4")
614 
615     && declare_alias (outer, "UCS-2", "ISO-10646-UCS-2")
616     && declare_alias (outer, "UNICODE-1-1", "ISO-10646-UCS-2") /* RFC1641 */
617     && declare_alias (outer, "BMP", "ISO-10646-UCS-2")
618     && declare_alias (outer, "u2", "ISO-10646-UCS-2")
619     && declare_alias (outer, "rune", "ISO-10646-UCS-2")
620 
621     && declare_alias (outer, "co", "combined-UCS-2");
622 }
623 
624 void
delmodule_ucs(RECODE_OUTER outer)625 delmodule_ucs (RECODE_OUTER outer)
626 {
627 }
628