1 /* Copyright (C) 2004-2012 by George Williams */
2 /*
3  * Redistribution and use in source and binary forms, with or without
4  * modification, are permitted provided that the following conditions are met:
5 
6  * Redistributions of source code must retain the above copyright notice, this
7  * list of conditions and the following disclaimer.
8 
9  * Redistributions in binary form must reproduce the above copyright notice,
10  * this list of conditions and the following disclaimer in the documentation
11  * and/or other materials provided with the distribution.
12 
13  * The name of the author may not be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15 
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <unibasics.h>
29 #include <gwwiconv.h>
30 #include <charset.h>
31 #include <chardata.h>
32 #include <string.h>
33 #include <ustring.h>
34 #include <stdio.h>
35 
36 #ifndef HAVE_ICONV_H
37 
/* I have written a limited iconv which will convert either to or from unichar_t */
/* (either UCS2 or UCS4) */
/*  it will not convert latin1 to latin2 directly, only latin1->UCS2, UCS2->latin2 */
/*  it uses the encodings built into libgunicode for systems with no iconv */
/*  (i.e. macs before 10.3, perhaps others) */
43 
/* Conversion descriptor: the pair of encodings chosen by gww_iconv_open(). */
/*  gww_iconv() reads both fields to decide which conversion to perform. */
struct gww_iconv_t {
    enum encoding from;		/* encoding of the bytes read from the input buffer */
    enum encoding to;		/* encoding of the bytes written to the output buffer */
};
48 
/* Extra encoding value local to this file, placed at e_encodingmax. */
/*  name_to_enc() maps "EUC-CN" to it, and gww_iconv() decodes it with the */
/*  gb2312 table in the same branch that handles e_wansung. */
enum extended_encoding { e_jisgbpk = e_encodingmax };
50 
/* Byte order of the host; stays end_unknown until endian_detector() has run */
/*  (gww_iconv_open() calls it on first use). */
static enum endian { end_big, end_little, end_unknown } endian = end_unknown;
52 
endian_detector(void)53 static void endian_detector(void) {
54     union { short s; char c[2]; } u;
55 
56     u.s = 0x0102;
57     if ( u.c[0]==0x1 )
58 	endian = end_big;
59     else
60 	endian = end_little;
61 }
62 
name_to_enc(const char * encname)63 static enum encoding name_to_enc(const char *encname) {
64     struct { const char *name; enum encoding enc; } map[] = {
65 	{ "UCS-2-INTERNAL", e_unicode },
66 	{ "UCS2", e_unicode },
67 	{ "UCS-2", e_unicode },
68 	{ "UCS-2LE", e_unicode },
69 	{ "UCS-2BE", e_unicode },
70 	{ "UNICODELITTLE", e_unicode },
71 	{ "UNICODEBIG", e_unicode },
72 	{ "ISO-10646/UCS2", e_unicode },
73 	{ "ISO-10646/USC2", e_unicode },		/* Old typo */
74 	{ "UCS4", e_ucs4 },
75 	{ "UCS-4", e_ucs4 },
76 	{ "UCS-4LE", e_ucs4 },
77 	{ "UCS-4BE", e_ucs4 },
78 	{ "UCS-4-INTERNAL", e_ucs4 },
79 	{ "ISO-10646/UCS4", e_ucs4 },
80 	{ "iso8859-1", e_iso8859_1 },
81 	{ "iso8859-2", e_iso8859_2 },
82 	{ "iso8859-3", e_iso8859_3 },
83 	{ "iso8859-4", e_iso8859_4 },
84 	{ "iso8859-5", e_iso8859_5 },
85 	{ "iso8859-6", e_iso8859_6 },
86 	{ "iso8859-7", e_iso8859_7 },
87 	{ "iso8859-8", e_iso8859_8 },
88 	{ "iso8859-9", e_iso8859_9 },
89 	{ "iso8859-10", e_iso8859_10 },
90 	{ "iso8859-11", e_iso8859_11 },
91 	{ "iso8859-13", e_iso8859_13 },
92 	{ "iso8859-14", e_iso8859_14 },
93 	{ "iso8859-15", e_iso8859_15 },
94 	{ "iso-8859-1", e_iso8859_1 },
95 	{ "iso-8859-2", e_iso8859_2 },
96 	{ "iso-8859-3", e_iso8859_3 },
97 	{ "iso-8859-4", e_iso8859_4 },
98 	{ "iso-8859-5", e_iso8859_5 },
99 	{ "iso-8859-6", e_iso8859_6 },
100 	{ "iso-8859-7", e_iso8859_7 },
101 	{ "iso-8859-8", e_iso8859_8 },
102 	{ "iso-8859-9", e_iso8859_9 },
103 	{ "iso-8859-10", e_iso8859_10 },
104 	{ "iso-8859-11", e_iso8859_11 },
105 	{ "iso-8859-13", e_iso8859_13 },
106 	{ "iso-8859-14", e_iso8859_14 },
107 	{ "iso-8859-15", e_iso8859_15 },
108 	{ "koi8-r", e_koi8_r },
109 	{ "jis201", e_jis201 },
110 	{ "mac", e_mac },
111 	{ "Macintosh", e_mac },
112 	{ "MS-ANSI", e_win },
113 	{ "EUC-KR", e_wansung },
114 	{ "johab", e_johab },
115 	{ "ISO-2022-KR", e_jiskorean },
116 	{ "ISO-2022-CN", e_jisgb },
117 	{ "EUC-CN", e_jisgbpk },
118 	{ "big5", e_big5 },
119 	{ "big5hkscs", e_big5hkscs },
120 	{ "ISO-2022-JP", e_jis },
121 	{ "ISO-2022-JP-2", e_jis2 },
122 	{ "Sjis", e_sjis },
123 	{ "UTF-8", e_utf8 },
124 	{ "UTF8", e_utf8 },
125 	{ NULL }};
126     int i;
127 
128     for ( i=0; map[i].name!=NULL; ++i )
129 	if ( strmatch(map[i].name,encname)==0 )
130 return( map[i].enc );
131 
132 return( -1 );
133 }
134 
gww_iconv_open(const char * toenc,const char * fromenc)135 gww_iconv_t gww_iconv_open(const char *toenc,const char *fromenc) {
136     struct gww_iconv_t stuff, *ret;
137 
138     if ( endian==end_unknown )
139 	endian_detector();
140 
141     stuff.from = name_to_enc(fromenc);
142     stuff.to = name_to_enc(toenc);
143     if ( stuff.from==(enum encoding) -1 || stuff.to==(enum encoding) -1 ) {
144 	/*fprintf( stderr, "Unknown encoding\n" );*/
145 return( (iconv_t)(-1) );
146     } else if ( stuff.from!=e_ucs4 && stuff.to!=e_ucs4 ) {
147 	fprintf( stderr, "Bad call to gww_iconv_open, neither arg is UCS4\n" );
148 return( (iconv_t)(-1) );
149     }
150 
151     ret = xmalloc(sizeof(struct gww_iconv_t));
152     *ret = stuff;
153 return( ret );
154 }
155 
/* Release a descriptor obtained from gww_iconv_open(). */
/*  gww_iconv_open() reports failure by returning (iconv_t)(-1), which is not */
/*  a heap pointer; passing that straight to free() would crash, so it is */
/*  ignored here. NULL is harmless (free(NULL) is a no-op). */
void gww_iconv_close( gww_iconv_t cd) {
    if ( cd==(gww_iconv_t)(-1) )
return;
    free(cd);
}
159 
gww_iconv(gww_iconv_t _cd,char ** inbuf,size_t * inlen,char ** outbuf,size_t * outlen)160 size_t gww_iconv( gww_iconv_t _cd,
161 	char **inbuf, size_t *inlen,
162 	char **outbuf, size_t *outlen) {
163     struct gww_iconv_t *cd = _cd;
164     int char_cnt = 0;
165     unsigned char *plane;
166     int ch;
167 
168     if ( inbuf==NULL || outbuf==NULL || inlen==NULL || outlen==NULL ||
169 	    *inbuf==NULL || *outbuf==NULL )
170 return( 0 );	/* Legal, used to reset the state. As we don't do states, irrelevant */
171 
172     if ( cd->from<0 || cd->from>e_encodingmax || cd->to<0 || cd->to>e_encodingmax ) {
173 	fprintf( stderr, "Garbage encoding passed to gww_iconv()\n" );
174 return( (size_t) -1 );
175     }
176 
177     if ( cd->from==e_unicode ) {
178 	if ( cd->to==e_unicode ) {
179 	    int min = *inlen < *outlen ? *inlen : *outlen;
180 	    min &= ~1;
181 	    memcpy(*inbuf,*outbuf,min);
182 	    char_cnt = min/sizeof(short);
183 	    *inbuf += min; *outbuf += min;
184 	    *inlen -= min; *outlen -= min;
185 	    if ( *inlen==1 && *outlen>0 )
186 return( (size_t) -1 );			/* Incomplete multi-byte sequence */
187 	} else if ( cd->to==e_ucs4 ) {
188 	    int min = *inlen/sizeof(short) < *outlen/sizeof(int32) ? *inlen/sizeof(short) : *outlen/sizeof(int32);
189 	    int highch, lowch;
190 	    if ( endian == end_little ) {
191 		while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
192 		    highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
193 		    ((uint8 *) outbuf)[3] = 0; ((uint8 *) outbuf)[2] = 0;
194 		    ((uint8 *) outbuf)[1] = highch; ((uint8 *) outbuf)[0] = lowch;
195 		    outbuf += sizeof(int32); inbuf += sizeof(short);
196 		    *outlen -= sizeof(int32); *inlen -= sizeof(short);
197 		}
198 	    } else {
199 		while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
200 		    highch = ((unsigned char *) *inbuf)[0], lowch = ((unsigned char *) *inbuf)[1];
201 		    ((uint8 *) outbuf)[0] = 0; ((uint8 *) outbuf)[1] = 0;
202 		    ((uint8 *) outbuf)[2] = highch; ((uint8 *) outbuf)[3] = lowch;
203 		    outbuf += sizeof(int32); inbuf += sizeof(short);
204 		    *outlen -= sizeof(int32); *inlen -= sizeof(short);
205 		}
206 	    }
207 	    char_cnt = min;
208 	    if ( *inlen==1 && *outlen>0 )
209 return( (size_t) -1 );			/* Incomplete multi-byte sequence */
210 	} else if ( cd->to<e_first2byte ) {
211 	    struct charmap *table = NULL;
212 	    table = alphabets_from_unicode[cd->to];
213 	    while ( *inlen>1 && *outlen>0 ) {
214 		int highch, lowch;
215 		if ( endian == end_little ) {
216 		    highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
217 		} else {
218 		    highch = *(unsigned char *) *inbuf, lowch = ((unsigned char *) *inbuf)[1];
219 		}
220 		if ( highch>=table->first && highch<=table->last &&
221 			(plane = table->table[highch])!=NULL &&
222 			(ch=plane[lowch])!=0 ) {
223 		    *((*outbuf)++) = ch;
224 		    -- *outlen;
225 		    *inlen -= 2;
226 		    *inbuf += 2;
227 		    ++char_cnt;
228 		} else
229 return( (size_t) -1 );
230 	    }
231 	} else if ( cd->to==e_utf8 ) {
232 	    while ( *inlen>1 && *outlen>0 ) {
233 		unichar_t uch;
234 		if ( endian == end_little ) {
235 		    uch = (((unsigned char *) *inbuf)[1]<<8) | (*((unsigned char *) *inbuf));
236 		} else {
237 		    uch = (*((unsigned char *) *inbuf)<<8) | (((unsigned char *) *inbuf)[1]);
238 		}
239 		if ( uch < 0x80 ) {
240 		    *((*outbuf)++) = uch;
241 		    --*outlen;
242 		} else if ( uch<0x800 ) {
243 		    if ( *outlen==1 )
244 return( (size_t) -1 );
245 		    *((*outbuf)++) = 0xc0 | (uch>>6);
246 		    *((*outbuf)++) = 0x80 | (uch&0x3f);
247 		    *outlen-=2;
248 		} else {	/* I'm not dealing with */
249 		    if ( *outlen<=2 )
250 return( (size_t) -1 );
251 		    *((*outbuf)++) = 0xe0 | (uch>>12);
252 		    *((*outbuf)++) = 0x80 | ((uch>>6)&0x3f);
253 		    *((*outbuf)++) = 0x80 | (uch&0x3f);
254 		    *outlen-=3;
255 		}
256 		*inbuf += 2;
257 		*inlen -= 2;
258 		++char_cnt;
259 	    }
260 	} else {
261 	    fprintf( stderr, "Unexpected encoding\n" );
262 return( (size_t) -1 );
263 	}
264     } else if ( cd->from==e_ucs4 ) {
265 	if ( cd->to==e_unicode ) {
266 	    int min = *inlen/sizeof(int32) < *outlen/sizeof(int16) ? *inlen/sizeof(int32) : *outlen/sizeof(int16);
267 	    int highch, lowch;
268 	    if ( endian == end_little ) {
269 		while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
270 		    highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
271 		    ((uint8 *) outbuf)[1] = highch; ((uint8 *) outbuf)[0] = lowch;
272 		    outbuf += sizeof(int16); inbuf += sizeof(int32);
273 		    *outlen -= sizeof(int16); *inlen -= sizeof(int32);
274 		}
275 	    } else {
276 		while ( *inlen>=sizeof(short) && *outlen>=sizeof(int32) ) {
277 		    highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
278 		    ((uint8 *) outbuf)[0] = highch; ((uint8 *) outbuf)[1] = lowch;
279 		    outbuf += sizeof(int16); inbuf += sizeof(int32);
280 		    *outlen -= sizeof(int16); *inlen -= sizeof(int32);
281 		}
282 	    }
283 	    char_cnt = min;
284 	    if ( *inlen>0 && *outlen>0 )
285 return( (size_t) -1 );			/* Incomplete multi-byte sequence */
286 	} else if ( cd->to<e_first2byte ) {
287 	    struct charmap *table = NULL;
288 	    table = alphabets_from_unicode[cd->to];
289 	    while ( *inlen>1 && *outlen>0 ) {
290 		int highch, lowch;
291 		if ( endian == end_little ) {
292 		    highch = ((unsigned char *) *inbuf)[1], lowch = *(unsigned char *) *inbuf;
293 		} else {
294 		    highch = ((unsigned char *) *inbuf)[2], lowch = ((unsigned char *) *inbuf)[3];
295 		}
296 		if ( highch>=table->first && highch<=table->last &&
297 			(plane = table->table[highch])!=NULL &&
298 			(ch=plane[lowch])!=0 ) {
299 		    *((*outbuf)++) = ch;
300 		    -- *outlen;
301 		    *inlen -= 4;
302 		    *inbuf += 4;
303 		    ++char_cnt;
304 		} else
305 return( (size_t) -1 );
306 	    }
307 	} else if ( cd->to==e_utf8 ) {
308 	    while ( *inlen>1 && *outlen>0 ) {
309 		int uch;
310 		if ( endian == end_little ) {
311 		    uch = (((unsigned char *) *inbuf)[3]<<24) |
312 			    (((unsigned char *) *inbuf)[2]<<16) |
313 			    (((unsigned char *) *inbuf)[1]<<8) |
314 			    (*((unsigned char *) *inbuf));
315 		} else {
316 		    uch = (*((unsigned char *) *inbuf)<<24) |
317 			    (((unsigned char *) *inbuf)[1]<<16) |
318 			    (((unsigned char *) *inbuf)[2]<<8) |
319 			    (((unsigned char *) *inbuf)[3]);
320 		}
321 		if ( uch < 0x80 ) {
322 		    *((*outbuf)++) = uch;
323 		    --*outlen;
324 		} else if ( uch<0x800 ) {
325 		    if ( *outlen==1 )
326 return( (size_t) -1 );
327 		    *((*outbuf)++) = 0xc0 | (uch>>6);
328 		    *((*outbuf)++) = 0x80 | (uch&0x3f);
329 		    *outlen-=2;
330 		} else if ( uch < 0x10000 ) {
331 		    if ( *outlen<=2 )
332 return( (size_t) -1 );
333 		    *((*outbuf)++) = 0xe0 | (uch>>12);
334 		    *((*outbuf)++) = 0x80 | ((uch>>6)&0x3f);
335 		    *((*outbuf)++) = 0x80 | (uch&0x3f);
336 		    *outlen-=3;
337 		} else {
338 		    uint32 val = uch-0x10000;
339 		    int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f;
340 		    if ( *outlen<=3 )
341 return( (size_t) -1 );
342 		    *(*outbuf)++ = 0xf0 | (u>>2);
343 		    *(*outbuf)++ = 0x80 | ((u&3)<<4) | z;
344 		    *(*outbuf)++ = 0x80 | y;
345 		    *(*outbuf)++ = 0x80 | x;
346 		    *outlen-=4;
347 		}
348 		*inbuf += 4;
349 		*inlen -= 4;
350 		++char_cnt;
351 	    }
352 	} else {
353 	    fprintf( stderr, "Unexpected encoding\n" );
354 return( (size_t) -1 );
355 	}
356     } else if ( cd->to==e_unicode ) {
357 	const unichar_t *table;
358 	if ( cd->from<e_first2byte ) {
359 	    table = unicode_from_alphabets[cd->from];
360 	    while ( *inlen>0 && *outlen>1 ) {
361 		unichar_t ch = table[ *(unsigned char *) ((*inbuf)++)];
362 		--*inlen;
363 		if ( endian==end_little ) {
364 		    *((*outbuf)++) = ch&0xff;
365 		    *((*outbuf)++) = ch>>8;
366 		} else {
367 		    *((*outbuf)++) = ch>>8;
368 		    *((*outbuf)++) = ch&0xff;
369 		}
370 		*outlen -= sizeof(unichar_t);
371 		++char_cnt;
372 	    }
373 	} else if ( cd->from==e_jis || cd->from==e_jis2 ||
374 		cd->from==e_jiskorean || cd->from==e_jisgb ) {
375 	    table  = cd->from==e_jisgb     ? unicode_from_gb2312 :
376 		     cd->from==e_jiskorean ? unicode_from_ksc5601 :
377 		     cd->from==e_jis       ? unicode_from_jis208 :
378 		        unicode_from_jis212;
379 	    while ( *inlen>1 && *outlen>1 ) {
380 		unsigned char *ipt = (unsigned char *) *inbuf;
381 		int ch;
382 		if ( *ipt<0x21 || *ipt>0x7e || ipt[1]<0x21 || ipt[1]>0x7e )
383 return( (size_t) -1 );
384 		ch = (*ipt-0x21)*94 + (ipt[1]-0x21);
385 		ch = table[ch];
386 		*inlen -= 2;
387 		*inbuf = (char *) ipt+2;
388 		if ( endian==end_little ) {
389 		    *((*outbuf)++) = ch&0xff;
390 		    *((*outbuf)++) = ch>>8;
391 		} else {
392 		    *((*outbuf)++) = ch>>8;
393 		    *((*outbuf)++) = ch&0xff;
394 		}
395 		*outlen -= sizeof(unichar_t);
396 		++char_cnt;
397 	    }
398 	    if ( *inlen==1 && *outlen>0 )
399 return( (size_t) -1 );			/* Incomplete multi-byte sequence */
400 	} else if ( cd->from==e_wansung || cd->from==e_jisgbpk ) {
401 	    table  = cd->from==e_jisgbpk   ? unicode_from_gb2312 :
402 		      unicode_from_ksc5601 ;
403 	    while ( *inlen>0 && *outlen>1 ) {
404 		unsigned char *ipt = (unsigned char *) *inbuf;
405 		int ch;
406 		if ( *ipt<0x7f ) {
407 		    ch = *ipt;
408 		    --*inlen;
409 		    *inbuf = (char *) ipt+1;
410 		} else {
411 		    if ( *ipt<0xa1 || *ipt>0xfe || ipt[1]<0xa1 || ipt[1]>0xfe ||
412 			    *inlen==1 )
413 return( (size_t) -1 );
414 		    ch = (*ipt-0xa1)*94 + (ipt[1]-0xa1);
415 		    ch = table[ch];
416 		    *inlen -= 2;;
417 		    *inbuf = (char *) ipt+2;
418 		}
419 		if ( endian==end_little ) {
420 		    *((*outbuf)++) = ch&0xff;
421 		    *((*outbuf)++) = ch>>8;
422 		} else {
423 		    *((*outbuf)++) = ch>>8;
424 		    *((*outbuf)++) = ch&0xff;
425 		}
426 		*outlen -= sizeof(unichar_t);
427 		++char_cnt;
428 	    }
429 	} else if ( cd->from==e_johab || cd->from==e_big5 || cd->from==e_big5hkscs ) {
430 	    int offset;
431 	    if ( cd->from==e_big5 ) {
432 		offset = 0xa100;
433 		table = unicode_from_big5;
434 	    } else if ( cd->from==e_big5hkscs ) {
435 		offset = 0x8100;
436 		table = unicode_from_big5hkscs;
437 	    } else {
438 		offset = 0x8400;
439 		table = unicode_from_johab;
440 	    }
441 	    while ( *inlen>0 && *outlen>1 ) {
442 		unsigned char *ipt = (unsigned char *) *inbuf;
443 		int ch;
444 		if ( *ipt<0x7f ) {
445 		    ch = *ipt;
446 		    --*inlen;
447 		    *inbuf = (char *) ipt+1;
448 		} else {
449 		    if ( *inlen==1 )
450 return( (size_t) -1 );
451 		    ch = (*ipt<<8) | ipt[1];
452 		    if ( ch<offset )
453 return( (size_t) -1 );
454 		    ch -= offset;
455 		    ch = table[ch];
456 		    *inlen -= 2;
457 		    *inbuf = (char *) ipt+2;
458 		}
459 		if ( endian==end_little ) {
460 		    *((*outbuf)++) = ch&0xff;
461 		    *((*outbuf)++) = ch>>8;
462 		} else {
463 		    *((*outbuf)++) = ch>>8;
464 		    *((*outbuf)++) = ch&0xff;
465 		}
466 		*outlen -= sizeof(unichar_t);
467 		++char_cnt;
468 	    }
469 	} else if ( cd->from==e_sjis ) {
470 	    while ( *inlen>0 && *outlen>1 ) {
471 		unsigned char *ipt = (unsigned char *) *inbuf;
472 		int ch1 = *ipt;
473 		if ( ch1<127 || ( ch1>=161 && ch1<=223 )) {
474 		    ch = unicode_from_jis201[ch1];
475 		    *inbuf = (char *) ipt+1;
476 		    --*inlen;
477 		} else if ( *inlen==1 )
478 return( (size_t) -1 );
479 		else {
480 		    int ch2 = ipt[1];
481 		    if ( ch1 >= 129 && ch1<= 159 )
482 			ch1 -= 112;
483 		    else
484 			ch1 -= 176;
485 		    ch1 <<= 1;
486 		    if ( ch2>=159 )
487 			ch2-= 126;
488 		    else if ( ch2>127 ) {
489 			--ch1;
490 			ch2 -= 32;
491 		    } else {
492 			--ch1;
493 			ch2 -= 31;
494 		    }
495 		    if ( ch1-0x21>=94 || ch2-0x21>=94 )
496 return( (size_t) -1 );
497 		    ch = unicode_from_jis208[(ch1-0x21)*94+(ch2-0x21)];
498 		    *inlen -= 2;
499 		    *inbuf = (char *) ipt+2;
500 		}
501 		if ( endian==end_little ) {
502 		    *((*outbuf)++) = ch&0xff;
503 		    *((*outbuf)++) = ch>>8;
504 		} else {
505 		    *((*outbuf)++) = ch>>8;
506 		    *((*outbuf)++) = ch&0xff;
507 		}
508 		*outlen -= sizeof(unichar_t);
509 		++char_cnt;
510 	    }
511 	} else if ( cd->from==e_utf8 ) {
512 	    while ( *inlen>0 && *outlen>sizeof(unichar_t) ) {
513 		unsigned char *ipt = (unsigned char *) *inbuf;
514 		int ch = *ipt;
515 		if ( ch <= 127 ) {
516 		    *inbuf = (char *) ipt+1;
517 		    --*inlen;
518 		} else if ( ch<=0xdf ) {
519 		    if ( *inlen<2 || ipt[1]<0x80 )
520 return( (size_t) -1 );
521 		    ch = ((ch&0x1f)<<6) | (ipt[1] &0x3f);
522 		    *inlen -= 2;
523 		    *inbuf = (char *) ipt+2;
524 		} else if ( ch<=0xef ) {
525 		    if ( *inlen<3 || ipt[1]<0x80 || ipt[2]<0x80 )
526 return( (size_t) -1 );
527 		    ch = ((ch&0x1f)<<12) | ((ipt[1] &0x3f)<<6) | (ipt[2]&0x3f);
528 		    *inlen -= 3;
529 		    *inbuf = (char *) ipt+3;
530 		} else {
531 		    int w;
532 		    if ( *inlen<4 || *outlen<4 || ipt[1]<0x80 || ipt[2]<0x80 || ipt[3]<0x80 )
533 return( (size_t) -1 );
534 		    w = ( ((ch&0x7)<<2) | ((ipt[1]&0x30)>>4) )-1;
535 		    ch = 0xd800 | (w<<6) | ((ipt[1]&0xf)<<2) | ((ipt[2]&0x30)>>4);
536 		    if ( endian==end_little ) {
537 			*((*outbuf)++) = ch&0xff;
538 			*((*outbuf)++) = ch>>8;
539 		    } else {
540 			*((*outbuf)++) = ch>>8;
541 			*((*outbuf)++) = ch&0xff;
542 		    }
543 		    *outlen -= 2;
544 		    ch = 0xdc00 | ((ipt[2]&0xf)<<6) | (ipt[3]&0x3f);
545 		}
546 		if ( endian==end_little ) {
547 		    *((*outbuf)++) = ch&0xff;
548 		    *((*outbuf)++) = ch>>8;
549 		} else {
550 		    *((*outbuf)++) = ch>>8;
551 		    *((*outbuf)++) = ch&0xff;
552 		}
553 		*outlen -= sizeof(unichar_t);
554 		++char_cnt;
555 	    }
556 	} else {
557 	    fprintf( stderr, "Unexpected encoding\n" );
558 return( (size_t) -1 );
559 	}
560     } else if ( cd->to==e_ucs4 ) {
561 	const unichar_t *table;
562 	if ( cd->from<e_first2byte ) {
563 	    table = unicode_from_alphabets[cd->from];
564 	    while ( *inlen>0 && *outlen>1 ) {
565 		unichar_t ch = table[ *(unsigned char *) ((*inbuf)++)];
566 		--*inlen;
567 		if ( endian==end_little ) {
568 		    *((*outbuf)++) = 0;
569 		    *((*outbuf)++) = 0;
570 		    *((*outbuf)++) = ch&0xff;
571 		    *((*outbuf)++) = ch>>8;
572 		} else {
573 		    *((*outbuf)++) = ch>>8;
574 		    *((*outbuf)++) = ch&0xff;
575 		    *((*outbuf)++) = 0;
576 		    *((*outbuf)++) = 0;
577 		}
578 		*outlen -= sizeof(unichar_t);
579 		++char_cnt;
580 	    }
581 	} else if ( cd->from==e_jis || cd->from==e_jis2 ||
582 		cd->from==e_jiskorean || cd->from==e_jisgb ) {
583 	    table  = cd->from==e_jisgb     ? unicode_from_gb2312 :
584 		     cd->from==e_jiskorean ? unicode_from_ksc5601 :
585 		     cd->from==e_jis       ? unicode_from_jis208 :
586 		        unicode_from_jis212;
587 	    while ( *inlen>1 && *outlen>1 ) {
588 		unsigned char *ipt = (unsigned char *) *inbuf;
589 		int ch;
590 		if ( *ipt<0x21 || *ipt>0x7e || ipt[1]<0x21 || ipt[1]>0x7e )
591 return( (size_t) -1 );
592 		ch = (*ipt-0x21)*94 + (ipt[1]-0x21);
593 		ch = table[ch];
594 		*inlen -= 2;
595 		*inbuf = (char *) ipt+2;
596 		if ( endian==end_little ) {
597 		    *((*outbuf)++) = 0;
598 		    *((*outbuf)++) = 0;
599 		    *((*outbuf)++) = ch&0xff;
600 		    *((*outbuf)++) = ch>>8;
601 		} else {
602 		    *((*outbuf)++) = ch>>8;
603 		    *((*outbuf)++) = ch&0xff;
604 		    *((*outbuf)++) = 0;
605 		    *((*outbuf)++) = 0;
606 		}
607 		*outlen -= sizeof(unichar_t);
608 		++char_cnt;
609 	    }
610 	    if ( *inlen==1 && *outlen>0 )
611 return( (size_t) -1 );			/* Incomplete multi-byte sequence */
612 	} else if ( cd->from==e_wansung || cd->from==e_jisgbpk ) {
613 	    table  = cd->from==e_jisgbpk   ? unicode_from_gb2312 :
614 		      unicode_from_ksc5601 ;
615 	    while ( *inlen>0 && *outlen>1 ) {
616 		unsigned char *ipt = (unsigned char *) *inbuf;
617 		int ch;
618 		if ( *ipt<0x7f ) {
619 		    ch = *ipt;
620 		    --*inlen;
621 		    *inbuf = (char *) ipt+1;
622 		} else {
623 		    if ( *ipt<0xa1 || *ipt>0xfe || ipt[1]<0xa1 || ipt[1]>0xfe ||
624 			    *inlen==1 )
625 return( (size_t) -1 );
626 		    ch = (*ipt-0xa1)*94 + (ipt[1]-0xa1);
627 		    ch = table[ch];
628 		    *inlen -= 2;;
629 		    *inbuf = (char *) ipt+2;
630 		}
631 		if ( endian==end_little ) {
632 		    *((*outbuf)++) = 0;
633 		    *((*outbuf)++) = 0;
634 		    *((*outbuf)++) = ch&0xff;
635 		    *((*outbuf)++) = ch>>8;
636 		} else {
637 		    *((*outbuf)++) = ch>>8;
638 		    *((*outbuf)++) = ch&0xff;
639 		    *((*outbuf)++) = 0;
640 		    *((*outbuf)++) = 0;
641 		}
642 		*outlen -= sizeof(unichar_t);
643 		++char_cnt;
644 	    }
645 	} else if ( cd->from==e_johab || cd->from==e_big5 || cd->from==e_big5hkscs ) {
646 	    int offset;
647 	    if ( cd->from==e_big5 ) {
648 		offset = 0xa100;
649 		table = unicode_from_big5;
650 	    } else if ( cd->from==e_big5hkscs ) {
651 		offset = 0x8100;
652 		table = unicode_from_big5hkscs;
653 	    } else {
654 		offset = 0x8400;
655 		table = unicode_from_johab;
656 	    }
657 	    while ( *inlen>0 && *outlen>1 ) {
658 		unsigned char *ipt = (unsigned char *) *inbuf;
659 		int ch;
660 		if ( *ipt<0x7f ) {
661 		    ch = *ipt;
662 		    --*inlen;
663 		    *inbuf = (char *) ipt+1;
664 		} else {
665 		    if ( *inlen==1 )
666 return( (size_t) -1 );
667 		    ch = (*ipt<<8) | ipt[1];
668 		    if ( ch<offset )
669 return( (size_t) -1 );
670 		    ch -= offset;
671 		    ch = table[ch];
672 		    *inlen -= 2;
673 		    *inbuf = (char *) ipt+2;
674 		}
675 		if ( endian==end_little ) {
676 		    *((*outbuf)++) = 0;
677 		    *((*outbuf)++) = 0;
678 		    *((*outbuf)++) = ch&0xff;
679 		    *((*outbuf)++) = ch>>8;
680 		} else {
681 		    *((*outbuf)++) = ch>>8;
682 		    *((*outbuf)++) = ch&0xff;
683 		    *((*outbuf)++) = 0;
684 		    *((*outbuf)++) = 0;
685 		}
686 		*outlen -= sizeof(unichar_t);
687 		++char_cnt;
688 	    }
689 	} else if ( cd->from==e_sjis ) {
690 	    while ( *inlen>0 && *outlen>1 ) {
691 		unsigned char *ipt = (unsigned char *) *inbuf;
692 		int ch1 = *ipt;
693 		if ( ch1<127 || ( ch1>=161 && ch1<=223 )) {
694 		    ch = unicode_from_jis201[ch1];
695 		    *inbuf = (char *) ipt+1;
696 		    --*inlen;
697 		} else if ( *inlen==1 )
698 return( (size_t) -1 );
699 		else {
700 		    int ch2 = ipt[1];
701 		    if ( ch1 >= 129 && ch1<= 159 )
702 			ch1 -= 112;
703 		    else
704 			ch1 -= 176;
705 		    ch1 <<= 1;
706 		    if ( ch2>=159 )
707 			ch2-= 126;
708 		    else if ( ch2>127 ) {
709 			--ch1;
710 			ch2 -= 32;
711 		    } else {
712 			--ch1;
713 			ch2 -= 31;
714 		    }
715 		    if ( ch1-0x21>=94 || ch2-0x21>=94 )
716 return( (size_t) -1 );
717 		    ch = unicode_from_jis208[(ch1-0x21)*94+(ch2-0x21)];
718 		    *inlen -= 2;
719 		    *inbuf = (char *) ipt+2;
720 		}
721 		if ( endian==end_little ) {
722 		    *((*outbuf)++) = 0;
723 		    *((*outbuf)++) = 0;
724 		    *((*outbuf)++) = ch&0xff;
725 		    *((*outbuf)++) = ch>>8;
726 		} else {
727 		    *((*outbuf)++) = ch>>8;
728 		    *((*outbuf)++) = ch&0xff;
729 		    *((*outbuf)++) = 0;
730 		    *((*outbuf)++) = 0;
731 		}
732 		*outlen -= sizeof(unichar_t);
733 		++char_cnt;
734 	    }
735 	} else if ( cd->from==e_utf8 ) {
736 	    while ( *inlen>0 && *outlen>sizeof(unichar_t) ) {
737 		unsigned char *ipt = (unsigned char *) *inbuf;
738 		int ch = *ipt;
739 		if ( ch <= 127 ) {
740 		    *inbuf = (char *) ipt+1;
741 		    --*inlen;
742 		} else if ( ch<=0xdf ) {
743 		    if ( *inlen<2 || ipt[1]<0x80 )
744 return( (size_t) -1 );
745 		    ch = ((ch&0x1f)<<6) | (ipt[1] &0x3f);
746 		    *inlen -= 2;
747 		    *inbuf = (char *) ipt+2;
748 		} else if ( ch<=0xef ) {
749 		    if ( *inlen<3 || ipt[1]<0x80 || ipt[2]<0x80 )
750 return( (size_t) -1 );
751 		    ch = ((ch&0x1f)<<12) | ((ipt[1] &0x3f)<<6) | (ipt[2]&0x3f);
752 		    *inlen -= 3;
753 		    *inbuf = (char *) ipt+3;
754 		} else {
755 		    int w,w2;
756 		    w = ( ((*ipt&0x7)<<2) | ((ipt[1]&0x30)>>4) )-1;
757 		    w = (w<<6) | ((ipt[1]&0xf)<<2) | ((ipt[2]&0x30)>>4);
758 		    w2 = ((ipt[2]&0xf)<<6) | (ipt[3]&0x3f);
759 		    ch = w*0x400 + w2 + 0x10000;
760 		    *inbuf = (char *) ipt+4;
761 		}
762 		if ( endian==end_little ) {
763 		    *((*outbuf)++) = ch&0xff;
764 		    *((*outbuf)++) = ch>>8;
765 		    *((*outbuf)++) = ch>>16;
766 		    *((*outbuf)++) = ch>>24;
767 		} else {
768 		    *((*outbuf)++) = ch>>24;
769 		    *((*outbuf)++) = ch>>16;
770 		    *((*outbuf)++) = ch>>8;
771 		    *((*outbuf)++) = ch&0xff;
772 		}
773 		*outlen -= sizeof(unichar_t);
774 		++char_cnt;
775 	    }
776 	} else {
777 	    fprintf( stderr, "Unexpected encoding\n" );
778 return( (size_t) -1 );
779 	}
780     } else {
781 	fprintf( stderr, "One of the two encodings must be UCS2 in gww_iconv()\n" );
782 return( (size_t) -1 );
783     }
784 
785     if ( *outlen>=1 ) {
786 	**outbuf = '\0';
787 	if ( *outlen>1 )
788 	    (*outbuf)[1] = '\0';
789 	if ( cd->to==e_ucs4 && *outlen>3 ) {
790 	    (*outbuf)[2] = '\0';
791 	    (*outbuf)[3] = '\0';
792 	}
793     }
794 return( char_cnt );
795 }
796 #else
/* Placeholder definition for builds where HAVE_ICONV_H is defined and */
/*  everything above is compiled out. */
static const int a_file_must_define_something=1;
798 #endif 	/* HAVE_ICONV_H */
799