1 /* iconvtc.c -- Interface to iconv transcoding routines
2
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 See tidy.h for the copyright notice.
5
6 $Id: iconvtc.c,v 1.2 2008/08/09 11:55:27 hoehrmann Exp $
7 */
8
9 #include "tidy.h"
10 #include "forward.h"
11 #include "streamio.h"
12
13 #ifdef TIDY_ICONV_SUPPORT
14
15 #include <iconv.h>
16
17 /* maximum number of bytes for a single character */
18 #define TC_INBUFSIZE 16
19
/* size in bytes of the buffer that receives the converted output of one character */
21 #define TC_OUTBUFSIZE 16
22
IconvInitInputTranscoder(void)23 Bool IconvInitInputTranscoder(void)
24 {
25 return no;
26 }
27
/* Shut down the iconv input transcoder.
**
** Counterpart of IconvInitInputTranscoder(); this implementation
** does no work.
*/
void IconvUninitInputTranscoder(void)
{
    /* intentionally empty */
}
32
/* Decode a single character from an input stream using iconv.
**
** firstByte  - first byte of the character; already taken from the stream
**              by the caller.
** in         - input stream; in->iconvptr holds the iconv conversion
**              descriptor and in->source supplies further bytes.
** bytesRead  - receives the number of input bytes gathered for this
**              call (including firstByte) on every return path.
**
** Bytes are accumulated one at a time and handed to iconv() until it
** reports a successful conversion.  The first four output octets are
** then combined, least significant octet first, into the returned
** code point.  The descriptor is expected to produce UTF-32LE output
** (assumption carried over from the original comment; confirm where
** the descriptor is opened).
**
** Returns the code point on success, or EndOfStream when
**   - iconv() fails for any reason other than an incomplete sequence,
**   - the source runs out of bytes in the middle of a character, or
**   - TC_INBUFSIZE bytes did not form a complete character.
**
** NOTE(review): if iconv() succeeds without writing any output (which
** may happen with stateful encodings), the zero-initialised output
** buffer yields code point 0 -- verify whether callers need this case
** handled separately.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    for (;;)
    {
        /* iconv() advances these, so start from the buffer heads each time */
        char * outbufptr = outbuf;
        char * inbufptr = inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result;
        int iconv_errno;
        int nextByte;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno; /* save before any other call can clobber it */

        if (result != (size_t)(-1))
        {
            uint c;

            /* Build the code point from little-endian octets.  Unsigned
            ** arithmetic keeps the shift into the top byte well defined.
            */
            c  = (uint)(unsigned char)outbuf[0];
            c |= (uint)(unsigned char)outbuf[1] << 8;
            c |= (uint)(unsigned char)outbuf[2] << 16;
            c |= (uint)(unsigned char)outbuf[3] << 24;

            *bytesRead = (uint)inbufsize;
            return (int)c;
        }

        if (iconv_errno != EINVAL)
        {
            /* Anything but "incomplete sequence" (e.g. an invalid
            ** multibyte sequence) cannot be repaired by reading more
            ** bytes; the input is data, so report failure rather than
            ** asserting.
            */
            /* todo: error message for broken stream? */
            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        /* Incomplete sequence: stop once the buffer is full, so that the
        ** last byte read has still been offered to iconv() above.
        */
        if (inbufsize >= TC_INBUFSIZE)
            break;

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */
            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
104
105 #endif /* TIDY_ICONV_SUPPORT */
106