1 /* iconvtc.c -- Interface to iconv transcoding routines
2
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 See tidyp.h for the copyright notice.
5
6 */
7
8 #include "tidyp.h"
9 #include "forward.h"
10 #include "streamio.h"
11
12 #ifdef TIDY_ICONV_SUPPORT
13
14 #include <iconv.h>
15
/* Capacity of the input staging buffer in IconvGetChar(): the most bytes
   gathered from the stream while trying to complete a single character. */
#define TC_INBUFSIZE 16

/* Capacity, in bytes, of the buffer that receives iconv's converted output
   for one character in IconvGetChar(). */
#define TC_OUTBUFSIZE 16
21
IconvInitInputTranscoder(void)22 Bool IconvInitInputTranscoder(void)
23 {
24 return no;
25 }
26
/*
** Placeholder for input transcoder tear-down.
**
** Counterpart of IconvInitInputTranscoder(); currently has nothing to release.
*/
void IconvUninitInputTranscoder(void)
{
    /* intentionally empty */
}
31
/*
** Reads one character from the input stream by feeding bytes to iconv until
** a complete character has been converted.
**
** firstByte  - first byte of the character, already taken from the stream
** in         - input stream; in->iconvptr holds the iconv conversion
**              descriptor and in->source supplies any further bytes
** bytesRead  - receives the number of input bytes gathered for this
**              character (firstByte included); set on every return path
**
** Returns the code point assembled from the first four converted octets,
** read as UTF-32LE (this assumes the descriptor was opened with UTF-32LE as
** its target encoding), or EndOfStream when the source is exhausted or no
** complete character could be converted before the staging buffer reached
** TC_INBUFSIZE bytes.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    while (inbufsize < TC_INBUFSIZE)
    {
        /* iconv advances these, so start from the buffer heads each pass */
        char * outbufptr = outbuf;
        char * inbufptr = inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result;
        int iconv_errno;
        int nextByte;

        /* POSIX declares the input argument as char**, some older
           implementations as const char**; going through void* converts
           implicitly to either form. */
        result = iconv(cd, (void *)&inbufptr, &readNow, &outbufptr, &writeNow);

        /* capture errno before any other call can overwrite it */
        iconv_errno = errno;

        if (result != (size_t)(-1))
        {
            unsigned int c;

            /* create codepoint from UTF-32LE octets; unsigned arithmetic
               keeps the shift into the top byte well defined */
            c  = (unsigned int)(unsigned char)outbuf[0];
            c |= (unsigned int)(unsigned char)outbuf[1] << 8;
            c |= (unsigned int)(unsigned char)outbuf[2] << 16;
            c |= (unsigned int)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* invalid multibyte sequence */
        assert( iconv_errno != E2BIG );  /* output buffer too small */
        assert( iconv_errno == EINVAL ); /* incomplete sequence */
        (void)iconv_errno;               /* unused when asserts are disabled */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */

    /* report what was consumed so the caller never sees a stale count */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
103
104 #endif /* TIDY_ICONV_SUPPORT */
105