1 /* iconvtc.c -- Interface to iconv transcoding routines
2 
3   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4   See tidyp.h for the copyright notice.
5 
6 */
7 
8 #include "tidyp.h"
9 #include "forward.h"
10 #include "streamio.h"
11 
12 #ifdef TIDY_ICONV_SUPPORT
13 
14 #include <iconv.h>
15 
16 /* maximum number of bytes for a single character */
17 #define TC_INBUFSIZE  16
18 
/* size in bytes of the buffer that receives iconv's output for one input sequence */
20 #define TC_OUTBUFSIZE 16
21 
IconvInitInputTranscoder(void)22 Bool IconvInitInputTranscoder(void)
23 {
24     return no;
25 }
26 
/* Shut down the iconv input transcoder.
**
** No-op: there is nothing to release, so the function returns immediately.
*/
void IconvUninitInputTranscoder(void)
{
    /* intentionally empty */
}
31 
/* Read one character from the input stream through iconv.
**
** firstByte  first byte of the character, already fetched by the caller.
** in         input stream; in->iconvptr must hold the iconv descriptor and
**            in->source supplies any further bytes that are needed.
** bytesRead  receives the number of input bytes consumed for this
**            character (firstByte included) on every return path.
**
** Returns the decoded code point, or EndOfStream when the source runs out
** of bytes or no complete character is found within TC_INBUFSIZE bytes.
**
** The code point is built from the first four output octets, read as
** UTF-32LE.  NOTE(review): this assumes the descriptor was opened with
** UTF-32LE as the target encoding - the iconv_open call is not in this
** file, confirm against the caller.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    /* &in->source can never be NULL once in is valid, so only the */
    /* pointers that can actually be NULL are checked here.        */
    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    /* Feed iconv a growing prefix of the byte sequence until it */
    /* accepts it as a complete character.                       */
    while (inbufsize < TC_INBUFSIZE)
    {
        char * outbufptr = (char*)outbuf;
        char * inbufptr = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno; /* save before any other call can overwrite it */

        if (result != (size_t)(-1))
        {
            unsigned int c;

            /* create codepoint from UTF-32LE octets; assembled in an   */
            /* unsigned type so the shift of the top octet is defined,  */
            /* and the top octet goes to bits 24..31 (a shift by 32 is  */
            /* undefined for a 32-bit int)                              */
            c  = (unsigned int)(unsigned char)outbuf[0];
            c |= (unsigned int)(unsigned char)outbuf[1] << 8;
            c |= (unsigned int)(unsigned char)outbuf[2] << 16;
            c |= (unsigned int)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory         */
        assert( iconv_errno == EINVAL ); /* incomplete sequence       */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */

    /* report the consumed bytes here too, like the other exits do */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
103 
104 #endif /* TIDY_ICONV_SUPPORT */
105