1 /* iconvtc.c -- Interface to iconv transcoding routines
2 
3   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4   See tidy.h for the copyright notice.
5 
6   $Id: iconvtc.c,v 1.2 2008/08/09 11:55:27 hoehrmann Exp $
7 */
8 
9 #include "tidy.h"
10 #include "forward.h"
11 #include "streamio.h"
12 
13 #ifdef TIDY_ICONV_SUPPORT
14 
15 #include <iconv.h>
16 
17 /* maximum number of bytes for a single character */
18 #define TC_INBUFSIZE  16
19 
20 /* maximum number of characters per byte sequence */
21 #define TC_OUTBUFSIZE 16
22 
IconvInitInputTranscoder(void)23 Bool IconvInitInputTranscoder(void)
24 {
25     return no;
26 }
27 
/* IconvUninitInputTranscoder -- stub teardown hook.
**
** Currently a no-op: the body performs no work and there is no
** return value.
*/
void IconvUninitInputTranscoder(void)
{
    /* nothing to release */
}
32 
/* IconvGetChar -- read one character from `in` through iconv.
**
** firstByte  first byte of the character; already consumed by the caller.
** in         input stream; in->iconvptr must hold an open conversion
**            descriptor and in->source supplies any further bytes.
** bytesRead  out: number of input bytes collected for this character
**            (including firstByte). Written on every return path.
**
** Returns the decoded code point, or EndOfStream when the source runs
** dry in the middle of a sequence or when no complete character was
** found before the input buffer (TC_INBUFSIZE bytes) filled up.
**
** The output of iconv() is interpreted as one UTF-32LE code unit, so the
** descriptor is assumed to have been opened with UTF-32LE as its target
** encoding -- NOTE(review): confirm where in->iconvptr is created.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    /* Retry the conversion with one more input byte each round until
    ** iconv accepts the sequence or the input buffer is full. */
    while(inbufsize < TC_INBUFSIZE)
    {
        char * outbufptr = (char*)outbuf;
        char * inbufptr = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        /* NOTE(review): the const cast suits iconv implementations that
        ** declare inbuf as `const char **`; POSIX declares `char **`. */
        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);

        /* save errno right away, before any other call can overwrite it */
        iconv_errno = errno;

        if (result != (size_t)(-1))
        {
            uint c;

            /* create codepoint from UTF-32LE octets; assembled in an
            ** unsigned type so the high-byte shift cannot overflow */
            c  = (uint)(unsigned char)outbuf[0];
            c |= (uint)(unsigned char)outbuf[1] << 8;
            c |= (uint)(unsigned char)outbuf[2] << 16;
            c |= (uint)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory         */
        assert( iconv_errno == EINVAL ); /* incomplete sequence       */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */

    /* report what was consumed, as the other EndOfStream path does */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
104 
105 #endif /* TIDY_ICONV_SUPPORT */
106