1 /*
2  * charconv.h - character code conversion library
3  *
4  *   Copyright (c) 2000-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef GAUCHE_CHARCONV_H
35 #define GAUCHE_CHARCONV_H
36 
37 #include <gauche.h>
38 #include "iconv-adapter.h"
39 
40 SCM_DECL_BEGIN
41 
42 struct ScmConvInfoRec;
43 
44 typedef ScmSize ScmConvProc(struct ScmConvInfoRec*, const char*, ScmSize,
45                             char*, ScmSize, ScmSize*);
46 typedef ScmSize ScmConvReset(struct ScmConvInfoRec*, char*, ScmSize);
47 typedef ScmSize ScmConvHandler(struct ScmConvInfoRec*, const char **,
48                                ScmSize*, char**, ScmSize*);
49 
50 /* Packaging conversion context info.*/
51 typedef struct ScmConvInfoRec {
52     ScmConvHandler *jconv;      /* jconv handler */
53     ScmConvProc *convert;       /* 1-character conversion routine */
54     ScmConvReset *reset;        /* reset routine */
55     iconv_t handle;             /* iconv handle, if the conversion is
56                                    handled by iconv */
57     const char *fromCode;       /* convert from ... */
58     const char *toCode;         /* conver to ... */
59     int istate;                 /* current input state */
60     int ostate;                 /* current output state */
61     ScmPort *remote;            /* source or drain port */
62     int ownerp;                 /* do I own remote port? */
63     int remoteClosed;           /* true if remore port is closed */
64     int replacep;               /* true if we replace unrecognized input
65                                    with replacement sequence */
66     ScmSize replaceSize;        /* size of replaceSeq */
67     const char *replaceSeq;     /* the replacement sequence, NULL terminated */
68     ScmSize bufsiz;             /* size of conversion buffer */
69     char *buf;                  /* internal conversion buffer */
70     char *ptr;                  /* current ptr in the internal conv buf */
71 } ScmConvInfo;
72 
/* Bit flags for the 'flags' argument of the functions declared below.
   Each constant occupies its own bit, so they can be OR-ed together.
   Scm_ConversionSupportedP is said to recognize only CVPORT_ICONV.
   NOTE(review): no CVPORT_ICONV is defined in this enum -- confirm
   whether that flag lives elsewhere or this remark is stale. */
enum {
    /* Close the inner port if the conversion port is closed. */
    CVPORT_OWNER   = (1L << 0),

    /* Use replacement character for illegal sequences instead of
       signaling an error. */
    CVPORT_REPLACE = (1L << 1),
};
81 
82 extern ScmObj Scm_MakeInputConversionPort(ScmPort *source,
83                                           const char *fromCode,
84                                           const char *toCode,
85                                           ScmSize bufsiz,
86                                           u_long flags);
87 extern ScmObj Scm_MakeOutputConversionPort(ScmPort *sink,
88                                            const char *toCode,
89                                            const char *fromCode,
90                                            ScmSize bufsiz,
91                                            u_long flags);
92 
93 typedef const char *(*ScmCodeGuessingProc)(const char *buf,
94                                            ScmSize bufsiz,
95                                            void *data);
96 
97 extern const char *Scm_GetCESName(ScmObj code, const char *argname);
98 extern int Scm_ConversionSupportedP(const char *from, const char *to,
99                                     u_long flags);
100 
101 extern void Scm_RegisterCodeGuessingProc(const char *code,
102                                          ScmCodeGuessingProc proc,
103                                          void *data);
104 
105 extern const char *Scm_GuessCES(const char *code,
106                                 const char *buf,
107                                 ScmSize buflen);
108 
109 /*
110  * jconv interface
111  *
112  *   jconv is a lower-level layer
113  */
114 
/* jconv error codes.  All of them are negative. */

/* input contains illegal sequence */
#define ILLEGAL_SEQUENCE    (-1)
/* input terminates prematurely */
#define INPUT_NOT_ENOUGH    (-2)
/* output buffer is too small */
#define OUTPUT_NOT_ENOUGH   (-3)
/* char can't be represented in output CES */
#define NO_OUTPUT_CHAR      (-4)
120 
121 extern ScmConvInfo *jconv_open(const char *toCode,
122                                const char *fromCode,
123                                int useIconv);
124 extern int jconv_close(ScmConvInfo*);
125 extern ScmSize jconv(ScmConvInfo*, const char **inptr, ScmSize *inroom,
126                      char **outptr, ScmSize *outroom);
127 extern ScmSize jconv_reset(ScmConvInfo *, char *outptr, ScmSize outroom);
128 extern void jconv_set_replacement(ScmConvInfo *info);
129 
/* Number of bytes needed to encode the UCS character 'ucs' in UTF-8
   (1 to 6, i.e. including the long 5- and 6-byte forms).
   The same helpers exist in char_utf8.h, but that header is only
   available when the native encoding is utf-8; eventually these should
   be factored out.
   Caution: 'ucs' may be evaluated several times, so do not pass an
   expression with side effects.
 */
#define UCS2UTF_NBYTES(ucs)             \
    ((ucs) < 0x80      ? 1 :            \
     (ucs) < 0x800     ? 2 :            \
     (ucs) < 0x10000   ? 3 :            \
     (ucs) < 0x200000  ? 4 :            \
     (ucs) < 0x4000000 ? 5 : 6)
140 
141 extern void jconv_ucs4_to_utf8(unsigned int ucs, char *cp);
142 extern int  jconv_utf8_to_ucs4(const char *cp,
143                                ScmSize size,
144                                ScmChar *ucs);   /* out */
145 
146 SCM_DECL_END
147 
148 #endif /*GAUCHE_CHARCONV_H*/
149 
150 /*
151  * Local variables:
152  * mode: c
153  * end:
154  */
155