1 /* 2 * charconv.h - character code conversion library 3 * 4 * Copyright (c) 2000-2020 Shiro Kawai <shiro@acm.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the authors nor the names of its contributors 18 * may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef GAUCHE_CHARCONV_H 35 #define GAUCHE_CHARCONV_H 36 37 #include <gauche.h> 38 #include "iconv-adapter.h" 39 40 SCM_DECL_BEGIN 41 42 struct ScmConvInfoRec; 43 44 typedef ScmSize ScmConvProc(struct ScmConvInfoRec*, const char*, ScmSize, 45 char*, ScmSize, ScmSize*); 46 typedef ScmSize ScmConvReset(struct ScmConvInfoRec*, char*, ScmSize); 47 typedef ScmSize ScmConvHandler(struct ScmConvInfoRec*, const char **, 48 ScmSize*, char**, ScmSize*); 49 50 /* Packaging conversion context info.*/ 51 typedef struct ScmConvInfoRec { 52 ScmConvHandler *jconv; /* jconv handler */ 53 ScmConvProc *convert; /* 1-character conversion routine */ 54 ScmConvReset *reset; /* reset routine */ 55 iconv_t handle; /* iconv handle, if the conversion is 56 handled by iconv */ 57 const char *fromCode; /* convert from ... */ 58 const char *toCode; /* conver to ... */ 59 int istate; /* current input state */ 60 int ostate; /* current output state */ 61 ScmPort *remote; /* source or drain port */ 62 int ownerp; /* do I own remote port? */ 63 int remoteClosed; /* true if remore port is closed */ 64 int replacep; /* true if we replace unrecognized input 65 with replacement sequence */ 66 ScmSize replaceSize; /* size of replaceSeq */ 67 const char *replaceSeq; /* the replacement sequence, NULL terminated */ 68 ScmSize bufsiz; /* size of conversion buffer */ 69 char *buf; /* internal conversion buffer */ 70 char *ptr; /* current ptr in the internal conv buf */ 71 } ScmConvInfo; 72 73 /* Bitmask for 'flags' argument. 74 Scm_ConversionSupportedP only recognizes CVPORT_ICONV. */ 75 enum { 76 CVPORT_OWNER = (1L<<0), /* Close the inner port if the conversion port 77 is closed. */ 78 CVPORT_REPLACE = (1L<<1), /* Use replacement character for illegal 79 sequences instead of signaling an error */ 80 }; 81 82 extern ScmObj Scm_MakeInputConversionPort(ScmPort *source, 83 const char *fromCode, 84 const char *toCode, 85 ScmSize bufsiz, 86 u_long flags); 87 extern ScmObj Scm_MakeOutputConversionPort(ScmPort *sink, 88 const char *toCode, 89 const char *fromCode, 90 ScmSize bufsiz, 91 u_long flags); 92 93 typedef const char *(*ScmCodeGuessingProc)(const char *buf, 94 ScmSize bufsiz, 95 void *data); 96 97 extern const char *Scm_GetCESName(ScmObj code, const char *argname); 98 extern int Scm_ConversionSupportedP(const char *from, const char *to, 99 u_long flags); 100 101 extern void Scm_RegisterCodeGuessingProc(const char *code, 102 ScmCodeGuessingProc proc, 103 void *data); 104 105 extern const char *Scm_GuessCES(const char *code, 106 const char *buf, 107 ScmSize buflen); 108 109 /* 110 * jconv interface 111 * 112 * jconv is a lower-level layer 113 */ 114 115 /* jconv error code */ 116 #define ILLEGAL_SEQUENCE (-1) /* input contains illegal sequence */ 117 #define INPUT_NOT_ENOUGH (-2) /* input terminates prematurely */ 118 #define OUTPUT_NOT_ENOUGH (-3) /* output buffer is too small */ 119 #define NO_OUTPUT_CHAR (-4) /* char can't be represented in output CES */ 120 121 extern ScmConvInfo *jconv_open(const char *toCode, 122 const char *fromCode, 123 int useIconv); 124 extern int jconv_close(ScmConvInfo*); 125 extern ScmSize jconv(ScmConvInfo*, const char **inptr, ScmSize *inroom, 126 char **outptr, ScmSize *outroom); 127 extern ScmSize jconv_reset(ScmConvInfo *, char *outptr, ScmSize outroom); 128 extern void jconv_set_replacement(ScmConvInfo *info); 129 130 /* Given UCS char, return # of bytes required for UTF8 encoding. 131 We have these in char_utf8.h, but it is only available when the 132 native encoding is utf-8. Eventually we need to factor these out. 133 */ 134 #define UCS2UTF_NBYTES(ucs) \ 135 (((ucs) < 0x80) ? 1 : \ 136 (((ucs) < 0x800) ? 2 : \ 137 (((ucs) < 0x10000) ? 3 : \ 138 (((ucs) < 0x200000) ? 4 : \ 139 (((ucs) < 0x4000000) ? 5 : 6))))) 140 141 extern void jconv_ucs4_to_utf8(unsigned int ucs, char *cp); 142 extern int jconv_utf8_to_ucs4(const char *cp, 143 ScmSize size, 144 ScmChar *ucs); /* out */ 145 146 SCM_DECL_END 147 148 #endif /*GAUCHE_CHARCONV_H*/ 149 150 /* 151 * Local variables: 152 * mode: c 153 * end: 154 */ 155