1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 2003-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * character codeset coder
26  */
27 
28 #include <codex.h>
29 #include <iconv.h>
30 
31 typedef struct State_s
32 {
33 	iconv_t		cvt;
34 
35 	char*		bp;
36 
37 	char		buf[SF_BUFSIZE];
38 } State_t;
39 
40 static int
cc_options(Codexmeth_t * meth,Sfio_t * sp)41 cc_options(Codexmeth_t* meth, Sfio_t* sp)
42 {
43 	register iconv_list_t*	ic;
44 	register const char*	p;
45 	register int		c;
46 
47 	for (ic = iconv_list(NiL); ic; ic = iconv_list(ic))
48 	{
49 		sfputc(sp, '[');
50 		sfputc(sp, '+');
51 		sfputc(sp, '\b');
52 		p = ic->match;
53 		if (*p == '(')
54 			p++;
55 		while (c = *p++)
56 		{
57 			if (c == ')' && !*p)
58 				break;
59 			if (c == '?' || c == ']')
60 				sfputc(sp, c);
61 			sfputc(sp, c);
62 		}
63 		sfputc(sp, '?');
64 		p = ic->desc;
65 		while (c = *p++)
66 		{
67 			if (c == ']')
68 				sfputc(sp, c);
69 			sfputc(sp, c);
70 		}
71 		sfputc(sp, ']');
72 	}
73 	return 0;
74 }
75 
76 static int
cc_open(Codex_t * p,char * const args[],Codexnum_t flags)77 cc_open(Codex_t* p, char* const args[], Codexnum_t flags)
78 {
79 	State_t*	state;
80 	const char*	src;
81 	const char*	dst;
82 	iconv_t		cvt;
83 
84 	dst = (src = args[2]) ? args[3] : 0;
85 	if (flags & CODEX_DECODE)
86 	{
87 		if (!src)
88 		{
89 			if (p->disc->errorf)
90 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: source codeset option must be specified", p->meth->name);
91 			return -1;
92 		}
93 	}
94 	else
95 	{
96 		if (!src)
97 		{
98 			if (p->disc->errorf)
99 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: destination codeset option must be specified", p->meth->name);
100 			return -1;
101 		}
102 		if (!dst)
103 		{
104 			dst = src;
105 			src = 0;
106 		}
107 	}
108 	if ((cvt = iconv_open(dst, src)) == (iconv_t)(-1))
109 	{
110 		if (p->disc->errorf)
111 		{
112 			if ((cvt = iconv_open("utf-8", src)) == (iconv_t)(-1))
113 			{
114 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: %s: unknown source codeset", p->meth->name, src);
115 				return -1;
116 			}
117 			iconv_close(cvt);
118 			if ((cvt = iconv_open(dst, "utf-8")) == (iconv_t)(-1))
119 			{
120 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: %s: unknown destination codeset", p->meth->name, dst);
121 				return -1;
122 			}
123 			iconv_close(cvt);
124 			(*p->disc->errorf)(NiL, p->disc, 2, "%s: cannot convert from %s to %s", p->meth->name, src, dst);
125 		}
126 		return -1;
127 	}
128 	if (!(state = newof(0, State_t, 1, 0)))
129 	{
130 		if (p->disc->errorf)
131 			(*p->disc->errorf)(NiL, p->disc, 2, "out of space");
132 		iconv_close(cvt);
133 		return 0;
134 	}
135 	state->cvt = cvt;
136 	state->bp = state->buf;
137 	p->data = state;
138 	return 0;
139 }
140 
141 static int
cc_close(Codex_t * p)142 cc_close(Codex_t* p)
143 {
144 	State_t*	state = (State_t*)p->data;
145 	int		r;
146 
147 	if (!state)
148 		r = -1;
149 	else
150 	{
151 		r = iconv_close(state->cvt);
152 		free(state);
153 	}
154 	return r;
155 }
156 
157 static ssize_t
cc_read(Sfio_t * sp,void * buf,size_t n,Sfdisc_t * disc)158 cc_read(Sfio_t* sp, void* buf, size_t n, Sfdisc_t* disc)
159 {
160 	register State_t*	state = (State_t*)((Codex_t*)disc)->data;
161 	char*			fb;
162 	char*			tb;
163 	size_t			fn;
164 	size_t			tn;
165 	ssize_t			r;
166 
167 	fn = sizeof(state->buf) - (state->bp - state->buf);
168 	if (n < fn)
169 		fn = n;
170 	if ((r = sfrd(sp, state->bp, fn, disc)) <= 0)
171 		return (state->bp > state->buf) ? -1 : r;
172 	fb = state->buf;
173 	fn = r + (state->bp - state->buf);
174 	tb = buf;
175 	tn = n;
176 	n = 0;
177 	while (fn > 0 && tn > 0)
178 	{
179 		if ((r = iconv(state->cvt, &fb, &fn, &tb, &tn)) == -1)
180 		{
181 			if (!n)
182 				n = -1;
183 			break;
184 		}
185 		n += r;
186 	}
187 	if (fn && fb > state->buf)
188 	{
189 		tb = state->buf;
190 		while (fn--)
191 			*tb++ = *fb++;
192 		state->bp = tb;
193 	}
194 	return n;
195 }
196 
197 static ssize_t
cc_write(Sfio_t * sp,const void * buf,size_t n,Sfdisc_t * disc)198 cc_write(Sfio_t* sp, const void* buf, size_t n, Sfdisc_t* disc)
199 {
200 	register State_t*	state = (State_t*)((Codex_t*)disc)->data;
201 	char*			fb;
202 	char*			tb;
203 	size_t			fn;
204 	size_t			tn;
205 	size_t			r;
206 
207 	fb = (char*)buf;
208 	fn = n;
209 	n = 0;
210 	while (fn > 0)
211 	{
212 		tb = (char*)state->buf;
213 		tn = sizeof(buf);
214 		if ((r = iconv(state->cvt, &fb, &fn, &tb, &tn)) == (size_t)(-1))
215 			return n ? n : -1;
216 		n += r;
217 		if (sfwr(sp, state->buf, r, disc) != r)
218 			return n ? n : -1;
219 	}
220 	return n;
221 }
222 
223 static int
cc_sync(Codex_t * p)224 cc_sync(Codex_t* p)
225 {
226 	mbinit();
227 	return 0;
228 }
229 
230 Codexmeth_t	codex_iconv =
231 {
232 	"iconv",
233 	"iconv character codeset conversion. One or two character codeset"
234 	" options must be specified. Two options specify the source and"
235 	" destination codesets. One option specifies the decode source or"
236 	" encode destination codeset; the implied second codeset defaults"
237 	" to \bnative\b.",
238 	"[+(version)?codex-iconv (AT&T Research) 2000-05-09]"
239 	"[+(author)?Glenn Fowler <gsf@research.att.com>]",
240 	CODEX_DECODE|CODEX_ENCODE|CODEX_ICONV,
241 	cc_options,
242 	0,
243 	cc_open,
244 	cc_close,
245 	cc_sync,
246 	cc_sync,
247 	cc_read,
248 	cc_write,
249 	cc_sync,
250 	0,
251 	0,
252 	0,
253 	0,
254 	CODEXNEXT(iconv)
255 };
256 
257 CODEXLIB(iconv)
258