1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 2003-2013 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *               Glenn Fowler <glenn.s.fowler@gmail.com>                *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 
22 /*
23  * character codeset coder
24  */
25 
26 #include <codex.h>
27 #include <iconv.h>
28 
29 typedef struct State_s
30 {
31 	iconv_t		cvt;
32 
33 	char*		bp;
34 
35 	char		buf[SF_BUFSIZE];
36 } State_t;
37 
38 static int
cc_options(Codexmeth_t * meth,Sfio_t * sp)39 cc_options(Codexmeth_t* meth, Sfio_t* sp)
40 {
41 	register iconv_list_t*	ic;
42 	register const char*	p;
43 	register int		c;
44 
45 	for (ic = iconv_list(NiL); ic; ic = iconv_list(ic))
46 	{
47 		sfputc(sp, '[');
48 		sfputc(sp, '+');
49 		sfputc(sp, '\b');
50 		p = ic->match;
51 		if (*p == '(')
52 			p++;
53 		while (c = *p++)
54 		{
55 			if (c == ')' && !*p)
56 				break;
57 			if (c == '?' || c == ']')
58 				sfputc(sp, c);
59 			sfputc(sp, c);
60 		}
61 		sfputc(sp, '?');
62 		p = ic->desc;
63 		while (c = *p++)
64 		{
65 			if (c == ']')
66 				sfputc(sp, c);
67 			sfputc(sp, c);
68 		}
69 		sfputc(sp, ']');
70 	}
71 	return 0;
72 }
73 
74 static int
cc_open(Codex_t * p,char * const args[],Codexnum_t flags)75 cc_open(Codex_t* p, char* const args[], Codexnum_t flags)
76 {
77 	State_t*	state;
78 	const char*	src;
79 	const char*	dst;
80 	iconv_t		cvt;
81 
82 	dst = (src = args[2]) ? args[3] : 0;
83 	if (flags & CODEX_DECODE)
84 	{
85 		if (!src)
86 		{
87 			if (p->disc->errorf)
88 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: source codeset option must be specified", p->meth->name);
89 			return -1;
90 		}
91 	}
92 	else
93 	{
94 		if (!src)
95 		{
96 			if (p->disc->errorf)
97 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: destination codeset option must be specified", p->meth->name);
98 			return -1;
99 		}
100 		if (!dst)
101 		{
102 			dst = src;
103 			src = 0;
104 		}
105 	}
106 	if ((cvt = iconv_open(dst, src)) == (iconv_t)(-1))
107 	{
108 		if (p->disc->errorf)
109 		{
110 			if ((cvt = iconv_open("utf-8", src)) == (iconv_t)(-1))
111 			{
112 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: %s: unknown source codeset", p->meth->name, src);
113 				return -1;
114 			}
115 			iconv_close(cvt);
116 			if ((cvt = iconv_open(dst, "utf-8")) == (iconv_t)(-1))
117 			{
118 				(*p->disc->errorf)(NiL, p->disc, 2, "%s: %s: unknown destination codeset", p->meth->name, dst);
119 				return -1;
120 			}
121 			iconv_close(cvt);
122 			(*p->disc->errorf)(NiL, p->disc, 2, "%s: cannot convert from %s to %s", p->meth->name, src, dst);
123 		}
124 		return -1;
125 	}
126 	if (!(state = newof(0, State_t, 1, 0)))
127 	{
128 		if (p->disc->errorf)
129 			(*p->disc->errorf)(NiL, p->disc, 2, "out of space");
130 		iconv_close(cvt);
131 		return 0;
132 	}
133 	state->cvt = cvt;
134 	state->bp = state->buf;
135 	p->data = state;
136 	return 0;
137 }
138 
139 static int
cc_close(Codex_t * p)140 cc_close(Codex_t* p)
141 {
142 	State_t*	state = (State_t*)p->data;
143 	int		r;
144 
145 	if (!state)
146 		r = -1;
147 	else
148 	{
149 		r = iconv_close(state->cvt);
150 		free(state);
151 	}
152 	return r;
153 }
154 
155 static ssize_t
cc_read(Sfio_t * sp,void * buf,size_t n,Sfdisc_t * disc)156 cc_read(Sfio_t* sp, void* buf, size_t n, Sfdisc_t* disc)
157 {
158 	register State_t*	state = (State_t*)((Codex_t*)disc)->data;
159 	char*			fb;
160 	char*			tb;
161 	size_t			fn;
162 	size_t			tn;
163 	ssize_t			r;
164 
165 	fn = sizeof(state->buf) - (state->bp - state->buf);
166 	if (n < fn)
167 		fn = n;
168 	if ((r = sfrd(sp, state->bp, fn, disc)) <= 0)
169 		return (state->bp > state->buf) ? -1 : r;
170 	fb = state->buf;
171 	fn = r + (state->bp - state->buf);
172 	tb = buf;
173 	tn = n;
174 	n = 0;
175 	while (fn > 0 && tn > 0)
176 	{
177 		if ((r = iconv(state->cvt, &fb, &fn, &tb, &tn)) == -1)
178 		{
179 			if (!n)
180 				n = -1;
181 			break;
182 		}
183 		n += r;
184 	}
185 	if (fn && fb > state->buf)
186 	{
187 		tb = state->buf;
188 		while (fn--)
189 			*tb++ = *fb++;
190 		state->bp = tb;
191 	}
192 	return n;
193 }
194 
195 static ssize_t
cc_write(Sfio_t * sp,const void * buf,size_t n,Sfdisc_t * disc)196 cc_write(Sfio_t* sp, const void* buf, size_t n, Sfdisc_t* disc)
197 {
198 	register State_t*	state = (State_t*)((Codex_t*)disc)->data;
199 	char*			fb;
200 	char*			tb;
201 	size_t			fn;
202 	size_t			tn;
203 	size_t			r;
204 
205 	fb = (char*)buf;
206 	fn = n;
207 	n = 0;
208 	while (fn > 0)
209 	{
210 		tb = (char*)state->buf;
211 		tn = sizeof(buf);
212 		if ((r = iconv(state->cvt, &fb, &fn, &tb, &tn)) == (size_t)(-1))
213 			return n ? n : -1;
214 		n += r;
215 		if (sfwr(sp, state->buf, r, disc) != r)
216 			return n ? n : -1;
217 	}
218 	return n;
219 }
220 
221 static int
cc_sync(Codex_t * p)222 cc_sync(Codex_t* p)
223 {
224 	State_t*	state = (State_t*)p->data;
225 
226 	(void)iconv(state->cvt, NiL, NiL, NiL, NiL);
227 	return 0;
228 }
229 
230 Codexmeth_t	codex_iconv =
231 {
232 	"iconv",
233 	"iconv character codeset conversion. One or two character codeset"
234 	" options must be specified. Two options specify the source and"
235 	" destination codesets. One option specifies the decode source or"
236 	" encode destination codeset; the implied second codeset defaults"
237 	" to \bnative\b.",
238 	"[-?\n@(#)$Id: codex-iconv (AT&T Research) 2000-05-09 $\n]" USAGE_LICENSE,
239 	CODEX_DECODE|CODEX_ENCODE|CODEX_ICONV,
240 	cc_options,
241 	0,
242 	cc_open,
243 	cc_close,
244 	cc_sync,
245 	cc_sync,
246 	cc_read,
247 	cc_write,
248 	cc_sync,
249 	0,
250 	0,
251 	0,
252 	0,
253 	CODEXNEXT(iconv)
254 };
255 
256 CODEXLIB(iconv)
257