1*d14d7d31Sis /*
2*d14d7d31Sis * CDDL HEADER START
3*d14d7d31Sis *
4*d14d7d31Sis * The contents of this file are subject to the terms of the
5*d14d7d31Sis * Common Development and Distribution License (the "License").
6*d14d7d31Sis * You may not use this file except in compliance with the License.
7*d14d7d31Sis *
8*d14d7d31Sis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*d14d7d31Sis * or http://www.opensolaris.org/os/licensing.
10*d14d7d31Sis * See the License for the specific language governing permissions
11*d14d7d31Sis * and limitations under the License.
12*d14d7d31Sis *
13*d14d7d31Sis * When distributing Covered Code, include this CDDL HEADER in each
14*d14d7d31Sis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*d14d7d31Sis * If applicable, add the following below this CDDL HEADER, with the
16*d14d7d31Sis * fields enclosed by brackets "[]" replaced with your own identifying
17*d14d7d31Sis * information: Portions Copyright [yyyy] [name of copyright owner]
18*d14d7d31Sis *
19*d14d7d31Sis * CDDL HEADER END
20*d14d7d31Sis */
21*d14d7d31Sis /*
22*d14d7d31Sis * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23*d14d7d31Sis * Use is subject to license terms.
24*d14d7d31Sis */
25*d14d7d31Sis
26*d14d7d31Sis /*
27*d14d7d31Sis * Kernel iconv code conversion functions (PSARC/2007/173).
28*d14d7d31Sis *
29*d14d7d31Sis * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
30*d14d7d31Sis * Interface stability: Committed.
31*d14d7d31Sis */
32*d14d7d31Sis
33*d14d7d31Sis #include <sys/types.h>
34*d14d7d31Sis #include <sys/param.h>
35*d14d7d31Sis #include <sys/sysmacros.h>
36*d14d7d31Sis #include <sys/systm.h>
37*d14d7d31Sis #include <sys/debug.h>
38*d14d7d31Sis #include <sys/kmem.h>
39*d14d7d31Sis #include <sys/sunddi.h>
40*d14d7d31Sis #include <sys/ksynch.h>
41*d14d7d31Sis #include <sys/modctl.h>
42*d14d7d31Sis #include <sys/byteorder.h>
43*d14d7d31Sis #include <sys/errno.h>
44*d14d7d31Sis #include <sys/kiconv.h>
45*d14d7d31Sis #include <sys/kiconv_latin1.h>
46*d14d7d31Sis
47*d14d7d31Sis
/*
 * Identifiers of the code conversion mapping tables. Each value selects
 * one table set (first index) in the mapping tables that are coming from
 * <sys/kiconv_latin1.h>.
 */
#define	KICONV_TBLID_1252		(0x00)
#define	KICONV_TBLID_8859_1		(0x01)
#define	KICONV_TBLID_8859_15		(0x02)
#define	KICONV_TBLID_850		(0x03)

/* The biggest valid table id; bigger values are rejected with EBADF. */
#define	KICONV_MAX_MAPPING_TBLID	(KICONV_TBLID_850)
58*d14d7d31Sis
59*d14d7d31Sis /*
60*d14d7d31Sis * The following tables are coming from u8_textprep.c. We use them to
61*d14d7d31Sis * check on validity of UTF-8 characters and their bytes.
62*d14d7d31Sis */
63*d14d7d31Sis extern const int8_t u8_number_of_bytes[];
64*d14d7d31Sis extern const uint8_t u8_valid_min_2nd_byte[];
65*d14d7d31Sis extern const uint8_t u8_valid_max_2nd_byte[];
66*d14d7d31Sis
67*d14d7d31Sis
68*d14d7d31Sis /*
69*d14d7d31Sis * The following four functions, open_to_1252(), open_to_88591(),
70*d14d7d31Sis * open_to_885915(), and open_to_850(), are kiconv_open functions from
71*d14d7d31Sis * UTF-8 to corresponding single byte codesets.
72*d14d7d31Sis */
73*d14d7d31Sis static void *
open_to_1252()74*d14d7d31Sis open_to_1252()
75*d14d7d31Sis {
76*d14d7d31Sis kiconv_state_t s;
77*d14d7d31Sis
78*d14d7d31Sis s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
79*d14d7d31Sis s->id = KICONV_TBLID_1252;
80*d14d7d31Sis s->bom_processed = 0;
81*d14d7d31Sis
82*d14d7d31Sis return ((void *)s);
83*d14d7d31Sis }
84*d14d7d31Sis
85*d14d7d31Sis static void *
open_to_88591()86*d14d7d31Sis open_to_88591()
87*d14d7d31Sis {
88*d14d7d31Sis kiconv_state_t s;
89*d14d7d31Sis
90*d14d7d31Sis s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
91*d14d7d31Sis s->id = KICONV_TBLID_8859_1;
92*d14d7d31Sis s->bom_processed = 0;
93*d14d7d31Sis
94*d14d7d31Sis return ((void *)s);
95*d14d7d31Sis }
96*d14d7d31Sis
97*d14d7d31Sis static void *
open_to_885915()98*d14d7d31Sis open_to_885915()
99*d14d7d31Sis {
100*d14d7d31Sis kiconv_state_t s;
101*d14d7d31Sis
102*d14d7d31Sis s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
103*d14d7d31Sis s->id = KICONV_TBLID_8859_15;
104*d14d7d31Sis s->bom_processed = 0;
105*d14d7d31Sis
106*d14d7d31Sis return ((void *)s);
107*d14d7d31Sis }
108*d14d7d31Sis
109*d14d7d31Sis static void *
open_to_850()110*d14d7d31Sis open_to_850()
111*d14d7d31Sis {
112*d14d7d31Sis kiconv_state_t s;
113*d14d7d31Sis
114*d14d7d31Sis s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
115*d14d7d31Sis s->id = KICONV_TBLID_850;
116*d14d7d31Sis s->bom_processed = 0;
117*d14d7d31Sis
118*d14d7d31Sis return ((void *)s);
119*d14d7d31Sis }
120*d14d7d31Sis
121*d14d7d31Sis /*
122*d14d7d31Sis * The following four functions, open_fr_1252(), open_fr_88591(),
123*d14d7d31Sis * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
124*d14d7d31Sis * corresponding single byte codesets to UTF-8.
125*d14d7d31Sis */
126*d14d7d31Sis static void *
open_fr_1252()127*d14d7d31Sis open_fr_1252()
128*d14d7d31Sis {
129*d14d7d31Sis return ((void *)KICONV_TBLID_1252);
130*d14d7d31Sis }
131*d14d7d31Sis
132*d14d7d31Sis static void *
open_fr_88591()133*d14d7d31Sis open_fr_88591()
134*d14d7d31Sis {
135*d14d7d31Sis return ((void *)KICONV_TBLID_8859_1);
136*d14d7d31Sis }
137*d14d7d31Sis
138*d14d7d31Sis static void *
open_fr_885915()139*d14d7d31Sis open_fr_885915()
140*d14d7d31Sis {
141*d14d7d31Sis return ((void *)KICONV_TBLID_8859_15);
142*d14d7d31Sis }
143*d14d7d31Sis
144*d14d7d31Sis static void *
open_fr_850()145*d14d7d31Sis open_fr_850()
146*d14d7d31Sis {
147*d14d7d31Sis return ((void *)KICONV_TBLID_850);
148*d14d7d31Sis }
149*d14d7d31Sis
150*d14d7d31Sis /*
151*d14d7d31Sis * The following close_to_sb() function is kiconv_close function for
152*d14d7d31Sis * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
153*d14d7d31Sis * is kiconv_close function for the conversions from single byte codesets to
154*d14d7d31Sis * UTF-8.
155*d14d7d31Sis */
156*d14d7d31Sis static int
close_to_sb(void * s)157*d14d7d31Sis close_to_sb(void *s)
158*d14d7d31Sis {
159*d14d7d31Sis if (! s || s == (void *)-1)
160*d14d7d31Sis return (EBADF);
161*d14d7d31Sis
162*d14d7d31Sis kmem_free(s, sizeof (kiconv_state_data_t));
163*d14d7d31Sis
164*d14d7d31Sis return (0);
165*d14d7d31Sis }
166*d14d7d31Sis
167*d14d7d31Sis static int
close_fr_sb(void * s)168*d14d7d31Sis close_fr_sb(void *s)
169*d14d7d31Sis {
170*d14d7d31Sis if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID)
171*d14d7d31Sis return (EBADF);
172*d14d7d31Sis
173*d14d7d31Sis return (0);
174*d14d7d31Sis }
175*d14d7d31Sis
176*d14d7d31Sis /*
177*d14d7d31Sis * The following is the common kiconv function for conversions from UTF-8
178*d14d7d31Sis * to single byte codesets.
179*d14d7d31Sis */
180*d14d7d31Sis static size_t
kiconv_to_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)181*d14d7d31Sis kiconv_to_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
182*d14d7d31Sis size_t *outbytesleft, int *errno)
183*d14d7d31Sis {
184*d14d7d31Sis size_t id;
185*d14d7d31Sis size_t ret_val;
186*d14d7d31Sis uchar_t *ib;
187*d14d7d31Sis uchar_t *oldib;
188*d14d7d31Sis uchar_t *ob;
189*d14d7d31Sis uchar_t *ibtail;
190*d14d7d31Sis uchar_t *obtail;
191*d14d7d31Sis uint32_t u8;
192*d14d7d31Sis size_t i;
193*d14d7d31Sis size_t l;
194*d14d7d31Sis size_t h;
195*d14d7d31Sis size_t init_h;
196*d14d7d31Sis int8_t sz;
197*d14d7d31Sis boolean_t second;
198*d14d7d31Sis
199*d14d7d31Sis /* Check on the kiconv code conversion descriptor. */
200*d14d7d31Sis if (! kcd || kcd == (void *)-1) {
201*d14d7d31Sis *errno = EBADF;
202*d14d7d31Sis return ((size_t)-1);
203*d14d7d31Sis }
204*d14d7d31Sis
205*d14d7d31Sis /*
206*d14d7d31Sis * Get the table id we are going to use for the code conversion
207*d14d7d31Sis * and let's double check on it.
208*d14d7d31Sis */
209*d14d7d31Sis id = ((kiconv_state_t)kcd)->id;
210*d14d7d31Sis if (id > KICONV_MAX_MAPPING_TBLID) {
211*d14d7d31Sis *errno = EBADF;
212*d14d7d31Sis return ((size_t)-1);
213*d14d7d31Sis }
214*d14d7d31Sis
215*d14d7d31Sis /* If this is a state reset request, process and return. */
216*d14d7d31Sis if (! inbuf || ! (*inbuf)) {
217*d14d7d31Sis ((kiconv_state_t)kcd)->bom_processed = 0;
218*d14d7d31Sis return ((size_t)0);
219*d14d7d31Sis }
220*d14d7d31Sis
221*d14d7d31Sis ret_val = 0;
222*d14d7d31Sis ib = (uchar_t *)*inbuf;
223*d14d7d31Sis ob = (uchar_t *)*outbuf;
224*d14d7d31Sis ibtail = ib + *inbytesleft;
225*d14d7d31Sis obtail = ob + *outbytesleft;
226*d14d7d31Sis
227*d14d7d31Sis /*
228*d14d7d31Sis * The inital high value for the binary search we will be using
229*d14d7d31Sis * shortly is a literal constant as of today but to be future proof,
230*d14d7d31Sis * let's calculate it like the following at here.
231*d14d7d31Sis */
232*d14d7d31Sis init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
233*d14d7d31Sis
234*d14d7d31Sis /*
235*d14d7d31Sis * If we haven't checked on the UTF-8 signature BOM character in
236*d14d7d31Sis * the beginning of the conversion data stream, we check it and if
237*d14d7d31Sis * find one, we skip it since we have no use for it.
238*d14d7d31Sis */
239*d14d7d31Sis if (((kiconv_state_t)kcd)->bom_processed == 0 && (ibtail - ib) >= 3 &&
240*d14d7d31Sis *ib == 0xef && *(ib + 1) == 0xbb && *(ib + 2) == 0xbf)
241*d14d7d31Sis ib += 3;
242*d14d7d31Sis ((kiconv_state_t)kcd)->bom_processed = 1;
243*d14d7d31Sis
244*d14d7d31Sis while (ib < ibtail) {
245*d14d7d31Sis sz = u8_number_of_bytes[*ib];
246*d14d7d31Sis if (sz <= 0) {
247*d14d7d31Sis *errno = EILSEQ;
248*d14d7d31Sis ret_val = (size_t)-1;
249*d14d7d31Sis break;
250*d14d7d31Sis }
251*d14d7d31Sis
252*d14d7d31Sis /*
253*d14d7d31Sis * If there is no room to write at the output buffer,
254*d14d7d31Sis * issue E2BIG error.
255*d14d7d31Sis */
256*d14d7d31Sis if (ob >= obtail) {
257*d14d7d31Sis *errno = E2BIG;
258*d14d7d31Sis ret_val = (size_t)-1;
259*d14d7d31Sis break;
260*d14d7d31Sis }
261*d14d7d31Sis
262*d14d7d31Sis /*
263*d14d7d31Sis * If it is a 7-bit ASCII character, we don't need to
264*d14d7d31Sis * process further and we just copy the character over.
265*d14d7d31Sis *
266*d14d7d31Sis * If not, we collect the character bytes up to four bytes,
267*d14d7d31Sis * validate the bytes, and binary search for the corresponding
268*d14d7d31Sis * single byte codeset character byte. If we find it from
269*d14d7d31Sis * the mapping table, we put that into the output buffer;
270*d14d7d31Sis * otherwise, we put a replacement character instead as
271*d14d7d31Sis * a non-identical conversion.
272*d14d7d31Sis */
273*d14d7d31Sis if (sz == 1) {
274*d14d7d31Sis *ob++ = *ib++;
275*d14d7d31Sis continue;
276*d14d7d31Sis }
277*d14d7d31Sis
278*d14d7d31Sis /*
279*d14d7d31Sis * Issue EINVAL error if input buffer has an incomplete
280*d14d7d31Sis * character at the end of the buffer.
281*d14d7d31Sis */
282*d14d7d31Sis if ((ibtail - ib) < sz) {
283*d14d7d31Sis *errno = EINVAL;
284*d14d7d31Sis ret_val = (size_t)-1;
285*d14d7d31Sis break;
286*d14d7d31Sis }
287*d14d7d31Sis
288*d14d7d31Sis /*
289*d14d7d31Sis * We collect UTF-8 character bytes and also check if
290*d14d7d31Sis * this is a valid UTF-8 character without any bogus bytes
291*d14d7d31Sis * based on the latest UTF-8 binary representation.
292*d14d7d31Sis */
293*d14d7d31Sis oldib = ib;
294*d14d7d31Sis u8 = *ib++;
295*d14d7d31Sis second = B_TRUE;
296*d14d7d31Sis for (i = 1; i < sz; i++) {
297*d14d7d31Sis if (second) {
298*d14d7d31Sis if (*ib < u8_valid_min_2nd_byte[u8] ||
299*d14d7d31Sis *ib > u8_valid_max_2nd_byte[u8]) {
300*d14d7d31Sis *errno = EILSEQ;
301*d14d7d31Sis ret_val = (size_t)-1;
302*d14d7d31Sis ib = oldib;
303*d14d7d31Sis goto TO_SB_ILLEGAL_CHAR_ERR;
304*d14d7d31Sis }
305*d14d7d31Sis second = B_FALSE;
306*d14d7d31Sis } else if (*ib < 0x80 || *ib > 0xbf) {
307*d14d7d31Sis *errno = EILSEQ;
308*d14d7d31Sis ret_val = (size_t)-1;
309*d14d7d31Sis ib = oldib;
310*d14d7d31Sis goto TO_SB_ILLEGAL_CHAR_ERR;
311*d14d7d31Sis }
312*d14d7d31Sis u8 = (u8 << 8) | ((uint32_t)*ib);
313*d14d7d31Sis ib++;
314*d14d7d31Sis }
315*d14d7d31Sis
316*d14d7d31Sis i = l = 0;
317*d14d7d31Sis h = init_h;
318*d14d7d31Sis while (l <= h) {
319*d14d7d31Sis i = (l + h) / 2;
320*d14d7d31Sis if (to_sb_tbl[id][i].u8 == u8)
321*d14d7d31Sis break;
322*d14d7d31Sis else if (to_sb_tbl[id][i].u8 < u8)
323*d14d7d31Sis l = i + 1;
324*d14d7d31Sis else
325*d14d7d31Sis h = i - 1;
326*d14d7d31Sis }
327*d14d7d31Sis
328*d14d7d31Sis if (to_sb_tbl[id][i].u8 == u8) {
329*d14d7d31Sis *ob++ = to_sb_tbl[id][i].sb;
330*d14d7d31Sis } else {
331*d14d7d31Sis /*
332*d14d7d31Sis * If we don't find a character in the target
333*d14d7d31Sis * codeset, we insert an ASCII replacement character
334*d14d7d31Sis * at the output buffer and indicate such
335*d14d7d31Sis * "non-identical" conversion by increasing the
336*d14d7d31Sis * return value which is the non-identical conversion
337*d14d7d31Sis * counter if bigger than 0.
338*d14d7d31Sis */
339*d14d7d31Sis *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
340*d14d7d31Sis ret_val++;
341*d14d7d31Sis }
342*d14d7d31Sis }
343*d14d7d31Sis
344*d14d7d31Sis TO_SB_ILLEGAL_CHAR_ERR:
345*d14d7d31Sis *inbuf = (char *)ib;
346*d14d7d31Sis *inbytesleft = ibtail - ib;
347*d14d7d31Sis *outbuf = (char *)ob;
348*d14d7d31Sis *outbytesleft = obtail - ob;
349*d14d7d31Sis
350*d14d7d31Sis return (ret_val);
351*d14d7d31Sis }
352*d14d7d31Sis
353*d14d7d31Sis /*
354*d14d7d31Sis * The following is the common kiconv function from single byte codesets to
355*d14d7d31Sis * UTF-8.
356*d14d7d31Sis */
357*d14d7d31Sis static size_t
kiconv_fr_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)358*d14d7d31Sis kiconv_fr_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
359*d14d7d31Sis size_t *outbytesleft, int *errno)
360*d14d7d31Sis {
361*d14d7d31Sis size_t ret_val;
362*d14d7d31Sis uchar_t *ib;
363*d14d7d31Sis uchar_t *ob;
364*d14d7d31Sis uchar_t *ibtail;
365*d14d7d31Sis uchar_t *obtail;
366*d14d7d31Sis size_t i;
367*d14d7d31Sis size_t k;
368*d14d7d31Sis int8_t sz;
369*d14d7d31Sis
370*d14d7d31Sis /* Check on the kiconv code conversion descriptor validity. */
371*d14d7d31Sis if ((ulong_t)kcd > KICONV_MAX_MAPPING_TBLID) {
372*d14d7d31Sis *errno = EBADF;
373*d14d7d31Sis return ((size_t)-1);
374*d14d7d31Sis }
375*d14d7d31Sis
376*d14d7d31Sis /*
377*d14d7d31Sis * If this is a state reset request, there is nothing to do and so
378*d14d7d31Sis * we just return.
379*d14d7d31Sis */
380*d14d7d31Sis if (! inbuf || ! (*inbuf))
381*d14d7d31Sis return ((size_t)0);
382*d14d7d31Sis
383*d14d7d31Sis ret_val = 0;
384*d14d7d31Sis ib = (uchar_t *)*inbuf;
385*d14d7d31Sis ob = (uchar_t *)*outbuf;
386*d14d7d31Sis ibtail = ib + *inbytesleft;
387*d14d7d31Sis obtail = ob + *outbytesleft;
388*d14d7d31Sis
389*d14d7d31Sis while (ib < ibtail) {
390*d14d7d31Sis /*
391*d14d7d31Sis * If this is a 7-bit ASCII character, we just copy over and
392*d14d7d31Sis * that's all we need to do for this character.
393*d14d7d31Sis */
394*d14d7d31Sis if (*ib < 0x80) {
395*d14d7d31Sis if (ob >= obtail) {
396*d14d7d31Sis *errno = E2BIG;
397*d14d7d31Sis ret_val = (size_t)-1;
398*d14d7d31Sis break;
399*d14d7d31Sis }
400*d14d7d31Sis
401*d14d7d31Sis *ob++ = *ib++;
402*d14d7d31Sis continue;
403*d14d7d31Sis }
404*d14d7d31Sis
405*d14d7d31Sis /*
406*d14d7d31Sis * Otherwise, we get the corresponding UTF-8 character bytes
407*d14d7d31Sis * from the mapping table and copy them over.
408*d14d7d31Sis *
409*d14d7d31Sis * We don't need to worry about if the UTF-8 character bytes
410*d14d7d31Sis * at the mapping tables are valid or not since they are good.
411*d14d7d31Sis */
412*d14d7d31Sis k = *ib - 0x80;
413*d14d7d31Sis sz = u8_number_of_bytes[to_u8_tbl[(ulong_t)kcd][k].u8[0]];
414*d14d7d31Sis
415*d14d7d31Sis /*
416*d14d7d31Sis * If sz <= 0, that means we don't have any assigned character
417*d14d7d31Sis * at the code point, k + 0x80, of the single byte codeset
418*d14d7d31Sis * which is the fromcode. In other words, the input buffer
419*d14d7d31Sis * has an illegal character.
420*d14d7d31Sis */
421*d14d7d31Sis if (sz <= 0) {
422*d14d7d31Sis *errno = EILSEQ;
423*d14d7d31Sis ret_val = (size_t)-1;
424*d14d7d31Sis break;
425*d14d7d31Sis }
426*d14d7d31Sis
427*d14d7d31Sis if ((obtail - ob) < sz) {
428*d14d7d31Sis *errno = E2BIG;
429*d14d7d31Sis ret_val = (size_t)-1;
430*d14d7d31Sis break;
431*d14d7d31Sis }
432*d14d7d31Sis
433*d14d7d31Sis for (i = 0; i < sz; i++)
434*d14d7d31Sis *ob++ = to_u8_tbl[(ulong_t)kcd][k].u8[i];
435*d14d7d31Sis
436*d14d7d31Sis ib++;
437*d14d7d31Sis }
438*d14d7d31Sis
439*d14d7d31Sis *inbuf = (char *)ib;
440*d14d7d31Sis *inbytesleft = ibtail - ib;
441*d14d7d31Sis *outbuf = (char *)ob;
442*d14d7d31Sis *outbytesleft = obtail - ob;
443*d14d7d31Sis
444*d14d7d31Sis return (ret_val);
445*d14d7d31Sis }
446*d14d7d31Sis
447*d14d7d31Sis /*
448*d14d7d31Sis * The following is the common kiconvstr function from UTF-8 to single byte
449*d14d7d31Sis * codesets.
450*d14d7d31Sis */
451*d14d7d31Sis static size_t
kiconvstr_to_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)452*d14d7d31Sis kiconvstr_to_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
453*d14d7d31Sis size_t *outlen, int flag, int *errno)
454*d14d7d31Sis {
455*d14d7d31Sis size_t ret_val;
456*d14d7d31Sis uchar_t *oldib;
457*d14d7d31Sis uchar_t *ibtail;
458*d14d7d31Sis uchar_t *obtail;
459*d14d7d31Sis uint32_t u8;
460*d14d7d31Sis size_t i;
461*d14d7d31Sis size_t l;
462*d14d7d31Sis size_t h;
463*d14d7d31Sis size_t init_h;
464*d14d7d31Sis int8_t sz;
465*d14d7d31Sis boolean_t second;
466*d14d7d31Sis boolean_t do_not_ignore_null;
467*d14d7d31Sis
468*d14d7d31Sis /* Let's make sure that the table id is within the valid boundary. */
469*d14d7d31Sis if (id > KICONV_MAX_MAPPING_TBLID) {
470*d14d7d31Sis *errno = EBADF;
471*d14d7d31Sis return ((size_t)-1);
472*d14d7d31Sis }
473*d14d7d31Sis
474*d14d7d31Sis ret_val = 0;
475*d14d7d31Sis ibtail = ib + *inlen;
476*d14d7d31Sis obtail = ob + *outlen;
477*d14d7d31Sis do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
478*d14d7d31Sis init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
479*d14d7d31Sis
480*d14d7d31Sis /* Skip any UTF-8 signature BOM character in the beginning. */
481*d14d7d31Sis if ((ibtail - ib) >= 3 && *ib == 0xef && *(ib + 1) == 0xbb &&
482*d14d7d31Sis *(ib + 2) == 0xbf)
483*d14d7d31Sis ib += 3;
484*d14d7d31Sis
485*d14d7d31Sis /*
486*d14d7d31Sis * Basically this is pretty much the same as kiconv_to_sb() except
487*d14d7d31Sis * that we are now accepting two flag values and doing the processing
488*d14d7d31Sis * accordingly.
489*d14d7d31Sis */
490*d14d7d31Sis while (ib < ibtail) {
491*d14d7d31Sis sz = u8_number_of_bytes[*ib];
492*d14d7d31Sis if (sz <= 0) {
493*d14d7d31Sis if (flag & KICONV_REPLACE_INVALID) {
494*d14d7d31Sis if (ob >= obtail) {
495*d14d7d31Sis *errno = E2BIG;
496*d14d7d31Sis ret_val = (size_t)-1;
497*d14d7d31Sis break;
498*d14d7d31Sis }
499*d14d7d31Sis
500*d14d7d31Sis ib++;
501*d14d7d31Sis goto STR_TO_SB_REPLACE_INVALID;
502*d14d7d31Sis }
503*d14d7d31Sis
504*d14d7d31Sis *errno = EILSEQ;
505*d14d7d31Sis ret_val = (size_t)-1;
506*d14d7d31Sis break;
507*d14d7d31Sis }
508*d14d7d31Sis
509*d14d7d31Sis if (*ib == '\0' && do_not_ignore_null)
510*d14d7d31Sis break;
511*d14d7d31Sis
512*d14d7d31Sis if (ob >= obtail) {
513*d14d7d31Sis *errno = E2BIG;
514*d14d7d31Sis ret_val = (size_t)-1;
515*d14d7d31Sis break;
516*d14d7d31Sis }
517*d14d7d31Sis
518*d14d7d31Sis if (sz == 1) {
519*d14d7d31Sis *ob++ = *ib++;
520*d14d7d31Sis continue;
521*d14d7d31Sis }
522*d14d7d31Sis
523*d14d7d31Sis if ((ibtail - ib) < sz) {
524*d14d7d31Sis if (flag & KICONV_REPLACE_INVALID) {
525*d14d7d31Sis ib = ibtail;
526*d14d7d31Sis goto STR_TO_SB_REPLACE_INVALID;
527*d14d7d31Sis }
528*d14d7d31Sis
529*d14d7d31Sis *errno = EINVAL;
530*d14d7d31Sis ret_val = (size_t)-1;
531*d14d7d31Sis break;
532*d14d7d31Sis }
533*d14d7d31Sis
534*d14d7d31Sis oldib = ib;
535*d14d7d31Sis u8 = *ib++;
536*d14d7d31Sis second = B_TRUE;
537*d14d7d31Sis for (i = 1; i < sz; i++) {
538*d14d7d31Sis if (second) {
539*d14d7d31Sis if (*ib < u8_valid_min_2nd_byte[u8] ||
540*d14d7d31Sis *ib > u8_valid_max_2nd_byte[u8]) {
541*d14d7d31Sis if (flag & KICONV_REPLACE_INVALID) {
542*d14d7d31Sis ib = oldib + sz;
543*d14d7d31Sis goto STR_TO_SB_REPLACE_INVALID;
544*d14d7d31Sis }
545*d14d7d31Sis
546*d14d7d31Sis *errno = EILSEQ;
547*d14d7d31Sis ret_val = (size_t)-1;
548*d14d7d31Sis ib = oldib;
549*d14d7d31Sis goto STR_TO_SB_ILLEGAL_CHAR_ERR;
550*d14d7d31Sis }
551*d14d7d31Sis second = B_FALSE;
552*d14d7d31Sis } else if (*ib < 0x80 || *ib > 0xbf) {
553*d14d7d31Sis if (flag & KICONV_REPLACE_INVALID) {
554*d14d7d31Sis ib = oldib + sz;
555*d14d7d31Sis goto STR_TO_SB_REPLACE_INVALID;
556*d14d7d31Sis }
557*d14d7d31Sis
558*d14d7d31Sis *errno = EILSEQ;
559*d14d7d31Sis ret_val = (size_t)-1;
560*d14d7d31Sis ib = oldib;
561*d14d7d31Sis goto STR_TO_SB_ILLEGAL_CHAR_ERR;
562*d14d7d31Sis }
563*d14d7d31Sis u8 = (u8 << 8) | ((uint32_t)*ib);
564*d14d7d31Sis ib++;
565*d14d7d31Sis }
566*d14d7d31Sis
567*d14d7d31Sis i = l = 0;
568*d14d7d31Sis h = init_h;
569*d14d7d31Sis while (l <= h) {
570*d14d7d31Sis i = (l + h) / 2;
571*d14d7d31Sis if (to_sb_tbl[id][i].u8 == u8)
572*d14d7d31Sis break;
573*d14d7d31Sis else if (to_sb_tbl[id][i].u8 < u8)
574*d14d7d31Sis l = i + 1;
575*d14d7d31Sis else
576*d14d7d31Sis h = i - 1;
577*d14d7d31Sis }
578*d14d7d31Sis
579*d14d7d31Sis if (to_sb_tbl[id][i].u8 == u8) {
580*d14d7d31Sis *ob++ = to_sb_tbl[id][i].sb;
581*d14d7d31Sis } else {
582*d14d7d31Sis STR_TO_SB_REPLACE_INVALID:
583*d14d7d31Sis *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
584*d14d7d31Sis ret_val++;
585*d14d7d31Sis }
586*d14d7d31Sis }
587*d14d7d31Sis
588*d14d7d31Sis STR_TO_SB_ILLEGAL_CHAR_ERR:
589*d14d7d31Sis *inlen = ibtail - ib;
590*d14d7d31Sis *outlen = obtail - ob;
591*d14d7d31Sis
592*d14d7d31Sis return (ret_val);
593*d14d7d31Sis }
594*d14d7d31Sis
595*d14d7d31Sis /*
596*d14d7d31Sis * The following four functions are entry points recorded at the conv_list[]
597*d14d7d31Sis * defined at below.
598*d14d7d31Sis */
599*d14d7d31Sis static size_t
kiconvstr_to_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)600*d14d7d31Sis kiconvstr_to_1252(char *inarray, size_t *inlen, char *outarray,
601*d14d7d31Sis size_t *outlen, int flag, int *errno)
602*d14d7d31Sis {
603*d14d7d31Sis return (kiconvstr_to_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
604*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
605*d14d7d31Sis }
606*d14d7d31Sis
607*d14d7d31Sis static size_t
kiconvstr_to_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)608*d14d7d31Sis kiconvstr_to_1(char *inarray, size_t *inlen, char *outarray,
609*d14d7d31Sis size_t *outlen, int flag, int *errno)
610*d14d7d31Sis {
611*d14d7d31Sis return (kiconvstr_to_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
612*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
613*d14d7d31Sis }
614*d14d7d31Sis
615*d14d7d31Sis static size_t
kiconvstr_to_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)616*d14d7d31Sis kiconvstr_to_15(char *inarray, size_t *inlen, char *outarray,
617*d14d7d31Sis size_t *outlen, int flag, int *errno)
618*d14d7d31Sis {
619*d14d7d31Sis return (kiconvstr_to_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
620*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
621*d14d7d31Sis }
622*d14d7d31Sis
623*d14d7d31Sis static size_t
kiconvstr_to_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)624*d14d7d31Sis kiconvstr_to_850(char *inarray, size_t *inlen, char *outarray,
625*d14d7d31Sis size_t *outlen, int flag, int *errno)
626*d14d7d31Sis {
627*d14d7d31Sis return (kiconvstr_to_sb(KICONV_TBLID_850, (uchar_t *)inarray,
628*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
629*d14d7d31Sis }
630*d14d7d31Sis
631*d14d7d31Sis /*
632*d14d7d31Sis * The following is the common kiconvstr function for conversions from
633*d14d7d31Sis * single byte codesets to UTF-8.
634*d14d7d31Sis */
635*d14d7d31Sis static size_t
kiconvstr_fr_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)636*d14d7d31Sis kiconvstr_fr_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
637*d14d7d31Sis size_t *outlen, int flag, int *errno)
638*d14d7d31Sis {
639*d14d7d31Sis size_t ret_val;
640*d14d7d31Sis uchar_t *ibtail;
641*d14d7d31Sis uchar_t *obtail;
642*d14d7d31Sis size_t i;
643*d14d7d31Sis size_t k;
644*d14d7d31Sis int8_t sz;
645*d14d7d31Sis boolean_t do_not_ignore_null;
646*d14d7d31Sis
647*d14d7d31Sis ret_val = 0;
648*d14d7d31Sis ibtail = ib + *inlen;
649*d14d7d31Sis obtail = ob + *outlen;
650*d14d7d31Sis do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
651*d14d7d31Sis
652*d14d7d31Sis while (ib < ibtail) {
653*d14d7d31Sis if (*ib == '\0' && do_not_ignore_null)
654*d14d7d31Sis break;
655*d14d7d31Sis
656*d14d7d31Sis if (*ib < 0x80) {
657*d14d7d31Sis if (ob >= obtail) {
658*d14d7d31Sis *errno = E2BIG;
659*d14d7d31Sis ret_val = (size_t)-1;
660*d14d7d31Sis break;
661*d14d7d31Sis }
662*d14d7d31Sis *ob++ = *ib++;
663*d14d7d31Sis continue;
664*d14d7d31Sis }
665*d14d7d31Sis
666*d14d7d31Sis k = *ib - 0x80;
667*d14d7d31Sis sz = u8_number_of_bytes[to_u8_tbl[id][k].u8[0]];
668*d14d7d31Sis
669*d14d7d31Sis if (sz <= 0) {
670*d14d7d31Sis if (flag & KICONV_REPLACE_INVALID) {
671*d14d7d31Sis if ((obtail - ob) < 3) {
672*d14d7d31Sis *errno = E2BIG;
673*d14d7d31Sis ret_val = (size_t)-1;
674*d14d7d31Sis break;
675*d14d7d31Sis }
676*d14d7d31Sis
677*d14d7d31Sis /* Save KICONV_UTF8_REPLACEMENT_CHAR. */
678*d14d7d31Sis *ob++ = 0xef;
679*d14d7d31Sis *ob++ = 0xbf;
680*d14d7d31Sis *ob++ = 0xbd;
681*d14d7d31Sis ret_val++;
682*d14d7d31Sis ib++;
683*d14d7d31Sis
684*d14d7d31Sis continue;
685*d14d7d31Sis }
686*d14d7d31Sis
687*d14d7d31Sis *errno = EILSEQ;
688*d14d7d31Sis ret_val = (size_t)-1;
689*d14d7d31Sis break;
690*d14d7d31Sis }
691*d14d7d31Sis
692*d14d7d31Sis if ((obtail - ob) < sz) {
693*d14d7d31Sis *errno = E2BIG;
694*d14d7d31Sis ret_val = (size_t)-1;
695*d14d7d31Sis break;
696*d14d7d31Sis }
697*d14d7d31Sis
698*d14d7d31Sis for (i = 0; i < sz; i++)
699*d14d7d31Sis *ob++ = to_u8_tbl[id][k].u8[i];
700*d14d7d31Sis
701*d14d7d31Sis ib++;
702*d14d7d31Sis }
703*d14d7d31Sis
704*d14d7d31Sis *inlen = ibtail - ib;
705*d14d7d31Sis *outlen = obtail - ob;
706*d14d7d31Sis
707*d14d7d31Sis return (ret_val);
708*d14d7d31Sis }
709*d14d7d31Sis
710*d14d7d31Sis /*
711*d14d7d31Sis * The following four functions are also entry points recorded at
712*d14d7d31Sis * the conv_list[] at below.
713*d14d7d31Sis */
714*d14d7d31Sis static size_t
kiconvstr_fr_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)715*d14d7d31Sis kiconvstr_fr_1252(char *inarray, size_t *inlen, char *outarray,
716*d14d7d31Sis size_t *outlen, int flag, int *errno)
717*d14d7d31Sis {
718*d14d7d31Sis return (kiconvstr_fr_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
719*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
720*d14d7d31Sis }
721*d14d7d31Sis
722*d14d7d31Sis static size_t
kiconvstr_fr_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)723*d14d7d31Sis kiconvstr_fr_1(char *inarray, size_t *inlen, char *outarray,
724*d14d7d31Sis size_t *outlen, int flag, int *errno)
725*d14d7d31Sis {
726*d14d7d31Sis return (kiconvstr_fr_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
727*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
728*d14d7d31Sis }
729*d14d7d31Sis
730*d14d7d31Sis static size_t
kiconvstr_fr_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)731*d14d7d31Sis kiconvstr_fr_15(char *inarray, size_t *inlen, char *outarray,
732*d14d7d31Sis size_t *outlen, int flag, int *errno)
733*d14d7d31Sis {
734*d14d7d31Sis return (kiconvstr_fr_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
735*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
736*d14d7d31Sis }
737*d14d7d31Sis
738*d14d7d31Sis static size_t
kiconvstr_fr_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)739*d14d7d31Sis kiconvstr_fr_850(char *inarray, size_t *inlen, char *outarray,
740*d14d7d31Sis size_t *outlen, int flag, int *errno)
741*d14d7d31Sis {
742*d14d7d31Sis return (kiconvstr_fr_sb(KICONV_TBLID_850, (uchar_t *)inarray,
743*d14d7d31Sis inlen, (uchar_t *)outarray, outlen, flag, errno));
744*d14d7d31Sis }
745*d14d7d31Sis
746*d14d7d31Sis /*
747*d14d7d31Sis * The following static vector contains the normalized code names
748*d14d7d31Sis * and their corresponding code ids. They are somewhat arbitrarily ordered
749*d14d7d31Sis * based on marketing data available. A code id could repeat for aliases.
750*d14d7d31Sis *
751*d14d7d31Sis * The vector was generated by using a small utility program called
752*d14d7d31Sis * codeidlistgen.c that you can find from PSARC/2007/173/materials/util/.
753*d14d7d31Sis *
754*d14d7d31Sis * The code ids must be portable, i.e., if needed, you can always generate
755*d14d7d31Sis * the code_list[] again with different code ids. You'll also need to
756*d14d7d31Sis * update the conv_list[] at below.
757*d14d7d31Sis */
758*d14d7d31Sis #define KICONV_MAX_CODEID_ENTRY 68
759*d14d7d31Sis #define KICONV_MAX_CODEID 42
760*d14d7d31Sis
761*d14d7d31Sis static kiconv_code_list_t code_list[KICONV_MAX_CODEID_ENTRY] = {
762*d14d7d31Sis { "utf8", 0 },
763*d14d7d31Sis { "cp1252", 1 },
764*d14d7d31Sis { "1252", 1 },
765*d14d7d31Sis { "iso88591", 2 },
766*d14d7d31Sis { "iso885915", 3 },
767*d14d7d31Sis { "cp850", 4 },
768*d14d7d31Sis { "850", 4 },
769*d14d7d31Sis { "eucjp", 5 },
770*d14d7d31Sis { "eucjpms", 6 },
771*d14d7d31Sis { "cp932", 7 },
772*d14d7d31Sis { "932", 7 },
773*d14d7d31Sis { "shiftjis", 8 },
774*d14d7d31Sis { "pck", 8 },
775*d14d7d31Sis { "sjis", 8 },
776*d14d7d31Sis { "gb18030", 9 },
777*d14d7d31Sis { "gbk", 10 },
778*d14d7d31Sis { "cp936", 10 },
779*d14d7d31Sis { "936", 10 },
780*d14d7d31Sis { "euccn", 11 },
781*d14d7d31Sis { "euckr", 12 },
782*d14d7d31Sis { "unifiedhangul", 13 },
783*d14d7d31Sis { "cp949", 13 },
784*d14d7d31Sis { "949", 13 },
785*d14d7d31Sis { "big5", 14 },
786*d14d7d31Sis { "cp950", 14 },
787*d14d7d31Sis { "950", 14 },
788*d14d7d31Sis { "big5hkscs", 15 },
789*d14d7d31Sis { "euctw", 16 },
790*d14d7d31Sis { "cp950hkscs", 17 },
791*d14d7d31Sis { "cp1250", 18 },
792*d14d7d31Sis { "1250", 18 },
793*d14d7d31Sis { "iso88592", 19 },
794*d14d7d31Sis { "cp852", 20 },
795*d14d7d31Sis { "852", 20 },
796*d14d7d31Sis { "cp1251", 21 },
797*d14d7d31Sis { "1251", 21 },
798*d14d7d31Sis { "iso88595", 22 },
799*d14d7d31Sis { "koi8r", 23 },
800*d14d7d31Sis { "cp866", 24 },
801*d14d7d31Sis { "866", 24 },
802*d14d7d31Sis { "cp1253", 25 },
803*d14d7d31Sis { "1253", 25 },
804*d14d7d31Sis { "iso88597", 26 },
805*d14d7d31Sis { "cp737", 27 },
806*d14d7d31Sis { "737", 27 },
807*d14d7d31Sis { "cp1254", 28 },
808*d14d7d31Sis { "1254", 28 },
809*d14d7d31Sis { "iso88599", 29 },
810*d14d7d31Sis { "cp857", 30 },
811*d14d7d31Sis { "857", 30 },
812*d14d7d31Sis { "cp1256", 31 },
813*d14d7d31Sis { "1256", 31 },
814*d14d7d31Sis { "iso88596", 32 },
815*d14d7d31Sis { "cp720", 33 },
816*d14d7d31Sis { "720", 33 },
817*d14d7d31Sis { "cp1255", 34 },
818*d14d7d31Sis { "1255", 34 },
819*d14d7d31Sis { "iso88598", 35 },
820*d14d7d31Sis { "cp862", 36 },
821*d14d7d31Sis { "862", 36 },
822*d14d7d31Sis { "cp1257", 37 },
823*d14d7d31Sis { "1257", 37 },
824*d14d7d31Sis { "iso885913", 38 },
825*d14d7d31Sis { "iso885910", 39 },
826*d14d7d31Sis { "iso885911", 40 },
827*d14d7d31Sis { "tis620", 40 },
828*d14d7d31Sis { "iso88593", 41 },
829*d14d7d31Sis { "iso88594", 42 },
830*d14d7d31Sis };
831*d14d7d31Sis
832*d14d7d31Sis /*
833*d14d7d31Sis * The list of code conversions supported are grouped together per
834*d14d7d31Sis * module which will be loaded as needed.
835*d14d7d31Sis */
836*d14d7d31Sis #define KICONV_MAX_CONVERSIONS 84
837*d14d7d31Sis
838*d14d7d31Sis static kiconv_conv_list_t conv_list[KICONV_MAX_CONVERSIONS] = {
839*d14d7d31Sis /* Embedded code conversions: */
840*d14d7d31Sis {
841*d14d7d31Sis 1, 0, KICONV_EMBEDDED,
842*d14d7d31Sis open_to_1252, kiconv_to_sb, close_to_sb, kiconvstr_to_1252
843*d14d7d31Sis },
844*d14d7d31Sis {
845*d14d7d31Sis 0, 1, KICONV_EMBEDDED,
846*d14d7d31Sis open_fr_1252, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1252
847*d14d7d31Sis },
848*d14d7d31Sis {
849*d14d7d31Sis 2, 0, KICONV_EMBEDDED,
850*d14d7d31Sis open_to_88591, kiconv_to_sb, close_to_sb, kiconvstr_to_1
851*d14d7d31Sis },
852*d14d7d31Sis {
853*d14d7d31Sis 0, 2, KICONV_EMBEDDED,
854*d14d7d31Sis open_fr_88591, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1
855*d14d7d31Sis },
856*d14d7d31Sis {
857*d14d7d31Sis 3, 0, KICONV_EMBEDDED,
858*d14d7d31Sis open_to_885915, kiconv_to_sb, close_to_sb, kiconvstr_to_15
859*d14d7d31Sis },
860*d14d7d31Sis {
861*d14d7d31Sis 0, 3, KICONV_EMBEDDED,
862*d14d7d31Sis open_fr_885915, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_15
863*d14d7d31Sis },
864*d14d7d31Sis {
865*d14d7d31Sis 4, 0, KICONV_EMBEDDED,
866*d14d7d31Sis open_to_850, kiconv_to_sb, close_to_sb, kiconvstr_to_850
867*d14d7d31Sis },
868*d14d7d31Sis {
869*d14d7d31Sis 0, 4, KICONV_EMBEDDED,
870*d14d7d31Sis open_fr_850, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_850
871*d14d7d31Sis },
872*d14d7d31Sis
873*d14d7d31Sis /* kiconv_ja module conversions: */
874*d14d7d31Sis { 0, 5, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
875*d14d7d31Sis { 5, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
876*d14d7d31Sis { 0, 6, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
877*d14d7d31Sis { 6, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
878*d14d7d31Sis { 0, 7, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
879*d14d7d31Sis { 7, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
880*d14d7d31Sis { 0, 8, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
881*d14d7d31Sis { 8, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
882*d14d7d31Sis
883*d14d7d31Sis /* kiconv_sc module conversions: */
884*d14d7d31Sis { 0, 9, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
885*d14d7d31Sis { 9, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
886*d14d7d31Sis { 0, 10, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
887*d14d7d31Sis { 10, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
888*d14d7d31Sis { 0, 11, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
889*d14d7d31Sis { 11, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
890*d14d7d31Sis
891*d14d7d31Sis /* kiconv_ko module conversions: */
892*d14d7d31Sis { 0, 12, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
893*d14d7d31Sis { 12, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
894*d14d7d31Sis { 0, 13, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
895*d14d7d31Sis { 13, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
896*d14d7d31Sis
897*d14d7d31Sis /* kiconv_tc module conversions: */
898*d14d7d31Sis { 0, 14, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
899*d14d7d31Sis { 14, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
900*d14d7d31Sis { 0, 15, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
901*d14d7d31Sis { 15, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
902*d14d7d31Sis { 0, 16, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
903*d14d7d31Sis { 16, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
904*d14d7d31Sis { 0, 17, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
905*d14d7d31Sis { 17, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
906*d14d7d31Sis
907*d14d7d31Sis /* kiconv_emea module conversions: */
908*d14d7d31Sis { 0, 18, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
909*d14d7d31Sis { 18, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
910*d14d7d31Sis { 0, 19, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
911*d14d7d31Sis { 19, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
912*d14d7d31Sis { 0, 20, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
913*d14d7d31Sis { 20, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
914*d14d7d31Sis { 0, 21, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
915*d14d7d31Sis { 21, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
916*d14d7d31Sis { 0, 22, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
917*d14d7d31Sis { 22, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
918*d14d7d31Sis { 0, 23, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
919*d14d7d31Sis { 23, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
920*d14d7d31Sis { 0, 24, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
921*d14d7d31Sis { 24, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
922*d14d7d31Sis { 0, 25, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
923*d14d7d31Sis { 25, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
924*d14d7d31Sis { 0, 26, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
925*d14d7d31Sis { 26, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
926*d14d7d31Sis { 0, 27, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
927*d14d7d31Sis { 27, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
928*d14d7d31Sis { 0, 28, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
929*d14d7d31Sis { 28, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
930*d14d7d31Sis { 0, 29, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
931*d14d7d31Sis { 29, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
932*d14d7d31Sis { 0, 30, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
933*d14d7d31Sis { 30, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
934*d14d7d31Sis { 0, 31, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
935*d14d7d31Sis { 31, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
936*d14d7d31Sis { 0, 32, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
937*d14d7d31Sis { 32, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
938*d14d7d31Sis { 0, 33, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
939*d14d7d31Sis { 33, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
940*d14d7d31Sis { 0, 34, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
941*d14d7d31Sis { 34, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
942*d14d7d31Sis { 0, 35, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
943*d14d7d31Sis { 35, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
944*d14d7d31Sis { 0, 36, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
945*d14d7d31Sis { 36, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
946*d14d7d31Sis { 0, 37, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
947*d14d7d31Sis { 37, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
948*d14d7d31Sis { 0, 38, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
949*d14d7d31Sis { 38, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
950*d14d7d31Sis { 0, 39, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
951*d14d7d31Sis { 39, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
952*d14d7d31Sis { 0, 40, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
953*d14d7d31Sis { 40, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
954*d14d7d31Sis { 0, 41, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
955*d14d7d31Sis { 41, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
956*d14d7d31Sis { 0, 42, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
957*d14d7d31Sis { 42, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
958*d14d7d31Sis };
959*d14d7d31Sis
960*d14d7d31Sis /* The list of implemeted and supported modules. */
961*d14d7d31Sis static kiconv_mod_list_t module_list[KICONV_MAX_MODULE_ID + 1] = {
962*d14d7d31Sis "kiconv_embedded", 0,
963*d14d7d31Sis "kiconv_ja", 0,
964*d14d7d31Sis "kiconv_sc", 0,
965*d14d7d31Sis "kiconv_ko", 0,
966*d14d7d31Sis "kiconv_tc", 0,
967*d14d7d31Sis "kiconv_emea", 0,
968*d14d7d31Sis };
969*d14d7d31Sis
/*
 * We use conv_list_lock to protect access to both conv_list[] and
 * module_list[]: the two are tightly coupled, so their critical sections
 * must be handled together as a unit.
 */
975*d14d7d31Sis static kmutex_t conv_list_lock;
976*d14d7d31Sis
977*d14d7d31Sis void
kiconv_init()978*d14d7d31Sis kiconv_init()
979*d14d7d31Sis {
980*d14d7d31Sis mutex_init(&conv_list_lock, NULL, MUTEX_DEFAULT, NULL);
981*d14d7d31Sis }
982*d14d7d31Sis
983*d14d7d31Sis /*
984*d14d7d31Sis * The following is used to check on whether a kiconv module is being
985*d14d7d31Sis * used or not at the _fini() of the module.
986*d14d7d31Sis */
987*d14d7d31Sis size_t
kiconv_module_ref_count(size_t mid)988*d14d7d31Sis kiconv_module_ref_count(size_t mid)
989*d14d7d31Sis {
990*d14d7d31Sis int count;
991*d14d7d31Sis
992*d14d7d31Sis if (mid <= 0 || mid > KICONV_MAX_MODULE_ID)
993*d14d7d31Sis return (0);
994*d14d7d31Sis
995*d14d7d31Sis mutex_enter(&conv_list_lock);
996*d14d7d31Sis
997*d14d7d31Sis count = module_list[mid].refcount;
998*d14d7d31Sis
999*d14d7d31Sis mutex_exit(&conv_list_lock);
1000*d14d7d31Sis
1001*d14d7d31Sis return (count);
1002*d14d7d31Sis }
1003*d14d7d31Sis
1004*d14d7d31Sis /*
1005*d14d7d31Sis * This function "normalizes" a given code name, n, by not including skippable
1006*d14d7d31Sis * characters and folding uppercase letters to corresponding lowercase letters.
1007*d14d7d31Sis * We only fold 7-bit ASCII uppercase characters since the names should be in
1008*d14d7d31Sis * Portable Character Set of 7-bit ASCII.
1009*d14d7d31Sis *
1010*d14d7d31Sis * By doing this, we will be able to maximize the code name matches.
1011*d14d7d31Sis */
1012*d14d7d31Sis static size_t
normalize_codename(const char * n)1013*d14d7d31Sis normalize_codename(const char *n)
1014*d14d7d31Sis {
1015*d14d7d31Sis char s[KICONV_MAX_CODENAME_LEN + 1];
1016*d14d7d31Sis size_t i;
1017*d14d7d31Sis
1018*d14d7d31Sis if (n == NULL)
1019*d14d7d31Sis return ((size_t)-1);
1020*d14d7d31Sis
1021*d14d7d31Sis for (i = 0; *n; n++) {
1022*d14d7d31Sis if (KICONV_SKIPPABLE_CHAR(*n))
1023*d14d7d31Sis continue;
1024*d14d7d31Sis
1025*d14d7d31Sis /* If unreasonably lengthy, we don't support such names. */
1026*d14d7d31Sis if (i >= KICONV_MAX_CODENAME_LEN)
1027*d14d7d31Sis return ((size_t)-1);
1028*d14d7d31Sis
1029*d14d7d31Sis s[i++] = (*n >= 'A' && *n <= 'Z') ? *n - 'A' + 'a' : *n;
1030*d14d7d31Sis }
1031*d14d7d31Sis s[i] = '\0';
1032*d14d7d31Sis
1033*d14d7d31Sis /* With the normalized name, find the corresponding codeset id. */
1034*d14d7d31Sis for (i = 0; i < KICONV_MAX_CODEID_ENTRY; i++)
1035*d14d7d31Sis if (strcmp(s, code_list[i].name) == 0)
1036*d14d7d31Sis return (code_list[i].id);
1037*d14d7d31Sis
1038*d14d7d31Sis /*
1039*d14d7d31Sis * In future time, we will also have a few more lines of code at below
1040*d14d7d31Sis * that will deal with other user-created modules' fromcodes and
1041*d14d7d31Sis * tocodes including aliases in a different vector. For now, we don't
1042*d14d7d31Sis * support that but only the known names to this project at this time.
1043*d14d7d31Sis */
1044*d14d7d31Sis
1045*d14d7d31Sis return ((size_t)-1);
1046*d14d7d31Sis }
1047*d14d7d31Sis
1048*d14d7d31Sis /*
1049*d14d7d31Sis * This function called from mod_install() registers supplied code
1050*d14d7d31Sis * conversions. At this point, it does not honor aliases and hence does not
1051*d14d7d31Sis * use nowait data field from the kiconv module info data structure.
1052*d14d7d31Sis */
1053*d14d7d31Sis int
kiconv_register_module(kiconv_module_info_t * info)1054*d14d7d31Sis kiconv_register_module(kiconv_module_info_t *info)
1055*d14d7d31Sis {
1056*d14d7d31Sis size_t mid;
1057*d14d7d31Sis size_t fid;
1058*d14d7d31Sis size_t tid;
1059*d14d7d31Sis size_t i;
1060*d14d7d31Sis size_t j;
1061*d14d7d31Sis kiconv_ops_t *op;
1062*d14d7d31Sis
1063*d14d7d31Sis /* Validate the given kiconv module info. */
1064*d14d7d31Sis if (info == NULL || info->module_name == NULL ||
1065*d14d7d31Sis info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1066*d14d7d31Sis return (EINVAL);
1067*d14d7d31Sis
1068*d14d7d31Sis /*
1069*d14d7d31Sis * Check if this is one of the known modules. At this point,
1070*d14d7d31Sis * we do not allow user-defined kiconv modules and that'd be for
1071*d14d7d31Sis * a future project.
1072*d14d7d31Sis */
1073*d14d7d31Sis for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1074*d14d7d31Sis if (strcmp(module_list[mid].name, info->module_name) == 0)
1075*d14d7d31Sis break;
1076*d14d7d31Sis if (mid > KICONV_MAX_MODULE_ID)
1077*d14d7d31Sis return (EINVAL);
1078*d14d7d31Sis
1079*d14d7d31Sis /* Let's register the conversions supplied. */
1080*d14d7d31Sis mutex_enter(&conv_list_lock);
1081*d14d7d31Sis
1082*d14d7d31Sis /*
1083*d14d7d31Sis * This is very unlikely situation but by any chance we don't want to
1084*d14d7d31Sis * register a module that is already in.
1085*d14d7d31Sis */
1086*d14d7d31Sis if (module_list[mid].refcount > 0) {
1087*d14d7d31Sis mutex_exit(&conv_list_lock);
1088*d14d7d31Sis return (EAGAIN);
1089*d14d7d31Sis }
1090*d14d7d31Sis
1091*d14d7d31Sis for (i = 0; i < info->kiconv_num_convs; i++) {
1092*d14d7d31Sis op = &(info->kiconv_ops_tbl[i]);
1093*d14d7d31Sis
1094*d14d7d31Sis fid = normalize_codename(op->fromcode);
1095*d14d7d31Sis tid = normalize_codename(op->tocode);
1096*d14d7d31Sis
1097*d14d7d31Sis /*
1098*d14d7d31Sis * If we find anything wrong in this particular conversion,
1099*d14d7d31Sis * we skip this one and continue to the next one. This include
1100*d14d7d31Sis * a case where there is a conversion already being assigned
1101*d14d7d31Sis * into the conv_list[] somehow, i.e., new one never kicks out
1102*d14d7d31Sis * old one.
1103*d14d7d31Sis */
1104*d14d7d31Sis if (op->kiconv_open == NULL || op->kiconv == NULL ||
1105*d14d7d31Sis op->kiconv_close == NULL || op->kiconvstr == NULL)
1106*d14d7d31Sis continue;
1107*d14d7d31Sis
1108*d14d7d31Sis for (j = 0; j < KICONV_MAX_CONVERSIONS; j++) {
1109*d14d7d31Sis if (conv_list[j].mid == mid &&
1110*d14d7d31Sis conv_list[j].fid == fid &&
1111*d14d7d31Sis conv_list[j].tid == tid) {
1112*d14d7d31Sis if (conv_list[j].open == NULL) {
1113*d14d7d31Sis conv_list[j].open = op->kiconv_open;
1114*d14d7d31Sis conv_list[j].kiconv = op->kiconv;
1115*d14d7d31Sis conv_list[j].close = op->kiconv_close;
1116*d14d7d31Sis conv_list[j].kiconvstr = op->kiconvstr;
1117*d14d7d31Sis }
1118*d14d7d31Sis break;
1119*d14d7d31Sis }
1120*d14d7d31Sis }
1121*d14d7d31Sis }
1122*d14d7d31Sis
1123*d14d7d31Sis mutex_exit(&conv_list_lock);
1124*d14d7d31Sis
1125*d14d7d31Sis return (0);
1126*d14d7d31Sis }
1127*d14d7d31Sis
1128*d14d7d31Sis /*
1129*d14d7d31Sis * The following function called during mod_remove() will try to unregister,
1130*d14d7d31Sis * i.e., clear up conversion function pointers, from the conv_list[] if it
1131*d14d7d31Sis * can. If there is any code conversions being used, then, the function will
1132*d14d7d31Sis * just return EBUSY indicating that the module cannot be unloaded.
1133*d14d7d31Sis */
1134*d14d7d31Sis int
kiconv_unregister_module(kiconv_module_info_t * info)1135*d14d7d31Sis kiconv_unregister_module(kiconv_module_info_t *info)
1136*d14d7d31Sis {
1137*d14d7d31Sis size_t mid;
1138*d14d7d31Sis size_t i;
1139*d14d7d31Sis
1140*d14d7d31Sis if (info == NULL || info->module_name == NULL ||
1141*d14d7d31Sis info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1142*d14d7d31Sis return (EINVAL);
1143*d14d7d31Sis
1144*d14d7d31Sis for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1145*d14d7d31Sis if (strcmp(module_list[mid].name, info->module_name) == 0)
1146*d14d7d31Sis break;
1147*d14d7d31Sis if (mid > KICONV_MAX_MODULE_ID)
1148*d14d7d31Sis return (EINVAL);
1149*d14d7d31Sis
1150*d14d7d31Sis mutex_enter(&conv_list_lock);
1151*d14d7d31Sis
1152*d14d7d31Sis /*
1153*d14d7d31Sis * If any of the conversions are used, then, this module canont be
1154*d14d7d31Sis * unloaded.
1155*d14d7d31Sis */
1156*d14d7d31Sis if (module_list[mid].refcount > 0) {
1157*d14d7d31Sis mutex_exit(&conv_list_lock);
1158*d14d7d31Sis return (EBUSY);
1159*d14d7d31Sis }
1160*d14d7d31Sis
1161*d14d7d31Sis /*
1162*d14d7d31Sis * Otherwise, we unregister all conversions from this module
1163*d14d7d31Sis * and be ready for the unloading. At this point, we only care about
1164*d14d7d31Sis * the conversions we know about with the module.
1165*d14d7d31Sis */
1166*d14d7d31Sis for (i = 0; i < KICONV_MAX_CONVERSIONS; i++) {
1167*d14d7d31Sis if (conv_list[i].mid == mid) {
1168*d14d7d31Sis conv_list[i].open = NULL;
1169*d14d7d31Sis conv_list[i].kiconv = NULL;
1170*d14d7d31Sis conv_list[i].close = NULL;
1171*d14d7d31Sis conv_list[i].kiconvstr = NULL;
1172*d14d7d31Sis }
1173*d14d7d31Sis }
1174*d14d7d31Sis
1175*d14d7d31Sis mutex_exit(&conv_list_lock);
1176*d14d7d31Sis
1177*d14d7d31Sis return (0);
1178*d14d7d31Sis }
1179*d14d7d31Sis
1180*d14d7d31Sis /*
1181*d14d7d31Sis * The following function check if asked code conversion is available
1182*d14d7d31Sis * and if necessary, load the corresponding kiconv module that contains
1183*d14d7d31Sis * the conversion (and others).
1184*d14d7d31Sis */
1185*d14d7d31Sis static kiconv_t
check_and_load_conversions(const char * tocode,const char * fromcode)1186*d14d7d31Sis check_and_load_conversions(const char *tocode, const char *fromcode)
1187*d14d7d31Sis {
1188*d14d7d31Sis kiconv_t kcd;
1189*d14d7d31Sis size_t tid;
1190*d14d7d31Sis size_t fid;
1191*d14d7d31Sis size_t mid;
1192*d14d7d31Sis size_t i;
1193*d14d7d31Sis
1194*d14d7d31Sis /* Normalize the given names and find the corresponding code ids. */
1195*d14d7d31Sis tid = normalize_codename(tocode);
1196*d14d7d31Sis if (tid == (size_t)-1)
1197*d14d7d31Sis return ((kiconv_t)-1);
1198*d14d7d31Sis
1199*d14d7d31Sis fid = normalize_codename(fromcode);
1200*d14d7d31Sis if (fid == (size_t)-1)
1201*d14d7d31Sis return ((kiconv_t)-1);
1202*d14d7d31Sis
1203*d14d7d31Sis /*
1204*d14d7d31Sis * Search the conversion.
1205*d14d7d31Sis *
1206*d14d7d31Sis * If the conversion isn't supported, just return -1.
1207*d14d7d31Sis * If the conversion is supported but there is no corresponding
1208*d14d7d31Sis * module loaded, try to load it and if successful, return
1209*d14d7d31Sis * a kiconv conversion descriptor memory block.
1210*d14d7d31Sis *
1211*d14d7d31Sis * We maintain a reference counter of uint_t for each module.
1212*d14d7d31Sis */
1213*d14d7d31Sis mutex_enter(&conv_list_lock);
1214*d14d7d31Sis
1215*d14d7d31Sis for (i = 0; i < KICONV_MAX_CONVERSIONS; i++)
1216*d14d7d31Sis if (conv_list[i].tid == tid && conv_list[i].fid == fid)
1217*d14d7d31Sis break;
1218*d14d7d31Sis if (i >= KICONV_MAX_CONVERSIONS) {
1219*d14d7d31Sis mutex_exit(&conv_list_lock);
1220*d14d7d31Sis return ((kiconv_t)-1);
1221*d14d7d31Sis }
1222*d14d7d31Sis
1223*d14d7d31Sis mid = conv_list[i].mid;
1224*d14d7d31Sis
1225*d14d7d31Sis if (conv_list[i].open == NULL) {
1226*d14d7d31Sis mutex_exit(&conv_list_lock);
1227*d14d7d31Sis
1228*d14d7d31Sis if (modload("kiconv", module_list[mid].name) < 0)
1229*d14d7d31Sis return ((kiconv_t)-1);
1230*d14d7d31Sis
1231*d14d7d31Sis /*
1232*d14d7d31Sis * Let's double check if something happened right after
1233*d14d7d31Sis * the modload and/or if the module really has the conversion.
1234*d14d7d31Sis */
1235*d14d7d31Sis mutex_enter(&conv_list_lock);
1236*d14d7d31Sis
1237*d14d7d31Sis if (conv_list[i].open == NULL) {
1238*d14d7d31Sis mutex_exit(&conv_list_lock);
1239*d14d7d31Sis return ((kiconv_t)-1);
1240*d14d7d31Sis }
1241*d14d7d31Sis }
1242*d14d7d31Sis
1243*d14d7d31Sis /*
1244*d14d7d31Sis * If we got the conversion, we will use the conversion function
1245*d14d7d31Sis * in the module and so let's increase the module's refcounter
1246*d14d7d31Sis * so that the module won't be kicked out. (To be more exact and
1247*d14d7d31Sis * specific, the "refcount" is thus the reference counter of
1248*d14d7d31Sis * the module functions being used.)
1249*d14d7d31Sis */
1250*d14d7d31Sis if (module_list[mid].refcount < UINT_MAX)
1251*d14d7d31Sis module_list[mid].refcount++;
1252*d14d7d31Sis
1253*d14d7d31Sis mutex_exit(&conv_list_lock);
1254*d14d7d31Sis
1255*d14d7d31Sis kcd = (kiconv_t)kmem_alloc(sizeof (kiconv_data_t), KM_SLEEP);
1256*d14d7d31Sis kcd->handle = (void *)-1;
1257*d14d7d31Sis kcd->id = i;
1258*d14d7d31Sis
1259*d14d7d31Sis return (kcd);
1260*d14d7d31Sis }
1261*d14d7d31Sis
1262*d14d7d31Sis /*
1263*d14d7d31Sis * The following are the four "Committed" interfaces.
1264*d14d7d31Sis */
1265*d14d7d31Sis kiconv_t
kiconv_open(const char * tocode,const char * fromcode)1266*d14d7d31Sis kiconv_open(const char *tocode, const char *fromcode)
1267*d14d7d31Sis {
1268*d14d7d31Sis kiconv_t kcd;
1269*d14d7d31Sis size_t mid;
1270*d14d7d31Sis
1271*d14d7d31Sis kcd = check_and_load_conversions(tocode, fromcode);
1272*d14d7d31Sis if (kcd == (kiconv_t)-1)
1273*d14d7d31Sis return ((kiconv_t)-1);
1274*d14d7d31Sis
1275*d14d7d31Sis kcd->handle = (conv_list[kcd->id].open)();
1276*d14d7d31Sis if (kcd->handle == (void *)-1) {
1277*d14d7d31Sis /*
1278*d14d7d31Sis * If the conversion couldn't be opened for some reason,
1279*d14d7d31Sis * then, we unallocate the kcd and, more importantly, before
1280*d14d7d31Sis * that, we also decrease the module reference counter.
1281*d14d7d31Sis */
1282*d14d7d31Sis mid = conv_list[kcd->id].mid;
1283*d14d7d31Sis
1284*d14d7d31Sis mutex_enter(&conv_list_lock);
1285*d14d7d31Sis
1286*d14d7d31Sis if (module_list[mid].refcount > 0)
1287*d14d7d31Sis module_list[mid].refcount--;
1288*d14d7d31Sis
1289*d14d7d31Sis mutex_exit(&conv_list_lock);
1290*d14d7d31Sis
1291*d14d7d31Sis kmem_free((void *)kcd, sizeof (kiconv_data_t));
1292*d14d7d31Sis
1293*d14d7d31Sis return ((kiconv_t)-1);
1294*d14d7d31Sis }
1295*d14d7d31Sis
1296*d14d7d31Sis return (kcd);
1297*d14d7d31Sis }
1298*d14d7d31Sis
1299*d14d7d31Sis size_t
kiconv(kiconv_t kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1300*d14d7d31Sis kiconv(kiconv_t kcd, char **inbuf, size_t *inbytesleft,
1301*d14d7d31Sis char **outbuf, size_t *outbytesleft, int *errno)
1302*d14d7d31Sis {
1303*d14d7d31Sis /* Do some minimum checking on the kiconv conversion descriptor. */
1304*d14d7d31Sis if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconv == NULL) {
1305*d14d7d31Sis *errno = EBADF;
1306*d14d7d31Sis return ((size_t)-1);
1307*d14d7d31Sis }
1308*d14d7d31Sis
1309*d14d7d31Sis return ((conv_list[kcd->id].kiconv)(kcd->handle, inbuf, inbytesleft,
1310*d14d7d31Sis outbuf, outbytesleft, errno));
1311*d14d7d31Sis }
1312*d14d7d31Sis
1313*d14d7d31Sis int
kiconv_close(kiconv_t kcd)1314*d14d7d31Sis kiconv_close(kiconv_t kcd)
1315*d14d7d31Sis {
1316*d14d7d31Sis int ret;
1317*d14d7d31Sis size_t mid;
1318*d14d7d31Sis
1319*d14d7d31Sis if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].close == NULL)
1320*d14d7d31Sis return (EBADF);
1321*d14d7d31Sis
1322*d14d7d31Sis mid = conv_list[kcd->id].mid;
1323*d14d7d31Sis
1324*d14d7d31Sis ret = (conv_list[kcd->id].close)(kcd->handle);
1325*d14d7d31Sis
1326*d14d7d31Sis kmem_free((void *)kcd, sizeof (kiconv_data_t));
1327*d14d7d31Sis
1328*d14d7d31Sis mutex_enter(&conv_list_lock);
1329*d14d7d31Sis
1330*d14d7d31Sis /*
1331*d14d7d31Sis * While we maintain reference conter for each module, once loaded,
1332*d14d7d31Sis * we don't modunload from kiconv functions even if the counter
1333*d14d7d31Sis * reaches back to zero.
1334*d14d7d31Sis */
1335*d14d7d31Sis if (module_list[mid].refcount > 0)
1336*d14d7d31Sis module_list[mid].refcount--;
1337*d14d7d31Sis
1338*d14d7d31Sis mutex_exit(&conv_list_lock);
1339*d14d7d31Sis
1340*d14d7d31Sis return (ret);
1341*d14d7d31Sis }
1342*d14d7d31Sis
1343*d14d7d31Sis size_t
kiconvstr(const char * tocode,const char * fromcode,char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)1344*d14d7d31Sis kiconvstr(const char *tocode, const char *fromcode, char *inarray,
1345*d14d7d31Sis size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno)
1346*d14d7d31Sis {
1347*d14d7d31Sis kiconv_t kcd;
1348*d14d7d31Sis size_t ret;
1349*d14d7d31Sis size_t mid;
1350*d14d7d31Sis
1351*d14d7d31Sis kcd = check_and_load_conversions(tocode, fromcode);
1352*d14d7d31Sis if (kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconvstr == NULL) {
1353*d14d7d31Sis *errno = EBADF;
1354*d14d7d31Sis return ((size_t)-1);
1355*d14d7d31Sis }
1356*d14d7d31Sis
1357*d14d7d31Sis mid = conv_list[kcd->id].mid;
1358*d14d7d31Sis
1359*d14d7d31Sis ret = (conv_list[kcd->id].kiconvstr)(inarray, inlen, outarray, outlen,
1360*d14d7d31Sis flag, errno);
1361*d14d7d31Sis
1362*d14d7d31Sis kmem_free((void *)kcd, sizeof (kiconv_data_t));
1363*d14d7d31Sis
1364*d14d7d31Sis mutex_enter(&conv_list_lock);
1365*d14d7d31Sis
1366*d14d7d31Sis if (module_list[mid].refcount > 0)
1367*d14d7d31Sis module_list[mid].refcount--;
1368*d14d7d31Sis
1369*d14d7d31Sis mutex_exit(&conv_list_lock);
1370*d14d7d31Sis
1371*d14d7d31Sis return (ret);
1372*d14d7d31Sis }
1373