1 /*	$NetBSD: citrus_ctype_template.h,v 1.14 2002/05/24 04:04:30 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c)2002 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Paul Borman at Krystal Technologies.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 
66 /*
 * CAUTION: THIS IS NOT A STANDALONE FILE
68  *
 * Function templates of the ctype encoding handlers for each encoding.
70  *
71  * you need to define the macros below:
72  *
 *   _FUNCNAME(method) :
 *     It should expand to the real function name for the method.
75  *      e.g. _FUNCNAME(mbrtowc) should be expanded to
76  *             _EUC_ctype_mbrtowc
77  *           for EUC locale.
78  *
79  *   _CEI_TO_STATE(cei, method) :
80  *     It should be expanded to the pointer of the method-internal state
81  *     structures.
82  *     e.g. _CEI_TO_STATE(cei, mbrtowc) might be expanded to
83  *             (cei)->states.s_mbrtowc
 *     This structure may be used if the function is called as
 *           mbrtowc(&wc, s, n, NULL);
86  *     Such individual structures are needed by:
87  *           mblen
88  *           mbrlen
89  *           mbrtowc
90  *           mbtowc
91  *           mbsrtowcs
92  *           wcrtomb
93  *           wcsrtombs
94  *           wctomb
 *     These need to be kept in the ctype encoding information structure,
 *     pointed to by "cei".
97  *
98  *   _ENCODING_INFO :
99  *     It should be expanded to the name of the encoding information structure.
100  *     e.g. For EUC encoding, this macro is expanded to _EUCInfo.
 *     The encoding information structure needs to contain the common
 *     information for the codeset.
103  *
104  *   _ENCODING_STATE :
105  *     It should be expanded to the name of the encoding state structure.
106  *     e.g. For EUC encoding, this macro is expanded to _EUCState.
 *     The encoding state structure needs to contain the context-dependent
 *     states, which are the "unpacked form" of the mbstate_t type and are kept
 *     across successive calls of the mb/wc functions.
110  *
111  *   _ENCODING_IS_STATE_DEPENDENT :
112  *     If the encoding is state dependent, this should be expanded to
113  *     non-zero integral value.  Otherwise, 0.
114  *
115  *   _STATE_NEEDS_EXPLICIT_INIT(ps) :
 *     If the encoding state pointed to by "ps" needs to be initialized
 *     explicitly, return non-zero.  Otherwise, 0.
118  *
119  */
120 
121 
122 /* prototypes */
123 
__BEGIN_DECLS
/*
 * Encoding-specific state helpers.  They are only declared here; the
 * definitions are expected to come from the file that includes this
 * template.
 *   init_state   - initialize an unpacked encoding state.
 *   pack_state   - store an unpacked encoding state into opaque storage.
 *   unpack_state - load an unpacked encoding state from opaque storage.
 */
static void _FUNCNAME(init_state)(_ENCODING_INFO * __restrict,
				  _ENCODING_STATE * __restrict);
static void _FUNCNAME(pack_state)(_ENCODING_INFO * __restrict,
				  void * __restrict,
				  const _ENCODING_STATE * __restrict);
static void _FUNCNAME(unpack_state)(_ENCODING_INFO * __restrict,
				    _ENCODING_STATE * __restrict,
				    const void * __restrict);


/*
 * standard form of mbrtowc_priv.
 *
 * note (differences from the real mbrtowc):
 *   - 1st parameter is the encoding information structure.
 *   - 3rd parameter is not "const char *s" but "const char **s".
 *     After the call, *s points to the first byte of the next character.
 *   - additional 4th parameter is the size of the source buffer.
 *   - 5th parameter is the unpacked encoding-dependent state structure.
 *   - additional 6th parameter is the storage that receives what the real
 *     mbrtowc would return.
 *   - the return value is what "errno" would be in the real mbrtowc context.
 */

static int _FUNCNAME(mbrtowc_priv)(_ENCODING_INFO * __restrict,
				   wchar_t * __restrict,
				   const char ** __restrict,
				   size_t, _ENCODING_STATE * __restrict,
				   size_t * __restrict);

/*
 * standard form of wcrtomb_priv.
 *
 * note (differences from the real wcrtomb):
 *   - 1st parameter is the encoding information structure.
 *   - additional 3rd parameter is the size of the destination buffer.
 *   - 5th parameter is the unpacked encoding-dependent state structure.
 *   - additional 6th parameter is the storage that receives what the real
 *     wcrtomb would return.
 *   - the return value is what "errno" would be in the real wcrtomb context.
 */

static int _FUNCNAME(wcrtomb_priv)(_ENCODING_INFO * __restrict,
				   char * __restrict, size_t, wchar_t,
				   _ENCODING_STATE * __restrict,
				   size_t * __restrict);
__END_DECLS
171 
172 
173 /*
174  * macros
175  */
176 
/* Cast an opaque closure pointer (the "cl" argument) to _CTYPE_INFO *. */
#define _TO_CEI(_cl_)	((_CTYPE_INFO*)(_cl_))
178 
179 
180 /*
181  * templates
182  */
183 
184 /* internal routines */
185 
186 static __inline int
187 _FUNCNAME(mbtowc_priv)(_ENCODING_INFO * __restrict ei,
188 		       wchar_t * __restrict pwc,  const char * __restrict s,
189 		       size_t n, _ENCODING_STATE * __restrict psenc,
190 		       int * __restrict nresult)
191 {
192 	_ENCODING_STATE state;
193 	size_t nr;
194 	int err = 0;
195 
196 	_DIAGASSERT(ei != NULL);
197 	_DIAGASSERT(psenc != NULL);
198 
199 	if (s == NULL) {
200 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
201 		return (0);
202 	}
203 
204 	state = *psenc;
205 	err = _FUNCNAME(mbrtowc_priv)(ei, pwc, (const char **)&s, n, psenc, &nr);
206 	if (err) {
207 		*nresult = -1;
208 		return (err);
209 	}
210 	if (nr==(size_t)-2) {
211 		*psenc = state;
212 		*nresult = -1;
213 		return (EILSEQ);
214 	}
215 
216 	*nresult = (int)nr;
217 
218 	return (0);
219 }
220 
221 static int
222 _FUNCNAME(mbsrtowcs_priv)(_ENCODING_INFO * __restrict ei,
223 			  wchar_t * __restrict pwcs,
224 			  const char ** __restrict s,
225 			  size_t n, _ENCODING_STATE * __restrict psenc,
226 			  size_t * __restrict nresult)
227 {
228 	int err, cnt;
229 	size_t siz;
230 	const char *s0;
231 	size_t mbcurmax;
232 
233 	_DIAGASSERT(nresult != 0);
234 	_DIAGASSERT(ei != NULL);
235 	_DIAGASSERT(psenc != NULL);
236 
237 	if (s == NULL || *s == NULL || n==0) {
238 		*nresult = (size_t)-1;
239 		return EILSEQ;
240 	}
241 
242 	if (!pwcs)
243 		n = 1;
244 
245 	cnt = 0;
246 	s0 = *s; /* to keep *s unchanged for now, use copy instead. */
247 	mbcurmax = _ENCODING_MB_CUR_MAX(ei);
248 	while (n > 0) {
249 		err = _FUNCNAME(mbrtowc_priv)(ei, pwcs, &s0, mbcurmax,
250 					      psenc, &siz);
251 		if (siz == (size_t)-2)
252 			err = EILSEQ;
253 		if (err) {
254 			cnt = -1;
255 			goto bye;
256 		}
257 		switch (siz) {
258 		case 0:
259 			if (pwcs) {
260 				_FUNCNAME(init_state)(ei, psenc);
261 			}
262 			s0 = 0;
263 			goto bye;
264 		default:
265 			if (pwcs) {
266 				pwcs++;
267 				n--;
268 			}
269 			cnt++;
270 			break;
271 		}
272 	}
273 bye:
274 	if (pwcs)
275 		*s = s0;
276 
277 	*nresult = (size_t)cnt;
278 
279 	return err;
280 }
281 
282 
283 static int
284 _FUNCNAME(wcsrtombs_priv)(_ENCODING_INFO * __restrict ei, char * __restrict s,
285 			  const wchar_t ** __restrict pwcs,
286 			  size_t n, _ENCODING_STATE * __restrict psenc,
287 			  size_t * __restrict nresult)
288 {
289 	int cnt = 0, err;
290 	char buf[MB_LEN_MAX];
291 	size_t siz;
292 	const wchar_t* pwcs0;
293 #if _ENCODING_IS_STATE_DEPENDENT
294 	_ENCODING_STATE state;
295 #endif
296 
297 	pwcs0 = *pwcs;
298 
299 	if (!s)
300 		n = 1;
301 
302 	while (n > 0) {
303 #if _ENCODING_IS_STATE_DEPENDENT
304 		state = *psenc;
305 #endif
306 		err = _FUNCNAME(wcrtomb_priv)(ei, buf, sizeof(buf),
307 					      *pwcs0, psenc, &siz);
308 		if (siz == (size_t)-1) {
309 			*nresult = siz;
310 			return (err);
311 		}
312 
313 		if (s) {
314 			if (n < siz) {
315 #if _ENCODING_IS_STATE_DEPENDENT
316 				*psenc = state;
317 #endif
318 				break;
319 			}
320 			memcpy(s, buf, siz);
321 			s += siz;
322 			n -= siz;
323 		}
324 		cnt += siz;
325 		if (!*pwcs0) {
326 			if (s) {
327 				_FUNCNAME(init_state)(ei, psenc);
328 			}
329 			pwcs0 = 0;
330 			cnt--; /* don't include terminating null */
331 			break;
332 		}
333 		pwcs0++;
334 	}
335 	if (s)
336 		*pwcs = pwcs0;
337 
338 	*nresult = (size_t)cnt;
339 	return (0);
340 }
341 
342 
343 /* ----------------------------------------------------------------------
344  * templates for public functions
345  */
346 
/*
 * _RESTART_BEGIN / _RESTART_END bracket the body of a restartable entry
 * point.  They must be used as a pair in the same scope: _RESTART_BEGIN
 * opens a "do {" block that _RESTART_END closes.
 *
 *   _func_   - method name, used to pick the method-internal state
 *   _cei_    - pointer to the ctype encoding information
 *   _pspriv_ - caller-supplied packed state, or NULL
 *   _pse_    - lvalue of type _ENCODING_STATE *; set by _RESTART_BEGIN
 *
 * If _pspriv_ is NULL, _pse_ points at the method-internal state kept in
 * _cei_ (initialized first if _STATE_NEEDS_EXPLICIT_INIT says so).
 * Otherwise the packed state is unpacked into a block-local temporary,
 * _pse_ points at it, and _RESTART_END packs it back into _pspriv_.
 */
#define _RESTART_BEGIN(_func_, _cei_, _pspriv_, _pse_)			\
do {									\
	_ENCODING_STATE _state;						\
	do {								\
		if ((_pspriv_) == NULL) {				\
			(_pse_) = &_CEI_TO_STATE(_cei_, _func_);	\
			if (_STATE_NEEDS_EXPLICIT_INIT(_pse_))		\
			    _FUNCNAME(init_state)(_CEI_TO_EI(_cei_),	\
						  (_pse_));		\
		} else {						\
			(_pse_) = &_state;				\
			_FUNCNAME(unpack_state)(_CEI_TO_EI(_cei_),	\
						(_pse_), (_pspriv_));	\
		}							\
	} while (/*CONSTCOND*/0)

#define _RESTART_END(_func_, _cei_, _pspriv_, _pse_)			\
	if ((_pspriv_) != NULL) {					\
		_FUNCNAME(pack_state)(_CEI_TO_EI(_cei_), (_pspriv_),	\
				      (_pse_));				\
	}								\
} while (/*CONSTCOND*/0)
369 
370 int
371 _FUNCNAME(ctype_getops)(_citrus_ctype_ops_rec_t *ops, size_t lenops,
372 			u_int32_t expected_version)
373 {
374 	if (expected_version<_CITRUS_CTYPE_ABI_VERSION || lenops<sizeof(*ops))
375 		return (EINVAL);
376 
377 	memcpy(ops, &_FUNCNAME(ctype_ops), sizeof(_FUNCNAME(ctype_ops)));
378 
379 	return (0);
380 }
381 
382 static int
383 _FUNCNAME(ctype_init)(void ** __restrict cl,
384 		      void * __restrict var, size_t lenvar, size_t lenps)
385 {
386 	_CTYPE_INFO *cei;
387 
388 	_DIAGASSERT(cl != NULL);
389 
390 	/* sanity check to avoid overruns */
391 	if (sizeof(_ENCODING_STATE) > lenps)
392 		return (EINVAL);
393 
394 	cei = calloc(1, sizeof(_CTYPE_INFO));
395 	if (cei == NULL)
396 		return (ENOMEM);
397 
398 	*cl = (void *)cei;
399 
400 	return _FUNCNAME(stdencoding_init)(_CEI_TO_EI(cei), var, lenvar);
401 }
402 
403 static void
404 _FUNCNAME(ctype_uninit)(void *cl)
405 {
406 	if (cl) {
407 		_FUNCNAME(stdencoding_uninit)(_CEI_TO_EI(_TO_CEI(cl)));
408 		free(cl);
409 	}
410 }
411 
/*
 * Return the value of _ENCODING_MB_CUR_MAX for the encoding information
 * held by the closure cl.
 */
static unsigned
/*ARGSUSED*/
_FUNCNAME(ctype_get_mb_cur_max)(void *cl)
{
	return _ENCODING_MB_CUR_MAX(_CEI_TO_EI(_TO_CEI(cl)));
}
418 
419 static int
420 _FUNCNAME(ctype_mblen)(void * __restrict cl,
421 		       const char * __restrict s, size_t n,
422 		       int * __restrict nresult)
423 {
424 
425 	_DIAGASSERT(cl != NULL);
426 
427 	return _FUNCNAME(mbtowc_priv)(_CEI_TO_EI(_TO_CEI(cl)), NULL, s, n,
428 				      &_CEI_TO_STATE(_TO_CEI(cl), mblen),
429 				      nresult);
430 }
431 
432 static int
433 _FUNCNAME(ctype_mbrlen)(void * __restrict cl, const char * __restrict s,
434 			size_t n, void * __restrict pspriv,
435 			size_t * __restrict nresult)
436 {
437 	_ENCODING_STATE *psenc;
438 	int err = 0;
439 
440 	_DIAGASSERT(cl != NULL);
441 
442 	_RESTART_BEGIN(mbrlen, _TO_CEI(cl), pspriv, psenc);
443 	if (s == NULL) {
444 		_FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), psenc);
445 		*nresult = 0;
446 	} else {
447 		err = _FUNCNAME(mbrtowc_priv)(
448 			cl, NULL, (const char **)&s, n, (void *)psenc, nresult);
449 	}
450 	_RESTART_END(mbrlen, _TO_CEI(cl), pspriv, psenc);
451 
452 	return (err);
453 }
454 
455 static int
456 _FUNCNAME(ctype_mbrtowc)(void * __restrict cl, wchar_t * __restrict pwc,
457 			 const char * __restrict s, size_t n,
458 			 void * __restrict pspriv, size_t * __restrict nresult)
459 {
460 	_ENCODING_STATE *psenc;
461 	int err = 0;
462 
463 	_DIAGASSERT(cl != NULL);
464 
465 	_RESTART_BEGIN(mbrtowc, _TO_CEI(cl), pspriv, psenc);
466 	if (s == NULL) {
467 		_FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), psenc);
468 		*nresult = 0;
469 	} else {
470 		err = _FUNCNAME(mbrtowc_priv)(
471 			cl, pwc, (const char **)&s, n, (void *)psenc, nresult);
472 	}
473 	_RESTART_END(mbrtowc, _TO_CEI(cl), pspriv, psenc);
474 
475 	return (err);
476 }
477 
478 static int
479 /*ARGSUSED*/
480 _FUNCNAME(ctype_mbsinit)(void * __restrict cl, const void * __restrict pspriv,
481 			 int * __restrict nresult)
482 {
483 	_ENCODING_STATE state;
484 
485 	if (pspriv == NULL) {
486 		*nresult = 1;
487 		return (0);
488 	}
489 
490 	_FUNCNAME(unpack_state)(_CEI_TO_EI(_TO_CEI(cl)), &state, pspriv);
491 
492 	*nresult = (state.chlen == 0); /* XXX: FIXME */
493 
494 	return (0);
495 }
496 
497 static int
498 _FUNCNAME(ctype_mbsrtowcs)(void * __restrict cl, wchar_t * __restrict pwcs,
499 			   const char ** __restrict s, size_t n,
500 			   void * __restrict pspriv,
501 			   size_t * __restrict nresult)
502 {
503 	_ENCODING_STATE *psenc;
504 	int err = 0;
505 
506 	_DIAGASSERT(cl != NULL);
507 
508 	_RESTART_BEGIN(mbsrtowcs, _TO_CEI(cl), pspriv, psenc);
509 	err = _FUNCNAME(mbsrtowcs_priv)(cl, pwcs, s, n, psenc, nresult);
510 	_RESTART_END(mbsrtowcs, _TO_CEI(cl), pspriv, psenc);
511 
512 	return (err);
513 }
514 
515 static int
516 _FUNCNAME(ctype_mbstowcs)(void * __restrict cl, wchar_t * __restrict pwcs,
517 			  const char * __restrict s, size_t n,
518 			  size_t * __restrict nresult)
519 {
520 	int err;
521 	_ENCODING_STATE state;
522 
523 	_DIAGASSERT(cl != NULL);
524 
525 	_FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), &state);
526 	err = _FUNCNAME(mbsrtowcs_priv)(cl, pwcs, (const char **)&s, n, &state, nresult);
527 	if (*nresult == (size_t)-2) {
528 		err = EILSEQ;
529 		*nresult = (size_t)-1;
530 	}
531 
532 	return (err);
533 }
534 
535 static int
536 _FUNCNAME(ctype_mbtowc)(void * __restrict cl, wchar_t * __restrict pwc,
537 			const char * __restrict s, size_t n,
538 			int * __restrict nresult)
539 {
540 
541 	_DIAGASSERT(cl != NULL);
542 
543 	return _FUNCNAME(mbtowc_priv)(cl, pwc, s, n,
544 				      &_CEI_TO_STATE(_TO_CEI(cl), mbtowc),
545 				      nresult);
546 }
547 
548 static int
549 _FUNCNAME(ctype_wcrtomb)(void * __restrict cl, char * __restrict s, wchar_t wc,
550 			 void * __restrict pspriv, size_t * __restrict nresult)
551 {
552 	_ENCODING_STATE *psenc;
553 	int err = 0;
554 
555 	_DIAGASSERT(cl != NULL);
556 
557 	_RESTART_BEGIN(wcrtomb, _TO_CEI(cl), pspriv, psenc);
558 	err = _FUNCNAME(wcrtomb_priv)(_CEI_TO_EI(_TO_CEI(cl)), s,
559 			    _ENCODING_MB_CUR_MAX(_CEI_TO_EI(_TO_CEI(cl))),
560 			    wc, psenc, nresult);
561 	_RESTART_END(wcrtomb, _TO_CEI(cl), pspriv, psenc);
562 
563 	return err;
564 }
565 
566 static int
567 /*ARGSUSED*/
568 _FUNCNAME(ctype_wcsrtombs)(void * __restrict cl, char * __restrict s,
569 			   const wchar_t ** __restrict pwcs, size_t n,
570 			   void * __restrict pspriv,
571 			   size_t * __restrict nresult)
572 {
573 	_ENCODING_STATE *psenc;
574 	int err = 0;
575 
576 	_DIAGASSERT(cl != NULL);
577 
578 	_RESTART_BEGIN(wcsrtombs, _TO_CEI(cl), pspriv, psenc);
579 	err = _FUNCNAME(wcsrtombs_priv)(cl, s, pwcs, n, psenc, nresult);
580 	_RESTART_END(wcsrtombs, _TO_CEI(cl), pspriv, psenc);
581 
582 	return err;
583 }
584 
585 static int
586 /*ARGSUSED*/
587 _FUNCNAME(ctype_wcstombs)(void * __restrict cl, char * __restrict s,
588 			  const wchar_t * __restrict pwcs, size_t n,
589 			  size_t * __restrict nresult)
590 {
591 	_ENCODING_STATE state;
592 	int err;
593 
594 	_DIAGASSERT(cl != NULL);
595 
596 	_FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), &state);
597 	err = _FUNCNAME(wcsrtombs_priv)(cl, s, (const wchar_t **)&pwcs, n,
598 					&state, nresult);
599 
600 	return err;
601 }
602 
603 static int
604 _FUNCNAME(ctype_wctomb)(void * __restrict cl, char * __restrict s, wchar_t wc,
605 			int * __restrict nresult)
606 {
607 	size_t nr;
608 	int err = 0;
609 	char s0[MB_LEN_MAX];
610 
611 	_DIAGASSERT(cl != NULL);
612 
613 	if (s==NULL)
614 		s = s0;
615 
616 	err = _FUNCNAME(wcrtomb_priv)(cl, s,
617 		      _ENCODING_MB_CUR_MAX(_CEI_TO_EI(_TO_CEI(cl))),
618 		      wc, &_CEI_TO_STATE(_TO_CEI(cl), wctomb), &nr);
619 	*nresult = (int)nr;
620 
621 	return 0;
622 }
623