xref: /illumos-gate/usr/src/lib/libc/port/locale/mbftowc.c (revision 6ea3c060)
1 /*
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "lint.h"
28 #include <stdlib.h>
29 #include <string.h>
30 #include <wchar.h>
31 
/*
 * _mbftowc() is a private libc interface: it is referenced by other parts
 * of ON, is not documented, and is not intended for public API usage.
 *
 * It gathers bytes one at a time from a caller-supplied function until
 * mbrtowc() can decode exactly one character from the bytes gathered.
 *
 * Arguments:
 *	s	storage for the multibyte sequence; every byte obtained from
 *		peek() is stored here in order.  No length is passed, so the
 *		caller must supply a buffer large enough for the longest
 *		sequence (presumably MB_CUR_MAX bytes -- confirm with callers).
 *	wc	handed to mbrtowc() to receive the decoded wide character.
 *	peek	returns the next input byte as an int; any negative value
 *		(e.g. EOF) means no more input is available.
 *	errorc	on a decoding error, receives the byte that triggered it.
 *		It is not written on any other path.
 *
 * Return value:
 *	- If a character was decoded: the value returned by mbrtowc(), i.e.
 *	  the number of bytes making up the character, or 0 if the null
 *	  wide character was decoded.
 *	- If peek() ran out of input first: the number of bytes stored in
 *	  "s" so far (0 if none); no complete character was decoded.
 *	- If mbrtowc() rejected the sequence: the number of bytes stored
 *	  before the offending byte.  The offending byte is reported through
 *	  *errorc and is still present in "s" just past the counted bytes.
 */

int
_mbftowc(char *s, wchar_t *wc, int (*peek)(void), int *errorc)
{
	char		*start = s;
	mbstate_t	mbs;
	size_t		cons;
	int		c;

	for (;;) {
		c = peek();
		if (c < 0) {
			/* Input ran out; report what was gathered so far. */
			return ((int)(s - start));
		}

		*s++ = (char)c;

		/*
		 * Re-decode everything gathered so far from the start of the
		 * buffer, so the conversion state must be reset each time.
		 */
		(void) memset(&mbs, 0, sizeof (mbs));
		cons = mbrtowc(wc, start, (size_t)(s - start), &mbs);

		if (cons == (size_t)-2) {
			/* Valid prefix, but incomplete: fetch another byte. */
			continue;
		}

		if (cons == (size_t)-1) {
			/*
			 * Invalid sequence.  The byte just stored is what
			 * broke it, so leave it out of the consumed count
			 * and hand it back to the caller via errorc.  (c is
			 * known to be non-negative at this point.)
			 */
			s--;
			*errorc = c;
			return ((int)(s - start));
		}

		/*
		 * Fully translated character.  Compare against the two
		 * sentinel values above rather than casting the size_t to
		 * int and testing its sign, which relies on
		 * implementation-defined narrowing.
		 */
		return ((int)cons);
	}
}
89