xref: /openbsd/lib/libc/locale/mbrtoc16.c (revision d415bd75)
1 /*	$OpenBSD: mbrtoc16.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
2 /*
3  * Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <stdint.h>
19 #include <uchar.h>
20 #include <wchar.h>
21 
22 /*
23  * Keep this structure compatible with
24  * struct _utf8_state in the file citrus/citrus_utf8.c.
25  * In particular, only use values for the "want" field
26  * that do not collide with values used by the function
27  * _citrus_utf8_ctype_mbrtowc().
28  */
29 struct _utf16_state {
30 	wchar_t	ch;	/* mbrtoc16(): full code point while its low surrogate is pending; zeroed once that surrogate is stored */
31 	int	want;	/* mbrtoc16(): -3 while a low surrogate is pending; zeroed once that surrogate is stored */
32 };
33 
34 size_t
35 mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
36 {
37 	static mbstate_t	 mbs;
38 	struct _utf16_state	*us;
39 	size_t			 rv;
40 	wchar_t			 wc;
41 
42 	/*
43 	 * Fall back to a state object local to this function
44 	 * and do not use the fallback object in mbrtowc(3)
45 	 * because an application program might mix calls to mbrtowc(3)
46 	 * and mbrtoc16(3) decoding different strings, and they must
47 	 * not clobber each other's state.
48 	 */
49 	if (ps == NULL)
50 		ps = &mbs;
51 
52 	us = (struct _utf16_state *)ps;
53 
54 	/*
55 	 * Handle the special case of NULL input first such that
56 	 * a low surrogate left over from a previous call does not
57 	 * clobber an object pointed to by the pc16 argument.
58 	 */
59 	if (s == NULL) {
60 		s = "";
61 		n = 1;
62 		pc16 = NULL;
63 	}
64 
65 	/*
66 	 * If the previous call stored a high surrogate,
67 	 * store the corresponding low surrogate now
68 	 * and do not inspect any further input yet.
69 	 */
70 	if (us->want == (size_t)-3) {
71 		if (pc16 != NULL)
72 			*pc16 = 0xdc00 + (us->ch & 0x3ff);
73 		us->ch = 0;
74 		us->want = 0;
75 		return -3;
76 	}
77 
78 	/*
79 	 * Decode the multibyte character.
80 	 * All the mbrtowc(3) use cases can be reached from here,
81 	 * including continuing an imcomplete character started earlier,
82 	 * decoding a NUL character, a valid complete character,
83 	 * an incomplete character to be continued later,
84 	 * or a decoding error.
85 	 */
86 	rv = mbrtowc(&wc, s, n, ps);
87 
88 	if (rv < (size_t)-2) {
89 		/* A new character that is valid and complete. */
90 		if (wc > UINT16_MAX) {
91 			/* Store a high surrogate. */
92 			if (pc16 != NULL)
93 				*pc16 = 0xd7c0 + (wc >> 10);
94 			/* Remember that the low surrogate is pending. */
95 			us->ch = wc;
96 			us->want = -3;
97 		} else if (pc16 != NULL)
98 			/* Store a basic multilingual plane codepoint. */
99 			*pc16 = wc;
100 	}
101 	return rv;
102 }
103