/* $OpenBSD: mbrtoc16.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
/*
 * Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <stdint.h>
#include <uchar.h>
#include <wchar.h>

/*
 * Keep this structure compatible with
 * struct _utf8_state in the file citrus/citrus_utf8.c.
 * In particular, only use values for the "want" field
 * that do not collide with values used by the function
 * _citrus_utf8_ctype_mbrtowc().
 */
struct _utf16_state {
	wchar_t	ch;
	int	want;
};

/*
 * Marker stored in the "want" field between the call that delivered
 * a high surrogate and the call that delivers the matching low one.
 * It is deliberately an int, like the field it is compared against.
 */
#define UTF16_LOW_PENDING	(-3)

/*
 * Convert the next multibyte character from s (at most n bytes)
 * to UTF-16, storing one 16-bit code unit through pc16 unless
 * pc16 is NULL.
 *
 * A code point above UINT16_MAX takes two calls: the first stores
 * the high surrogate and returns whatever mbrtowc(3) returned,
 * the second stores the low surrogate, looks at no input at all,
 * and returns (size_t)-3.
 *
 * In every other case, the return value is the one of mbrtowc(3).
 * If s is NULL, nothing is ever stored through pc16.
 */
size_t
mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
{
	static mbstate_t	 mbs;
	struct _utf16_state	*us;
	size_t			 rv;
	wchar_t			 wc;

	/*
	 * Fall back to a state object local to this function
	 * and do not use the fallback object in mbrtowc(3)
	 * because an application program might mix calls to mbrtowc(3)
	 * and mbrtoc16(3) decoding different strings, and they must
	 * not clobber each other's state.
	 */
	if (ps == NULL)
		ps = &mbs;

	us = (struct _utf16_state *)ps;

	/*
	 * Handle the special case of NULL input first such that
	 * a low surrogate left over from a previous call does not
	 * clobber an object pointed to by the pc16 argument.
	 */
	if (s == NULL) {
		s = "";
		n = 1;
		pc16 = NULL;
	}

	/*
	 * If the previous call stored a high surrogate,
	 * store the corresponding low surrogate now
	 * and do not inspect any further input yet.
	 */
	if (us->want == UTF16_LOW_PENDING) {
		if (pc16 != NULL) {
			/* 0xdc00 plus the low ten bits of the code point. */
			*pc16 = 0xdc00 + (us->ch & 0x3ff);
		}
		us->ch = 0;
		us->want = 0;
		return (size_t)-3;
	}

	/*
	 * Decode the multibyte character.
	 * All the mbrtowc(3) use cases can be reached from here,
	 * including continuing an incomplete character started earlier,
	 * decoding a NUL character, a valid complete character,
	 * an incomplete character to be continued later,
	 * or a decoding error.
	 */
	rv = mbrtowc(&wc, s, n, ps);

	/* Incomplete input or a decoding error: nothing to store. */
	if (rv >= (size_t)-2)
		return rv;

	/* A new character that is valid and complete. */
	if (wc > UINT16_MAX) {
		/*
		 * Store a high surrogate.
		 * 0xd7c0 is 0xd800 - (0x10000 >> 10), so the offset
		 * of 0x10000 is subtracted together with the shift.
		 */
		if (pc16 != NULL) {
			*pc16 = 0xd7c0 + (wc >> 10);
		}
		/* Remember that the low surrogate is pending. */
		us->ch = wc;
		us->want = UTF16_LOW_PENDING;
	} else if (pc16 != NULL) {
		/* Store a basic multilingual plane codepoint. */
		*pc16 = wc;
	}
	return rv;
}