1 /*-------------------------------------------------------------------------
   c16stombs.c - convert a UTF-16 (char16_t) string to a multibyte string
3 
4    Copyright (C) 2018, Philipp Klaus Krause, krauseph@informatik.uni-freiburg.de
5 
6    This library is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by the
8    Free Software Foundation; either version 2, or (at your option) any
9    later version.
10 
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this library; see the file COPYING. If not, write to the
18    Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
19    MA 02110-1301, USA.
20 
21    As a special exception, if you link this library with other files,
22    some of which are compiled with SDCC, to produce an executable,
23    this library does not by itself cause the resulting executable to
24    be covered by the GNU General Public License. This exception does
25    not however invalidate any other reasons why the executable file
26    might be covered by the GNU General Public License.
27 -------------------------------------------------------------------------*/
28 
29 #include <uchar.h>
30 
31 #include <stdlib.h>
32 #include <string.h>
33 #include <limits.h>
34 #include <wchar.h>
35 
36 #ifndef __STDC_UTF_16__
37 #error Encoding for char16_t strings must be UTF-16
38 #endif
39 #ifndef __STDC_ISO_10646__
40 #error Encoding for wchar_t strings must be UCS-4
41 #endif
42 
// Converts the null-terminated UTF-16 string c16s into a multibyte string,
// storing at most n bytes into s.
//
// Each UTF-16 code unit (or surrogate pair) is decoded to a code point and
// then encoded by wctomb(). A multibyte character is only stored if it fits
// completely within the n-byte limit; conversion stops at the first one that
// does not fit. The terminating null byte is stored only if there is still
// room for it, so the result is not null-terminated when the limit is hit.
//
// Returns the number of bytes stored in s, not counting the terminating null
// byte, or (size_t)-1 if c16s contains an unpaired surrogate or wctomb()
// rejects a code point.
//
// This implementation assumes that wctomb() does not keep internal state.
size_t __c16stombs(char *restrict s, const char16_t *restrict c16s, size_t n)
{
	size_t m = 0;       // Number of bytes stored in s so far
	char b[MB_LEN_MAX]; // Staging buffer for a single multibyte character

	for(;;)
	{
		int l;
		wchar_t codepoint;

		if(c16s[0] < 0xd800 || c16s[0] >= 0xe000) // Basic multilingual plane
		{
			codepoint = c16s[0];
			c16s++;
		}
		// c16s[1] is only examined when c16s[0] is a high surrogate, i.e. non-zero,
		// so the read stays within the null-terminated input.
		else if(c16s[0] > 0xdbff || c16s[1] < 0xdc00 || c16s[1] > 0xdfff) // Unpaired surrogate
			return((size_t)-1);
		else
		{
			// Combine high and low surrogate into a supplementary-plane code point.
			codepoint = ((unsigned long)(c16s[0]) << 10) - (0xd800ul << 10) + (unsigned long)(c16s[1]) - 0xdc00ul + 0x10000ul;
			c16s += 2;
		}

		if(!codepoint)
		{
			// End of input: terminate the output only if the limit allows it.
			if(m < n)
				*s = 0;
			break;
		}

		l = wctomb(b, codepoint);

		if(l < 0)
			return((size_t)-1);

		// Never store a partial multibyte character.
		if(m + (size_t)l > n)
			break;

		memcpy(s, b, (size_t)l);
		s += l;
		m += (size_t)l;
	}

	return(m);
}
89 
90