1 /*
2 	utf.c (13.09.09)
3 	exFAT file system implementation library.
4 
5 	Free exFAT implementation.
6 	Copyright (C) 2010-2018  Andrew Nayenko
7 
8 	This program is free software; you can redistribute it and/or modify
9 	it under the terms of the GNU General Public License as published by
10 	the Free Software Foundation, either version 2 of the License, or
11 	(at your option) any later version.
12 
13 	This program is distributed in the hope that it will be useful,
14 	but WITHOUT ANY WARRANTY; without even the implied warranty of
15 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 	GNU General Public License for more details.
17 
18 	You should have received a copy of the GNU General Public License along
19 	with this program; if not, write to the Free Software Foundation, Inc.,
20 	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 
23 #include "exfat.h"
24 #include <errno.h>
25 
/*
	Encode a single code point as a UTF-8 byte sequence.

	output  - destination buffer
	wc      - code point to encode; values 0..0x7fffffff are accepted and
	          produce 1 to 6 bytes
	outsize - number of bytes available at output

	Returns a pointer just past the last byte written, or NULL when the
	sequence does not fit into outsize bytes or when wc is outside the
	encodable range (negative or above 0x7fffffff). Nothing is written on
	failure.
*/
static char* wchar_to_utf8(char* output, wchar_t wc, size_t outsize)
{
	/* Widen to a signed type that can hold every wchar_t value, so the
	   range checks behave the same whether wchar_t is signed or unsigned.
	   With a signed wchar_t a negative value would otherwise satisfy
	   "wc <= 0x7f" and be emitted as a single truncated byte. */
	const long long cp = wc;

	if (cp < 0)
		return NULL;

	if (cp <= 0x7f)
	{
		if (outsize < 1)
			return NULL;
		*output++ = (char) cp;
	}
	else if (cp <= 0x7ff)
	{
		if (outsize < 2)
			return NULL;
		*output++ = 0xc0 | (cp >> 6);
		*output++ = 0x80 | (cp & 0x3f);
	}
	else if (cp <= 0xffff)
	{
		if (outsize < 3)
			return NULL;
		*output++ = 0xe0 | (cp >> 12);
		*output++ = 0x80 | ((cp >> 6) & 0x3f);
		*output++ = 0x80 | (cp & 0x3f);
	}
	else if (cp <= 0x1fffff)
	{
		if (outsize < 4)
			return NULL;
		*output++ = 0xf0 | (cp >> 18);
		*output++ = 0x80 | ((cp >> 12) & 0x3f);
		*output++ = 0x80 | ((cp >> 6) & 0x3f);
		*output++ = 0x80 | (cp & 0x3f);
	}
	else if (cp <= 0x3ffffff)
	{
		if (outsize < 5)
			return NULL;
		*output++ = 0xf8 | (cp >> 24);
		*output++ = 0x80 | ((cp >> 18) & 0x3f);
		*output++ = 0x80 | ((cp >> 12) & 0x3f);
		*output++ = 0x80 | ((cp >> 6) & 0x3f);
		*output++ = 0x80 | (cp & 0x3f);
	}
	else if (cp <= 0x7fffffff)
	{
		if (outsize < 6)
			return NULL;
		*output++ = 0xfc | (cp >> 30);
		*output++ = 0x80 | ((cp >> 24) & 0x3f);
		*output++ = 0x80 | ((cp >> 18) & 0x3f);
		*output++ = 0x80 | ((cp >> 12) & 0x3f);
		*output++ = 0x80 | ((cp >> 6) & 0x3f);
		*output++ = 0x80 | (cp & 0x3f);
	}
	else
		return NULL;

	return output;
}
84 
/*
	Decode one character from a sequence of UTF-16 code units.

	input  - current read position; input[0] is always read
	wc     - receives the decoded value
	insize - number of code units available at input

	A unit in the range 0xd800..0xdbff must be followed by a unit in the
	range 0xdc00..0xdfff; the pair is combined into a value of 0x10000 or
	above. Any other unit (including an unpaired 0xdc00..0xdfff one) is
	stored in *wc as is.

	Returns the position after the consumed units, or NULL if a leading
	surrogate is not followed by a trailing one within insize units.
*/
static const le16_t* utf16_to_wchar(const le16_t* input, wchar_t* wc,
		size_t insize)
{
	const unsigned int first = le16_to_cpu(input[0]);
	unsigned int second;

	/* not a leading surrogate: the unit stands for itself */
	if ((first & 0xfc00) != 0xd800)
	{
		*wc = first;
		return input + 1;
	}

	/* input[1] may only be touched once we know it exists */
	if (insize < 2)
		return NULL;
	second = le16_to_cpu(input[1]);
	if ((second & 0xfc00) != 0xdc00)
		return NULL;

	/* 10 payload bits from each half, offset past the BMP */
	*wc = (wchar_t) ((((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000);
	return input + 2;
}
103 
/*
	Convert a UTF-16 string into a NUL-terminated UTF-8 string.

	output  - destination buffer
	input   - source code units
	outsize - capacity of output in bytes (terminator included)
	insize  - maximum number of code units to read from input

	Conversion stops after insize units or right after a zero code unit
	has been converted, whichever comes first.

	Returns 0 on success, -EILSEQ if the input contains an invalid
	surrogate sequence, -ENAMETOOLONG if the result (with its terminator)
	does not fit into outsize bytes.
*/
int utf16_to_utf8(char* output, const le16_t* input, size_t outsize,
		size_t insize)
{
	const le16_t* src = input;
	char* dst = output;
	wchar_t wc;

	for (;;)
	{
		const size_t consumed = src - input;

		if (consumed >= insize)
			break;

		src = utf16_to_wchar(src, &wc, insize - consumed);
		if (src == NULL)
		{
			exfat_error("illegal UTF-16 sequence");
			return -EILSEQ;
		}

		dst = wchar_to_utf8(dst, wc, outsize - (dst - output));
		if (dst == NULL)
		{
			exfat_error("name is too long");
			return -ENAMETOOLONG;
		}

		/* the terminator has just been copied, nothing left to do */
		if (wc == 0)
			return 0;
	}

	/* input ran out without a terminator: append one if there is room */
	if ((size_t) (dst - output) >= outsize)
	{
		exfat_error("name is too long");
		return -ENAMETOOLONG;
	}
	*dst = '\0';
	return 0;
}
136 
/*
	Decode one character from a UTF-8 byte sequence.

	input  - current read position
	wc     - receives the decoded value (written only on success)
	insize - number of bytes available at input

	Sequences of 1 to 6 bytes are recognised by their lead byte. Every
	following byte must be a continuation byte (10xxxxxx); otherwise the
	sequence is rejected instead of being decoded into a bogus value.

	Returns the position after the consumed bytes, or NULL if there is no
	input, the lead byte is invalid, the sequence is truncated by insize,
	or a continuation byte is malformed.
*/
static const char* utf8_to_wchar(const char* input, wchar_t* wc,
		size_t insize)
{
	wchar_t value;
	size_t size;
	size_t i;

	/* check the length before touching input[0] */
	if (insize == 0)
		return NULL;

	if ((input[0] & 0x80) == 0)
	{
		*wc = (wchar_t) input[0];
		return input + 1;
	}
	else if ((input[0] & 0xe0) == 0xc0)
	{
		value = (wchar_t) (input[0] & 0x1f);
		size = 2;
	}
	else if ((input[0] & 0xf0) == 0xe0)
	{
		value = (wchar_t) (input[0] & 0x0f);
		size = 3;
	}
	else if ((input[0] & 0xf8) == 0xf0)
	{
		value = (wchar_t) (input[0] & 0x07);
		size = 4;
	}
	else if ((input[0] & 0xfc) == 0xf8)
	{
		value = (wchar_t) (input[0] & 0x03);
		size = 5;
	}
	else if ((input[0] & 0xfe) == 0xfc)
	{
		value = (wchar_t) (input[0] & 0x01);
		size = 6;
	}
	else
		return NULL;	/* stray continuation byte or 0xfe/0xff */

	if (insize < size)
		return NULL;

	/* the lead byte is handled above; append 6 bits per continuation */
	for (i = 1; i < size; i++)
	{
		if ((input[i] & 0xc0) != 0x80)
			return NULL;
		value = (value << 6) | (wchar_t) (input[i] & 0x3f);
	}

	*wc = value;
	return input + size;
}
187 
/*
	Encode a single character as one or two UTF-16 code units.

	output  - destination buffer
	wc      - value to encode
	outsize - number of code units available at output

	Values up to 0xffff are stored as a single unit; anything larger is
	stored as a surrogate pair built from (wc - 0x10000).

	NOTE: values above 0x10ffff are not rejected here; only the low
	20 bits of (wc - 0x10000) end up in the pair.

	Returns the position after the written units, or NULL if they do not
	fit into outsize units.
*/
static le16_t* wchar_to_utf16(le16_t* output, wchar_t wc, size_t outsize)
{
	if (wc > 0xffff)
	{
		/* outside the BMP: needs a surrogate pair */
		wchar_t rest;

		if (outsize < 2)
			return NULL;
		rest = wc - 0x10000;
		output[0] = cpu_to_le16(0xd800 | ((rest >> 10) & 0x3ff));
		output[1] = cpu_to_le16(0xdc00 | (rest & 0x3ff));
		return output + 2;
	}

	/* BMP character: one unit is enough */
	if (outsize == 0)
		return NULL;
	output[0] = cpu_to_le16(wc);
	return output + 1;
}
204 
/*
	Convert a UTF-8 string into a zero-terminated UTF-16 string.

	output  - destination buffer
	input   - source bytes
	outsize - capacity of output in code units (terminator included)
	insize  - maximum number of bytes to read from input

	Conversion stops after insize bytes or right after a NUL character
	has been converted, whichever comes first. In both cases a zero unit
	is then appended, so when the input contains a NUL the output must
	have room for the converted NUL plus this extra terminator.

	Returns 0 on success, -EILSEQ if the input is not decodable,
	-ENAMETOOLONG if the result (with its terminator) does not fit into
	outsize units.
*/
int utf8_to_utf16(le16_t* output, const char* input, size_t outsize,
		size_t insize)
{
	const char* src = input;
	le16_t* dst = output;
	wchar_t wc;

	for (;;)
	{
		const size_t consumed = src - input;

		if (consumed >= insize)
			break;

		src = utf8_to_wchar(src, &wc, insize - consumed);
		if (src == NULL)
		{
			exfat_error("illegal UTF-8 sequence");
			return -EILSEQ;
		}

		dst = wchar_to_utf16(dst, wc, outsize - (dst - output));
		if (dst == NULL)
		{
			exfat_error("name is too long");
			return -ENAMETOOLONG;
		}

		/* stop at the end of the string, then terminate below */
		if (wc == 0)
			break;
	}

	if ((size_t) (dst - output) >= outsize)
	{
		exfat_error("name is too long");
		return -ENAMETOOLONG;
	}
	*dst = cpu_to_le16(0);
	return 0;
}
237 
/*
	Count the code units in a zero-terminated UTF-16 string.

	str - string to measure; must contain a zero unit

	Returns the number of units before the first zero unit. This is a
	count of 16-bit units, so a surrogate pair counts as two.
*/
size_t utf16_length(const le16_t* str)
{
	const le16_t* end = str;

	/* walk forward until the terminating zero unit */
	while (le16_to_cpu(*end) != 0)
		end++;
	return end - str;
}
246