1 /*
2 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
7 * Reserved. This file contains Original Code and/or Modifications of
8 * Original Code as defined in and that are subject to the Apple Public
9 * Source License Version 1.0 (the 'License'). You may not use this file
10 * except in compliance with the License. Please obtain a copy of the
11 * License at http://www.apple.com/publicsource and read it before using
12 * this file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
19 * License for the specific language governing rights and limitations
20 * under the License."
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 /* CSTYLED */
25 /*
26 * @(#)charsets.c *
27 * (c) 2004 Apple Computer, Inc. All Rights Reserved
28 *
29 *
30 * charsets.c -- Routines converting between UTF-8, 16-bit
31 * little-endian Unicode, and various Windows
32 * code pages.
33 *
34 * MODIFICATION HISTORY:
35 * 28-Nov-2004 Guy Harris New today
36 */
37
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <errno.h>
43 #include <iconv.h>
44 #include <langinfo.h>
45 #include <strings.h>
46 #include <libintl.h>
47
48 #include <sys/isa_defs.h>
49 #include <netsmb/smb_lib.h>
50 #include <netsmb/mchain.h>
51
52 #include "charsets.h"
53
54 /*
55 * On Solaris, we will need to do some rewriting to use our iconv
56 * routines for the conversions. For now, we're effectively
57 * stubbing out code, leaving the details of what happens on
58 * Darwin in case it's useful as a guide later.
59 */
60
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and exactly 16 for any
 * other character, so a caller can detect an invalid digit by testing
 * for a result greater than 15.
 */
static unsigned
xtoi(char u)
{
	/*
	 * The <ctype.h> classifiers require an argument representable as
	 * unsigned char (or EOF).  A plain char holding a byte >= 0x80 may
	 * be negative, so convert before classifying.
	 */
	unsigned char c = (unsigned char)u;

	/* Reject everything that is not a hex digit, including 'g'-'z'. */
	if (!isxdigit(c))
		return (16);
	if (isdigit(c))
		return (c - '0');
	if (islower(c))
		return (10 + c - 'a');
	return (10 + c - 'A');
}
72
73
/*
 * Removes the "%" escape sequences from a URL component, in place.
 * See IETF RFC 2396.
 *
 * Every "%XX" whose two X characters are hexadecimal digits is replaced
 * by the single byte with that value.  A '%' that is not followed by two
 * hexadecimal digits is left untouched.  A decoded byte is never looked
 * at again, so "%2541" yields "%41", not "A".
 *
 * Returns its argument.  A NULL argument is returned unchanged.
 *
 * NOTE: "%00" decodes to a NUL byte; decoding continues past it, but
 * anything treating the result as a C string will see it end there.
 */
char *
unpercent(char *component)
{
	char *src, *dst;
	unsigned hi, lo;

	if (component == NULL)
		return (component);

	/*
	 * Decode with separate read and write positions.  The output is
	 * never longer than the input, so the write position can never
	 * overtake the read position, and each byte is moved at most once
	 * (no strlen()/memmove() of the whole tail per escape).
	 */
	src = dst = component;
	while (*src != '\0') {
		/*
		 * The && chain stops at the first non-hex character, so
		 * the terminating NUL is never read past.
		 */
		if (src[0] == '%' &&
		    (hi = xtoi(src[1])) <= 15 &&
		    (lo = xtoi(src[2])) <= 15) {
			*dst++ = (char)(hi * 16 + lo);
			src += 3;
		} else {
			/* ordinary character, or an invalid escape */
			*dst++ = *src++;
		}
	}
	*dst = '\0';

	return (component);
}
103
104 /* BEGIN CSTYLED */
#ifdef NOTPORTED
/*
 * Darwin-only code, compiled only when NOTPORTED is defined.
 *
 * Asks CoreFoundation for the installation encoding and region, and maps
 * that encoding to a kCFStringEncodingDOS* value.  Encodings without an
 * explicit case below map to kCFStringEncodingDOSLatin1.
 *
 * NOTE(review): the return values of both seteuid() calls are ignored;
 * eff_uid and real_uid are defined outside this block.
 */
static CFStringEncoding
get_windows_encoding_equivalent(void)
{
	CFStringEncoding encoding;
	uint32_t index, region;

	/* important! use root ID so you can read the config file! */
	seteuid(eff_uid);
	__CFStringGetInstallationEncodingAndRegion(&index, &region);
	seteuid(real_uid);

	switch (index) {
	case kCFStringEncodingMacRoman:
		/* anything nonzero is not US */
		encoding = region ? kCFStringEncodingDOSLatin1 :
		    kCFStringEncodingDOSLatinUS;
		break;

	case kCFStringEncodingMacJapanese:
		encoding = kCFStringEncodingDOSJapanese;
		break;

	case kCFStringEncodingMacChineseTrad:
		encoding = kCFStringEncodingDOSChineseTrad;
		break;

	case kCFStringEncodingMacKorean:
		encoding = kCFStringEncodingDOSKorean;
		break;

	case kCFStringEncodingMacArabic:
	case kCFStringEncodingMacFarsi:
		encoding = kCFStringEncodingDOSArabic;
		break;

	case kCFStringEncodingMacHebrew:
		encoding = kCFStringEncodingDOSHebrew;
		break;

	case kCFStringEncodingMacGreek:
		encoding = kCFStringEncodingDOSGreek;
		break;

	case kCFStringEncodingMacCyrillic:
	case kCFStringEncodingMacUkrainian:
		encoding = kCFStringEncodingDOSCyrillic;
		break;

	case kCFStringEncodingMacThai:
		encoding = kCFStringEncodingDOSThai;
		break;

	case kCFStringEncodingMacChineseSimp:
		encoding = kCFStringEncodingDOSChineseSimplif;
		break;

	case kCFStringEncodingMacCentralEurRoman:
	case kCFStringEncodingMacCroatian:
	case kCFStringEncodingMacRomanian:
		encoding = kCFStringEncodingDOSLatin2;
		break;

	case kCFStringEncodingMacTurkish:
		encoding = kCFStringEncodingDOSTurkish;
		break;

	case kCFStringEncodingMacIcelandic:
		encoding = kCFStringEncodingDOSIcelandic;
		break;

	default:
		encoding = kCFStringEncodingDOSLatin1;
		break;
	}

	return (encoding);
}
#endif /* NOTPORTED */
199
/*
 * Returns a newly allocated UTF-8 version of a string given in the
 * Windows code page, or NULL on failure or when windows_string is NULL.
 * The result is heap-allocated; the caller is responsible for free()ing
 * it.
 *
 * When NOTPORTED is not defined no conversion is done: the input is
 * duplicated byte for byte.  The CoreFoundation code under NOTPORTED is
 * kept as a guide to what happens on Darwin.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	if (windows_string == NULL)
		return (NULL);

	s = CFStringCreateWithCString(NULL, windows_string,
	    get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return (NULL);
		}
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
	} else if (!CFStringGetCString(s, result, maxlen,
	    kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		/* don't leak the buffer when the conversion fails */
		free(result);
		result = NULL;
	}
	CFRelease(s);
	return (result);
#else /* NOTPORTED */
	/* strdup() must not be handed a NULL pointer */
	if (windows_string == NULL)
		return (NULL);
	return (strdup(windows_string));
#endif /* NOTPORTED */
}
249
/*
 * Returns a newly allocated Windows-code-page version of a UTF-8 string,
 * or NULL on failure or when utf8_string is NULL.  The result is
 * heap-allocated; the caller is responsible for free()ing it.
 *
 * When NOTPORTED is not defined no conversion is done: the input is
 * duplicated byte for byte.  The CoreFoundation code under NOTPORTED is
 * kept as a guide to what happens on Darwin.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFStringEncoding wincs;
	CFIndex maxlen;
	char *result;

	if (utf8_string == NULL)
		return (NULL);

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return (NULL);
	}

	/* look the target encoding up once; it is used twice below */
	wincs = get_windows_encoding_equivalent();

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    wincs) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
	} else if (!CFStringGetCString(s, result, maxlen, wincs)) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		/* don't leak the buffer when the conversion fails */
		free(result);
		result = NULL;
	}
	CFRelease(s);
	return (result);
#else /* NOTPORTED */
	/* strdup() must not be handed a NULL pointer */
	if (utf8_string == NULL)
		return (NULL);
	return (strdup(utf8_string));
#endif /* NOTPORTED */
}
292 /* END CSTYLED */
293
294 /*
295 * We replaced these routines for Solaris:
296 * convert_leunicode_to_utf8
297 * convert_unicode_to_utf8
298 * convert_utf8_to_leunicode
299 * with new code in: utf_str.c
300 */
301