xref: /illumos-gate/usr/src/lib/libsmbfs/smb/charsets.c (revision b2b3ca14)
1 /*
2  * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
7  * Reserved.  This file contains Original Code and/or Modifications of
8  * Original Code as defined in and that are subject to the Apple Public
9  * Source License Version 1.0 (the 'License').  You may not use this file
10  * except in compliance with the License.  Please obtain a copy of the
11  * License at http://www.apple.com/publicsource and read it before using
12  * this file.
13  *
14  * The Original Code and all software distributed under the License are
15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
19  * License for the specific language governing rights and limitations
20  * under the License."
21  *
22  * @APPLE_LICENSE_HEADER_END@
23  */
24 /* CSTYLED */
25 /*
26  *      @(#)charsets.c      *
27  *      (c) 2004   Apple Computer, Inc.  All Rights Reserved
28  *
29  *
30  *      charsets.c -- Routines converting between UTF-8, 16-bit
31  *			little-endian Unicode, and various Windows
32  *			code pages.
33  *
34  *      MODIFICATION HISTORY:
35  *       28-Nov-2004     Guy Harris	New today
36  */
37 
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <errno.h>
43 #include <iconv.h>
44 #include <langinfo.h>
45 #include <strings.h>
46 
47 #include <netsmb/smb_lib.h>
48 #include <netsmb/mchain.h>
49 
50 #include "charsets.h"
51 
52 /*
53  * On Solaris, we will need to do some rewriting to use our iconv
54  * routines for the conversions.  For now, we're effectively
55  * stubbing out code, leaving the details of what happens on
56  * Darwin in case it's useful as a guide later.
57  */
58 
/*
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'.  Any other character
 * yields 16, so callers detect an invalid digit by testing for a
 * result greater than 15.
 *
 * Explicit range comparisons are used rather than the <ctype.h>
 * classifiers: handing isdigit() and friends a plain char with a
 * negative value (any byte >= 0x80 where char is signed) is undefined
 * behavior, and their answers vary with the current locale.
 */
static unsigned
xtoi(char u)
{
	if (u >= '0' && u <= '9')
		return ((unsigned)(u - '0'));
	if (u >= 'a' && u <= 'f')
		return (10 + (unsigned)(u - 'a'));
	if (u >= 'A' && u <= 'F')
		return (10 + (unsigned)(u - 'A'));
	return (16);
}
70 
71 
/*
 * Removes the "%" escape sequences from a URL component.
 * See IETF RFC 2396.
 *
 * The string is decoded in place and the same pointer is returned
 * (NULL is passed straight through).  Each "%XY" in which X and Y are
 * both hexadecimal digits is replaced by the single byte 0xXY; a '%'
 * that is not followed by two hex digits is kept literally.  Decoded
 * bytes are never re-scanned, so "%2541" becomes "%41", not "A".
 *
 * NOTE: "%00" decodes to an embedded NUL byte.  Decoding carries on
 * past it, but callers that treat the result as a C string will see
 * the component end at that point.
 */
char *
unpercent(char *component)
{
	char *src, *dst;
	unsigned hi, lo;

	if (component == NULL)
		return (component);

	/*
	 * Single pass with separate read and write cursors.  The output
	 * never gets ahead of the input, so the buffer can be compacted
	 * as we go instead of shifting the whole tail left for every
	 * escape that is decoded.
	 */
	src = dst = component;
	while (*src != '\0') {
		/*
		 * The && chain stops at the first invalid digit, so
		 * src[2] is never read when src[1] is the terminator.
		 */
		if (src[0] == '%' &&
		    (hi = xtoi(src[1])) <= 15 &&
		    (lo = xtoi(src[2])) <= 15) {
			*dst++ = (char)(hi * 16 + lo);
			src += 3;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';

	return (component);
}
101 
102 /* BEGIN CSTYLED */
#ifdef NOTPORTED
/*
 * Darwin reference code; only compiled when NOTPORTED is defined.
 *
 * Maps the Mac encoding/region pair reported by
 * __CFStringGetInstallationEncodingAndRegion() to a DOS/Windows code
 * page encoding.  Encodings without an explicit mapping fall back to
 * DOS Latin1.
 */
static CFStringEncoding
get_windows_encoding_equivalent( void )
{
	uint32_t mac_encoding, mac_region;

	/*
	 * important! use root ID so you can read the config file!
	 * (eff_uid and real_uid are defined outside this function.)
	 */
	seteuid(eff_uid);
	__CFStringGetInstallationEncodingAndRegion(&mac_encoding, &mac_region);
	seteuid(real_uid);

	switch (mac_encoding) {
	case kCFStringEncodingMacRoman:
		/* anything nonzero is not US */
		if (mac_region)
			return kCFStringEncodingDOSLatin1;
		return kCFStringEncodingDOSLatinUS;

	case kCFStringEncodingMacJapanese:
		return kCFStringEncodingDOSJapanese;

	case kCFStringEncodingMacChineseTrad:
		return kCFStringEncodingDOSChineseTrad;

	case kCFStringEncodingMacKorean:
		return kCFStringEncodingDOSKorean;

	case kCFStringEncodingMacArabic:
	case kCFStringEncodingMacFarsi:
		return kCFStringEncodingDOSArabic;

	case kCFStringEncodingMacHebrew:
		return kCFStringEncodingDOSHebrew;

	case kCFStringEncodingMacGreek:
		return kCFStringEncodingDOSGreek;

	case kCFStringEncodingMacCyrillic:
	case kCFStringEncodingMacUkrainian:
		return kCFStringEncodingDOSCyrillic;

	case kCFStringEncodingMacThai:
		return kCFStringEncodingDOSThai;

	case kCFStringEncodingMacChineseSimp:
		return kCFStringEncodingDOSChineseSimplif;

	case kCFStringEncodingMacCentralEurRoman:
	case kCFStringEncodingMacCroatian:
	case kCFStringEncodingMacRomanian:
		return kCFStringEncodingDOSLatin2;

	case kCFStringEncodingMacTurkish:
		return kCFStringEncodingDOSTurkish;

	case kCFStringEncodingMacIcelandic:
		return kCFStringEncodingDOSIcelandic;

	default:
		return kCFStringEncodingDOSLatin1;
	}
}
#endif /* NOTPORTED */
197 
/*
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 *
 * Returns a newly allocated string that the caller must free(), or
 * NULL on failure.
 *
 * Without NOTPORTED no character set conversion is performed: the
 * input bytes are duplicated unchanged, and a NULL input yields NULL
 * rather than being handed to strdup().  The NOTPORTED branch keeps
 * the Darwin CoreFoundation implementation for reference.
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, windows_string,
		get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return NULL;
		}
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		/* Don't leak the output buffer when the conversion fails. */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* strdup(NULL) is undefined; report it as a failure instead. */
	if (windows_string == NULL)
		return (NULL);
	return (strdup(windows_string));
#endif /* NOTPORTED */
}
247 
/*
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 *
 * Returns a newly allocated string that the caller must free(), or
 * NULL on failure.
 *
 * Without NOTPORTED no character set conversion is performed: the
 * input bytes are duplicated unchanged, and a NULL input yields NULL
 * rather than being handed to strdup().  The NOTPORTED branch keeps
 * the Darwin CoreFoundation implementation for reference.
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFStringEncoding wincs;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return NULL;
	}

	/*
	 * Look the target encoding up once so the buffer size and the
	 * conversion below are guaranteed to use the same code page.
	 */
	wincs = get_windows_encoding_equivalent();

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    wincs) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, wincs)) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		/* Don't leak the output buffer when the conversion fails. */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* strdup(NULL) is undefined; report it as a failure instead. */
	if (utf8_string == NULL)
		return (NULL);
	return (strdup(utf8_string));
#endif /* NOTPORTED */
}
290 /* END CSTYLED */
291 
292 /*
293  * We replaced these routines for Solaris:
294  *	convert_leunicode_to_utf8
295  *	convert_unicode_to_utf8
296  *	convert_utf8_to_leunicode
297  * with new code in: utf_str.c
298  */
299