xref: /dragonfly/contrib/libpcap/charconv.c (revision ea16f64e)
1*ea16f64eSAntonio Huete Jimenez /* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
2*ea16f64eSAntonio Huete Jimenez /*
3*ea16f64eSAntonio Huete Jimenez  * Copyright (c) 1993, 1994, 1995, 1996, 1997
4*ea16f64eSAntonio Huete Jimenez  *	The Regents of the University of California.  All rights reserved.
5*ea16f64eSAntonio Huete Jimenez  *
6*ea16f64eSAntonio Huete Jimenez  * Redistribution and use in source and binary forms, with or without
7*ea16f64eSAntonio Huete Jimenez  * modification, are permitted provided that the following conditions
8*ea16f64eSAntonio Huete Jimenez  * are met:
9*ea16f64eSAntonio Huete Jimenez  * 1. Redistributions of source code must retain the above copyright
10*ea16f64eSAntonio Huete Jimenez  *    notice, this list of conditions and the following disclaimer.
11*ea16f64eSAntonio Huete Jimenez  * 2. Redistributions in binary form must reproduce the above copyright
12*ea16f64eSAntonio Huete Jimenez  *    notice, this list of conditions and the following disclaimer in the
13*ea16f64eSAntonio Huete Jimenez  *    documentation and/or other materials provided with the distribution.
14*ea16f64eSAntonio Huete Jimenez  * 3. All advertising materials mentioning features or use of this software
15*ea16f64eSAntonio Huete Jimenez  *    must display the following acknowledgement:
16*ea16f64eSAntonio Huete Jimenez  *	This product includes software developed by the Computer Systems
17*ea16f64eSAntonio Huete Jimenez  *	Engineering Group at Lawrence Berkeley Laboratory.
18*ea16f64eSAntonio Huete Jimenez  * 4. Neither the name of the University nor of the Laboratory may be used
19*ea16f64eSAntonio Huete Jimenez  *    to endorse or promote products derived from this software without
20*ea16f64eSAntonio Huete Jimenez  *    specific prior written permission.
21*ea16f64eSAntonio Huete Jimenez  *
22*ea16f64eSAntonio Huete Jimenez  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23*ea16f64eSAntonio Huete Jimenez  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24*ea16f64eSAntonio Huete Jimenez  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25*ea16f64eSAntonio Huete Jimenez  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26*ea16f64eSAntonio Huete Jimenez  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27*ea16f64eSAntonio Huete Jimenez  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28*ea16f64eSAntonio Huete Jimenez  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*ea16f64eSAntonio Huete Jimenez  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30*ea16f64eSAntonio Huete Jimenez  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31*ea16f64eSAntonio Huete Jimenez  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32*ea16f64eSAntonio Huete Jimenez  * SUCH DAMAGE.
33*ea16f64eSAntonio Huete Jimenez  */
34*ea16f64eSAntonio Huete Jimenez 
35*ea16f64eSAntonio Huete Jimenez #ifdef _WIN32
36*ea16f64eSAntonio Huete Jimenez #include <stdio.h>
37*ea16f64eSAntonio Huete Jimenez 
38*ea16f64eSAntonio Huete Jimenez #include <pcap/pcap.h>	/* Needed for PCAP_ERRBUF_SIZE */
39*ea16f64eSAntonio Huete Jimenez 
40*ea16f64eSAntonio Huete Jimenez #include "charconv.h"
41*ea16f64eSAntonio Huete Jimenez 
42*ea16f64eSAntonio Huete Jimenez wchar_t *
cp_to_utf_16le(UINT codepage,const char * cp_string,DWORD flags)43*ea16f64eSAntonio Huete Jimenez cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
44*ea16f64eSAntonio Huete Jimenez {
45*ea16f64eSAntonio Huete Jimenez 	int utf16le_len;
46*ea16f64eSAntonio Huete Jimenez 	wchar_t *utf16le_string;
47*ea16f64eSAntonio Huete Jimenez 
48*ea16f64eSAntonio Huete Jimenez 	/*
49*ea16f64eSAntonio Huete Jimenez 	 * Map from the specified code page to UTF-16LE.
50*ea16f64eSAntonio Huete Jimenez 	 * First, find out how big a buffer we'll need.
51*ea16f64eSAntonio Huete Jimenez 	 */
52*ea16f64eSAntonio Huete Jimenez 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
53*ea16f64eSAntonio Huete Jimenez 	    NULL, 0);
54*ea16f64eSAntonio Huete Jimenez 	if (utf16le_len == 0) {
55*ea16f64eSAntonio Huete Jimenez 		/*
56*ea16f64eSAntonio Huete Jimenez 		 * Error.  Fail with EINVAL.
57*ea16f64eSAntonio Huete Jimenez 		 */
58*ea16f64eSAntonio Huete Jimenez 		errno = EINVAL;
59*ea16f64eSAntonio Huete Jimenez 		return (NULL);
60*ea16f64eSAntonio Huete Jimenez 	}
61*ea16f64eSAntonio Huete Jimenez 
62*ea16f64eSAntonio Huete Jimenez 	/*
63*ea16f64eSAntonio Huete Jimenez 	 * Now attempt to allocate a buffer for that.
64*ea16f64eSAntonio Huete Jimenez 	 */
65*ea16f64eSAntonio Huete Jimenez 	utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
66*ea16f64eSAntonio Huete Jimenez 	if (utf16le_string == NULL) {
67*ea16f64eSAntonio Huete Jimenez 		/*
68*ea16f64eSAntonio Huete Jimenez 		 * Not enough memory; assume errno has been
69*ea16f64eSAntonio Huete Jimenez 		 * set, and fail.
70*ea16f64eSAntonio Huete Jimenez 		 */
71*ea16f64eSAntonio Huete Jimenez 		return (NULL);
72*ea16f64eSAntonio Huete Jimenez 	}
73*ea16f64eSAntonio Huete Jimenez 
74*ea16f64eSAntonio Huete Jimenez 	/*
75*ea16f64eSAntonio Huete Jimenez 	 * Now convert.
76*ea16f64eSAntonio Huete Jimenez 	 */
77*ea16f64eSAntonio Huete Jimenez 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
78*ea16f64eSAntonio Huete Jimenez 	    utf16le_string, utf16le_len);
79*ea16f64eSAntonio Huete Jimenez 	if (utf16le_len == 0) {
80*ea16f64eSAntonio Huete Jimenez 		/*
81*ea16f64eSAntonio Huete Jimenez 		 * Error.  Fail with EINVAL.
82*ea16f64eSAntonio Huete Jimenez 		 * XXX - should this ever happen, given that
83*ea16f64eSAntonio Huete Jimenez 		 * we already ran the string through
84*ea16f64eSAntonio Huete Jimenez 		 * MultiByteToWideChar() to find out how big
85*ea16f64eSAntonio Huete Jimenez 		 * a buffer we needed?
86*ea16f64eSAntonio Huete Jimenez 		 */
87*ea16f64eSAntonio Huete Jimenez 		free(utf16le_string);
88*ea16f64eSAntonio Huete Jimenez 		errno = EINVAL;
89*ea16f64eSAntonio Huete Jimenez 		return (NULL);
90*ea16f64eSAntonio Huete Jimenez 	}
91*ea16f64eSAntonio Huete Jimenez 	return (utf16le_string);
92*ea16f64eSAntonio Huete Jimenez }
93*ea16f64eSAntonio Huete Jimenez 
94*ea16f64eSAntonio Huete Jimenez char *
utf_16le_to_cp(UINT codepage,const wchar_t * utf16le_string)95*ea16f64eSAntonio Huete Jimenez utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
96*ea16f64eSAntonio Huete Jimenez {
97*ea16f64eSAntonio Huete Jimenez 	int cp_len;
98*ea16f64eSAntonio Huete Jimenez 	char *cp_string;
99*ea16f64eSAntonio Huete Jimenez 
100*ea16f64eSAntonio Huete Jimenez 	/*
101*ea16f64eSAntonio Huete Jimenez 	 * Map from UTF-16LE to the specified code page.
102*ea16f64eSAntonio Huete Jimenez 	 * First, find out how big a buffer we'll need.
103*ea16f64eSAntonio Huete Jimenez 	 * We convert composite characters to precomposed characters,
104*ea16f64eSAntonio Huete Jimenez 	 * as that's what Windows expects.
105*ea16f64eSAntonio Huete Jimenez 	 */
106*ea16f64eSAntonio Huete Jimenez 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
107*ea16f64eSAntonio Huete Jimenez 	    utf16le_string, -1, NULL, 0, NULL, NULL);
108*ea16f64eSAntonio Huete Jimenez 	if (cp_len == 0) {
109*ea16f64eSAntonio Huete Jimenez 		/*
110*ea16f64eSAntonio Huete Jimenez 		 * Error.  Fail with EINVAL.
111*ea16f64eSAntonio Huete Jimenez 		 */
112*ea16f64eSAntonio Huete Jimenez 		errno = EINVAL;
113*ea16f64eSAntonio Huete Jimenez 		return (NULL);
114*ea16f64eSAntonio Huete Jimenez 	}
115*ea16f64eSAntonio Huete Jimenez 
116*ea16f64eSAntonio Huete Jimenez 	/*
117*ea16f64eSAntonio Huete Jimenez 	 * Now attempt to allocate a buffer for that.
118*ea16f64eSAntonio Huete Jimenez 	 */
119*ea16f64eSAntonio Huete Jimenez 	cp_string = malloc(cp_len * sizeof (char));
120*ea16f64eSAntonio Huete Jimenez 	if (cp_string == NULL) {
121*ea16f64eSAntonio Huete Jimenez 		/*
122*ea16f64eSAntonio Huete Jimenez 		 * Not enough memory; assume errno has been
123*ea16f64eSAntonio Huete Jimenez 		 * set, and fail.
124*ea16f64eSAntonio Huete Jimenez 		 */
125*ea16f64eSAntonio Huete Jimenez 		return (NULL);
126*ea16f64eSAntonio Huete Jimenez 	}
127*ea16f64eSAntonio Huete Jimenez 
128*ea16f64eSAntonio Huete Jimenez 	/*
129*ea16f64eSAntonio Huete Jimenez 	 * Now convert.
130*ea16f64eSAntonio Huete Jimenez 	 */
131*ea16f64eSAntonio Huete Jimenez 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
132*ea16f64eSAntonio Huete Jimenez 	    utf16le_string, -1, cp_string, cp_len, NULL, NULL);
133*ea16f64eSAntonio Huete Jimenez 	if (cp_len == 0) {
134*ea16f64eSAntonio Huete Jimenez 		/*
135*ea16f64eSAntonio Huete Jimenez 		 * Error.  Fail with EINVAL.
136*ea16f64eSAntonio Huete Jimenez 		 * XXX - should this ever happen, given that
137*ea16f64eSAntonio Huete Jimenez 		 * we already ran the string through
138*ea16f64eSAntonio Huete Jimenez 		 * WideCharToMultiByte() to find out how big
139*ea16f64eSAntonio Huete Jimenez 		 * a buffer we needed?
140*ea16f64eSAntonio Huete Jimenez 		 */
141*ea16f64eSAntonio Huete Jimenez 		free(cp_string);
142*ea16f64eSAntonio Huete Jimenez 		errno = EINVAL;
143*ea16f64eSAntonio Huete Jimenez 		return (NULL);
144*ea16f64eSAntonio Huete Jimenez 	}
145*ea16f64eSAntonio Huete Jimenez 	return (cp_string);
146*ea16f64eSAntonio Huete Jimenez }
147*ea16f64eSAntonio Huete Jimenez 
148*ea16f64eSAntonio Huete Jimenez /*
149*ea16f64eSAntonio Huete Jimenez  * Convert an error message string from UTF-8 to the local code page, as
150*ea16f64eSAntonio Huete Jimenez  * best we can.
151*ea16f64eSAntonio Huete Jimenez  *
152*ea16f64eSAntonio Huete Jimenez  * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
153*ea16f64eSAntonio Huete Jimenez  * if it doesn't fit.
154*ea16f64eSAntonio Huete Jimenez  */
155*ea16f64eSAntonio Huete Jimenez void
utf_8_to_acp_truncated(char * errbuf)156*ea16f64eSAntonio Huete Jimenez utf_8_to_acp_truncated(char *errbuf)
157*ea16f64eSAntonio Huete Jimenez {
158*ea16f64eSAntonio Huete Jimenez 	wchar_t *utf_16_errbuf;
159*ea16f64eSAntonio Huete Jimenez 	int retval;
160*ea16f64eSAntonio Huete Jimenez 	DWORD err;
161*ea16f64eSAntonio Huete Jimenez 
162*ea16f64eSAntonio Huete Jimenez 	/*
163*ea16f64eSAntonio Huete Jimenez 	 * Do this by converting to UTF-16LE and then to the local
164*ea16f64eSAntonio Huete Jimenez 	 * code page.  That means we get to use Microsoft's
165*ea16f64eSAntonio Huete Jimenez 	 * conversion routines, rather than having to understand
166*ea16f64eSAntonio Huete Jimenez 	 * all the code pages ourselves, *and* that this routine
167*ea16f64eSAntonio Huete Jimenez 	 * can convert in place.
168*ea16f64eSAntonio Huete Jimenez 	 */
169*ea16f64eSAntonio Huete Jimenez 
170*ea16f64eSAntonio Huete Jimenez 	/*
171*ea16f64eSAntonio Huete Jimenez 	 * Map from UTF-8 to UTF-16LE.
172*ea16f64eSAntonio Huete Jimenez 	 * First, find out how big a buffer we'll need.
173*ea16f64eSAntonio Huete Jimenez 	 * Convert any invalid characters to REPLACEMENT CHARACTER.
174*ea16f64eSAntonio Huete Jimenez 	 */
175*ea16f64eSAntonio Huete Jimenez 	utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
176*ea16f64eSAntonio Huete Jimenez 	if (utf_16_errbuf == NULL) {
177*ea16f64eSAntonio Huete Jimenez 		/*
178*ea16f64eSAntonio Huete Jimenez 		 * Error.  Give up.
179*ea16f64eSAntonio Huete Jimenez 		 */
180*ea16f64eSAntonio Huete Jimenez 		snprintf(errbuf, PCAP_ERRBUF_SIZE,
181*ea16f64eSAntonio Huete Jimenez 		    "Can't convert error string to the local code page");
182*ea16f64eSAntonio Huete Jimenez 		return;
183*ea16f64eSAntonio Huete Jimenez 	}
184*ea16f64eSAntonio Huete Jimenez 
185*ea16f64eSAntonio Huete Jimenez 	/*
186*ea16f64eSAntonio Huete Jimenez 	 * Now, convert that to the local code page.
187*ea16f64eSAntonio Huete Jimenez 	 * Use the current thread's code page.  For unconvertable
188*ea16f64eSAntonio Huete Jimenez 	 * characters, let it pick the "best fit" character.
189*ea16f64eSAntonio Huete Jimenez 	 *
190*ea16f64eSAntonio Huete Jimenez 	 * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
191*ea16f64eSAntonio Huete Jimenez 	 * does if the buffer isn't big enough, but we don't want to have
192*ea16f64eSAntonio Huete Jimenez 	 * to handle all local code pages ourselves; doing so requires
193*ea16f64eSAntonio Huete Jimenez 	 * knowledge of all those code pages, including knowledge of how
194*ea16f64eSAntonio Huete Jimenez 	 * characters are formed in thoe code pages so that we can avoid
195*ea16f64eSAntonio Huete Jimenez 	 * cutting a multi-byte character into pieces.
196*ea16f64eSAntonio Huete Jimenez 	 *
197*ea16f64eSAntonio Huete Jimenez 	 * Converting to an un-truncated string using Windows APIs, and
198*ea16f64eSAntonio Huete Jimenez 	 * then copying to the buffer, still requires knowledge of how
199*ea16f64eSAntonio Huete Jimenez 	 * characters are formed in the target code page.
200*ea16f64eSAntonio Huete Jimenez 	 */
201*ea16f64eSAntonio Huete Jimenez 	retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
202*ea16f64eSAntonio Huete Jimenez 	    errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
203*ea16f64eSAntonio Huete Jimenez 	if (retval == 0) {
204*ea16f64eSAntonio Huete Jimenez 		err = GetLastError();
205*ea16f64eSAntonio Huete Jimenez 		free(utf_16_errbuf);
206*ea16f64eSAntonio Huete Jimenez 		if (err == ERROR_INSUFFICIENT_BUFFER)
207*ea16f64eSAntonio Huete Jimenez 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
208*ea16f64eSAntonio Huete Jimenez 			    "The error string, in the local code page, didn't fit in the buffer");
209*ea16f64eSAntonio Huete Jimenez 		else
210*ea16f64eSAntonio Huete Jimenez 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
211*ea16f64eSAntonio Huete Jimenez 			    "Can't convert error string to the local code page");
212*ea16f64eSAntonio Huete Jimenez 		return;
213*ea16f64eSAntonio Huete Jimenez 	}
214*ea16f64eSAntonio Huete Jimenez 	free(utf_16_errbuf);
215*ea16f64eSAntonio Huete Jimenez }
216*ea16f64eSAntonio Huete Jimenez #endif
217