xref: /freebsd/contrib/libpcap/charconv.c (revision 6f9cba8f)
1*6f9cba8fSJoseph Mingrone /* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
2*6f9cba8fSJoseph Mingrone /*
3*6f9cba8fSJoseph Mingrone  * Copyright (c) 1993, 1994, 1995, 1996, 1997
4*6f9cba8fSJoseph Mingrone  *	The Regents of the University of California.  All rights reserved.
5*6f9cba8fSJoseph Mingrone  *
6*6f9cba8fSJoseph Mingrone  * Redistribution and use in source and binary forms, with or without
7*6f9cba8fSJoseph Mingrone  * modification, are permitted provided that the following conditions
8*6f9cba8fSJoseph Mingrone  * are met:
9*6f9cba8fSJoseph Mingrone  * 1. Redistributions of source code must retain the above copyright
10*6f9cba8fSJoseph Mingrone  *    notice, this list of conditions and the following disclaimer.
11*6f9cba8fSJoseph Mingrone  * 2. Redistributions in binary form must reproduce the above copyright
12*6f9cba8fSJoseph Mingrone  *    notice, this list of conditions and the following disclaimer in the
13*6f9cba8fSJoseph Mingrone  *    documentation and/or other materials provided with the distribution.
14*6f9cba8fSJoseph Mingrone  * 3. All advertising materials mentioning features or use of this software
15*6f9cba8fSJoseph Mingrone  *    must display the following acknowledgement:
16*6f9cba8fSJoseph Mingrone  *	This product includes software developed by the Computer Systems
17*6f9cba8fSJoseph Mingrone  *	Engineering Group at Lawrence Berkeley Laboratory.
18*6f9cba8fSJoseph Mingrone  * 4. Neither the name of the University nor of the Laboratory may be used
19*6f9cba8fSJoseph Mingrone  *    to endorse or promote products derived from this software without
20*6f9cba8fSJoseph Mingrone  *    specific prior written permission.
21*6f9cba8fSJoseph Mingrone  *
22*6f9cba8fSJoseph Mingrone  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23*6f9cba8fSJoseph Mingrone  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24*6f9cba8fSJoseph Mingrone  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25*6f9cba8fSJoseph Mingrone  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26*6f9cba8fSJoseph Mingrone  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27*6f9cba8fSJoseph Mingrone  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28*6f9cba8fSJoseph Mingrone  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*6f9cba8fSJoseph Mingrone  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30*6f9cba8fSJoseph Mingrone  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31*6f9cba8fSJoseph Mingrone  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32*6f9cba8fSJoseph Mingrone  * SUCH DAMAGE.
33*6f9cba8fSJoseph Mingrone  */
34*6f9cba8fSJoseph Mingrone 
35*6f9cba8fSJoseph Mingrone #ifdef _WIN32
36*6f9cba8fSJoseph Mingrone #include <stdio.h>
37*6f9cba8fSJoseph Mingrone #include <errno.h>
38*6f9cba8fSJoseph Mingrone 
39*6f9cba8fSJoseph Mingrone #include <pcap/pcap.h>	/* Needed for PCAP_ERRBUF_SIZE */
40*6f9cba8fSJoseph Mingrone 
41*6f9cba8fSJoseph Mingrone #include "charconv.h"
42*6f9cba8fSJoseph Mingrone 
43*6f9cba8fSJoseph Mingrone wchar_t *
cp_to_utf_16le(UINT codepage,const char * cp_string,DWORD flags)44*6f9cba8fSJoseph Mingrone cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
45*6f9cba8fSJoseph Mingrone {
46*6f9cba8fSJoseph Mingrone 	int utf16le_len;
47*6f9cba8fSJoseph Mingrone 	wchar_t *utf16le_string;
48*6f9cba8fSJoseph Mingrone 
49*6f9cba8fSJoseph Mingrone 	/*
50*6f9cba8fSJoseph Mingrone 	 * Map from the specified code page to UTF-16LE.
51*6f9cba8fSJoseph Mingrone 	 * First, find out how big a buffer we'll need.
52*6f9cba8fSJoseph Mingrone 	 */
53*6f9cba8fSJoseph Mingrone 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
54*6f9cba8fSJoseph Mingrone 	    NULL, 0);
55*6f9cba8fSJoseph Mingrone 	if (utf16le_len == 0) {
56*6f9cba8fSJoseph Mingrone 		/*
57*6f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
58*6f9cba8fSJoseph Mingrone 		 */
59*6f9cba8fSJoseph Mingrone 		errno = EINVAL;
60*6f9cba8fSJoseph Mingrone 		return (NULL);
61*6f9cba8fSJoseph Mingrone 	}
62*6f9cba8fSJoseph Mingrone 
63*6f9cba8fSJoseph Mingrone 	/*
64*6f9cba8fSJoseph Mingrone 	 * Now attempt to allocate a buffer for that.
65*6f9cba8fSJoseph Mingrone 	 */
66*6f9cba8fSJoseph Mingrone 	utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
67*6f9cba8fSJoseph Mingrone 	if (utf16le_string == NULL) {
68*6f9cba8fSJoseph Mingrone 		/*
69*6f9cba8fSJoseph Mingrone 		 * Not enough memory; assume errno has been
70*6f9cba8fSJoseph Mingrone 		 * set, and fail.
71*6f9cba8fSJoseph Mingrone 		 */
72*6f9cba8fSJoseph Mingrone 		return (NULL);
73*6f9cba8fSJoseph Mingrone 	}
74*6f9cba8fSJoseph Mingrone 
75*6f9cba8fSJoseph Mingrone 	/*
76*6f9cba8fSJoseph Mingrone 	 * Now convert.
77*6f9cba8fSJoseph Mingrone 	 */
78*6f9cba8fSJoseph Mingrone 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
79*6f9cba8fSJoseph Mingrone 	    utf16le_string, utf16le_len);
80*6f9cba8fSJoseph Mingrone 	if (utf16le_len == 0) {
81*6f9cba8fSJoseph Mingrone 		/*
82*6f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
83*6f9cba8fSJoseph Mingrone 		 * XXX - should this ever happen, given that
84*6f9cba8fSJoseph Mingrone 		 * we already ran the string through
85*6f9cba8fSJoseph Mingrone 		 * MultiByteToWideChar() to find out how big
86*6f9cba8fSJoseph Mingrone 		 * a buffer we needed?
87*6f9cba8fSJoseph Mingrone 		 */
88*6f9cba8fSJoseph Mingrone 		free(utf16le_string);
89*6f9cba8fSJoseph Mingrone 		errno = EINVAL;
90*6f9cba8fSJoseph Mingrone 		return (NULL);
91*6f9cba8fSJoseph Mingrone 	}
92*6f9cba8fSJoseph Mingrone 	return (utf16le_string);
93*6f9cba8fSJoseph Mingrone }
94*6f9cba8fSJoseph Mingrone 
95*6f9cba8fSJoseph Mingrone char *
utf_16le_to_cp(UINT codepage,const wchar_t * utf16le_string)96*6f9cba8fSJoseph Mingrone utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
97*6f9cba8fSJoseph Mingrone {
98*6f9cba8fSJoseph Mingrone 	int cp_len;
99*6f9cba8fSJoseph Mingrone 	char *cp_string;
100*6f9cba8fSJoseph Mingrone 
101*6f9cba8fSJoseph Mingrone 	/*
102*6f9cba8fSJoseph Mingrone 	 * Map from UTF-16LE to the specified code page.
103*6f9cba8fSJoseph Mingrone 	 * First, find out how big a buffer we'll need.
104*6f9cba8fSJoseph Mingrone 	 * We convert composite characters to precomposed characters,
105*6f9cba8fSJoseph Mingrone 	 * as that's what Windows expects.
106*6f9cba8fSJoseph Mingrone 	 */
107*6f9cba8fSJoseph Mingrone 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
108*6f9cba8fSJoseph Mingrone 	    utf16le_string, -1, NULL, 0, NULL, NULL);
109*6f9cba8fSJoseph Mingrone 	if (cp_len == 0) {
110*6f9cba8fSJoseph Mingrone 		/*
111*6f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
112*6f9cba8fSJoseph Mingrone 		 */
113*6f9cba8fSJoseph Mingrone 		errno = EINVAL;
114*6f9cba8fSJoseph Mingrone 		return (NULL);
115*6f9cba8fSJoseph Mingrone 	}
116*6f9cba8fSJoseph Mingrone 
117*6f9cba8fSJoseph Mingrone 	/*
118*6f9cba8fSJoseph Mingrone 	 * Now attempt to allocate a buffer for that.
119*6f9cba8fSJoseph Mingrone 	 */
120*6f9cba8fSJoseph Mingrone 	cp_string = malloc(cp_len * sizeof (char));
121*6f9cba8fSJoseph Mingrone 	if (cp_string == NULL) {
122*6f9cba8fSJoseph Mingrone 		/*
123*6f9cba8fSJoseph Mingrone 		 * Not enough memory; assume errno has been
124*6f9cba8fSJoseph Mingrone 		 * set, and fail.
125*6f9cba8fSJoseph Mingrone 		 */
126*6f9cba8fSJoseph Mingrone 		return (NULL);
127*6f9cba8fSJoseph Mingrone 	}
128*6f9cba8fSJoseph Mingrone 
129*6f9cba8fSJoseph Mingrone 	/*
130*6f9cba8fSJoseph Mingrone 	 * Now convert.
131*6f9cba8fSJoseph Mingrone 	 */
132*6f9cba8fSJoseph Mingrone 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
133*6f9cba8fSJoseph Mingrone 	    utf16le_string, -1, cp_string, cp_len, NULL, NULL);
134*6f9cba8fSJoseph Mingrone 	if (cp_len == 0) {
135*6f9cba8fSJoseph Mingrone 		/*
136*6f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
137*6f9cba8fSJoseph Mingrone 		 * XXX - should this ever happen, given that
138*6f9cba8fSJoseph Mingrone 		 * we already ran the string through
139*6f9cba8fSJoseph Mingrone 		 * WideCharToMultiByte() to find out how big
140*6f9cba8fSJoseph Mingrone 		 * a buffer we needed?
141*6f9cba8fSJoseph Mingrone 		 */
142*6f9cba8fSJoseph Mingrone 		free(cp_string);
143*6f9cba8fSJoseph Mingrone 		errno = EINVAL;
144*6f9cba8fSJoseph Mingrone 		return (NULL);
145*6f9cba8fSJoseph Mingrone 	}
146*6f9cba8fSJoseph Mingrone 	return (cp_string);
147*6f9cba8fSJoseph Mingrone }
148*6f9cba8fSJoseph Mingrone 
149*6f9cba8fSJoseph Mingrone /*
150*6f9cba8fSJoseph Mingrone  * Convert an error message string from UTF-8 to the local code page, as
151*6f9cba8fSJoseph Mingrone  * best we can.
152*6f9cba8fSJoseph Mingrone  *
153*6f9cba8fSJoseph Mingrone  * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
154*6f9cba8fSJoseph Mingrone  * if it doesn't fit.
155*6f9cba8fSJoseph Mingrone  */
156*6f9cba8fSJoseph Mingrone void
utf_8_to_acp_truncated(char * errbuf)157*6f9cba8fSJoseph Mingrone utf_8_to_acp_truncated(char *errbuf)
158*6f9cba8fSJoseph Mingrone {
159*6f9cba8fSJoseph Mingrone 	wchar_t *utf_16_errbuf;
160*6f9cba8fSJoseph Mingrone 	int retval;
161*6f9cba8fSJoseph Mingrone 	DWORD err;
162*6f9cba8fSJoseph Mingrone 
163*6f9cba8fSJoseph Mingrone 	/*
164*6f9cba8fSJoseph Mingrone 	 * Do this by converting to UTF-16LE and then to the local
165*6f9cba8fSJoseph Mingrone 	 * code page.  That means we get to use Microsoft's
166*6f9cba8fSJoseph Mingrone 	 * conversion routines, rather than having to understand
167*6f9cba8fSJoseph Mingrone 	 * all the code pages ourselves, *and* that this routine
168*6f9cba8fSJoseph Mingrone 	 * can convert in place.
169*6f9cba8fSJoseph Mingrone 	 */
170*6f9cba8fSJoseph Mingrone 
171*6f9cba8fSJoseph Mingrone 	/*
172*6f9cba8fSJoseph Mingrone 	 * Map from UTF-8 to UTF-16LE.
173*6f9cba8fSJoseph Mingrone 	 * First, find out how big a buffer we'll need.
174*6f9cba8fSJoseph Mingrone 	 * Convert any invalid characters to REPLACEMENT CHARACTER.
175*6f9cba8fSJoseph Mingrone 	 */
176*6f9cba8fSJoseph Mingrone 	utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
177*6f9cba8fSJoseph Mingrone 	if (utf_16_errbuf == NULL) {
178*6f9cba8fSJoseph Mingrone 		/*
179*6f9cba8fSJoseph Mingrone 		 * Error.  Give up.
180*6f9cba8fSJoseph Mingrone 		 */
181*6f9cba8fSJoseph Mingrone 		snprintf(errbuf, PCAP_ERRBUF_SIZE,
182*6f9cba8fSJoseph Mingrone 		    "Can't convert error string to the local code page");
183*6f9cba8fSJoseph Mingrone 		return;
184*6f9cba8fSJoseph Mingrone 	}
185*6f9cba8fSJoseph Mingrone 
186*6f9cba8fSJoseph Mingrone 	/*
187*6f9cba8fSJoseph Mingrone 	 * Now, convert that to the local code page.
188*6f9cba8fSJoseph Mingrone 	 * Use the current thread's code page.  For unconvertable
189*6f9cba8fSJoseph Mingrone 	 * characters, let it pick the "best fit" character.
190*6f9cba8fSJoseph Mingrone 	 *
191*6f9cba8fSJoseph Mingrone 	 * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
192*6f9cba8fSJoseph Mingrone 	 * does if the buffer isn't big enough, but we don't want to have
193*6f9cba8fSJoseph Mingrone 	 * to handle all local code pages ourselves; doing so requires
194*6f9cba8fSJoseph Mingrone 	 * knowledge of all those code pages, including knowledge of how
195*6f9cba8fSJoseph Mingrone 	 * characters are formed in thoe code pages so that we can avoid
196*6f9cba8fSJoseph Mingrone 	 * cutting a multi-byte character into pieces.
197*6f9cba8fSJoseph Mingrone 	 *
198*6f9cba8fSJoseph Mingrone 	 * Converting to an un-truncated string using Windows APIs, and
199*6f9cba8fSJoseph Mingrone 	 * then copying to the buffer, still requires knowledge of how
200*6f9cba8fSJoseph Mingrone 	 * characters are formed in the target code page.
201*6f9cba8fSJoseph Mingrone 	 */
202*6f9cba8fSJoseph Mingrone 	retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
203*6f9cba8fSJoseph Mingrone 	    errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
204*6f9cba8fSJoseph Mingrone 	if (retval == 0) {
205*6f9cba8fSJoseph Mingrone 		err = GetLastError();
206*6f9cba8fSJoseph Mingrone 		free(utf_16_errbuf);
207*6f9cba8fSJoseph Mingrone 		if (err == ERROR_INSUFFICIENT_BUFFER)
208*6f9cba8fSJoseph Mingrone 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
209*6f9cba8fSJoseph Mingrone 			    "The error string, in the local code page, didn't fit in the buffer");
210*6f9cba8fSJoseph Mingrone 		else
211*6f9cba8fSJoseph Mingrone 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
212*6f9cba8fSJoseph Mingrone 			    "Can't convert error string to the local code page");
213*6f9cba8fSJoseph Mingrone 		return;
214*6f9cba8fSJoseph Mingrone 	}
215*6f9cba8fSJoseph Mingrone 	free(utf_16_errbuf);
216*6f9cba8fSJoseph Mingrone }
217*6f9cba8fSJoseph Mingrone #endif
218