xref: /dragonfly/contrib/libpcap/charconv.c (revision f9993810)
1 /* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
2 /*
3  * Copyright (c) 1993, 1994, 1995, 1996, 1997
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by the Computer Systems
17  *	Engineering Group at Lawrence Berkeley Laboratory.
18  * 4. Neither the name of the University nor of the Laboratory may be used
19  *    to endorse or promote products derived from this software without
20  *    specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #ifdef _WIN32
36 #include <stdio.h>
37 
38 #include <pcap/pcap.h>	/* Needed for PCAP_ERRBUF_SIZE */
39 
40 #include "charconv.h"
41 
42 wchar_t *
43 cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
44 {
45 	int utf16le_len;
46 	wchar_t *utf16le_string;
47 
48 	/*
49 	 * Map from the specified code page to UTF-16LE.
50 	 * First, find out how big a buffer we'll need.
51 	 */
52 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
53 	    NULL, 0);
54 	if (utf16le_len == 0) {
55 		/*
56 		 * Error.  Fail with EINVAL.
57 		 */
58 		errno = EINVAL;
59 		return (NULL);
60 	}
61 
62 	/*
63 	 * Now attempt to allocate a buffer for that.
64 	 */
65 	utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
66 	if (utf16le_string == NULL) {
67 		/*
68 		 * Not enough memory; assume errno has been
69 		 * set, and fail.
70 		 */
71 		return (NULL);
72 	}
73 
74 	/*
75 	 * Now convert.
76 	 */
77 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
78 	    utf16le_string, utf16le_len);
79 	if (utf16le_len == 0) {
80 		/*
81 		 * Error.  Fail with EINVAL.
82 		 * XXX - should this ever happen, given that
83 		 * we already ran the string through
84 		 * MultiByteToWideChar() to find out how big
85 		 * a buffer we needed?
86 		 */
87 		free(utf16le_string);
88 		errno = EINVAL;
89 		return (NULL);
90 	}
91 	return (utf16le_string);
92 }
93 
94 char *
95 utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
96 {
97 	int cp_len;
98 	char *cp_string;
99 
100 	/*
101 	 * Map from UTF-16LE to the specified code page.
102 	 * First, find out how big a buffer we'll need.
103 	 * We convert composite characters to precomposed characters,
104 	 * as that's what Windows expects.
105 	 */
106 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
107 	    utf16le_string, -1, NULL, 0, NULL, NULL);
108 	if (cp_len == 0) {
109 		/*
110 		 * Error.  Fail with EINVAL.
111 		 */
112 		errno = EINVAL;
113 		return (NULL);
114 	}
115 
116 	/*
117 	 * Now attempt to allocate a buffer for that.
118 	 */
119 	cp_string = malloc(cp_len * sizeof (char));
120 	if (cp_string == NULL) {
121 		/*
122 		 * Not enough memory; assume errno has been
123 		 * set, and fail.
124 		 */
125 		return (NULL);
126 	}
127 
128 	/*
129 	 * Now convert.
130 	 */
131 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
132 	    utf16le_string, -1, cp_string, cp_len, NULL, NULL);
133 	if (cp_len == 0) {
134 		/*
135 		 * Error.  Fail with EINVAL.
136 		 * XXX - should this ever happen, given that
137 		 * we already ran the string through
138 		 * WideCharToMultiByte() to find out how big
139 		 * a buffer we needed?
140 		 */
141 		free(cp_string);
142 		errno = EINVAL;
143 		return (NULL);
144 	}
145 	return (cp_string);
146 }
147 
148 /*
149  * Convert an error message string from UTF-8 to the local code page, as
150  * best we can.
151  *
152  * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
153  * if it doesn't fit.
154  */
155 void
156 utf_8_to_acp_truncated(char *errbuf)
157 {
158 	wchar_t *utf_16_errbuf;
159 	int retval;
160 	DWORD err;
161 
162 	/*
163 	 * Do this by converting to UTF-16LE and then to the local
164 	 * code page.  That means we get to use Microsoft's
165 	 * conversion routines, rather than having to understand
166 	 * all the code pages ourselves, *and* that this routine
167 	 * can convert in place.
168 	 */
169 
170 	/*
171 	 * Map from UTF-8 to UTF-16LE.
172 	 * First, find out how big a buffer we'll need.
173 	 * Convert any invalid characters to REPLACEMENT CHARACTER.
174 	 */
175 	utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
176 	if (utf_16_errbuf == NULL) {
177 		/*
178 		 * Error.  Give up.
179 		 */
180 		snprintf(errbuf, PCAP_ERRBUF_SIZE,
181 		    "Can't convert error string to the local code page");
182 		return;
183 	}
184 
185 	/*
186 	 * Now, convert that to the local code page.
187 	 * Use the current thread's code page.  For unconvertable
188 	 * characters, let it pick the "best fit" character.
189 	 *
190 	 * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
191 	 * does if the buffer isn't big enough, but we don't want to have
192 	 * to handle all local code pages ourselves; doing so requires
193 	 * knowledge of all those code pages, including knowledge of how
194 	 * characters are formed in thoe code pages so that we can avoid
195 	 * cutting a multi-byte character into pieces.
196 	 *
197 	 * Converting to an un-truncated string using Windows APIs, and
198 	 * then copying to the buffer, still requires knowledge of how
199 	 * characters are formed in the target code page.
200 	 */
201 	retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
202 	    errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
203 	if (retval == 0) {
204 		err = GetLastError();
205 		free(utf_16_errbuf);
206 		if (err == ERROR_INSUFFICIENT_BUFFER)
207 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
208 			    "The error string, in the local code page, didn't fit in the buffer");
209 		else
210 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
211 			    "Can't convert error string to the local code page");
212 		return;
213 	}
214 	free(utf_16_errbuf);
215 }
216 #endif
217