1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #ifndef _LOG4CXX_HELPERS_TRANSCODER_H
19 #define _LOG4CXX_HELPERS_TRANSCODER_H
20 
21 #if defined(_MSC_VER)
22 	#pragma warning ( push )
23 	#pragma warning ( disable: 4231 4251 4275 4786 )
24 #endif
25 
26 #include <log4cxx/logstring.h>
27 
28 
29 namespace log4cxx
30 {
31 namespace helpers
32 {
33 class ByteBuffer;
34 class Pool;
35 /**
36 *    Simple transcoder for converting between
37 *      external char and wchar_t strings and
38 *      internal strings.
39 *
40 */
41 class LOG4CXX_EXPORT Transcoder
42 {
43 	public:
44 
45 
46 		/**
47 		 *   Appends this specified string of UTF-8 characters to LogString.
48 		 */
49 		static void decodeUTF8(const std::string& src, LogString& dst);
50 		/**
51 		 *    Converts the LogString to a UTF-8 string.
52 		 */
53 		static void encodeUTF8(const LogString& src, std::string& dst);
54 		/**
55 		 *    Converts the LogString to a UTF-8 string.
56 		 */
57 		static char* encodeUTF8(const LogString& src, log4cxx::helpers::Pool& p);
58 		/**
59 		 *    Append UCS-4 code point to a byte buffer as UTF-8.
60 		 */
61 		static void encodeUTF8(unsigned int sv, ByteBuffer& dst);
62 		/**
63 		 *    Append UCS-4 code point to a byte buffer as UTF-16LE.
64 		 */
65 		static void encodeUTF16LE(unsigned int sv, ByteBuffer& dst);
66 		/**
67 		 *    Append UCS-4 code point to a byte buffer as UTF-16BE.
68 		 */
69 		static void encodeUTF16BE(unsigned int sv, ByteBuffer& dst);
70 
71 
72 		/**
73 		 *   Decodes next character from a UTF-8 string.
74 		 *   @param in string from which the character is extracted.
75 		 *   @param iter iterator addressing start of character, will be
76 		 *   advanced to next character if successful.
77 		 *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
78 		 */
79 		static unsigned int decode(const std::string& in,
80 			std::string::const_iterator& iter);
81 
82 		/**
83 		  *   Appends UCS-4 value to a UTF-8 string.
84 		  *   @param ch UCS-4 value.
85 		  *   @param dst destination.
86 		  */
87 		static void encode(unsigned int ch, std::string& dst);
88 
89 		/**
90 		 *    Appends string in the current code-page
91 		 *       to a LogString.
92 		 */
93 		static void decode(const std::string& src, LogString& dst);
94 		/**
95 		 *     Appends a LogString to a string in the current
96 		 *        code-page.  Unrepresentable characters may be
97 		 *        replaced with loss characters.
98 		*/
99 		static void encode(const LogString& src, std::string& dst);
100 
101 		/**
102 		  *     Encodes the specified LogString to the current
103 		  *       character set.
104 		  *      @param src string to encode.
105 		  *      @param p pool from which to allocate return value.
106 		  *      @return pool allocated string.
107 		  */
108 		static char* encode(const LogString& src, log4cxx::helpers::Pool& p);
109 
110 
111 
112 #if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR_T || defined(WIN32) || defined(_WIN32)
113 		static void decode(const std::wstring& src, LogString& dst);
114 		static void encode(const LogString& src, std::wstring& dst);
115 		static wchar_t* wencode(const LogString& src, log4cxx::helpers::Pool& p);
116 
117 		/**
118 		 *   Decodes next character from a wstring.
119 		 *   @param in string from which the character is extracted.
120 		 *   @param iter iterator addressing start of character, will be
121 		 *   advanced to next character if successful.
122 		 *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
123 		 */
124 		static unsigned int decode(const std::wstring& in,
125 			std::wstring::const_iterator& iter);
126 
127 		/**
128 		  *   Appends UCS-4 value to a UTF-8 string.
129 		  *   @param ch UCS-4 value.
130 		  *   @param dst destination.
131 		  */
132 		static void encode(unsigned int ch, std::wstring& dst);
133 
134 #endif
135 
136 
137 #if LOG4CXX_UNICHAR_API || LOG4CXX_CFSTRING_API || LOG4CXX_LOGCHAR_IS_UNICHAR
138 		static void decode(const std::basic_string<UniChar>& src, LogString& dst);
139 		static void encode(const LogString& src, std::basic_string<UniChar>& dst);
140 
141 		/**
142 		 *   Decodes next character from a UniChar string.
143 		 *   @param in string from which the character is extracted.
144 		 *   @param iter iterator addressing start of character, will be
145 		 *   advanced to next character if successful.
146 		 *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
147 		 */
148 		static unsigned int decode(const std::basic_string<UniChar>& in,
149 			std::basic_string<UniChar>::const_iterator& iter);
150 
151 		/**
152 		  *   Appends UCS-4 value to a UTF-8 string.
153 		  *   @param ch UCS-4 value.
154 		  *   @param dst destination.
155 		  */
156 		static void encode(unsigned int ch, std::basic_string<UniChar>& dst);
157 
158 #endif
159 
160 #if LOG4CXX_CFSTRING_API
161 		static void decode(const CFStringRef& src, LogString& dst);
162 		static CFStringRef encode(const LogString& src);
163 #endif
164 
165 		enum { LOSSCHAR = 0x3F };
166 
167 		/**
168 		 *   Returns a logchar value given a character literal in the ASCII charset.
169 		 *   Used to implement the LOG4CXX_STR macro for EBCDIC and UNICHAR.
170 		 */
171 		static logchar decode(char v);
172 		/**
173 		 *   Returns a LogString given a string literal in the ASCII charset.
174 		 *   Used to implement the LOG4CXX_STR macro for EBCDIC and UNICHAR.
175 		 */
176 		static LogString decode(const char* v);
177 
178 		/**
179 		 *   Encodes a charset name in the default encoding
180 		 *      without using a CharsetEncoder (which could trigger recursion).
181 		 */
182 		static std::string encodeCharsetName(const LogString& charsetName);
183 
184 	private:
185 
186 	private:
187 		Transcoder();
188 		Transcoder(const Transcoder&);
189 		Transcoder& operator=(const Transcoder&);
190 		enum { BUFSIZE = 256 };
191 		static size_t encodeUTF8(unsigned int ch, char* dst);
192 		static size_t encodeUTF16BE(unsigned int ch, char* dst);
193 		static size_t encodeUTF16LE(unsigned int ch, char* dst);
194 
195 };
196 }
197 }
198 
/**
 *   Declares a std::string named var and passes it, together with src,
 *   to log4cxx::helpers::Transcoder::encode(src, var).
 *
 *   The expansion is a declaration followed by a call (the caller supplies
 *   the final semicolon), so use it only where a declaration statement is
 *   allowed, never as the body of an unbraced if/for/while.
 */
#define LOG4CXX_ENCODE_CHAR(var, src) \
	std::string var; \
	log4cxx::helpers::Transcoder::encode(src, var)
202 
/**
 *   Declares a log4cxx::LogString named var and passes it, together with
 *   src, to log4cxx::helpers::Transcoder::decode(src, var).
 *
 *   The expansion is a declaration followed by a call (the caller supplies
 *   the final semicolon), so use it only where a declaration statement is
 *   allowed, never as the body of an unbraced if/for/while.
 */
#define LOG4CXX_DECODE_CHAR(var, src) \
	log4cxx::LogString var; \
	log4cxx::helpers::Transcoder::decode(src, var)
206 
/**
 *   Declares a log4cxx::LogString named var and passes it, together with
 *   src, to log4cxx::helpers::Transcoder::decode(src, var).
 *
 *   The expansion is a declaration followed by a call, so use it only
 *   where a declaration statement is allowed.
 *
 *   NOTE(review): this macro is defined unconditionally, whereas the
 *   Transcoder::decode overload taking a CFStringRef is only declared when
 *   LOG4CXX_CFSTRING_API is enabled; expanding it with a CFStringRef
 *   argument in other configurations will presumably fail to compile.
 */
#define LOG4CXX_DECODE_CFSTRING(var, src) \
	log4cxx::LogString var; \
	log4cxx::helpers::Transcoder::decode(src, var)
210 
/**
 *   Declares a CFStringRef named var initialised with the result of
 *   log4cxx::helpers::Transcoder::encode(src).
 *
 *   NOTE(review): this macro is defined unconditionally, whereas the
 *   Transcoder::encode overload returning a CFStringRef is only declared
 *   when LOG4CXX_CFSTRING_API is enabled.
 */
#define LOG4CXX_ENCODE_CFSTRING(var, src) \
	CFStringRef var = log4cxx::helpers::Transcoder::encode(src)
213 
214 
/*
 * LOG4CXX_ENCODE_WCHAR(var, src) / LOG4CXX_DECODE_WCHAR(var, src)
 *
 * Introduce a local named var that is the std::wstring form (ENCODE) or
 * the log4cxx::LogString form (DECODE) of src.  The definition depends on
 * LOG4CXX_LOGCHAR_IS_WCHAR.
 */
#if LOG4CXX_LOGCHAR_IS_WCHAR

// No conversion is performed: var is a const reference bound to src.
#define LOG4CXX_ENCODE_WCHAR(var, src) \
	const std::wstring& var = src

#define LOG4CXX_DECODE_WCHAR(var, src) \
	const log4cxx::LogString& var = src

#else // !LOG4CXX_LOGCHAR_IS_WCHAR

// var is a newly declared string filled in by Transcoder::encode / decode.
// Each expansion is a declaration followed by a call, so use these only
// where a declaration statement is allowed.
#define LOG4CXX_ENCODE_WCHAR(var, src) \
	std::wstring var; \
	log4cxx::helpers::Transcoder::encode(src, var)

#define LOG4CXX_DECODE_WCHAR(var, src) \
	log4cxx::LogString var; \
	log4cxx::helpers::Transcoder::decode(src, var)

#endif // LOG4CXX_LOGCHAR_IS_WCHAR
234 
/*
 * LOG4CXX_ENCODE_UNICHAR(var, src) / LOG4CXX_DECODE_UNICHAR(var, src)
 *
 * Introduce a local named var that is the std::basic_string<UniChar> form
 * (ENCODE) or the log4cxx::LogString form (DECODE) of src.  The definition
 * depends on LOG4CXX_LOGCHAR_IS_UNICHAR.
 */
#if LOG4CXX_LOGCHAR_IS_UNICHAR

// No conversion is performed: var is a const reference bound to src.
#define LOG4CXX_ENCODE_UNICHAR(var, src) \
	const std::basic_string<UniChar>& var = src

#define LOG4CXX_DECODE_UNICHAR(var, src) \
	const log4cxx::LogString& var = src

#else // !LOG4CXX_LOGCHAR_IS_UNICHAR

// var is a newly declared string filled in by Transcoder::encode / decode.
// Each expansion is a declaration followed by a call, so use these only
// where a declaration statement is allowed.
#define LOG4CXX_ENCODE_UNICHAR(var, src) \
	std::basic_string<UniChar> var; \
	log4cxx::helpers::Transcoder::encode(src, var)

#define LOG4CXX_DECODE_UNICHAR(var, src) \
	log4cxx::LogString var; \
	log4cxx::helpers::Transcoder::decode(src, var)

#endif // LOG4CXX_LOGCHAR_IS_UNICHAR
254 
255 #if defined(_MSC_VER)
256 	#pragma warning (pop)
257 #endif
258 
259 #endif //_LOG4CXX_HELPERS_TRANSCODER_H
260