1 #include "stdafx.h"
2 #include "MASTER.h"
3 
4 #define TECPLOTENGINEMODULE
5 
6 /*
7 ******************************************************************
8 ******************************************************************
9 *******                                                   ********
10 ******  (C) 1988-2010 Tecplot, Inc.                        *******
11 *******                                                   ********
12 ******************************************************************
13 ******************************************************************
14 */
15 #define Q_UNICODEMODULE
16 
17 #include "GLOBAL.h"
18 #include "TASSERT.h"
19 
20 #if !defined TECPLOTKERNEL
21 #include "TranslatedString.h"
22 #endif
23 
24 
25 #if defined TECPLOTKERNEL
26 /* CORE SOURCE CODE REMOVED */
27 #endif
28 
29 #include "ALLOC.h"
30 
31 #include "Q_UNICODE.h"
32 
33 using namespace std;
34 
35 namespace tecplot
36 {
37 namespace strutil
38 {
39 
/*
 * File-local pool mapping a std::string key to a raw char* value.
 * Nothing in the visible part of this file reads or writes the pool.
 * NOTE(review): judging by the name it presumably caches environment strings
 * for code that is not visible here - confirm, including who owns the char*
 * values.
 */
typedef std::map<std::string, char *>      EnvStringPoolMap_t;
static EnvStringPoolMap_t       mapEnvStringPool;
42 
43 
44 #if defined MSWIN
45 
46 
WStringToString(wstring str)47 string WStringToString(wstring str)
48 {
49     REQUIRE("str is any wide string");
50     string Result = WideCharToUtf8(str.c_str());
51 
52     ENSURE("Result is any string");
53     return Result;
54 }
55 
StringToWString(string str)56 wstring StringToWString(string str)
57 {
58     REQUIRE("str is any string");
59 
60     wstring Result = Utf8ToWideChar(str.c_str());
61 
62     ENSURE("Result is any string");
63     return Result;
64 }
65 #endif
66 
/************************************************
 * Utf8Api
 ************************************************/
/*
 * Evaluates to true when cp is one of the code pages accepted by the
 * conversion helpers in this file: 932 (Windows code page 932, Japanese
 * Shift-JIS), CP_UTF8, CP_ACP, CP_OEMCP or CP_THREAD_ACP.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define VALID_CODE_PAGE(cp) \
  ( (cp) == 932 || (cp) == CP_UTF8 || (cp) == CP_ACP || (cp) == CP_OEMCP || (cp) == CP_THREAD_ACP )
72 
73 
74 #if defined TECPLOTKERNEL
75 /* CORE SOURCE CODE REMOVED */
76 #if defined MSWIN && !defined ENGINE
77 #endif
78 #if defined MSWIN
79 #endif
80 #endif
81 
82 
83 #if defined MSWIN
84 
85 #if defined TECPLOTKERNEL
86 /* CORE SOURCE CODE REMOVED */
87 #endif /* TECPLOTKERNEL */
88 
WideCharToMultiByte(const wchar_t * WideString,unsigned int CodePage)89 std::string  WideCharToMultiByte(const wchar_t* WideString,
90                                  unsigned int       CodePage)
91 {
92     REQUIRE(VALID_REF(WideString));
93     REQUIRE(VALID_CODE_PAGE(CodePage));
94 
95     string  strResult;
96     wstring wString(WideString);
97 
98 
99     if (wString.length() > 0)
100     {
101         size_t nLen =
102             (size_t) ::WideCharToMultiByte(CodePage,
103                                            0,
104                                            wString.c_str(),
105                                            -1,
106                                            NULL,
107                                            0,
108                                            NULL,
109                                            NULL);
110         if (nLen > 0)
111         {
112             char *pBuffer = ALLOC_ARRAY(nLen, char, "pBuffer");
113 
114             VERIFY(::WideCharToMultiByte(CodePage,
115                                          0,
116                                          WideString,
117                                          (int)(wString.length() + 1),
118                                          pBuffer,
119                                          (int)nLen,
120                                          NULL,
121                                          NULL) != 0);
122 
123             strResult = pBuffer;
124             FREE_ARRAY(pBuffer, "pBuffer");
125 
126         }
127         else
128         {
129             // this should never be an error
130             CHECK(FALSE);
131         }
132     }
133     else
134     {
135         // output 'str' remains empty
136     }
137 
138     ENSURE("strResult is a valid STL string");
139     return strResult;
140 
141 
142 }
143 
MultiByteToWideChar(const char * UTF8String,unsigned int CodePage)144 wstring MultiByteToWideChar(const char     *UTF8String,
145                             unsigned int    CodePage)
146 {
147     REQUIRE(VALID_REF(UTF8String));
148     REQUIRE(VALID_CODE_PAGE(CodePage));
149 
150     wstring strResult;
151     string  UTF8str(UTF8String);
152 
153     size_t wLen;
154 
155     if (UTF8str.length() > 0)
156     {
157         wLen =
158             (size_t) ::MultiByteToWideChar(CodePage,
159                                            0,
160                                            UTF8str.c_str(),
161                                            -1,
162                                            NULL,
163                                            0);
164         if (wLen > 0)
165         {
166             wchar_t *wBuffer = ALLOC_ARRAY(wLen + 1, wchar_t, "wBuffer");
167             VERIFY(::MultiByteToWideChar(CodePage,
168                                          0,
169                                          UTF8str.c_str(),
170                                          (int)(UTF8str.length() + 1),
171                                          wBuffer,
172                                          (int)wLen) != 0);
173 
174             strResult = wBuffer;
175             FREE_ARRAY(wBuffer, "wBuffer");
176 
177         }
178         else
179         {
180             CHECK(FALSE); // We should never get an error here
181         }
182     }
183     else
184     {
185         // strResult is left empty
186     }
187 
188     ENSURE("strResult is a valid CString");
189 
190     wstring strRet(strResult);
191     return strRet;
192 
193 }
194 #endif
195 
196 
197 
198 #if defined MSWIN
/**
 * Converts a NUL-terminated wide string to a narrow std::string by calling
 * WideCharToMultiByte() with the code page selected below.
 *
 * In the code visible here the code page is always CP_ACP; the
 * TECPLOTKERNEL-only sections have been removed from this copy of the file.
 * NOTE(review): given the function name, the removed kernel code presumably
 * switches the code page to CP_UTF8 in some cases - confirm against the full
 * source.
 *
 * @param str  Wide string to convert; must not be NULL.
 * @return The converted narrow string.
 */
std::string WideCharToUtf8(const wchar_t *str)
{
    REQUIRE(VALID_REF(str)); /* really cannot be NULL - 2007-10-22 CAM/DTO */

#if defined TECPLOTKERNEL
/* CORE SOURCE CODE REMOVED */
#endif

    /* Default code page; nothing visible in this file changes it */
    UINT CodePage = CP_ACP;

    string Result = "";

#if defined TECPLOTKERNEL
/* CORE SOURCE CODE REMOVED */
#endif

    Result = WideCharToMultiByte(str, CodePage);

    ENSURE("Result is any string");
    return Result;
}
220 
/**
 * Converts a NUL-terminated narrow string to a wide string by calling
 * MultiByteToWideChar() with the code page selected below.
 *
 * In the code visible here the code page is always CP_ACP; the
 * TECPLOTKERNEL-only sections have been removed from this copy of the file.
 * NOTE(review): given the function name, the removed kernel code presumably
 * switches the code page to CP_UTF8 in some cases - confirm against the full
 * source.
 *
 * @param str  Narrow string to convert; must not be NULL.
 * @return The converted wide string.
 */
wstring Utf8ToWideChar(const char *str)
{
    REQUIRE(VALID_REF(str)); /* really cannot be NULL - 2007-10-22 CAM/DTO */

#if defined TECPLOTKERNEL
/* CORE SOURCE CODE REMOVED */
#endif

    /* Default code page; nothing visible in this file changes it */
    UINT CodePage = CP_ACP;
    wstring Result;

#if defined TECPLOTKERNEL
/* CORE SOURCE CODE REMOVED */
#endif

    Result = MultiByteToWideChar(str, CodePage);

    ENSURE("Result is any string");
    return Result;
}
241 #endif
242 
243 
IsValidUtf8LeadByte(Byte_t uch)244 Boolean_t IsValidUtf8LeadByte(Byte_t uch)
245 {
246     REQUIRE("uch is any byte");
247     Boolean_t Result =
248         uch <= 0x7F                   ||
249         (uch >= 0xC0 && uch <= 0xDF) ||
250         (uch >= 0xE0 && uch <= 0xEF) ||
251         (uch >= 0xF0 && uch <= 0xF4);
252 
253     ENSURE(VALID_BOOLEAN(Result));
254     return Result;
255 }
256 
IsValidUtf8ContinuingByte(Byte_t uch)257 Boolean_t IsValidUtf8ContinuingByte(Byte_t uch)
258 {
259     REQUIRE("uch is any char");
260 
261     Boolean_t Result =
262         (uch >= 0x80 && uch <= 0xBF);
263 
264     ENSURE(VALID_BOOLEAN(Result));
265     return Result;
266 }
267 
IsValidUtf8Byte(Byte_t uch)268 Boolean_t IsValidUtf8Byte(Byte_t uch)
269 {
270     REQUIRE("uch is any char");
271     Boolean_t Result =
272         IsValidUtf8LeadByte(uch)        ||
273         IsValidUtf8ContinuingByte(uch);
274 
275     REQUIRE(VALID_BOOLEAN(Result));
276     return Result;
277 }
278 
279 /**
280  */
IsPrintable8BitAsciiChar(wchar_t wChar)281 Boolean_t IsPrintable8BitAsciiChar(wchar_t wChar)
282 {
283     return ((wChar >= static_cast<wchar_t>(33)  && wChar <= static_cast<wchar_t>(126)) ||
284             (wChar >= static_cast<wchar_t>(160) && wChar <= static_cast<wchar_t>(255)));
285 }
286 
287 
/**
 * Reports whether a wide string should be converted to UTF-8.
 *
 * In the code visible here the answer is always FALSE: the only logic that
 * could change Result lives in the MSWIN && TECPLOTKERNEL section, which has
 * been removed from this copy of the file. In every other configuration the
 * argument is unused.
 *
 * @param str  Wide string to examine (ignored outside the kernel build).
 * @return FALSE in the visible code.
 */
Boolean_t ShouldConvertWideStringToUtf8String(const wchar_t *str)
{
    Boolean_t Result = FALSE;

#if defined MSWIN && defined TECPLOTKERNEL
/* CORE SOURCE CODE REMOVED */
#else
    /* Silence the unused-parameter warning when the kernel code is absent */
    UNUSED(str);
#endif

    ENSURE(VALID_BOOLEAN(Result));
    return Result;

}
302 
IsValidUtf8String(const char * str)303 Boolean_t IsValidUtf8String(const char *str)
304 {
305     Boolean_t IsValid = TRUE;
306     REQUIRE(VALID_REF(str));
307 
308 #if defined MSWIN
309     size_t len                      = strlen(str);
310     Boolean_t InUtf8Sequence        = FALSE;
311     int       Utf8SequenceCount     = 0;
312     int       MaxUtf8SequenceCount  = 0;
313 
314     /* we want to process the final \0, so go to <= len */
315 
316     for (size_t ii = 0; IsValid && ii <= len; ii++)
317     {
318         Byte_t uch = (Byte_t)str[ii];
319 
320         if (uch <= 0x7F)
321         {
322             /* This must be the end of a sequence,
323                so the sequence count must match
324                the max sequence count */
325 
326             InUtf8Sequence        = FALSE;
327             IsValid               = (Utf8SequenceCount == MaxUtf8SequenceCount);
328             Utf8SequenceCount     = 0;
329             MaxUtf8SequenceCount  = 0;
330         }
331         else if (uch >= 0x80 && uch <= 0xBF)
332         {
333             /* Continuing byte in a multi byte sequence */
334             if (InUtf8Sequence)
335             {
336                 Utf8SequenceCount++;
337             }
338             else
339             {
340                 IsValid = FALSE;
341             }
342 
343         }
344         else if (uch >= 0xC0 && uch <= 0xDF)
345         {
346             /* Lead byte of 000080-0007FF */
347             IsValid               = (Utf8SequenceCount == MaxUtf8SequenceCount);
348             InUtf8Sequence        = TRUE;
349             Utf8SequenceCount     = 0;
350             MaxUtf8SequenceCount  = 1;
351         }
352         else if (uch >= 0xE0 && uch <= 0xEF)
353         {
354             /* Lead byte of 000800-00FFFF */
355             IsValid               = (Utf8SequenceCount == MaxUtf8SequenceCount);
356             InUtf8Sequence        = TRUE;
357             Utf8SequenceCount     = 0;
358             MaxUtf8SequenceCount  = 2;
359         }
360         else if (uch >= 0xF0 && uch <= 0xF4)
361         {
362             /* Lead byte of 010000-10FFFF */
363             IsValid               = (Utf8SequenceCount == MaxUtf8SequenceCount);
364             Utf8SequenceCount     = 0;
365             InUtf8Sequence        = TRUE;
366             MaxUtf8SequenceCount  = 3;
367         }
368 
369         else
370         {
371             /* Invalid Utf 8 */
372             IsValid = FALSE;
373         }
374     }
375 #endif
376 
377     ENSURE(VALID_BOOLEAN(IsValid));
378     return IsValid;
379 }
380 
381 
382 /**
383  */
IsNullOrZeroLengthString(const char * str)384 Boolean_t IsNullOrZeroLengthString(const char *str)
385 {
386     REQUIRE(VALID_REF_OR_NULL(str));
387 
388     Boolean_t Result = (str == NULL || strlen(str) == 0);
389 
390     ENSURE(VALID_BOOLEAN(Result));
391     return Result;
392 }
393 
394 /**
395  */
IsNullOrZeroLengthString(TranslatedString TS)396 Boolean_t IsNullOrZeroLengthString(TranslatedString TS)
397 {
398     REQUIRE(TS.isValid());
399     return TS.isNullOrZeroLength();
400 }
401 
/**
 * Convert a single 8-bit character value, 0..255, to a UTF-8 encoded string.
 * The value is treated as the code point to encode: values below 128 are
 * emitted as one byte, values 128..255 as a two-byte sequence.
 * This function was copied from
 * http://www.daniweb.com/forums/thread151622.html
 *
 * @param asciiChar  Character value to encode.
 * @return A one-byte string for values below 128, otherwise a two-byte
 *         string (lead byte followed by continuation byte).
 */
std::string AsciiToUtf8String(unsigned char asciiChar)
{
    std::string result;

    if (asciiChar < 128)
    {
        /*
         * Anything less than 128 is represented in binary as 0xxxxxxx and
         * is its own UTF-8 encoding.
         */
        result += static_cast<char>(asciiChar);
    }
    else
    {
        /*
         * Values 128 and above are represented as 110xxxxx 10xxxxxx
         * (2 bytes). The lead byte carries the upper bits: shift the value
         * right by 6 and OR it with 0xC0 (11000000). The continuation byte
         * carries the lower 6 bits: mask with 0x3F (00111111) and OR with
         * 0x80 (10000000). The lead byte must be emitted first.
         */
        result += static_cast<char>((asciiChar >> 6) | 0xC0);
        result += static_cast<char>((asciiChar & 0x3F) | 0x80);
    }

    return result;
}
434 
435 }
436 }
437 
#if defined MSWIN && defined TECPLOTKERNEL && (!defined NO_ASSERTS || defined CHECKED_BUILD)
/* Keeping Trace out of the release builds
   will verify for us that it has been optimized away.

   See the definition of TRACE in MASTER.h for
   more information... */

/**
 * Formats a printf-style message and sends it to the debugger through
 * ::OutputDebugStringA. Messages longer than the internal buffer are
 * truncated.
 *
 * @param Format  printf-style format string; must be a valid reference.
 * @param ...     Arguments consumed by Format.
 */
void MSWinTrace(const char *Format, ...)
{
    REQUIRE(VALID_REF(Format));

    const int BufferSize = 512; /* Only the first BufferSize - 1 characters are printed */

    /*
     * Don't use ALLOC_ARRAY here. A fixed-size automatic buffer needs no
     * allocation at all. It is zero-filled and _vsnprintf is limited to
     * BufferSize - 1 characters, so the text stays NUL terminated even when
     * the output is truncated.
     */
    char buffer[BufferSize];
    memset(buffer, 0, sizeof(buffer));

    va_list Arguments;
    va_start(Arguments, Format);
    _vsnprintf(buffer, BufferSize - 1, Format, Arguments);
    va_end(Arguments);

    ::OutputDebugStringA(buffer);
}

#endif
465