1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * $Id$
20 */
21
22
23 // ---------------------------------------------------------------------------
24 // Includes
25 // ---------------------------------------------------------------------------
26 #if HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29
30 #include <xercesc/util/PlatformUtils.hpp>
31 #include <xercesc/util/TranscodingException.hpp>
32 #include <xercesc/util/XMLException.hpp>
33 #include <xercesc/util/XMLString.hpp>
34 #include <xercesc/util/XMLUniDefs.hpp>
35 #include <xercesc/util/XMLUni.hpp>
36 #include <xercesc/util/RefHashTableOf.hpp>
37 #include "Win32TransService.hpp"
38
39 XERCES_CPP_NAMESPACE_BEGIN
40
41
42 // ---------------------------------------------------------------------------
43 // Local, const data
44 // ---------------------------------------------------------------------------
// Nul-terminated identifier of this transcoding service, spelling "Win32".
// Win32TransService::getId() returns a pointer to this array.
static const XMLCh gMyServiceId[] =
{
    chLatin_W, chLatin_i, chLatin_n, chDigit_3, chDigit_2, chNull
};
49
50
51 #if !HAVE_WCSUPR
_wcsupr(LPWSTR str)52 void _wcsupr(LPWSTR str)
53 {
54 int nLen=XMLString::stringLen(str);
55 ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, str, nLen, str, nLen);
56 }
57 #endif
58
59 #if !HAVE_WCSLWR
_wcslwr(LPWSTR str)60 void _wcslwr(LPWSTR str)
61 {
62 int nLen=XMLString::stringLen(str);
63 ::LCMapStringW( GetThreadLocale(), LCMAP_LOWERCASE, str, nLen, str, nLen);
64 }
65 #endif
66
67 #if !HAVE_WCSNICMP
_wcsnicmp(LPCWSTR comp1,LPCWSTR comp2,unsigned int nLen)68 int _wcsnicmp(LPCWSTR comp1, LPCWSTR comp2, unsigned int nLen)
69 {
70 unsigned int len = XMLString::stringLen( comp1);
71 unsigned int otherLen = XMLString::stringLen( comp2);
72 unsigned int countChar = 0;
73 unsigned int maxChars;
74 int theResult = 0;
75
76 // Determine at what string index the comparison stops.
77 len = ( len > nLen ) ? nLen : len;
78 otherLen = ( otherLen > nLen ) ? nLen : otherLen;
79 maxChars = ( len > otherLen ) ? otherLen : len;
80
81 // Handle situation when one argument or the other is NULL
82 // by returning +/- string length of non-NULL argument (inferred
83 // from XMLString::CompareNString).
84
85 // Obs. Definition of stringLen(XMLCh*) implies NULL ptr and ptr
86 // to Empty String are equivalent. It handles NULL args, BTW.
87
88 if ( !comp1 )
89 {
90 // Negative because null ptr (c1) less than string (c2).
91 return ( 0 - otherLen );
92 }
93
94 if ( !comp2 )
95 {
96 // Positive because string (c1) still greater than null ptr (c2).
97 return len;
98 }
99
100 // Copy const parameter strings (plus terminating nul) into locals.
101 XMLCh* firstBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++len) * sizeof(XMLCh) );//new XMLCh[ ++len];
102 XMLCh* secondBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++otherLen) * sizeof(XMLCh) );//new XMLCh[ ++otherLen];
103 memcpy( firstBuf, comp1, len * sizeof(XMLCh));
104 memcpy( secondBuf, comp2, otherLen * sizeof(XMLCh));
105
106 // Then uppercase both strings, losing their case info.
107 ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)firstBuf, len, (LPWSTR)firstBuf, len);
108 ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)secondBuf, otherLen, (LPWSTR)secondBuf, otherLen);
109
110 // Strings are equal until proven otherwise.
111 while ( ( countChar < maxChars ) && ( !theResult ) )
112 {
113 theResult = (int)(firstBuf[countChar]) - (int)(secondBuf[countChar]);
114 ++countChar;
115 }
116
117 XMLPlatformUtils::fgMemoryManager->deallocate(firstBuf);//delete [] firstBuf;
118 XMLPlatformUtils::fgMemoryManager->deallocate(secondBuf);//delete [] secondBuf;
119
120 return theResult;
121 }
122 #endif
123
124 #if !HAVE_WCSICMP
_wcsicmp(LPCWSTR comp1,LPCWSTR comp2)125 int _wcsicmp(LPCWSTR comp1, LPCWSTR comp2)
126 {
127 unsigned int len = XMLString::stringLen( comp1);
128 unsigned int otherLen = XMLString::stringLen( comp2);
129 // Must compare terminating NUL to return difference if one string is shorter than the other.
130 unsigned int maxChars = ( len > otherLen ) ? otherLen : len;
131 return _wcsnicmp(comp1, comp2, maxChars+1);
132 }
133 #endif
134
135
xmlch_wcsupr(XMLCh * str)136 static inline void xmlch_wcsupr(XMLCh* str)
137 {
138 _wcsupr(reinterpret_cast<LPWSTR>(str));
139 }
140
xmlch_wcsicmp(const XMLCh * comp1,const XMLCh * comp2)141 static inline int xmlch_wcsicmp(const XMLCh* comp1, const XMLCh* comp2)
142 {
143 return _wcsicmp(reinterpret_cast<LPCWSTR>(comp1), reinterpret_cast<LPCWSTR>(comp2));
144 }
145
xmlch_wcsnicmp(const XMLCh * comp1,const XMLCh * comp2,const XMLSize_t maxChars)146 static inline int xmlch_wcsnicmp(const XMLCh* comp1, const XMLCh* comp2, const XMLSize_t maxChars)
147 {
148 return _wcsnicmp(reinterpret_cast<LPCWSTR>(comp1), reinterpret_cast<LPCWSTR>(comp2), maxChars);
149 }
150
xmlch_wcslwr(XMLCh * str)151 static inline void xmlch_wcslwr(XMLCh* str)
152 {
153 _wcslwr(reinterpret_cast<LPWSTR>(str));
154 }
155
156
157 // it's a local function (instead of a static function) so that we are not
158 // forced to include <windows.h> in the header
isAlias(const HKEY encodingKey,char * const aliasBuf=0,const unsigned int nameBufSz=0)159 bool isAlias(const HKEY encodingKey
160 , char* const aliasBuf = 0
161 , const unsigned int nameBufSz = 0)
162 {
163 DWORD theType;
164 DWORD theSize = nameBufSz;
165 return (::RegQueryValueExA
166 (
167 encodingKey
168 , "AliasForCharset"
169 , 0
170 , &theType
171 , (LPBYTE)aliasBuf
172 , &theSize
173 ) == ERROR_SUCCESS);
174 }
175
176 // ---------------------------------------------------------------------------
177 // This is the simple CPMapEntry class. It just contains an encoding name
178 // and a code page for that encoding.
179 // ---------------------------------------------------------------------------
180 class CPMapEntry : public XMemory
181 {
182 public :
183 // -----------------------------------------------------------------------
184 // Constructors and Destructor
185 // -----------------------------------------------------------------------
186 CPMapEntry
187 (
188 const XMLCh* const encodingName
189 , const unsigned int ieId
190 , MemoryManager* manager
191 );
192
193 CPMapEntry
194 (
195 const char* const encodingName
196 , const unsigned int ieId
197 , MemoryManager* manager
198 );
199
200 ~CPMapEntry();
201
202
203 // -----------------------------------------------------------------------
204 // Getter methods
205 // -----------------------------------------------------------------------
206 const XMLCh* getEncodingName() const;
207 const XMLCh* getKey() const;
208 unsigned int getIEEncoding() const;
209
210
211 private :
212 // -----------------------------------------------------------------------
213 // Unimplemented constructors and operators
214 // -----------------------------------------------------------------------
215 CPMapEntry();
216 CPMapEntry(const CPMapEntry&);
217 CPMapEntry& operator=(const CPMapEntry&);
218
219
220 // -----------------------------------------------------------------------
221 // Private data members
222 //
223 // fEncodingName
224 // This is the encoding name for the code page that this instance
225 // represents.
226 //
227 // fIEId
228 // This is the code page id.
229 // -----------------------------------------------------------------------
230 XMLCh* fEncodingName;
231 unsigned int fIEId;
232 MemoryManager* fManager;
233 };
234
235 // ---------------------------------------------------------------------------
236 // CPMapEntry: Constructors and Destructor
237 // ---------------------------------------------------------------------------
CPMapEntry(const char * const encodingName,const unsigned int ieId,MemoryManager * manager)238 CPMapEntry::CPMapEntry( const char* const encodingName
239 , const unsigned int ieId
240 , MemoryManager* manager) :
241 fEncodingName(0)
242 , fIEId(ieId)
243 , fManager(manager)
244 {
245 // Transcode the name to Unicode and store that copy
246 int targetLen=::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, NULL, 0);
247 if(targetLen!=0)
248 {
249 fEncodingName = (XMLCh*) fManager->allocate
250 (
251 (targetLen + 1) * sizeof(XMLCh)
252 );//new XMLCh[targetLen + 1];
253 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, (LPWSTR)fEncodingName, targetLen);
254 fEncodingName[targetLen] = 0;
255
256 //
257 // Upper case it because we are using a hash table and need to be
258 // sure that we find all case combinations.
259 //
260 xmlch_wcsupr(fEncodingName);
261 }
262 }
263
CPMapEntry(const XMLCh * const encodingName,const unsigned int ieId,MemoryManager * manager)264 CPMapEntry::CPMapEntry( const XMLCh* const encodingName
265 , const unsigned int ieId
266 , MemoryManager* manager) :
267
268 fEncodingName(0)
269 , fIEId(ieId)
270 , fManager(manager)
271 {
272 fEncodingName = XMLString::replicate(encodingName, fManager);
273
274 //
275 // Upper case it because we are using a hash table and need to be
276 // sure that we find all case combinations.
277 //
278 xmlch_wcsupr(fEncodingName);
279 }
280
~CPMapEntry()281 CPMapEntry::~CPMapEntry()
282 {
283 fManager->deallocate(fEncodingName);//delete [] fEncodingName;
284 }
285
286
287 // ---------------------------------------------------------------------------
288 // CPMapEntry: Getter methods
289 // ---------------------------------------------------------------------------
getEncodingName() const290 const XMLCh* CPMapEntry::getEncodingName() const
291 {
292 return fEncodingName;
293 }
294
getIEEncoding() const295 unsigned int CPMapEntry::getIEEncoding() const
296 {
297 return fIEId;
298 }
299
300
// Set to true by the Win32TransService constructor when the OS version
// query reports an NT platform newer than 5.0; read by getFlagsValue() to
// decide which conversion flags may be used for UTF-8.
static bool onXPOrLater = false;
302
303
304 //---------------------------------------------------------------------------
305 //
306 // class Win32TransService Implementation ...
307 //
308 //---------------------------------------------------------------------------
309
310
311 // ---------------------------------------------------------------------------
312 // Win32TransService: Constructors and Destructor
313 // ---------------------------------------------------------------------------
Win32TransService(MemoryManager * manager)314 Win32TransService::Win32TransService(MemoryManager* manager) :
315 fCPMap(NULL)
316 , fManager(manager)
317 {
318 // Figure out if we are on XP or later and save that flag for later use.
319 // We need this because of certain code page conversion calls.
320 OSVERSIONINFO OSVer;
321 OSVer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
322 ::GetVersionEx(&OSVer);
323
324 if ((OSVer.dwPlatformId == VER_PLATFORM_WIN32_NT) &&
325 (OSVer.dwMajorVersion > 5 || (OSVer.dwMajorVersion == 5 && OSVer.dwMinorVersion > 0)))
326 {
327 onXPOrLater = true;
328 }
329
330 fCPMap = new RefHashTableOf<CPMapEntry>(109);
331
332 //
333 // Open up the registry key that contains the info we want. Note that,
334 // if this key does not exist, then we just return. It will just mean
335 // that we don't have any support except for intrinsic encodings supported
336 // by the parser itself (and the LCP support of course.
337 //
338 HKEY charsetKey;
339 if (::RegOpenKeyExA
340 (
341 HKEY_CLASSES_ROOT
342 , "MIME\\Database\\Charset"
343 , 0
344 , KEY_READ
345 , &charsetKey))
346 {
347 return;
348 }
349
350 //
351 // Read in the registry keys that hold the code page ids. Skip for now
352 // those entries which indicate that they are aliases for some other
353 // encodings. We'll come back and do a second round for those and look
354 // up the original name and get the code page id.
355 //
356 // Note that we have to use A versions here so that this will run on
357 // 98, and transcode the strings to Unicode.
358 //
359 const unsigned int nameBufSz = 1024;
360 char nameBuf[nameBufSz + 1];
361 DWORD subIndex;
362 DWORD theSize;
363 for (subIndex = 0;;++subIndex)
364 {
365 // Get the name of the next key
366 theSize = nameBufSz;
367 if (::RegEnumKeyExA
368 (
369 charsetKey
370 , subIndex
371 , nameBuf
372 , &theSize
373 , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS)
374 {
375 break;
376 }
377
378 // Open this subkey
379 HKEY encodingKey;
380 if (::RegOpenKeyExA
381 (
382 charsetKey
383 , nameBuf
384 , 0
385 , KEY_READ
386 , &encodingKey))
387 {
388 continue;
389 }
390
391 //
392 // Let's see if its an alias. If so, then ignore it in this first
393 // loop. Else, we'll add a new entry for this one.
394 //
395 if (!isAlias(encodingKey))
396 {
397 //
398 // Lets get the two values out of this key that we are
399 // interested in. There should be a code page entry and an
400 // IE entry.
401 //
402 // The Codepage entry is the default code page for a computer using that charset
403 // while the InternetEncoding holds the code page that represents that charset
404 //
405 DWORD theType;
406 unsigned int CPId;
407 unsigned int IEId;
408
409 theSize = sizeof(unsigned int);
410 if (::RegQueryValueExA
411 (
412 encodingKey
413 , "Codepage"
414 , 0
415 , &theType
416 , (LPBYTE)&CPId
417 , &theSize) != ERROR_SUCCESS)
418 {
419 ::RegCloseKey(encodingKey);
420 continue;
421 }
422
423 //
424 // If this is not a valid Id, and it might not be because its
425 // not loaded on this system, then don't take it.
426 //
427 if (::IsValidCodePage(CPId))
428 {
429 theSize = sizeof(unsigned int);
430 if (::RegQueryValueExA
431 (
432 encodingKey
433 , "InternetEncoding"
434 , 0
435 , &theType
436 , (LPBYTE)&IEId
437 , &theSize) != ERROR_SUCCESS)
438 {
439 ::RegCloseKey(encodingKey);
440 continue;
441 }
442
443 CPMapEntry* newEntry = new (fManager) CPMapEntry(nameBuf, IEId, fManager);
444 fCPMap->put((void*)newEntry->getEncodingName(), newEntry);
445 }
446 }
447
448 // And close the subkey handle
449 ::RegCloseKey(encodingKey);
450 }
451
452 //
453 // Now loop one more time and this time we do just the aliases. For
454 // each one we find, we look up that name in the map we've already
455 // built and add a new entry with this new name and the same id
456 // values we stored for the original.
457 //
458 char aliasBuf[nameBufSz + 1];
459 for (subIndex = 0;;++subIndex)
460 {
461 // Get the name of the next key
462 theSize = nameBufSz;
463 if (::RegEnumKeyExA
464 (
465 charsetKey
466 , subIndex
467 , nameBuf
468 , &theSize
469 , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS)
470 {
471 break;
472 }
473
474 // Open this subkey
475 HKEY encodingKey;
476 if (::RegOpenKeyExA
477 (
478 charsetKey
479 , nameBuf
480 , 0
481 , KEY_READ
482 , &encodingKey))
483 {
484 continue;
485 }
486
487 //
488 // If it's an alias, look up the name in the map. If we find it,
489 // then construct a new one with the new name and the aliased
490 // ids.
491 //
492 if (isAlias(encodingKey, aliasBuf, nameBufSz))
493 {
494 int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, NULL, 0);
495 if(targetLen!=0)
496 {
497 XMLCh* uniAlias = (XMLCh*) fManager->allocate
498 (
499 (targetLen + 1) * sizeof(XMLCh)
500 );//new XMLCh[targetLen + 1];
501 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, (LPWSTR)uniAlias, targetLen);
502 uniAlias[targetLen] = 0;
503 xmlch_wcsupr(uniAlias);
504
505 // Look up the alias name
506 CPMapEntry* aliasedEntry = fCPMap->get(uniAlias);
507 if (aliasedEntry)
508 {
509 int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, NULL, 0);
510 if(targetLen!=0)
511 {
512 XMLCh* uniName = (XMLCh*) fManager->allocate
513 (
514 (targetLen + 1) * sizeof(XMLCh)
515 );//new XMLCh[targetLen + 1];
516 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, (LPWSTR)uniName, targetLen);
517 uniName[targetLen] = 0;
518 xmlch_wcsupr(uniName);
519
520 //
521 // If the name is actually different, then take it.
522 // Otherwise, don't take it. They map aliases that are
523 // just different case.
524 //
525 if (!XMLString::equals(uniName, aliasedEntry->getEncodingName()))
526 {
527 CPMapEntry* newEntry = new (fManager) CPMapEntry(uniName, aliasedEntry->getIEEncoding(), fManager);
528 fCPMap->put((void*)newEntry->getEncodingName(), newEntry);
529 }
530
531 fManager->deallocate(uniName);//delete [] uniName;
532 }
533 }
534 fManager->deallocate(uniAlias);//delete [] uniAlias;
535 }
536 }
537
538 // And close the subkey handle
539 ::RegCloseKey(encodingKey);
540 }
541
542 // And close the main key handle
543 ::RegCloseKey(charsetKey);
544 }
545
~Win32TransService()546 Win32TransService::~Win32TransService()
547 {
548 delete fCPMap;
549 }
550
551
552 // ---------------------------------------------------------------------------
553 // Win32TransService: The virtual transcoding service API
554 // ---------------------------------------------------------------------------
compareIString(const XMLCh * const comp1,const XMLCh * const comp2)555 int Win32TransService::compareIString( const XMLCh* const comp1
556 , const XMLCh* const comp2)
557 {
558 return xmlch_wcsicmp(comp1, comp2);
559 }
560
561
compareNIString(const XMLCh * const comp1,const XMLCh * const comp2,const XMLSize_t maxChars)562 int Win32TransService::compareNIString( const XMLCh* const comp1
563 , const XMLCh* const comp2
564 , const XMLSize_t maxChars)
565 {
566 return xmlch_wcsnicmp(comp1, comp2, maxChars);
567 }
568
569
getId() const570 const XMLCh* Win32TransService::getId() const
571 {
572 return gMyServiceId;
573 }
574
makeNewLCPTranscoder(MemoryManager * manager)575 XMLLCPTranscoder* Win32TransService::makeNewLCPTranscoder(MemoryManager* manager)
576 {
577 // Just allocate a new LCP transcoder of our type
578 return new (manager) Win32LCPTranscoder;
579 }
580
581
supportsSrcOfs() const582 bool Win32TransService::supportsSrcOfs() const
583 {
584 //
585 // Since the only mechanism we have to translate XML text in this
586 // transcoder basically require us to do work that allows us to support
587 // source offsets, we might as well do it.
588 //
589 return true;
590 }
591
592
upperCase(XMLCh * const toUpperCase)593 void Win32TransService::upperCase(XMLCh* const toUpperCase)
594 {
595 xmlch_wcsupr(toUpperCase);
596 }
597
lowerCase(XMLCh * const toLowerCase)598 void Win32TransService::lowerCase(XMLCh* const toLowerCase)
599 {
600 xmlch_wcslwr(toLowerCase);
601 }
602
603 XMLTranscoder*
makeNewXMLTranscoder(const XMLCh * const encodingName,XMLTransService::Codes & resValue,const XMLSize_t blockSize,MemoryManager * const manager)604 Win32TransService::makeNewXMLTranscoder(const XMLCh* const encodingName
605 , XMLTransService::Codes& resValue
606 , const XMLSize_t blockSize
607 , MemoryManager* const manager)
608 {
609 const XMLSize_t upLen = 1024;
610 XMLCh upEncoding[upLen + 1];
611
612 //
613 // Get an upper cased copy of the encoding name, since we use a hash
614 // table and we store them all in upper case.
615 //
616 XMLString::copyNString(upEncoding, encodingName, upLen);
617 xmlch_wcsupr(upEncoding);
618
619 // Now to try to find this guy in the CP map
620 CPMapEntry* theEntry = fCPMap->get(upEncoding);
621
622 // If not found, then return a null pointer
623 if (!theEntry)
624 {
625 resValue = XMLTransService::UnsupportedEncoding;
626 return 0;
627 }
628
629 // We found it, so return a Win32 transcoder for this encoding
630 return new (manager) Win32Transcoder
631 (
632 encodingName
633 , theEntry->getIEEncoding()
634 , blockSize
635 , manager
636 );
637 }
638
639
640
641
642
643
644
645
646 //---------------------------------------------------------------------------
647 //
648 // class Win32Transcoder Implementation ...
649 //
650 //---------------------------------------------------------------------------
651
652
653 inline DWORD
getFlagsValue(UINT idCP,DWORD desiredFlags)654 getFlagsValue(
655 UINT idCP,
656 DWORD desiredFlags)
657 {
658 if (idCP == 50220 ||
659 idCP == 50227 ||
660 (idCP >= 57002 &&
661 idCP <= 57011))
662 {
663 // These code pages do not support any
664 // flag options.
665 return 0;
666 }
667 else if (idCP == 65001)
668 {
669 // UTF-8 only supports MB_ERR_INVALID_CHARS on
670 // versions of Windows since XP
671 if (!onXPOrLater)
672 {
673 return 0;
674 }
675 else
676 {
677 return desiredFlags & MB_ERR_INVALID_CHARS ?
678 MB_ERR_INVALID_CHARS : 0;
679 }
680 }
681 else
682 {
683 return desiredFlags;
684 }
685 }
686
687
688
689 // ---------------------------------------------------------------------------
690 // Win32Transcoder: Constructors and Destructor
691 // ---------------------------------------------------------------------------
Win32Transcoder(const XMLCh * const encodingName,const unsigned int ieCP,const XMLSize_t blockSize,MemoryManager * const manager)692 Win32Transcoder::Win32Transcoder(const XMLCh* const encodingName
693 , const unsigned int ieCP
694 , const XMLSize_t blockSize
695 , MemoryManager* const manager) :
696
697 XMLTranscoder(encodingName, blockSize, manager)
698 , fIECP(ieCP)
699 , fUsedDef(FALSE)
700 , fPtrUsedDef(0)
701 , fFromFlags(getFlagsValue(ieCP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS))
702 #if defined(WC_NO_BEST_FIT_CHARS)
703 , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS | WC_NO_BEST_FIT_CHARS))
704 #else
705 , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS))
706 #endif
707 {
708 // Some code pages require that MultiByteToWideChar and WideCharToMultiByte
709 // be passed 0 for their second parameters (dwFlags). If that's the case,
710 // it's also necessary to pass null pointers for the last two parameters
711 // to WideCharToMultiByte. This is apparently because it's impossible to
712 // determine whether or not a substitution (replacement) character was used.
713 if (fToFlags)
714 {
715 fPtrUsedDef = &fUsedDef;
716 }
717 }
718
~Win32Transcoder()719 Win32Transcoder::~Win32Transcoder()
720 {
721 }
722
723
724 // ---------------------------------------------------------------------------
725 // Win32Transcoder: The virtual transcoder API
726 // ---------------------------------------------------------------------------
727 XMLSize_t
transcodeFrom(const XMLByte * const srcData,const XMLSize_t srcCount,XMLCh * const toFill,const XMLSize_t maxChars,XMLSize_t & bytesEaten,unsigned char * const charSizes)728 Win32Transcoder::transcodeFrom( const XMLByte* const srcData
729 , const XMLSize_t srcCount
730 , XMLCh* const toFill
731 , const XMLSize_t maxChars
732 , XMLSize_t& bytesEaten
733 , unsigned char* const charSizes)
734 {
735 // Get temp pointers to the in and out buffers, and the chars sizes one
736 XMLCh* outPtr = toFill;
737 const XMLByte* inPtr = srcData;
738 unsigned char* sizesPtr = charSizes;
739
740 // Calc end pointers for each of them
741 XMLCh* outEnd = toFill + maxChars;
742 const XMLByte* inEnd = srcData + srcCount;
743
744 //
745 // Now loop until we either get our max chars, or cannot get a whole
746 // character from the input buffer.
747 //
748 bytesEaten = 0;
749 while ((outPtr < outEnd) && (inPtr < inEnd))
750 {
751 //
752 // If we are looking at a leading byte of a multibyte sequence,
753 // then we are going to eat 2 bytes, else 1.
754 //
755 unsigned char toEat = ::IsDBCSLeadByteEx(fIECP, *inPtr) ?
756 2 : 1;
757
758 // Make sure a whole char is in the source
759 if (inPtr + toEat > inEnd)
760 break;
761
762 // Try to translate this next char and check for an error
763 const unsigned int converted = ::MultiByteToWideChar
764 (
765 fIECP
766 , fFromFlags
767 , (const char*)inPtr
768 , toEat
769 , reinterpret_cast<LPWSTR>(outPtr)
770 , 1
771 );
772
773 if (converted != 1)
774 {
775 if (toEat == 1)
776 {
777 XMLCh tmpBuf[17];
778 XMLString::binToText((unsigned int)(*inPtr), tmpBuf, 16, 16, getMemoryManager());
779 ThrowXMLwithMemMgr2
780 (
781 TranscodingException
782 , XMLExcepts::Trans_BadSrcCP
783 , tmpBuf
784 , getEncodingName()
785 , getMemoryManager()
786 );
787 }
788 else
789 {
790 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
791 }
792 }
793
794 // Update the char sizes array for this round
795 *sizesPtr++ = toEat;
796
797 // And update the bytes eaten count
798 bytesEaten += toEat;
799
800 // And update our in/out ptrs
801 inPtr += toEat;
802 outPtr++;
803 }
804
805 // Return the chars we output
806 return (outPtr - toFill);
807 }
808
809
810 XMLSize_t
transcodeTo(const XMLCh * const srcData,const XMLSize_t srcCount,XMLByte * const toFill,const XMLSize_t maxBytes,XMLSize_t & charsEaten,const UnRepOpts options)811 Win32Transcoder::transcodeTo(const XMLCh* const srcData
812 , const XMLSize_t srcCount
813 , XMLByte* const toFill
814 , const XMLSize_t maxBytes
815 , XMLSize_t& charsEaten
816 , const UnRepOpts options)
817 {
818 // Get pointers to the start and end of each buffer
819 const XMLCh* srcPtr = srcData;
820 const XMLCh* srcEnd = srcData + srcCount;
821 XMLByte* outPtr = toFill;
822 XMLByte* outEnd = toFill + maxBytes;
823
824 //
825 // Now loop until we either get our max chars, or cannot get a whole
826 // character from the input buffer.
827 //
828 // NOTE: We have to use a loop for this unfortunately because the
829 // conversion API is too dumb to tell us how many chars it converted if
830 // it couldn't do the whole source.
831 //
832 fUsedDef = FALSE;
833 while ((outPtr < outEnd) && (srcPtr < srcEnd))
834 {
835 //
836 // Do one char and see if it made it.
837 const int bytesStored = ::WideCharToMultiByte
838 (
839 fIECP
840 , fToFlags
841 , reinterpret_cast<LPCWSTR>(srcPtr)
842 , 1
843 , (char*)outPtr
844 , (int)(outEnd - outPtr)
845 , 0
846 , fPtrUsedDef
847 );
848
849 // If we didn't transcode anything, then we are done
850 if (!bytesStored)
851 break;
852
853 //
854 // If the defaault char was used and the options indicate that
855 // this isn't allowed, then throw.
856 //
857 if (fUsedDef && (options == UnRep_Throw))
858 {
859 XMLCh tmpBuf[17];
860 XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
861 ThrowXMLwithMemMgr2
862 (
863 TranscodingException
864 , XMLExcepts::Trans_Unrepresentable
865 , tmpBuf
866 , getEncodingName()
867 , getMemoryManager()
868 );
869 }
870
871 // Update our pointers
872 outPtr += bytesStored;
873 srcPtr++;
874 }
875
876 // Update the chars eaten
877 charsEaten = srcPtr - srcData;
878
879 // And return the bytes we stored
880 return outPtr - toFill;
881 }
882
883
canTranscodeTo(const unsigned int toCheck)884 bool Win32Transcoder::canTranscodeTo(const unsigned int toCheck)
885 {
886 //
887 // If the passed value is really a surrogate embedded together, then
888 // we need to break it out into its two chars. Else just one.
889 //
890 XMLCh srcBuf[2];
891 unsigned int srcCount = 1;
892 if (toCheck & 0xFFFF0000)
893 {
894 srcBuf[0] = XMLCh((toCheck >> 10) + 0xD800);
895 srcBuf[1] = XMLCh((toCheck & 0x3FF) + 0xDC00);
896 srcCount++;
897 }
898 else
899 {
900 srcBuf[0] = XMLCh(toCheck);
901 }
902
903 //
904 // Use a local temp buffer that would hold any sane multi-byte char
905 // sequence and try to transcode this guy into it.
906 //
907 char tmpBuf[64];
908
909 fUsedDef = FALSE;
910
911 const unsigned int bytesStored = ::WideCharToMultiByte
912 (
913 fIECP
914 , fToFlags
915 , reinterpret_cast<LPCWSTR>(srcBuf)
916 , srcCount
917 , tmpBuf
918 , 64
919 , 0
920 , fPtrUsedDef
921 );
922
923 if (!bytesStored || fUsedDef)
924 return false;
925
926 return true;
927 }
928
929
930
931
932 //---------------------------------------------------------------------------
933 //
//  class Win32LCPTranscoder Implementation ...
935 //
936 //---------------------------------------------------------------------------
937
938 // ---------------------------------------------------------------------------
939 // Win32LCPTranscoder: Constructors and Destructor
940 // ---------------------------------------------------------------------------
Win32LCPTranscoder()941 Win32LCPTranscoder::Win32LCPTranscoder()
942 {
943 }
944
~Win32LCPTranscoder()945 Win32LCPTranscoder::~Win32LCPTranscoder()
946 {
947 }
948
949
950 // ---------------------------------------------------------------------------
951 // Win32LCPTranscoder: Implementation of the virtual transcoder interface
952 // ---------------------------------------------------------------------------
calcRequiredSize(const char * const srcText,MemoryManager * const)953 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const char* const srcText
954 , MemoryManager* const /*manager*/)
955 {
956 if (!srcText)
957 return 0;
958
959 return ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, srcText, -1, NULL, 0);
960 }
961
962
calcRequiredSize(const XMLCh * const srcText,MemoryManager * const)963 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const XMLCh* const srcText
964 , MemoryManager* const /*manager*/)
965 {
966 if (!srcText)
967 return 0;
968
969 return ::WideCharToMultiByte(CP_ACP, 0, reinterpret_cast<LPCWSTR>(srcText), -1, NULL, 0, NULL, NULL);
970 }
971
transcode(const XMLCh * const toTranscode,MemoryManager * const manager)972 char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode,
973 MemoryManager* const manager)
974 {
975 if (!toTranscode)
976 return 0;
977
978 char* retVal = 0;
979 if (*toTranscode)
980 {
981 // Calc the needed size
982 const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager);
983
984 // Allocate a buffer of that size plus one for the null and transcode
985 retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char)); //new char[neededLen + 1];
986 ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, retVal, (int)neededLen+1, NULL, NULL);
987
988 // And cap it off anyway just to make sure
989 retVal[neededLen] = 0;
990 }
991 else
992 {
993 retVal = (char*) manager->allocate(sizeof(char)); //new char[1];
994 retVal[0] = 0;
995 }
996 return retVal;
997 }
998
transcode(const char * const toTranscode,MemoryManager * const manager)999 XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode,
1000 MemoryManager* const manager)
1001 {
1002 if (!toTranscode)
1003 return 0;
1004
1005 XMLCh* retVal = 0;
1006 if (*toTranscode)
1007 {
1008 // Calculate the buffer size required
1009 const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager);
1010 if (neededLen == 0)
1011 {
1012 retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
1013 retVal[0] = 0;
1014 return retVal;
1015 }
1016
1017 // Allocate a buffer of that size plus one for the null and transcode
1018 retVal = (XMLCh*) manager->allocate((neededLen + 1) * sizeof(XMLCh)); //new XMLCh[neededLen + 1];
1019 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)retVal, (int)neededLen + 1);
1020
1021 // Cap it off just to make sure. We are so paranoid!
1022 retVal[neededLen] = 0;
1023 }
1024 else
1025 {
1026 retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
1027 retVal[0] = 0;
1028 }
1029 return retVal;
1030 }
1031
1032
transcode(const char * const toTranscode,XMLCh * const toFill,const XMLSize_t maxChars,MemoryManager * const)1033 bool Win32LCPTranscoder::transcode( const char* const toTranscode
1034 , XMLCh* const toFill
1035 , const XMLSize_t maxChars
1036 , MemoryManager* const /*manager*/)
1037 {
1038 // Check for a couple of psycho corner cases
1039 if (!toTranscode || !maxChars)
1040 {
1041 toFill[0] = 0;
1042 return true;
1043 }
1044
1045 if (!*toTranscode)
1046 {
1047 toFill[0] = 0;
1048 return true;
1049 }
1050
1051 // This one has a fixed size output, so try it and if it fails it fails
1052 if ( 0 == ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)toFill, (int)(maxChars + 1)) )
1053 return false;
1054 return true;
1055 }
1056
1057
transcode(const XMLCh * const toTranscode,char * const toFill,const XMLSize_t maxBytes,MemoryManager * const)1058 bool Win32LCPTranscoder::transcode( const XMLCh* const toTranscode
1059 , char* const toFill
1060 , const XMLSize_t maxBytes
1061 , MemoryManager* const /*manager*/)
1062 {
1063 // Watch for a couple of pyscho corner cases
1064 if (!toTranscode || !maxBytes)
1065 {
1066 toFill[0] = 0;
1067 return true;
1068 }
1069
1070 if (!*toTranscode)
1071 {
1072 toFill[0] = 0;
1073 return true;
1074 }
1075
1076 // This one has a fixed size output, so try it and if it fails it fails
1077 if ( 0 == ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, toFill, (int)(maxBytes + 1), NULL, NULL) )
1078 return false;
1079
1080 // Cap it off just in case
1081 toFill[maxBytes] = 0;
1082 return true;
1083 }
1084
1085
1086 XERCES_CPP_NAMESPACE_END
1087