1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id$
20  */
21 
22 
23 // ---------------------------------------------------------------------------
24 //  Includes
25 // ---------------------------------------------------------------------------
26 #if HAVE_CONFIG_H
27 #	include <config.h>
28 #endif
29 
30 #include <xercesc/util/PlatformUtils.hpp>
31 #include <xercesc/util/TranscodingException.hpp>
32 #include <xercesc/util/XMLException.hpp>
33 #include <xercesc/util/XMLString.hpp>
34 #include <xercesc/util/XMLUniDefs.hpp>
35 #include <xercesc/util/XMLUni.hpp>
36 #include <xercesc/util/RefHashTableOf.hpp>
37 #include "Win32TransService.hpp"
38 
39 XERCES_CPP_NAMESPACE_BEGIN
40 
41 
42 // ---------------------------------------------------------------------------
43 //  Local, const data
44 // ---------------------------------------------------------------------------
45 static const XMLCh gMyServiceId[] =
46 {
47     chLatin_W, chLatin_i, chLatin_n, chDigit_3, chDigit_2, chNull
48 };
49 
50 
51 #if !HAVE_WCSUPR
_wcsupr(LPWSTR str)52 void _wcsupr(LPWSTR str)
53 {
54     int nLen=XMLString::stringLen(str);
55     ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, str, nLen, str, nLen);
56 }
57 #endif
58 
59 #if !HAVE_WCSLWR
_wcslwr(LPWSTR str)60 void _wcslwr(LPWSTR str)
61 {
62     int nLen=XMLString::stringLen(str);
63     ::LCMapStringW( GetThreadLocale(), LCMAP_LOWERCASE, str, nLen, str, nLen);
64 }
65 #endif
66 
67 #if !HAVE_WCSNICMP
_wcsnicmp(LPCWSTR comp1,LPCWSTR comp2,unsigned int nLen)68 int _wcsnicmp(LPCWSTR comp1, LPCWSTR comp2, unsigned int nLen)
69 {
70     unsigned int len = XMLString::stringLen( comp1);
71     unsigned int otherLen = XMLString::stringLen( comp2);
72     unsigned int countChar = 0;
73     unsigned int maxChars;
74     int          theResult = 0;
75 
76     // Determine at what string index the comparison stops.
77     len = ( len > nLen ) ? nLen : len;
78     otherLen = ( otherLen > nLen ) ? nLen : otherLen;
79     maxChars = ( len > otherLen ) ? otherLen : len;
80 
81     // Handle situation when one argument or the other is NULL
82     // by returning +/- string length of non-NULL argument (inferred
83     // from XMLString::CompareNString).
84 
85     // Obs. Definition of stringLen(XMLCh*) implies NULL ptr and ptr
86     // to Empty String are equivalent.  It handles NULL args, BTW.
87 
88     if ( !comp1 )
89     {
90         // Negative because null ptr (c1) less than string (c2).
91         return ( 0 - otherLen );
92     }
93 
94     if ( !comp2 )
95     {
96         // Positive because string (c1) still greater than null ptr (c2).
97         return len;
98     }
99 
100     // Copy const parameter strings (plus terminating nul) into locals.
101     XMLCh* firstBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++len) * sizeof(XMLCh) );//new XMLCh[ ++len];
102     XMLCh* secondBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++otherLen) * sizeof(XMLCh) );//new XMLCh[ ++otherLen];
103     memcpy( firstBuf, comp1, len * sizeof(XMLCh));
104     memcpy( secondBuf, comp2, otherLen * sizeof(XMLCh));
105 
106     // Then uppercase both strings, losing their case info.
107     ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)firstBuf, len, (LPWSTR)firstBuf, len);
108     ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)secondBuf, otherLen, (LPWSTR)secondBuf, otherLen);
109 
110     // Strings are equal until proven otherwise.
111     while ( ( countChar < maxChars ) && ( !theResult ) )
112     {
113         theResult = (int)(firstBuf[countChar]) - (int)(secondBuf[countChar]);
114         ++countChar;
115     }
116 
117     XMLPlatformUtils::fgMemoryManager->deallocate(firstBuf);//delete [] firstBuf;
118     XMLPlatformUtils::fgMemoryManager->deallocate(secondBuf);//delete [] secondBuf;
119 
120     return theResult;
121 }
122 #endif
123 
124 #if !HAVE_WCSICMP
_wcsicmp(LPCWSTR comp1,LPCWSTR comp2)125 int _wcsicmp(LPCWSTR comp1, LPCWSTR comp2)
126 {
127     unsigned int len = XMLString::stringLen( comp1);
128     unsigned int otherLen = XMLString::stringLen( comp2);
129     // Must compare terminating NUL to return difference if one string is shorter than the other.
130     unsigned int maxChars = ( len > otherLen ) ? otherLen : len;
131     return _wcsnicmp(comp1, comp2, maxChars+1);
132 }
133 #endif
134 
135 
xmlch_wcsupr(XMLCh * str)136 static inline void xmlch_wcsupr(XMLCh* str)
137 {
138     _wcsupr(reinterpret_cast<LPWSTR>(str));
139 }
140 
xmlch_wcsicmp(const XMLCh * comp1,const XMLCh * comp2)141 static inline int xmlch_wcsicmp(const XMLCh* comp1, const XMLCh* comp2)
142 {
143     return _wcsicmp(reinterpret_cast<LPCWSTR>(comp1), reinterpret_cast<LPCWSTR>(comp2));
144 }
145 
xmlch_wcsnicmp(const XMLCh * comp1,const XMLCh * comp2,const XMLSize_t maxChars)146 static inline int xmlch_wcsnicmp(const XMLCh* comp1, const XMLCh* comp2, const XMLSize_t maxChars)
147 {
148     return _wcsnicmp(reinterpret_cast<LPCWSTR>(comp1), reinterpret_cast<LPCWSTR>(comp2), maxChars);
149 }
150 
xmlch_wcslwr(XMLCh * str)151 static inline void xmlch_wcslwr(XMLCh* str)
152 {
153     _wcslwr(reinterpret_cast<LPWSTR>(str));
154 }
155 
156 
157 // it's a local function (instead of a static function) so that we are not
158 // forced to include <windows.h> in the header
isAlias(const HKEY encodingKey,char * const aliasBuf=0,const unsigned int nameBufSz=0)159 bool isAlias(const   HKEY            encodingKey
160              ,       char* const     aliasBuf = 0
161              , const unsigned int    nameBufSz = 0)
162 {
163     DWORD theType;
164     DWORD theSize = nameBufSz;
165     return (::RegQueryValueExA
166     (
167         encodingKey
168         , "AliasForCharset"
169         , 0
170         , &theType
171         , (LPBYTE)aliasBuf
172         , &theSize
173     ) == ERROR_SUCCESS);
174 }
175 
176 // ---------------------------------------------------------------------------
177 //  This is the simple CPMapEntry class. It just contains an encoding name
178 //  and a code page for that encoding.
179 // ---------------------------------------------------------------------------
180 class CPMapEntry : public XMemory
181 {
182 public :
183     // -----------------------------------------------------------------------
184     //  Constructors and Destructor
185     // -----------------------------------------------------------------------
186     CPMapEntry
187     (
188         const   XMLCh* const    encodingName
189         , const unsigned int    ieId
190         , MemoryManager*        manager
191     );
192 
193     CPMapEntry
194     (
195         const   char* const     encodingName
196         , const unsigned int    ieId
197         , MemoryManager*        manager
198     );
199 
200     ~CPMapEntry();
201 
202 
203     // -----------------------------------------------------------------------
204     //  Getter methods
205     // -----------------------------------------------------------------------
206     const XMLCh* getEncodingName() const;
207     const XMLCh* getKey() const;
208     unsigned int getIEEncoding() const;
209 
210 
211 private :
212     // -----------------------------------------------------------------------
213     //  Unimplemented constructors and operators
214     // -----------------------------------------------------------------------
215     CPMapEntry();
216     CPMapEntry(const CPMapEntry&);
217     CPMapEntry& operator=(const CPMapEntry&);
218 
219 
220     // -----------------------------------------------------------------------
221     //  Private data members
222     //
223     //  fEncodingName
224     //      This is the encoding name for the code page that this instance
225     //      represents.
226     //
227     //  fIEId
228     //      This is the code page id.
229     // -----------------------------------------------------------------------
230     XMLCh*          fEncodingName;
231     unsigned int    fIEId;
232     MemoryManager*  fManager;
233 };
234 
235 // ---------------------------------------------------------------------------
236 //  CPMapEntry: Constructors and Destructor
237 // ---------------------------------------------------------------------------
CPMapEntry(const char * const encodingName,const unsigned int ieId,MemoryManager * manager)238 CPMapEntry::CPMapEntry( const   char* const     encodingName
239                         , const unsigned int    ieId
240                         , MemoryManager*        manager) :
241     fEncodingName(0)
242     , fIEId(ieId)
243     , fManager(manager)
244 {
245     // Transcode the name to Unicode and store that copy
246     int targetLen=::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, NULL, 0);
247     if(targetLen!=0)
248     {
249         fEncodingName = (XMLCh*) fManager->allocate
250         (
251             (targetLen + 1) * sizeof(XMLCh)
252         );//new XMLCh[targetLen + 1];
253         ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, (LPWSTR)fEncodingName, targetLen);
254         fEncodingName[targetLen] = 0;
255 
256         //
257         //  Upper case it because we are using a hash table and need to be
258         //  sure that we find all case combinations.
259         //
260         xmlch_wcsupr(fEncodingName);
261   }
262 }
263 
CPMapEntry(const XMLCh * const encodingName,const unsigned int ieId,MemoryManager * manager)264 CPMapEntry::CPMapEntry( const   XMLCh* const    encodingName
265                         , const unsigned int    ieId
266                         , MemoryManager*        manager) :
267 
268     fEncodingName(0)
269     , fIEId(ieId)
270     , fManager(manager)
271 {
272     fEncodingName = XMLString::replicate(encodingName, fManager);
273 
274     //
275     //  Upper case it because we are using a hash table and need to be
276     //  sure that we find all case combinations.
277     //
278     xmlch_wcsupr(fEncodingName);
279 }
280 
~CPMapEntry()281 CPMapEntry::~CPMapEntry()
282 {
283     fManager->deallocate(fEncodingName);//delete [] fEncodingName;
284 }
285 
286 
287 // ---------------------------------------------------------------------------
288 //  CPMapEntry: Getter methods
289 // ---------------------------------------------------------------------------
getEncodingName() const290 const XMLCh* CPMapEntry::getEncodingName() const
291 {
292     return fEncodingName;
293 }
294 
getIEEncoding() const295 unsigned int CPMapEntry::getIEEncoding() const
296 {
297     return fIEId;
298 }
299 
300 
// Starts out false; the Win32TransService constructor sets it when the OS
// version check succeeds, and getFlagsValue() reads it for code page 65001.
static bool onXPOrLater(false);
302 
303 
304 //---------------------------------------------------------------------------
305 //
306 //  class Win32TransService Implementation ...
307 //
308 //---------------------------------------------------------------------------
309 
310 
311 // ---------------------------------------------------------------------------
312 //  Win32TransService: Constructors and Destructor
313 // ---------------------------------------------------------------------------
Win32TransService(MemoryManager * manager)314 Win32TransService::Win32TransService(MemoryManager* manager) :
315     fCPMap(NULL)
316     , fManager(manager)
317 {
318     // Figure out if we are on XP or later and save that flag for later use.
319     // We need this because of certain code page conversion calls.
320     OSVERSIONINFO   OSVer;
321     OSVer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
322     ::GetVersionEx(&OSVer);
323 
324     if ((OSVer.dwPlatformId == VER_PLATFORM_WIN32_NT) &&
325         (OSVer.dwMajorVersion > 5 || (OSVer.dwMajorVersion == 5 && OSVer.dwMinorVersion > 0)))
326     {
327         onXPOrLater = true;
328     }
329 
330     fCPMap = new RefHashTableOf<CPMapEntry>(109);
331 
332     //
333     //  Open up the registry key that contains the info we want. Note that,
334     //  if this key does not exist, then we just return. It will just mean
335     //  that we don't have any support except for intrinsic encodings supported
336     //  by the parser itself (and the LCP support of course.
337     //
338     HKEY charsetKey;
339     if (::RegOpenKeyExA
340     (
341         HKEY_CLASSES_ROOT
342         , "MIME\\Database\\Charset"
343         , 0
344         , KEY_READ
345         , &charsetKey))
346     {
347         return;
348     }
349 
350     //
351     //  Read in the registry keys that hold the code page ids. Skip for now
352     //  those entries which indicate that they are aliases for some other
353     //  encodings. We'll come back and do a second round for those and look
354     //  up the original name and get the code page id.
355     //
356     //  Note that we have to use A versions here so that this will run on
357     //  98, and transcode the strings to Unicode.
358     //
359     const unsigned int nameBufSz = 1024;
360     char nameBuf[nameBufSz + 1];
361     DWORD subIndex;
362     DWORD theSize;
363     for (subIndex = 0;;++subIndex)
364     {
365         // Get the name of the next key
366         theSize = nameBufSz;
367         if (::RegEnumKeyExA
368         (
369             charsetKey
370             , subIndex
371             , nameBuf
372             , &theSize
373             , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS)
374         {
375             break;
376         }
377 
378         // Open this subkey
379         HKEY encodingKey;
380         if (::RegOpenKeyExA
381         (
382             charsetKey
383             , nameBuf
384             , 0
385             , KEY_READ
386             , &encodingKey))
387         {
388             continue;
389         }
390 
391         //
392         //  Let's see if its an alias. If so, then ignore it in this first
393         //  loop. Else, we'll add a new entry for this one.
394         //
395         if (!isAlias(encodingKey))
396         {
397             //
398             //  Lets get the two values out of this key that we are
399             //  interested in. There should be a code page entry and an
400             //  IE entry.
401             //
402             //  The Codepage entry is the default code page for a computer using that charset
403             //  while the InternetEncoding holds the code page that represents that charset
404             //
405             DWORD theType;
406             unsigned int CPId;
407             unsigned int IEId;
408 
409             theSize = sizeof(unsigned int);
410             if (::RegQueryValueExA
411             (
412                 encodingKey
413                 , "Codepage"
414                 , 0
415                 , &theType
416                 , (LPBYTE)&CPId
417                 , &theSize) != ERROR_SUCCESS)
418             {
419                 ::RegCloseKey(encodingKey);
420                 continue;
421             }
422 
423             //
424             //  If this is not a valid Id, and it might not be because its
425             //  not loaded on this system, then don't take it.
426             //
427             if (::IsValidCodePage(CPId))
428             {
429                 theSize = sizeof(unsigned int);
430                 if (::RegQueryValueExA
431                 (
432                     encodingKey
433                     , "InternetEncoding"
434                     , 0
435                     , &theType
436                     , (LPBYTE)&IEId
437                     , &theSize) != ERROR_SUCCESS)
438                 {
439                     ::RegCloseKey(encodingKey);
440                     continue;
441                 }
442 
443                 CPMapEntry* newEntry = new (fManager) CPMapEntry(nameBuf, IEId, fManager);
444                 fCPMap->put((void*)newEntry->getEncodingName(), newEntry);
445             }
446         }
447 
448         // And close the subkey handle
449         ::RegCloseKey(encodingKey);
450     }
451 
452     //
453     //  Now loop one more time and this time we do just the aliases. For
454     //  each one we find, we look up that name in the map we've already
455     //  built and add a new entry with this new name and the same id
456     //  values we stored for the original.
457     //
458     char aliasBuf[nameBufSz + 1];
459     for (subIndex = 0;;++subIndex)
460     {
461         // Get the name of the next key
462         theSize = nameBufSz;
463         if (::RegEnumKeyExA
464         (
465             charsetKey
466             , subIndex
467             , nameBuf
468             , &theSize
469             , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS)
470         {
471             break;
472         }
473 
474         // Open this subkey
475         HKEY encodingKey;
476         if (::RegOpenKeyExA
477         (
478             charsetKey
479             , nameBuf
480             , 0
481             , KEY_READ
482             , &encodingKey))
483         {
484             continue;
485         }
486 
487         //
488         //  If it's an alias, look up the name in the map. If we find it,
489         //  then construct a new one with the new name and the aliased
490         //  ids.
491         //
492         if (isAlias(encodingKey, aliasBuf, nameBufSz))
493         {
494             int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, NULL, 0);
495             if(targetLen!=0)
496             {
497                 XMLCh* uniAlias = (XMLCh*) fManager->allocate
498                 (
499                     (targetLen + 1) * sizeof(XMLCh)
500                 );//new XMLCh[targetLen + 1];
501                 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, (LPWSTR)uniAlias, targetLen);
502                 uniAlias[targetLen] = 0;
503                 xmlch_wcsupr(uniAlias);
504 
505                 // Look up the alias name
506                 CPMapEntry* aliasedEntry = fCPMap->get(uniAlias);
507                 if (aliasedEntry)
508                 {
509                     int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, NULL, 0);
510                     if(targetLen!=0)
511                     {
512                         XMLCh* uniName = (XMLCh*) fManager->allocate
513                         (
514                             (targetLen + 1) * sizeof(XMLCh)
515                         );//new XMLCh[targetLen + 1];
516                         ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, (LPWSTR)uniName, targetLen);
517                         uniName[targetLen] = 0;
518                         xmlch_wcsupr(uniName);
519 
520                         //
521                         //  If the name is actually different, then take it.
522                         //  Otherwise, don't take it. They map aliases that are
523                         //  just different case.
524                         //
525 						if (!XMLString::equals(uniName, aliasedEntry->getEncodingName()))
526                         {
527                             CPMapEntry* newEntry = new (fManager) CPMapEntry(uniName, aliasedEntry->getIEEncoding(), fManager);
528                             fCPMap->put((void*)newEntry->getEncodingName(), newEntry);
529                         }
530 
531                         fManager->deallocate(uniName);//delete [] uniName;
532                     }
533                 }
534                 fManager->deallocate(uniAlias);//delete [] uniAlias;
535             }
536         }
537 
538         // And close the subkey handle
539         ::RegCloseKey(encodingKey);
540     }
541 
542     // And close the main key handle
543     ::RegCloseKey(charsetKey);
544 }
545 
~Win32TransService()546 Win32TransService::~Win32TransService()
547 {
548     delete fCPMap;
549 }
550 
551 
552 // ---------------------------------------------------------------------------
553 //  Win32TransService: The virtual transcoding service API
554 // ---------------------------------------------------------------------------
compareIString(const XMLCh * const comp1,const XMLCh * const comp2)555 int Win32TransService::compareIString(  const   XMLCh* const    comp1
556                                         , const XMLCh* const    comp2)
557 {
558     return xmlch_wcsicmp(comp1, comp2);
559 }
560 
561 
compareNIString(const XMLCh * const comp1,const XMLCh * const comp2,const XMLSize_t maxChars)562 int Win32TransService::compareNIString( const   XMLCh* const    comp1
563                                         , const XMLCh* const    comp2
564                                         , const XMLSize_t       maxChars)
565 {
566     return xmlch_wcsnicmp(comp1, comp2, maxChars);
567 }
568 
569 
getId() const570 const XMLCh* Win32TransService::getId() const
571 {
572     return gMyServiceId;
573 }
574 
makeNewLCPTranscoder(MemoryManager * manager)575 XMLLCPTranscoder* Win32TransService::makeNewLCPTranscoder(MemoryManager* manager)
576 {
577     // Just allocate a new LCP transcoder of our type
578     return new (manager) Win32LCPTranscoder;
579 }
580 
581 
supportsSrcOfs() const582 bool Win32TransService::supportsSrcOfs() const
583 {
584     //
585     //  Since the only mechanism we have to translate XML text in this
586     //  transcoder basically require us to do work that allows us to support
587     //  source offsets, we might as well do it.
588     //
589     return true;
590 }
591 
592 
upperCase(XMLCh * const toUpperCase)593 void Win32TransService::upperCase(XMLCh* const toUpperCase)
594 {
595     xmlch_wcsupr(toUpperCase);
596 }
597 
lowerCase(XMLCh * const toLowerCase)598 void Win32TransService::lowerCase(XMLCh* const toLowerCase)
599 {
600     xmlch_wcslwr(toLowerCase);
601 }
602 
603 XMLTranscoder*
makeNewXMLTranscoder(const XMLCh * const encodingName,XMLTransService::Codes & resValue,const XMLSize_t blockSize,MemoryManager * const manager)604 Win32TransService::makeNewXMLTranscoder(const   XMLCh* const            encodingName
605                                         ,       XMLTransService::Codes& resValue
606                                         , const XMLSize_t               blockSize
607                                         ,       MemoryManager* const    manager)
608 {
609     const XMLSize_t upLen = 1024;
610     XMLCh upEncoding[upLen + 1];
611 
612     //
613     //  Get an upper cased copy of the encoding name, since we use a hash
614     //  table and we store them all in upper case.
615     //
616     XMLString::copyNString(upEncoding, encodingName, upLen);
617     xmlch_wcsupr(upEncoding);
618 
619     // Now to try to find this guy in the CP map
620     CPMapEntry* theEntry = fCPMap->get(upEncoding);
621 
622     // If not found, then return a null pointer
623     if (!theEntry)
624     {
625         resValue = XMLTransService::UnsupportedEncoding;
626         return 0;
627     }
628 
629     // We found it, so return a Win32 transcoder for this encoding
630     return new (manager) Win32Transcoder
631     (
632         encodingName
633         , theEntry->getIEEncoding()
634         , blockSize
635         , manager
636     );
637 }
638 
639 
640 
641 
642 
643 
644 
645 
646 //---------------------------------------------------------------------------
647 //
648 //  class Win32Transcoder Implementation ...
649 //
650 //---------------------------------------------------------------------------
651 
652 
653 inline DWORD
getFlagsValue(UINT idCP,DWORD desiredFlags)654 getFlagsValue(
655             UINT    idCP,
656             DWORD   desiredFlags)
657 {
658     if (idCP == 50220 ||
659         idCP == 50227 ||
660         (idCP >= 57002 &&
661          idCP <= 57011))
662     {
663         // These code pages do not support any
664         // flag options.
665         return 0;
666     }
667     else if (idCP == 65001)
668     {
669         // UTF-8 only supports MB_ERR_INVALID_CHARS on
670         // versions of Windows since XP
671         if (!onXPOrLater)
672         {
673             return 0;
674         }
675         else
676         {
677             return desiredFlags & MB_ERR_INVALID_CHARS ?
678                         MB_ERR_INVALID_CHARS : 0;
679         }
680     }
681     else
682     {
683         return desiredFlags;
684     }
685 }
686 
687 
688 
689 // ---------------------------------------------------------------------------
690 //  Win32Transcoder: Constructors and Destructor
691 // ---------------------------------------------------------------------------
Win32Transcoder(const XMLCh * const encodingName,const unsigned int ieCP,const XMLSize_t blockSize,MemoryManager * const manager)692 Win32Transcoder::Win32Transcoder(const  XMLCh* const   encodingName
693                                 , const unsigned int   ieCP
694                                 , const XMLSize_t      blockSize
695                                 , MemoryManager* const manager) :
696 
697     XMLTranscoder(encodingName, blockSize, manager)
698     , fIECP(ieCP)
699     , fUsedDef(FALSE)
700     , fPtrUsedDef(0)
701     , fFromFlags(getFlagsValue(ieCP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS))
702 #if defined(WC_NO_BEST_FIT_CHARS)
703     , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS | WC_NO_BEST_FIT_CHARS))
704 #else
705     , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS))
706 #endif
707 {
708     // Some code pages require that MultiByteToWideChar and WideCharToMultiByte
709     // be passed 0 for their second parameters (dwFlags).  If that's the case,
710     // it's also necessary to pass null pointers for the last two parameters
711     // to WideCharToMultiByte.  This is apparently because it's impossible to
712     // determine whether or not a substitution (replacement) character was used.
713     if (fToFlags)
714     {
715         fPtrUsedDef = &fUsedDef;
716     }
717 }
718 
~Win32Transcoder()719 Win32Transcoder::~Win32Transcoder()
720 {
721 }
722 
723 
724 // ---------------------------------------------------------------------------
725 //  Win32Transcoder: The virtual transcoder API
726 // ---------------------------------------------------------------------------
727 XMLSize_t
transcodeFrom(const XMLByte * const srcData,const XMLSize_t srcCount,XMLCh * const toFill,const XMLSize_t maxChars,XMLSize_t & bytesEaten,unsigned char * const charSizes)728 Win32Transcoder::transcodeFrom( const   XMLByte* const      srcData
729                                 , const XMLSize_t           srcCount
730                                 ,       XMLCh* const        toFill
731                                 , const XMLSize_t           maxChars
732                                 ,       XMLSize_t&          bytesEaten
733                                 ,       unsigned char* const charSizes)
734 {
735     // Get temp pointers to the in and out buffers, and the chars sizes one
736     XMLCh*          outPtr = toFill;
737     const XMLByte*  inPtr  = srcData;
738     unsigned char*  sizesPtr = charSizes;
739 
740     // Calc end pointers for each of them
741     XMLCh*          outEnd = toFill + maxChars;
742     const XMLByte*  inEnd  = srcData + srcCount;
743 
744     //
745     //  Now loop until we either get our max chars, or cannot get a whole
746     //  character from the input buffer.
747     //
748     bytesEaten = 0;
749     while ((outPtr < outEnd) && (inPtr < inEnd))
750     {
751         //
752         //  If we are looking at a leading byte of a multibyte sequence,
753         //  then we are going to eat 2 bytes, else 1.
754         //
755         unsigned char toEat = ::IsDBCSLeadByteEx(fIECP, *inPtr) ?
756                                     2 : 1;
757 
758         // Make sure a whole char is in the source
759         if (inPtr + toEat > inEnd)
760             break;
761 
762         // Try to translate this next char and check for an error
763         const unsigned int converted = ::MultiByteToWideChar
764         (
765             fIECP
766             , fFromFlags
767             , (const char*)inPtr
768             , toEat
769             , reinterpret_cast<LPWSTR>(outPtr)
770             , 1
771         );
772 
773         if (converted != 1)
774         {
775             if (toEat == 1)
776             {
777                 XMLCh tmpBuf[17];
778                 XMLString::binToText((unsigned int)(*inPtr), tmpBuf, 16, 16, getMemoryManager());
779                 ThrowXMLwithMemMgr2
780                 (
781                     TranscodingException
782                     , XMLExcepts::Trans_BadSrcCP
783                     , tmpBuf
784                     , getEncodingName()
785                     , getMemoryManager()
786                 );
787             }
788              else
789             {
790                 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
791             }
792         }
793 
794         // Update the char sizes array for this round
795         *sizesPtr++ = toEat;
796 
797         // And update the bytes eaten count
798         bytesEaten += toEat;
799 
800         // And update our in/out ptrs
801         inPtr += toEat;
802         outPtr++;
803     }
804 
805     // Return the chars we output
806     return (outPtr - toFill);
807 }
808 
809 
810 XMLSize_t
transcodeTo(const XMLCh * const srcData,const XMLSize_t srcCount,XMLByte * const toFill,const XMLSize_t maxBytes,XMLSize_t & charsEaten,const UnRepOpts options)811 Win32Transcoder::transcodeTo(const  XMLCh* const    srcData
812                             , const XMLSize_t       srcCount
813                             ,       XMLByte* const  toFill
814                             , const XMLSize_t       maxBytes
815                             ,       XMLSize_t&      charsEaten
816                             , const UnRepOpts       options)
817 {
818     // Get pointers to the start and end of each buffer
819     const XMLCh*    srcPtr = srcData;
820     const XMLCh*    srcEnd = srcData + srcCount;
821     XMLByte*        outPtr = toFill;
822     XMLByte*        outEnd = toFill + maxBytes;
823 
824     //
825     //  Now loop until we either get our max chars, or cannot get a whole
826     //  character from the input buffer.
827     //
828     //  NOTE: We have to use a loop for this unfortunately because the
829     //  conversion API is too dumb to tell us how many chars it converted if
830     //  it couldn't do the whole source.
831     //
832     fUsedDef = FALSE;
833     while ((outPtr < outEnd) && (srcPtr < srcEnd))
834     {
835         //
836         //  Do one char and see if it made it.
837         const int bytesStored = ::WideCharToMultiByte
838         (
839             fIECP
840             , fToFlags
841             , reinterpret_cast<LPCWSTR>(srcPtr)
842             , 1
843             , (char*)outPtr
844             , (int)(outEnd - outPtr)
845             , 0
846             , fPtrUsedDef
847         );
848 
849         // If we didn't transcode anything, then we are done
850         if (!bytesStored)
851             break;
852 
853         //
854         //  If the defaault char was used and the options indicate that
855         //  this isn't allowed, then throw.
856         //
857         if (fUsedDef && (options == UnRep_Throw))
858         {
859             XMLCh tmpBuf[17];
860             XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
861             ThrowXMLwithMemMgr2
862             (
863                 TranscodingException
864                 , XMLExcepts::Trans_Unrepresentable
865                 , tmpBuf
866                 , getEncodingName()
867                 , getMemoryManager()
868             );
869         }
870 
871         // Update our pointers
872         outPtr += bytesStored;
873         srcPtr++;
874     }
875 
876     // Update the chars eaten
877     charsEaten = srcPtr - srcData;
878 
879     // And return the bytes we stored
880     return outPtr - toFill;
881 }
882 
883 
canTranscodeTo(const unsigned int toCheck)884 bool Win32Transcoder::canTranscodeTo(const unsigned int toCheck)
885 {
886     //
887     //  If the passed value is really a surrogate embedded together, then
888     //  we need to break it out into its two chars. Else just one.
889     //
890     XMLCh           srcBuf[2];
891     unsigned int    srcCount = 1;
892     if (toCheck & 0xFFFF0000)
893     {
894         srcBuf[0] = XMLCh((toCheck >> 10) + 0xD800);
895         srcBuf[1] = XMLCh((toCheck & 0x3FF) + 0xDC00);
896         srcCount++;
897     }
898      else
899     {
900         srcBuf[0] = XMLCh(toCheck);
901     }
902 
903     //
904     //  Use a local temp buffer that would hold any sane multi-byte char
905     //  sequence and try to transcode this guy into it.
906     //
907     char tmpBuf[64];
908 
909     fUsedDef = FALSE;
910 
911     const unsigned int bytesStored = ::WideCharToMultiByte
912     (
913         fIECP
914         , fToFlags
915         , reinterpret_cast<LPCWSTR>(srcBuf)
916         , srcCount
917         , tmpBuf
918         , 64
919         , 0
920         , fPtrUsedDef
921     );
922 
923     if (!bytesStored || fUsedDef)
924         return false;
925 
926     return true;
927 }
928 
929 
930 
931 
//---------------------------------------------------------------------------
//
//  class Win32LCPTranscoder Implementation ...
//
//---------------------------------------------------------------------------
937 
938 // ---------------------------------------------------------------------------
939 //  Win32LCPTranscoder: Constructors and Destructor
940 // ---------------------------------------------------------------------------
Win32LCPTranscoder()941 Win32LCPTranscoder::Win32LCPTranscoder()
942 {
943 }
944 
~Win32LCPTranscoder()945 Win32LCPTranscoder::~Win32LCPTranscoder()
946 {
947 }
948 
949 
950 // ---------------------------------------------------------------------------
951 //  Win32LCPTranscoder: Implementation of the virtual transcoder interface
952 // ---------------------------------------------------------------------------
calcRequiredSize(const char * const srcText,MemoryManager * const)953 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const char* const srcText
954                                                   , MemoryManager* const /*manager*/)
955 {
956     if (!srcText)
957         return 0;
958 
959     return ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, srcText, -1, NULL, 0);
960 }
961 
962 
calcRequiredSize(const XMLCh * const srcText,MemoryManager * const)963 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const XMLCh* const srcText
964                                                   , MemoryManager* const /*manager*/)
965 {
966     if (!srcText)
967         return 0;
968 
969     return ::WideCharToMultiByte(CP_ACP, 0, reinterpret_cast<LPCWSTR>(srcText), -1, NULL, 0, NULL, NULL);
970 }
971 
transcode(const XMLCh * const toTranscode,MemoryManager * const manager)972 char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode,
973                                     MemoryManager* const manager)
974 {
975     if (!toTranscode)
976         return 0;
977 
978     char* retVal = 0;
979     if (*toTranscode)
980     {
981         // Calc the needed size
982         const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager);
983 
984         // Allocate a buffer of that size plus one for the null and transcode
985         retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char)); //new char[neededLen + 1];
986         ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, retVal, (int)neededLen+1, NULL, NULL);
987 
988         // And cap it off anyway just to make sure
989         retVal[neededLen] = 0;
990     }
991      else
992     {
993         retVal = (char*) manager->allocate(sizeof(char)); //new char[1];
994         retVal[0] = 0;
995     }
996     return retVal;
997 }
998 
transcode(const char * const toTranscode,MemoryManager * const manager)999 XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode,
1000                                      MemoryManager* const manager)
1001 {
1002     if (!toTranscode)
1003         return 0;
1004 
1005     XMLCh* retVal = 0;
1006     if (*toTranscode)
1007     {
1008         // Calculate the buffer size required
1009         const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager);
1010         if (neededLen == 0)
1011         {
1012             retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
1013             retVal[0] = 0;
1014             return retVal;
1015         }
1016 
1017         // Allocate a buffer of that size plus one for the null and transcode
1018         retVal = (XMLCh*) manager->allocate((neededLen + 1) * sizeof(XMLCh)); //new XMLCh[neededLen + 1];
1019         ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)retVal, (int)neededLen + 1);
1020 
1021         // Cap it off just to make sure. We are so paranoid!
1022         retVal[neededLen] = 0;
1023     }
1024      else
1025     {
1026         retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
1027         retVal[0] = 0;
1028     }
1029     return retVal;
1030 }
1031 
1032 
transcode(const char * const toTranscode,XMLCh * const toFill,const XMLSize_t maxChars,MemoryManager * const)1033 bool Win32LCPTranscoder::transcode( const   char* const     toTranscode
1034                                     ,       XMLCh* const    toFill
1035                                     , const XMLSize_t       maxChars
1036                                     , MemoryManager* const  /*manager*/)
1037 {
1038     // Check for a couple of psycho corner cases
1039     if (!toTranscode || !maxChars)
1040     {
1041         toFill[0] = 0;
1042         return true;
1043     }
1044 
1045     if (!*toTranscode)
1046     {
1047         toFill[0] = 0;
1048         return true;
1049     }
1050 
1051     // This one has a fixed size output, so try it and if it fails it fails
1052     if ( 0 == ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)toFill, (int)(maxChars + 1)) )
1053         return false;
1054     return true;
1055 }
1056 
1057 
transcode(const XMLCh * const toTranscode,char * const toFill,const XMLSize_t maxBytes,MemoryManager * const)1058 bool Win32LCPTranscoder::transcode( const   XMLCh* const    toTranscode
1059                                     ,       char* const     toFill
1060                                     , const XMLSize_t       maxBytes
1061                                     , MemoryManager* const  /*manager*/)
1062 {
1063     // Watch for a couple of pyscho corner cases
1064     if (!toTranscode || !maxBytes)
1065     {
1066         toFill[0] = 0;
1067         return true;
1068     }
1069 
1070     if (!*toTranscode)
1071     {
1072         toFill[0] = 0;
1073         return true;
1074     }
1075 
1076     // This one has a fixed size output, so try it and if it fails it fails
1077     if ( 0 == ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, toFill, (int)(maxBytes + 1), NULL, NULL) )
1078         return false;
1079 
1080     // Cap it off just in case
1081     toFill[maxBytes] = 0;
1082     return true;
1083 }
1084 
1085 
1086 XERCES_CPP_NAMESPACE_END
1087