1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id$
20  */
21 
22 #if !defined(XERCESC_INCLUDE_GUARD_TRANSSERVICE_HPP)
23 #define XERCESC_INCLUDE_GUARD_TRANSSERVICE_HPP
24 
25 #include <xercesc/util/XMemory.hpp>
26 #include <xercesc/util/PlatformUtils.hpp>
27 #include <xercesc/framework/XMLRecognizer.hpp>
28 #include <xercesc/util/RefHashTableOf.hpp>
29 #include <xercesc/util/RefVectorOf.hpp>
30 #include <xercesc/util/Janitor.hpp>
31 
32 XERCES_CPP_NAMESPACE_BEGIN
33 
34 // Forward references
35 //class XMLPlatformUtils;
36 class XMLLCPTranscoder;
37 class XMLTranscoder;
38 class ENameMap;
39 
40 
//
//  This class is an abstract base class which is used to abstract the
//  transcoding services that Xerces uses. The parser's actual transcoding
//  needs are small, so it is desirable to allow different implementations
//  to be provided.
//
//  The transcoding service has to provide a couple of required string
//  and character operations, but its most important service is the creation
//  of transcoder objects. There are two types of transcoders, which are
//  discussed below in the XMLTranscoder class' description.
//
52 class XMLUTIL_EXPORT XMLTransService : public XMemory
53 {
54 public :
55     // -----------------------------------------------------------------------
56     //  Class specific types
57     // -----------------------------------------------------------------------
58     enum Codes
59     {
60         Ok
61         , UnsupportedEncoding
62         , InternalFailure
63         , SupportFilesNotFound
64     };
65 
66     struct TransRec
67     {
68         XMLCh       intCh;
69         XMLByte     extCh;
70     };
71 
72 
73     // -----------------------------------------------------------------------
74     //  Public constructors and destructor
75     // -----------------------------------------------------------------------
76     virtual ~XMLTransService();
77 
78 
79     // -----------------------------------------------------------------------
80     //  Non-virtual API
81     // -----------------------------------------------------------------------
82     XMLTranscoder* makeNewTranscoderFor
83     (
84         const   XMLCh* const            encodingName
85         ,       XMLTransService::Codes& resValue
86         , const XMLSize_t               blockSize
87         , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
88     );
89 
90     XMLTranscoder* makeNewTranscoderFor
91     (
92         const   char* const             encodingName
93         ,       XMLTransService::Codes& resValue
94         , const XMLSize_t               blockSize
95         , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
96     );
97 
98     XMLTranscoder* makeNewTranscoderFor
99     (
100         XMLRecognizer::Encodings        encodingEnum
101         ,       XMLTransService::Codes& resValue
102         , const XMLSize_t               blockSize
103         , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
104     );
105 
106 
107     // -----------------------------------------------------------------------
108     //  The virtual transcoding service API
109     // -----------------------------------------------------------------------
110     virtual int compareIString
111     (
112         const   XMLCh* const    comp1
113         , const XMLCh* const    comp2
114     ) = 0;
115 
116     virtual int compareNIString
117     (
118         const   XMLCh* const    comp1
119         , const XMLCh* const    comp2
120         , const XMLSize_t       maxChars
121     ) = 0;
122 
123     virtual const XMLCh* getId() const = 0;
124 
125     // -----------------------------------------------------------------------
126     //    Create a new transcoder for the local code page.
127     //
128     //  @param manager The memory manager to use.
129     // -----------------------------------------------------------------------
130     virtual XMLLCPTranscoder* makeNewLCPTranscoder(MemoryManager* manager) = 0;
131 
132     virtual bool supportsSrcOfs() const = 0;
133 
134     virtual void upperCase(XMLCh* const toUpperCase) = 0;
135     virtual void lowerCase(XMLCh* const toLowerCase) = 0;
136 
137     // -----------------------------------------------------------------------
138     //    Allow users to add their own encodings to the intrinsic mapping
139     //    table
140     //    Usage:
141     //        XMLTransService::addEncoding (
142     //            gMyEncodingNameString
143     //            , new ENameMapFor<MyTransClassType>(gMyEncodingNameString)
144     //        );
145     // -----------------------------------------------------------------------
146     static void addEncoding(const XMLCh* const encoding, ENameMap* const ownMapping);
147 
148 
149 protected :
150     // -----------------------------------------------------------------------
151     //  Hidden constructors
152     // -----------------------------------------------------------------------
153     XMLTransService();
154 
155 
156     // -----------------------------------------------------------------------
157     //  Protected virtual methods.
158     // -----------------------------------------------------------------------
159 #ifdef OS390
160     friend class Uniconv390TransService;
161 #endif
162     virtual XMLTranscoder* makeNewXMLTranscoder
163     (
164         const   XMLCh* const            encodingName
165         ,       XMLTransService::Codes& resValue
166         , const XMLSize_t               blockSize
167         , MemoryManager* const          manager
168     ) = 0;
169 
170     // -----------------------------------------------------------------------
171     //  Protected init method for platform utils to call
172     // -----------------------------------------------------------------------
173     friend class XMLPlatformUtils;
174     virtual void initTransService();
175 
176     // -----------------------------------------------------------------------
177     // protected static members
178     //  gMappings
179     //      This is a hash table of ENameMap objects. It is created and filled
180     //      in when the platform init calls our initTransService() method.
181     //
182     //  gMappingsRecognizer
183     //      This is an array of ENameMap objects, predefined for those
184     //      already recognized by XMLRecognizer::Encodings.
185     //
186 
187     static RefHashTableOf<ENameMap>*    gMappings;
188     static RefVectorOf<ENameMap>*       gMappingsRecognizer;
189 
190 private :
191     // -----------------------------------------------------------------------
192     //  Unimplemented constructors and operators
193     // -----------------------------------------------------------------------
194     XMLTransService(const XMLTransService&);
195     XMLTransService& operator=(const XMLTransService&);
196 
197     // -----------------------------------------------------------------------
198     //  Hidden method to enable/disable strict IANA encoding check
199     //  Caller: XMLPlatformUtils
200     // -----------------------------------------------------------------------
201     void strictIANAEncoding(const bool newState);
202     bool isStrictIANAEncoding();
203 
204     friend class XMLInitializer;
205 };
206 
207 /**
208   *   <code>XMLTranscoder</code> is for transcoding non-local code
209   *   page encodings, i.e.  named encodings. These are used internally
210   *   by the scanner to internalize raw XML into the internal Unicode
211   *   format, and by writer classes to convert that internal Unicode
212   *   format (which comes out of the parser) back out to a format that
213   *   the receiving client code wants to use.
214   */
215 class XMLUTIL_EXPORT XMLTranscoder : public XMemory
216 {
217 public :
218 
219     /**
220      * This enum is used by the <code>transcodeTo()</code> method
221      * to indicate how to react to unrepresentable characters. The
222      * <code>transcodeFrom()</code> method always works the
223      * same. It will consider any invalid data to be an error and
224      * throw.
225      */
226     enum UnRepOpts
227     {
228         UnRep_Throw        /**< Throw an exception */
229         , UnRep_RepChar        /**< Use the replacement char */
230     };
231 
232 
233     /** @name Destructor. */
234     //@{
235 
236      /**
237       * Destructor for XMLTranscoder
238       *
239       */
240     virtual ~XMLTranscoder();
241     //@}
242 
243 
244 
245     /** @name The virtual transcoding interface */
246     //@{
247 
248     /** Converts from the encoding of the service to the internal XMLCh* encoding
249       *
250       * @param srcData the source buffer to be transcoded
251       * @param srcCount number of bytes in the source buffer
252       * @param toFill the destination buffer
253       * @param maxChars the max number of characters in the destination buffer
254       * @param bytesEaten after transcoding, this will hold the number of bytes
255       *    that were processed from the source buffer
256       * @param charSizes an array which must be at least as big as maxChars
257       *    into which will be inserted values that indicate how many
258       *    bytes from the input went into each XMLCh that was created
259       *    into toFill. Since many encodings use variable numbers of
260       *    byte per character, this provides a means to find out what
261       *    bytes in the input went into making a particular output
262       *    UTF-16 character.
263       * @return Returns the number of chars put into the target buffer
264       */
265 
266 
267     virtual XMLSize_t transcodeFrom
268     (
269         const   XMLByte* const          srcData
270         , const XMLSize_t               srcCount
271         ,       XMLCh* const            toFill
272         , const XMLSize_t               maxChars
273         ,       XMLSize_t&              bytesEaten
274         ,       unsigned char* const    charSizes
275     ) = 0;
276 
277     /** Converts from the internal XMLCh* encoding to the encoding of the service
278       *
279       * @param srcData    the source buffer to be transcoded
280       * @param srcCount   number of characters in the source buffer
281       * @param toFill     the destination buffer
282       * @param maxBytes   the max number of bytes in the destination buffer
283       * @param charsEaten after transcoding, this will hold the number of chars
284       *    that were processed from the source buffer
285       * @param options    options to pass to the transcoder that explain how to
286       *    respond to an unrepresentable character
287       * @return Returns the number of chars put into the target buffer
288       */
289 
290     virtual XMLSize_t transcodeTo
291     (
292         const   XMLCh* const    srcData
293         , const XMLSize_t       srcCount
294         ,       XMLByte* const  toFill
295         , const XMLSize_t       maxBytes
296         ,       XMLSize_t&      charsEaten
297         , const UnRepOpts       options
298     ) = 0;
299 
300     /** Query whether the transcoder can handle a given character
301       *
302       * @param toCheck   the character code point to check
303       */
304 
305     virtual bool canTranscodeTo
306     (
307         const   unsigned int    toCheck
308     ) = 0;
309 
310     //@}
311 
312     /** @name Getter methods */
313     //@{
314 
315     /** Get the internal block size
316      *
317        * @return The block size indicated in the constructor.
318        */
319     XMLSize_t getBlockSize() const;
320 
321     /** Get the encoding name
322       *
323       * @return the name of the encoding that this
324       *    <code>XMLTranscoder</code> object is for
325       */
326     const XMLCh* getEncodingName() const;
327     //@}
328 
329     /** @name Getter methods*/
330     //@{
331 
332     /** Get the plugged-in memory manager
333       *
334       * This method returns the plugged-in memory manager user for dynamic
335       * memory allocation/deallocation.
336       *
337       * @return the plugged-in memory manager
338       */
339     MemoryManager* getMemoryManager() const;
340 
341     //@}
342 
343 protected :
344     // -----------------------------------------------------------------------
345     //  Hidden constructors
346     // -----------------------------------------------------------------------
347     XMLTranscoder
348     (
349         const   XMLCh* const    encodingName
350         , const XMLSize_t       blockSize
351         , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
352     );
353 
354 
355     // -----------------------------------------------------------------------
356     //  Protected helper methods
357     // -----------------------------------------------------------------------
358 
359 private :
360     // -----------------------------------------------------------------------
361     //  Unimplemented constructors and operators
362     // -----------------------------------------------------------------------
363     XMLTranscoder(const XMLTranscoder&);
364     XMLTranscoder& operator=(const XMLTranscoder&);
365 
366     // -----------------------------------------------------------------------
367     //  Private data members
368     //
369     //  fBlockSize
370     //      This is the block size indicated in the constructor.
371     //
372     //  fEncodingName
373     //      This is the name of the encoding this encoder is for. All basic
374     //      XML transcoder's are for named encodings.
375     // -----------------------------------------------------------------------
376     XMLSize_t       fBlockSize;
377     XMLCh*          fEncodingName;
378     MemoryManager*  fMemoryManager;
379 };
380 
381 
382 //
383 //  This class is a specialized transcoder that only transcodes between
384 //  the internal XMLCh format and the local code page. It is specialized
385 //  for the very common job of translating data from the client app's
386 //  native code page to the internal format and vice versa.
387 //
388 class XMLUTIL_EXPORT XMLLCPTranscoder : public XMemory
389 {
390 public :
391     // -----------------------------------------------------------------------
392     //  Public constructors and destructor
393     // -----------------------------------------------------------------------
394     virtual ~XMLLCPTranscoder();
395 
396 
397     // -----------------------------------------------------------------------
398     //  The virtual transcoder API
399     //
400     //  NOTE:   All these APIs don't include null terminator characters in
401     //          their parameters. So calcRequiredSize() returns the number
402     //          of actual chars, not including the null. maxBytes and maxChars
403     //          parameters refer to actual chars, not including the null so
404     //          its assumed that the buffer is physically one char or byte
405     //          larger.
406     // -----------------------------------------------------------------------
407 
408     // -----------------------------------------------------------------------
409     //  The 'normal' way to transcode a XMLCh-string from/to local string
410     //  representation
411     //
412     //  NOTE: Both methods return a string allocated via the MemoryManager.
413     //        It is the responsibility of the calling environment to
414     //        release this string after use.
415     // -----------------------------------------------------------------------
416     virtual char* transcode(const XMLCh* const toTranscode,
417                             MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
418 
419     virtual XMLCh* transcode(const char* const toTranscode,
420                              MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
421 
422 
423     // -----------------------------------------------------------------------
424     //  DEPRECATED old transcode interface
425     // -----------------------------------------------------------------------
426     virtual XMLSize_t calcRequiredSize(const char* const srcText
427         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
428 
429     virtual XMLSize_t calcRequiredSize(const XMLCh* const srcText
430         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
431 
432     virtual bool transcode
433     (
434         const   char* const     toTranscode
435         ,       XMLCh* const    toFill
436         , const XMLSize_t       maxChars
437         , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
438     ) = 0;
439 
440     virtual bool transcode
441     (
442         const   XMLCh* const    toTranscode
443         ,       char* const     toFill
444         , const XMLSize_t       maxBytes
445         , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
446     ) = 0;
447 
448 
449 protected :
450     // -----------------------------------------------------------------------
451     //  Hidden constructors
452     // -----------------------------------------------------------------------
453     XMLLCPTranscoder();
454 
455 
456 private :
457     // -----------------------------------------------------------------------
458     //  Unimplemented constructors and operators
459     // -----------------------------------------------------------------------
460     XMLLCPTranscoder(const XMLLCPTranscoder&);
461     XMLLCPTranscoder& operator=(const XMLLCPTranscoder&);
462 };
463 
464 //
465 // This class can be used to transcode to a target encoding. It manages the
466 // memory allocated for the transcode in an exception safe manner, automatically
467 // deleting it when the class goes out of scope.
468 //
469 class XMLUTIL_EXPORT TranscodeToStr
470 {
471 public:
472     // -----------------------------------------------------------------------
473     //  Public constructors and destructor
474     // -----------------------------------------------------------------------
475 
476     /** Converts from the internal XMLCh* encoding to the specified encoding
477       *
478       * @param in       the null terminated source buffer to be transcoded
479       * @param encoding the name of the encoding to transcode to
480       * @param manager  the memory manager to use
481       */
482     TranscodeToStr(const XMLCh *in, const char *encoding,
483                    MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
484 
485     /** Converts from the internal XMLCh* encoding to the specified encoding
486       *
487       * @param in       the source buffer to be transcoded
488       * @param length   the length of the source buffer
489       * @param encoding the name of the encoding to transcode to
490       * @param manager  the memory manager to use
491       */
492     TranscodeToStr(const XMLCh *in, XMLSize_t length, const char *encoding,
493                    MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
494 
495     /** Converts from the internal XMLCh* encoding to the specified encoding
496       *
497       * @param in       the null terminated source buffer to be transcoded
498       * @param trans    the transcoder to use
499       * @param manager  the memory manager to use
500       */
501     TranscodeToStr(const XMLCh *in, XMLTranscoder* trans,
502                    MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
503 
504     /** Converts from the internal XMLCh* encoding to the specified encoding
505       *
506       * @param in       the source buffer to be transcoded
507       * @param length   the length of the source buffer
508       * @param trans    the transcoder to use
509       * @param manager  the memory manager to use
510       */
511     TranscodeToStr(const XMLCh *in, XMLSize_t length, XMLTranscoder* trans,
512                    MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
513 
514     ~TranscodeToStr();
515 
516     /** @name Getter methods */
517     //@{
518 
519     /** Returns the transcoded, null terminated string
520       * @return the transcoded string
521       */
522     const XMLByte *str() const;
523 
524     /** Returns the transcoded, null terminated string - adopting
525       * the memory allocated to it from the TranscodeToStr object
526       * @return the transcoded string
527       */
528     XMLByte *adopt();
529 
530     /** Returns the length of the transcoded string in bytes. The length
531       * does not include the null terminator.
532       * @return the length of the transcoded string in bytes
533       */
534     XMLSize_t length () const;
535 
536     //@}
537 
538 private:
539     // -----------------------------------------------------------------------
540     //  Unimplemented constructors and operators
541     // -----------------------------------------------------------------------
542     TranscodeToStr(const TranscodeToStr &);
543     TranscodeToStr &operator=(const TranscodeToStr &);
544 
545     // -----------------------------------------------------------------------
546     //  Private helper methods
547     // -----------------------------------------------------------------------
548     void transcode(const XMLCh *in, XMLSize_t len, XMLTranscoder* trans);
549 
550     // -----------------------------------------------------------------------
551     //  Private data members
552     //
553     //  fString
554     //      The transcoded string
555     //
556     //  fBytesWritten
557     //      The length of the transcoded string in bytes
558     // -----------------------------------------------------------------------
559     ArrayJanitor<XMLByte> fString;
560     XMLSize_t fBytesWritten;
561     MemoryManager *fMemoryManager;
562 };
563 
564 //
565 // This class can be used to transcode from a source encoding. It manages the
566 // memory allocated for the transcode in an exception safe manner, automatically
567 // deleting it when the class goes out of scope.
568 //
569 class XMLUTIL_EXPORT TranscodeFromStr
570 {
571 public:
572     // -----------------------------------------------------------------------
573     //  Public constructors and destructor
574     // -----------------------------------------------------------------------
575 
576     /** Converts from the specified encoding to the internal XMLCh* encoding
577       *
578       * @param data     the source buffer to be transcoded
579       * @param length   the length of the source buffer
580       * @param encoding the name of the encoding to transcode to
581       * @param manager  the memory manager to use
582       */
583     TranscodeFromStr(const XMLByte *data, XMLSize_t length, const char *encoding,
584                      MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
585 
586     /** Converts from the specified encoding to the internal XMLCh* encoding
587       *
588       * @param data     the source buffer to be transcoded
589       * @param length   the length of the source buffer
590       * @param trans    the transcoder to use
591       * @param manager  the memory manager to use
592       */
593     TranscodeFromStr(const XMLByte *data, XMLSize_t length, XMLTranscoder *trans,
594                      MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
595 
596     ~TranscodeFromStr();
597 
598     /** @name Getter methods */
599     //@{
600 
601     /** Returns the transcoded, null terminated string
602       * @return the transcoded string
603       */
604     const XMLCh *str() const;
605 
606     /** Returns the transcoded, null terminated string - adopting
607       * the memory allocated to it from the TranscodeFromStr object
608       * @return the transcoded string
609       */
610     XMLCh *adopt();
611 
612     /** Returns the length of the transcoded string in characters. The length
613       * does not include the null terminator.
614       * @return the length of the transcoded string in characters
615       */
616     XMLSize_t length() const;
617 
618     //@}
619 
620 private:
621     // -----------------------------------------------------------------------
622     //  Unimplemented constructors and operators
623     // -----------------------------------------------------------------------
624     TranscodeFromStr(const TranscodeFromStr &);
625     TranscodeFromStr &operator=(const TranscodeFromStr &);
626 
627     // -----------------------------------------------------------------------
628     //  Private helper methods
629     // -----------------------------------------------------------------------
630     void transcode(const XMLByte *in, XMLSize_t length, XMLTranscoder *trans);
631 
632     // -----------------------------------------------------------------------
633     //  Private data members
634     //
635     //  fString
636     //      The transcoded string
637     //
638     //  fCharsWritten
639     //      The length of the transcoded string in characters
640     // -----------------------------------------------------------------------
641     ArrayJanitor<XMLCh> fString;
642     XMLSize_t fCharsWritten;
643     MemoryManager *fMemoryManager;
644 };
645 
646 // ---------------------------------------------------------------------------
647 //  XMLTranscoder: Getter methods
648 // ---------------------------------------------------------------------------
getMemoryManager() const649 inline MemoryManager* XMLTranscoder::getMemoryManager() const
650 {
651     return fMemoryManager;
652 }
653 
// ---------------------------------------------------------------------------
//  XMLTranscoder: Getter methods for block size and encoding name
// ---------------------------------------------------------------------------
getBlockSize() const657 inline XMLSize_t XMLTranscoder::getBlockSize() const
658 {
659     return fBlockSize;
660 }
661 
getEncodingName() const662 inline const XMLCh* XMLTranscoder::getEncodingName() const
663 {
664     return fEncodingName;
665 }
666 
667 // ---------------------------------------------------------------------------
668 //  TranscodeToStr: Getter methods
669 // ---------------------------------------------------------------------------
str() const670 inline const XMLByte *TranscodeToStr::str() const
671 {
672     return fString.get();
673 }
674 
adopt()675 inline XMLByte *TranscodeToStr::adopt()
676 {
677     fBytesWritten = 0;
678     return fString.release();
679 }
680 
length() const681 inline XMLSize_t TranscodeToStr::length () const
682 {
683     return fBytesWritten;
684 }
685 
686 // ---------------------------------------------------------------------------
687 //  TranscodeFromStr: Getter methods
688 // ---------------------------------------------------------------------------
str() const689 inline const XMLCh *TranscodeFromStr::str() const
690 {
691     return fString.get();
692 }
693 
adopt()694 inline XMLCh *TranscodeFromStr::adopt()
695 {
696     fCharsWritten = 0;
697     return fString.release();
698 }
699 
length() const700 inline XMLSize_t TranscodeFromStr::length() const
701 {
702     return fCharsWritten;
703 }
704 
705 XERCES_CPP_NAMESPACE_END
706 
707 #endif
708