1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * $Id$
20 */
21
22 #if !defined(XERCESC_INCLUDE_GUARD_TRANSSERVICE_HPP)
23 #define XERCESC_INCLUDE_GUARD_TRANSSERVICE_HPP
24
25 #include <xercesc/util/XMemory.hpp>
26 #include <xercesc/util/PlatformUtils.hpp>
27 #include <xercesc/framework/XMLRecognizer.hpp>
28 #include <xercesc/util/RefHashTableOf.hpp>
29 #include <xercesc/util/RefVectorOf.hpp>
30 #include <xercesc/util/Janitor.hpp>
31
32 XERCES_CPP_NAMESPACE_BEGIN
33
34 // Forward references
35 //class XMLPlatformUtils;
36 class XMLLCPTranscoder;
37 class XMLTranscoder;
38 class ENameMap;
39
40
41 //
42 // This class is an abstract base class which are used to abstract the
43 // transcoding services that Xerces uses. The parser's actual transcoding
44 // needs are small so it is desirable to allow different implementations
45 // to be provided.
46 //
47 // The transcoding service has to provide a couple of required string
48 // and character operations, but its most important service is the creation
49 // of transcoder objects. There are two types of transcoders, which are
50 // discussed below in the XMLTranscoder class' description.
51 //
52 class XMLUTIL_EXPORT XMLTransService : public XMemory
53 {
54 public :
55 // -----------------------------------------------------------------------
56 // Class specific types
57 // -----------------------------------------------------------------------
58 enum Codes
59 {
60 Ok
61 , UnsupportedEncoding
62 , InternalFailure
63 , SupportFilesNotFound
64 };
65
66 struct TransRec
67 {
68 XMLCh intCh;
69 XMLByte extCh;
70 };
71
72
73 // -----------------------------------------------------------------------
74 // Public constructors and destructor
75 // -----------------------------------------------------------------------
76 virtual ~XMLTransService();
77
78
79 // -----------------------------------------------------------------------
80 // Non-virtual API
81 // -----------------------------------------------------------------------
82 XMLTranscoder* makeNewTranscoderFor
83 (
84 const XMLCh* const encodingName
85 , XMLTransService::Codes& resValue
86 , const XMLSize_t blockSize
87 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
88 );
89
90 XMLTranscoder* makeNewTranscoderFor
91 (
92 const char* const encodingName
93 , XMLTransService::Codes& resValue
94 , const XMLSize_t blockSize
95 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
96 );
97
98 XMLTranscoder* makeNewTranscoderFor
99 (
100 XMLRecognizer::Encodings encodingEnum
101 , XMLTransService::Codes& resValue
102 , const XMLSize_t blockSize
103 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
104 );
105
106
107 // -----------------------------------------------------------------------
108 // The virtual transcoding service API
109 // -----------------------------------------------------------------------
110 virtual int compareIString
111 (
112 const XMLCh* const comp1
113 , const XMLCh* const comp2
114 ) = 0;
115
116 virtual int compareNIString
117 (
118 const XMLCh* const comp1
119 , const XMLCh* const comp2
120 , const XMLSize_t maxChars
121 ) = 0;
122
123 virtual const XMLCh* getId() const = 0;
124
125 // -----------------------------------------------------------------------
126 // Create a new transcoder for the local code page.
127 //
128 // @param manager The memory manager to use.
129 // -----------------------------------------------------------------------
130 virtual XMLLCPTranscoder* makeNewLCPTranscoder(MemoryManager* manager) = 0;
131
132 virtual bool supportsSrcOfs() const = 0;
133
134 virtual void upperCase(XMLCh* const toUpperCase) = 0;
135 virtual void lowerCase(XMLCh* const toLowerCase) = 0;
136
137 // -----------------------------------------------------------------------
138 // Allow users to add their own encodings to the intrinsic mapping
139 // table
140 // Usage:
141 // XMLTransService::addEncoding (
142 // gMyEncodingNameString
143 // , new ENameMapFor<MyTransClassType>(gMyEncodingNameString)
144 // );
145 // -----------------------------------------------------------------------
146 static void addEncoding(const XMLCh* const encoding, ENameMap* const ownMapping);
147
148
149 protected :
150 // -----------------------------------------------------------------------
151 // Hidden constructors
152 // -----------------------------------------------------------------------
153 XMLTransService();
154
155
156 // -----------------------------------------------------------------------
157 // Protected virtual methods.
158 // -----------------------------------------------------------------------
159 #ifdef OS390
160 friend class Uniconv390TransService;
161 #endif
162 virtual XMLTranscoder* makeNewXMLTranscoder
163 (
164 const XMLCh* const encodingName
165 , XMLTransService::Codes& resValue
166 , const XMLSize_t blockSize
167 , MemoryManager* const manager
168 ) = 0;
169
170 // -----------------------------------------------------------------------
171 // Protected init method for platform utils to call
172 // -----------------------------------------------------------------------
173 friend class XMLPlatformUtils;
174 virtual void initTransService();
175
176 // -----------------------------------------------------------------------
177 // protected static members
178 // gMappings
179 // This is a hash table of ENameMap objects. It is created and filled
180 // in when the platform init calls our initTransService() method.
181 //
182 // gMappingsRecognizer
183 // This is an array of ENameMap objects, predefined for those
184 // already recognized by XMLRecognizer::Encodings.
185 //
186
187 static RefHashTableOf<ENameMap>* gMappings;
188 static RefVectorOf<ENameMap>* gMappingsRecognizer;
189
190 private :
191 // -----------------------------------------------------------------------
192 // Unimplemented constructors and operators
193 // -----------------------------------------------------------------------
194 XMLTransService(const XMLTransService&);
195 XMLTransService& operator=(const XMLTransService&);
196
197 // -----------------------------------------------------------------------
198 // Hidden method to enable/disable strict IANA encoding check
199 // Caller: XMLPlatformUtils
200 // -----------------------------------------------------------------------
201 void strictIANAEncoding(const bool newState);
202 bool isStrictIANAEncoding();
203
204 friend class XMLInitializer;
205 };
206
207 /**
208 * <code>XMLTranscoder</code> is for transcoding non-local code
209 * page encodings, i.e. named encodings. These are used internally
210 * by the scanner to internalize raw XML into the internal Unicode
211 * format, and by writer classes to convert that internal Unicode
212 * format (which comes out of the parser) back out to a format that
213 * the receiving client code wants to use.
214 */
215 class XMLUTIL_EXPORT XMLTranscoder : public XMemory
216 {
217 public :
218
219 /**
220 * This enum is used by the <code>transcodeTo()</code> method
221 * to indicate how to react to unrepresentable characters. The
222 * <code>transcodeFrom()</code> method always works the
223 * same. It will consider any invalid data to be an error and
224 * throw.
225 */
226 enum UnRepOpts
227 {
228 UnRep_Throw /**< Throw an exception */
229 , UnRep_RepChar /**< Use the replacement char */
230 };
231
232
233 /** @name Destructor. */
234 //@{
235
236 /**
237 * Destructor for XMLTranscoder
238 *
239 */
240 virtual ~XMLTranscoder();
241 //@}
242
243
244
245 /** @name The virtual transcoding interface */
246 //@{
247
248 /** Converts from the encoding of the service to the internal XMLCh* encoding
249 *
250 * @param srcData the source buffer to be transcoded
251 * @param srcCount number of bytes in the source buffer
252 * @param toFill the destination buffer
253 * @param maxChars the max number of characters in the destination buffer
254 * @param bytesEaten after transcoding, this will hold the number of bytes
255 * that were processed from the source buffer
256 * @param charSizes an array which must be at least as big as maxChars
257 * into which will be inserted values that indicate how many
258 * bytes from the input went into each XMLCh that was created
259 * into toFill. Since many encodings use variable numbers of
260 * byte per character, this provides a means to find out what
261 * bytes in the input went into making a particular output
262 * UTF-16 character.
263 * @return Returns the number of chars put into the target buffer
264 */
265
266
267 virtual XMLSize_t transcodeFrom
268 (
269 const XMLByte* const srcData
270 , const XMLSize_t srcCount
271 , XMLCh* const toFill
272 , const XMLSize_t maxChars
273 , XMLSize_t& bytesEaten
274 , unsigned char* const charSizes
275 ) = 0;
276
277 /** Converts from the internal XMLCh* encoding to the encoding of the service
278 *
279 * @param srcData the source buffer to be transcoded
280 * @param srcCount number of characters in the source buffer
281 * @param toFill the destination buffer
282 * @param maxBytes the max number of bytes in the destination buffer
283 * @param charsEaten after transcoding, this will hold the number of chars
284 * that were processed from the source buffer
285 * @param options options to pass to the transcoder that explain how to
286 * respond to an unrepresentable character
287 * @return Returns the number of chars put into the target buffer
288 */
289
290 virtual XMLSize_t transcodeTo
291 (
292 const XMLCh* const srcData
293 , const XMLSize_t srcCount
294 , XMLByte* const toFill
295 , const XMLSize_t maxBytes
296 , XMLSize_t& charsEaten
297 , const UnRepOpts options
298 ) = 0;
299
300 /** Query whether the transcoder can handle a given character
301 *
302 * @param toCheck the character code point to check
303 */
304
305 virtual bool canTranscodeTo
306 (
307 const unsigned int toCheck
308 ) = 0;
309
310 //@}
311
312 /** @name Getter methods */
313 //@{
314
315 /** Get the internal block size
316 *
317 * @return The block size indicated in the constructor.
318 */
319 XMLSize_t getBlockSize() const;
320
321 /** Get the encoding name
322 *
323 * @return the name of the encoding that this
324 * <code>XMLTranscoder</code> object is for
325 */
326 const XMLCh* getEncodingName() const;
327 //@}
328
329 /** @name Getter methods*/
330 //@{
331
332 /** Get the plugged-in memory manager
333 *
334 * This method returns the plugged-in memory manager user for dynamic
335 * memory allocation/deallocation.
336 *
337 * @return the plugged-in memory manager
338 */
339 MemoryManager* getMemoryManager() const;
340
341 //@}
342
343 protected :
344 // -----------------------------------------------------------------------
345 // Hidden constructors
346 // -----------------------------------------------------------------------
347 XMLTranscoder
348 (
349 const XMLCh* const encodingName
350 , const XMLSize_t blockSize
351 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
352 );
353
354
355 // -----------------------------------------------------------------------
356 // Protected helper methods
357 // -----------------------------------------------------------------------
358
359 private :
360 // -----------------------------------------------------------------------
361 // Unimplemented constructors and operators
362 // -----------------------------------------------------------------------
363 XMLTranscoder(const XMLTranscoder&);
364 XMLTranscoder& operator=(const XMLTranscoder&);
365
366 // -----------------------------------------------------------------------
367 // Private data members
368 //
369 // fBlockSize
370 // This is the block size indicated in the constructor.
371 //
372 // fEncodingName
373 // This is the name of the encoding this encoder is for. All basic
374 // XML transcoder's are for named encodings.
375 // -----------------------------------------------------------------------
376 XMLSize_t fBlockSize;
377 XMLCh* fEncodingName;
378 MemoryManager* fMemoryManager;
379 };
380
381
382 //
383 // This class is a specialized transcoder that only transcodes between
384 // the internal XMLCh format and the local code page. It is specialized
385 // for the very common job of translating data from the client app's
386 // native code page to the internal format and vice versa.
387 //
388 class XMLUTIL_EXPORT XMLLCPTranscoder : public XMemory
389 {
390 public :
391 // -----------------------------------------------------------------------
392 // Public constructors and destructor
393 // -----------------------------------------------------------------------
394 virtual ~XMLLCPTranscoder();
395
396
397 // -----------------------------------------------------------------------
398 // The virtual transcoder API
399 //
400 // NOTE: All these APIs don't include null terminator characters in
401 // their parameters. So calcRequiredSize() returns the number
402 // of actual chars, not including the null. maxBytes and maxChars
403 // parameters refer to actual chars, not including the null so
404 // its assumed that the buffer is physically one char or byte
405 // larger.
406 // -----------------------------------------------------------------------
407
408 // -----------------------------------------------------------------------
409 // The 'normal' way to transcode a XMLCh-string from/to local string
410 // representation
411 //
412 // NOTE: Both methods return a string allocated via the MemoryManager.
413 // It is the responsibility of the calling environment to
414 // release this string after use.
415 // -----------------------------------------------------------------------
416 virtual char* transcode(const XMLCh* const toTranscode,
417 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
418
419 virtual XMLCh* transcode(const char* const toTranscode,
420 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
421
422
423 // -----------------------------------------------------------------------
424 // DEPRECATED old transcode interface
425 // -----------------------------------------------------------------------
426 virtual XMLSize_t calcRequiredSize(const char* const srcText
427 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
428
429 virtual XMLSize_t calcRequiredSize(const XMLCh* const srcText
430 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
431
432 virtual bool transcode
433 (
434 const char* const toTranscode
435 , XMLCh* const toFill
436 , const XMLSize_t maxChars
437 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
438 ) = 0;
439
440 virtual bool transcode
441 (
442 const XMLCh* const toTranscode
443 , char* const toFill
444 , const XMLSize_t maxBytes
445 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
446 ) = 0;
447
448
449 protected :
450 // -----------------------------------------------------------------------
451 // Hidden constructors
452 // -----------------------------------------------------------------------
453 XMLLCPTranscoder();
454
455
456 private :
457 // -----------------------------------------------------------------------
458 // Unimplemented constructors and operators
459 // -----------------------------------------------------------------------
460 XMLLCPTranscoder(const XMLLCPTranscoder&);
461 XMLLCPTranscoder& operator=(const XMLLCPTranscoder&);
462 };
463
464 //
465 // This class can be used to transcode to a target encoding. It manages the
466 // memory allocated for the transcode in an exception safe manner, automatically
467 // deleting it when the class goes out of scope.
468 //
469 class XMLUTIL_EXPORT TranscodeToStr
470 {
471 public:
472 // -----------------------------------------------------------------------
473 // Public constructors and destructor
474 // -----------------------------------------------------------------------
475
476 /** Converts from the internal XMLCh* encoding to the specified encoding
477 *
478 * @param in the null terminated source buffer to be transcoded
479 * @param encoding the name of the encoding to transcode to
480 * @param manager the memory manager to use
481 */
482 TranscodeToStr(const XMLCh *in, const char *encoding,
483 MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
484
485 /** Converts from the internal XMLCh* encoding to the specified encoding
486 *
487 * @param in the source buffer to be transcoded
488 * @param length the length of the source buffer
489 * @param encoding the name of the encoding to transcode to
490 * @param manager the memory manager to use
491 */
492 TranscodeToStr(const XMLCh *in, XMLSize_t length, const char *encoding,
493 MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
494
495 /** Converts from the internal XMLCh* encoding to the specified encoding
496 *
497 * @param in the null terminated source buffer to be transcoded
498 * @param trans the transcoder to use
499 * @param manager the memory manager to use
500 */
501 TranscodeToStr(const XMLCh *in, XMLTranscoder* trans,
502 MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
503
504 /** Converts from the internal XMLCh* encoding to the specified encoding
505 *
506 * @param in the source buffer to be transcoded
507 * @param length the length of the source buffer
508 * @param trans the transcoder to use
509 * @param manager the memory manager to use
510 */
511 TranscodeToStr(const XMLCh *in, XMLSize_t length, XMLTranscoder* trans,
512 MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
513
514 ~TranscodeToStr();
515
516 /** @name Getter methods */
517 //@{
518
519 /** Returns the transcoded, null terminated string
520 * @return the transcoded string
521 */
522 const XMLByte *str() const;
523
524 /** Returns the transcoded, null terminated string - adopting
525 * the memory allocated to it from the TranscodeToStr object
526 * @return the transcoded string
527 */
528 XMLByte *adopt();
529
530 /** Returns the length of the transcoded string in bytes. The length
531 * does not include the null terminator.
532 * @return the length of the transcoded string in bytes
533 */
534 XMLSize_t length () const;
535
536 //@}
537
538 private:
539 // -----------------------------------------------------------------------
540 // Unimplemented constructors and operators
541 // -----------------------------------------------------------------------
542 TranscodeToStr(const TranscodeToStr &);
543 TranscodeToStr &operator=(const TranscodeToStr &);
544
545 // -----------------------------------------------------------------------
546 // Private helper methods
547 // -----------------------------------------------------------------------
548 void transcode(const XMLCh *in, XMLSize_t len, XMLTranscoder* trans);
549
550 // -----------------------------------------------------------------------
551 // Private data members
552 //
553 // fString
554 // The transcoded string
555 //
556 // fBytesWritten
557 // The length of the transcoded string in bytes
558 // -----------------------------------------------------------------------
559 ArrayJanitor<XMLByte> fString;
560 XMLSize_t fBytesWritten;
561 MemoryManager *fMemoryManager;
562 };
563
564 //
565 // This class can be used to transcode from a source encoding. It manages the
566 // memory allocated for the transcode in an exception safe manner, automatically
567 // deleting it when the class goes out of scope.
568 //
569 class XMLUTIL_EXPORT TranscodeFromStr
570 {
571 public:
572 // -----------------------------------------------------------------------
573 // Public constructors and destructor
574 // -----------------------------------------------------------------------
575
576 /** Converts from the specified encoding to the internal XMLCh* encoding
577 *
578 * @param data the source buffer to be transcoded
579 * @param length the length of the source buffer
580 * @param encoding the name of the encoding to transcode to
581 * @param manager the memory manager to use
582 */
583 TranscodeFromStr(const XMLByte *data, XMLSize_t length, const char *encoding,
584 MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
585
586 /** Converts from the specified encoding to the internal XMLCh* encoding
587 *
588 * @param data the source buffer to be transcoded
589 * @param length the length of the source buffer
590 * @param trans the transcoder to use
591 * @param manager the memory manager to use
592 */
593 TranscodeFromStr(const XMLByte *data, XMLSize_t length, XMLTranscoder *trans,
594 MemoryManager *manager = XMLPlatformUtils::fgMemoryManager);
595
596 ~TranscodeFromStr();
597
598 /** @name Getter methods */
599 //@{
600
601 /** Returns the transcoded, null terminated string
602 * @return the transcoded string
603 */
604 const XMLCh *str() const;
605
606 /** Returns the transcoded, null terminated string - adopting
607 * the memory allocated to it from the TranscodeFromStr object
608 * @return the transcoded string
609 */
610 XMLCh *adopt();
611
612 /** Returns the length of the transcoded string in characters. The length
613 * does not include the null terminator.
614 * @return the length of the transcoded string in characters
615 */
616 XMLSize_t length() const;
617
618 //@}
619
620 private:
621 // -----------------------------------------------------------------------
622 // Unimplemented constructors and operators
623 // -----------------------------------------------------------------------
624 TranscodeFromStr(const TranscodeFromStr &);
625 TranscodeFromStr &operator=(const TranscodeFromStr &);
626
627 // -----------------------------------------------------------------------
628 // Private helper methods
629 // -----------------------------------------------------------------------
630 void transcode(const XMLByte *in, XMLSize_t length, XMLTranscoder *trans);
631
632 // -----------------------------------------------------------------------
633 // Private data members
634 //
635 // fString
636 // The transcoded string
637 //
638 // fCharsWritten
639 // The length of the transcoded string in characters
640 // -----------------------------------------------------------------------
641 ArrayJanitor<XMLCh> fString;
642 XMLSize_t fCharsWritten;
643 MemoryManager *fMemoryManager;
644 };
645
646 // ---------------------------------------------------------------------------
647 // XMLTranscoder: Getter methods
648 // ---------------------------------------------------------------------------
getMemoryManager() const649 inline MemoryManager* XMLTranscoder::getMemoryManager() const
650 {
651 return fMemoryManager;
652 }
653
// ---------------------------------------------------------------------------
//  XMLTranscoder: Getter methods (block size and encoding name)
// ---------------------------------------------------------------------------
getBlockSize() const657 inline XMLSize_t XMLTranscoder::getBlockSize() const
658 {
659 return fBlockSize;
660 }
661
getEncodingName() const662 inline const XMLCh* XMLTranscoder::getEncodingName() const
663 {
664 return fEncodingName;
665 }
666
667 // ---------------------------------------------------------------------------
668 // TranscodeToStr: Getter methods
669 // ---------------------------------------------------------------------------
str() const670 inline const XMLByte *TranscodeToStr::str() const
671 {
672 return fString.get();
673 }
674
adopt()675 inline XMLByte *TranscodeToStr::adopt()
676 {
677 fBytesWritten = 0;
678 return fString.release();
679 }
680
length() const681 inline XMLSize_t TranscodeToStr::length () const
682 {
683 return fBytesWritten;
684 }
685
686 // ---------------------------------------------------------------------------
687 // TranscodeFromStr: Getter methods
688 // ---------------------------------------------------------------------------
str() const689 inline const XMLCh *TranscodeFromStr::str() const
690 {
691 return fString.get();
692 }
693
adopt()694 inline XMLCh *TranscodeFromStr::adopt()
695 {
696 fCharsWritten = 0;
697 return fString.release();
698 }
699
length() const700 inline XMLSize_t TranscodeFromStr::length() const
701 {
702 return fCharsWritten;
703 }
704
705 XERCES_CPP_NAMESPACE_END
706
707 #endif
708