1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /**
19  * $Id$
20  */
21 
22 
23 // ---------------------------------------------------------------------------
24 //  Includes
25 // ---------------------------------------------------------------------------
26 #include <xercesc/util/PlatformUtils.hpp>
27 #include <xercesc/util/XMLUniDefs.hpp>
28 #include <xercesc/util/XMLString.hpp>
29 #include <xercesc/util/TransService.hpp>
30 #include <xercesc/util/TranscodingException.hpp>
31 #include <xercesc/util/XMLExceptMsgs.hpp>
32 #include <xercesc/framework/XMLFormatter.hpp>
33 #include <xercesc/util/Janitor.hpp>
34 #include <xercesc/util/XMLChar.hpp>
35 
36 #include <string.h>
37 
38 XERCES_CPP_NAMESPACE_BEGIN
39 
40 // ---------------------------------------------------------------------------
41 //  Local data
42 //
43 //  gXXXRef
44 //      These are hard coded versions of the char refs we put out for the
45 //      standard char refs.
46 //
47 //  gEscapeChars
48 //      For each style of escape, we have a list of the chars that must
49 //      be escaped for that style. The first null hit in each list indicates
50 //      no more valid entries in that list. The first entry is a dummy for
51 //      the NoEscapes style.
52 // ---------------------------------------------------------------------------
53 static const XMLCh  gAmpRef[] =
54 {
55     chAmpersand, chLatin_a, chLatin_m, chLatin_p, chSemiColon, chNull
56 };
57 
58 static const XMLCh  gAposRef[] =
59 {
60     chAmpersand, chLatin_a, chLatin_p, chLatin_o, chLatin_s, chSemiColon, chNull
61 };
62 
63 static const XMLCh  gGTRef[] =
64 {
65     chAmpersand, chLatin_g, chLatin_t, chSemiColon, chNull
66 };
67 
68 static const XMLCh  gLTRef[] =
69 {
70     chAmpersand, chLatin_l, chLatin_t, chSemiColon, chNull
71 };
72 
73 static const XMLCh  gQuoteRef[] =
74 {
75     chAmpersand, chLatin_q, chLatin_u, chLatin_o, chLatin_t, chSemiColon, chNull
76 };
77 
78 static const unsigned int kEscapeCount = 7;
79 static const XMLCh gEscapeChars[XMLFormatter::EscapeFlags_Count][kEscapeCount] =
80 {
81         { chNull      , chNull       , chNull        , chNull       , chNull        , chNull    , chNull }
82     ,   { chAmpersand , chCloseAngle , chDoubleQuote , chOpenAngle  , chSingleQuote , chNull    , chNull }
83     ,   { chAmpersand , chOpenAngle  , chDoubleQuote , chLF         , chCR          , chHTab    , chNull }
84     ,   { chAmpersand , chOpenAngle  , chCloseAngle  , chCR         , chNull        , chNull    , chNull }
85 };
86 
// ---------------------------------------------------------------------------
//  XMLFormatter: Escape list lookup
// ---------------------------------------------------------------------------
inEscapeList(const XMLFormatter::EscapeFlags escStyle,const XMLCh toCheck)90 bool XMLFormatter::inEscapeList(const XMLFormatter::EscapeFlags escStyle
91                               , const XMLCh                     toCheck)
92 {
93     const XMLCh* escList = gEscapeChars[escStyle];
94     while (*escList)
95     {
96         if (*escList++ == toCheck)
97             return true;
98     }
99 
100     /***
101      *  XML1.1
102      *
103      *  Finally, there is considerable demand to define a standard representation of
104      *  arbitrary Unicode characters in XML documents. Therefore, XML 1.1 allows the
105      *  use of character references to the control characters #x1 through #x1F,
106      *  most of which are forbidden in XML 1.0. For reasons of robustness, however,
107      *  these characters still cannot be used directly in documents.
108      *  In order to improve the robustness of character encoding detection, the
109      *  additional control characters #x7F through #x9F, which were freely allowed in
110      *  XML 1.0 documents, now must also appear only as character references.
111      *  (Whitespace characters are of course exempt.) The minor sacrifice of backward
112      *  compatibility is considered not significant.
113      *  Due to potential problems with APIs, #x0 is still forbidden both directly and
114      *  as a character reference.
115      *
116     ***/
117     if (fIsXML11)
118     {
119         // for XML11
120         if ( XMLChar1_1::isControlChar(toCheck, 0) &&
121             !XMLChar1_1::isWhitespace(toCheck, 0)   )
122         {
123             return true;
124         }
125         else
126         {
127             return false;
128         }
129     }
130     else
131     {
132         return false;
133     }
134 
135 }
136 
137 
138 // ---------------------------------------------------------------------------
139 //  XMLFormatter: Constructors and Destructor
140 // ---------------------------------------------------------------------------
XMLFormatter(const char * const outEncoding,const char * const docVersion,XMLFormatTarget * const target,const EscapeFlags escapeFlags,const UnRepFlags unrepFlags,MemoryManager * const manager)141 XMLFormatter::XMLFormatter( const   char* const             outEncoding
142                             , const char* const             docVersion
143                             ,       XMLFormatTarget* const  target
144                             , const EscapeFlags             escapeFlags
145                             , const UnRepFlags              unrepFlags
146                             ,       MemoryManager* const    manager)
147     : fEscapeFlags(escapeFlags)
148     , fOutEncoding(0)
149     , fTarget(target)
150     , fUnRepFlags(unrepFlags)
151     , fXCoder(0)
152     , fAposRef(0)
153     , fAposLen(0)
154     , fAmpRef(0)
155     , fAmpLen(0)
156     , fGTRef(0)
157     , fGTLen(0)
158     , fLTRef(0)
159     , fLTLen(0)
160     , fQuoteRef(0)
161     , fQuoteLen(0)
162     , fIsXML11(false)
163     , fMemoryManager(manager)
164 {
165     // Transcode the encoding string
166     fOutEncoding = XMLString::transcode(outEncoding, fMemoryManager);
167 
168     // Try to create a transcoder for this encoding
169     XMLTransService::Codes resCode;
170     fXCoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
171     (
172         fOutEncoding
173         , resCode
174         , kTmpBufSize
175         , fMemoryManager
176     );
177 
178     if (!fXCoder)
179     {
180         fMemoryManager->deallocate(fOutEncoding); //delete [] fOutEncoding;
181         ThrowXMLwithMemMgr1
182         (
183             TranscodingException
184             , XMLExcepts::Trans_CantCreateCvtrFor
185             , outEncoding
186             , fMemoryManager
187         );
188     }
189 
190     XMLCh* const tmpDocVer = XMLString::transcode(docVersion, fMemoryManager);
191     ArrayJanitor<XMLCh> jname(tmpDocVer, fMemoryManager);
192     fIsXML11 = XMLString::equals(tmpDocVer, XMLUni::fgVersion1_1);
193 }
194 
195 
XMLFormatter(const XMLCh * const outEncoding,const XMLCh * const docVersion,XMLFormatTarget * const target,const EscapeFlags escapeFlags,const UnRepFlags unrepFlags,MemoryManager * const manager)196 XMLFormatter::XMLFormatter( const   XMLCh* const            outEncoding
197                             , const XMLCh* const            docVersion
198                             ,       XMLFormatTarget* const  target
199                             , const EscapeFlags             escapeFlags
200                             , const UnRepFlags              unrepFlags
201                             ,       MemoryManager* const    manager)
202     : fEscapeFlags(escapeFlags)
203     , fOutEncoding(0)
204     , fTarget(target)
205     , fUnRepFlags(unrepFlags)
206     , fXCoder(0)
207     , fAposRef(0)
208     , fAposLen(0)
209     , fAmpRef(0)
210     , fAmpLen(0)
211     , fGTRef(0)
212     , fGTLen(0)
213     , fLTRef(0)
214     , fLTLen(0)
215     , fQuoteRef(0)
216     , fQuoteLen(0)
217     , fIsXML11(false)
218     , fMemoryManager(manager)
219 {
220     // Try to create a transcoder for this encoding
221     XMLTransService::Codes resCode;
222     fXCoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
223     (
224         outEncoding
225         , resCode
226         , kTmpBufSize
227         , fMemoryManager
228     );
229 
230     if (!fXCoder)
231     {
232         ThrowXMLwithMemMgr1
233         (
234             TranscodingException
235             , XMLExcepts::Trans_CantCreateCvtrFor
236             , outEncoding
237             , fMemoryManager
238         );
239     }
240 
241     // Copy the encoding string
242     fOutEncoding = XMLString::replicate(outEncoding, fMemoryManager);
243 
244     fIsXML11 = XMLString::equals(docVersion, XMLUni::fgVersion1_1);
245 }
246 
XMLFormatter(const char * const outEncoding,XMLFormatTarget * const target,const EscapeFlags escapeFlags,const UnRepFlags unrepFlags,MemoryManager * const manager)247 XMLFormatter::XMLFormatter( const   char* const             outEncoding
248                             ,       XMLFormatTarget* const  target
249                             , const EscapeFlags             escapeFlags
250                             , const UnRepFlags              unrepFlags
251                             ,       MemoryManager* const    manager)
252     : fEscapeFlags(escapeFlags)
253     , fOutEncoding(0)
254     , fTarget(target)
255     , fUnRepFlags(unrepFlags)
256     , fXCoder(0)
257     , fAposRef(0)
258     , fAposLen(0)
259     , fAmpRef(0)
260     , fAmpLen(0)
261     , fGTRef(0)
262     , fGTLen(0)
263     , fLTRef(0)
264     , fLTLen(0)
265     , fQuoteRef(0)
266     , fQuoteLen(0)
267     , fIsXML11(false)
268     , fMemoryManager(manager)
269 {
270     // this constructor uses "1.0" for the docVersion
271 
272     // Transcode the encoding string
273     fOutEncoding = XMLString::transcode(outEncoding, fMemoryManager);
274 
275     // Try to create a transcoder for this encoding
276     XMLTransService::Codes resCode;
277     fXCoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
278     (
279         fOutEncoding
280         , resCode
281         , kTmpBufSize
282         , fMemoryManager
283     );
284 
285     if (!fXCoder)
286     {
287         fMemoryManager->deallocate(fOutEncoding); //delete [] fOutEncoding;
288         ThrowXMLwithMemMgr1
289         (
290             TranscodingException
291             , XMLExcepts::Trans_CantCreateCvtrFor
292             , outEncoding
293             , fMemoryManager
294         );
295     }
296 
297     //XMLCh* const tmpDocVer = XMLString::transcode("1.0", fMemoryManager);
298     //ArrayJanitor<XMLCh> jname(tmpDocVer, fMemoryManager);
299     //fIsXML11 = XMLString::equals(tmpDocVer, XMLUni::fgVersion1_1);
300     fIsXML11 = false;  // docVersion 1.0 is not 1.1!
301 }
302 
303 
XMLFormatter(const XMLCh * const outEncoding,XMLFormatTarget * const target,const EscapeFlags escapeFlags,const UnRepFlags unrepFlags,MemoryManager * const manager)304 XMLFormatter::XMLFormatter( const   XMLCh* const            outEncoding
305                             ,       XMLFormatTarget* const  target
306                             , const EscapeFlags             escapeFlags
307                             , const UnRepFlags              unrepFlags
308                             ,       MemoryManager* const    manager)
309     : fEscapeFlags(escapeFlags)
310     , fOutEncoding(0)
311     , fTarget(target)
312     , fUnRepFlags(unrepFlags)
313     , fXCoder(0)
314     , fAposRef(0)
315     , fAposLen(0)
316     , fAmpRef(0)
317     , fAmpLen(0)
318     , fGTRef(0)
319     , fGTLen(0)
320     , fLTRef(0)
321     , fLTLen(0)
322     , fQuoteRef(0)
323     , fQuoteLen(0)
324     , fIsXML11(false)
325     , fMemoryManager(manager)
326 {
327     // this constructor uses XMLUni::fgVersion1_0 for the docVersion
328 
329     // Try to create a transcoder for this encoding
330     XMLTransService::Codes resCode;
331     fXCoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
332     (
333         outEncoding
334         , resCode
335         , kTmpBufSize
336         , fMemoryManager
337     );
338 
339     if (!fXCoder)
340     {
341         ThrowXMLwithMemMgr1
342         (
343             TranscodingException
344             , XMLExcepts::Trans_CantCreateCvtrFor
345             , outEncoding
346             , fMemoryManager
347         );
348     }
349 
350     // Copy the encoding string
351     fOutEncoding = XMLString::replicate(outEncoding, fMemoryManager);
352 
353     //fIsXML11 = XMLString::equals(docVersion, XMLUni::fgVersion1_1);
354     fIsXML11 = false;  // docVersion 1.0 is not 1.1!
355 }
356 
~XMLFormatter()357 XMLFormatter::~XMLFormatter()
358 {
359     fMemoryManager->deallocate(fAposRef); //delete [] fAposRef;
360     fMemoryManager->deallocate(fAmpRef); //delete [] fAmpRef;
361     fMemoryManager->deallocate(fGTRef); //delete [] fGTRef;
362     fMemoryManager->deallocate(fLTRef); //delete [] fLTRef;
363     fMemoryManager->deallocate(fQuoteRef); //delete [] fQuoteRef;
364     fMemoryManager->deallocate(fOutEncoding); //delete [] fOutEncoding;
365     delete fXCoder;
366 
367     // We DO NOT own the target object!
368 }
369 
370 
371 // ---------------------------------------------------------------------------
372 //  XMLFormatter: Formatting methods
373 // ---------------------------------------------------------------------------
374 void
formatBuf(const XMLCh * const toFormat,const XMLSize_t count,const EscapeFlags escapeFlags,const UnRepFlags unrepFlags)375 XMLFormatter::formatBuf(const   XMLCh* const    toFormat
376                         , const XMLSize_t       count
377                         , const EscapeFlags     escapeFlags
378                         , const UnRepFlags      unrepFlags)
379 {
380     //
381     //  Figure out the actual escape flag value. If the parameter is not
382     //  the default, then take it. Else take the current default.
383     //
384     const EscapeFlags actualEsc = (escapeFlags == DefaultEscape)
385                                 ? fEscapeFlags : escapeFlags;
386 
387     // And do the same for the unrep flags
388     const UnRepFlags  actualUnRep = (unrepFlags == DefaultUnRep)
389                                     ? fUnRepFlags : unrepFlags;
390 
391     //
392     //  If the actual unrep action is that they want to provide char refs
393     //  for unrepresentable chars, then this one is a much more difficult
394     //  one to do cleanly, and we handle it separately.
395     //
396     if (actualUnRep == UnRep_CharRef)
397     {
398         specialFormat(toFormat, count, actualEsc);
399         return;
400     }
401 
402     //
403     //  If we don't have any escape flags set, then we can do the most
404     //  efficient loop, else we have to do it the hard way.
405     //
406     const XMLCh*    srcPtr = toFormat;
407     const XMLCh*    endPtr = toFormat + count;
408     if (actualEsc == NoEscapes)
409     {
410         //
411         //  Just do a whole buffer at a time into the temp buffer, cap
412         //  it off, and send it to the target.
413         //
414         if (srcPtr < endPtr)
415            srcPtr += handleUnEscapedChars(srcPtr, endPtr - srcPtr, actualUnRep);
416     }
417      else
418     {
419         //
420         //  Escape chars that require it according to the scale flags
421         //  we were given. For the others, try to accumulate them and
422         //  format them in as big as bulk as we can.
423         //
424         while (srcPtr < endPtr)
425         {
426             //
427             //  Run a temp pointer up until we hit a character that we have
428             //  to escape. Then we can convert all the chars between our
429             //  current source pointer and here all at once.
430             //
431             const XMLCh* tmpPtr = srcPtr;
432             while ((tmpPtr < endPtr) && !inEscapeList(actualEsc, *tmpPtr))
433                 tmpPtr++;
434 
435             //
436             //  If we got any chars, then lets convert them and write them
437             //  out.
438             //
439             if (tmpPtr > srcPtr)
440                srcPtr += handleUnEscapedChars(srcPtr, tmpPtr - srcPtr,
441                                               actualUnRep);
442 
443              else if (tmpPtr < endPtr)
444             {
445                 //
446                 //  Ok, so we've hit a char that must be escaped. So do
447                 //  this one specially.
448                 //
449                 const XMLByte * theChars;
450                 switch (*srcPtr) {
451                     case chAmpersand :
452                         theChars = getCharRef(fAmpLen, fAmpRef, gAmpRef);
453                         fTarget->writeChars(theChars, fAmpLen, this);
454                         break;
455 
456                     case chSingleQuote :
457                         theChars = getCharRef(fAposLen, fAposRef, gAposRef);
458                         fTarget->writeChars(theChars, fAposLen, this);
459                         break;
460 
461                     case chDoubleQuote :
462                         theChars = getCharRef(fQuoteLen, fQuoteRef, gQuoteRef);
463                         fTarget->writeChars(theChars, fQuoteLen, this);
464                         break;
465 
466                     case chCloseAngle :
467                         theChars = getCharRef(fGTLen, fGTRef, gGTRef);
468                         fTarget->writeChars(theChars, fGTLen, this);
469                         break;
470 
471                     case chOpenAngle :
472                         theChars = getCharRef(fLTLen, fLTRef, gLTRef);
473                         fTarget->writeChars(theChars, fLTLen, this);
474                         break;
475 
476                     default:
477                         // control characters
478                         writeCharRef(*srcPtr);
479                         break;
480                 }
481                 srcPtr++;
482             }
483         }
484     }
485 }
486 
487 
488 XMLSize_t
handleUnEscapedChars(const XMLCh * srcPtr,const XMLSize_t oCount,const UnRepFlags actualUnRep)489 XMLFormatter::handleUnEscapedChars(const XMLCh *                  srcPtr,
490                                    const XMLSize_t                oCount,
491                                    const UnRepFlags               actualUnRep)
492 {
493    //
494    //  Use that to figure out what I should pass to the transcoder. If we
495    //  are doing character references or failing for unrepresentable chars,
496    //  then we just throw, since we should never get a call for something
497    //  we cannot represent. Else, we tell it to just use the replacement
498    //  char.
499    //
500    const XMLTranscoder::UnRepOpts unRepOpts = (actualUnRep == UnRep_Replace)
501                                              ? XMLTranscoder::UnRep_RepChar
502                                              : XMLTranscoder::UnRep_Throw;
503 
504    XMLSize_t charsEaten;
505    XMLSize_t count = oCount;
506 
507    while (count) {
508      const XMLSize_t srcChars = (count > XMLSize_t (kTmpBufSize))
509        ? XMLSize_t (kTmpBufSize) : count;
510 
511       const XMLSize_t outBytes
512          = fXCoder->transcodeTo(srcPtr, srcChars,
513                                 fTmpBuf, kTmpBufSize,
514                                 charsEaten, unRepOpts);
515 
516       if (outBytes) {
517          fTmpBuf[outBytes]     = 0; fTmpBuf[outBytes + 1] = 0;
518          fTmpBuf[outBytes + 2] = 0; fTmpBuf[outBytes + 3] = 0;
519          fTarget->writeChars(fTmpBuf, outBytes, this);
520       }
521 
522       srcPtr += charsEaten;
523       count  -= charsEaten;
524    }
525 
526    return oCount; // This should be an assertion that count == 0.
527 }
528 
529 
operator <<(const XMLCh * const toFormat)530 XMLFormatter& XMLFormatter::operator<<(const XMLCh* const toFormat)
531 {
532     const XMLSize_t len = XMLString::stringLen(toFormat);
533     formatBuf(toFormat, len);
534     return *this;
535 }
536 
operator <<(const XMLCh toFormat)537 XMLFormatter& XMLFormatter::operator<<(const XMLCh toFormat)
538 {
539     // Make a temp string format that
540     XMLCh szTmp[2];
541     szTmp[0] = toFormat;
542     szTmp[1] = 0;
543 
544     formatBuf(szTmp, 1);
545     return *this;
546 }
547 
548 /**
549  * the parameter, count, is needed since stringLen()
550  * does not work on a BOM like "0xFE0xFF0x000x00" or
551  * "0x000x000xFF0xFE"
552  **/
writeBOM(const XMLByte * const toFormat,const XMLSize_t count)553 void XMLFormatter::writeBOM(const XMLByte* const toFormat
554                           , const XMLSize_t      count)
555 {
556     fTarget->writeChars(toFormat, count, this);
557 }
558 
559 // ---------------------------------------------------------------------------
560 //  XMLFormatter: Private helper methods
561 // ---------------------------------------------------------------------------
writeCharRef(const XMLCh & toWrite)562 void XMLFormatter::writeCharRef(const XMLCh &toWrite)
563 {
564     XMLCh tmpBuf[32];
565     tmpBuf[0] = chAmpersand;
566     tmpBuf[1] = chPound;
567     tmpBuf[2] = chLatin_x;
568 
569     // Build a char ref for the current char
570     XMLString::binToText(toWrite, &tmpBuf[3], 8, 16, fMemoryManager);
571     const XMLSize_t bufLen = XMLString::stringLen(tmpBuf);
572     tmpBuf[bufLen] = chSemiColon;
573     tmpBuf[bufLen+1] = chNull;
574 
575     // write it out
576     formatBuf(tmpBuf
577             , bufLen + 1
578             , XMLFormatter::NoEscapes
579             , XMLFormatter::UnRep_Fail);
580 
581 }
582 
writeCharRef(XMLSize_t toWrite)583 void XMLFormatter::writeCharRef(XMLSize_t toWrite)
584 {
585     XMLCh tmpBuf[64];
586     tmpBuf[0] = chAmpersand;
587     tmpBuf[1] = chPound;
588     tmpBuf[2] = chLatin_x;
589 
590     // Build a char ref for the current char
591     XMLString::sizeToText(toWrite, &tmpBuf[3], 32, 16, fMemoryManager);
592     const XMLSize_t bufLen = XMLString::stringLen(tmpBuf);
593     tmpBuf[bufLen] = chSemiColon;
594     tmpBuf[bufLen+1] = chNull;
595 
596     // write it out
597     formatBuf(tmpBuf
598             , bufLen + 1
599             , XMLFormatter::NoEscapes
600             , XMLFormatter::UnRep_Fail);
601 
602 }
603 
604 
getCharRef(XMLSize_t & count,XMLByte * & ref,const XMLCh * stdRef)605 const XMLByte* XMLFormatter::getCharRef(XMLSize_t     &count,
606                                         XMLByte*      &ref,
607                                         const XMLCh *  stdRef)
608 {
609    if (!ref) {
610 
611        XMLSize_t charsEaten;
612        const XMLSize_t outBytes =
613            fXCoder->transcodeTo(stdRef, XMLString::stringLen(stdRef),
614                                 fTmpBuf, kTmpBufSize, charsEaten,
615                                 XMLTranscoder::UnRep_Throw);
616 
617        fTmpBuf[outBytes] = 0;
618        fTmpBuf[outBytes + 1] = 0;
619        fTmpBuf[outBytes + 2] = 0;
620        fTmpBuf[outBytes + 3] = 0;
621 
622        ref = (XMLByte*) fMemoryManager->allocate
623        (
624            (outBytes + 4) * sizeof(XMLByte)
625        );//new XMLByte[outBytes + 4];
626        memcpy(ref, fTmpBuf, outBytes + 4);
627        count = outBytes;
628    }
629 
630    return ref;
631 }
632 
specialFormat(const XMLCh * const toFormat,const XMLSize_t count,const EscapeFlags escapeFlags)633 void XMLFormatter::specialFormat(const  XMLCh* const    toFormat
634                                 , const XMLSize_t       count
635                                 , const EscapeFlags     escapeFlags)
636 {
637     //
638     //  We have to check each character and see if it could be represented.
639     //  As long as it can, we just keep up with where we started and how
640     //  many chars we've checked. When we hit an unrepresentable one, we
641     //  stop, transcode everything we've collected, then start handling
642     //  the unrepresentables via char refs. We repeat this until we get all
643     //  the chars done.
644     //
645     const XMLCh*    srcPtr = toFormat;
646     const XMLCh*    endPtr = toFormat + count;
647 
648     while (srcPtr < endPtr)
649     {
650         const XMLCh* tmpPtr = srcPtr;
651         while (tmpPtr < endPtr)
652         {
653             if (fXCoder->canTranscodeTo(*tmpPtr))
654                 tmpPtr++;
655             else
656                 break;
657         }
658 
659         if (tmpPtr > srcPtr)
660         {
661             // We got at least some chars that can be done normally
662             formatBuf
663             (
664                 srcPtr
665                 , tmpPtr - srcPtr
666                 , escapeFlags
667                 , XMLFormatter::UnRep_Fail
668             );
669 
670             // Update the source pointer to our new spot
671             srcPtr = tmpPtr;
672         }
673          else
674         {
675 
676             //  We hit something unrepresentable. So continue forward doing
677             //  char refs until we hit something representable again or the
678             //  end of input.
679             //
680             while (srcPtr < endPtr)
681             {
682                 if ((*srcPtr & 0xFC00) == 0xD800) {
683                     // we have encountered a surrogate, need to recombine before printing out
684                     // use writeCharRef that takes XMLSize_t to get values larger than
685                     // hex 0xFFFF printed.
686                     tmpPtr = srcPtr;
687                     tmpPtr++; // point at low surrogate
688                     writeCharRef((XMLSize_t) (0x10000+((*srcPtr-0xD800)<<10)+*tmpPtr-0xDC00));
689                     srcPtr++; // advance to low surrogate (will advance again below)
690                 }
691                 else {
692                     writeCharRef(*srcPtr);
693                 }
694 
695                 // Move up the source pointer and break out if needed
696                 srcPtr++;
697                 if (fXCoder->canTranscodeTo(*srcPtr))
698                     break;
699             }
700         }
701     }
702 }
703 
704 XERCES_CPP_NAMESPACE_END
705