1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <cassert>
23 
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
27 
28 #include "converter.hxx"
29 #include "convertiso2022cn.hxx"
30 #include "tenchelp.hxx"
31 #include "unichars.hxx"
32 
33 namespace {
34 
35 enum ImplIso2022CnToUnicodeState // order is important:
36 {
37     IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII,
38     IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO,
39     IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2,
40     IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432,
41     IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2,
42     IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC,
43     IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR,
44     IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN,
45     IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK
46 };
47 
48 struct ImplIso2022CnToUnicodeContext
49 {
50     ImplIso2022CnToUnicodeState m_eState;
51     sal_uInt32 m_nRow;
52     bool m_bSo;
53     bool m_b116431;
54 };
55 
56 enum ImplUnicodeToIso2022CnDesignator
57 {
58     IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE,
59     IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312,
60     IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431
61 };
62 
63 struct ImplUnicodeToIso2022CnContext
64 {
65     sal_Unicode m_nHighSurrogate;
66     ImplUnicodeToIso2022CnDesignator m_eSoDesignator;
67     bool m_b116432Designator;
68     bool m_bSo;
69 };
70 
71 }
72 
ImplCreateIso2022CnToUnicodeContext()73 void * ImplCreateIso2022CnToUnicodeContext()
74 {
75     ImplIso2022CnToUnicodeContext * pContext =
76         new ImplIso2022CnToUnicodeContext;
77     pContext->m_eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
78     pContext->m_bSo = false;
79     pContext->m_b116431 = false;
80     return pContext;
81 }
82 
ImplResetIso2022CnToUnicodeContext(void * pContext)83 void ImplResetIso2022CnToUnicodeContext(void * pContext)
84 {
85     if (pContext)
86     {
87         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState
88             = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
89         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo = false;
90         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431 = false;
91     }
92 }
93 
ImplDestroyIso2022CnToUnicodeContext(void * pContext)94 void ImplDestroyIso2022CnToUnicodeContext(void * pContext)
95 {
96     delete static_cast< ImplIso2022CnToUnicodeContext * >(pContext);
97 }
98 
ImplConvertIso2022CnToUnicode(void const * pData,void * pContext,char const * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)99 sal_Size ImplConvertIso2022CnToUnicode(void const * pData,
100                                        void * pContext,
101                                        char const * pSrcBuf,
102                                        sal_Size nSrcBytes,
103                                        sal_Unicode * pDestBuf,
104                                        sal_Size nDestChars,
105                                        sal_uInt32 nFlags,
106                                        sal_uInt32 * pInfo,
107                                        sal_Size * pSrcCvtBytes)
108 {
109     ImplDBCSToUniLeadTab const * pGb2312Data
110         = static_cast< ImplIso2022CnConverterData const * >(pData)->
111               m_pGb2312ToUnicodeData;
112     sal_uInt16 const * pCns116431992Data
113         = static_cast< ImplIso2022CnConverterData const * >(pData)->
114               m_pCns116431992ToUnicodeData;
115     sal_Int32 const * pCns116431992RowOffsets
116         = static_cast< ImplIso2022CnConverterData const * >(pData)->
117               m_pCns116431992ToUnicodeRowOffsets;
118     sal_Int32 const * pCns116431992PlaneOffsets
119         = static_cast< ImplIso2022CnConverterData const * >(pData)->
120               m_pCns116431992ToUnicodePlaneOffsets;
121     ImplIso2022CnToUnicodeState eState
122         = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
123     sal_uInt32 nRow = 0;
124     bool bSo = false;
125     bool b116431 = false;
126     sal_uInt32 nInfo = 0;
127     sal_Size nConverted = 0;
128     sal_Unicode * pDestBufPtr = pDestBuf;
129     sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
130     sal_Size startOfCurrentChar = 0;
131 
132     if (pContext)
133     {
134         eState = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState;
135         nRow = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_nRow;
136         bSo = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo;
137         b116431 = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431;
138     }
139 
140     for (; nConverted < nSrcBytes; ++nConverted)
141     {
142         bool bUndefined = true;
143         sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
144         sal_uInt32 nPlane;
145         switch (eState)
146         {
147         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII:
148             if (nChar == 0x0E) // SO
149             {
150                 bSo = true;
151                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO;
152             }
153             else if (nChar == 0x1B) // ESC
154                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC;
155             else if (nChar < 0x80)
156                 if (pDestBufPtr != pDestBufEnd) {
157                     *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
158                     startOfCurrentChar = nConverted + 1;
159                 } else
160                     goto no_output;
161             else
162             {
163                 bUndefined = false;
164                 goto bad_input;
165             }
166             break;
167 
168         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO:
169             if (nChar == 0x0F) // SI
170             {
171                 bSo = false;
172                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
173             }
174             else if (nChar == 0x1B) // ESC
175                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC;
176             else if (nChar >= 0x21 && nChar <= 0x7E)
177             {
178                 nRow = nChar;
179                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2;
180             }
181             else
182             {
183                 bUndefined = false;
184                 goto bad_input;
185             }
186             break;
187 
188         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2:
189             if (nChar >= 0x21 && nChar <= 0x7E)
190                 if (b116431)
191                 {
192                     nPlane = 0;
193                     goto transform;
194                 }
195                 else
196                 {
197                     sal_uInt16 nUnicode = 0;
198                     sal_uInt32 nFirst;
199                     nRow += 0x80;
200                     nChar += 0x80;
201                     nFirst = pGb2312Data[nRow].mnTrailStart;
202                     if (nChar >= nFirst
203                         && nChar <= pGb2312Data[nRow].mnTrailEnd)
204                         nUnicode = pGb2312Data[nRow].
205                                        mpToUniTrailTab[nChar - nFirst];
206                     if (nUnicode != 0)
207                         if (pDestBufPtr != pDestBufEnd)
208                         {
209                             *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
210                             eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO;
211                             startOfCurrentChar = nConverted + 1;
212                         }
213                         else
214                             goto no_output;
215                     else
216                         goto bad_input;
217                 }
218             else
219             {
220                 bUndefined = false;
221                 goto bad_input;
222             }
223             break;
224 
225         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432:
226             if (nChar >= 0x21 && nChar <= 0x7E)
227             {
228                 nRow = nChar;
229                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2;
230             }
231             else
232             {
233                 bUndefined = false;
234                 goto bad_input;
235             }
236             break;
237 
238         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2:
239             if (nChar >= 0x21 && nChar <= 0x7E)
240             {
241                 nPlane = 1;
242                 goto transform;
243             }
244             else
245             {
246                 bUndefined = false;
247                 goto bad_input;
248             }
249             break;
250 
251         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC:
252             if (nChar == 0x24) // $
253                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR;
254             else if (nChar == 0x4E) // N
255                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432;
256             else
257             {
258                 bUndefined = false;
259                 goto bad_input;
260             }
261             break;
262 
263         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR:
264             if (nChar == 0x29) // )
265                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
266             else if (nChar == 0x2A) // *
267                 eState
268                     = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK;
269             else
270             {
271                 bUndefined = false;
272                 goto bad_input;
273             }
274             break;
275 
276         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
277             if (nChar == 0x41) // A
278             {
279                 b116431 = false;
280                 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
281                                IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
282             }
283             else if (nChar == 0x47) // G
284             {
285                 b116431 = true;
286                 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
287                                IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
288             }
289             else
290             {
291                 bUndefined = false;
292                 goto bad_input;
293             }
294             break;
295 
296         case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK:
297             if (nChar == 0x48) // H
298                 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
299                                IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
300             else
301             {
302                 bUndefined = false;
303                 goto bad_input;
304             }
305             break;
306         }
307         continue;
308 
309     transform:
310         {
311             sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
312             if (nPlaneOffset == -1)
313                 goto bad_input;
314             else
315             {
316                 sal_Int32 nOffset
317                     = pCns116431992RowOffsets[nPlaneOffset + (nRow - 0x21)];
318                 if (nOffset == -1)
319                     goto bad_input;
320                 else
321                 {
322                     sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
323                     sal_uInt32 nFirst = nFirstLast & 0xFF;
324                     sal_uInt32 nLast = nFirstLast >> 8;
325                     nChar -= 0x20;
326                     if (nChar >= nFirst && nChar <= nLast)
327                     {
328                         sal_uInt32 nUnicode
329                             = pCns116431992Data[nOffset + (nChar - nFirst)];
330                         if (nUnicode == 0xFFFF)
331                             goto bad_input;
332                         else if (ImplIsHighSurrogate(nUnicode))
333                             if (pDestBufEnd - pDestBufPtr >= 2)
334                             {
335                                 nOffset += nLast - nFirst + 1;
336                                 nFirst = pCns116431992Data[nOffset++];
337                                 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
338                                 *pDestBufPtr++
339                                     = static_cast<sal_Unicode>(pCns116431992Data[
340                                               nOffset + (nChar - nFirst)]);
341                                 startOfCurrentChar = nConverted + 1;
342                             }
343                             else
344                                 goto no_output;
345                         else
346                             if (pDestBufPtr != pDestBufEnd) {
347                                 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
348                                 startOfCurrentChar = nConverted + 1;
349                             } else
350                                 goto no_output;
351                     }
352                     else
353                         goto bad_input;
354                     eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
355                                    IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
356                 }
357             }
358             continue;
359         }
360 
361     bad_input:
362         switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
363                     bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
364                     &nInfo))
365         {
366         case sal::detail::textenc::BAD_INPUT_STOP:
367             eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
368             b116431 = false;
369             if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
370                 ++nConverted;
371             } else {
372                 nConverted = startOfCurrentChar;
373             }
374             break;
375 
376         case sal::detail::textenc::BAD_INPUT_CONTINUE:
377             eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
378             b116431 = false;
379             startOfCurrentChar = nConverted + 1;
380             continue;
381 
382         case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
383             goto no_output;
384         }
385         break;
386 
387     no_output:
388         --pSrcBuf;
389         nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
390         break;
391     }
392 
393     if (eState > IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO
394         && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
395                          | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
396                == 0)
397     {
398         if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
399             nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
400         else
401             switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
402                         false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
403                         &nInfo))
404             {
405             case sal::detail::textenc::BAD_INPUT_STOP:
406                 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
407                     nConverted = startOfCurrentChar;
408                 }
409                 [[fallthrough]];
410             case sal::detail::textenc::BAD_INPUT_CONTINUE:
411                 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
412                 b116431 = false;
413                 break;
414 
415             case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
416                 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
417                 break;
418             }
419     }
420 
421     if (pContext)
422     {
423         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState = eState;
424         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_nRow = nRow;
425         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo = bSo;
426         static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431 = b116431;
427     }
428     if (pInfo)
429         *pInfo = nInfo;
430     if (pSrcCvtBytes)
431         *pSrcCvtBytes = nConverted;
432 
433     return pDestBufPtr - pDestBuf;
434 }
435 
ImplCreateUnicodeToIso2022CnContext()436 void * ImplCreateUnicodeToIso2022CnContext()
437 {
438     ImplUnicodeToIso2022CnContext * pContext =
439         new ImplUnicodeToIso2022CnContext;
440     pContext->m_nHighSurrogate = 0;
441     pContext->m_eSoDesignator = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
442     pContext->m_b116432Designator = false;
443     pContext->m_bSo = false;
444     return pContext;
445 }
446 
ImplResetUnicodeToIso2022CnContext(void * pContext)447 void ImplResetUnicodeToIso2022CnContext(void * pContext)
448 {
449     if (pContext)
450     {
451         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate = 0;
452         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator
453             = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
454         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_b116432Designator
455             = false;
456         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo = false;
457     }
458 }
459 
ImplDestroyUnicodeToIso2022CnContext(void * pContext)460 void ImplDestroyUnicodeToIso2022CnContext(void * pContext)
461 {
462     delete static_cast< ImplUnicodeToIso2022CnContext * >(pContext);
463 }
464 
ImplIso2022CnTranslateTo2312(ImplUniToDBCSHighTab const * pGb2312Data,sal_uInt32 nChar)465 static sal_uInt32 ImplIso2022CnTranslateTo2312(ImplUniToDBCSHighTab const *
466                                                    pGb2312Data,
467                                                sal_uInt32 nChar)
468 {
469     sal_uInt32 nIndex1 = nChar >> 8;
470     if (nIndex1 < 0x100)
471     {
472         sal_uInt32 nIndex2 = nChar & 0xFF;
473         sal_uInt32 nFirst = pGb2312Data[nIndex1].mnLowStart;
474         if (nIndex2 >= nFirst && nIndex2 <= pGb2312Data[nIndex1].mnLowEnd)
475             return pGb2312Data[nIndex1].mpToUniTrailTab[nIndex2 - nFirst]
476                        & 0x7F7F;
477     }
478     return 0;
479 }
480 
481 static sal_uInt32
ImplIso2022CnTranslateTo116431(sal_uInt8 const * pCns116431992Data,sal_Int32 const * pCns116431992PageOffsets,sal_Int32 const * pCns116431992PlaneOffsets,sal_uInt32 nChar)482 ImplIso2022CnTranslateTo116431(sal_uInt8 const * pCns116431992Data,
483                                sal_Int32 const * pCns116431992PageOffsets,
484                                sal_Int32 const * pCns116431992PlaneOffsets,
485                                sal_uInt32 nChar)
486 {
487     sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
488     sal_uInt32 nFirst;
489     sal_uInt32 nLast;
490     sal_uInt32 nPlane;
491     if (nOffset == -1)
492         return 0;
493     nOffset = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
494     if (nOffset == -1)
495         return 0;
496     nFirst = pCns116431992Data[nOffset++];
497     nLast = pCns116431992Data[nOffset++];
498     nChar &= 0xFF;
499     if (nChar < nFirst || nChar > nLast)
500         return 0;
501     nOffset += 3 * (nChar - nFirst);
502     nPlane = pCns116431992Data[nOffset++];
503     if (nPlane != 1)
504         return 0;
505     return (0x20 + pCns116431992Data[nOffset]) << 8
506                | (0x20 + pCns116431992Data[nOffset + 1]);
507 }
508 
ImplConvertUnicodeToIso2022Cn(void const * pData,void * pContext,sal_Unicode const * pSrcBuf,sal_Size nSrcChars,char * pDestBuf,sal_Size nDestBytes,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtChars)509 sal_Size ImplConvertUnicodeToIso2022Cn(void const * pData,
510                                        void * pContext,
511                                        sal_Unicode const * pSrcBuf,
512                                        sal_Size nSrcChars,
513                                        char * pDestBuf,
514                                        sal_Size nDestBytes,
515                                        sal_uInt32 nFlags,
516                                        sal_uInt32 * pInfo,
517                                        sal_Size * pSrcCvtChars)
518 {
519     ImplUniToDBCSHighTab const * pGb2312Data
520         = static_cast< ImplIso2022CnConverterData const * >(pData)->
521               m_pUnicodeToGb2312Data;
522     sal_uInt8 const * pCns116431992Data
523         = static_cast< ImplIso2022CnConverterData const * >(pData)->
524               m_pUnicodeToCns116431992Data;
525     sal_Int32 const * pCns116431992PageOffsets
526         = static_cast< ImplIso2022CnConverterData const * >(pData)->
527               m_pUnicodeToCns116431992PageOffsets;
528     sal_Int32 const * pCns116431992PlaneOffsets
529         = static_cast< ImplIso2022CnConverterData const * >(pData)->
530               m_pUnicodeToCns116431992PlaneOffsets;
531     sal_Unicode nHighSurrogate = 0;
532     ImplUnicodeToIso2022CnDesignator eSoDesignator
533         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
534     bool b116432Designator = false;
535     bool bSo = false;
536     sal_uInt32 nInfo = 0;
537     sal_Size nConverted = 0;
538     char * pDestBufPtr = pDestBuf;
539     char * pDestBufEnd = pDestBuf + nDestBytes;
540     bool bWritten;
541 
542     if (pContext)
543     {
544         nHighSurrogate
545             = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate;
546         eSoDesignator
547             = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator;
548         b116432Designator = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->
549                                 m_b116432Designator;
550         bSo = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo;
551     }
552 
553     for (; nConverted < nSrcChars; ++nConverted)
554     {
555         bool bUndefined = true;
556         sal_uInt32 nChar = *pSrcBuf++;
557         if (nHighSurrogate == 0)
558         {
559             if (ImplIsHighSurrogate(nChar))
560             {
561                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
562                 continue;
563             }
564             else if (ImplIsLowSurrogate(nChar))
565             {
566                 bUndefined = false;
567                 goto bad_input;
568             }
569         }
570         else if (ImplIsLowSurrogate(nChar))
571             nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
572         else
573         {
574             bUndefined = false;
575             goto bad_input;
576         }
577 
578         assert(rtl::isUnicodeScalarValue(nChar));
579 
580         if (nChar == 0x0A || nChar == 0x0D) // LF, CR
581         {
582             if (bSo)
583             {
584                 if (pDestBufPtr != pDestBufEnd)
585                 {
586                     *pDestBufPtr++ = 0x0F; // SI
587                     bSo = false;
588                     eSoDesignator
589                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
590                     b116432Designator = false;
591                 }
592                 else
593                     goto no_output;
594             }
595             if (pDestBufPtr != pDestBufEnd)
596                 *pDestBufPtr++ = static_cast< char >(nChar);
597             else
598                 goto no_output;
599         }
600         else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B)
601             goto bad_input;
602         else if (nChar < 0x80)
603         {
604             if (bSo)
605             {
606                 if (pDestBufPtr != pDestBufEnd)
607                 {
608                     *pDestBufPtr++ = 0x0F; // SI
609                     bSo = false;
610                 }
611                 else
612                     goto no_output;
613             }
614             if (pDestBufPtr != pDestBufEnd)
615                 *pDestBufPtr++ = static_cast< char >(nChar);
616             else
617                 goto no_output;
618         }
619         else
620         {
621             sal_uInt32 nBytes = 0;
622             ImplUnicodeToIso2022CnDesignator eNewDesignator =
623                          IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
624             switch (eSoDesignator)
625             {
626             case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE:
627                 nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar);
628                 if (nBytes != 0)
629                 {
630                     eNewDesignator
631                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312;
632                     break;
633                 }
634                 nBytes = ImplIso2022CnTranslateTo116431(
635                              pCns116431992Data,
636                              pCns116431992PageOffsets,
637                              pCns116431992PlaneOffsets,
638                              nChar);
639                 if (nBytes != 0)
640                 {
641                     eNewDesignator
642                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431;
643                     break;
644                 }
645                 break;
646 
647             case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312:
648                 nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar);
649                 if (nBytes != 0)
650                 {
651                     eNewDesignator
652                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
653                     break;
654                 }
655                 nBytes = ImplIso2022CnTranslateTo116431(
656                              pCns116431992Data,
657                              pCns116431992PageOffsets,
658                              pCns116431992PlaneOffsets,
659                              nChar);
660                 if (nBytes != 0)
661                 {
662                     eNewDesignator
663                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431;
664                     break;
665                 }
666                 break;
667 
668             case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431:
669                 nBytes = ImplIso2022CnTranslateTo116431(
670                              pCns116431992Data,
671                              pCns116431992PageOffsets,
672                              pCns116431992PlaneOffsets,
673                              nChar);
674                 if (nBytes != 0)
675                 {
676                     eNewDesignator
677                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
678                     break;
679                 }
680                 nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar);
681                 if (nBytes != 0)
682                 {
683                     eNewDesignator
684                         = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312;
685                     break;
686                 }
687                 break;
688             }
689             if (nBytes != 0)
690             {
691                 if (eNewDesignator
692                         != IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE)
693                 {
694                     if (bSo)
695                     {
696                         if (pDestBufPtr != pDestBufEnd)
697                         {
698                             *pDestBufPtr++ = 0x0F; // SI
699                             bSo = false;
700                         }
701                         else
702                             goto no_output;
703                     }
704                     if (pDestBufEnd - pDestBufPtr >= 4)
705                     {
706                         *pDestBufPtr++ = 0x1B; // ESC
707                         *pDestBufPtr++ = 0x24; // $
708                         *pDestBufPtr++ = 0x29; // )
709                         *pDestBufPtr++
710                             = eNewDesignator
711                               == IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312 ?
712                                   0x41 : 0x47; // A, G
713                         eSoDesignator = eNewDesignator;
714                     }
715                     else
716                         goto no_output;
717                 }
718                 if (!bSo)
719                 {
720                     if (pDestBufPtr != pDestBufEnd)
721                     {
722                         *pDestBufPtr++ = 0x0E; // SO
723                         bSo = true;
724                     }
725                     else
726                         goto no_output;
727                 }
728                 if (pDestBufEnd - pDestBufPtr >= 4)
729                 {
730                     *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
731                     *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
732                 }
733                 else
734                     goto no_output;
735             }
736             else
737             {
738                 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
739                 sal_uInt32 nFirst;
740                 sal_uInt32 nLast;
741                 sal_uInt32 nPlane;
742                 if (nOffset == -1)
743                     goto bad_input;
744                 nOffset
745                     = pCns116431992PageOffsets[nOffset
746                                                    + ((nChar & 0xFF00) >> 8)];
747                 if (nOffset == -1)
748                     goto bad_input;
749                 nFirst = pCns116431992Data[nOffset++];
750                 nLast = pCns116431992Data[nOffset++];
751                 nChar &= 0xFF;
752                 if (nChar < nFirst || nChar > nLast)
753                     goto bad_input;
754                 nOffset += 3 * (nChar - nFirst);
755                 nPlane = pCns116431992Data[nOffset++];
756                 if (nPlane != 2)
757                     goto bad_input;
758                 if (!b116432Designator)
759                 {
760                     if (pDestBufEnd - pDestBufPtr >= 4)
761                     {
762                         *pDestBufPtr++ = 0x1B; // ESC
763                         *pDestBufPtr++ = 0x24; // $
764                         *pDestBufPtr++ = 0x2A; // *
765                         *pDestBufPtr++ = 0x48; // H
766                         b116432Designator = true;
767                     }
768                     else
769                         goto no_output;
770                 }
771                 if (pDestBufEnd - pDestBufPtr >= 4)
772                 {
773                     *pDestBufPtr++ = 0x1B; // ESC
774                     *pDestBufPtr++ = 0x4E; // N
775                     *pDestBufPtr++
776                         = static_cast< char >(0x20 + pCns116431992Data[nOffset++]);
777                     *pDestBufPtr++
778                         = static_cast< char >(0x20 + pCns116431992Data[nOffset]);
779                 }
780                 else
781                     goto no_output;
782             }
783         }
784         nHighSurrogate = 0;
785         continue;
786 
787     bad_input:
788         switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
789                     bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
790                     &nInfo, "\x0F" /* SI */, bSo ? 1 : 0, &bWritten))
791         {
792         case sal::detail::textenc::BAD_INPUT_STOP:
793             nHighSurrogate = 0;
794             break;
795 
796         case sal::detail::textenc::BAD_INPUT_CONTINUE:
797             if (bWritten)
798                 bSo = false;
799             nHighSurrogate = 0;
800             continue;
801 
802         case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
803             goto no_output;
804         }
805         break;
806 
807     no_output:
808         --pSrcBuf;
809         nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
810         break;
811     }
812 
813     if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
814                       | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
815             == 0)
816     {
817         bool bFlush = true;
818         if (nHighSurrogate != 0)
819         {
820             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
821                 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
822             else
823                 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
824                             false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
825                             "\x0F" /* SI */, bSo ? 1 : 0, &bWritten))
826                 {
827                 case sal::detail::textenc::BAD_INPUT_STOP:
828                     nHighSurrogate = 0;
829                     bFlush = false;
830                     break;
831 
832                 case sal::detail::textenc::BAD_INPUT_CONTINUE:
833                     if (bWritten)
834                         bSo = false;
835                     nHighSurrogate = 0;
836                     break;
837 
838                 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
839                     nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
840                     break;
841                 }
842         }
843         if (bFlush && bSo && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
844         {
845             if (pDestBufPtr != pDestBufEnd)
846             {
847                 *pDestBufPtr++ = 0x0F; // SI
848                 bSo = false;
849             }
850             else
851                 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
852         }
853     }
854 
855     if (pContext)
856     {
857         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate
858             = nHighSurrogate;
859         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator
860             = eSoDesignator;
861         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_b116432Designator
862             = b116432Designator;
863         static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo = bSo;
864     }
865     if (pInfo)
866         *pInfo = nInfo;
867     if (pSrcCvtChars)
868         *pSrcCvtChars = nConverted;
869 
870     return pDestBufPtr - pDestBuf;
871 }
872 
873 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
874