/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
19 
20 #include <sal/config.h>
21 
22 #include <cassert>
23 
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
27 
28 #include "context.hxx"
29 #include "convertbig5hkscs.hxx"
30 #include "converter.hxx"
31 #include "tenchelp.hxx"
32 #include "unichars.hxx"
33 
34 namespace {
35 
36 struct ImplBig5HkscsToUnicodeContext
37 {
38     sal_Int32 m_nRow; // 0--255; 0 means none
39 };
40 
41 }
42 
ImplCreateBig5HkscsToUnicodeContext()43 void * ImplCreateBig5HkscsToUnicodeContext()
44 {
45     ImplBig5HkscsToUnicodeContext * pContext =
46         new ImplBig5HkscsToUnicodeContext;
47     pContext->m_nRow = 0;
48     return pContext;
49 }
50 
ImplResetBig5HkscsToUnicodeContext(void * pContext)51 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
52 {
53     if (pContext)
54         static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0;
55 }
56 
ImplDestroyBig5HkscsToUnicodeContext(void * pContext)57 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext)
58 {
59     delete static_cast< ImplBig5HkscsToUnicodeContext * >(pContext);
60 }
61 
ImplConvertBig5HkscsToUnicode(void const * pData,void * pContext,char const * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)62 sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
63                                        void * pContext,
64                                        char const * pSrcBuf,
65                                        sal_Size nSrcBytes,
66                                        sal_Unicode * pDestBuf,
67                                        sal_Size nDestChars,
68                                        sal_uInt32 nFlags,
69                                        sal_uInt32 * pInfo,
70                                        sal_Size * pSrcCvtBytes)
71 {
72     sal_uInt16 const * pBig5Hkscs2001Data
73         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
74               m_pBig5Hkscs2001ToUnicodeData;
75     sal_Int32 const * pBig5Hkscs2001RowOffsets
76         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
77               m_pBig5Hkscs2001ToUnicodeRowOffsets;
78     ImplDBCSToUniLeadTab const * pBig5Data
79         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
80               m_pBig5ToUnicodeData;
81     sal_Int32 nRow = 0;
82     sal_uInt32 nInfo = 0;
83     sal_Size nConverted = 0;
84     sal_Unicode * pDestBufPtr = pDestBuf;
85     sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
86     sal_Size startOfCurrentChar = 0;
87 
88     if (pContext)
89         nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow;
90 
91     for (; nConverted < nSrcBytes; ++nConverted)
92     {
93         bool bUndefined = true;
94         sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
95         if (nRow == 0)
96             if (nChar < 0x80)
97                 if (pDestBufPtr != pDestBufEnd) {
98                     *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
99                     startOfCurrentChar = nConverted + 1;
100                 } else
101                     goto no_output;
102             else if (nChar >= 0x81 && nChar <= 0xFE)
103                 nRow = nChar;
104             else
105             {
106                 bUndefined = false;
107                 goto bad_input;
108             }
109         else
110             if ((nChar >= 0x40 && nChar <= 0x7E)
111                 || (nChar >= 0xA1 && nChar <= 0xFE))
112             {
113                 sal_uInt32 nUnicode = 0xFFFF;
114                 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
115                 sal_uInt32 nFirst=0;
116                 sal_uInt32 nLast=0;
117                 if (nOffset != -1)
118                 {
119                     sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
120                     nFirst = nFirstLast & 0xFF;
121                     nLast = nFirstLast >> 8;
122                     if (nChar >= nFirst && nChar <= nLast)
123                         nUnicode
124                             = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
125                 }
126                 if (nUnicode == 0xFFFF)
127                 {
128                     sal_uInt32 n = pBig5Data[nRow].mnTrailStart;
129                     if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd)
130                     {
131                         nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
132                         if (nUnicode == 0)
133                             nUnicode = 0xFFFF;
134                         assert(!ImplIsHighSurrogate(nUnicode));
135                     }
136                 }
137                 if (nUnicode == 0xFFFF)
138                 {
139                     ImplDBCSEUDCData const * p
140                         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
141                               m_pEudcData;
142                     sal_uInt32 nCount
143                         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
144                               m_nEudcCount;
145                     sal_uInt32 i;
146                     for (i = 0; i < nCount; ++i)
147                     {
148                         if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
149                         {
150                             if (nChar < p->mnTrail1Start)
151                                 break;
152                             if (nChar <= p->mnTrail1End)
153                             {
154                                 nUnicode
155                                     = p->mnUniStart
156                                           + (nRow - p->mnLeadStart)
157                                                 * p->mnTrailRangeCount
158                                           + (nChar - p->mnTrail1Start);
159                                 break;
160                             }
161                             if (p->mnTrailCount < 2
162                                 || nChar < p->mnTrail2Start)
163                                 break;
164                             if (nChar <= p->mnTrail2End)
165                             {
166                                 nUnicode
167                                     = p->mnUniStart
168                                           + (nRow - p->mnLeadStart)
169                                                 * p->mnTrailRangeCount
170                                           + (nChar - p->mnTrail2Start)
171                                           + (p->mnTrail1End - p->mnTrail1Start
172                                                  + 1);
173                                 break;
174                             }
175                             if (p->mnTrailCount < 3
176                                 || nChar < p->mnTrail3Start)
177                                 break;
178                             if (nChar <= p->mnTrail3End)
179                             {
180                                 nUnicode
181                                     = p->mnUniStart
182                                           + (nRow - p->mnLeadStart)
183                                                 * p->mnTrailRangeCount
184                                           + (nChar - p->mnTrail3Start)
185                                           + (p->mnTrail1End - p->mnTrail1Start
186                                                  + 1)
187                                           + (p->mnTrail2End - p->mnTrail2Start
188                                                  + 1);
189                                 break;
190                             }
191                             break;
192                         }
193                         ++p;
194                     }
195                     assert(!ImplIsHighSurrogate(nUnicode));
196                 }
197                 if (nUnicode == 0xFFFF)
198                     goto bad_input;
199                 if (ImplIsHighSurrogate(nUnicode))
200                     if (pDestBufEnd - pDestBufPtr >= 2)
201                     {
202                         nOffset += nLast - nFirst + 1;
203                         nFirst = pBig5Hkscs2001Data[nOffset++];
204                         *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
205                         *pDestBufPtr++
206                             = static_cast<sal_Unicode>(pBig5Hkscs2001Data[
207                                                 nOffset + (nChar - nFirst)]);
208                         startOfCurrentChar = nConverted + 1;
209                     }
210                     else
211                         goto no_output;
212                 else
213                     if (pDestBufPtr != pDestBufEnd) {
214                         *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
215                         startOfCurrentChar = nConverted + 1;
216                     } else
217                         goto no_output;
218                 nRow = 0;
219             }
220             else
221             {
222                 bUndefined = false;
223                 goto bad_input;
224             }
225         continue;
226 
227     bad_input:
228         switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
229                     bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
230                     &nInfo))
231         {
232         case sal::detail::textenc::BAD_INPUT_STOP:
233             nRow = 0;
234             if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
235                 ++nConverted;
236             } else {
237                 nConverted = startOfCurrentChar;
238             }
239             break;
240 
241         case sal::detail::textenc::BAD_INPUT_CONTINUE:
242             nRow = 0;
243             startOfCurrentChar = nConverted + 1;
244             continue;
245 
246         case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
247             goto no_output;
248         }
249         break;
250 
251     no_output:
252         --pSrcBuf;
253         nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
254         break;
255     }
256 
257     if (nRow != 0
258         && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
259                          | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
260                == 0)
261     {
262         if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
263             nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
264         else
265             switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
266                         false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
267                         &nInfo))
268             {
269             case sal::detail::textenc::BAD_INPUT_STOP:
270                 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
271                     nConverted = startOfCurrentChar;
272                 }
273                 [[fallthrough]];
274             case sal::detail::textenc::BAD_INPUT_CONTINUE:
275                 nRow = 0;
276                 break;
277 
278             case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
279                 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
280                 break;
281             }
282     }
283 
284     if (pContext)
285         static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow;
286     if (pInfo)
287         *pInfo = nInfo;
288     if (pSrcCvtBytes)
289         *pSrcCvtBytes = nConverted;
290 
291     return pDestBufPtr - pDestBuf;
292 }
293 
ImplConvertUnicodeToBig5Hkscs(void const * pData,void * pContext,sal_Unicode const * pSrcBuf,sal_Size nSrcChars,char * pDestBuf,sal_Size nDestBytes,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtChars)294 sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
295                                        void * pContext,
296                                        sal_Unicode const * pSrcBuf,
297                                        sal_Size nSrcChars,
298                                        char * pDestBuf,
299                                        sal_Size nDestBytes,
300                                        sal_uInt32 nFlags,
301                                        sal_uInt32 * pInfo,
302                                        sal_Size * pSrcCvtChars)
303 {
304     sal_uInt16 const * pBig5Hkscs2001Data
305         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
306               m_pUnicodeToBig5Hkscs2001Data;
307     sal_Int32 const * pBig5Hkscs2001PageOffsets
308         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
309               m_pUnicodeToBig5Hkscs2001PageOffsets;
310     sal_Int32 const * pBig5Hkscs2001PlaneOffsets
311         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
312               m_pUnicodeToBig5Hkscs2001PlaneOffsets;
313     ImplUniToDBCSHighTab const * pBig5Data
314         = static_cast< ImplBig5HkscsConverterData const * >(pData)->
315               m_pUnicodeToBig5Data;
316     sal_Unicode nHighSurrogate = 0;
317     sal_uInt32 nInfo = 0;
318     sal_Size nConverted = 0;
319     char * pDestBufPtr = pDestBuf;
320     char * pDestBufEnd = pDestBuf + nDestBytes;
321 
322     if (pContext)
323         nHighSurrogate
324             = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
325 
326     for (; nConverted < nSrcChars; ++nConverted)
327     {
328         bool bUndefined = true;
329         sal_uInt32 nChar = *pSrcBuf++;
330         if (nHighSurrogate == 0)
331         {
332             if (ImplIsHighSurrogate(nChar))
333             {
334                 nHighSurrogate = static_cast<sal_Unicode>(nChar);
335                 continue;
336             }
337             else if (ImplIsLowSurrogate(nChar))
338             {
339                 bUndefined = false;
340                 goto bad_input;
341             }
342         }
343         else if (ImplIsLowSurrogate(nChar))
344             nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
345         else
346         {
347             bUndefined = false;
348             goto bad_input;
349         }
350 
351         assert(rtl::isUnicodeScalarValue(nChar));
352 
353         if (nChar < 0x80)
354             if (pDestBufPtr != pDestBufEnd)
355                 *pDestBufPtr++ = static_cast< char >(nChar);
356             else
357                 goto no_output;
358         else
359         {
360             sal_uInt32 nBytes = 0;
361             sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
362             if (nOffset != -1)
363             {
364                 nOffset
365                     = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
366                                                                >> 8)];
367                 if (nOffset != -1)
368                 {
369                     sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
370                     sal_uInt32 nFirst = nFirstLast & 0xFF;
371                     sal_uInt32 nLast = nFirstLast >> 8;
372                     sal_uInt32 nIndex = nChar & 0xFF;
373                     if (nIndex >= nFirst && nIndex <= nLast)
374                     {
375                         nBytes
376                             = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
377                     }
378                 }
379             }
380             if (nBytes == 0)
381             {
382                 sal_uInt32 nIndex1 = nChar >> 8;
383                 if (nIndex1 < 0x100)
384                 {
385                     sal_uInt32 nIndex2 = nChar & 0xFF;
386                     sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
387                     if (nIndex2 >= nFirst
388                         && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
389                         nBytes = pBig5Data[nIndex1].
390                                      mpToUniTrailTab[nIndex2 - nFirst];
391                 }
392             }
393             if (nBytes == 0)
394             {
395                 ImplDBCSEUDCData const * p
396                     = static_cast< ImplBig5HkscsConverterData const * >(pData)->
397                           m_pEudcData;
398                 sal_uInt32 nCount
399                     = static_cast< ImplBig5HkscsConverterData const * >(pData)->
400                           m_nEudcCount;
401                 sal_uInt32 i;
402                 for (i = 0; i < nCount; ++i) {
403                     if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
404                     {
405                         sal_uInt32 nIndex = nChar - p->mnUniStart;
406                         sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
407                         sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
408                         sal_uInt32 nSize;
409                         nBytes = (p->mnLeadStart + nLeadOff) << 8;
410                         nSize = p->mnTrail1End - p->mnTrail1Start + 1;
411                         if (nTrailOff < nSize)
412                         {
413                             nBytes |= p->mnTrail1Start + nTrailOff;
414                             break;
415                         }
416                         nTrailOff -= nSize;
417                         nSize = p->mnTrail2End - p->mnTrail2Start + 1;
418                         if (nTrailOff < nSize)
419                         {
420                             nBytes |= p->mnTrail2Start + nTrailOff;
421                             break;
422                         }
423                         nTrailOff -= nSize;
424                         nBytes |= p->mnTrail3Start + nTrailOff;
425                         break;
426                     }
427                     ++p;
428                 }
429             }
430             if (nBytes == 0)
431                 goto bad_input;
432             if (pDestBufEnd - pDestBufPtr >= 2)
433             {
434                 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
435                 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
436             }
437             else
438                 goto no_output;
439         }
440         nHighSurrogate = 0;
441         continue;
442 
443     bad_input:
444         switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
445                     bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
446                     &nInfo, nullptr, 0, nullptr))
447         {
448         case sal::detail::textenc::BAD_INPUT_STOP:
449             nHighSurrogate = 0;
450             break;
451 
452         case sal::detail::textenc::BAD_INPUT_CONTINUE:
453             nHighSurrogate = 0;
454             continue;
455 
456         case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
457             goto no_output;
458         }
459         break;
460 
461     no_output:
462         --pSrcBuf;
463         nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
464         break;
465     }
466 
467     if (nHighSurrogate != 0
468         && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
469                          | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
470                == 0)
471     {
472         if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
473             nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
474         else
475             switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
476                         false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
477                         nullptr, 0, nullptr))
478             {
479             case sal::detail::textenc::BAD_INPUT_STOP:
480             case sal::detail::textenc::BAD_INPUT_CONTINUE:
481                 nHighSurrogate = 0;
482                 break;
483 
484             case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
485                 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
486                 break;
487             }
488     }
489 
490     if (pContext)
491         static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
492             = nHighSurrogate;
493     if (pInfo)
494         *pInfo = nInfo;
495     if (pSrcCvtChars)
496         *pSrcCvtChars = nConverted;
497 
498     return pDestBufPtr - pDestBuf;
499 }
500 
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */