1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <sal/config.h>
21
22 #include <cassert>
23
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
27
28 #include "context.hxx"
29 #include "convertbig5hkscs.hxx"
30 #include "converter.hxx"
31 #include "tenchelp.hxx"
32 #include "unichars.hxx"
33
34 namespace {
35
36 struct ImplBig5HkscsToUnicodeContext
37 {
38 sal_Int32 m_nRow; // 0--255; 0 means none
39 };
40
41 }
42
ImplCreateBig5HkscsToUnicodeContext()43 void * ImplCreateBig5HkscsToUnicodeContext()
44 {
45 ImplBig5HkscsToUnicodeContext * pContext =
46 new ImplBig5HkscsToUnicodeContext;
47 pContext->m_nRow = 0;
48 return pContext;
49 }
50
ImplResetBig5HkscsToUnicodeContext(void * pContext)51 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
52 {
53 if (pContext)
54 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0;
55 }
56
ImplDestroyBig5HkscsToUnicodeContext(void * pContext)57 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext)
58 {
59 delete static_cast< ImplBig5HkscsToUnicodeContext * >(pContext);
60 }
61
ImplConvertBig5HkscsToUnicode(void const * pData,void * pContext,char const * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)62 sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
63 void * pContext,
64 char const * pSrcBuf,
65 sal_Size nSrcBytes,
66 sal_Unicode * pDestBuf,
67 sal_Size nDestChars,
68 sal_uInt32 nFlags,
69 sal_uInt32 * pInfo,
70 sal_Size * pSrcCvtBytes)
71 {
72 sal_uInt16 const * pBig5Hkscs2001Data
73 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
74 m_pBig5Hkscs2001ToUnicodeData;
75 sal_Int32 const * pBig5Hkscs2001RowOffsets
76 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
77 m_pBig5Hkscs2001ToUnicodeRowOffsets;
78 ImplDBCSToUniLeadTab const * pBig5Data
79 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
80 m_pBig5ToUnicodeData;
81 sal_Int32 nRow = 0;
82 sal_uInt32 nInfo = 0;
83 sal_Size nConverted = 0;
84 sal_Unicode * pDestBufPtr = pDestBuf;
85 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
86 sal_Size startOfCurrentChar = 0;
87
88 if (pContext)
89 nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow;
90
91 for (; nConverted < nSrcBytes; ++nConverted)
92 {
93 bool bUndefined = true;
94 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
95 if (nRow == 0)
96 if (nChar < 0x80)
97 if (pDestBufPtr != pDestBufEnd) {
98 *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
99 startOfCurrentChar = nConverted + 1;
100 } else
101 goto no_output;
102 else if (nChar >= 0x81 && nChar <= 0xFE)
103 nRow = nChar;
104 else
105 {
106 bUndefined = false;
107 goto bad_input;
108 }
109 else
110 if ((nChar >= 0x40 && nChar <= 0x7E)
111 || (nChar >= 0xA1 && nChar <= 0xFE))
112 {
113 sal_uInt32 nUnicode = 0xFFFF;
114 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
115 sal_uInt32 nFirst=0;
116 sal_uInt32 nLast=0;
117 if (nOffset != -1)
118 {
119 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
120 nFirst = nFirstLast & 0xFF;
121 nLast = nFirstLast >> 8;
122 if (nChar >= nFirst && nChar <= nLast)
123 nUnicode
124 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
125 }
126 if (nUnicode == 0xFFFF)
127 {
128 sal_uInt32 n = pBig5Data[nRow].mnTrailStart;
129 if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd)
130 {
131 nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
132 if (nUnicode == 0)
133 nUnicode = 0xFFFF;
134 assert(!ImplIsHighSurrogate(nUnicode));
135 }
136 }
137 if (nUnicode == 0xFFFF)
138 {
139 ImplDBCSEUDCData const * p
140 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
141 m_pEudcData;
142 sal_uInt32 nCount
143 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
144 m_nEudcCount;
145 sal_uInt32 i;
146 for (i = 0; i < nCount; ++i)
147 {
148 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
149 {
150 if (nChar < p->mnTrail1Start)
151 break;
152 if (nChar <= p->mnTrail1End)
153 {
154 nUnicode
155 = p->mnUniStart
156 + (nRow - p->mnLeadStart)
157 * p->mnTrailRangeCount
158 + (nChar - p->mnTrail1Start);
159 break;
160 }
161 if (p->mnTrailCount < 2
162 || nChar < p->mnTrail2Start)
163 break;
164 if (nChar <= p->mnTrail2End)
165 {
166 nUnicode
167 = p->mnUniStart
168 + (nRow - p->mnLeadStart)
169 * p->mnTrailRangeCount
170 + (nChar - p->mnTrail2Start)
171 + (p->mnTrail1End - p->mnTrail1Start
172 + 1);
173 break;
174 }
175 if (p->mnTrailCount < 3
176 || nChar < p->mnTrail3Start)
177 break;
178 if (nChar <= p->mnTrail3End)
179 {
180 nUnicode
181 = p->mnUniStart
182 + (nRow - p->mnLeadStart)
183 * p->mnTrailRangeCount
184 + (nChar - p->mnTrail3Start)
185 + (p->mnTrail1End - p->mnTrail1Start
186 + 1)
187 + (p->mnTrail2End - p->mnTrail2Start
188 + 1);
189 break;
190 }
191 break;
192 }
193 ++p;
194 }
195 assert(!ImplIsHighSurrogate(nUnicode));
196 }
197 if (nUnicode == 0xFFFF)
198 goto bad_input;
199 if (ImplIsHighSurrogate(nUnicode))
200 if (pDestBufEnd - pDestBufPtr >= 2)
201 {
202 nOffset += nLast - nFirst + 1;
203 nFirst = pBig5Hkscs2001Data[nOffset++];
204 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
205 *pDestBufPtr++
206 = static_cast<sal_Unicode>(pBig5Hkscs2001Data[
207 nOffset + (nChar - nFirst)]);
208 startOfCurrentChar = nConverted + 1;
209 }
210 else
211 goto no_output;
212 else
213 if (pDestBufPtr != pDestBufEnd) {
214 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
215 startOfCurrentChar = nConverted + 1;
216 } else
217 goto no_output;
218 nRow = 0;
219 }
220 else
221 {
222 bUndefined = false;
223 goto bad_input;
224 }
225 continue;
226
227 bad_input:
228 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
229 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
230 &nInfo))
231 {
232 case sal::detail::textenc::BAD_INPUT_STOP:
233 nRow = 0;
234 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
235 ++nConverted;
236 } else {
237 nConverted = startOfCurrentChar;
238 }
239 break;
240
241 case sal::detail::textenc::BAD_INPUT_CONTINUE:
242 nRow = 0;
243 startOfCurrentChar = nConverted + 1;
244 continue;
245
246 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
247 goto no_output;
248 }
249 break;
250
251 no_output:
252 --pSrcBuf;
253 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
254 break;
255 }
256
257 if (nRow != 0
258 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
259 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
260 == 0)
261 {
262 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
263 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
264 else
265 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
266 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
267 &nInfo))
268 {
269 case sal::detail::textenc::BAD_INPUT_STOP:
270 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
271 nConverted = startOfCurrentChar;
272 }
273 [[fallthrough]];
274 case sal::detail::textenc::BAD_INPUT_CONTINUE:
275 nRow = 0;
276 break;
277
278 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
279 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
280 break;
281 }
282 }
283
284 if (pContext)
285 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow;
286 if (pInfo)
287 *pInfo = nInfo;
288 if (pSrcCvtBytes)
289 *pSrcCvtBytes = nConverted;
290
291 return pDestBufPtr - pDestBuf;
292 }
293
ImplConvertUnicodeToBig5Hkscs(void const * pData,void * pContext,sal_Unicode const * pSrcBuf,sal_Size nSrcChars,char * pDestBuf,sal_Size nDestBytes,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtChars)294 sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
295 void * pContext,
296 sal_Unicode const * pSrcBuf,
297 sal_Size nSrcChars,
298 char * pDestBuf,
299 sal_Size nDestBytes,
300 sal_uInt32 nFlags,
301 sal_uInt32 * pInfo,
302 sal_Size * pSrcCvtChars)
303 {
304 sal_uInt16 const * pBig5Hkscs2001Data
305 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
306 m_pUnicodeToBig5Hkscs2001Data;
307 sal_Int32 const * pBig5Hkscs2001PageOffsets
308 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
309 m_pUnicodeToBig5Hkscs2001PageOffsets;
310 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
311 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
312 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
313 ImplUniToDBCSHighTab const * pBig5Data
314 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
315 m_pUnicodeToBig5Data;
316 sal_Unicode nHighSurrogate = 0;
317 sal_uInt32 nInfo = 0;
318 sal_Size nConverted = 0;
319 char * pDestBufPtr = pDestBuf;
320 char * pDestBufEnd = pDestBuf + nDestBytes;
321
322 if (pContext)
323 nHighSurrogate
324 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
325
326 for (; nConverted < nSrcChars; ++nConverted)
327 {
328 bool bUndefined = true;
329 sal_uInt32 nChar = *pSrcBuf++;
330 if (nHighSurrogate == 0)
331 {
332 if (ImplIsHighSurrogate(nChar))
333 {
334 nHighSurrogate = static_cast<sal_Unicode>(nChar);
335 continue;
336 }
337 else if (ImplIsLowSurrogate(nChar))
338 {
339 bUndefined = false;
340 goto bad_input;
341 }
342 }
343 else if (ImplIsLowSurrogate(nChar))
344 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
345 else
346 {
347 bUndefined = false;
348 goto bad_input;
349 }
350
351 assert(rtl::isUnicodeScalarValue(nChar));
352
353 if (nChar < 0x80)
354 if (pDestBufPtr != pDestBufEnd)
355 *pDestBufPtr++ = static_cast< char >(nChar);
356 else
357 goto no_output;
358 else
359 {
360 sal_uInt32 nBytes = 0;
361 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
362 if (nOffset != -1)
363 {
364 nOffset
365 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
366 >> 8)];
367 if (nOffset != -1)
368 {
369 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
370 sal_uInt32 nFirst = nFirstLast & 0xFF;
371 sal_uInt32 nLast = nFirstLast >> 8;
372 sal_uInt32 nIndex = nChar & 0xFF;
373 if (nIndex >= nFirst && nIndex <= nLast)
374 {
375 nBytes
376 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
377 }
378 }
379 }
380 if (nBytes == 0)
381 {
382 sal_uInt32 nIndex1 = nChar >> 8;
383 if (nIndex1 < 0x100)
384 {
385 sal_uInt32 nIndex2 = nChar & 0xFF;
386 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
387 if (nIndex2 >= nFirst
388 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
389 nBytes = pBig5Data[nIndex1].
390 mpToUniTrailTab[nIndex2 - nFirst];
391 }
392 }
393 if (nBytes == 0)
394 {
395 ImplDBCSEUDCData const * p
396 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
397 m_pEudcData;
398 sal_uInt32 nCount
399 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
400 m_nEudcCount;
401 sal_uInt32 i;
402 for (i = 0; i < nCount; ++i) {
403 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
404 {
405 sal_uInt32 nIndex = nChar - p->mnUniStart;
406 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
407 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
408 sal_uInt32 nSize;
409 nBytes = (p->mnLeadStart + nLeadOff) << 8;
410 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
411 if (nTrailOff < nSize)
412 {
413 nBytes |= p->mnTrail1Start + nTrailOff;
414 break;
415 }
416 nTrailOff -= nSize;
417 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
418 if (nTrailOff < nSize)
419 {
420 nBytes |= p->mnTrail2Start + nTrailOff;
421 break;
422 }
423 nTrailOff -= nSize;
424 nBytes |= p->mnTrail3Start + nTrailOff;
425 break;
426 }
427 ++p;
428 }
429 }
430 if (nBytes == 0)
431 goto bad_input;
432 if (pDestBufEnd - pDestBufPtr >= 2)
433 {
434 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
435 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
436 }
437 else
438 goto no_output;
439 }
440 nHighSurrogate = 0;
441 continue;
442
443 bad_input:
444 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
445 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
446 &nInfo, nullptr, 0, nullptr))
447 {
448 case sal::detail::textenc::BAD_INPUT_STOP:
449 nHighSurrogate = 0;
450 break;
451
452 case sal::detail::textenc::BAD_INPUT_CONTINUE:
453 nHighSurrogate = 0;
454 continue;
455
456 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
457 goto no_output;
458 }
459 break;
460
461 no_output:
462 --pSrcBuf;
463 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
464 break;
465 }
466
467 if (nHighSurrogate != 0
468 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
469 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
470 == 0)
471 {
472 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
473 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
474 else
475 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
476 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
477 nullptr, 0, nullptr))
478 {
479 case sal::detail::textenc::BAD_INPUT_STOP:
480 case sal::detail::textenc::BAD_INPUT_CONTINUE:
481 nHighSurrogate = 0;
482 break;
483
484 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
485 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
486 break;
487 }
488 }
489
490 if (pContext)
491 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
492 = nHighSurrogate;
493 if (pInfo)
494 *pInfo = nInfo;
495 if (pSrcCvtChars)
496 *pSrcCvtChars = nConverted;
497
498 return pDestBufPtr - pDestBuf;
499 }
500
501 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
502