1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <sal/config.h>
21
22 #include <cassert>
23
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
27
28 #include "converter.hxx"
29 #include "convertiso2022cn.hxx"
30 #include "tenchelp.hxx"
31 #include "unichars.hxx"
32
33 namespace {
34
35 enum ImplIso2022CnToUnicodeState // order is important:
36 {
37 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII,
38 IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO,
39 IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2,
40 IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432,
41 IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2,
42 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC,
43 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR,
44 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN,
45 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK
46 };
47
48 struct ImplIso2022CnToUnicodeContext
49 {
50 ImplIso2022CnToUnicodeState m_eState;
51 sal_uInt32 m_nRow;
52 bool m_bSo;
53 bool m_b116431;
54 };
55
56 enum ImplUnicodeToIso2022CnDesignator
57 {
58 IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE,
59 IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312,
60 IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431
61 };
62
63 struct ImplUnicodeToIso2022CnContext
64 {
65 sal_Unicode m_nHighSurrogate;
66 ImplUnicodeToIso2022CnDesignator m_eSoDesignator;
67 bool m_b116432Designator;
68 bool m_bSo;
69 };
70
71 }
72
ImplCreateIso2022CnToUnicodeContext()73 void * ImplCreateIso2022CnToUnicodeContext()
74 {
75 ImplIso2022CnToUnicodeContext * pContext =
76 new ImplIso2022CnToUnicodeContext;
77 pContext->m_eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
78 pContext->m_bSo = false;
79 pContext->m_b116431 = false;
80 return pContext;
81 }
82
ImplResetIso2022CnToUnicodeContext(void * pContext)83 void ImplResetIso2022CnToUnicodeContext(void * pContext)
84 {
85 if (pContext)
86 {
87 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState
88 = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
89 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo = false;
90 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431 = false;
91 }
92 }
93
ImplDestroyIso2022CnToUnicodeContext(void * pContext)94 void ImplDestroyIso2022CnToUnicodeContext(void * pContext)
95 {
96 delete static_cast< ImplIso2022CnToUnicodeContext * >(pContext);
97 }
98
ImplConvertIso2022CnToUnicode(void const * pData,void * pContext,char const * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)99 sal_Size ImplConvertIso2022CnToUnicode(void const * pData,
100 void * pContext,
101 char const * pSrcBuf,
102 sal_Size nSrcBytes,
103 sal_Unicode * pDestBuf,
104 sal_Size nDestChars,
105 sal_uInt32 nFlags,
106 sal_uInt32 * pInfo,
107 sal_Size * pSrcCvtBytes)
108 {
109 ImplDBCSToUniLeadTab const * pGb2312Data
110 = static_cast< ImplIso2022CnConverterData const * >(pData)->
111 m_pGb2312ToUnicodeData;
112 sal_uInt16 const * pCns116431992Data
113 = static_cast< ImplIso2022CnConverterData const * >(pData)->
114 m_pCns116431992ToUnicodeData;
115 sal_Int32 const * pCns116431992RowOffsets
116 = static_cast< ImplIso2022CnConverterData const * >(pData)->
117 m_pCns116431992ToUnicodeRowOffsets;
118 sal_Int32 const * pCns116431992PlaneOffsets
119 = static_cast< ImplIso2022CnConverterData const * >(pData)->
120 m_pCns116431992ToUnicodePlaneOffsets;
121 ImplIso2022CnToUnicodeState eState
122 = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
123 sal_uInt32 nRow = 0;
124 bool bSo = false;
125 bool b116431 = false;
126 sal_uInt32 nInfo = 0;
127 sal_Size nConverted = 0;
128 sal_Unicode * pDestBufPtr = pDestBuf;
129 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
130 sal_Size startOfCurrentChar = 0;
131
132 if (pContext)
133 {
134 eState = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState;
135 nRow = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_nRow;
136 bSo = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo;
137 b116431 = static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431;
138 }
139
140 for (; nConverted < nSrcBytes; ++nConverted)
141 {
142 bool bUndefined = true;
143 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
144 sal_uInt32 nPlane;
145 switch (eState)
146 {
147 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII:
148 if (nChar == 0x0E) // SO
149 {
150 bSo = true;
151 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO;
152 }
153 else if (nChar == 0x1B) // ESC
154 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC;
155 else if (nChar < 0x80)
156 if (pDestBufPtr != pDestBufEnd) {
157 *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
158 startOfCurrentChar = nConverted + 1;
159 } else
160 goto no_output;
161 else
162 {
163 bUndefined = false;
164 goto bad_input;
165 }
166 break;
167
168 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO:
169 if (nChar == 0x0F) // SI
170 {
171 bSo = false;
172 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
173 }
174 else if (nChar == 0x1B) // ESC
175 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC;
176 else if (nChar >= 0x21 && nChar <= 0x7E)
177 {
178 nRow = nChar;
179 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2;
180 }
181 else
182 {
183 bUndefined = false;
184 goto bad_input;
185 }
186 break;
187
188 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO_2:
189 if (nChar >= 0x21 && nChar <= 0x7E)
190 if (b116431)
191 {
192 nPlane = 0;
193 goto transform;
194 }
195 else
196 {
197 sal_uInt16 nUnicode = 0;
198 sal_uInt32 nFirst;
199 nRow += 0x80;
200 nChar += 0x80;
201 nFirst = pGb2312Data[nRow].mnTrailStart;
202 if (nChar >= nFirst
203 && nChar <= pGb2312Data[nRow].mnTrailEnd)
204 nUnicode = pGb2312Data[nRow].
205 mpToUniTrailTab[nChar - nFirst];
206 if (nUnicode != 0)
207 if (pDestBufPtr != pDestBufEnd)
208 {
209 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
210 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO;
211 startOfCurrentChar = nConverted + 1;
212 }
213 else
214 goto no_output;
215 else
216 goto bad_input;
217 }
218 else
219 {
220 bUndefined = false;
221 goto bad_input;
222 }
223 break;
224
225 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432:
226 if (nChar >= 0x21 && nChar <= 0x7E)
227 {
228 nRow = nChar;
229 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2;
230 }
231 else
232 {
233 bUndefined = false;
234 goto bad_input;
235 }
236 break;
237
238 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432_2:
239 if (nChar >= 0x21 && nChar <= 0x7E)
240 {
241 nPlane = 1;
242 goto transform;
243 }
244 else
245 {
246 bUndefined = false;
247 goto bad_input;
248 }
249 break;
250
251 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC:
252 if (nChar == 0x24) // $
253 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR;
254 else if (nChar == 0x4E) // N
255 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_116432;
256 else
257 {
258 bUndefined = false;
259 goto bad_input;
260 }
261 break;
262
263 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR:
264 if (nChar == 0x29) // )
265 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN;
266 else if (nChar == 0x2A) // *
267 eState
268 = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK;
269 else
270 {
271 bUndefined = false;
272 goto bad_input;
273 }
274 break;
275
276 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN:
277 if (nChar == 0x41) // A
278 {
279 b116431 = false;
280 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
281 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
282 }
283 else if (nChar == 0x47) // G
284 {
285 b116431 = true;
286 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
287 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
288 }
289 else
290 {
291 bUndefined = false;
292 goto bad_input;
293 }
294 break;
295
296 case IMPL_ISO_2022_CN_TO_UNICODE_STATE_ESC_DOLLAR_ASTERISK:
297 if (nChar == 0x48) // H
298 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
299 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
300 else
301 {
302 bUndefined = false;
303 goto bad_input;
304 }
305 break;
306 }
307 continue;
308
309 transform:
310 {
311 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane];
312 if (nPlaneOffset == -1)
313 goto bad_input;
314 else
315 {
316 sal_Int32 nOffset
317 = pCns116431992RowOffsets[nPlaneOffset + (nRow - 0x21)];
318 if (nOffset == -1)
319 goto bad_input;
320 else
321 {
322 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++];
323 sal_uInt32 nFirst = nFirstLast & 0xFF;
324 sal_uInt32 nLast = nFirstLast >> 8;
325 nChar -= 0x20;
326 if (nChar >= nFirst && nChar <= nLast)
327 {
328 sal_uInt32 nUnicode
329 = pCns116431992Data[nOffset + (nChar - nFirst)];
330 if (nUnicode == 0xFFFF)
331 goto bad_input;
332 else if (ImplIsHighSurrogate(nUnicode))
333 if (pDestBufEnd - pDestBufPtr >= 2)
334 {
335 nOffset += nLast - nFirst + 1;
336 nFirst = pCns116431992Data[nOffset++];
337 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
338 *pDestBufPtr++
339 = static_cast<sal_Unicode>(pCns116431992Data[
340 nOffset + (nChar - nFirst)]);
341 startOfCurrentChar = nConverted + 1;
342 }
343 else
344 goto no_output;
345 else
346 if (pDestBufPtr != pDestBufEnd) {
347 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
348 startOfCurrentChar = nConverted + 1;
349 } else
350 goto no_output;
351 }
352 else
353 goto bad_input;
354 eState = bSo ? IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO :
355 IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
356 }
357 }
358 continue;
359 }
360
361 bad_input:
362 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
363 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
364 &nInfo))
365 {
366 case sal::detail::textenc::BAD_INPUT_STOP:
367 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
368 b116431 = false;
369 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
370 ++nConverted;
371 } else {
372 nConverted = startOfCurrentChar;
373 }
374 break;
375
376 case sal::detail::textenc::BAD_INPUT_CONTINUE:
377 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
378 b116431 = false;
379 startOfCurrentChar = nConverted + 1;
380 continue;
381
382 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
383 goto no_output;
384 }
385 break;
386
387 no_output:
388 --pSrcBuf;
389 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
390 break;
391 }
392
393 if (eState > IMPL_ISO_2022_CN_TO_UNICODE_STATE_SO
394 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
395 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
396 == 0)
397 {
398 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
399 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
400 else
401 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
402 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
403 &nInfo))
404 {
405 case sal::detail::textenc::BAD_INPUT_STOP:
406 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
407 nConverted = startOfCurrentChar;
408 }
409 [[fallthrough]];
410 case sal::detail::textenc::BAD_INPUT_CONTINUE:
411 eState = IMPL_ISO_2022_CN_TO_UNICODE_STATE_ASCII;
412 b116431 = false;
413 break;
414
415 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
416 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
417 break;
418 }
419 }
420
421 if (pContext)
422 {
423 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_eState = eState;
424 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_nRow = nRow;
425 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_bSo = bSo;
426 static_cast< ImplIso2022CnToUnicodeContext * >(pContext)->m_b116431 = b116431;
427 }
428 if (pInfo)
429 *pInfo = nInfo;
430 if (pSrcCvtBytes)
431 *pSrcCvtBytes = nConverted;
432
433 return pDestBufPtr - pDestBuf;
434 }
435
ImplCreateUnicodeToIso2022CnContext()436 void * ImplCreateUnicodeToIso2022CnContext()
437 {
438 ImplUnicodeToIso2022CnContext * pContext =
439 new ImplUnicodeToIso2022CnContext;
440 pContext->m_nHighSurrogate = 0;
441 pContext->m_eSoDesignator = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
442 pContext->m_b116432Designator = false;
443 pContext->m_bSo = false;
444 return pContext;
445 }
446
ImplResetUnicodeToIso2022CnContext(void * pContext)447 void ImplResetUnicodeToIso2022CnContext(void * pContext)
448 {
449 if (pContext)
450 {
451 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate = 0;
452 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator
453 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
454 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_b116432Designator
455 = false;
456 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo = false;
457 }
458 }
459
ImplDestroyUnicodeToIso2022CnContext(void * pContext)460 void ImplDestroyUnicodeToIso2022CnContext(void * pContext)
461 {
462 delete static_cast< ImplUnicodeToIso2022CnContext * >(pContext);
463 }
464
ImplIso2022CnTranslateTo2312(ImplUniToDBCSHighTab const * pGb2312Data,sal_uInt32 nChar)465 static sal_uInt32 ImplIso2022CnTranslateTo2312(ImplUniToDBCSHighTab const *
466 pGb2312Data,
467 sal_uInt32 nChar)
468 {
469 sal_uInt32 nIndex1 = nChar >> 8;
470 if (nIndex1 < 0x100)
471 {
472 sal_uInt32 nIndex2 = nChar & 0xFF;
473 sal_uInt32 nFirst = pGb2312Data[nIndex1].mnLowStart;
474 if (nIndex2 >= nFirst && nIndex2 <= pGb2312Data[nIndex1].mnLowEnd)
475 return pGb2312Data[nIndex1].mpToUniTrailTab[nIndex2 - nFirst]
476 & 0x7F7F;
477 }
478 return 0;
479 }
480
481 static sal_uInt32
ImplIso2022CnTranslateTo116431(sal_uInt8 const * pCns116431992Data,sal_Int32 const * pCns116431992PageOffsets,sal_Int32 const * pCns116431992PlaneOffsets,sal_uInt32 nChar)482 ImplIso2022CnTranslateTo116431(sal_uInt8 const * pCns116431992Data,
483 sal_Int32 const * pCns116431992PageOffsets,
484 sal_Int32 const * pCns116431992PlaneOffsets,
485 sal_uInt32 nChar)
486 {
487 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
488 sal_uInt32 nFirst;
489 sal_uInt32 nLast;
490 sal_uInt32 nPlane;
491 if (nOffset == -1)
492 return 0;
493 nOffset = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)];
494 if (nOffset == -1)
495 return 0;
496 nFirst = pCns116431992Data[nOffset++];
497 nLast = pCns116431992Data[nOffset++];
498 nChar &= 0xFF;
499 if (nChar < nFirst || nChar > nLast)
500 return 0;
501 nOffset += 3 * (nChar - nFirst);
502 nPlane = pCns116431992Data[nOffset++];
503 if (nPlane != 1)
504 return 0;
505 return (0x20 + pCns116431992Data[nOffset]) << 8
506 | (0x20 + pCns116431992Data[nOffset + 1]);
507 }
508
ImplConvertUnicodeToIso2022Cn(void const * pData,void * pContext,sal_Unicode const * pSrcBuf,sal_Size nSrcChars,char * pDestBuf,sal_Size nDestBytes,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtChars)509 sal_Size ImplConvertUnicodeToIso2022Cn(void const * pData,
510 void * pContext,
511 sal_Unicode const * pSrcBuf,
512 sal_Size nSrcChars,
513 char * pDestBuf,
514 sal_Size nDestBytes,
515 sal_uInt32 nFlags,
516 sal_uInt32 * pInfo,
517 sal_Size * pSrcCvtChars)
518 {
519 ImplUniToDBCSHighTab const * pGb2312Data
520 = static_cast< ImplIso2022CnConverterData const * >(pData)->
521 m_pUnicodeToGb2312Data;
522 sal_uInt8 const * pCns116431992Data
523 = static_cast< ImplIso2022CnConverterData const * >(pData)->
524 m_pUnicodeToCns116431992Data;
525 sal_Int32 const * pCns116431992PageOffsets
526 = static_cast< ImplIso2022CnConverterData const * >(pData)->
527 m_pUnicodeToCns116431992PageOffsets;
528 sal_Int32 const * pCns116431992PlaneOffsets
529 = static_cast< ImplIso2022CnConverterData const * >(pData)->
530 m_pUnicodeToCns116431992PlaneOffsets;
531 sal_Unicode nHighSurrogate = 0;
532 ImplUnicodeToIso2022CnDesignator eSoDesignator
533 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
534 bool b116432Designator = false;
535 bool bSo = false;
536 sal_uInt32 nInfo = 0;
537 sal_Size nConverted = 0;
538 char * pDestBufPtr = pDestBuf;
539 char * pDestBufEnd = pDestBuf + nDestBytes;
540 bool bWritten;
541
542 if (pContext)
543 {
544 nHighSurrogate
545 = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate;
546 eSoDesignator
547 = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator;
548 b116432Designator = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->
549 m_b116432Designator;
550 bSo = static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo;
551 }
552
553 for (; nConverted < nSrcChars; ++nConverted)
554 {
555 bool bUndefined = true;
556 sal_uInt32 nChar = *pSrcBuf++;
557 if (nHighSurrogate == 0)
558 {
559 if (ImplIsHighSurrogate(nChar))
560 {
561 nHighSurrogate = static_cast<sal_Unicode>(nChar);
562 continue;
563 }
564 else if (ImplIsLowSurrogate(nChar))
565 {
566 bUndefined = false;
567 goto bad_input;
568 }
569 }
570 else if (ImplIsLowSurrogate(nChar))
571 nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
572 else
573 {
574 bUndefined = false;
575 goto bad_input;
576 }
577
578 assert(rtl::isUnicodeScalarValue(nChar));
579
580 if (nChar == 0x0A || nChar == 0x0D) // LF, CR
581 {
582 if (bSo)
583 {
584 if (pDestBufPtr != pDestBufEnd)
585 {
586 *pDestBufPtr++ = 0x0F; // SI
587 bSo = false;
588 eSoDesignator
589 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
590 b116432Designator = false;
591 }
592 else
593 goto no_output;
594 }
595 if (pDestBufPtr != pDestBufEnd)
596 *pDestBufPtr++ = static_cast< char >(nChar);
597 else
598 goto no_output;
599 }
600 else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B)
601 goto bad_input;
602 else if (nChar < 0x80)
603 {
604 if (bSo)
605 {
606 if (pDestBufPtr != pDestBufEnd)
607 {
608 *pDestBufPtr++ = 0x0F; // SI
609 bSo = false;
610 }
611 else
612 goto no_output;
613 }
614 if (pDestBufPtr != pDestBufEnd)
615 *pDestBufPtr++ = static_cast< char >(nChar);
616 else
617 goto no_output;
618 }
619 else
620 {
621 sal_uInt32 nBytes = 0;
622 ImplUnicodeToIso2022CnDesignator eNewDesignator =
623 IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
624 switch (eSoDesignator)
625 {
626 case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE:
627 nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar);
628 if (nBytes != 0)
629 {
630 eNewDesignator
631 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312;
632 break;
633 }
634 nBytes = ImplIso2022CnTranslateTo116431(
635 pCns116431992Data,
636 pCns116431992PageOffsets,
637 pCns116431992PlaneOffsets,
638 nChar);
639 if (nBytes != 0)
640 {
641 eNewDesignator
642 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431;
643 break;
644 }
645 break;
646
647 case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312:
648 nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar);
649 if (nBytes != 0)
650 {
651 eNewDesignator
652 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
653 break;
654 }
655 nBytes = ImplIso2022CnTranslateTo116431(
656 pCns116431992Data,
657 pCns116431992PageOffsets,
658 pCns116431992PlaneOffsets,
659 nChar);
660 if (nBytes != 0)
661 {
662 eNewDesignator
663 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431;
664 break;
665 }
666 break;
667
668 case IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_116431:
669 nBytes = ImplIso2022CnTranslateTo116431(
670 pCns116431992Data,
671 pCns116431992PageOffsets,
672 pCns116431992PlaneOffsets,
673 nChar);
674 if (nBytes != 0)
675 {
676 eNewDesignator
677 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE;
678 break;
679 }
680 nBytes = ImplIso2022CnTranslateTo2312(pGb2312Data, nChar);
681 if (nBytes != 0)
682 {
683 eNewDesignator
684 = IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312;
685 break;
686 }
687 break;
688 }
689 if (nBytes != 0)
690 {
691 if (eNewDesignator
692 != IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_NONE)
693 {
694 if (bSo)
695 {
696 if (pDestBufPtr != pDestBufEnd)
697 {
698 *pDestBufPtr++ = 0x0F; // SI
699 bSo = false;
700 }
701 else
702 goto no_output;
703 }
704 if (pDestBufEnd - pDestBufPtr >= 4)
705 {
706 *pDestBufPtr++ = 0x1B; // ESC
707 *pDestBufPtr++ = 0x24; // $
708 *pDestBufPtr++ = 0x29; // )
709 *pDestBufPtr++
710 = eNewDesignator
711 == IMPL_UNICODE_TO_ISO_2022_CN_DESIGNATOR_2312 ?
712 0x41 : 0x47; // A, G
713 eSoDesignator = eNewDesignator;
714 }
715 else
716 goto no_output;
717 }
718 if (!bSo)
719 {
720 if (pDestBufPtr != pDestBufEnd)
721 {
722 *pDestBufPtr++ = 0x0E; // SO
723 bSo = true;
724 }
725 else
726 goto no_output;
727 }
728 if (pDestBufEnd - pDestBufPtr >= 4)
729 {
730 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
731 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
732 }
733 else
734 goto no_output;
735 }
736 else
737 {
738 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16];
739 sal_uInt32 nFirst;
740 sal_uInt32 nLast;
741 sal_uInt32 nPlane;
742 if (nOffset == -1)
743 goto bad_input;
744 nOffset
745 = pCns116431992PageOffsets[nOffset
746 + ((nChar & 0xFF00) >> 8)];
747 if (nOffset == -1)
748 goto bad_input;
749 nFirst = pCns116431992Data[nOffset++];
750 nLast = pCns116431992Data[nOffset++];
751 nChar &= 0xFF;
752 if (nChar < nFirst || nChar > nLast)
753 goto bad_input;
754 nOffset += 3 * (nChar - nFirst);
755 nPlane = pCns116431992Data[nOffset++];
756 if (nPlane != 2)
757 goto bad_input;
758 if (!b116432Designator)
759 {
760 if (pDestBufEnd - pDestBufPtr >= 4)
761 {
762 *pDestBufPtr++ = 0x1B; // ESC
763 *pDestBufPtr++ = 0x24; // $
764 *pDestBufPtr++ = 0x2A; // *
765 *pDestBufPtr++ = 0x48; // H
766 b116432Designator = true;
767 }
768 else
769 goto no_output;
770 }
771 if (pDestBufEnd - pDestBufPtr >= 4)
772 {
773 *pDestBufPtr++ = 0x1B; // ESC
774 *pDestBufPtr++ = 0x4E; // N
775 *pDestBufPtr++
776 = static_cast< char >(0x20 + pCns116431992Data[nOffset++]);
777 *pDestBufPtr++
778 = static_cast< char >(0x20 + pCns116431992Data[nOffset]);
779 }
780 else
781 goto no_output;
782 }
783 }
784 nHighSurrogate = 0;
785 continue;
786
787 bad_input:
788 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
789 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
790 &nInfo, "\x0F" /* SI */, bSo ? 1 : 0, &bWritten))
791 {
792 case sal::detail::textenc::BAD_INPUT_STOP:
793 nHighSurrogate = 0;
794 break;
795
796 case sal::detail::textenc::BAD_INPUT_CONTINUE:
797 if (bWritten)
798 bSo = false;
799 nHighSurrogate = 0;
800 continue;
801
802 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
803 goto no_output;
804 }
805 break;
806
807 no_output:
808 --pSrcBuf;
809 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
810 break;
811 }
812
813 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
814 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
815 == 0)
816 {
817 bool bFlush = true;
818 if (nHighSurrogate != 0)
819 {
820 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
821 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
822 else
823 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
824 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
825 "\x0F" /* SI */, bSo ? 1 : 0, &bWritten))
826 {
827 case sal::detail::textenc::BAD_INPUT_STOP:
828 nHighSurrogate = 0;
829 bFlush = false;
830 break;
831
832 case sal::detail::textenc::BAD_INPUT_CONTINUE:
833 if (bWritten)
834 bSo = false;
835 nHighSurrogate = 0;
836 break;
837
838 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
839 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
840 break;
841 }
842 }
843 if (bFlush && bSo && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
844 {
845 if (pDestBufPtr != pDestBufEnd)
846 {
847 *pDestBufPtr++ = 0x0F; // SI
848 bSo = false;
849 }
850 else
851 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
852 }
853 }
854
855 if (pContext)
856 {
857 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_nHighSurrogate
858 = nHighSurrogate;
859 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_eSoDesignator
860 = eSoDesignator;
861 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_b116432Designator
862 = b116432Designator;
863 static_cast< ImplUnicodeToIso2022CnContext * >(pContext)->m_bSo = bSo;
864 }
865 if (pInfo)
866 *pInfo = nInfo;
867 if (pSrcCvtChars)
868 *pSrcCvtChars = nConverted;
869
870 return pDestBufPtr - pDestBuf;
871 }
872
873 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
874