1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/lang/NoSupportException.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
#include <memory>
#include <vector>
29 
30 using namespace com::sun::star::lang;
31 using namespace com::sun::star::i18n;
32 using namespace com::sun::star::linguistic2;
33 using namespace com::sun::star::uno;
34 
35 
36 namespace i18npool {
37 
TextConversion_zh(const Reference<XComponentContext> & xContext)38 TextConversion_zh::TextConversion_zh( const Reference < XComponentContext >& xContext )
39     : TextConversionService("com.sun.star.i18n.TextConversion_zh")
40 {
41     xCDL = ConversionDictionaryList::create(xContext);
42 }
43 
getOneCharConversion(sal_Unicode ch,const sal_Unicode * Data,const sal_uInt16 * Index)44 static sal_Unicode getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
45 {
46     if (Data && Index) {
47         sal_Unicode address = Index[ch>>8];
48         if (address != 0xFFFF)
49             address = Data[address + (ch & 0xFF)];
50         return (address != 0xFFFF) ? address : ch;
51     } else {
52         return ch;
53     }
54 }
55 
#ifdef DISABLE_DYNLOADING

// When DISABLE_DYNLOADING is defined the conversion code in this file calls
// the table accessors below directly instead of resolving them by symbol name
// through getFunctionBySymbol().
extern "C" {

// Character-level tables: Traditional->Simplified (T2S),
// Simplified->variant characters (S2V), Simplified->Traditional (S2T).
const sal_Unicode* getSTC_CharData_T2S();
const sal_uInt16*  getSTC_CharIndex_T2S();
const sal_Unicode* getSTC_CharData_S2V();
const sal_uInt16*  getSTC_CharIndex_S2V();
const sal_Unicode* getSTC_CharData_S2T();
const sal_uInt16*  getSTC_CharIndex_S2T();

// Word dictionary text; the reference argument is an out-value
// (getWordConversion() passes its dictLen variable here).
const sal_Unicode* getSTC_WordData(sal_Int32&);

// Per-direction word index and entry tables; the reference argument is an
// out-value (getWordConversion() passes its maxLen variable here).
const sal_uInt16* getSTC_WordIndex_T2S(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_T2S();
const sal_uInt16* getSTC_WordIndex_S2T(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_S2T();

}

#endif
77 
78 OUString
getCharConversion(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,bool toSChinese,sal_Int32 nConversionOptions)79 TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
80 {
81     const sal_Unicode *Data;
82     const sal_uInt16 *Index;
83 
84 #ifndef DISABLE_DYNLOADING
85     if (toSChinese) {
86         Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
87         Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
88     } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
89         Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
90         Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
91     } else {
92         Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
93         Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
94     }
95 #else
96     if (toSChinese) {
97         Data = getSTC_CharData_T2S();
98         Index = getSTC_CharIndex_T2S();
99     } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
100         Data = getSTC_CharData_S2V();
101         Index = getSTC_CharIndex_S2V();
102     } else {
103         Data = getSTC_CharData_S2T();
104         Index = getSTC_CharIndex_S2T();
105     }
106 #endif
107 
108     rtl_uString * newStr = rtl_uString_alloc(nLength);
109     for (sal_Int32 i = 0; i < nLength; i++)
110         newStr->buffer[i] =
111             getOneCharConversion(aText[nStartPos+i], Data, Index);
112     return OUString(newStr, SAL_NO_ACQUIRE); //take ownership
113 }
114 
115 OUString
getWordConversion(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,bool toSChinese,sal_Int32 nConversionOptions,Sequence<sal_Int32> & offset)116 TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
117 {
118     sal_Int32 dictLen = 0;
119     sal_Int32 maxLen = 0;
120     const sal_uInt16 *index;
121     const sal_uInt16 *entry;
122     const sal_Unicode *charData;
123     const sal_uInt16 *charIndex;
124     bool one2one=true;
125 
126 #ifndef DISABLE_DYNLOADING
127     const sal_Unicode *wordData = reinterpret_cast<const sal_Unicode* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordData"))(dictLen);
128     if (toSChinese) {
129         index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
130         entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_T2S"))();
131         charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
132         charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
133     } else {
134         index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
135         entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_S2T"))();
136         if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
137             charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
138             charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
139         } else {
140             charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
141             charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
142         }
143     }
144 #else
145     const sal_Unicode *wordData = getSTC_WordData(dictLen);
146     if (toSChinese) {
147         index = getSTC_WordIndex_T2S(maxLen);
148         entry = getSTC_WordEntry_T2S();
149         charData = getSTC_CharData_T2S();
150         charIndex = getSTC_CharIndex_T2S();
151     } else {
152         index = getSTC_WordIndex_S2T(maxLen);
153         entry = getSTC_WordEntry_S2T();
154         if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
155             charData = getSTC_CharData_S2V();
156             charIndex = getSTC_CharIndex_S2V();
157         } else {
158             charData = getSTC_CharData_S2T();
159             charIndex = getSTC_CharIndex_S2T();
160         }
161     }
162 #endif
163 
164     if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
165         return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
166 
167     std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nLength * 2 + 1]);
168     sal_Int32 currPos = 0, count = 0;
169     while (currPos < nLength) {
170         sal_Int32 len = nLength - currPos;
171         bool found = false;
172         if (len > maxLen)
173             len = maxLen;
174         for (; len > 0 && ! found; len--) {
175             OUString word = aText.copy(nStartPos + currPos, len);
176             sal_Int32 current = 0;
177             // user dictionary
178             if (xCDL.is()) {
179                 Sequence < OUString > conversions;
180                 try {
181                     conversions = xCDL->queryConversions(word, 0, len,
182                             aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
183                             /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
184                             nConversionOptions);
185                 }
186                 catch ( NoSupportException & ) {
187                     // clear reference (when there is no user dictionary) in order
188                     // to not always have to catch this exception again
189                     // in further calls. (save time)
190                     xCDL = nullptr;
191                 }
192                 catch (...) {
193                     // catch all other exceptions to allow
194                     // querying the system dictionary in the next line
195                 }
196                 if (conversions.hasElements()) {
197                     if (offset.hasElements()) {
198                         if (word.getLength() != conversions[0].getLength())
199                             one2one=false;
200                         while (current < conversions[0].getLength()) {
201                             offset[count] = nStartPos + currPos + (current *
202                                     word.getLength() / conversions[0].getLength());
203                             newStr[count++] = conversions[0][current++];
204                         }
205                         // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
206                     } else {
207                         while (current < conversions[0].getLength())
208                             newStr[count++] = conversions[0][current++];
209                     }
210                     currPos += word.getLength();
211                     found = true;
212                 }
213             }
214 
215             if (!found && index[len+1] - index[len] > 0) {
216                 sal_Int32 bottom = static_cast<sal_Int32>(index[len]);
217                 sal_Int32 top = static_cast<sal_Int32>(index[len+1]) - 1;
218 
219                 while (bottom <= top && !found) {
220                     current = (top + bottom) / 2;
221                     const sal_Int32 result = word.compareTo(wordData + entry[current]);
222                     if (result < 0)
223                         top = current - 1;
224                     else if (result > 0)
225                         bottom = current + 1;
226                     else {
227                         if (toSChinese)   // Traditionary/Simplified conversion,
228                             for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
229                         else  // Simplified/Traditionary conversion, forwards search for next word
230                             current = entry[current] + word.getLength() + 1;
231                         sal_Int32 start=current;
232                         if (offset.hasElements()) {
233                             if (word.getLength() != OUString(&wordData[current]).getLength())
234                                 one2one=false;
235                             sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
236                             while (wordData[current]) {
237                                 offset[count]=nStartPos + currPos + ((current-start) *
238                                     word.getLength() / convertedLength);
239                                 newStr[count++] = wordData[current++];
240                             }
241                             // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
242                         } else {
243                             while (wordData[current])
244                                 newStr[count++] = wordData[current++];
245                         }
246                         currPos += word.getLength();
247                         found = true;
248                     }
249                 }
250             }
251         }
252         if (!found) {
253             if (offset.hasElements())
254                 offset[count]=nStartPos+currPos;
255             newStr[count++] =
256                 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
257             currPos++;
258         }
259     }
260     if (offset.hasElements())
261         offset.realloc(one2one ? 0 : count);
262     OUString aRet(newStr.get(), count);
263     return aRet;
264 }
265 
266 TextConversionResult SAL_CALL
getConversions(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,const Locale & rLocale,sal_Int16 nConversionType,sal_Int32 nConversionOptions)267 TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
268     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
269 {
270     TextConversionResult result;
271 
272     result.Candidates.realloc(1);
273     result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
274     result.Boundary.startPos = nStartPos;
275     result.Boundary.endPos = nStartPos + nLength;
276 
277     return result;
278 }
279 
280 OUString SAL_CALL
getConversion(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,const Locale & rLocale,sal_Int16 nConversionType,sal_Int32 nConversionOptions)281 TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
282     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
283 {
284     if (rLocale.Language != "zh" || ( nConversionType != TextConversionType::TO_SCHINESE && nConversionType != TextConversionType::TO_TCHINESE) )
285         throw NoSupportException(); // Conversion type is not supported in this service.
286 
287     aLocale=rLocale;
288     bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
289 
290     if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
291         // char to char dictionary
292         return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
293     else {
294         Sequence <sal_Int32> offset;
295         // word to word dictionary
296         return  getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
297     }
298 }
299 
300 OUString SAL_CALL
getConversionWithOffset(const OUString & aText,sal_Int32 nStartPos,sal_Int32 nLength,const Locale & rLocale,sal_Int16 nConversionType,sal_Int32 nConversionOptions,Sequence<sal_Int32> & offset)301 TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
302     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
303 {
304     if (rLocale.Language != "zh" || ( nConversionType != TextConversionType::TO_SCHINESE && nConversionType != TextConversionType::TO_TCHINESE) )
305         throw NoSupportException(); // Conversion type is not supported in this service.
306 
307     aLocale=rLocale;
308     bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
309 
310     if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
311         offset.realloc(0);
312         // char to char dictionary
313         return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
314     } else {
315         if (offset.getLength() < 2*nLength)
316             offset.realloc(2*nLength);
317         // word to word dictionary
318         return  getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
319     }
320 }
321 
322 sal_Bool SAL_CALL
interactiveConversion(const Locale &,sal_Int16,sal_Int32)323 TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
324 {
325     return false;
326 }
327 
328 }
329 
330 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
331