1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <cclass_unicode.hxx>
21 #include <com/sun/star/i18n/KCharacterType.hpp>
22 #include <com/sun/star/i18n/WordType.hpp>
23 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
24 #include <unicode/uchar.h>
25 #include <cppuhelper/exc_hlp.hxx>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <breakiteratorImpl.hxx>
28 #include <transliteration_body.hxx>
29 #include <rtl/ref.hxx>
30 
31 using namespace ::com::sun::star;
32 using namespace ::com::sun::star::uno;
33 using namespace ::com::sun::star::i18n;
34 using namespace ::com::sun::star::lang;
35 
36 namespace i18npool {
37 
38 //  class cclass_Unicode
39 //  ----------------------------------------------------;
40 
cclass_Unicode(const uno::Reference<XComponentContext> & rxContext)41 cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) :
42         trans( new Transliteration_casemapping() ),
43         m_xContext( rxContext ),
44         nStartTypes( 0 ),
45         nContTypes( 0 ),
46         eState( ssGetChar ),
47         cGroupSep( ',' ),
48         cDecimalSep( '.' ),
49         cDecimalSepAlt( 0 )
50 {
51 }
52 
~cclass_Unicode()53 cclass_Unicode::~cclass_Unicode() {
54     destroyParserTable();
55 }
56 
57 
58 OUString SAL_CALL
toUpper(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)59 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
60     sal_Int32 len = Text.getLength();
61     if (nPos >= len)
62         return OUString();
63     if (nCount + nPos > len)
64         nCount = len - nPos;
65 
66     trans->setMappingType(MappingType::ToUpper, rLocale);
67     return trans->transliterateString2String(Text, nPos, nCount);
68 }
69 
70 OUString SAL_CALL
toLower(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)71 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
72     sal_Int32 len = Text.getLength();
73     if (nPos >= len)
74         return OUString();
75     if (nCount + nPos > len)
76         nCount = len - nPos;
77 
78     trans->setMappingType(MappingType::ToLower, rLocale);
79     return trans->transliterateString2String(Text, nPos, nCount);
80 }
81 
82 OUString SAL_CALL
toTitle(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)83 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
84     try
85     {
86         sal_Int32 len = Text.getLength();
87         if (nPos >= len)
88             return OUString();
89         if (nCount + nPos > len)
90             nCount = len - nPos;
91 
92         trans->setMappingType(MappingType::ToTitle, rLocale);
93         rtl_uString* pStr = rtl_uString_alloc(nCount);
94         sal_Unicode* out = pStr->buffer;
95         rtl::Reference< BreakIteratorImpl > xBrk(new BreakIteratorImpl(m_xContext));
96         Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale,
97                     WordType::ANYWORD_IGNOREWHITESPACES, true);
98         for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
99             if (i >= bdy.endPos)
100                 bdy = xBrk->nextWord(Text, bdy.endPos, rLocale,
101                             WordType::ANYWORD_IGNOREWHITESPACES);
102             *out = (i == bdy.startPos) ?
103                 trans->transliterateChar2Char(Text[i]) : Text[i];
104         }
105         *out = 0;
106         return OUString( pStr, SAL_NO_ACQUIRE );
107     }
108     catch (const RuntimeException&)
109     {
110         throw;
111     }
112     catch (const Exception& e)
113     {
114         uno::Any a(cppu::getCaughtException());
115         throw lang::WrappedTargetRuntimeException(
116             "wrapped " + a.getValueTypeName() + ": " + e.Message,
117             uno::Reference<uno::XInterface>(), a);
118     }
119 }
120 
121 sal_Int16 SAL_CALL
getType(const OUString & Text,sal_Int32 nPos)122 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) {
123     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
124     return static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0)));
125 }
126 
127 sal_Int16 SAL_CALL
getCharacterDirection(const OUString & Text,sal_Int32 nPos)128 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) {
129     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
130     return static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0)));
131 }
132 
133 
134 sal_Int16 SAL_CALL
getScript(const OUString & Text,sal_Int32 nPos)135 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) {
136     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
137     // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
138     // while OO.o enum UnicideScript starts from 0.
139     // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
140     return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1;
141 }
142 
143 
144 sal_Int32
getCharType(const OUString & Text,sal_Int32 * nPos,sal_Int32 increment)145 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
146     using namespace ::com::sun::star::i18n::KCharacterType;
147 
148     sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
149     switch ( u_charType(ch) ) {
150     // Upper
151     case U_UPPERCASE_LETTER :
152         return UPPER|LETTER|PRINTABLE|BASE_FORM;
153 
154     // Lower
155     case U_LOWERCASE_LETTER :
156         return LOWER|LETTER|PRINTABLE|BASE_FORM;
157 
158     // Title
159     case U_TITLECASE_LETTER :
160         return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
161 
162     // Letter
163     case U_MODIFIER_LETTER :
164     case U_OTHER_LETTER :
165         return LETTER|PRINTABLE|BASE_FORM;
166 
167     // Digit
168     case U_DECIMAL_DIGIT_NUMBER:
169     case U_LETTER_NUMBER:
170     case U_OTHER_NUMBER:
171         return DIGIT|PRINTABLE|BASE_FORM;
172 
173     // Base
174     case U_NON_SPACING_MARK:
175     case U_ENCLOSING_MARK:
176     case U_COMBINING_SPACING_MARK:
177         return BASE_FORM|PRINTABLE;
178 
179     // Print
180     case U_SPACE_SEPARATOR:
181 
182     case U_DASH_PUNCTUATION:
183     case U_INITIAL_PUNCTUATION:
184     case U_FINAL_PUNCTUATION:
185     case U_CONNECTOR_PUNCTUATION:
186     case U_OTHER_PUNCTUATION:
187 
188     case U_MATH_SYMBOL:
189     case U_CURRENCY_SYMBOL:
190     case U_MODIFIER_SYMBOL:
191     case U_OTHER_SYMBOL:
192         return PRINTABLE;
193 
194     // Control
195     case U_CONTROL_CHAR:
196     case U_FORMAT_CHAR:
197         return CONTROL;
198 
199     case U_LINE_SEPARATOR:
200     case U_PARAGRAPH_SEPARATOR:
201         return CONTROL|PRINTABLE;
202 
203     // for all others
204     default:
205         return U_GENERAL_OTHER_TYPES;
206     }
207 }
208 
209 sal_Int32 SAL_CALL
getCharacterType(const OUString & Text,sal_Int32 nPos,const Locale &)210 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) {
211     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212     return getCharType(Text, &nPos, 0);
213 
214 }
215 
216 sal_Int32 SAL_CALL
getStringType(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale &)217 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) {
218     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
219 
220     sal_Int32 result = 0;
221 
222     while (nCount > 0 && nPos < Text.getLength())
223     {
224         sal_Int32 nOrigPos = nPos;
225         result |= getCharType(Text, &nPos, 1);
226         sal_Int32 nUtf16Units = nPos - nOrigPos;
227         nCount -= nUtf16Units;
228     }
229 
230     return result;
231 }
232 
parseAnyToken(const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)233 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
234             const OUString& Text,
235             sal_Int32 nPos,
236             const Locale& rLocale,
237             sal_Int32 startCharTokenType,
238             const OUString& userDefinedCharactersStart,
239             sal_Int32 contCharTokenType,
240             const OUString& userDefinedCharactersCont )
241 {
242     ParseResult r;
243     if ( Text.getLength() <= nPos )
244         return r;
245 
246     setupParserTable( rLocale,
247         startCharTokenType, userDefinedCharactersStart,
248         contCharTokenType, userDefinedCharactersCont );
249     parseText( r, Text, nPos );
250 
251     return r;
252 }
253 
254 
parsePredefinedToken(sal_Int32 nTokenType,const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)255 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
256             sal_Int32 nTokenType,
257             const OUString& Text,
258             sal_Int32 nPos,
259             const Locale& rLocale,
260             sal_Int32 startCharTokenType,
261             const OUString& userDefinedCharactersStart,
262             sal_Int32 contCharTokenType,
263             const OUString& userDefinedCharactersCont )
264 {
265     ParseResult r;
266     if ( Text.getLength() <= nPos )
267         return r;
268 
269     setupParserTable( rLocale,
270         startCharTokenType, userDefinedCharactersStart,
271         contCharTokenType, userDefinedCharactersCont );
272     parseText( r, Text, nPos, nTokenType );
273 
274     return r;
275 }
276 
getImplementationName()277 OUString SAL_CALL cclass_Unicode::getImplementationName()
278 {
279     return "com.sun.star.i18n.CharacterClassification_Unicode";
280 }
281 
supportsService(const OUString & rServiceName)282 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName)
283 {
284     return cppu::supportsService(this, rServiceName);
285 }
286 
getSupportedServiceNames()287 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames()
288 {
289     return { "com.sun.star.i18n.CharacterClassification_Unicode" };
290 }
291 
292 }
293 
294 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(css::uno::XComponentContext * context,css::uno::Sequence<css::uno::Any> const &)295 com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
296     css::uno::XComponentContext *context,
297     css::uno::Sequence<css::uno::Any> const &)
298 {
299     return cppu::acquire(new i18npool::cclass_Unicode(context));
300 }
301 
302 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
303