/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
19 
20 #include <indexentrysupplier_default.hxx>
21 #include <collatorImpl.hxx>
22 #include <localedata.hxx>
23 #include <i18nutil/unicode.hxx>
24 #include <com/sun/star/i18n/CollatorOptions.hpp>
25 #include <o3tl/temporary.hxx>
26 
27 using namespace ::com::sun::star;
28 using namespace ::com::sun::star::uno;
29 using namespace ::com::sun::star::i18n;
30 using namespace ::com::sun::star::lang;
31 
32 namespace i18npool {
33 
IndexEntrySupplier_Unicode(const css::uno::Reference<css::uno::XComponentContext> & rxContext)34 IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode(
35     const css::uno::Reference < css::uno::XComponentContext >& rxContext ) :
36     IndexEntrySupplier_Common(rxContext)
37 {
38     implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode";
39     index.reset( new Index(rxContext) );
40 }
41 
~IndexEntrySupplier_Unicode()42 IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode()
43 {
44 }
45 
loadAlgorithm(const lang::Locale & rLocale,const OUString & rAlgorithm,sal_Int32 collatorOptions)46 sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale,
47     const OUString& rAlgorithm, sal_Int32 collatorOptions )
48 {
49     index->init(rLocale, rAlgorithm);
50     return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions);
51 }
52 
// Returns the index description for the entry text selected by getEntry()
// from the index entry / phonetic entry pair.
OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey(
    const OUString& rIndexEntry,
    const OUString& rPhoneticEntry,
    const lang::Locale& rLocale )
{
    const OUString& rEntry = getEntry(rIndexEntry, rPhoneticEntry, rLocale);
    return index->getIndexDescription(rEntry);
}
58 
compareIndexEntry(const OUString & rIndexEntry1,const OUString & rPhoneticEntry1,const lang::Locale & rLocale1,const OUString & rIndexEntry2,const OUString & rPhoneticEntry2,const lang::Locale & rLocale2)59 sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry(
60     const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1,
61     const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 )
62 {
63     sal_Int16 result =
64             index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) -
65             index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2));
66     if (result == 0)
67         return IndexEntrySupplier_Common::compareIndexEntry(
68                     rIndexEntry1, rPhoneticEntry1, rLocale1,
69                     rIndexEntry2, rPhoneticEntry2, rLocale2);
70     return result > 0 ? 1 : -1;
71 }
72 
// Loads the algorithm (ignoring case and accents) and, on success, returns the
// index description of the entry; otherwise falls back to the common
// implementation.
OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter(
    const OUString& rIndexEntry,
    const lang::Locale& rLocale,
    const OUString& rAlgorithm )
{
    const bool bLoaded =
        loadAlgorithm(rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
    if (!bLoaded)
        return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm);

    return index->getIndexDescription(rIndexEntry);
}
81 
IndexTable()82 IndexTable::IndexTable()
83     : start(0)
84     , end(0)
85     , table(nullptr)
86 {
87 }
88 
~IndexTable()89 IndexTable::~IndexTable()
90 {
91     if (table) free(table);
92 }
93 
init(sal_Unicode start_,sal_Unicode end_,IndexKey const * keys,sal_Int16 key_count,Index * index)94 void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey const *keys, sal_Int16 key_count, Index *index)
95 {
96     start=start_;
97     end=end_;
98     table = static_cast<sal_uInt8*>(malloc((end-start+1)*sizeof(sal_uInt8)));
99     for (sal_Unicode i = start; i <= end; i++) {
100         sal_Int16 j;
101         for (j = 0; j < key_count; j++) {
102             if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) {
103                 table[i-start] = sal::static_int_cast<sal_uInt8>(j);
104                 break;
105             }
106         }
107         if (j == key_count)
108             table[i-start] = 0xFF;
109     }
110 }
111 
Index(const css::uno::Reference<css::uno::XComponentContext> & rxContext)112 Index::Index(const css::uno::Reference < css::uno::XComponentContext >& rxContext)
113     : table_count(0)
114     , key_count(0)
115     , mkey_count(0)
116     , collator( new CollatorImpl(rxContext) )
117 {
118 }
119 
~Index()120 Index::~Index()
121 {
122 }
123 
compare(sal_Unicode c1,sal_Unicode c2)124 sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2)
125 {
126     return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) );
127 }
128 
getIndexWeight(const OUString & rIndexEntry)129 sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry)
130 {
131     sal_Int32 startPos=0;
132     if (!skipping_chars.isEmpty())
133         while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0)
134             startPos++;
135     if (mkey_count > 0) {
136         for (sal_Int16 i = 0; i < mkey_count; i++) {
137             sal_Int32 len = keys[mkeys[i]].mkey.getLength();
138             if (collator->compareSubstring(rIndexEntry, startPos, len,
139                                     keys[mkeys[i]].mkey, 0, len) == 0)
140                 return mkeys[i];
141         }
142     }
143     sal_Unicode code = startPos < rIndexEntry.getLength() ? rIndexEntry[startPos] : 0;
144     for (sal_Int16 i = 0; i < table_count; i++) {
145         if (tables[i].start <= code && code <= tables[i].end)
146             return tables[i].table[code-tables[i].start];
147     }
148     return 0xFF;
149 }
150 
// Returns the text shown for the index key an entry falls under: the key's
// description if it has one, else its single key character, else its
// multi-character key. When the weight is not a valid key position, the
// first code point of the entry itself is returned.
OUString Index::getIndexDescription(const OUString& rIndexEntry)
{
    const sal_Int16 nWeight = getIndexWeight(rIndexEntry);

    if (nWeight >= MAX_KEYS) {
        // No key applies: use the code point at the start of the entry.
        sal_Int32 nPos = 0;
        const sal_uInt32 nCodePoint = rIndexEntry.iterateCodePoints(&nPos, 0);
        return OUString(&nCodePoint, 1);
    }

    const IndexKey& rKey = keys[nWeight];
    if (!rKey.desc.isEmpty())
        return rKey.desc;
    if (rKey.key > 0)
        return OUString(&rKey.key, 1);
    return rKey.mkey;
}
165 
// Locale with language "en" and empty country/variant, used below as the
// fallback when the requested locale provides no data.
#define LOCALE_EN (lang::Locale("en", OUString(), OUString()))
167 
makeIndexKeys(const lang::Locale & rLocale,std::u16string_view algorithm)168 void Index::makeIndexKeys(const lang::Locale &rLocale, std::u16string_view algorithm)
169 {
170     OUString keyStr = LocaleDataImpl::get()->getIndexKeysByAlgorithm(rLocale, algorithm);
171 
172     if (keyStr.isEmpty()) {
173         keyStr = LocaleDataImpl::get()->getIndexKeysByAlgorithm(LOCALE_EN,
174                     LocaleDataImpl::get()->getDefaultIndexAlgorithm(LOCALE_EN));
175         if (keyStr.isEmpty())
176             throw RuntimeException(
177                 "Index::makeIndexKeys: No index keys returned by algorithm");
178     }
179 
180     sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() );
181     mkey_count=key_count=0;
182     skipping_chars=OUString();
183     sal_Int16 i, j;
184 
185     for (i = 0; i < len && key_count < MAX_KEYS; i++)
186     {
187         sal_Unicode curr = keyStr[i];
188         sal_Unicode close = ')';
189 
190         if (unicode::isWhiteSpace(curr))
191             continue;
192 
193         switch(curr) {
194             case u'-': {
195                     if (key_count <= 0 || i + 1 >= len)
196                         throw RuntimeException("Index::makeIndexKeys: key_count<=0||"
197                                                 "'-' is the last char of KeyString");
198                     for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) {
199                         keys[key_count].key = keys[key_count-1].key+1;
200                         keys[key_count].desc.clear();
201                     }
202                     break;
203                 }
204             case u'[':
205                 for (i++; i < len && keyStr[i] != ']'; i++) {
206                     if (unicode::isWhiteSpace(keyStr[i])) {
207                         continue;
208                     } else if (keyStr[i] == '_') {
209                         for (curr=keyStr[i-1]+1;  curr <= keyStr[i+1]; curr++)
210                             skipping_chars+=OUStringChar(curr);
211                         i+=2;
212                     } else {
213                         skipping_chars+=OUStringChar(keyStr[i]);
214                     }
215                 }
216                 break;
217             case u'{':
218                 close = '}';
219                 [[fallthrough]];
220             case u'(': {
221                     if (key_count <= 0)
222                         throw RuntimeException("Index::makeIndexKeys: key_count<=0");
223 
224                     sal_Int16 end = i+1;
225                     for (; end < len && keyStr[end] != close; end++) ;
226 
227                     if (end >= len) // no found
228                         throw RuntimeException("Index::makeIndexKeys: Closing bracket not found");
229                     if (close == ')')
230                         keys[key_count-1].desc = keyStr.copy(i+1, end-i-1);
231                     else {
232                         mkeys[mkey_count++]=key_count;
233                         keys[key_count].key = 0;
234                         keys[key_count].mkey = keyStr.copy(i+1, end-i-1);
235                         keys[key_count++].desc.clear();
236                     }
237                     i=end+1;
238                     break;
239                 }
240             default:
241                 keys[key_count].key = curr;
242                 keys[key_count++].desc.clear();
243                 break;
244         }
245     }
246     for (i = 0; i < mkey_count; i++) {
247         for (j=i+1; j < mkey_count; j++) {
248             if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) {
249                 sal_Int16 k = mkeys[i];
250                 mkeys[i] = mkeys[j];
251                 mkeys[j] = k;
252             }
253         }
254     }
255 }
256 
init(const lang::Locale & rLocale,const OUString & algorithm)257 void Index::init(const lang::Locale &rLocale, const OUString& algorithm)
258 {
259     makeIndexKeys(rLocale, algorithm);
260 
261     Sequence< UnicodeScript > scriptList = LocaleDataImpl::get()->getUnicodeScripts( rLocale );
262 
263     if (!scriptList.hasElements()) {
264         scriptList = LocaleDataImpl::get()->getUnicodeScripts(LOCALE_EN);
265         if (!scriptList.hasElements())
266             throw RuntimeException("Index::init: scriptList is empty");
267     }
268 
269     table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() );
270     if (table_count > MAX_TABLES)
271         throw RuntimeException("Index::init: Length of scriptList is too big");
272 
273     collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
274     sal_Int16 j=0;
275     sal_Unicode start = unicode::getUnicodeScriptStart(UnicodeScript(0));
276     sal_Unicode end = unicode::getUnicodeScriptEnd(UnicodeScript(0));
277     for (sal_Int32 i= (scriptList[0] == UnicodeScript(0)) ? 1 : 0; i< scriptList.getLength(); i++) {
278         if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) {
279             tables[j++].init(start, end, keys, key_count, this);
280             start = unicode::getUnicodeScriptStart(scriptList[i]);
281         }
282         end = unicode::getUnicodeScriptEnd(scriptList[i]);
283     }
284     tables[j++].init(start, end, keys, key_count, this);
285     table_count = j;
286 }
287 
288 }
289 
290 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
291