1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <i18nlangtag/languagetag.hxx>
21 #include <i18nlangtag/mslangid.hxx>
22 #include <rtl/ustrbuf.hxx>
23 #include <sal/macros.h>
24 #include <nativenumbersupplier.hxx>
25 #include <localedata.hxx>
26 #include "data/numberchar.h"
27 #include <comphelper/processfactory.hxx>
28 #include <cppuhelper/supportsservice.hxx>
29 #include <map>
30 #include <memory>
31 #include <string_view>
32 #include <unordered_map>
33 #include <com/sun/star/i18n/CharacterClassification.hpp>
34 #include <com/sun/star/i18n/NativeNumberMode.hpp>
35 #include <com/sun/star/linguistic2/NumberText.hpp>
36
37 using namespace ::com::sun::star::uno;
38 using namespace ::com::sun::star::i18n;
39 using namespace ::com::sun::star::lang;
40
41 namespace {
42
43 struct Number {
44 sal_Int16 number;
45 const sal_Unicode *multiplierChar;
46 sal_Int16 numberFlag;
47 sal_Int16 exponentCount;
48 const sal_Int16 *multiplierExponent;
49 };
50
51 }
52
// Bit flags stored in Number::numberFlag. They tell the ASCII-to-native text
// conversion which digit characters are left out of the generated string.

// Do not write a zero digit for a zero inside a number.
#define NUMBER_OMIT_ZERO (1 << 0)
// Do not write a single zero digit when the whole number is zero.
#define NUMBER_OMIT_ONLY_ZERO (1 << 1)
// NUMBER_OMIT_ONE_<n>: do not write the digit one in front of the multiplier
// character with index n-1 (see NUMBER_OMIT_ONE_CHECK).
#define NUMBER_OMIT_ONE_1 (1 << 2)
#define NUMBER_OMIT_ONE_2 (1 << 3)
#define NUMBER_OMIT_ONE_3 (1 << 4)
#define NUMBER_OMIT_ONE_4 (1 << 5)
#define NUMBER_OMIT_ONE_5 (1 << 6)
#define NUMBER_OMIT_ONE_6 (1 << 7)
#define NUMBER_OMIT_ONE_7 (1 << 8)
#define NUMBER_OMIT_ONE (NUMBER_OMIT_ONE_1|NUMBER_OMIT_ONE_2|NUMBER_OMIT_ONE_3|NUMBER_OMIT_ONE_4|NUMBER_OMIT_ONE_5|NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
// Flag that belongs to the multiplier index `bit` (0 -> NUMBER_OMIT_ONE_1).
// The argument is parenthesized so that an expression argument such as
// `a & b` is evaluated as a whole before it is added to the base shift.
#define NUMBER_OMIT_ONE_CHECK(bit) (1 << (2 + (bit)))
#define NUMBER_OMIT_ALL ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE|NUMBER_OMIT_ONLY_ZERO )
#define NUMBER_OMIT_ZERO_ONE ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE )
#define NUMBER_OMIT_ONE_67 (NUMBER_OMIT_ONE_6|NUMBER_OMIT_ONE_7)
#define NUMBER_OMIT_ZERO_ONE_67 ( NUMBER_OMIT_ZERO|NUMBER_OMIT_ONE_67 )
68
69 namespace i18npool {
70
71 namespace {
72
73 struct theNatNumMutex : public rtl::Static<osl::Mutex, theNatNumMutex> {};
74
75 }
76
77 static OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh);
78
79 static OUString getCyrillicNativeNumberString(const OUString& aNumberString);
80
81 /// @throws RuntimeException
AsciiToNativeChar(const OUString & inStr,sal_Int32 nCount,Sequence<sal_Int32> & offset,bool useOffset,sal_Int16 number)82 static OUString AsciiToNativeChar( const OUString& inStr, sal_Int32 nCount,
83 Sequence< sal_Int32 >& offset, bool useOffset, sal_Int16 number )
84 {
85 const sal_Unicode *src = inStr.getStr();
86 rtl_uString *newStr = rtl_uString_alloc(nCount);
87 if (useOffset)
88 offset.realloc(nCount);
89
90 for (sal_Int32 i = 0; i < nCount; i++)
91 {
92 sal_Unicode ch = src[i];
93 if (isNumber(ch))
94 newStr->buffer[i] = NumberChar[number][ ch - NUMBER_ZERO ];
95 else if (i+1 < nCount && isNumber(src[i+1])) {
96 if (i > 0 && isNumber(src[i-1]) && isSeparator(ch))
97 newStr->buffer[i] = SeparatorChar[number] ? SeparatorChar[number] : ch;
98 else
99 newStr->buffer[i] = isDecimal(ch) ? (DecimalChar[number] ? DecimalChar[number] : ch) :
100 isMinus(ch) ? (MinusChar[number] ? MinusChar[number] : ch) : ch;
101 }
102 else
103 newStr->buffer[i] = ch;
104 if (useOffset)
105 offset[i] = i;
106 }
107 return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
108 }
109
AsciiToNative_numberMaker(const sal_Unicode * str,sal_Int32 begin,sal_Int32 len,sal_Unicode * dst,sal_Int32 & count,sal_Int16 multiChar_index,Sequence<sal_Int32> & offset,bool useOffset,sal_Int32 startPos,const Number * number,const sal_Unicode * numberChar)110 static bool AsciiToNative_numberMaker(const sal_Unicode *str, sal_Int32 begin, sal_Int32 len,
111 sal_Unicode *dst, sal_Int32& count, sal_Int16 multiChar_index, Sequence< sal_Int32 >& offset, bool useOffset, sal_Int32 startPos,
112 const Number *number, const sal_Unicode* numberChar)
113 {
114 sal_Unicode multiChar = (multiChar_index == -1 ? 0 : number->multiplierChar[multiChar_index]);
115 if ( len <= number->multiplierExponent[number->exponentCount-1] ) {
116 if (number->multiplierExponent[number->exponentCount-1] > 1) {
117 bool bNotZero = false;
118 for (const sal_Int32 end = begin+len; begin < end; begin++) {
119 if (bNotZero || str[begin] != NUMBER_ZERO) {
120 dst[count] = numberChar[str[begin] - NUMBER_ZERO];
121 if (useOffset)
122 offset[count] = begin + startPos;
123 count++;
124 bNotZero = true;
125 }
126 }
127 if (bNotZero && multiChar > 0) {
128 dst[count] = multiChar;
129 if (useOffset)
130 offset[count] = begin + startPos;
131 count++;
132 }
133 return bNotZero;
134 } else if (str[begin] != NUMBER_ZERO) {
135 if (!(number->numberFlag & (multiChar_index < 0 ? 0 : NUMBER_OMIT_ONE_CHECK(multiChar_index))) || str[begin] != NUMBER_ONE) {
136 dst[count] = numberChar[str[begin] - NUMBER_ZERO];
137 if (useOffset)
138 offset[count] = begin + startPos;
139 count++;
140 }
141 if (multiChar > 0) {
142 dst[count] = multiChar;
143 if (useOffset)
144 offset[count] = begin + startPos;
145 count++;
146 }
147 } else if (!(number->numberFlag & NUMBER_OMIT_ZERO) && count > 0 && dst[count-1] != numberChar[0]) {
148 dst[count] = numberChar[0];
149 if (useOffset)
150 offset[count] = begin + startPos;
151 count++;
152 }
153 return str[begin] != NUMBER_ZERO;
154 } else {
155 bool bPrintPower = false;
156 // sal_Int16 last = 0;
157 for (sal_Int16 i = 1; i <= number->exponentCount; i++) {
158 sal_Int32 tmp = len - (i == number->exponentCount ? 0 : number->multiplierExponent[i]);
159 if (tmp > 0) {
160 bPrintPower |= AsciiToNative_numberMaker(str, begin, tmp, dst, count,
161 (i == number->exponentCount ? -1 : i), offset, useOffset, startPos, number, numberChar);
162 begin += tmp;
163 len -= tmp;
164 }
165 }
166 if (bPrintPower) {
167 if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 &&
168 dst[count-1] == numberChar[0])
169 count--;
170 if (multiChar > 0) {
171 dst[count] = multiChar;
172 if (useOffset)
173 offset[count] = begin + startPos;
174 count++;
175 }
176 }
177 return bPrintPower;
178 }
179 }
180
181 /// @throws RuntimeException
AsciiToNative(const OUString & inStr,sal_Int32 nCount,Sequence<sal_Int32> & offset,bool useOffset,const Number * number)182 static OUString AsciiToNative( const OUString& inStr, sal_Int32 nCount,
183 Sequence< sal_Int32 >& offset, bool useOffset, const Number* number )
184 {
185 OUString aRet;
186
187 sal_Int32 strLen = inStr.getLength();
188 const sal_Unicode *numberChar = NumberChar[number->number];
189
190 if (nCount > strLen)
191 nCount = strLen;
192
193 if (nCount > 0)
194 {
195 const sal_Unicode *str = inStr.getStr();
196 std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * 2 + 1]);
197 std::unique_ptr<sal_Unicode[]> srcStr(new sal_Unicode[nCount + 1]); // for keeping number without comma
198 sal_Int32 i, len = 0, count = 0;
199
200 if (useOffset)
201 offset.realloc( nCount * 2 );
202 bool bDoDecimal = false;
203
204 for (i = 0; i <= nCount; i++)
205 {
206 if (i < nCount && isNumber(str[i])) {
207 if (bDoDecimal) {
208 newStr[count] = numberChar[str[i] - NUMBER_ZERO];
209 if (useOffset)
210 offset[count] = i;
211 count++;
212 }
213 else
214 srcStr[len++] = str[i];
215 } else {
216 if (len > 0) {
217 if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1]))
218 continue; // skip comma inside number string
219 bool bNotZero = false;
220 for (sal_Int32 begin = 0, end = len % number->multiplierExponent[0];
221 end <= len; begin = end, end += number->multiplierExponent[0]) {
222 if (end == 0) continue;
223 sal_Int32 _count = count;
224 bNotZero |= AsciiToNative_numberMaker(srcStr.get(), begin, end - begin, newStr.get(), count,
225 end == len ? -1 : 0, offset, useOffset, i - len, number, numberChar);
226 if (count > 0 && number->multiplierExponent[number->exponentCount-1] == 1 &&
227 newStr[count-1] == numberChar[0])
228 count--;
229 if (bNotZero && _count == count && end != len) {
230 newStr[count] = number->multiplierChar[0];
231 if (useOffset)
232 offset[count] = i - len;
233 count++;
234 }
235 }
236 if (! bNotZero && ! (number->numberFlag & NUMBER_OMIT_ONLY_ZERO)) {
237 newStr[count] = numberChar[0];
238 if (useOffset)
239 offset[count] = i - len;
240 count++;
241 }
242 len = 0;
243 }
244 if (i < nCount) {
245 bDoDecimal = (!bDoDecimal && i < nCount-1 && isDecimal(str[i]) && isNumber(str[i+1]));
246 if (bDoDecimal)
247 newStr[count] = (DecimalChar[number->number] ? DecimalChar[number->number] : str[i]);
248 else if (i < nCount-1 && isMinus(str[i]) && isNumber(str[i+1]))
249 newStr[count] = (MinusChar[number->number] ? MinusChar[number->number] : str[i]);
250 else if (i < nCount-1 && isSeparator(str[i]) && isNumber(str[i+1]))
251 newStr[count] = (SeparatorChar[number->number] ? SeparatorChar[number->number] : str[i]);
252 else
253 newStr[count] = str[i];
254 if (useOffset)
255 offset[count] = i;
256 count++;
257 }
258 }
259 }
260
261 if (useOffset)
262 offset.realloc(count);
263 aRet = OUString(newStr.get(), count);
264 }
265 return aRet;
266 }
267
268 namespace
269 {
NativeToAscii_numberMaker(sal_Int16 max,sal_Int16 prev,const sal_Unicode * str,sal_Int32 & i,sal_Int32 nCount,sal_Unicode * dst,sal_Int32 & count,Sequence<sal_Int32> & offset,bool useOffset,OUString & numberChar,OUString & multiplierChar)270 void NativeToAscii_numberMaker(sal_Int16 max, sal_Int16 prev, const sal_Unicode *str,
271 sal_Int32& i, sal_Int32 nCount, sal_Unicode *dst, sal_Int32& count, Sequence< sal_Int32 >& offset, bool useOffset,
272 OUString& numberChar, OUString& multiplierChar)
273 {
274 sal_Int16 curr = 0, num = 0, end = 0, shift = 0;
275 while (++i < nCount) {
276 if ((curr = sal::static_int_cast<sal_Int16>( numberChar.indexOf(str[i]) )) >= 0) {
277 if (num > 0)
278 break;
279 num = curr % 10;
280 } else if ((curr = sal::static_int_cast<sal_Int16>( multiplierChar.indexOf(str[i]) )) >= 0) {
281 curr = MultiplierExponent_7_CJK[curr % ExponentCount_7_CJK];
282 if (prev > curr && num == 0) num = 1; // One may be omitted in informal format
283 shift = end = 0;
284 if (curr >= max)
285 max = curr;
286 else if (curr > prev)
287 shift = max - curr;
288 else
289 end = curr;
290 while (end++ < prev) {
291 dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
292 if (useOffset)
293 offset[count] = i;
294 count++;
295 }
296 if (shift) {
297 count -= max;
298 for (const sal_Int32 countEnd = count+shift; count < countEnd; count++) {
299 dst[count] = dst[count + curr];
300 if (useOffset)
301 offset[count] = offset[count + curr];
302 }
303 max = curr;
304 }
305 NativeToAscii_numberMaker(max, curr, str, i, nCount, dst,
306 count, offset, useOffset, numberChar, multiplierChar);
307 return;
308 } else
309 break;
310 }
311 while (end++ < prev) {
312 dst[count] = NUMBER_ZERO + (end == prev ? num : 0);
313 if (useOffset)
314 offset[count] = i - 1;
315 count++;
316 }
317 }
318
319 /// @throws RuntimeException
NativeToAscii(const OUString & inStr,sal_Int32 nCount,Sequence<sal_Int32> & offset,bool useOffset)320 OUString NativeToAscii(const OUString& inStr,
321 sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset )
322 {
323 OUString aRet;
324
325 sal_Int32 strLen = inStr.getLength();
326
327 if (nCount > strLen)
328 nCount = strLen;
329
330 if (nCount > 0) {
331 const sal_Unicode *str = inStr.getStr();
332 std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nCount * MultiplierExponent_7_CJK[0] + 2]);
333 if (useOffset)
334 offset.realloc( nCount * MultiplierExponent_7_CJK[0] + 1 );
335 sal_Int32 count = 0, index;
336 sal_Int32 i;
337
338 OUString numberChar, multiplierChar, decimalChar, separatorChar;
339 numberChar = OUString(NumberChar[0], 10*NumberChar_Count);
340 multiplierChar = OUString(MultiplierChar_7_CJK[0], ExponentCount_7_CJK*Multiplier_Count);
341 decimalChar = OUString(DecimalChar, NumberChar_Count);
342 std::u16string_view const minusChar(MinusChar, NumberChar_Count);
343 separatorChar = OUString(
344 reinterpret_cast<sal_Unicode *>(SeparatorChar), NumberChar_Count);
345
346 for ( i = 0; i < nCount; i++) {
347 if ((index = multiplierChar.indexOf(str[i])) >= 0) {
348 if (count == 0 || !isNumber(newStr[count-1])) { // add 1 in front of multiplier
349 newStr[count] = NUMBER_ONE;
350 if (useOffset)
351 offset[count] = i;
352 count++;
353 }
354 index = MultiplierExponent_7_CJK[index % ExponentCount_7_CJK];
355 NativeToAscii_numberMaker(
356 sal::static_int_cast<sal_Int16>( index ), sal::static_int_cast<sal_Int16>( index ),
357 str, i, nCount, newStr.get(), count, offset, useOffset,
358 numberChar, multiplierChar);
359 } else {
360 if ((index = numberChar.indexOf(str[i])) >= 0)
361 newStr[count] = sal::static_int_cast<sal_Unicode>( (index % 10) + NUMBER_ZERO );
362 else if (separatorChar.indexOf(str[i]) >= 0 &&
363 (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
364 multiplierChar.indexOf(str[i+1]) >= 0)))
365 newStr[count] = SeparatorChar[NumberChar_HalfWidth];
366 else if (decimalChar.indexOf(str[i]) >= 0 &&
367 (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
368 multiplierChar.indexOf(str[i+1]) >= 0)))
369 // Only when decimal point is followed by numbers,
370 // it will be convert to ASCII decimal point
371 newStr[count] = DecimalChar[NumberChar_HalfWidth];
372 else if (minusChar.find(str[i]) != std::u16string_view::npos &&
373 (i < nCount-1 && (numberChar.indexOf(str[i+1]) >= 0 ||
374 multiplierChar.indexOf(str[i+1]) >= 0)))
375 // Only when minus is followed by numbers,
376 // it will be convert to ASCII minus sign
377 newStr[count] = MinusChar[NumberChar_HalfWidth];
378 else
379 newStr[count] = str[i];
380 if (useOffset)
381 offset[count] = i;
382 count++;
383 }
384 }
385
386 if (useOffset) {
387 offset.realloc(count);
388 }
389 aRet = OUString(newStr.get(), count);
390 }
391 return aRet;
392 }
393
394 const Number natnum4[4] = {
395 { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0,
396 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
397 { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0,
398 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
399 { NumberChar_Modern_ja, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67,
400 ExponentCount_7_CJK, MultiplierExponent_7_CJK },
401 { NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ZERO,
402 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
403 };
404
405 const Number natnum5[4] = {
406 { NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], 0,
407 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
408 { NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], 0,
409 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
410 { NumberChar_Traditional_ja, MultiplierChar_7_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE_67,
411 ExponentCount_7_CJK, MultiplierExponent_7_CJK },
412 { NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], 0,
413 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
414 };
415
416 const Number natnum6[4] = {
417 { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh], 0,
418 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
419 { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], 0,
420 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
421 { NumberChar_FullWidth, MultiplierChar_7_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE_67,
422 ExponentCount_7_CJK, MultiplierExponent_7_CJK },
423 { NumberChar_FullWidth, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO,
424 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
425 };
426
427 const Number natnum7[4] = {
428 { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh], NUMBER_OMIT_ALL,
429 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
430 { NumberChar_Lower_zh, MultiplierChar_6_CJK[Multiplier_Lower_zh_TW], NUMBER_OMIT_ALL,
431 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
432 { NumberChar_Modern_ja, MultiplierChar_2_CJK[Multiplier_Modern_ja], NUMBER_OMIT_ZERO_ONE,
433 ExponentCount_2_CJK, MultiplierExponent_2_CJK },
434 { NumberChar_Lower_ko, MultiplierChar_6_CJK[Multiplier_Lower_ko], NUMBER_OMIT_ALL,
435 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
436 };
437
438 const Number natnum8[4] = {
439 { NumberChar_Upper_zh, MultiplierChar_6_CJK[Multiplier_Upper_zh], NUMBER_OMIT_ALL,
440 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
441 { NumberChar_Upper_zh_TW, MultiplierChar_6_CJK[Multiplier_Upper_zh_TW], NUMBER_OMIT_ALL,
442 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
443 { NumberChar_Traditional_ja, MultiplierChar_2_CJK[Multiplier_Traditional_ja], NUMBER_OMIT_ZERO_ONE,
444 ExponentCount_2_CJK, MultiplierExponent_2_CJK },
445 { NumberChar_Upper_ko, MultiplierChar_6_CJK[Multiplier_Upper_ko], NUMBER_OMIT_ALL,
446 ExponentCount_6_CJK, MultiplierExponent_6_CJK },
447 };
448
449 const Number natnum10 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ZERO,
450 ExponentCount_6_CJK, MultiplierExponent_6_CJK };
451 const Number natnum11 = { NumberChar_Hangul_ko, MultiplierChar_6_CJK[Multiplier_Hangul_ko], NUMBER_OMIT_ALL,
452 ExponentCount_6_CJK, MultiplierExponent_6_CJK };
453
454 //! ATTENTION: Do not change order of elements!
455 //! Append new languages to the end of the list!
456 const char *natnum1Locales[] = {
457 "zh_CN",
458 "zh_TW",
459 "ja",
460 "ko",
461 "he",
462 "ar",
463 "th",
464 "hi",
465 "or",
466 "mr",
467 "bn",
468 "pa",
469 "gu",
470 "ta",
471 "te",
472 "kn",
473 "ml",
474 "lo",
475 "bo",
476 "my",
477 "km",
478 "mn",
479 "ne",
480 "dz",
481 "fa",
482 "cu"
483 };
484 const sal_Int16 nbOfLocale = SAL_N_ELEMENTS(natnum1Locales);
485
486 //! ATTENTION: Do not change order of elements!
487 //! Number and order must match elements of natnum1Locales!
488 const sal_Int16 natnum1[] = {
489 NumberChar_Lower_zh,
490 NumberChar_Lower_zh,
491 NumberChar_Modern_ja,
492 NumberChar_Lower_ko,
493 NumberChar_he,
494 NumberChar_Indic_ar,
495 NumberChar_th,
496 NumberChar_hi,
497 NumberChar_or,
498 NumberChar_mr,
499 NumberChar_bn,
500 NumberChar_pa,
501 NumberChar_gu,
502 NumberChar_ta,
503 NumberChar_te,
504 NumberChar_kn,
505 NumberChar_ml,
506 NumberChar_lo,
507 NumberChar_bo,
508 NumberChar_my,
509 NumberChar_km,
510 NumberChar_mn,
511 NumberChar_ne,
512 NumberChar_dz,
513 NumberChar_EastIndic_ar,
514 NumberChar_cu
515 };
516 const sal_Int16 sizeof_natnum1 = SAL_N_ELEMENTS(natnum1);
517
518 //! ATTENTION: Do not change order of elements!
519 //! Order must match first elements of natnum1Locales!
520 const sal_Int16 natnum2[] = {
521 NumberChar_Upper_zh,
522 NumberChar_Upper_zh_TW,
523 NumberChar_Traditional_ja,
524 NumberChar_Upper_ko,
525 NumberChar_he
526 };
527 const sal_Int16 sizeof_natnum2 = SAL_N_ELEMENTS(natnum2);
528
getLanguageNumber(const Locale & rLocale)529 sal_Int16 getLanguageNumber( const Locale& rLocale)
530 {
531 // return zh_TW for TW, HK and MO, return zh_CN for other zh locales.
532 if (rLocale.Language == "zh") return MsLangId::isTraditionalChinese(rLocale) ? 1 : 0;
533
534 for (sal_Int16 i = 2; i < nbOfLocale; i++)
535 if (rLocale.Language.equalsAsciiL(natnum1Locales[i], 2))
536 return i;
537
538 return -1;
539 }
540
541 struct Separators
542 {
543 sal_Unicode DecimalSeparator;
544 sal_Unicode ThousandSeparator;
Separatorsi18npool::__anoncfe4ddba0311::Separators545 Separators(const Locale& rLocale)
546 {
547 LocaleDataItem aLocaleItem = LocaleDataImpl::get()->getLocaleItem(rLocale);
548 DecimalSeparator = aLocaleItem.decimalSeparator.toChar();
549 ThousandSeparator = aLocaleItem.thousandSeparator.toChar();
550 }
551 };
552
getLocaleSeparators(const Locale & rLocale,const OUString & rLocStr)553 Separators getLocaleSeparators(const Locale& rLocale, const OUString& rLocStr)
554 {
555 // Guard the static variable below.
556 osl::MutexGuard aGuard(theNatNumMutex::get());
557 // Maximum a couple hundred of pairs with 4-byte structs - so no need for smart managing
558 static std::unordered_map<OUString, Separators> aLocaleSeparatorsBuf;
559 auto it = aLocaleSeparatorsBuf.find(rLocStr);
560 if (it == aLocaleSeparatorsBuf.end())
561 {
562 it = aLocaleSeparatorsBuf.emplace(rLocStr, Separators(rLocale)).first;
563 }
564 return it->second;
565 }
566
/// Spells out the number at the start of rNumberString using the NumberText
/// service (NatNum12).
///
/// The leading number is normalized first: digits are kept, the locale's
/// decimal separator becomes '.', thousands separators after the first digit
/// are dropped and a minus before the first digit is kept. Scanning stops at
/// the first other character; if at least one character was consumed, the
/// unscanned remainder is appended unchanged to the result. If no digit was
/// found at all, the whole input string is handed to the service.
/// Results are cached per text, parameters and locale.
///
/// @param rLocale           locale of the conversion
/// @param rNumberString     text starting with a number
/// @param sNumberTextParams conversion parameters; empty or "cardinal" selects
///                          the default conversion
/// @return the converted text, or the normalized input if the service
///         returned nothing
OUString getNumberText(const Locale& rLocale, const OUString& rNumberString,
        std::u16string_view sNumberTextParams)
{
    const sal_Int32 nLen = rNumberString.getLength();
    const sal_Unicode* const pSrc = rNumberString.getStr();

    const OUString aLoc = LanguageTag::convertToBcp47(rLocale);
    const Separators aSeparators = getLocaleSeparators(rLocale, aLoc);

    OUStringBuffer aNormalized(nLen);
    sal_Int32 nDigits = 0;
    sal_Int32 nPos = 0;
    for (; nPos < nLen; ++nPos)
    {
        const sal_Unicode ch = pSrc[nPos];
        if (isNumber(ch))
        {
            ++nDigits;
            aNormalized.append(ch);
            continue;
        }
        if (ch == aSeparators.DecimalSeparator)
        {
            // Convert any decimal separator to point - in case libnumbertext has a different one
            // for this locale (it seems that point is supported for all locales in libnumbertext)
            aNormalized.append('.');
            continue;
        }
        if (ch == aSeparators.ThousandSeparator && nDigits > 0)
            continue;
        if (isMinus(ch) && nDigits == 0)
        {
            aNormalized.append(ch);
            continue;
        }
        break;
    }

    // Without any digit the text is passed on as is; this also handles month
    // and day names for NatNum12 date formatting.
    const OUString aNumberStr
        = (nDigits == 0) ? rNumberString : aNormalized.makeStringAndClear();

    // Guard the static variables below.
    osl::MutexGuard aGuard( theNatNumMutex::get());

    static auto xNumberText
        = css::linguistic2::NumberText::create(comphelper::getProcessComponentContext());

    // default "cardinal" gets empty prefix
    OUString aPrefix;
    if (!sNumberTextParams.empty() && sNumberTextParams != u"cardinal")
        aPrefix = OUString::Concat(sNumberTextParams) + " ";

    // Several hundreds of headings could result typing lags because
    // of the continuous update of the multiple number names during typing.
    // We fix this by buffering the result of the conversion.
    static std::unordered_map<OUString, std::map<OUString, OUString>> aBuff;
    OUString& rCached = aBuff[aNumberStr][aPrefix + aLoc];
    if (rCached.isEmpty())
    {
        rCached = xNumberText->getNumberText(aPrefix + aNumberStr, rLocale);
        // use number at missing number to text conversion
        if (rCached.isEmpty())
            rCached = aNumberStr;
    }

    OUString sResult = rCached;
    if (nPos != 0 && nPos < nLen)
        sResult += rNumberString.subView(nPos);
    return sResult;
}
628 }
629
/// Converts aNumberString according to nNativeNumberMode for rLocale.
///
/// - If the mode is not valid for the locale, the input is returned unchanged.
/// - NATNUM12 spells the number out via getNumberText(). rNativeNumberParams
///   may start with one of the casing prefixes "capitalize", "upper", "lower"
///   or "title" (alone, or followed by a space and further parameters); the
///   spelled-out text is then cased accordingly.
/// - NATNUM0 converts native digits to ASCII.
/// - The remaining modes convert ASCII digits to native characters or text.
///
/// @param aNumberString       text to convert
/// @param rLocale             locale that selects the native number system
/// @param nNativeNumberMode   one of the NativeNumberMode constants
/// @param offset              receives result-to-source index mapping when the
///                            member useOffset is set
/// @param rNativeNumberParams additional parameters, only used for NATNUM12
OUString NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale,
                                                            sal_Int16 nNativeNumberMode,
                                                            Sequence<sal_Int32>& offset,
                                                            const OUString& rNativeNumberParams)
{
    if (!isValidNatNum(rLocale, nNativeNumberMode))
        return aNumberString;

    if (nNativeNumberMode == NativeNumberMode::NATNUM12)
    {
        // handle capitalization prefixes "capitalize", "upper", "lower" and "title"

        enum WhichCasing
        {
            CAPITALIZE,
            UPPER,
            LOWER,
            TITLE
        };

        struct CasingEntry
        {
            std::u16string_view aLiteral;
            WhichCasing eCasing;
        };

        static const CasingEntry Casings[] =
        {
            { std::u16string_view(u"capitalize"), CAPITALIZE },
            { std::u16string_view(u"upper"), UPPER },
            { std::u16string_view(u"lower"), LOWER },
            { std::u16string_view(u"title"), TITLE }
        };

        const CasingEntry* pCasing = nullptr;
        for (const CasingEntry& rEntry : Casings)
        {
            if (rNativeNumberParams.startsWith(rEntry.aLiteral))
            {
                pCasing = &rEntry;
                break;
            }
        }

        // Start of the parameters that follow the casing prefix, or -1 if the
        // string does not consist of "<prefix>" or "<prefix> <params>".
        sal_Int32 nParamStart = -1;
        if (pCasing)
        {
            const sal_Int32 nPrefixLen = pCasing->aLiteral.size();
            if (rNativeNumberParams.getLength() == nPrefixLen)
                nParamStart = nPrefixLen;
            else if (rNativeNumberParams[nPrefixLen] == ' ')
                nParamStart = nPrefixLen + 1;
        }

        if (nParamStart < 0)
            return getNumberText(rLocale, aNumberString, rNativeNumberParams);

        OUString aStr = getNumberText(rLocale, aNumberString, rNativeNumberParams.subView(nParamStart));

        if (!xCharClass.is())
            xCharClass = CharacterClassification::create(comphelper::getProcessComponentContext());

        // Case mapping is locale dependent, so it must use the locale of this
        // request; the cached member aLocale is not updated on this path.
        switch (pCasing->eCasing)
        {
            case CAPITALIZE:
                return xCharClass->toTitle(aStr, 0, 1, rLocale) +
                    (aStr.getLength() > 1 ? aStr.copy(1) : OUString());
            case UPPER:
                return xCharClass->toUpper(aStr, 0, aStr.getLength(), rLocale);
            case LOWER:
                return xCharClass->toLower(aStr, 0, aStr.getLength(), rLocale);
            case TITLE:
                return xCharClass->toTitle(aStr, 0, aStr.getLength(), rLocale);
        }
    }

    sal_Int16 langnum = getLanguageNumber(rLocale);
    if (langnum == -1)
        return aNumberString;

    // Exactly one of these is set by the text modes (number) or by the
    // character modes (num).
    const Number *number = nullptr;
    sal_Int16 num = -1;

    switch (nNativeNumberMode)
    {
        case NativeNumberMode::NATNUM0: // Ascii
            return NativeToAscii(aNumberString, aNumberString.getLength(), offset, useOffset);
        case NativeNumberMode::NATNUM1: // Char, Lower
            num = natnum1[langnum];
            break;
        case NativeNumberMode::NATNUM2: // Char, Upper
            num = natnum2[langnum];
            break;
        case NativeNumberMode::NATNUM3: // Char, FullWidth
            num = NumberChar_FullWidth;
            break;
        case NativeNumberMode::NATNUM4: // Text, Lower, Long
            number = &natnum4[langnum];
            break;
        case NativeNumberMode::NATNUM5: // Text, Upper, Long
            number = &natnum5[langnum];
            break;
        case NativeNumberMode::NATNUM6: // Text, FullWidth
            number = &natnum6[langnum];
            break;
        case NativeNumberMode::NATNUM7: // Text. Lower, Short
            number = &natnum7[langnum];
            break;
        case NativeNumberMode::NATNUM8: // Text, Upper, Short
            number = &natnum8[langnum];
            break;
        case NativeNumberMode::NATNUM9: // Char, Hangul
            num = NumberChar_Hangul_ko;
            break;
        case NativeNumberMode::NATNUM10: // Text, Hangul, Long
            number = &natnum10;
            break;
        case NativeNumberMode::NATNUM11: // Text, Hangul, Short
            number = &natnum11;
            break;
        default:
            break;
    }

    if (!number && num < 0)
        return aNumberString;

    // When the locale differs from the one used last, refresh the half-width
    // and full-width decimal/thousands separator entries of the global tables.
    if (aLocale.Language != rLocale.Language ||
        aLocale.Country != rLocale.Country ||
        aLocale.Variant != rLocale.Variant) {
        LocaleDataItem item = LocaleDataImpl::get()->getLocaleItem( rLocale );
        aLocale = rLocale;
        DecimalChar[NumberChar_HalfWidth]=item.decimalSeparator.toChar();
        // Printable ASCII (0x21..0x7E) has a full-width counterpart at +0xFEE0;
        // for anything else a fixed full-width character is used.
        if (DecimalChar[NumberChar_HalfWidth] > 0x7E || DecimalChar[NumberChar_HalfWidth] < 0x21)
            DecimalChar[NumberChar_FullWidth]=0xFF0E;
        else
            DecimalChar[NumberChar_FullWidth]=DecimalChar[NumberChar_HalfWidth]+0xFEE0;
        SeparatorChar[NumberChar_HalfWidth]=item.thousandSeparator.toChar();
        if (SeparatorChar[NumberChar_HalfWidth] > 0x7E || SeparatorChar[NumberChar_HalfWidth] < 0x21)
            SeparatorChar[NumberChar_FullWidth]=0xFF0C;
        else
            SeparatorChar[NumberChar_FullWidth]=SeparatorChar[NumberChar_HalfWidth]+0xFEE0;
    }

    if (number)
        return AsciiToNative( aNumberString, aNumberString.getLength(), offset, useOffset, number );
    if (num == NumberChar_he)
        return getHebrewNativeNumberString(aNumberString,
                nNativeNumberMode == NativeNumberMode::NATNUM2);
    if (num == NumberChar_cu)
        return getCyrillicNativeNumberString(aNumberString);
    return AsciiToNativeChar(aNumberString, aNumberString.getLength(), offset, useOffset, num);
}
780
/// Convenience overload without offset output: forwards to the overload that
/// takes an offset sequence, using a sequence that is discarded afterwards.
OUString SAL_CALL NativeNumberSupplierService::getNativeNumberString(const OUString& aNumberString, const Locale& rLocale,
                sal_Int16 nNativeNumberMode)
{
    Sequence< sal_Int32 > aDiscardedOffsets;
    return getNativeNumberString(aNumberString, rLocale, nNativeNumberMode, aDiscardedOffsets);
}
787
/// Like getNativeNumberString(), but additionally passes rNativeNumberParams
/// on to the conversion; the offset mapping is discarded.
OUString SAL_CALL NativeNumberSupplierService::getNativeNumberStringParams(
    const OUString& rNumberString, const css::lang::Locale& rLocale, sal_Int16 nNativeNumberMode,
    const OUString& rNativeNumberParams)
{
    Sequence<sal_Int32> aDiscardedOffsets;
    return getNativeNumberString(
        rNumberString, rLocale, nNativeNumberMode, aDiscardedOffsets, rNativeNumberParams);
}
795
/// Converts a single character.
///
/// For NATNUM0 a native digit of any row of the NumberChar table is converted
/// to the corresponding ASCII digit. For all other modes an ASCII digit is
/// converted to the digit character of the row that belongs to the mode and
/// locale. Characters that cannot be converted are returned unchanged.
///
/// @param inChar            character to convert
/// @param rLocale           locale that selects the native number system
/// @param nNativeNumberMode one of the NativeNumberMode constants
sal_Unicode NativeNumberSupplierService::getNativeNumberChar( const sal_Unicode inChar, const Locale& rLocale, sal_Int16 nNativeNumberMode )
{
    if (nNativeNumberMode == NativeNumberMode::NATNUM0) { // Ascii
        for (const auto & rDigits : NumberChar)
            for (sal_Int16 j = 0; j < 10; j++)
                if (inChar == rDigits[j])
                    // Return the ASCII digit character, not the bare digit
                    // value (which would be a control character U+0000..U+0009).
                    return static_cast<sal_Unicode>(NUMBER_ZERO + j);
        return inChar;
    }

    if (!isNumber(inChar))
        return inChar;

    if (!isValidNatNum(rLocale, nNativeNumberMode))
        return inChar;

    sal_Int16 langnum = getLanguageNumber(rLocale);
    if (langnum == -1)
        return inChar;

    // Text modes use the same digit characters as the matching char mode.
    switch (nNativeNumberMode)
    {
        case NativeNumberMode::NATNUM1: // Char, Lower
        case NativeNumberMode::NATNUM4: // Text, Lower, Long
        case NativeNumberMode::NATNUM7: // Text. Lower, Short
            return NumberChar[natnum1[langnum]][inChar - NUMBER_ZERO];
        case NativeNumberMode::NATNUM2: // Char, Upper
        case NativeNumberMode::NATNUM5: // Text, Upper, Long
        case NativeNumberMode::NATNUM8: // Text, Upper, Short
            return NumberChar[natnum2[langnum]][inChar - NUMBER_ZERO];
        case NativeNumberMode::NATNUM3: // Char, FullWidth
        case NativeNumberMode::NATNUM6: // Text, FullWidth
            return NumberChar[NumberChar_FullWidth][inChar - NUMBER_ZERO];
        case NativeNumberMode::NATNUM9: // Char, Hangul
        case NativeNumberMode::NATNUM10: // Text, Hangul, Long
        case NativeNumberMode::NATNUM11: // Text, Hangul, Short
            return NumberChar[NumberChar_Hangul_ko][inChar - NUMBER_ZERO];
        default:
            break;
    }

    return inChar;
}
839
isValidNatNum(const Locale & rLocale,sal_Int16 nNativeNumberMode)840 sal_Bool SAL_CALL NativeNumberSupplierService::isValidNatNum( const Locale& rLocale, sal_Int16 nNativeNumberMode )
841 {
842 sal_Int16 langnum = getLanguageNumber(rLocale);
843
844 switch (nNativeNumberMode) {
845 case NativeNumberMode::NATNUM0: // Ascii
846 case NativeNumberMode::NATNUM3: // Char, FullWidth
847 case NativeNumberMode::NATNUM12: // spell out numbers, dates and money amounts
848 return true;
849 case NativeNumberMode::NATNUM1: // Char, Lower
850 return (langnum >= 0);
851 case NativeNumberMode::NATNUM2: // Char, Upper
852 if (langnum == 4) // Hebrew numbering
853 return true;
854 [[fallthrough]];
855 case NativeNumberMode::NATNUM4: // Text, Lower, Long
856 case NativeNumberMode::NATNUM5: // Text, Upper, Long
857 case NativeNumberMode::NATNUM6: // Text, FullWidth
858 case NativeNumberMode::NATNUM7: // Text. Lower, Short
859 case NativeNumberMode::NATNUM8: // Text, Upper, Short
860 return (langnum >= 0 && langnum < 4); // CJK numbering
861 case NativeNumberMode::NATNUM9: // Char, Hangul
862 case NativeNumberMode::NATNUM10: // Text, Hangul, Long
863 case NativeNumberMode::NATNUM11: // Text, Hangul, Short
864 return (langnum == 3); // Korean numbering
865 }
866 return false;
867 }
868
convertToXmlAttributes(const Locale & rLocale,sal_Int16 nNativeNumberMode)869 NativeNumberXmlAttributes SAL_CALL NativeNumberSupplierService::convertToXmlAttributes( const Locale& rLocale, sal_Int16 nNativeNumberMode )
870 {
871 static const sal_Int16 attShort = 0;
872 static const sal_Int16 attMedium = 1;
873 static const sal_Int16 attLong = 2;
874 static const char *attType[] = { "short", "medium", "long" };
875
876 sal_Int16 number = NumberChar_HalfWidth, type = attShort;
877
878 sal_Int16 langnum = -1;
879 if (isValidNatNum(rLocale, nNativeNumberMode)) {
880 langnum = getLanguageNumber(rLocale);
881 }
882 if (langnum != -1) {
883 switch (nNativeNumberMode) {
884 case NativeNumberMode::NATNUM0: // Ascii
885 number = NumberChar_HalfWidth;
886 type = attShort;
887 break;
888 case NativeNumberMode::NATNUM1: // Char, Lower
889 number = natnum1[langnum];
890 type = attShort;
891 break;
892 case NativeNumberMode::NATNUM2: // Char, Upper
893 number = natnum2[langnum];
894 type = number == NumberChar_he ? attMedium : attShort;
895 break;
896 case NativeNumberMode::NATNUM3: // Char, FullWidth
897 number = NumberChar_FullWidth;
898 type = attShort;
899 break;
900 case NativeNumberMode::NATNUM4: // Text, Lower, Long
901 number = natnum1[langnum];
902 type = attLong;
903 break;
904 case NativeNumberMode::NATNUM5: // Text, Upper, Long
905 number = natnum2[langnum];
906 type = attLong;
907 break;
908 case NativeNumberMode::NATNUM6: // Text, FullWidth
909 number = NumberChar_FullWidth;
910 type = attLong;
911 break;
912 case NativeNumberMode::NATNUM7: // Text. Lower, Short
913 number = natnum1[langnum];
914 type = attMedium;
915 break;
916 case NativeNumberMode::NATNUM8: // Text, Upper, Short
917 number = natnum2[langnum];
918 type = attMedium;
919 break;
920 case NativeNumberMode::NATNUM9: // Char, Hangul
921 number = NumberChar_Hangul_ko;
922 type = attShort;
923 break;
924 case NativeNumberMode::NATNUM10: // Text, Hangul, Long
925 number = NumberChar_Hangul_ko;
926 type = attLong;
927 break;
928 case NativeNumberMode::NATNUM11: // Text, Hangul, Short
929 number = NumberChar_Hangul_ko;
930 type = attMedium;
931 break;
932 default:
933 break;
934 }
935 }
936 return NativeNumberXmlAttributes(rLocale, OUString(&NumberChar[number][1], 1),
937 OUString::createFromAscii(attType[type]));
938 }
939
natNumIn(sal_Int16 num,const sal_Int16 natnum[],sal_Int16 len)940 static bool natNumIn(sal_Int16 num, const sal_Int16 natnum[], sal_Int16 len)
941 {
942 for (sal_Int16 i = 0; i < len; i++)
943 if (natnum[i] == num)
944 return true;
945 return false;
946 }
947
convertFromXmlAttributes(const NativeNumberXmlAttributes & aAttr)948 sal_Int16 SAL_CALL NativeNumberSupplierService::convertFromXmlAttributes( const NativeNumberXmlAttributes& aAttr )
949 {
950 sal_Unicode numberChar[NumberChar_Count];
951 for (sal_Int16 i = 0; i < NumberChar_Count; i++)
952 numberChar[i] = NumberChar[i][1];
953 OUString number(numberChar, NumberChar_Count);
954
955 sal_Int16 num = sal::static_int_cast<sal_Int16>( number.indexOf(aAttr.Format) );
956
957 if ( aAttr.Style == "short" ) {
958 if (num == NumberChar_FullWidth)
959 return NativeNumberMode::NATNUM3;
960 else if (num == NumberChar_Hangul_ko)
961 return NativeNumberMode::NATNUM9;
962 else if (natNumIn(num, natnum1, sizeof_natnum1))
963 return NativeNumberMode::NATNUM1;
964 else if (natNumIn(num, natnum2, sizeof_natnum2))
965 return NativeNumberMode::NATNUM2;
966 } else if ( aAttr.Style == "medium" ) {
967 if (num == NumberChar_Hangul_ko)
968 return NativeNumberMode::NATNUM11;
969 else if (num == NumberChar_he)
970 return NativeNumberMode::NATNUM2;
971 else if (natNumIn(num, natnum1, sizeof_natnum1))
972 return NativeNumberMode::NATNUM7;
973 else if (natNumIn(num, natnum2, sizeof_natnum2))
974 return NativeNumberMode::NATNUM8;
975 } else if ( aAttr.Style == "long" ) {
976 if (num == NumberChar_FullWidth)
977 return NativeNumberMode::NATNUM6;
978 else if (num == NumberChar_Hangul_ko)
979 return NativeNumberMode::NATNUM10;
980 else if (natNumIn(num, natnum1, sizeof_natnum1))
981 return NativeNumberMode::NATNUM4;
982 else if (natNumIn(num, natnum2, sizeof_natnum2))
983 return NativeNumberMode::NATNUM5;
984 } else {
985 throw RuntimeException();
986 }
987 return NativeNumberMode::NATNUM0;
988 }
989
990
991 // The following code generates Hebrew numbers.
992 // For details, see the description of the Hebrew numbering system at the following link:
993 // http://smontagu.org/writings/HebrewNumbers.html
994
995 namespace {
996
// One entry of the lookup table used by makeHebrewNumber(): a character and
// the amount it accounts for. Entries are brace-initialised positionally, so
// the member order (code, value) must not change.
997 struct HebrewNumberChar {
998 sal_Unicode code; // character appended to the output
999 sal_Int16 value; // amount subtracted from the remaining number when the character is emitted
1000 };
1001
1002 }
1003
// Lookup table scanned front to back by makeHebrewNumber(), in descending
// value order. The scan visits each entry at most once per three-digit group,
// so the 400 entry is listed twice on purpose: this lets remainders of 800 and
// above emit that character two times.
// The trailing comments give the Unicode letter names of the code points.
1004 HebrewNumberChar const HebrewNumberCharArray[] = {
1005 { 0x05ea, 400 }, // TAV
1006 { 0x05ea, 400 }, // TAV (second entry, see above)
1007 { 0x05e9, 300 }, // SHIN
1008 { 0x05e8, 200 }, // RESH
1009 { 0x05e7, 100 }, // QOF
1010 { 0x05e6, 90 }, // TSADI
1011 { 0x05e4, 80 }, // PE
1012 { 0x05e2, 70 }, // AYIN
1013 { 0x05e1, 60 }, // SAMEKH
1014 { 0x05e0, 50 }, // NUN
1015 { 0x05de, 40 }, // MEM
1016 { 0x05dc, 30 }, // LAMED
1017 { 0x05db, 20 }, // KAF
1018 { 0x05d9, 10 }, // YOD
1019 { 0x05d8, 9 }, // TET
1020 { 0x05d7, 8 }, // HET
1021 { 0x05d6, 7 }, // ZAYIN
1022 { 0x05d5, 6 }, // VAV
1023 { 0x05d4, 5 }, // HE
1024 { 0x05d3, 4 }, // DALET
1025 { 0x05d2, 3 }, // GIMEL
1026 { 0x05d1, 2 }, // BET
1027 { 0x05d0, 1 } // ALEF
1028 };
1029
// Zero-terminated words appended by makeHebrewNumber() when value % 1000 == 0:
// `thousand` if the value is exactly 1000, otherwise `thousands_last` when its
// isLast argument is true and `thousands` when it is false.
1030 const sal_Unicode thousand[] = {0x05d0, 0x05dc, 0x05e3, 0x0};
1031 const sal_Unicode thousands[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x0};
1032 const sal_Unicode thousands_last[] = {0x05d0, 0x05dc, 0x05e4, 0x05d9, 0x05dd, 0x0};
// Punctuation added by makeHebrewNumber() when its useGeresh argument is true:
// geresh is appended after a group written as a single character, gershayim is
// inserted before the last character of a group written as several characters.
1033 const sal_Unicode geresh = 0x05f3;
1034 const sal_Unicode gershayim = 0x05f4;
1035
makeHebrewNumber(sal_Int64 value,OUStringBuffer & output,bool isLast,bool useGeresh)1036 static void makeHebrewNumber(sal_Int64 value, OUStringBuffer& output, bool isLast, bool useGeresh)
1037 {
1038 sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000);
1039
1040 if (value > 1000) {
1041 makeHebrewNumber(value / 1000, output, num != 0, useGeresh);
1042 output.append(" ");
1043 }
1044 if (num == 0) {
1045 output.append(value == 1000 ? thousand : isLast ? thousands_last : thousands);
1046 } else {
1047 sal_Int16 nbOfChar = 0;
1048 for (sal_Int32 j = 0; num > 0 && j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray)); j++) {
1049 if (num - HebrewNumberCharArray[j].value >= 0) {
1050 nbOfChar++;
1051 // https://en.wikipedia.org/wiki/Hebrew_numerals#Key_exceptions
1052 // By convention, the numbers 15 and 16 are represented as 9 + 6 and 9 + 7
1053 if (num == 15 || num == 16) // substitution for 15 and 16
1054 j++;
1055 assert(j < sal_Int32(SAL_N_ELEMENTS(HebrewNumberCharArray)));
1056 num = sal::static_int_cast<sal_Int16>( num - HebrewNumberCharArray[j].value );
1057 output.append(HebrewNumberCharArray[j].code);
1058 }
1059 }
1060 if (useGeresh) {
1061 if (nbOfChar > 1) // a number is written as more than one character
1062 output.insert(output.getLength() - 1, gershayim);
1063 else if (nbOfChar == 1) // a number is written as a single character
1064 output.append(geresh);
1065 }
1066 }
1067 }
1068
/**
 * Convert the leading decimal number of a string to Hebrew numerals.
 *
 * Scanning starts at the beginning of the string: digits are accumulated,
 * minus characters before the first digit and separator characters after the
 * first digit are skipped, and the scan stops at the first other character.
 * The skipped characters are not copied to the result; the unscanned tail of
 * the string is appended unchanged after the converted number.
 *
 * @param aNumberString  text that starts with a number
 * @param useGeresh      forwarded to makeHebrewNumber()
 * @return the converted string, or aNumberString itself when the accumulated
 *         value is not positive, when the number has 20 or more digits, or
 *         when it does not fit into a sal_Int64
 */
OUString getHebrewNativeNumberString(const OUString& aNumberString, bool useGeresh)
{
    sal_Int64 value = 0;
    sal_Int32 i, count = 0, len = aNumberString.getLength();
    const sal_Unicode *src = aNumberString.getStr();

    for (i = 0; i < len; i++) {
        sal_Unicode ch = src[i];
        if (isNumber(ch)) {
            if (++count >= 20) // Number is too long, could not be handled.
                return aNumberString;
            const sal_Int64 digit = ch - NUMBER_ZERO;
            // The digit limit above still lets 19-digit numbers through, and
            // those can exceed SAL_MAX_INT64. Check before accumulating so
            // that the signed multiplication/addition can never overflow.
            if (value > (SAL_MAX_INT64 - digit) / 10)
                return aNumberString;
            value = value * 10 + digit;
        }
        else if (isSeparator(ch) && count > 0) continue;
        else if (isMinus(ch) && count == 0) continue;
        else break;
    }

    if (value > 0) {
        OUStringBuffer output(count*2 + 2 + len - i);

        makeHebrewNumber(value, output, true, useGeresh);

        // Keep whatever follows the number as it is.
        if (i < len)
            output.append(aNumberString.subView(i));

        return output.makeStringAndClear();
    }
    else
        return aNumberString;
}
1100
1101 // Support for Cyrillic Numerals
1102 // See UTN 41 for implementation information
1103 // http://www.unicode.org/notes/tn41/
1104
// Characters used by makeCyrillicNumber():
// - cyrillicThousandsMark is appended before the part of a number >= 1000
//   that is produced by the recursive call for value / 1000;
// - cyrillicTitlo is appended or inserted near the end of the output when the
//   addTitlo argument is (still) true;
// - cyrillicTen is appended after the unit character for the values 11 to 19
//   (same code point as the entry for 10 in CyrillicNumberCharArray).
1105 const sal_Unicode cyrillicThousandsMark = 0x0482;
1106 const sal_Unicode cyrillicTitlo = 0x0483;
1107 const sal_Unicode cyrillicTen = 0x0456;
1108
1109 namespace {
1110
// One entry of the lookup table used by makeCyrillicNumber(): a character and
// the amount it accounts for. Entries are brace-initialised positionally, so
// the member order (code, value) must not change.
1111 struct CyrillicNumberChar {
1112 sal_Unicode code; // character appended to the output
1113 sal_Int16 value; // amount subtracted from the remaining number when the character is emitted
1114 };
1115
1116 }
1117
// Lookup table scanned front to back by makeCyrillicNumber(), in descending
// value order: one entry for each hundred, each ten and each unit.
// The trailing comments give the Unicode names of the (small) letters.
1118 CyrillicNumberChar const CyrillicNumberCharArray[] = {
1119 { 0x0446, 900 }, // TSE
1120 { 0x047f, 800 }, // OT
1121 { 0x0471, 700 }, // PSI
1122 { 0x0445, 600 }, // HA
1123 { 0x0444, 500 }, // EF
1124 { 0x0443, 400 }, // U
1125 { 0x0442, 300 }, // TE
1126 { 0x0441, 200 }, // ES
1127 { 0x0440, 100 }, // ER
1128 { 0x0447, 90 }, // CHE
1129 { 0x043f, 80 }, // PE
1130 { 0x047b, 70 }, // ROUND OMEGA
1131 { 0x046f, 60 }, // KSI
1132 { 0x043d, 50 }, // EN
1133 { 0x043c, 40 }, // EM
1134 { 0x043b, 30 }, // EL
1135 { 0x043a, 20 }, // KA
1136 { 0x0456, 10 }, // BYELORUSSIAN-UKRAINIAN I
1137 { 0x0473, 9 }, // FITA
1138 { 0x0438, 8 }, // I
1139 { 0x0437, 7 }, // ZE
1140 { 0x0455, 6 }, // DZE
1141 { 0x0454, 5 }, // UKRAINIAN IE
1142 { 0x0434, 4 }, // DE
1143 { 0x0433, 3 }, // GHE
1144 { 0x0432, 2 }, // VE
1145 { 0x0430, 1 } // A
1146 };
1147
makeCyrillicNumber(sal_Int64 value,OUStringBuffer & output,bool addTitlo)1148 static void makeCyrillicNumber(sal_Int64 value, OUStringBuffer& output, bool addTitlo)
1149 {
1150 sal_Int16 num = sal::static_int_cast<sal_Int16>(value % 1000);
1151 if (value >= 1000) {
1152 output.append(cyrillicThousandsMark);
1153 makeCyrillicNumber(value / 1000, output, false);
1154 if (value >= 10000 && (value - 10000) % 1000 != 0) {
1155 output.append(" ");
1156 }
1157 if (value % 1000 == 0)
1158 addTitlo = false;
1159 }
1160
1161 for (sal_Int32 j = 0; num > 0 && j < sal_Int32(SAL_N_ELEMENTS(CyrillicNumberCharArray)); j++) {
1162 if (num < 20 && num > 10) {
1163 num -= 10;
1164 makeCyrillicNumber(num, output, false);
1165 output.append(cyrillicTen);
1166 break;
1167 }
1168
1169 if (CyrillicNumberCharArray[j].value <= num) {
1170 output.append(CyrillicNumberCharArray[j].code);
1171 num = sal::static_int_cast<sal_Int16>( num - CyrillicNumberCharArray[j].value );
1172 }
1173 }
1174
1175 if (!addTitlo)
1176 return;
1177
1178 if (output.getLength() == 1) {
1179 output.append(cyrillicTitlo);
1180 } else if (output.getLength() == 2) {
1181 if (value > 800 && value < 900) {
1182 output.append(cyrillicTitlo);
1183 } else {
1184 output.insert(1, cyrillicTitlo);
1185 }
1186 } else if (output.getLength() > 2) {
1187 if (output.indexOf(" ") == output.getLength() - 2) {
1188 output.append(cyrillicTitlo);
1189 } else {
1190 output.insert(output.getLength() - 1, cyrillicTitlo);
1191 }
1192 }
1193 }
1194
/**
 * Convert the leading decimal number of a string to Cyrillic numerals.
 *
 * Scanning starts at the beginning of the string: digits are accumulated,
 * minus characters before the first digit and separator characters after the
 * first digit are skipped, and the scan stops at the first other character.
 * The skipped characters are not copied to the result; the unscanned tail of
 * the string is appended unchanged after the converted number.
 *
 * @param aNumberString  text that starts with a number
 * @return the converted string, or aNumberString itself when the accumulated
 *         value is not positive or the number has 8 or more digits
 */
OUString getCyrillicNativeNumberString(const OUString& aNumberString)
{
    const sal_Int32 nLen = aNumberString.getLength();
    const sal_Unicode* const pSrc = aNumberString.getStr();

    sal_Int64 nValue = 0;
    sal_Int32 nDigits = 0;
    sal_Int32 nPos = 0;
    for (; nPos < nLen; ++nPos) {
        const sal_Unicode cCur = pSrc[nPos];
        if (isNumber(cCur)) {
            ++nDigits;
            if (nDigits >= 8) // Number is too long, could not be handled.
                return aNumberString;
            nValue = nValue * 10 + (cCur - NUMBER_ZERO);
        } else {
            const bool bSkippable = (isSeparator(cCur) && nDigits > 0)
                                 || (isMinus(cCur) && nDigits == 0);
            if (!bSkippable)
                break;
        }
    }

    // Nothing convertible found: hand the input back untouched.
    if (nValue <= 0)
        return aNumberString;

    OUStringBuffer aResult(nDigits*2 + 2 + nLen - nPos);
    makeCyrillicNumber(nValue, aResult, true);

    // Keep whatever follows the number as it is.
    if (nPos < nLen)
        aResult.append(aNumberString.subView(nPos));

    return aResult.makeStringAndClear();
}
1226
// Name returned by getImplementationName(); also the first entry of the list
// returned by getSupportedServiceNames().
1227 constexpr OUStringLiteral implementationName = u"com.sun.star.i18n.NativeNumberSupplier";
1228
// Returns the implementation name of this service, i.e. the value of the
// implementationName constant.
getImplementationName()1229 OUString SAL_CALL NativeNumberSupplierService::getImplementationName()
1230 {
1231 return implementationName;
1232 }
1233
// Tells whether this object supports the service named rServiceName.
// The decision is delegated to cppu::supportsService(), which receives this
// object and the name; presumably it checks the name against
// getSupportedServiceNames() - confirm in cppuhelper/supportsservice.hxx.
1234 sal_Bool SAL_CALL
supportsService(const OUString & rServiceName)1235 NativeNumberSupplierService::supportsService(const OUString& rServiceName)
1236 {
1237 return cppu::supportsService(this, rServiceName);
1238 }
1239
// Returns the two service names this implementation reports: the value of
// implementationName and "com.sun.star.i18n.NativeNumberSupplier2".
1240 Sequence< OUString > SAL_CALL
getSupportedServiceNames()1241 NativeNumberSupplierService::getSupportedServiceNames()
1242 {
1243 return {implementationName, "com.sun.star.i18n.NativeNumberSupplier2"};
1244 }
1245
1246 }
1247
// Exported C-linkage factory function for the NativeNumberSupplier component.
// Both parameters (component context and argument sequence) are unnamed and
// unused. Returns the result of cppu::acquire() applied to a newly allocated
// i18npool::NativeNumberSupplierService.
// NOTE(review): presumably the caller takes over the acquired reference - confirm.
1248 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_i18n_NativeNumberSupplier_get_implementation(css::uno::XComponentContext *,css::uno::Sequence<css::uno::Any> const &)1249 com_sun_star_i18n_NativeNumberSupplier_get_implementation(
1250 css::uno::XComponentContext *,
1251 css::uno::Sequence<css::uno::Any> const &)
1252 {
1253 return cppu::acquire(new i18npool::NativeNumberSupplierService());
1254 }
1255
1256 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1257