1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <com/sun/star/uno/Reference.h>
21 #include <cppuhelper/factory.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <com/sun/star/lang/XSingleServiceFactory.hpp>
24 #include <com/sun/star/registry/XRegistryKey.hpp>
25 #include <com/sun/star/beans/XPropertySet.hpp>
26 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
27 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
28 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
29 #include <i18nlangtag/languagetag.hxx>
30 #include <tools/debug.hxx>
31 #include <comphelper/lok.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <comphelper/sequence.hxx>
34 #include <osl/mutex.hxx>
35 #include <osl/thread.h>
36 #include <unotools/pathoptions.hxx>
37 #include <unotools/lingucfg.hxx>
38 #include <unotools/resmgr.hxx>
39
40 #include <rtl/string.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include <rtl/textenc.h>
43
44 #include <svtools/strings.hrc>
45
46 #include "nthesimp.hxx"
47 #include <linguistic/misc.hxx>
48 #include <linguistic/lngprops.hxx>
49 #include "nthesdta.hxx"
50
51 #include <vector>
52 #include <numeric>
53 #include <set>
54 #include <string.h>
55
// Pseudo-XML header passed to XSpellChecker1::isValid() to ask whether the
// spell checker supports SPELLML queries
57 #define SPELLML_SUPPORT "<?xml?>"
58
59 using namespace osl;
60 using namespace com::sun::star;
61 using namespace com::sun::star::beans;
62 using namespace com::sun::star::lang;
63 using namespace com::sun::star::uno;
64 using namespace com::sun::star::linguistic2;
65 using namespace linguistic;
66
GetLngSvcMgr_Impl()67 static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
68 {
69 uno::Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() );
70 uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ;
71 return xRes;
72 }
73
Thesaurus()74 Thesaurus::Thesaurus() :
75 aEvtListeners ( GetLinguMutex() )
76 {
77 bDisposing = false;
78 pPropHelper = nullptr;
79 prevLocale = LANGUAGE_DONTKNOW;
80 }
81
~Thesaurus()82 Thesaurus::~Thesaurus()
83 {
84 mvThesInfo.clear();
85 if (pPropHelper)
86 {
87 pPropHelper->RemoveAsPropListener();
88 }
89 }
90
GetPropHelper_Impl()91 PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl()
92 {
93 if (!pPropHelper)
94 {
95 Reference< XLinguProperties > xPropSet = GetLinguProperties();
96
97 pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet );
98 pPropHelper->AddAsPropListener(); //! after a reference is established
99 }
100 return *pPropHelper;
101 }
102
getLocales()103 Sequence< Locale > SAL_CALL Thesaurus::getLocales()
104 {
105 MutexGuard aGuard( GetLinguMutex() );
106
107 // this routine should return the locales supported by the installed
108 // dictionaries.
109 if (mvThesInfo.empty())
110 {
111 SvtLinguConfig aLinguCfg;
112
113 // get list of dictionaries-to-use
114 std::vector< SvtLinguConfigDictionaryEntry > aDics;
115 uno::Sequence< OUString > aFormatList;
116 aLinguCfg.GetSupportedDictionaryFormatsFor( "Thesauri",
117 "org.openoffice.lingu.new.Thesaurus", aFormatList );
118 for (const auto& rFormat : std::as_const(aFormatList))
119 {
120 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
121 aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
122 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
123 }
124
125 //!! for compatibility with old dictionaries (the ones not using extensions
126 //!! or new configuration entries, but still using the dictionary.lst file)
127 //!! Get the list of old style spell checking dictionaries to use...
128 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
129 GetOldStyleDics( "THES" ) );
130
131 // to prefer dictionaries with configuration entries we will only
132 // use those old style dictionaries that add a language that
133 // is not yet supported by the list of new style dictionaries
134 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
135
136 if (!aDics.empty())
137 {
138 // get supported locales from the dictionaries-to-use...
139 std::set<OUString> aLocaleNamesSet;
140 for (auto const& dict : aDics)
141 {
142 for (const auto& rLocaleName : dict.aLocaleNames)
143 {
144 if (!comphelper::LibreOfficeKit::isWhitelistedLanguage(rLocaleName))
145 continue;
146
147 aLocaleNamesSet.insert( rLocaleName );
148 }
149 }
150 // ... and add them to the resulting sequence
151 std::vector<Locale> aLocalesVec;
152 aLocalesVec.reserve(aLocaleNamesSet.size());
153
154 std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
155 [](const OUString& localeName) -> Locale { return LanguageTag::convertToLocale(localeName); });
156
157 aSuppLocales = comphelper::containerToSequence(aLocalesVec);
158
159 //! For each dictionary and each locale we need a separate entry.
160 //! If this results in more than one dictionary per locale than (for now)
161 //! it is undefined which dictionary gets used.
162 //! In the future the implementation should support using several dictionaries
163 //! for one locale.
164 sal_Int32 numthes = std::accumulate(aDics.begin(), aDics.end(), 0,
165 [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
166 return nSum + dict.aLocaleNames.getLength(); });
167
168 // add dictionary information
169 mvThesInfo.resize(numthes);
170
171 sal_Int32 k = 0;
172 for (auto const& dict : aDics)
173 {
174 if (dict.aLocaleNames.hasElements() &&
175 dict.aLocations.hasElements())
176 {
177 // currently only one language per dictionary is supported in the actual implementation...
178 // Thus here we work-around this by adding the same dictionary several times.
179 // Once for each of its supported locales.
180 for (const auto& rLocaleName : dict.aLocaleNames)
181 {
182 LanguageTag aLanguageTag(rLocaleName);
183 mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW;
184 mvThesInfo[k].aLocale = aLanguageTag.getLocale();
185 mvThesInfo[k].aCharSetInfo.reset( new CharClass( aLanguageTag ) );
186 // also both files have to be in the same directory and the
187 // file names must only differ in the extension (.aff/.dic).
188 // Thus we use the first location only and strip the extension part.
189 OUString aLocation = dict.aLocations[0];
190 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
191 aLocation = aLocation.copy( 0, nPos );
192 mvThesInfo[k].aName = aLocation;
193
194 ++k;
195 }
196 }
197 }
198 DBG_ASSERT( k == numthes, "index mismatch?" );
199 }
200 else
201 {
202 /* no dictionary found so register no dictionaries */
203 mvThesInfo.clear();
204 aSuppLocales.realloc(0);
205 }
206 }
207
208 return aSuppLocales;
209 }
210
hasLocale(const Locale & rLocale)211 sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
212 {
213 MutexGuard aGuard( GetLinguMutex() );
214
215 if (!aSuppLocales.hasElements())
216 getLocales();
217
218 return comphelper::findValue(aSuppLocales, rLocale) != -1;
219 }
220
queryMeanings(const OUString & qTerm,const Locale & rLocale,const css::uno::Sequence<css::beans::PropertyValue> & rProperties)221 Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings(
222 const OUString& qTerm, const Locale& rLocale,
223 const css::uno::Sequence< css::beans::PropertyValue >& rProperties)
224 {
225 MutexGuard aGuard( GetLinguMutex() );
226
227 uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
228 uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
229 uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
230 uno::Reference< XSpellChecker1 > xSpell;
231
232 OUString aRTerm(qTerm);
233 OUString aPTerm(qTerm);
234 CapType ct = CapType::UNKNOWN;
235 sal_Int32 stem = 0;
236 sal_Int32 stem2 = 0;
237
238 LanguageType nLanguage = LinguLocaleToLanguage( rLocale );
239
240 if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty())
241 return noMeanings;
242
243 if (!hasLocale( rLocale ))
244 #ifdef LINGU_EXCEPTIONS
245 throw( IllegalArgumentException() );
246 #else
247 return noMeanings;
248 #endif
249
250 if (prevTerm == qTerm && prevLocale == nLanguage)
251 return prevMeanings;
252
253 mentry * pmean = nullptr;
254 sal_Int32 nmean = 0;
255
256 PropertyHelper_Thesaurus &rHelper = GetPropHelper();
257 rHelper.SetTmpPropVals( rProperties );
258
259 MyThes * pTH = nullptr;
260 rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
261 CharClass * pCC = nullptr;
262
263 // find the first thesaurus that matches the locale
264 for (size_t i =0; i < mvThesInfo.size(); i++)
265 {
266 if (rLocale == mvThesInfo[i].aLocale)
267 {
268 // open up and initialize this thesaurus if need be
269 if (!mvThesInfo[i].aThes)
270 {
271 OUString datpath = mvThesInfo[i].aName + ".dat";
272 OUString idxpath = mvThesInfo[i].aName + ".idx";
273 OUString ndat;
274 OUString nidx;
275 osl::FileBase::getSystemPathFromFileURL(datpath,ndat);
276 osl::FileBase::getSystemPathFromFileURL(idxpath,nidx);
277
278 #if defined(_WIN32)
279 // MyThes waits UTF-8 encoded paths with \\?\ long path prefix.
280 OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8));
281 OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8));
282 #else
283 OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding()));
284 OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding()));
285 #endif
286
287 mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) );
288 mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding());
289 }
290 pTH = mvThesInfo[i].aThes.get();
291 eEnc = mvThesInfo[i].aEncoding;
292 pCC = mvThesInfo[i].aCharSetInfo.get();
293
294 if (pTH)
295 break;
296 }
297 }
298
299 // we don't want to work with a default text encoding since following incorrect
300 // results may occur only for specific text and thus may be hard to notice.
301 // Thus better always make a clean exit here if the text encoding is in question.
302 // Hopefully something not working at all will raise proper attention quickly. ;-)
303 DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
304 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
305 return noMeanings;
306
307 while (pTH)
308 {
309 // convert word to all lower case for searching
310 if (!stem)
311 ct = capitalType(aRTerm, pCC);
312 OUString nTerm(makeLowerCase(aRTerm, pCC));
313 OString aTmp( OU2ENC(nTerm, eEnc) );
314 nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
315
316 if (nmean)
317 aMeanings.realloc( nmean );
318
319 mentry * pe = pmean;
320 OUString codeTerm = qTerm;
321 Reference< XSpellAlternatives > xTmpRes2;
322
323 if (stem)
324 {
325 xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" +
326 aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
327 if (xTmpRes2.is())
328 {
329 Sequence<OUString>seq = xTmpRes2->getAlternatives();
330 if (seq.hasElements())
331 {
332 codeTerm = seq[0];
333 stem2 = 1;
334 }
335 }
336 }
337
338 for (int j = 0; j < nmean; j++)
339 {
340 int count = pe->count;
341 if (count)
342 {
343 Sequence< OUString > aStr( count );
344 OUString *pStr = aStr.getArray();
345
346 for (int i=0; i < count; i++)
347 {
348 OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc );
349 sal_Int32 catpos = sTerm.indexOf('(');
350 OUString catst;
351 if (catpos > 2)
352 {
353 // remove category name for affixation and casing
354 catst = " " + sTerm.copy(catpos);
355 sTerm = sTerm.copy(0, catpos);
356 sTerm = sTerm.trim();
357 }
358 // generate synonyms with affixes
359 if (stem && stem2)
360 {
361 Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" +
362 sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties );
363 if (xTmpRes.is())
364 {
365 Sequence<OUString>seq = xTmpRes->getAlternatives();
366 if (seq.hasElements())
367 sTerm = seq[0];
368 }
369 }
370
371 CapType ct1 = capitalType(sTerm, pCC);
372 if (CapType::MIXED == ct1)
373 ct = ct1;
374 OUString cTerm;
375 switch (ct)
376 {
377 case CapType::ALLCAP:
378 cTerm = makeUpperCase(sTerm, pCC);
379 break;
380 case CapType::INITCAP:
381 cTerm = makeInitCap(sTerm, pCC);
382 break;
383 default:
384 cTerm = sTerm;
385 break;
386 }
387 OUString aAlt( cTerm + catst);
388 pStr[i] = aAlt;
389 }
390 Meaning * pMn = new Meaning(aRTerm);
391 OUString dTerm(pe->defn,strlen(pe->defn),eEnc );
392 pMn->SetMeaning(dTerm);
393 pMn->SetSynonyms(aStr);
394 Reference<XMeaning>* pMeaning = aMeanings.getArray();
395 pMeaning[j] = pMn;
396 }
397 pe++;
398 }
399 pTH->CleanUpAfterLookup(&pmean,nmean);
400
401 if (nmean)
402 {
403 prevTerm = qTerm;
404 prevMeanings = aMeanings;
405 prevLocale = nLanguage;
406 return aMeanings;
407 }
408
409 if (stem || !xLngSvcMgr.is())
410 return noMeanings;
411 stem = 1;
412
413 xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
414 if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties ))
415 return noMeanings;
416 Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" +
417 aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
418 if (xTmpRes.is())
419 {
420 Sequence<OUString>seq = xTmpRes->getAlternatives();
421 if (seq.hasElements())
422 {
423 aRTerm = seq[0]; // XXX Use only the first stem
424 continue;
425 }
426 }
427
428 // stem the last word of the synonym (for categories after affixation)
429 aRTerm = aRTerm.trim();
430 sal_Int32 pos = aRTerm.lastIndexOf(' ');
431 if (!pos)
432 return noMeanings;
433 xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" +
434 aRTerm.copy(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
435 if (xTmpRes.is())
436 {
437 Sequence<OUString>seq = xTmpRes->getAlternatives();
438 if (seq.hasElements())
439 {
440 aPTerm = aRTerm.copy(pos + 1);
441 aRTerm = aRTerm.copy(0, pos + 1) + seq[0];
442 #if 0
443 for (int i = 0; i < seq.getLength(); i++)
444 {
445 OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8);
446 fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
447 }
448 #endif
449 continue;
450 }
451 }
452 break;
453 }
454 return noMeanings;
455 }
456
457 /// @throws Exception
Thesaurus_CreateInstance(const Reference<XMultiServiceFactory> &)458 static Reference< XInterface > Thesaurus_CreateInstance(
459 const Reference< XMultiServiceFactory > & /*rSMgr*/ )
460 {
461 Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new Thesaurus);
462 return xService;
463 }
464
getServiceDisplayName(const Locale & rLocale)465 OUString SAL_CALL Thesaurus::getServiceDisplayName(const Locale& rLocale)
466 {
467 std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
468 return Translate::get(STR_DESCRIPTION_MYTHES, loc);
469 }
470
initialize(const Sequence<Any> & rArguments)471 void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments )
472 {
473 MutexGuard aGuard( GetLinguMutex() );
474
475 if (!pPropHelper)
476 {
477 sal_Int32 nLen = rArguments.getLength();
478 if (1 == nLen)
479 {
480 Reference< XLinguProperties > xPropSet;
481 rArguments.getConstArray()[0] >>= xPropSet;
482
483 //! Pointer allows for access of the non-UNO functions.
484 //! And the reference to the UNO-functions while increasing
485 //! the ref-count and will implicitly free the memory
486 //! when the object is no longer used.
487 pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet );
488 pPropHelper->AddAsPropListener(); //! after a reference is established
489 }
490 else
491 OSL_FAIL( "wrong number of arguments in sequence" );
492 }
493 }
494
/// Returns aTerm in lower case according to pCC, or aTerm unchanged when no
/// character classification is given.
OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC)
{
    return pCC ? pCC->lowercase(aTerm) : aTerm;
}
501
/// Returns aTerm in upper case according to pCC, or aTerm unchanged when no
/// character classification is given.
OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC)
{
    return pCC ? pCC->uppercase(aTerm) : aTerm;
}
508
/// Returns aTerm with its first code unit upper-cased and the remainder
/// lower-cased according to pCC; aTerm is returned unchanged when it is
/// empty or no character classification is given.
OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC)
{
    const sal_Int32 nLen = aTerm.getLength();
    if (!pCC || !nLen)
        return aTerm;

    const OUString aFirst = aTerm.copy(0,1);
    OUString aResult = pCC->uppercase(aFirst, 0, 1);
    if (nLen > 1)
        aResult += pCC->lowercase(aTerm, 1, nLen - 1);
    return aResult;
}
525
dispose()526 void SAL_CALL Thesaurus::dispose()
527 {
528 MutexGuard aGuard( GetLinguMutex() );
529
530 if (!bDisposing)
531 {
532 bDisposing = true;
533 EventObject aEvtObj( static_cast<XThesaurus *>(this) );
534 aEvtListeners.disposeAndClear( aEvtObj );
535 if (pPropHelper)
536 {
537 pPropHelper->RemoveAsPropListener();
538 delete pPropHelper;
539 pPropHelper = nullptr;
540 }
541 }
542 }
543
addEventListener(const Reference<XEventListener> & rxListener)544 void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener )
545 {
546 MutexGuard aGuard( GetLinguMutex() );
547
548 if (!bDisposing && rxListener.is())
549 aEvtListeners.addInterface( rxListener );
550 }
551
removeEventListener(const Reference<XEventListener> & rxListener)552 void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener )
553 {
554 MutexGuard aGuard( GetLinguMutex() );
555
556 if (!bDisposing && rxListener.is())
557 aEvtListeners.removeInterface( rxListener );
558 }
559
560 // Service specific part
getImplementationName()561 OUString SAL_CALL Thesaurus::getImplementationName()
562 {
563 return getImplementationName_Static();
564 }
565
supportsService(const OUString & ServiceName)566 sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName )
567 {
568 return cppu::supportsService(this, ServiceName);
569 }
570
getSupportedServiceNames()571 Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames()
572 {
573 return getSupportedServiceNames_Static();
574 }
575
getSupportedServiceNames_Static()576 Sequence< OUString > Thesaurus::getSupportedServiceNames_Static()
577 throw()
578 {
579 Sequence< OUString > aSNS { SN_THESAURUS };
580 return aSNS;
581 }
582
583 extern "C"
584 {
lnth_component_getFactory(const sal_Char * pImplName,void * pServiceManager,void *)585 SAL_DLLPUBLIC_EXPORT void * lnth_component_getFactory(
586 const sal_Char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ )
587 {
588 void * pRet = nullptr;
589 if ( Thesaurus::getImplementationName_Static().equalsAscii( pImplName ) )
590 {
591
592 Reference< XSingleServiceFactory > xFactory =
593 cppu::createOneInstanceFactory(
594 static_cast< XMultiServiceFactory * >( pServiceManager ),
595 Thesaurus::getImplementationName_Static(),
596 Thesaurus_CreateInstance,
597 Thesaurus::getSupportedServiceNames_Static());
598 // acquire, because we return an interface pointer instead of a reference
599 xFactory->acquire();
600 pRet = xFactory.get();
601 }
602 return pRet;
603 }
604 }
605
606 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
607