1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <cstdlib>
23 #include <string_view>
24 
25 #include <i18nlangtag/languagetag.hxx>
26 #include <i18nutil/searchopt.hxx>
27 #include <i18nutil/transliteration.hxx>
28 #include <com/sun/star/util/TextSearch2.hpp>
29 #include <com/sun/star/util/SearchAlgorithms2.hpp>
30 #include <com/sun/star/util/SearchFlags.hpp>
31 #include <sal/log.hxx>
32 #include <unotools/charclass.hxx>
33 #include <comphelper/processfactory.hxx>
34 #include <unotools/textsearch.hxx>
35 #include <rtl/instance.hxx>
36 #include <rtl/ustrbuf.hxx>
37 
38 using namespace ::com::sun::star::util;
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::lang;
41 
42 namespace utl
43 {
44 
SearchParam(const OUString & rText,SearchType eType,bool bCaseSensitive,sal_uInt32 cWildEscChar,bool bWildMatchSel)45 SearchParam::SearchParam( const OUString &rText,
46                                 SearchType eType,
47                                 bool bCaseSensitive,
48                                 sal_uInt32 cWildEscChar,
49                                 bool bWildMatchSel )
50 {
51     sSrchStr        = rText;
52     m_eSrchType     = eType;
53 
54     m_cWildEscChar  = cWildEscChar;
55 
56     m_bCaseSense    = bCaseSensitive;
57     m_bWildMatchSel = bWildMatchSel;
58 }
59 
/** Copy constructor: takes over the search string, search type, wildcard
 *  escape character and both boolean flags of @p rParam.
 */
SearchParam::SearchParam( const SearchParam& rParam )
    // NOTE(review): listed in the order the members used to be assigned;
    // confirm it matches the declaration order in the class.
    : sSrchStr( rParam.sSrchStr )
    , m_eSrchType( rParam.m_eSrchType )
    , m_cWildEscChar( rParam.m_cWildEscChar )
    , m_bCaseSense( rParam.m_bCaseSense )
    , m_bWildMatchSel( rParam.m_bWildMatchSel )
{
}
70 
~SearchParam()71 SearchParam::~SearchParam() {}
72 
lcl_Equals(const i18nutil::SearchOptions2 & rSO1,const i18nutil::SearchOptions2 & rSO2)73 static bool lcl_Equals( const i18nutil::SearchOptions2& rSO1, const i18nutil::SearchOptions2& rSO2 )
74 {
75     return
76         rSO1.AlgorithmType2 == rSO2.AlgorithmType2 &&
77         rSO1.WildcardEscapeCharacter == rSO2.WildcardEscapeCharacter &&
78         rSO1.algorithmType == rSO2.algorithmType &&
79         rSO1.searchFlag == rSO2.searchFlag &&
80         rSO1.searchString == rSO2.searchString &&
81         rSO1.replaceString == rSO2.replaceString &&
82         rSO1.changedChars == rSO2.changedChars &&
83         rSO1.deletedChars == rSO2.deletedChars &&
84         rSO1.insertedChars == rSO2.insertedChars &&
85         rSO1.Locale.Language == rSO2.Locale.Language &&
86         rSO1.Locale.Country == rSO2.Locale.Country &&
87         rSO1.Locale.Variant == rSO2.Locale.Variant &&
88         rSO1.transliterateFlags == rSO2.transliterateFlags;
89 }
90 
91 namespace
92 {
93     struct CachedTextSearch
94     {
95         ::osl::Mutex mutex;
96         i18nutil::SearchOptions2 Options;
97         css::uno::Reference< css::util::XTextSearch2 > xTextSearch;
98     };
99 
100     struct theCachedTextSearch
101         : public rtl::Static< CachedTextSearch, theCachedTextSearch > {};
102 }
103 
getXTextSearch(const i18nutil::SearchOptions2 & rPara)104 Reference<XTextSearch2> TextSearch::getXTextSearch( const i18nutil::SearchOptions2& rPara )
105 {
106     CachedTextSearch &rCache = theCachedTextSearch::get();
107 
108     osl::MutexGuard aGuard(rCache.mutex);
109 
110     if ( lcl_Equals(rCache.Options, rPara) )
111         return rCache.xTextSearch;
112 
113     Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
114     rCache.xTextSearch.set( ::TextSearch2::create(xContext) );
115     rCache.xTextSearch->setOptions2( rPara.toUnoSearchOptions2() );
116     rCache.Options = rPara;
117 
118     return rCache.xTextSearch;
119 }
120 
TextSearch(const SearchParam & rParam,LanguageType eLang)121 TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
122 {
123     if( LANGUAGE_NONE == eLang )
124         eLang = LANGUAGE_SYSTEM;
125     css::lang::Locale aLocale( LanguageTag::convertToLocale( eLang ) );
126 
127     Init( rParam, aLocale);
128 }
129 
TextSearch(const SearchParam & rParam,const CharClass & rCClass)130 TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
131 {
132     Init( rParam, rCClass.getLanguageTag().getLocale() );
133 }
134 
TextSearch(const i18nutil::SearchOptions2 & rPara)135 TextSearch::TextSearch( const i18nutil::SearchOptions2& rPara )
136 {
137     xTextSearch = getXTextSearch( rPara );
138 }
139 
/** Build a SearchOptions2 from an old-style SearchOptions.
 *
 *  The old algorithm enum is mapped to the matching SearchAlgorithms2
 *  constant (REGEXP, APPROXIMATE or ABSOLUTE); any other value aborts the
 *  process. All other fields are copied over, and the wildcard escape
 *  character is set to 0.
 *
 *  @param rOptions  old-style options to convert
 *  @return the equivalent SearchOptions2
 */
i18nutil::SearchOptions2 TextSearch::UpgradeToSearchOptions2( const i18nutil::SearchOptions& rOptions )
{
    sal_Int16 nNewAlgorithm;
    if (rOptions.algorithmType == SearchAlgorithms_REGEXP)
        nNewAlgorithm = SearchAlgorithms2::REGEXP;
    else if (rOptions.algorithmType == SearchAlgorithms_APPROXIMATE)
        nNewAlgorithm = SearchAlgorithms2::APPROXIMATE;
    else if (rOptions.algorithmType == SearchAlgorithms_ABSOLUTE)
        nNewAlgorithm = SearchAlgorithms2::ABSOLUTE;
    else
        for (;;) std::abort();

    // The derived struct has no constructor taking an instance of its base,
    // so every field is handed over individually.
    return i18nutil::SearchOptions2(
            rOptions.algorithmType,
            rOptions.searchFlag,
            rOptions.searchString,
            rOptions.replaceString,
            rOptions.Locale,
            rOptions.changedChars,
            rOptions.deletedChars,
            rOptions.insertedChars,
            rOptions.transliterateFlags,
            nNewAlgorithm,
            0       // no wildcard search, hence no escape character
            );
}
174 
Init(const SearchParam & rParam,const css::lang::Locale & rLocale)175 void TextSearch::Init( const SearchParam & rParam,
176                         const css::lang::Locale& rLocale )
177 {
178     // convert SearchParam to the UNO SearchOptions2
179     i18nutil::SearchOptions2 aSOpt;
180 
181     switch( rParam.GetSrchType() )
182     {
183     case SearchParam::SearchType::Wildcard:
184         aSOpt.AlgorithmType2 = SearchAlgorithms2::WILDCARD;
185         aSOpt.algorithmType = SearchAlgorithms::SearchAlgorithms_MAKE_FIXED_SIZE;    // no old enum for that
186         aSOpt.WildcardEscapeCharacter = rParam.GetWildEscChar();
187         if (rParam.IsWildMatchSel())
188             aSOpt.searchFlag |= SearchFlags::WILD_MATCH_SELECTION;
189         break;
190 
191     case SearchParam::SearchType::Regexp:
192         aSOpt.AlgorithmType2 = SearchAlgorithms2::REGEXP;
193         aSOpt.algorithmType = SearchAlgorithms_REGEXP;
194         break;
195 
196     case SearchParam::SearchType::Normal:
197         aSOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
198         aSOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
199         break;
200 
201     default:
202         for (;;) std::abort();
203     }
204     aSOpt.searchString = rParam.GetSrchStr();
205     aSOpt.replaceString = "";
206     aSOpt.Locale = rLocale;
207     aSOpt.transliterateFlags = TransliterationFlags::NONE;
208     if( !rParam.IsCaseSensitive() )
209     {
210         aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
211         aSOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE;
212     }
213 
214     xTextSearch = getXTextSearch( aSOpt );
215 }
216 
SetLocale(const i18nutil::SearchOptions2 & rOptions,const css::lang::Locale & rLocale)217 void TextSearch::SetLocale( const i18nutil::SearchOptions2& rOptions,
218                             const css::lang::Locale& rLocale )
219 {
220     i18nutil::SearchOptions2 aSOpt( rOptions );
221     aSOpt.Locale = rLocale;
222 
223     xTextSearch = getXTextSearch( aSOpt );
224 }
225 
~TextSearch()226 TextSearch::~TextSearch()
227 {
228 }
229 
/*
 * General search methods. These methods forward the request to the
 * XTextSearch2 implementation held in xTextSearch, which performs the
 * actual matching (such as ordinary string searching or regular
 * expression matching).
 */
SearchForward(const OUString & rStr,sal_Int32 * pStart,sal_Int32 * pEnd,css::util::SearchResult * pRes)235 bool TextSearch::SearchForward( const OUString &rStr,
236                     sal_Int32* pStart, sal_Int32* pEnd,
237                     css::util::SearchResult* pRes)
238 {
239     bool bRet = false;
240     try
241     {
242         if( xTextSearch.is() )
243         {
244             SearchResult aRet( xTextSearch->searchForward( rStr, *pStart, *pEnd ));
245             if( aRet.subRegExpressions > 0 )
246             {
247                 bRet = true;
248                 // the XTextsearch returns in startOffset the higher position
249                 // and the endposition is always exclusive.
250                 // The caller of this function will have in startPos the
251                 // lower pos. and end
252                 *pStart = aRet.startOffset[ 0 ];
253                 *pEnd = aRet.endOffset[ 0 ];
254                 if( pRes )
255                     *pRes = aRet;
256             }
257         }
258     }
259     catch ( Exception& )
260     {
261         SAL_WARN( "unotools.i18n", "SearchForward: Exception caught!" );
262     }
263     return bRet;
264 }
265 
searchForward(const OUString & rStr)266 bool TextSearch::searchForward( const OUString &rStr )
267 {
268     sal_Int32 pStart = 0;
269     sal_Int32 pEnd = rStr.getLength();
270 
271     bool bResult = SearchForward(rStr, &pStart, &pEnd);
272 
273     return bResult;
274 }
275 
SearchBackward(const OUString & rStr,sal_Int32 * pStart,sal_Int32 * pEnde,SearchResult * pRes)276 bool TextSearch::SearchBackward( const OUString & rStr, sal_Int32* pStart,
277                                 sal_Int32* pEnde, SearchResult* pRes )
278 {
279     bool bRet = false;
280     try
281     {
282         if( xTextSearch.is() )
283         {
284             SearchResult aRet( xTextSearch->searchBackward( rStr, *pStart, *pEnde ));
285             if( aRet.subRegExpressions )
286             {
287                 bRet = true;
288                 // the XTextsearch returns in startOffset the higher position
289                 // and the endposition is always exclusive.
290                 // The caller of this function will have in startPos the
291                 // lower pos. and end
292                 *pEnde = aRet.startOffset[ 0 ];
293                 *pStart = aRet.endOffset[ 0 ];
294                 if( pRes )
295                     *pRes = aRet;
296             }
297         }
298     }
299     catch ( Exception& )
300     {
301         SAL_WARN( "unotools.i18n", "SearchBackward: Exception caught!" );
302     }
303     return bRet;
304 }
305 
ReplaceBackReferences(OUString & rReplaceStr,const OUString & rStr,const SearchResult & rResult) const306 void TextSearch::ReplaceBackReferences( OUString& rReplaceStr, const OUString &rStr, const SearchResult& rResult ) const
307 {
308     if( rResult.subRegExpressions > 0 )
309     {
310         sal_Unicode sFndChar;
311         sal_Int32 i;
312         OUStringBuffer sBuff(rReplaceStr.getLength()*4);
313         for(i = 0; i < rReplaceStr.getLength(); i++)
314         {
315             if( rReplaceStr[i] == '&')
316             {
317                 sal_Int32 nStart = rResult.startOffset[0];
318                 sal_Int32 nLength = rResult.endOffset[0] - rResult.startOffset[0];
319                 sBuff.append(std::u16string_view(rStr).substr(nStart, nLength));
320             }
321             else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '$')
322             {
323                 sFndChar = rReplaceStr[ i + 1 ];
324                 switch(sFndChar)
325                 {   // placeholder for a backward reference?
326                 case '0':
327                 case '1':
328                 case '2':
329                 case '3':
330                 case '4':
331                 case '5':
332                 case '6':
333                 case '7':
334                 case '8':
335                 case '9':
336                     {
337                         int j = sFndChar - '0'; // index
338                         if(j < rResult.subRegExpressions)
339                         {
340                             sal_Int32 nSttReg = rResult.startOffset[j];
341                             sal_Int32 nRegLen = rResult.endOffset[j];
342                             if (nSttReg < 0 || nRegLen < 0) // A "not found" optional capture
343                             {
344                                 nSttReg = nRegLen = 0; // Copy empty string
345                             }
346                             else if (nRegLen >= nSttReg)
347                             {
348                                 nRegLen = nRegLen - nSttReg;
349                             }
350                             else
351                             {
352                                 nRegLen = nSttReg - nRegLen;
353                                 nSttReg = rResult.endOffset[j];
354                             }
355                             // Copy reference from found string
356                             sBuff.append(std::u16string_view(rStr).substr(nSttReg, nRegLen));
357                         }
358                         i += 1;
359                     }
360                     break;
361                 default:
362                     sBuff.append(rReplaceStr[i]);
363                     sBuff.append(rReplaceStr[i+1]);
364                     i += 1;
365                     break;
366                 }
367             }
368             else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '\\')
369             {
370                 sFndChar = rReplaceStr[ i+1 ];
371                 switch(sFndChar)
372                 {
373                 case '\\':
374                 case '&':
375                 case '$':
376                     sBuff.append(sFndChar);
377                     i+=1;
378                     break;
379                 case 't':
380                     sBuff.append('\t');
381                     i += 1;
382                     break;
383                 default:
384                     sBuff.append(rReplaceStr[i]);
385                     sBuff.append(rReplaceStr[i+1]);
386                     i += 1;
387                     break;
388                 }
389             }
390             else
391             {
392                 sBuff.append(rReplaceStr[i]);
393             }
394         }
395         rReplaceStr = sBuff.makeStringAndClear();
396     }
397 }
398 
399 }   // namespace utl
400 
401 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
402