1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <sot/formats.hxx>
24 #include <sfx2/mieclip.hxx>
25 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
26 #include <sal/log.hxx>
27 #include <unotools/charclass.hxx>
28 #include <osl/module.hxx>
29 
30 #include <global.hxx>
31 #include <docsh.hxx>
32 #include <undoblk.hxx>
33 #include <rangenam.hxx>
34 #include <tabvwsh.hxx>
35 #include <filter.hxx>
36 #include <asciiopt.hxx>
37 #include <formulacell.hxx>
38 #include <cellform.hxx>
39 #include <progress.hxx>
40 #include <scitems.hxx>
41 #include <editable.hxx>
42 #include <compiler.hxx>
43 #include <warnbox.hxx>
44 #include <clipparam.hxx>
45 #include <impex.hxx>
46 #include <editutil.hxx>
47 #include <patattr.hxx>
48 #include <docpool.hxx>
49 #include <stringutil.hxx>
50 #include <cellvalue.hxx>
51 #include <tokenarray.hxx>
52 #include <documentimport.hxx>
53 #include <refundo.hxx>
54 #include <mtvelements.hxx>
55 
56 #include <globstr.hrc>
57 #include <scresid.hxx>
58 #include <o3tl/safeint.hxx>
59 #include <tools/svlibrary.h>
60 #include <unotools/configmgr.hxx>
61 #include <vcl/svapp.hxx>
62 #include <vcl/weld.hxx>
63 #include <editeng/editobj.hxx>
64 
65 #include <memory>
66 #include <string_view>
67 
68 #include <osl/endian.h>
69 
// We don't want to end up with 2GB read in one line just because of malformed
// multiline fields, so chop it _somewhere_, which is twice supported columns
// times arbitrary maximum cell content length, 2*1024*64K=128M, and because
// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
// luck anyway.
// NOTE(review): the 2*1024*64K figure presumes MAXCOLCOUNT is 1024; the real
// value follows whatever MAXCOLCOUNT is defined as elsewhere - confirm.

// Upper bound, in UTF-16 code units, for the text collected into one cell.
constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
// Upper bound, in UTF-16 code units, for one logical input line.
constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;
77 
78 namespace
79 {
80     const char SYLK_LF[]  = "\x1b :";
81 
lcl_IsEndianSwap(const SvStream & rStrm)82     bool lcl_IsEndianSwap( const SvStream& rStrm )
83     {
84     #ifdef OSL_BIGENDIAN
85         return rStrm.GetEndian() != SvStreamEndian::BIG;
86     #else
87         return rStrm.GetEndian() != SvStreamEndian::LITTLE;
88     #endif
89     }
90 }
91 
namespace {

/** Producer generation of a SYLK file. Enumerators are ordered so that
    relational comparisons such as `eVersion >= SylkVersion::OOO32` work. */
enum class SylkVersion
{
    /// Wrote wrongly quoted strings and unescaped semicolons.
    SCALC3,
    /// Correct strings, plus multiline content.
    OOO32,
    /// Place our new versions, if any, before this value.
    OWN,
    /// Assume that aliens wrote correct strings.
    OTHER
};

}
103 
104 // Whole document without Undo
ScImportExport(ScDocument & r)105 ScImportExport::ScImportExport( ScDocument& r )
106     : pDocSh( dynamic_cast< ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
107       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
108       cSep( '\t' ), cStr( '"' ),
109       bFormulas( false ), bIncludeFiltered( true ),
110       bAll( true ), bSingle( true ), bUndo( false ),
111       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
112       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
113       mExportTextOptions()
114 {
115     pUndoDoc = nullptr;
116     pExtOptions = nullptr;
117 }
118 
119 // Insert am current cell without range(es)
ScImportExport(ScDocument & r,const ScAddress & rPt)120 ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
121     : pDocSh( dynamic_cast< ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
122       aRange( rPt ),
123       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
124       cSep( '\t' ), cStr( '"' ),
125       bFormulas( false ), bIncludeFiltered( true ),
126       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
127       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
128       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
129       mExportTextOptions()
130 {
131     pUndoDoc = nullptr;
132     pExtOptions = nullptr;
133 }
134 
135 //  ctor with a range is only used for export
136 //! ctor with a string (and bSingle=true) is also used for DdeSetData
ScImportExport(ScDocument & r,const ScRange & rRange)137 ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
138     : pDocSh( dynamic_cast<ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
139       aRange( rRange ),
140       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
141       cSep( '\t' ), cStr( '"' ),
142       bFormulas( false ), bIncludeFiltered( true ),
143       bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
144       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
145       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
146       mExportTextOptions()
147 {
148     pUndoDoc = nullptr;
149     pExtOptions = nullptr;
150     // Only one sheet (table) supported
151     aRange.aEnd.SetTab( aRange.aStart.Tab() );
152 }
153 
154 // Evaluate input string - either range, cell or the whole document (when error)
155 // If a View exists, the TabNo of the view will be used.
ScImportExport(ScDocument & r,const OUString & rPos)156 ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
157     : pDocSh( dynamic_cast< ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
158       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
159       cSep( '\t' ), cStr( '"' ),
160       bFormulas( false ), bIncludeFiltered( true ),
161       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
162       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
163       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
164       mExportTextOptions()
165 {
166     pUndoDoc = nullptr;
167     pExtOptions = nullptr;
168 
169     SCTAB nTab = ScDocShell::GetCurTab();
170     aRange.aStart.SetTab( nTab );
171     OUString aPos( rPos );
172     // Named range?
173     ScRangeName* pRange = rDoc.GetRangeName();
174     if (pRange)
175     {
176         const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClassPtr()->uppercase(aPos));
177         if (pData)
178         {
179             if( pData->HasType( ScRangeData::Type::RefArea )
180                 || pData->HasType( ScRangeData::Type::AbsArea )
181                 || pData->HasType( ScRangeData::Type::AbsPos ) )
182             {
183                 pData->GetSymbol(aPos);
184             }
185         }
186     }
187     formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention();
188     // Range?
189     if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
190         bSingle = false;
191     // Cell?
192     else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
193         aRange.aEnd = aRange.aStart;
194     else
195         bAll = true;
196 }
197 
~ScImportExport()198 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
199 {
200     pUndoDoc.reset();
201     pExtOptions.reset();
202 }
203 
SetExtOptions(const ScAsciiOptions & rOpt)204 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
205 {
206     if ( pExtOptions )
207         *pExtOptions = rOpt;
208     else
209         pExtOptions.reset(new ScAsciiOptions( rOpt ));
210 
211     //  "normal" Options
212 
213     cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
214     cStr = rOpt.GetTextSep();
215 }
216 
SetFilterOptions(const OUString & rFilterOptions)217 void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
218 {
219     maFilterOptions = rFilterOptions;
220 }
221 
IsFormatSupported(SotClipboardFormatId nFormat)222 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
223 {
224     return nFormat == SotClipboardFormatId::STRING
225               || nFormat == SotClipboardFormatId::STRING_TSVC
226               || nFormat == SotClipboardFormatId::SYLK
227               || nFormat == SotClipboardFormatId::LINK
228               || nFormat == SotClipboardFormatId::HTML
229               || nFormat == SotClipboardFormatId::HTML_SIMPLE
230               || nFormat == SotClipboardFormatId::DIF;
231 }
232 
233 // Prepare for Undo
StartPaste()234 bool ScImportExport::StartPaste()
235 {
236     if ( !bAll )
237     {
238         ScEditableTester aTester( rDoc, aRange );
239         if ( !aTester.IsEditable() )
240         {
241             std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
242                                                           VclMessageType::Info, VclButtonsType::Ok,
243                                                           ScResId(aTester.GetMessageId())));
244             xInfoBox->run();
245             return false;
246         }
247     }
248     if( bUndo && pDocSh && rDoc.IsUndoEnabled())
249     {
250         pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
251         pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
252         rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
253     }
254     return true;
255 }
256 
257 // Create Undo/Redo actions, Invalidate/Repaint
EndPaste(bool bAutoRowHeight)258 void ScImportExport::EndPaste(bool bAutoRowHeight)
259 {
260     bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
261                     aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
262 
263     if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh )
264     {
265         ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
266         pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
267         rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
268         ScMarkData aDestMark(pRedoDoc->GetSheetLimits());
269         aDestMark.SetMarkArea(aRange);
270         pDocSh->GetUndoManager()->AddUndoAction(
271             std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
272     }
273     pUndoDoc.reset();
274     if( pDocSh )
275     {
276         if (!bHeight)
277             pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
278         pDocSh->SetDocumentModified();
279     }
280     ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
281     if ( pViewSh )
282         pViewSh->UpdateInputHandler();
283 
284 }
285 
ExportData(const OUString & rMimeType,css::uno::Any & rValue)286 bool ScImportExport::ExportData( const OUString& rMimeType,
287                                  css::uno::Any & rValue )
288 {
289     SvMemoryStream aStrm;
290     // mba: no BaseURL for data exchange
291     if( ExportStream( aStrm, OUString(),
292                 SotExchange::GetFormatIdFromMimeType( rMimeType ) ))
293     {
294         aStrm.WriteUChar( 0 );
295         rValue <<= css::uno::Sequence< sal_Int8 >(
296                                         static_cast<sal_Int8 const *>(aStrm.GetData()),
297                                         aStrm.TellEnd() );
298         return true;
299     }
300     return false;
301 }
302 
ImportString(const OUString & rText,SotClipboardFormatId nFmt)303 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
304 {
305     switch ( nFmt )
306     {
307         // formats supporting unicode
308         case SotClipboardFormatId::STRING :
309         case SotClipboardFormatId::STRING_TSVC :
310         {
311             ScImportStringStream aStrm( rText);
312             return ImportStream( aStrm, OUString(), nFmt );
313             // ImportStream must handle RTL_TEXTENCODING_UNICODE
314         }
315         default:
316         {
317             rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
318             OString aTmp( rText.getStr(), rText.getLength(), eEnc );
319             SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ );
320             aStrm.SetStreamCharSet( eEnc );
321             SetNoEndianSwap( aStrm );       //! no swapping in memory
322             return ImportStream( aStrm, OUString(), nFmt );
323         }
324     }
325 }
326 
ExportString(OUString & rText,SotClipboardFormatId nFmt)327 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
328 {
329     if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC )
330     {
331         SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
332         rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
333         OString aTmp;
334         bool bOk = ExportByteString( aTmp, eEnc, nFmt );
335         rText = OStringToOUString( aTmp, eEnc );
336         return bOk;
337     }
338     //  nSizeLimit not needed for OUString
339 
340     SvMemoryStream aStrm;
341     aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
342     SetNoEndianSwap( aStrm );       //! no swapping in memory
343     // mba: no BaseURL for data exc
344     if( ExportStream( aStrm, OUString(), nFmt ) )
345     {
346         aStrm.WriteUInt16( 0 );
347         rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
348         return true;
349     }
350     rText.clear();
351     return false;
352 
353     // ExportStream must handle RTL_TEXTENCODING_UNICODE
354 }
355 
ExportByteString(OString & rText,rtl_TextEncoding eEnc,SotClipboardFormatId nFmt)356 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
357 {
358     OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
359     if ( eEnc == RTL_TEXTENCODING_UNICODE )
360         eEnc = osl_getThreadTextEncoding();
361 
362     if (!nSizeLimit)
363         nSizeLimit = SAL_MAX_UINT16;
364 
365     SvMemoryStream aStrm;
366     aStrm.SetStreamCharSet( eEnc );
367     SetNoEndianSwap( aStrm );       //! no swapping in memory
368     // mba: no BaseURL for data exchange
369     if( ExportStream( aStrm, OUString(), nFmt ) )
370     {
371         aStrm.WriteChar( 0 );
372         if( aStrm.TellEnd() <= nSizeLimit )
373         {
374             rText = static_cast<const char*>(aStrm.GetData());
375             return true;
376         }
377     }
378     rText.clear();
379     return false;
380 }
381 
ImportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)382 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
383 {
384     if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
385     {
386         if( ExtText2Doc( rStrm ) )      // evaluate pExtOptions
387             return true;
388     }
389     if( nFmt == SotClipboardFormatId::SYLK )
390     {
391         if( Sylk2Doc( rStrm ) )
392             return true;
393     }
394     if( nFmt == SotClipboardFormatId::DIF )
395     {
396         if( Dif2Doc( rStrm ) )
397             return true;
398     }
399     if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
400     {
401         if( RTF2Doc( rStrm, rBaseURL ) )
402             return true;
403     }
404     if( nFmt == SotClipboardFormatId::LINK )
405         return true;            // Link-Import?
406     if ( nFmt == SotClipboardFormatId::HTML )
407     {
408         if( HTML2Doc( rStrm, rBaseURL ) )
409             return true;
410     }
411     if ( nFmt == SotClipboardFormatId::HTML_SIMPLE )
412     {
413         MSE40HTMLClipFormatObj aMSE40ClpObj;                // needed to skip the header data
414         SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
415         if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) )
416             return true;
417     }
418 
419     return false;
420 }
421 
ExportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)422 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
423 {
424     if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
425     {
426         if( Doc2Text( rStrm ) )
427             return true;
428     }
429     if( nFmt == SotClipboardFormatId::SYLK )
430     {
431         if( Doc2Sylk( rStrm ) )
432             return true;
433     }
434     if( nFmt == SotClipboardFormatId::DIF )
435     {
436         if( Doc2Dif( rStrm ) )
437             return true;
438     }
439     if( nFmt == SotClipboardFormatId::LINK && !bAll )
440     {
441         OUString aDocName;
442         if ( rDoc.IsClipboard() )
443             aDocName = ScGlobal::GetClipDocName();
444         else
445         {
446             SfxObjectShell* pShell = rDoc.GetDocumentShell();
447             if (pShell)
448                 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );
449         }
450 
451         OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
452         if( !aDocName.isEmpty() )
453         {
454             // Always use Calc A1 syntax for paste link.
455             OUString aRefName;
456             ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
457             if( bSingle )
458                 aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO);
459             else
460             {
461                 if( aRange.aStart.Tab() != aRange.aEnd.Tab() )
462                     nFlags |= ScRefFlags::TAB2_3D;
463                 aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO);
464             }
465             OUString aAppName = Application::GetAppName();
466 
467             // extra bits are used to tell the client to prefer external
468             // reference link.
469 
470             WriteUnicodeOrByteString( rStrm, aAppName, true );
471             WriteUnicodeOrByteString( rStrm, aDocName, true );
472             WriteUnicodeOrByteString( rStrm, aRefName, true );
473             WriteUnicodeOrByteString( rStrm, "calc:extref", true );
474             if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
475                 rStrm.WriteUInt16( 0 );
476             else
477                 rStrm.WriteChar( 0 );
478             return rStrm.GetError() == ERRCODE_NONE;
479         }
480     }
481     if( nFmt == SotClipboardFormatId::HTML )
482     {
483         if( Doc2HTML( rStrm, rBaseURL ) )
484             return true;
485     }
486     if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
487     {
488         if( Doc2RTF( rStrm ) )
489             return true;
490     }
491 
492     return false;
493 }
494 
WriteUnicodeOrByteString(SvStream & rStrm,const OUString & rString,bool bZero)495 void ScImportExport::WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero )
496 {
497     rtl_TextEncoding eEnc = rStrm.GetStreamCharSet();
498     if ( eEnc == RTL_TEXTENCODING_UNICODE )
499     {
500         if ( !lcl_IsEndianSwap( rStrm ) )
501             rStrm.WriteBytes(rString.getStr(), rString.getLength() * sizeof(sal_Unicode));
502         else
503         {
504             const sal_Unicode* p = rString.getStr();
505             const sal_Unicode* const pStop = p + rString.getLength();
506             while ( p < pStop )
507             {
508                 rStrm.WriteUInt16( *p );
509             }
510         }
511         if ( bZero )
512             rStrm.WriteUInt16( 0 );
513     }
514     else
515     {
516         OString aByteStr(OUStringToOString(rString, eEnc));
517         rStrm.WriteOString( aByteStr );
518         if ( bZero )
519             rStrm.WriteChar( 0 );
520     }
521 }
522 
523 // This function could be replaced by endlub()
WriteUnicodeOrByteEndl(SvStream & rStrm)524 void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm )
525 {
526     if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
527     {   // same as endl() but unicode
528         switch ( rStrm.GetLineDelimiter() )
529         {
530             case LINEEND_CR :
531                 rStrm.WriteUInt16( '\r' );
532             break;
533             case LINEEND_LF :
534                 rStrm.WriteUInt16( '\n' );
535             break;
536             default:
537                 rStrm.WriteUInt16( '\r' ).WriteUInt16( '\n' );
538         }
539     }
540     else
541         endl( rStrm );
542 }
543 
CountVisualWidth(const OUString & rStr,sal_Int32 & nIdx,sal_Int32 nMaxWidth)544 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
545 {
546     sal_Int32 nWidth = 0;
547     while(nIdx < rStr.getLength() && nWidth < nMaxWidth)
548     {
549         sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx);
550 
551         if (unicode::isCJKIVSCharacter(nCode) || (nCode >= 0x3000 && nCode <= 0x303F))
552             nWidth += 2;
553         else if (!unicode::isIVSSelector(nCode))
554             nWidth += 1;
555     }
556 
557     if (nIdx < rStr.getLength())
558     {
559         sal_Int32 nTmpIdx = nIdx;
560         sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx);
561 
562         if (unicode::isIVSSelector(nCode))
563             nIdx = nTmpIdx;
564     }
565     return nWidth;
566 }
567 
CountVisualWidth(const OUString & rStr)568 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr)
569 {
570     sal_Int32 nIdx = 0;
571     return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32);
572 }
573 
SetNoEndianSwap(SvStream & rStrm)574 void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
575 {
576 #ifdef OSL_BIGENDIAN
577     rStrm.SetEndian( SvStreamEndian::BIG );
578 #else
579     rStrm.SetEndian( SvStreamEndian::LITTLE );
580 #endif
581 }
582 
namespace {

/** Classification of a quote character met while parsing a field; see
    lcl_isEscapedOrFieldEndQuote() for when each value is returned. */
enum QuoteType
{
    FIELDSTART_QUOTE,   ///< first quote in a field
    FIRST_QUOTE,        ///< first of a doubled quote
    SECOND_QUOTE,       ///< second of a doubled quote
    FIELDEND_QUOTE,     ///< quote that ends the field
    DONTKNOW_QUOTE      ///< unescaped quote not considered as end of field
};

}
595 
596 /** Determine if *p is a quote that ends a quoted field.
597 
598     Precondition: we are parsing a quoted field already and *p is a quote.
599 
600     @return
601         FIELDEND_QUOTE if end of field quote
602         DONTKNOW_QUOTE anything else
603  */
lcl_isFieldEndQuote(const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode & rcDetectSep)604 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
605 {
606     // Due to broken CSV generators that don't double embedded quotes check if
607     // a field separator immediately or with trailing spaces follows the quote,
608     // only then end the field, or at end of string.
609     constexpr sal_Unicode cBlank = ' ';
610     if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
611         return FIELDEND_QUOTE;
612     // Detect a possible blank separator if it's not already in the list (which
613     // was checked right above for p[1]==cBlank).
614     const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank);
615     while (p[1] == cBlank)
616         ++p;
617     if (!p[1] || ScGlobal::UnicodeStrChr( pSeps, p[1]))
618         return FIELDEND_QUOTE;
619     // Extended separator detection after a closing quote (with or without
620     // blanks). Note that nQuotes is incremented *after* the call so is not yet
621     // even here, and that with separator detection we reach here only if
622     // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or
623     // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have
624     // to be checked.
625     if (!rcDetectSep)
626     {
627         constexpr sal_Unicode vSep[] = { ',', '\t', ';' };
628         for (const sal_Unicode c : vSep)
629         {
630             if (p[1] == c)
631             {
632                 rcDetectSep = c;
633                 return FIELDEND_QUOTE;
634             }
635         }
636     }
637     // Blank separator is least significant, after others.
638     if (bBlankSep)
639     {
640         rcDetectSep = cBlank;
641         return FIELDEND_QUOTE;
642     }
643     return DONTKNOW_QUOTE;
644 }
645 
646 /** Determine if *p is a quote that is escaped by being doubled or ends a
647     quoted field.
648 
649     Precondition: *p is a quote.
650 
651     @param nQuotes
652         Quote characters encountered so far.
653         Odd (after opening quote) means either no embedded quotes or only quote
654         pairs so far.
655         Even means either not in a quoted field or already one quote
656         encountered, the first of a pair.
657 
658     @return
659         FIELDSTART_QUOTE if first quote in a field, either starting content or
660                             embedded so caller should check beforehand.
661         FIRST_QUOTE      if first of a doubled quote
662         SECOND_QUOTE     if second of a doubled quote
663         FIELDEND_QUOTE   if end of field quote
664         DONTKNOW_QUOTE   if an unescaped quote we don't consider as end of field,
665                             do not increment nQuotes in caller then!
666  */
lcl_isEscapedOrFieldEndQuote(sal_Int32 nQuotes,const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode cStr,sal_Unicode & rcDetectSep)667 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
668         const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
669 {
670     if ((nQuotes & 1) == 0)
671     {
672         if (p[-1] == cStr)
673             return SECOND_QUOTE;
674         else
675         {
676             SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
677             return FIELDSTART_QUOTE;
678         }
679     }
680     if (p[1] == cStr)
681         return FIRST_QUOTE;
682     return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
683 }
684 
685 /** Append characters of [p1,p2) to rField.
686 
687     @returns TRUE if ok; FALSE if data overflow, truncated
688  */
lcl_appendLineData(OUString & rField,const sal_Unicode * p1,const sal_Unicode * p2)689 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
690 {
691     if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit)
692     {
693         rField += std::u16string_view( p1, sal::static_int_cast<sal_Int32>( p2 - p1 ) );
694         return true;
695     }
696     else
697     {
698         SAL_WARN( "sc", "lcl_appendLineData: data overflow");
699         rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() );
700         return false;
701     }
702 }
703 
namespace {

/** How lcl_ScanString() treats a doubled quote character. */
enum class DoubledQuoteMode
{
    /// both are taken, additionally start and end quote are included in string
    KEEP_ALL,
    /// escaped quote, one is taken, one ignored
    ESCAPE,
};

}
713 
/** Scan a quoted string starting at p and append its content to rString.

    @param p
        Points at the opening quote of a zero-terminated buffer.
    @param rString
        Receives the scanned content (appended).
    @param pSeps
        Zero-terminated list of field separators, used in ESCAPE mode to
        decide whether a single quote ends the field.
    @param cStr
        The quote character.
    @param eMode
        KEEP_ALL: opening quote, closing quote and both quotes of a pair are
        kept. ESCAPE: the opening quote is skipped and of each pair only one
        quote is kept.
    @param rbOverflowCell
        Set to true if lcl_appendLineData() had to truncate; never reset.

    @return position where scanning stopped.
 */
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rString,
            const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
    if (eMode != DoubledQuoteMode::KEEP_ALL)
        ++p;    //! jump over opening quote

    bool bMore;
    do
    {
        bMore = false;
        const sal_Unicode* const pChunk = p;

        while (*p)
        {
            if (*p != cStr)
            {
                ++p;
                continue;
            }

            // *p is a quote, look at what follows it.
            ++p;
            if (*p != cStr)
            {
                // Single quote.
                if (eMode != DoubledQuoteMode::ESCAPE)
                    break;
                sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
                if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
                    break;
                continue;   // embedded unescaped quote, carry on
            }

            // doubled quote char
            if (eMode == DoubledQuoteMode::KEEP_ALL)
            {
                ++p;            // both for us (not leaving the loop)
            }
            else if (eMode == DoubledQuoteMode::ESCAPE)
            {
                ++p;            // one for us (leaving the loop)
                bMore = true;   // and more
                break;
            }
        }

        if (pChunk < p)
        {
            // Unless everything is kept, drop the quote just before p; at
            // end of data only if the last character really is a quote.
            const sal_Unicode* pChunkEnd = p;
            if (eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr))
                pChunkEnd = p - 1;
            if (!lcl_appendLineData( rString, pChunk, pChunkEnd))
                rbOverflowCell = true;
        }
    } while (bMore);

    return p;
}
769 
lcl_UnescapeSylk(OUString & rString,SylkVersion eVersion)770 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
771 {
772     // Older versions didn't escape the semicolon.
773     // Older versions quoted the string and doubled embedded quotes, but not
774     // the semicolons, which was plain wrong.
775     if (eVersion >= SylkVersion::OOO32)
776         rString = rString.replaceAll(";;", ";");
777     else
778         rString = rString.replaceAll("\"\"", "\"");
779 
780     rString = rString.replaceAll(SYLK_LF, "\n");
781 }
782 
/** Scan a quoted SYLK string field and append its unescaped content to
    rString.

    @param p
        Points at the opening quote of a zero-terminated buffer.
    @param rString
        Receives the text between the quotes (appended), then the whole of
        rString is run through lcl_UnescapeSylk(). If no closing quote is
        found, all remaining data is taken as the string.
    @param eVersion
        From OOO32 on, a quote followed by ";;" is an escaped semicolon and
        a quote followed by a single ';' ends the field. Before that, a
        doubled quote is an escaped quote and a quote followed by ';' ends
        the field.

    @return the closing quote if the field was ended, else the terminating
            zero.
 */
static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pOpenQuote = p;
    const sal_Unicode* pCloseQuote = nullptr;
    const bool bSemicolonEscape = (eVersion >= SylkVersion::OOO32);

    for (++p; *p; ++p)
    {
        if (*p != '"')
            continue;

        // Candidate for the closing quote until proven otherwise.
        pCloseQuote = p;
        const sal_Unicode cNext = *(p+1);

        if (bSemicolonEscape)
        {
            if (cNext != ';')
                continue;
            if (*(p+2) != ';')
                break;              // end field
            p += 2;                 // escaped ';'
            pCloseQuote = nullptr;
        }
        else if (cNext == '"')
        {
            ++p;                    // escaped '"'
            pCloseQuote = nullptr;
        }
        else if (cNext == ';')
            break;                  // end field
    }

    if (!pCloseQuote)
        pCloseQuote = p;  // Take all data as string.

    rString += std::u16string_view( pOpenQuote + 1,
            sal::static_int_cast<sal_Int32>( pCloseQuote - pOpenQuote - 1 ) );
    lcl_UnescapeSylk( rString, eVersion);
    return p;
}
824 
lcl_ScanSylkFormula(const sal_Unicode * p,OUString & rString,SylkVersion eVersion)825 static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
826         OUString& rString, SylkVersion eVersion )
827 {
828     const sal_Unicode* pStart = p;
829     if (eVersion >= SylkVersion::OOO32)
830     {
831         while (*p)
832         {
833             if (*p == ';')
834             {
835                 if (*(p+1) == ';')
836                     ++p;        // escaped ';'
837                 else
838                     break;      // end field
839             }
840             ++p;
841         }
842         rString += std::u16string_view( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
843         lcl_UnescapeSylk( rString, eVersion);
844     }
845     else
846     {
847         // Nasty. If in old versions the formula contained a semicolon, it was
848         // quoted and embedded quotes were doubled, but semicolons were not. If
849         // there was no semicolon, it could still contain quotes and doubled
850         // embedded quotes if it was something like ="a""b", which was saved as
851         // E"a""b" as is and has to be preserved, even if older versions
852         // couldn't even load it correctly. However, theoretically another
853         // field might follow and thus the line contain a semicolon again, such
854         // as ...;E"a""b";...
855         bool bQuoted = false;
856         if (*p == '"')
857         {
858             // May be a quoted expression or just a string constant expression
859             // with quotes.
860             while (*(++p))
861             {
862                 if (*p == '"')
863                 {
864                     if (*(p+1) == '"')
865                         ++p;            // escaped '"'
866                     else
867                         break;          // closing '"', had no ';' yet
868                 }
869                 else if (*p == ';')
870                 {
871                     bQuoted = true;     // ';' within quoted expression
872                     break;
873                 }
874             }
875             p = pStart;
876         }
877         if (bQuoted)
878             p = lcl_ScanSylkString( p, rString, eVersion);
879         else
880         {
881             while (*p && *p != ';')
882                 ++p;
883             rString += std::u16string_view( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
884         }
885     }
886     return p;
887 }
888 
lcl_DoubleEscapeChar(OUString & rString,sal_Unicode cStr)889 static void lcl_DoubleEscapeChar( OUString& rString, sal_Unicode cStr )
890 {
891     sal_Int32 n = 0;
892     while( ( n = rString.indexOf( cStr, n ) ) != -1 )
893     {
894         rString = rString.replaceAt( n, 0, OUString(cStr) );
895         n += 2;
896     }
897 }
898 
lcl_WriteString(SvStream & rStrm,OUString & rString,sal_Unicode cQuote,sal_Unicode cEsc)899 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
900 {
901     if (cEsc)
902         lcl_DoubleEscapeChar( rString, cEsc );
903 
904     if (cQuote)
905     {
906         rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);
907     }
908 
909     ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
910 }
911 
lcl_WriteSimpleString(SvStream & rStrm,const OUString & rString)912 static void lcl_WriteSimpleString( SvStream& rStrm, const OUString& rString )
913 {
914     ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
915 }
916 
Text2Doc(SvStream & rStrm)917 bool ScImportExport::Text2Doc( SvStream& rStrm )
918 {
919     bool bOk = true;
920 
921     sal_Unicode pSeps[2];
922     pSeps[0] = cSep;
923     pSeps[1] = 0;
924 
925     ScSetStringParam aSetStringParam;
926     aSetStringParam.mbCheckLinkFormula = true;
927 
928     SCCOL nStartCol = aRange.aStart.Col();
929     SCROW nStartRow = aRange.aStart.Row();
930     SCCOL nEndCol = aRange.aEnd.Col();
931     SCROW nEndRow = aRange.aEnd.Row();
932     sal_uLong  nOldPos = rStrm.Tell();
933     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
934     bool   bData = !bSingle;
935     if( !bSingle)
936         bOk = StartPaste();
937 
938     while( bOk )
939     {
940         OUString aLine;
941         OUString aCell;
942         SCROW nRow = nStartRow;
943         rStrm.Seek( nOldPos );
944         for( ;; )
945         {
946             rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
947             // tdf#125440 When inserting tab separated string, consider quotes as field markers
948             DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL;
949             if( rStrm.eof() )
950                 break;
951             SCCOL nCol = nStartCol;
952             const sal_Unicode* p = aLine.getStr();
953             while( *p )
954             {
955                 aCell.clear();
956                 const sal_Unicode* q = p;
957                 while (*p && *p != cSep)
958                 {
959                     // Always look for a pairing quote and ignore separator in between.
960                     while (*p && *p == cStr)
961                         q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell );
962                     // All until next separator or quote.
963                     while (*p && *p != cSep && *p != cStr)
964                         ++p;
965                     if (!lcl_appendLineData( aCell, q, p))
966                         bOverflowCell = true;   // display warning on import
967                     q = p;
968                 }
969                 if (*p)
970                     ++p;
971                 if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) )
972                 {
973                     if( bSingle )
974                     {
975                         if (nCol>nEndCol) nEndCol = nCol;
976                         if (nRow>nEndRow) nEndRow = nRow;
977                     }
978                     if( bData && nCol <= nEndCol && nRow <= nEndRow )
979                         rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
980                 }
981                 else                            // too many columns/rows
982                 {
983                     if (!rDoc.ValidRow(nRow))
984                         bOverflowRow = true;    // display warning on import
985                     if (!rDoc.ValidCol(nCol))
986                         bOverflowCol = true;    // display warning on import
987                 }
988                 ++nCol;
989             }
990             ++nRow;
991         }
992 
993         if( !bData )
994         {
995             aRange.aEnd.SetCol( nEndCol );
996             aRange.aEnd.SetRow( nEndRow );
997             bOk = StartPaste();
998             bData = true;
999         }
1000         else
1001             break;
1002     }
1003 
1004     EndPaste();
1005     if (bOk && mbImportBroadcast)
1006     {
1007         rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
1008         pDocSh->PostDataChanged();
1009     }
1010 
1011     return bOk;
1012 }
1013 
1014 //  Extended Ascii-Import
1015 
lcl_PutString(ScDocumentImport & rDocImport,bool bUseDocImport,SCCOL nCol,SCROW nRow,SCTAB nTab,const OUString & rStr,sal_uInt8 nColFormat,SvNumberFormatter * pFormatter,bool bDetectNumFormat,bool bSkipEmptyCells,const::utl::TransliterationWrapper & rTransliteration,CalendarWrapper & rCalendar,const::utl::TransliterationWrapper * pSecondTransliteration,CalendarWrapper * pSecondCalendar)1016 static bool lcl_PutString(
1017     ScDocumentImport& rDocImport, bool bUseDocImport,
1018     SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
1019     SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bSkipEmptyCells,
1020     const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
1021     const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
1022 {
1023     ScDocument& rDoc = rDocImport.getDoc();
1024     bool bMultiLine = false;
1025     if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) )
1026         return bMultiLine;
1027     if ( rStr.isEmpty() )
1028     {
1029         if ( !bSkipEmptyCells )
1030         {   // delete destination cell
1031             if ( bUseDocImport )
1032                 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
1033             else
1034                 rDoc.SetString( nCol, nRow, nTab, rStr );
1035         }
1036         return false;
1037     }
1038 
1039     if ( nColFormat == SC_COL_TEXT )
1040     {
1041         double fDummy;
1042         sal_uInt32 nIndex = 0;
1043         if (pFormatter->IsNumberFormat(rStr, nIndex, fDummy))
1044         {
1045             // Set the format of this cell to Text.
1046             sal_uInt32 nFormat = pFormatter->GetStandardFormat(SvNumFormatType::TEXT);
1047             ScPatternAttr aNewAttrs(rDoc.GetPool());
1048             SfxItemSet& rSet = aNewAttrs.GetItemSet();
1049             rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
1050             rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs);
1051         }
1052         if ( bUseDocImport )
1053         {
1054             if(ScStringUtil::isMultiline(rStr))
1055             {
1056                 ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1057                 rEngine.SetTextCurrentDefaults(rStr);
1058                 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1059                 return true;
1060             }
1061             else
1062             {
1063                 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
1064                 return false;
1065             }
1066         } else
1067         {
1068             rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
1069             return bMultiLine;
1070         }
1071     }
1072 
1073     if ( nColFormat == SC_COL_ENGLISH )
1074     {
1075         //! SetString with Extra-Flag ???
1076 
1077         SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1078         sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
1079         double fVal;
1080         if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
1081         {
1082             // Numberformat will not be set to English
1083             if ( bUseDocImport )
1084                 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
1085             else
1086                 rDoc.SetValue( nCol, nRow, nTab, fVal );
1087             return bMultiLine;
1088         }
1089         // else, continue with SetString
1090     }
1091     else if ( nColFormat != SC_COL_STANDARD ) // Datumformats
1092     {
1093         const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
1094         const sal_Int32 nLen = rStr.getLength();
1095         sal_Int32 nStart[nMaxNumberParts];
1096         sal_Int32 nEnd[nMaxNumberParts];
1097 
1098         sal_uInt16 nDP, nMP, nYP;
1099         switch ( nColFormat )
1100         {
1101             case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; break;
1102             case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; break;
1103             case SC_COL_DMY:
1104             default:         nDP = 0; nMP = 1; nYP = 2; break;
1105         }
1106 
1107         sal_uInt16 nFound = 0;
1108         bool bInNum = false;
1109         for ( sal_Int32 nPos=0; nPos<nLen && (bInNum ||
1110                     nFound<nMaxNumberParts); nPos++ )
1111         {
1112             if (bInNum && nFound == 3 && nColFormat == SC_COL_YMD &&
1113                     nPos <= nStart[nFound]+2 && rStr[nPos] == 'T')
1114                 bInNum = false;     // ISO-8601: YYYY-MM-DDThh:mm...
1115             else if ((((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
1116                         && ScGlobal::getCharClassPtr()->isLetterNumeric( rStr, nPos))
1117                     || ScGlobal::getCharClassPtr()->isDigit( rStr, nPos))
1118             {
1119                 if (!bInNum)
1120                 {
1121                     bInNum = true;
1122                     nStart[nFound] = nPos;
1123                     ++nFound;
1124                 }
1125                 nEnd[nFound-1] = nPos;
1126             }
1127             else
1128                 bInNum = false;
1129         }
1130 
1131         if ( nFound == 1 )
1132         {
1133             //  try to break one number (without separators) into date fields
1134 
1135             sal_Int32 nDateStart = nStart[0];
1136             sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;
1137 
1138             if ( nDateLen >= 5 && nDateLen <= 8 &&
1139                     ScGlobal::getCharClassPtr()->isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
1140             {
1141                 //  6 digits: 2 each for day, month, year
1142                 //  8 digits: 4 for year, 2 each for day and month
1143                 //  5 or 7 digits: first field is shortened by 1
1144 
1145                 bool bLongYear = ( nDateLen >= 7 );
1146                 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );
1147 
1148                 sal_uInt16 nFieldStart = nDateStart;
1149                 for (sal_uInt16 nPos=0; nPos<3; nPos++)
1150                 {
1151                     sal_uInt16 nFieldEnd = nFieldStart + 1;     // default: 2 digits
1152                     if ( bLongYear && nPos == nYP )
1153                         nFieldEnd += 2;                     // 2 extra digits for long year
1154                     if ( bShortFirst && nPos == 0 )
1155                         --nFieldEnd;                        // first field shortened?
1156 
1157                     nStart[nPos] = nFieldStart;
1158                     nEnd[nPos]   = nFieldEnd;
1159                     nFieldStart  = nFieldEnd + 1;
1160                 }
1161                 nFound = 3;
1162             }
1163         }
1164 
1165         if ( nFound >= 3 )
1166         {
1167             using namespace ::com::sun::star;
1168             bool bSecondCal = false;
1169             sal_uInt16 nDay  = static_cast<sal_uInt16>(rStr.copy( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ).toInt32());
1170             sal_uInt16 nYear = static_cast<sal_uInt16>(rStr.copy( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ).toInt32());
1171             OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
1172             sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
1173             if (!nMonth)
1174             {
1175                 static constexpr OUStringLiteral aSepShortened = u"SEP";
1176                 uno::Sequence< i18n::CalendarItem2 > xMonths;
1177                 sal_Int32 i, nMonthCount;
1178                 //  first test all month names from local international
1179                 xMonths = rCalendar.getMonths();
1180                 nMonthCount = xMonths.getLength();
1181                 for (i=0; i<nMonthCount && !nMonth; i++)
1182                 {
1183                     if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
1184                          rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
1185                         nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1186                     else if ( i == 8 && rTransliteration.isEqual( "SEPT",
1187                                 xMonths[i].AbbrevName ) &&
1188                             rTransliteration.isEqual( aMStr, aSepShortened ) )
1189                     {   // correct English abbreviation is SEPT,
1190                         // but data mostly contains SEP only
1191                         nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1192                     }
1193                 }
1194                 //  if none found, then test english month names
1195                 if ( !nMonth && pSecondCalendar && pSecondTransliteration )
1196                 {
1197                     xMonths = pSecondCalendar->getMonths();
1198                     nMonthCount = xMonths.getLength();
1199                     for (i=0; i<nMonthCount && !nMonth; i++)
1200                     {
1201                         if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
1202                              pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
1203                         {
1204                             nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1205                             bSecondCal = true;
1206                         }
1207                         else if ( i == 8 && pSecondTransliteration->isEqual(
1208                                     aMStr, aSepShortened ) )
1209                         {   // correct English abbreviation is SEPT,
1210                             // but data mostly contains SEP only
1211                             nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1212                             bSecondCal = true;
1213                         }
1214                     }
1215                 }
1216             }
1217 
1218             SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1219             if ( nYear < 100 )
1220                 nYear = pDocFormatter->ExpandTwoDigitYear( nYear );
1221 
1222             CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
1223             sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
1224             if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
1225             {
1226                 --nMonth;
1227                 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
1228                 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
1229                 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
1230                 sal_Int16 nHour, nMinute, nSecond;
1231                 // #i14974# The imported value should have no fractional value, so set the
1232                 // time fields to zero (ICU calendar instance defaults to current date/time)
1233                 nHour = nMinute = nSecond = 0;
1234                 if (nFound > 3)
1235                     nHour = static_cast<sal_Int16>(rStr.copy( nStart[3], nEnd[3]+1-nStart[3]).toInt32());
1236                 if (nFound > 4)
1237                     nMinute = static_cast<sal_Int16>(rStr.copy( nStart[4], nEnd[4]+1-nStart[4]).toInt32());
1238                 if (nFound > 5)
1239                     nSecond = static_cast<sal_Int16>(rStr.copy( nStart[5], nEnd[5]+1-nStart[5]).toInt32());
1240                 // do not use calendar's milliseconds, to avoid fractional part truncation
1241                 double fFrac = 0.0;
1242                 if (nFound > 6)
1243                 {
1244                     sal_Unicode cDec = '.';
1245                     OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
1246                     rtl_math_ConversionStatus eStatus;
1247                     double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
1248                     if (eStatus == rtl_math_ConversionStatus_Ok)
1249                         fFrac = fV / 86400.0;
1250                 }
1251                 sal_Int32 nPos;
1252                 if (nFound > 3 && 1 <= nHour && nHour <= 12  // nHour 0 and >=13 can't be AM/PM
1253                         && (nPos = nEnd[nFound-1] + 1) < nLen)
1254                 {
1255                     // Dreaded AM/PM may be following.
1256                     while (nPos < nLen && rStr[nPos] == ' ')
1257                         ++nPos;
1258                     if (nPos < nLen)
1259                     {
1260                         sal_Int32 nStop = nPos;
1261                         while (nStop < nLen && rStr[nStop] != ' ')
1262                             ++nStop;
1263                         OUString aAmPm = rStr.copy( nPos, nStop - nPos);
1264                         // For AM only 12 needs to be treated, whereas for PM
1265                         // it must not. Check both, locale and second/English
1266                         // strings.
1267                         if (nHour == 12 &&
1268                                 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
1269                                  (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "AM"))))
1270                         {
1271                             nHour = 0;
1272                         }
1273                         else if (nHour < 12 &&
1274                                 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
1275                                  (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "PM"))))
1276                         {
1277                             nHour += 12;
1278                         }
1279                     }
1280                 }
1281                 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
1282                 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
1283                 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
1284                 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
1285                 if ( pCalendar->isValid() )
1286                 {
1287                     double fDiff = DateTime(pDocFormatter->GetNullDate()) -
1288                         pCalendar->getEpochStart();
1289                     // #i14974# must use getLocalDateTime to get the same
1290                     // date values as set above
1291                     double fDays = pCalendar->getLocalDateTime() + fFrac;
1292                     fDays -= fDiff;
1293 
1294                     LanguageType eLatin, eCjk, eCtl;
1295                     rDoc.GetLanguage( eLatin, eCjk, eCtl );
1296                     LanguageType eDocLang = eLatin;     //! which language for date formats?
1297 
1298                     SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
1299                     sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
1300                     // maybe there is a special format including seconds or milliseconds
1301                     if (nFound > 5)
1302                         nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
1303 
1304                     ScAddress aPos(nCol,nRow,nTab);
1305                     if ( bUseDocImport )
1306                         rDocImport.setNumericCell(aPos, fDays);
1307                     else
1308                         rDoc.SetValue( aPos, fDays );
1309                     rDoc.SetNumberFormat(aPos, nFormat);
1310 
1311                     return bMultiLine;     // success
1312                 }
1313             }
1314         }
1315     }
1316 
1317     // Standard or date not determined -> SetString / EditCell
1318     if( rStr.indexOf( '\n' ) == -1 )
1319     {
1320         ScSetStringParam aParam;
1321         aParam.mpNumFormatter = pFormatter;
1322         aParam.mbDetectNumberFormat = bDetectNumFormat;
1323         aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
1324         aParam.mbHandleApostrophe = false;
1325         aParam.mbCheckLinkFormula = true;
1326         if ( bUseDocImport )
1327             rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
1328         else
1329             rDoc.SetString( nCol, nRow, nTab, rStr, &aParam );
1330     }
1331     else
1332     {
1333         bMultiLine = true;
1334         ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1335         rEngine.SetTextCurrentDefaults(rStr);
1336         if ( bUseDocImport )
1337             rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1338         else
1339             rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
1340     }
1341     return bMultiLine;
1342 }
1343 
/** Extract one fixed-width field from a line.

    The field spans the positions [nStart, nNext) of rLine, with nNext
    clamped to the line length. Trailing blanks are stripped. If the
    remaining text starts and ends with a double quote, the quotes are
    removed and rbIsQuoted is set to true, otherwise it is set to false.
    Content longer than nArbitraryCellLengthLimit is truncated to that
    length and rbOverflowCell is set to true.

    @param rLine           the line to take the field from
    @param nStart          index of the first character of the field
    @param nNext           index one past the last character of the field
    @param rbIsQuoted      out: whether the field was enclosed in quotes;
                           left untouched if the field is empty because
                           nNext <= nStart
    @param rbOverflowCell  set to true if content had to be truncated,
                           never reset here
    @return the field content
 */
static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
                     bool& rbIsQuoted, bool& rbOverflowCell )
{
    sal_Int32 nLen = rLine.getLength();
    if (nNext > nLen)
        nNext = nLen;
    if ( nNext <= nStart )
        return EMPTY_OUSTRING;

    const sal_Unicode* pStr = rLine.getStr();

    // nSpace becomes one past the last non-blank character of the field.
    sal_Int32 nSpace = nNext;
    while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
        --nSpace;

    // If the field is all blanks then nSpace == nStart and pStr[nStart] is
    // a blank, so the second operand is not evaluated with an index before
    // the field.
    rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
    if (rbIsQuoted)
    {
        // Content length without the two quotes. This is -1 if the field
        // consists of a single quote character only, hence the max() below.
        const sal_Int32 nContentLen = nSpace - nStart - 2;
        bool bFits = (nContentLen <= nArbitraryCellLengthLimit);
        if (bFits)
            return rLine.copy(nStart+1, std::max< sal_Int32 >(0, nContentLen));
        else
        {
            SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
            rbOverflowCell = true;
            return rLine.copy(nStart+1, nArbitraryCellLengthLimit);
        }
    }
    else
    {
        bool bFits = (nSpace - nStart <= nArbitraryCellLengthLimit);
        if (bFits)
            return rLine.copy(nStart, nSpace-nStart);
        else
        {
            SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
            rbOverflowCell = true;
            return rLine.copy(nStart, nArbitraryCellLengthLimit);
        }
    }
}
1385 
ExtText2Doc(SvStream & rStrm)1386 bool ScImportExport::ExtText2Doc( SvStream& rStrm )
1387 {
1388     if (!pExtOptions)
1389         return Text2Doc( rStrm );
1390 
1391     sal_uInt64 const nOldPos = rStrm.Tell();
1392     sal_uInt64 const nRemaining = rStrm.remainingSize();
1393     std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
1394             ScResId( STR_LOAD_DOC ), nRemaining, true ));
1395     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
1396 
1397     SCCOL nStartCol = aRange.aStart.Col();
1398     SCCOL nEndCol = aRange.aEnd.Col();
1399     SCROW nStartRow = aRange.aStart.Row();
1400     SCTAB nTab = aRange.aStart.Tab();
1401 
1402     bool    bFixed              = pExtOptions->IsFixedLen();
1403     OUString aSeps              = pExtOptions->GetFieldSeps();  // Need non-const for ReadCsvLine(),
1404     const sal_Unicode* pSeps    = aSeps.getStr();               // but it will be const anyway (asserted below).
1405     bool    bMerge              = pExtOptions->IsMergeSeps();
1406     bool    bRemoveSpace        = pExtOptions->IsRemoveSpace();
1407     sal_uInt16  nInfoCount      = pExtOptions->GetInfoCount();
1408     const sal_Int32* pColStart  = pExtOptions->GetColStart();
1409     const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
1410     tools::Long nSkipLines             = pExtOptions->GetStartRow();
1411 
1412     LanguageType eDocLang = pExtOptions->GetLanguage();
1413     SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
1414     bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
1415     bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();
1416 
1417     // For date recognition
1418     ::utl::TransliterationWrapper aTransliteration(
1419         comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
1420     aTransliteration.loadModuleIfNeeded( eDocLang );
1421     CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
1422     aCalendar.loadDefaultCalendar(
1423         LanguageTag::convertToLocale( eDocLang ) );
1424     std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
1425     std::unique_ptr< CalendarWrapper > pEnglishCalendar;
1426     if ( eDocLang != LANGUAGE_ENGLISH_US )
1427     {
1428         pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
1429             comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
1430         aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
1431         pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1432         pEnglishCalendar->loadDefaultCalendar(
1433             LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
1434     }
1435 
1436     OUString aLine;
1437     OUString aCell;
1438     sal_uInt16 i;
1439     SCROW nRow = nStartRow;
1440     sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
1441 
1442     while(--nSkipLines>0)
1443     {
1444         aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
1445         if ( rStrm.eof() )
1446             break;
1447     }
1448 
1449     // Determine range for Undo.
1450     // We don't need this during import of a file to a new sheet or document...
1451     bool bDetermineRange = bUndo;
1452 
1453     // Row heights don't need to be adjusted on the fly if EndPaste() is called
1454     // afterwards, which happens only if bDetermineRange. This variable also
1455     // survives the toggle of bDetermineRange down at the end of the do{} loop.
1456     bool bRangeIsDetermined = bDetermineRange;
1457 
1458     bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();
1459 
1460     sal_uLong nOriginalStreamPos = rStrm.Tell();
1461 
1462     ScDocumentImport aDocImport(rDoc);
1463     do
1464     {
1465         for( ;; )
1466         {
1467             aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
1468             if ( rStrm.eof() && aLine.isEmpty() )
1469                 break;
1470 
1471             assert(pSeps == aSeps.getStr());
1472 
1473             if ( nRow > rDoc.MaxRow() )
1474             {
1475                 bOverflowRow = true;    // display warning on import
1476                 break;  // for
1477             }
1478 
1479             EmbeddedNullTreatment( aLine);
1480 
1481             sal_Int32 nLineLen = aLine.getLength();
1482             SCCOL nCol = nStartCol;
1483             bool bMultiLine = false;
1484             if ( bFixed ) //  Fixed line length
1485             {
1486                 sal_Int32 nStartIdx = 0;
1487                 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1488                 // overflow if there is really data following to be put behind
1489                 // the last column, which doesn't happen if info is
1490                 // SC_COL_SKIP.
1491                 for ( i=0; i<nInfoCount && nCol <= rDoc.MaxCol()+1; i++ )
1492                 {
1493                     sal_uInt8 nFmt = pColFormat[i];
1494                     if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
1495                     {
1496                         if (nCol > rDoc.MaxCol())
1497                             bOverflowCol = true;    // display warning on import
1498                         else if (!bDetermineRange)
1499                         {
1500                             sal_Int32 nNextIdx = nStartIdx;
1501                             if ( i + 1 < nInfoCount )
1502                                 CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
1503                             else
1504                                 nNextIdx = nLineLen;
1505 
1506                             bool bIsQuoted = false;
1507                             aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
1508                             if (bIsQuoted && bQuotedAsText)
1509                                 nFmt = SC_COL_TEXT;
1510 
1511                             bMultiLine |= lcl_PutString(
1512                                 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1513                                 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar,
1514                                 pEnglishTransliteration.get(), pEnglishCalendar.get());
1515 
1516                             nStartIdx = nNextIdx;
1517                         }
1518                         ++nCol;
1519                     }
1520                 }
1521             }
1522             else // Search for the separator
1523             {
1524                 SCCOL nSourceCol = 0;
1525                 sal_uInt16 nInfoStart = 0;
1526                 const sal_Unicode* p = aLine.getStr();
1527                 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1528                 // overflow if there is really data following to be put behind
1529                 // the last column, which doesn't happen if info is
1530                 // SC_COL_SKIP.
1531                 while (*p && nCol <= rDoc.MaxCol()+1)
1532                 {
1533                     bool bIsQuoted = false;
1534                     p = ScImportExport::ScanNextFieldFromString( p, aCell,
1535                             cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
1536 
1537                     sal_uInt8 nFmt = SC_COL_STANDARD;
1538                     for ( i=nInfoStart; i<nInfoCount; i++ )
1539                     {
1540                         if ( pColStart[i] == nSourceCol + 1 )       // pColStart is 1-based
1541                         {
1542                             nFmt = pColFormat[i];
1543                             nInfoStart = i + 1;     // ColInfos are in succession
1544                             break;  // for
1545                         }
1546                     }
1547                     if ( nFmt != SC_COL_SKIP )
1548                     {
1549                         if (nCol > rDoc.MaxCol())
1550                             bOverflowCol = true;    // display warning on import
1551                         else if (!bDetermineRange)
1552                         {
1553                             if (bIsQuoted && bQuotedAsText)
1554                                 nFmt = SC_COL_TEXT;
1555 
1556                             bMultiLine |= lcl_PutString(
1557                                 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1558                                 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration,
1559                                 aCalendar, pEnglishTransliteration.get(), pEnglishCalendar.get());
1560                         }
1561                         ++nCol;
1562                     }
1563 
1564                     ++nSourceCol;
1565                 }
1566             }
1567             if (nEndCol < nCol)
1568                 nEndCol = nCol;     //! points to the next free or even rDoc.MaxCol()+2
1569 
1570             if (!bDetermineRange)
1571             {
1572                 if (bMultiLine && !bRangeIsDetermined && pDocSh)
1573                     pDocSh->AdjustRowHeight( nRow, nRow, nTab);
1574                 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
1575             }
1576             ++nRow;
1577         }
1578         // so far nRow/nEndCol pointed to the next free
1579         if (nRow > nStartRow)
1580             --nRow;
1581         if (nEndCol > nStartCol)
1582             nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol());
1583 
1584         if (bDetermineRange)
1585         {
1586             aRange.aEnd.SetCol( nEndCol );
1587             aRange.aEnd.SetRow( nRow );
1588 
1589             if ( !mbApi && nStartCol != nEndCol &&
1590                  !rDoc.IsBlockEmpty( nTab, nStartCol + 1, nStartRow, nEndCol, nRow ) )
1591             {
1592                 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
1593                 if (aBox.run() != RET_YES)
1594                 {
1595                     return false;
1596                 }
1597             }
1598 
1599             rStrm.Seek( nOriginalStreamPos );
1600             nRow = nStartRow;
1601             if (!StartPaste())
1602             {
1603                 EndPaste(false);
1604                 return false;
1605             }
1606         }
1607 
1608         bDetermineRange = !bDetermineRange;     // toggle
1609     } while (!bDetermineRange);
1610     if ( !mbOverwriting )
1611         aDocImport.finalize();
1612 
1613     xProgress.reset();    // make room for AdjustRowHeight progress
1614     if (bRangeIsDetermined)
1615         EndPaste(false);
1616 
1617     if (mbImportBroadcast && !mbOverwriting)
1618     {
1619         rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
1620         pDocSh->PostDataChanged();
1621     }
1622     return true;
1623 }
1624 
EmbeddedNullTreatment(OUString & rStr)1625 void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
1626 {
1627     // A nasty workaround for data with embedded NULL characters. As long as we
1628     // can't handle them properly as cell content (things assume 0-terminated
1629     // strings at too many places) simply strip all NULL characters from raw
1630     // data. Excel does the same. See fdo#57841 for sample data.
1631 
1632     // The normal case is no embedded NULL, check first before de-/allocating
1633     // ustring stuff.
1634     const sal_Unicode cNull = 0;
1635     sal_Int32 i;
1636     if ((i = rStr.indexOf( cNull)) >= 0)
1637     {
1638         // Do not use OUString::replaceAll(...,""), in case of repeated null
1639         // bytes that reallocates for each and for massive amounts takes
1640         // ~endless. See tdf#147421 with 3577016 trailing null-bytes.
1641         const sal_Int32 nLen = rStr.getLength();
1642         OUStringBuffer aBuf( nLen);
1643         sal_Int32 s = 0;
1644         sal_Unicode const * const p = rStr.getStr();
1645         do
1646         {
1647             // Append good substring.
1648             aBuf.append( p + s, i - s);
1649             // Skip all cNull.
1650             while (++i < nLen && *(p+i) == cNull)
1651                 ;
1652             // Find next cNull after good if characters left, else end.
1653             if (i < nLen)
1654             {
1655                 s = i;
1656                 i = rStr.indexOf( cNull, i);
1657             }
1658             else
1659             {
1660                 s = nLen;
1661             }
1662         }
1663         while (0 <= i && i < nLen);
1664         // Append good trailing substring, if any.
1665         if (s < nLen)
1666             aBuf.append( p + s, nLen - s);
1667 
1668         rStr = aBuf.makeStringAndClear();
1669     }
1670 }
1671 
ScanNextFieldFromString(const sal_Unicode * p,OUString & rField,sal_Unicode cStr,const sal_Unicode * pSeps,bool bMergeSeps,bool & rbIsQuoted,bool & rbOverflowCell,bool bRemoveSpace)1672 const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
1673         OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
1674         bool& rbOverflowCell, bool bRemoveSpace )
1675 {
1676     rbIsQuoted = false;
1677     rField.clear();
1678     const sal_Unicode cBlank = ' ';
1679     if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cBlank))
1680     {
1681         // Cope with broken generators that put leading blanks before a quoted
1682         // field, like "field1", "field2", "..."
1683         // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
1684         const sal_Unicode* pb = p;
1685         while (*pb == cBlank)
1686             ++pb;
1687         if (*pb == cStr)
1688             p = pb;
1689     }
1690     if (cStr && *p == cStr) // String in quotes
1691     {
1692         rbIsQuoted = true;
1693         const sal_Unicode* p1;
1694         p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
1695         while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
1696             p++;
1697         // Append remaining unquoted and undelimited data (dirty, dirty) to
1698         // this field.
1699         if (p > p1)
1700         {
1701             const sal_Unicode* ptrim_f = p;
1702             if ( bRemoveSpace )
1703             {
1704                 while ( ptrim_f > p1  && ( *(ptrim_f - 1) == cBlank ) )
1705                     --ptrim_f;
1706             }
1707             if (!lcl_appendLineData( rField, p1, ptrim_f))
1708                 rbOverflowCell = true;
1709         }
1710         if( *p )
1711             p++;
1712     }
1713     else                        // up to delimiter
1714     {
1715         const sal_Unicode* p0 = p;
1716         while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
1717             p++;
1718         const sal_Unicode* ptrim_i = p0;
1719         const sal_Unicode* ptrim_f = p;  // [ptrim_i,ptrim_f) is cell data after trimming
1720         if ( bRemoveSpace )
1721         {
1722             while ( ptrim_i < ptrim_f && *ptrim_i == cBlank )
1723                 ++ptrim_i;
1724             while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) )
1725                 --ptrim_f;
1726         }
1727         if (!lcl_appendLineData( rField, ptrim_i, ptrim_f))
1728             rbOverflowCell = true;
1729         if( *p )
1730             p++;
1731     }
1732     if ( bMergeSeps )           // skip following delimiters
1733     {
1734         while ( *p && ScGlobal::UnicodeStrChr( pSeps, *p ) )
1735             p++;
1736     }
1737     return p;
1738 }
1739 
1740 namespace {
1741 
1742 /**
1743  * Check if a given string has any line break characters or separators.
1744  *
1745  * @param rStr string to inspect.
1746  * @param cSep separator character.
1747  */
hasLineBreaksOrSeps(const OUString & rStr,sal_Unicode cSep)1748 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
1749 {
1750     const sal_Unicode* p = rStr.getStr();
1751     for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
1752     {
1753         sal_Unicode c = *p;
1754         if (c == cSep)
1755             // separator found.
1756             return true;
1757 
1758         switch (c)
1759         {
1760             case '\n':
1761             case '\r':
1762                 // line break found.
1763                 return true;
1764             default:
1765                 ;
1766         }
1767     }
1768     return false;
1769 }
1770 
1771 }
1772 
Doc2Text(SvStream & rStrm)1773 bool ScImportExport::Doc2Text( SvStream& rStrm )
1774 {
1775     SCCOL nCol;
1776     SCROW nRow;
1777     SCCOL nStartCol = aRange.aStart.Col();
1778     SCROW nStartRow = aRange.aStart.Row();
1779     SCTAB nStartTab = aRange.aStart.Tab();
1780     SCCOL nEndCol = aRange.aEnd.Col();
1781     SCROW nEndRow = aRange.aEnd.Row();
1782     SCTAB nEndTab = aRange.aEnd.Tab();
1783 
1784     if (!rDoc.GetClipParam().isMultiRange() && nStartTab == nEndTab)
1785         if (!rDoc.ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
1786             return false;
1787 
1788     OUString aCellStr;
1789 
1790     bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);
1791 
1792     // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1793     std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
1794     for( SCCOL i = nStartCol; i <= nEndCol; ++i )
1795         rDoc.InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
1796     for (nRow = nStartRow; nRow <= nEndRow; nRow++)
1797     {
1798         if (bIncludeFiltered || !rDoc.RowFiltered( nRow, nStartTab ))
1799         {
1800             for (nCol = nStartCol; nCol <= nEndCol; nCol++)
1801             {
1802                 ScAddress aPos(nCol, nRow, nStartTab);
1803                 sal_uInt32 nNumFmt = rDoc.GetNumberFormat(aPos);
1804                 SvNumberFormatter* pFormatter = rDoc.GetFormatTable();
1805 
1806                 ScRefCellValue aCell(rDoc, aPos, blockPos[ nCol - nStartCol ]);
1807                 switch (aCell.meType)
1808                 {
1809                     case CELLTYPE_FORMULA:
1810                     {
1811                         if (bFormulas)
1812                         {
1813                             aCell.mpFormula->GetFormula( aCellStr );
1814                             if( aCellStr.indexOf( cSep ) != -1 )
1815                                 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1816                             else
1817                                 lcl_WriteSimpleString( rStrm, aCellStr );
1818                         }
1819                         else
1820                         {
1821                             const Color* pColor;
1822                             ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, rDoc);
1823 
1824                             bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1825                             if( bMultiLineText )
1826                             {
1827                                 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1828                                     aCellStr = aCellStr.replaceAll( "\n", " " );
1829                                 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1830                                     aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1831                             }
1832 
1833                             if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1834                                 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1835 
1836                             if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
1837                                 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1838                             else
1839                                 lcl_WriteSimpleString( rStrm, aCellStr );
1840                         }
1841                     }
1842                     break;
1843                     case CELLTYPE_VALUE:
1844                     {
1845                         const Color* pColor;
1846                         ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, rDoc);
1847                         lcl_WriteSimpleString( rStrm, aCellStr );
1848                     }
1849                     break;
1850                     case CELLTYPE_NONE:
1851                     break;
1852                     default:
1853                     {
1854                         const Color* pColor;
1855                         ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, rDoc);
1856 
1857                         bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1858                         if( bMultiLineText )
1859                         {
1860                             if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1861                                 aCellStr = aCellStr.replaceAll( "\n", " " );
1862                             else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1863                                 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1864                         }
1865 
1866                         if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1867                             aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1868 
1869                         if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
1870                             lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1871                         else
1872                             lcl_WriteSimpleString( rStrm, aCellStr );
1873                     }
1874                 }
1875                 if( nCol < nEndCol )
1876                     lcl_WriteSimpleString( rStrm, OUString(cSep) );
1877             }
1878             WriteUnicodeOrByteEndl( rStrm );
1879             if( rStrm.GetError() != ERRCODE_NONE )
1880                 break;
1881             if( nSizeLimit && rStrm.Tell() > nSizeLimit )
1882                 break;
1883         }
1884     }
1885 
1886     return rStrm.GetError() == ERRCODE_NONE;
1887 }
1888 
Sylk2Doc(SvStream & rStrm)1889 bool ScImportExport::Sylk2Doc( SvStream& rStrm )
1890 {
1891     bool bOk = true;
1892     bool bMyDoc = false;
1893     SylkVersion eVersion = SylkVersion::OTHER;
1894 
1895     // US-English separators for StringToDouble
1896     sal_Unicode const cDecSep = '.';
1897     sal_Unicode const cGrpSep = ',';
1898 
1899     SCCOL nStartCol = aRange.aStart.Col();
1900     SCROW nStartRow = aRange.aStart.Row();
1901     SCCOL nEndCol = aRange.aEnd.Col();
1902     SCROW nEndRow = aRange.aEnd.Row();
1903     sal_uLong nOldPos = rStrm.Tell();
1904     bool bData = !bSingle;
1905     ::std::vector< sal_uInt32 > aFormats;
1906 
1907     if( !bSingle)
1908         bOk = StartPaste();
1909 
1910     while( bOk )
1911     {
1912         OUString aLine;
1913         OUString aText;
1914         OString aByteLine;
1915         SCCOL nCol = nStartCol;
1916         SCROW nRow = nStartRow;
1917         SCCOL nRefCol = nCol;
1918         SCROW nRefRow = nRow;
1919         rStrm.Seek( nOldPos );
1920         for( ;; )
1921         {
1922             //! allow unicode
1923             rStrm.ReadLine( aByteLine );
1924             aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
1925             if( rStrm.eof() )
1926                 break;
1927             bool bInvalidCol = false;
1928             bool bInvalidRow = false;
1929             const sal_Unicode* p = aLine.getStr();
1930             sal_Unicode cTag = *p++;
1931             if( cTag == 'C' )       // Content
1932             {
1933                 if( *p++ != ';' )
1934                     return false;
1935 
1936                 bool bInvalidRefCol = false;
1937                 bool bInvalidRefRow = false;
1938                 while( *p )
1939                 {
1940                     sal_Unicode ch = *p++;
1941                     ch = ScGlobal::ToUpperAlpha( ch );
1942                     switch( ch )
1943                     {
1944                         case 'X':
1945                         {
1946                             bInvalidCol = false;
1947                             bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
1948                             if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
1949                             {
1950                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
1951                                 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
1952                                 bInvalidCol = bOverflowCol = true;
1953                             }
1954                             break;
1955                         }
1956                         case 'Y':
1957                         {
1958                             bInvalidRow = false;
1959                             bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
1960                             if (bFail || nRow < 0 || nMaxImportRow < nRow)
1961                             {
1962                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
1963                                 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
1964                                 bInvalidRow = bOverflowRow = true;
1965                             }
1966                             break;
1967                         }
1968                         case 'C':
1969                         {
1970                             bInvalidRefCol = false;
1971                             bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nRefCol);
1972                             if (bFail || nRefCol < 0 || rDoc.MaxCol() < nRefCol)
1973                             {
1974                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol);
1975                                 nRefCol = std::clamp<SCCOL>(nRefCol, 0, rDoc.MaxCol());
1976                                 bInvalidRefCol = bOverflowCol = true;
1977                             }
1978                             break;
1979                         }
1980                         case 'R':
1981                         {
1982                             bInvalidRefRow = false;
1983                             bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRefRow);
1984                             if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow)
1985                             {
1986                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow);
1987                                 nRefRow = std::clamp<SCROW>(nRefRow, 0, nMaxImportRow);
1988                                 bInvalidRefRow = bOverflowRow = true;
1989                             }
1990                             break;
1991                         }
1992                         case 'K':
1993                         {
1994                             if( !bSingle &&
1995                                     ( nCol < nStartCol || nCol > nEndCol
1996                                       || nRow < nStartRow || nRow > nEndRow
1997                                       || nCol > rDoc.MaxCol() || nRow > nMaxImportRow
1998                                       || bInvalidCol || bInvalidRow ) )
1999                                 break;
2000                             if( !bData )
2001                             {
2002                                 if( nRow > nEndRow )
2003                                     nEndRow = nRow;
2004                                 if( nCol > nEndCol )
2005                                     nEndCol = nCol;
2006                                 break;
2007                             }
2008                             bool bText;
2009                             if( *p == '"' )
2010                             {
2011                                 bText = true;
2012                                 aText.clear();
2013                                 p = lcl_ScanSylkString( p, aText, eVersion);
2014                             }
2015                             else
2016                                 bText = false;
2017                             const sal_Unicode* q = p;
2018                             while( *q && *q != ';' )
2019                                 q++;
2020                             if ( (*q != ';' || *(q+1) != 'I') && !bInvalidCol && !bInvalidRow )
2021                             {   // don't ignore value
2022                                 if( bText )
2023                                 {
2024                                     rDoc.EnsureTable(aRange.aStart.Tab());
2025                                     rDoc.SetTextCell(
2026                                         ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
2027                                 }
2028                                 else
2029                                 {
2030                                     double fVal = rtl_math_uStringToDouble( p,
2031                                             aLine.getStr() + aLine.getLength(),
2032                                             cDecSep, cGrpSep, nullptr, nullptr );
2033                                     rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
2034                                 }
2035                             }
2036                         }
2037                         break;
2038                         case 'E':
2039                         case 'M':
2040                         {
2041                             if ( ch == 'M' )
2042                             {
2043                                 if ( nRefCol < nCol )
2044                                     nRefCol = nCol;
2045                                 if ( nRefRow < nRow )
2046                                     nRefRow = nRow;
2047                                 if ( !bData )
2048                                 {
2049                                     if( nRefRow > nEndRow )
2050                                         nEndRow = nRefRow;
2051                                     if( nRefCol > nEndCol )
2052                                         nEndCol = nRefCol;
2053                                 }
2054                             }
2055                             if( !bMyDoc || !bData )
2056                                 break;
2057                             aText = "=";
2058                             p = lcl_ScanSylkFormula( p, aText, eVersion);
2059 
2060                             if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow)))
2061                                 break;
2062 
2063                             ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
2064                             /* FIXME: do we want GRAM_ODFF_A1 instead? At the
2065                              * end it probably should be GRAM_ODFF_R1C1, since
2066                              * R1C1 is what Excel writes in SYLK, or even
2067                              * better GRAM_ENGLISH_XL_R1C1. */
2068                             const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
2069                             ScCompiler aComp(rDoc, aPos, eGrammar);
2070                             std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
2071                             rDoc.CheckLinkFormulaNeedingCheck(*xCode);
2072                             if ( ch == 'M' )
2073                             {
2074                                 ScMarkData aMark(rDoc.GetSheetLimits());
2075                                 aMark.SelectTable( aPos.Tab(), true );
2076                                 rDoc.InsertMatrixFormula( nCol, nRow, nRefCol,
2077                                     nRefRow, aMark, EMPTY_OUSTRING, xCode.get() );
2078                             }
2079                             else
2080                             {
2081                                 ScFormulaCell* pFCell = new ScFormulaCell(
2082                                         rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
2083                                 rDoc.SetFormulaCell(aPos, pFCell);
2084                             }
2085                         }
2086                         break;
2087                     }
2088                     while( *p && *p != ';' )
2089                         p++;
2090                     if( *p )
2091                         p++;
2092                 }
2093             }
2094             else if( cTag == 'F' )      // Format
2095             {
2096                 if( *p++ != ';' )
2097                     return false;
2098                 sal_Int32 nFormat = -1;
2099                 while( *p )
2100                 {
2101                     sal_Unicode ch = *p++;
2102                     ch = ScGlobal::ToUpperAlpha( ch );
2103                     switch( ch )
2104                     {
2105                         case 'X':
2106                         {
2107                             bInvalidCol = false;
2108                             bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
2109                             if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
2110                             {
2111                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
2112                                 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
2113                                 bInvalidCol = bOverflowCol = true;
2114                             }
2115                             break;
2116                         }
2117                         case 'Y':
2118                         {
2119                             bInvalidRow = false;
2120                             bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
2121                             if (bFail || nRow < 0 || nMaxImportRow < nRow)
2122                             {
2123                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
2124                                 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
2125                                 bInvalidRow = bOverflowRow = true;
2126                             }
2127                             break;
2128                         }
2129                         case 'P' :
2130                             if ( bData )
2131                             {
2132                                 // F;P<n> sets format code of P;P<code> at
2133                                 // current position, or at ;X;Y if specified.
2134                                 // Note that ;X;Y may appear after ;P
2135                                 const sal_Unicode* p0 = p;
2136                                 while( *p && *p != ';' )
2137                                     p++;
2138                                 OUString aNumber(p0, p - p0);
2139                                 nFormat = aNumber.toInt32();
2140                             }
2141                             break;
2142                     }
2143                     while( *p && *p != ';' )
2144                         p++;
2145                     if( *p )
2146                         p++;
2147                 }
2148                 if ( !bData )
2149                 {
2150                     if( nRow > nEndRow )
2151                         nEndRow = nRow;
2152                     if( nCol > nEndCol )
2153                         nEndCol = nCol;
2154                 }
2155                 if ( 0 <= nFormat && nFormat < static_cast<sal_Int32>(aFormats.size()) && !bInvalidCol && !bInvalidRow )
2156                 {
2157                     sal_uInt32 nKey = aFormats[nFormat];
2158                     rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
2159                             SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
2160                 }
2161             }
2162             else if( cTag == 'P' )
2163             {
2164                 if ( bData && *p == ';' && *(p+1) == 'P' )
2165                 {
2166                     OUString aCode( p+2 );
2167 
2168                     sal_uInt32 nKey;
2169                     sal_Int32 nCheckPos;
2170 
2171                     if (aCode.getLength() > 2048 && utl::ConfigManager::IsFuzzing())
2172                     {
2173                         // consider an excessive length as a failure when fuzzing
2174                         nCheckPos = 1;
2175                     }
2176                     else
2177                     {
2178                         // unescape doubled semicolons
2179                         aCode = aCode.replaceAll(";;", ";");
2180                         // get rid of Xcl escape characters
2181                         aCode = aCode.replaceAll("\x1b", "");
2182                         SvNumFormatType nType;
2183                         rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
2184                                                                     LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
2185                     }
2186 
2187                     if ( nCheckPos )
2188                         nKey = 0;
2189 
2190                     aFormats.push_back( nKey );
2191                 }
2192             }
2193             else if( cTag == 'I' && *p == 'D' )
2194             {
2195                 aLine = aLine.copy(4);
2196                 if (aLine == "CALCOOO32")
2197                     eVersion = SylkVersion::OOO32;
2198                 else if (aLine == "SCALC3")
2199                     eVersion = SylkVersion::SCALC3;
2200                 bMyDoc = (eVersion <= SylkVersion::OWN);
2201             }
2202             else if( cTag == 'E' )                      // End
2203                 break;
2204         }
2205         if( !bData )
2206         {
2207             aRange.aEnd.SetCol( nEndCol );
2208             aRange.aEnd.SetRow( nEndRow );
2209             bOk = StartPaste();
2210             bData = true;
2211         }
2212         else
2213             break;
2214     }
2215 
2216     EndPaste();
2217     return bOk;
2218 }
2219 
Doc2Sylk(SvStream & rStrm)2220 bool ScImportExport::Doc2Sylk( SvStream& rStrm )
2221 {
2222     SCCOL nCol;
2223     SCROW nRow;
2224     SCCOL nStartCol = aRange.aStart.Col();
2225     SCROW nStartRow = aRange.aStart.Row();
2226     SCCOL nEndCol = aRange.aEnd.Col();
2227     SCROW nEndRow = aRange.aEnd.Row();
2228     OUString aCellStr;
2229     OUString aValStr;
2230     lcl_WriteSimpleString( rStrm, "ID;PCALCOOO32" );
2231     WriteUnicodeOrByteEndl( rStrm );
2232 
2233     for (nRow = nStartRow; nRow <= nEndRow; nRow++)
2234     {
2235         for (nCol = nStartCol; nCol <= nEndCol; nCol++)
2236         {
2237             OUString aBufStr;
2238             double nVal;
2239             bool bForm = false;
2240             SCROW r = nRow - nStartRow + 1;
2241             SCCOL c = nCol - nStartCol + 1;
2242             ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));
2243             CellType eType = aCell.meType;
2244             switch( eType )
2245             {
2246                 case CELLTYPE_FORMULA:
2247                     bForm = bFormulas;
2248                     if( rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab()) )
2249                         goto hasvalue;
2250                     else
2251                         goto hasstring;
2252 
2253                 case CELLTYPE_VALUE:
2254                 hasvalue:
2255                     rDoc.GetValue( nCol, nRow, aRange.aStart.Tab(), nVal );
2256 
2257                     aValStr = ::rtl::math::doubleToUString( nVal,
2258                             rtl_math_StringFormat_Automatic,
2259                             rtl_math_DecimalPlaces_Max, '.', true );
2260 
2261                     aBufStr = "C;X"
2262                             + OUString::number( c )
2263                             + ";Y"
2264                             + OUString::number( r )
2265                             + ";K"
2266                             + aValStr;
2267                     lcl_WriteSimpleString( rStrm, aBufStr );
2268                     goto checkformula;
2269 
2270                 case CELLTYPE_STRING:
2271                 case CELLTYPE_EDIT:
2272                 hasstring:
2273                     aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab());
2274                     aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
2275 
2276                     aBufStr = "C;X"
2277                             + OUString::number( c )
2278                             + ";Y"
2279                             + OUString::number( r )
2280                             + ";K";
2281                     lcl_WriteSimpleString( rStrm, aBufStr );
2282                     lcl_WriteString( rStrm, aCellStr, '"', ';' );
2283 
2284                 checkformula:
2285                     if( bForm )
2286                     {
2287                         const ScFormulaCell* pFCell = aCell.mpFormula;
2288                         switch ( pFCell->GetMatrixFlag() )
2289                         {
2290                             case ScMatrixMode::Reference :
2291                                 aCellStr.clear();
2292                             break;
2293                             default:
2294                                 OUString aOUCellStr;
2295                                 pFCell->GetFormula( aOUCellStr,formula::FormulaGrammar::GRAM_PODF_A1);
2296                                 aCellStr = aOUCellStr;
2297                                 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2298                                  * the end it probably should be
2299                                  * GRAM_ODFF_R1C1, since R1C1 is what Excel
2300                                  * writes in SYLK, or even better
2301                                  * GRAM_ENGLISH_XL_R1C1. */
2302                         }
2303                         if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE &&
2304                                 aCellStr.startsWith("{") &&
2305                                 aCellStr.endsWith("}") )
2306                         {   // cut off matrix {} characters
2307                             aCellStr = aCellStr.copy(1, aCellStr.getLength()-2);
2308                         }
2309                         if ( aCellStr[0] == '=' )
2310                             aCellStr = aCellStr.copy(1);
2311                         OUString aPrefix;
2312                         switch ( pFCell->GetMatrixFlag() )
2313                         {
2314                             case ScMatrixMode::Formula :
2315                             {   // diff expression with 'M' M$-extension
2316                                 SCCOL nC;
2317                                 SCROW nR;
2318                                 pFCell->GetMatColsRows( nC, nR );
2319                                 nC += c - 1;
2320                                 nR += r - 1;
2321                                 aPrefix = ";R"
2322                                         + OUString::number( nR )
2323                                         + ";C"
2324                                         + OUString::number( nC )
2325                                         + ";M";
2326                             }
2327                             break;
2328                             case ScMatrixMode::Reference :
2329                             {   // diff expression with 'I' M$-extension
2330                                 ScAddress aPos;
2331                                 (void)pFCell->GetMatrixOrigin( rDoc, aPos );
2332                                 aPrefix = ";I;R"
2333                                         + OUString::number( aPos.Row() - nStartRow + 1 )
2334                                         + ";C"
2335                                         + OUString::number( aPos.Col() - nStartCol + 1 );
2336                             }
2337                             break;
2338                             default:
2339                                 // formula Expression
2340                                 aPrefix = ";E";
2341                         }
2342                         lcl_WriteSimpleString( rStrm, aPrefix );
2343                         if ( !aCellStr.isEmpty() )
2344                             lcl_WriteString( rStrm, aCellStr, 0, ';' );
2345                     }
2346                     WriteUnicodeOrByteEndl( rStrm );
2347                     break;
2348 
2349                 default:
2350                 {
2351                     // added to avoid warnings
2352                 }
2353             }
2354         }
2355     }
2356     lcl_WriteSimpleString( rStrm, OUString( 'E' ) );
2357     WriteUnicodeOrByteEndl( rStrm );
2358     return rStrm.GetError() == ERRCODE_NONE;
2359 }
2360 
Doc2HTML(SvStream & rStrm,const OUString & rBaseURL)2361 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL )
2362 {
2363     // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2364     ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
2365         aStreamPath, aNonConvertibleChars, maFilterOptions );
2366     return rStrm.GetError() == ERRCODE_NONE;
2367 }
2368 
Doc2RTF(SvStream & rStrm)2369 bool ScImportExport::Doc2RTF( SvStream& rStrm )
2370 {
2371     //  rtl_TextEncoding is ignored in ScExportRTF
2372     ScFormatFilter::Get().ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
2373     return rStrm.GetError() == ERRCODE_NONE;
2374 }
2375 
Doc2Dif(SvStream & rStrm)2376 bool ScImportExport::Doc2Dif( SvStream& rStrm )
2377 {
2378     // for DIF in the clipboard, IBM_850 is always used
2379     ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 );
2380     return true;
2381 }
2382 
Dif2Doc(SvStream & rStrm)2383 bool ScImportExport::Dif2Doc( SvStream& rStrm )
2384 {
2385     SCTAB nTab = aRange.aStart.Tab();
2386     ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
2387     pImportDoc->InitUndo( rDoc, nTab, nTab );
2388 
2389     // for DIF in the clipboard, IBM_850 is always used
2390     ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
2391 
2392     SCCOL nEndCol;
2393     SCROW nEndRow;
2394     pImportDoc->GetCellArea( nTab, nEndCol, nEndRow );
2395     // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2396     if ( nEndCol < aRange.aStart.Col() )
2397         nEndCol = aRange.aStart.Col();
2398     if ( nEndRow < aRange.aStart.Row() )
2399         nEndRow = aRange.aStart.Row();
2400     aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
2401 
2402     bool bOk = StartPaste();
2403     if (bOk)
2404     {
2405         InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2406         rDoc.DeleteAreaTab( aRange, nFlags );
2407         pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc);
2408         EndPaste();
2409     }
2410 
2411     return bOk;
2412 }
2413 
RTF2Doc(SvStream & rStrm,const OUString & rBaseURL)2414 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
2415 {
2416     std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange );
2417     if (!pImp)
2418         return false;
2419     pImp->Read( rStrm, rBaseURL );
2420     aRange = pImp->GetRange();
2421 
2422     bool bOk = StartPaste();
2423     if (bOk)
2424     {
2425         InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2426         rDoc.DeleteAreaTab( aRange, nFlags );
2427         pImp->WriteToDocument();
2428         EndPaste();
2429     }
2430     return bOk;
2431 }
2432 
HTML2Doc(SvStream & rStrm,const OUString & rBaseURL)2433 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
2434 {
2435     std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange);
2436     if (!pImp)
2437         return false;
2438     pImp->Read( rStrm, rBaseURL );
2439     aRange = pImp->GetRange();
2440 
2441     bool bOk = StartPaste();
2442     if (bOk)
2443     {
2444         // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2445         // a Draw Layer but no Draw View -> create Draw Layer and View here
2446         if (pDocSh)
2447             pDocSh->MakeDrawLayer();
2448 
2449         InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2450         rDoc.DeleteAreaTab( aRange, nFlags );
2451 
2452         if (pExtOptions)
2453         {
2454             // Pick up import options if available.
2455             LanguageType eLang = pExtOptions->GetLanguage();
2456             SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
2457             bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
2458             pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber);
2459         }
2460         else
2461             // Regular import, with no options.
2462             pImp->WriteToDocument();
2463 
2464         EndPaste();
2465     }
2466     return bOk;
2467 }
2468 
2469 #ifndef DISABLE_DYNLOADING
2470 
thisModule()2471 extern "C" { static void thisModule() {} }
2472 
2473 #else
2474 
2475 extern "C" {
2476 ScFormatFilterPlugin* ScFilterCreate();
2477 }
2478 
2479 #endif
2480 
2481 typedef ScFormatFilterPlugin * (*FilterFn)();
Get()2482 ScFormatFilterPlugin &ScFormatFilter::Get()
2483 {
2484     static ScFormatFilterPlugin *plugin = [&]()
2485     {
2486 #ifndef DISABLE_DYNLOADING
2487         OUString sFilterLib(SVLIBRARY("scfilt"));
2488         static ::osl::Module aModule;
2489         bool bLoaded = aModule.is();
2490         if (!bLoaded)
2491             bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
2492         if (!bLoaded)
2493             bLoaded = aModule.load(sFilterLib);
2494         if (bLoaded)
2495         {
2496             oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
2497             if (fn != nullptr)
2498                 return reinterpret_cast<FilterFn>(fn)();
2499         }
2500         assert(false);
2501         return static_cast<ScFormatFilterPlugin*>(nullptr);
2502 #else
2503         return ScFilterCreate();
2504 #endif
2505     }();
2506 
2507     return *plugin;
2508 }
2509 
2510 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2511 // array.
lcl_UnicodeStrChr(const sal_Unicode * pStr,sal_Unicode c)2512 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
2513         sal_Unicode c )
2514 {
2515     while (*pStr)
2516     {
2517         if (*pStr == c)
2518             return pStr;
2519         ++pStr;
2520     }
2521     return nullptr;
2522 }
2523 
ScImportStringStream(const OUString & rStr)2524 ScImportStringStream::ScImportStringStream( const OUString& rStr )
2525     : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
2526             rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
2527 {
2528     SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
2529 #ifdef OSL_BIGENDIAN
2530     SetEndian(SvStreamEndian::BIG);
2531 #else
2532     SetEndian(SvStreamEndian::LITTLE);
2533 #endif
2534 }
2535 
// Reads one logical CSV line from rStream and returns it.
//
// One physical line is always read. If bEmbeddedLineBreak is set, the text is
// scanned for field quotes (cFieldQuote); as long as the number of counted
// quotes is odd, another physical line is read and appended, joined by "\n".
// Appending stops at end of stream, once the accumulated string reaches
// nArbitraryLineLengthLimit, or - if nMaxSourceLines is greater than 0 - once
// the number of unbalanced passes reaches nMaxSourceLines.
//
// rFieldSeparators  characters treated as field separators; a separator
//                   detected during scanning (rcDetectSep) is appended to it.
// cFieldQuote       the field quote character.
// rcDetectSep       if 0 on entry (and bEmbeddedLineBreak is set), a single
//                   retry is allowed: when the value becomes non-zero during
//                   quote analysis, the stream is rewound to the start
//                   position and the line is read again with the extended
//                   separator set. Presumably set by
//                   lcl_isEscapedOrFieldEndQuote(), which is defined
//                   elsewhere - confirm there.
// nMaxSourceLines   limit on continuation passes; 0 means no limit.
OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
        OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines )
{
    // FORBID: no retry possible; ALLOW: a retry may still be triggered;
    // RETRY: a retry was requested; RETRIED: the retry has been performed.
    enum RetryState
    {
        FORBID,
        ALLOW,
        RETRY,
        RETRIED
    } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);

    // Remember where this line starts so a retry can re-read it.
    sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);

Label_RetryWithNewSep:

    if (eRetryState == RetryState::RETRY)
    {
        // Rewind and mark as done so the retry happens at most once.
        eRetryState = RetryState::RETRIED;
        rStream.Seek( nStreamPos);
    }

    OUString aStr;
    rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);

    if (bEmbeddedLineBreak)
    {
        // Number of passes that ended with unbalanced quotes.
        sal_uInt32 nLine = 0;

        const sal_Unicode* pSeps = rFieldSeparators.getStr();

        QuoteType eQuoteState = FIELDEND_QUOTE;
        bool bFieldStart = true;

        // Offset in aStr where scanning resumes after a line was appended.
        sal_Int32 nLastOffset = 0;
        // Quotes counted for pairing; an odd count means a quoted field is
        // still open.
        sal_Int32 nQuotes = 0;
        while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
        {
            const sal_Unicode * p = aStr.getStr() + nLastOffset;
            const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength();
            while (p < pStop)
            {
                if (!*p)
                {
                    // Skip embedded null-characters. They don't change
                    // anything and are handled at a higher level.
                    ++p;
                    continue;
                }

                if (nQuotes)
                {
                    if (*p == cFieldQuote)
                    {
                        if (bFieldStart)
                        {
                            ++nQuotes;
                            bFieldStart = false;
                            eQuoteState = FIELDSTART_QUOTE;
                        }
                        // Do not detect a FIELDSTART_QUOTE if not in
                        // bFieldStart mode, in which case for unquoted content
                        // we are in FIELDEND_QUOTE state.
                        else if (eQuoteState != FIELDEND_QUOTE)
                        {
                            eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);

                            if (eRetryState == RetryState::ALLOW && rcDetectSep)
                            {
                                // A separator was detected: add it to the
                                // separator set and start over with this line.
                                eRetryState = RetryState::RETRY;
                                rFieldSeparators += OUStringChar(rcDetectSep);
                                pSeps = rFieldSeparators.getStr();
                                goto Label_RetryWithNewSep;
                            }

                            // DONTKNOW_QUOTE is an embedded unescaped quote we
                            // don't count for pairing.
                            if (eQuoteState != DONTKNOW_QUOTE)
                                ++nQuotes;
                        }
                    }
                    else if (eQuoteState == FIELDEND_QUOTE)
                    {
                        if (bFieldStart)
                            // If blank is a separator it starts a field, if it
                            // is not and thus maybe leading before quote we
                            // are still at start of field regarding quotes.
                            bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                        else
                            bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                    }
                }
                else
                {
                    // No quote seen so far.
                    if (*p == cFieldQuote && bFieldStart)
                    {
                        nQuotes = 1;
                        eQuoteState = FIELDSTART_QUOTE;
                        bFieldStart = false;
                    }
                    else if (eQuoteState == FIELDEND_QUOTE)
                    {
                        // This also skips leading blanks at beginning of line
                        // if followed by a quote. It's debatable whether we
                        // actually want that or not, but congruent with what
                        // ScanNextFieldFromString() does.
                        if (bFieldStart)
                            bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                        else
                            bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                    }
                }
                // A quote character inside a field content does not start
                // a quote.
                ++p;
            }

            // Even quote count: all quoted fields are closed, the line is
            // complete.
            if ((nQuotes & 1) == 0)
                // We still have a (theoretical?) problem here if due to
                // nArbitraryLineLengthLimit (or nMaxSourceLines below) we
                // split a string right between a doubled quote pair.
                break;
            else if (++nLine >= nMaxSourceLines && nMaxSourceLines > 0)
                // Unconditionally increment nLine even if nMaxSourceLines==0
                // so it can be observed in debugger.
                break;
            else
            {
                // A quoted field is still open: append the next physical line
                // and continue scanning from where the old text ended.
                nLastOffset = aStr.getLength();
                OUString aNext;
                rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
                aStr += "\n" + aNext;
            }
        }
    }
    return aStr;
}
2672 
2673 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2674