1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <sot/formats.hxx>
23 #include <sfx2/mieclip.hxx>
24 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
25 #include <sal/log.hxx>
26 #include <unotools/charclass.hxx>
27 #include <osl/module.hxx>
28 
29 #include <global.hxx>
30 #include <docsh.hxx>
31 #include <undoblk.hxx>
32 #include <rangenam.hxx>
33 #include <tabvwsh.hxx>
34 #include <filter.hxx>
35 #include <asciiopt.hxx>
36 #include <formulacell.hxx>
37 #include <cellform.hxx>
38 #include <progress.hxx>
39 #include <scitems.hxx>
40 #include <editable.hxx>
41 #include <compiler.hxx>
42 #include <warnbox.hxx>
43 #include <clipparam.hxx>
44 #include <impex.hxx>
45 #include <editutil.hxx>
46 #include <patattr.hxx>
47 #include <docpool.hxx>
48 #include <stringutil.hxx>
49 #include <cellvalue.hxx>
50 #include <tokenarray.hxx>
51 #include <documentimport.hxx>
52 #include <refundo.hxx>
53 #include <mtvelements.hxx>
54 
55 #include <globstr.hrc>
56 #include <scresid.hxx>
57 #include <o3tl/safeint.hxx>
58 #include <tools/svlibrary.h>
59 #include <unotools/configmgr.hxx>
60 #include <vcl/svapp.hxx>
61 #include <vcl/weld.hxx>
62 #include <editeng/editobj.hxx>
63 
64 #include <memory>
65 #include <osl/endian.h>
66 
// Upper bound for the length of a single line read during import.
// We don't want to end up with 2GB read in one line just because of malformed
// multiline fields, so chop it _somewhere_, which is twice supported columns
// times maximum cell content length, 2*1024*64K=128M, and because it's
// sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of luck
// anyway.
static const sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * 65536;
73 
74 namespace
75 {
76     const char SYLK_LF[]  = "\x1b :";
77 
lcl_IsEndianSwap(const SvStream & rStrm)78     bool lcl_IsEndianSwap( const SvStream& rStrm )
79     {
80     #ifdef OSL_BIGENDIAN
81         return rStrm.GetEndian() != SvStreamEndian::BIG;
82     #else
83         return rStrm.GetEndian() != SvStreamEndian::LITTLE;
84     #endif
85     }
86 }
87 
// Producer/version of a SYLK file; selects the matching unescape rules.
// The enumerator order is significant: the SYLK scanners compare with
// ">= SylkVersion::OOO32" to pick the newer escaping scheme.
enum class SylkVersion
{
    SCALC3,    // Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,     // Correct strings, plus multiline content.
    OWN,       // Place our new versions, if any, before this value.
    OTHER      // Assume that aliens wrote correct strings.
};
95 
96 // Whole document without Undo
ScImportExport(ScDocument * p)97 ScImportExport::ScImportExport( ScDocument* p )
98     : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
99       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
100       cSep( '\t' ), cStr( '"' ),
101       bFormulas( false ), bIncludeFiltered( true ),
102       bAll( true ), bSingle( true ), bUndo( false ),
103       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
104       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
105       mExportTextOptions()
106 {
107     pUndoDoc = nullptr;
108     pExtOptions = nullptr;
109 }
110 
111 // Insert am current cell without range(es)
ScImportExport(ScDocument * p,const ScAddress & rPt)112 ScImportExport::ScImportExport( ScDocument* p, const ScAddress& rPt )
113     : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
114       aRange( rPt ),
115       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
116       cSep( '\t' ), cStr( '"' ),
117       bFormulas( false ), bIncludeFiltered( true ),
118       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
119       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
120       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
121       mExportTextOptions()
122 {
123     pUndoDoc = nullptr;
124     pExtOptions = nullptr;
125 }
126 
127 //  ctor with a range is only used for export
128 //! ctor with a string (and bSingle=true) is also used for DdeSetData
ScImportExport(ScDocument * p,const ScRange & r)129 ScImportExport::ScImportExport( ScDocument* p, const ScRange& r )
130     : pDocSh( dynamic_cast<ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
131       aRange( r ),
132       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
133       cSep( '\t' ), cStr( '"' ),
134       bFormulas( false ), bIncludeFiltered( true ),
135       bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
136       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
137       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
138       mExportTextOptions()
139 {
140     pUndoDoc = nullptr;
141     pExtOptions = nullptr;
142     // Only one sheet (table) supported
143     aRange.aEnd.SetTab( aRange.aStart.Tab() );
144 }
145 
146 // Evaluate input string - either range, cell or the whole document (when error)
147 // If a View exists, the TabNo of the view will be used.
ScImportExport(ScDocument * p,const OUString & rPos)148 ScImportExport::ScImportExport( ScDocument* p, const OUString& rPos )
149     : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
150       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
151       cSep( '\t' ), cStr( '"' ),
152       bFormulas( false ), bIncludeFiltered( true ),
153       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
154       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
155       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
156       mExportTextOptions()
157 {
158     pUndoDoc = nullptr;
159     pExtOptions = nullptr;
160 
161     SCTAB nTab = ScDocShell::GetCurTab();
162     aRange.aStart.SetTab( nTab );
163     OUString aPos( rPos );
164     // Named range?
165     ScRangeName* pRange = pDoc->GetRangeName();
166     if (pRange)
167     {
168         const ScRangeData* pData = pRange->findByUpperName(ScGlobal::pCharClass->uppercase(aPos));
169         if (pData)
170         {
171             if( pData->HasType( ScRangeData::Type::RefArea )
172                 || pData->HasType( ScRangeData::Type::AbsArea )
173                 || pData->HasType( ScRangeData::Type::AbsPos ) )
174             {
175                 pData->GetSymbol(aPos);
176             }
177         }
178     }
179     formula::FormulaGrammar::AddressConvention eConv = pDoc->GetAddressConvention();
180     // Range?
181     if (aRange.Parse(aPos, pDoc, eConv) & ScRefFlags::VALID)
182         bSingle = false;
183     // Cell?
184     else if (aRange.aStart.Parse(aPos, pDoc, eConv) & ScRefFlags::VALID)
185         aRange.aEnd = aRange.aStart;
186     else
187         bAll = true;
188 }
189 
~ScImportExport()190 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
191 {
192     pUndoDoc.reset();
193     pExtOptions.reset();
194 }
195 
SetExtOptions(const ScAsciiOptions & rOpt)196 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
197 {
198     if ( pExtOptions )
199         *pExtOptions = rOpt;
200     else
201         pExtOptions.reset(new ScAsciiOptions( rOpt ));
202 
203     //  "normal" Options
204 
205     cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
206     cStr = rOpt.GetTextSep();
207 }
208 
// Remember the filter options string as given; it is stored unchanged in
// maFilterOptions.
void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
{
    maFilterOptions = rFilterOptions;
}
213 
IsFormatSupported(SotClipboardFormatId nFormat)214 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
215 {
216     return nFormat == SotClipboardFormatId::STRING
217               || nFormat == SotClipboardFormatId::STRING_TSVC
218               || nFormat == SotClipboardFormatId::SYLK
219               || nFormat == SotClipboardFormatId::LINK
220               || nFormat == SotClipboardFormatId::HTML
221               || nFormat == SotClipboardFormatId::HTML_SIMPLE
222               || nFormat == SotClipboardFormatId::DIF;
223 }
224 
225 // Prepare for Undo
StartPaste()226 bool ScImportExport::StartPaste()
227 {
228     if ( !bAll )
229     {
230         ScEditableTester aTester( pDoc, aRange );
231         if ( !aTester.IsEditable() )
232         {
233             vcl::Window* pWin = Application::GetDefDialogParent();
234             std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(pWin ? pWin->GetFrameWeld() : nullptr,
235                                                           VclMessageType::Info, VclButtonsType::Ok,
236                                                           ScResId(aTester.GetMessageId())));
237             xInfoBox->run();
238             return false;
239         }
240     }
241     if( bUndo && pDocSh && pDoc->IsUndoEnabled())
242     {
243         pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
244         pUndoDoc->InitUndo( pDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
245         pDoc->CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
246     }
247     return true;
248 }
249 
250 // Create Undo/Redo actions, Invalidate/Repaint
EndPaste(bool bAutoRowHeight)251 void ScImportExport::EndPaste(bool bAutoRowHeight)
252 {
253     bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
254                     aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
255 
256     if( pUndoDoc && pDoc->IsUndoEnabled() && pDocSh )
257     {
258         ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
259         pRedoDoc->InitUndo( pDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
260         pDoc->CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
261         ScMarkData aDestMark(pRedoDoc->MaxRow(), pRedoDoc->MaxCol());
262         aDestMark.SetMarkArea(aRange);
263         pDocSh->GetUndoManager()->AddUndoAction(
264             std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
265     }
266     pUndoDoc.reset();
267     if( pDocSh )
268     {
269         if (!bHeight)
270             pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
271         pDocSh->SetDocumentModified();
272     }
273     ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
274     if ( pViewSh )
275         pViewSh->UpdateInputHandler();
276 
277 }
278 
ExportData(const OUString & rMimeType,css::uno::Any & rValue)279 bool ScImportExport::ExportData( const OUString& rMimeType,
280                                  css::uno::Any & rValue )
281 {
282     SvMemoryStream aStrm;
283     // mba: no BaseURL for data exchange
284     if( ExportStream( aStrm, OUString(),
285                 SotExchange::GetFormatIdFromMimeType( rMimeType ) ))
286     {
287         aStrm.WriteUChar( 0 );
288         rValue <<= css::uno::Sequence< sal_Int8 >(
289                                         static_cast<sal_Int8 const *>(aStrm.GetData()),
290                                         aStrm.TellEnd() );
291         return true;
292     }
293     return false;
294 }
295 
ImportString(const OUString & rText,SotClipboardFormatId nFmt)296 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
297 {
298     switch ( nFmt )
299     {
300         // formats supporting unicode
301         case SotClipboardFormatId::STRING :
302         case SotClipboardFormatId::STRING_TSVC :
303         {
304             ScImportStringStream aStrm( rText);
305             return ImportStream( aStrm, OUString(), nFmt );
306             // ImportStream must handle RTL_TEXTENCODING_UNICODE
307         }
308         default:
309         {
310             rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
311             OString aTmp( rText.getStr(), rText.getLength(), eEnc );
312             SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(sal_Char), StreamMode::READ );
313             aStrm.SetStreamCharSet( eEnc );
314             SetNoEndianSwap( aStrm );       //! no swapping in memory
315             return ImportStream( aStrm, OUString(), nFmt );
316         }
317     }
318 }
319 
ExportString(OUString & rText,SotClipboardFormatId nFmt)320 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
321 {
322     if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC )
323     {
324         SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
325         rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
326         OString aTmp;
327         bool bOk = ExportByteString( aTmp, eEnc, nFmt );
328         rText = OStringToOUString( aTmp, eEnc );
329         return bOk;
330     }
331     //  nSizeLimit not needed for OUString
332 
333     SvMemoryStream aStrm;
334     aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
335     SetNoEndianSwap( aStrm );       //! no swapping in memory
336     // mba: no BaseURL for data exc
337     if( ExportStream( aStrm, OUString(), nFmt ) )
338     {
339         aStrm.WriteUInt16( 0 );
340         rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
341         return true;
342     }
343     rText.clear();
344     return false;
345 
346     // ExportStream must handle RTL_TEXTENCODING_UNICODE
347 }
348 
ExportByteString(OString & rText,rtl_TextEncoding eEnc,SotClipboardFormatId nFmt)349 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
350 {
351     OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
352     if ( eEnc == RTL_TEXTENCODING_UNICODE )
353         eEnc = osl_getThreadTextEncoding();
354 
355     if (!nSizeLimit)
356         nSizeLimit = SAL_MAX_UINT16;
357 
358     SvMemoryStream aStrm;
359     aStrm.SetStreamCharSet( eEnc );
360     SetNoEndianSwap( aStrm );       //! no swapping in memory
361     // mba: no BaseURL for data exchange
362     if( ExportStream( aStrm, OUString(), nFmt ) )
363     {
364         aStrm.WriteChar( 0 );
365         if( aStrm.TellEnd() <= nSizeLimit )
366         {
367             rText = static_cast<const sal_Char*>(aStrm.GetData());
368             return true;
369         }
370     }
371     rText.clear();
372     return false;
373 }
374 
ImportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)375 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
376 {
377     if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
378     {
379         if( ExtText2Doc( rStrm ) )      // evaluate pExtOptions
380             return true;
381     }
382     if( nFmt == SotClipboardFormatId::SYLK )
383     {
384         if( Sylk2Doc( rStrm ) )
385             return true;
386     }
387     if( nFmt == SotClipboardFormatId::DIF )
388     {
389         if( Dif2Doc( rStrm ) )
390             return true;
391     }
392     if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
393     {
394         if( RTF2Doc( rStrm, rBaseURL ) )
395             return true;
396     }
397     if( nFmt == SotClipboardFormatId::LINK )
398         return true;            // Link-Import?
399     if ( nFmt == SotClipboardFormatId::HTML )
400     {
401         if( HTML2Doc( rStrm, rBaseURL ) )
402             return true;
403     }
404     if ( nFmt == SotClipboardFormatId::HTML_SIMPLE )
405     {
406         MSE40HTMLClipFormatObj aMSE40ClpObj;                // needed to skip the header data
407         SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
408         if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) )
409             return true;
410     }
411 
412     return false;
413 }
414 
ExportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)415 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
416 {
417     if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
418     {
419         if( Doc2Text( rStrm ) )
420             return true;
421     }
422     if( nFmt == SotClipboardFormatId::SYLK )
423     {
424         if( Doc2Sylk( rStrm ) )
425             return true;
426     }
427     if( nFmt == SotClipboardFormatId::DIF )
428     {
429         if( Doc2Dif( rStrm ) )
430             return true;
431     }
432     if( nFmt == SotClipboardFormatId::LINK && !bAll )
433     {
434         OUString aDocName;
435         if ( pDoc->IsClipboard() )
436             aDocName = ScGlobal::GetClipDocName();
437         else
438         {
439             SfxObjectShell* pShell = pDoc->GetDocumentShell();
440             if (pShell)
441                 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );
442         }
443 
444         OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
445         if( !aDocName.isEmpty() )
446         {
447             // Always use Calc A1 syntax for paste link.
448             OUString aRefName;
449             ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
450             if( bSingle )
451                 aRefName = aRange.aStart.Format(nFlags, pDoc, formula::FormulaGrammar::CONV_OOO);
452             else
453             {
454                 if( aRange.aStart.Tab() != aRange.aEnd.Tab() )
455                     nFlags |= ScRefFlags::TAB2_3D;
456                 aRefName = aRange.Format(nFlags, pDoc, formula::FormulaGrammar::CONV_OOO);
457             }
458             OUString aAppName = Application::GetAppName();
459 
460             // extra bits are used to tell the client to prefer external
461             // reference link.
462             OUString const aExtraBits("calc:extref");
463 
464             WriteUnicodeOrByteString( rStrm, aAppName, true );
465             WriteUnicodeOrByteString( rStrm, aDocName, true );
466             WriteUnicodeOrByteString( rStrm, aRefName, true );
467             WriteUnicodeOrByteString( rStrm, aExtraBits, true );
468             if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
469                 rStrm.WriteUInt16( 0 );
470             else
471                 rStrm.WriteChar( 0 );
472             return rStrm.GetError() == ERRCODE_NONE;
473         }
474     }
475     if( nFmt == SotClipboardFormatId::HTML )
476     {
477         if( Doc2HTML( rStrm, rBaseURL ) )
478             return true;
479     }
480     if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
481     {
482         if( Doc2RTF( rStrm ) )
483             return true;
484     }
485 
486     return false;
487 }
488 
WriteUnicodeOrByteString(SvStream & rStrm,const OUString & rString,bool bZero)489 void ScImportExport::WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero )
490 {
491     rtl_TextEncoding eEnc = rStrm.GetStreamCharSet();
492     if ( eEnc == RTL_TEXTENCODING_UNICODE )
493     {
494         if ( !lcl_IsEndianSwap( rStrm ) )
495             rStrm.WriteBytes(rString.getStr(), rString.getLength() * sizeof(sal_Unicode));
496         else
497         {
498             const sal_Unicode* p = rString.getStr();
499             const sal_Unicode* const pStop = p + rString.getLength();
500             while ( p < pStop )
501             {
502                 rStrm.WriteUInt16( *p );
503             }
504         }
505         if ( bZero )
506             rStrm.WriteUInt16( 0 );
507     }
508     else
509     {
510         OString aByteStr(OUStringToOString(rString, eEnc));
511         rStrm.WriteOString( aByteStr );
512         if ( bZero )
513             rStrm.WriteChar( 0 );
514     }
515 }
516 
517 // This function could be replaced by endlub()
WriteUnicodeOrByteEndl(SvStream & rStrm)518 void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm )
519 {
520     if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
521     {   // same as endl() but unicode
522         switch ( rStrm.GetLineDelimiter() )
523         {
524             case LINEEND_CR :
525                 rStrm.WriteUInt16( '\r' );
526             break;
527             case LINEEND_LF :
528                 rStrm.WriteUInt16( '\n' );
529             break;
530             default:
531                 rStrm.WriteUInt16( '\r' ).WriteUInt16( '\n' );
532         }
533     }
534     else
535         endl( rStrm );
536 }
537 
SetNoEndianSwap(SvStream & rStrm)538 void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
539 {
540 #ifdef OSL_BIGENDIAN
541     rStrm.SetEndian( SvStreamEndian::BIG );
542 #else
543     rStrm.SetEndian( SvStreamEndian::LITTLE );
544 #endif
545 }
546 
// Classification of a quote character found while parsing a separated text
// field, as returned by lcl_isFieldEndQuote() and
// lcl_isEscapedOrFieldEndQuote().
enum QuoteType
{
    FIELDSTART_QUOTE,   // first quote in a field
    FIRST_QUOTE,        // first of a doubled quote
    SECOND_QUOTE,       // second of a doubled quote
    FIELDEND_QUOTE,     // quote that ends the field
    DONTKNOW_QUOTE      // unescaped quote not considered as end of field
};
555 
556 /** Determine if *p is a quote that ends a quoted field.
557 
558     Precondition: we are parsing a quoted field already and *p is a quote.
559 
560     @return
561         FIELDEND_QUOTE if end of field quote
562         DONTKNOW_QUOTE anything else
563  */
lcl_isFieldEndQuote(const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode & rcDetectSep)564 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
565 {
566     // Due to broken CSV generators that don't double embedded quotes check if
567     // a field separator immediately or with trailing spaces follows the quote,
568     // only then end the field, or at end of string.
569     const sal_Unicode cBlank = ' ';
570     if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
571         return FIELDEND_QUOTE;
572     // Detect a possible blank separator if it's not already in the list (which
573     // was checked right above for p[1]==cBlank).
574     if (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank)
575         rcDetectSep = cBlank;
576     while (p[1] == cBlank)
577         ++p;
578     if (!p[1] || ScGlobal::UnicodeStrChr( pSeps, p[1]))
579         return FIELDEND_QUOTE;
580     return DONTKNOW_QUOTE;
581 }
582 
583 /** Determine if *p is a quote that is escaped by being doubled or ends a
584     quoted field.
585 
586     Precondition: *p is a quote.
587 
588     @param nQuotes
589         Quote characters encountered so far.
590         Odd (after opening quote) means either no embedded quotes or only quote
591         pairs so far.
592         Even means either not in a quoted field or already one quote
593         encountered, the first of a pair.
594 
595     @return
596         FIELDSTART_QUOTE if first quote in a field, either starting content or
597                             embedded so caller should check beforehand.
598         FIRST_QUOTE      if first of a doubled quote
599         SECOND_QUOTE     if second of a doubled quote
600         FIELDEND_QUOTE   if end of field quote
601         DONTKNOW_QUOTE   if an unescaped quote we don't consider as end of field,
602                             do not increment nQuotes in caller then!
603  */
lcl_isEscapedOrFieldEndQuote(sal_Int32 nQuotes,const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode cStr,sal_Unicode & rcDetectSep)604 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
605         const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
606 {
607     if ((nQuotes % 2) == 0)
608     {
609         if (p[-1] == cStr)
610             return SECOND_QUOTE;
611         else
612         {
613             SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
614             return FIELDSTART_QUOTE;
615         }
616     }
617     if (p[1] == cStr)
618         return FIRST_QUOTE;
619     return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
620 }
621 
622 /** Append characters of [p1,p2) to rField.
623 
624     @returns TRUE if ok; FALSE if data overflow, truncated
625  */
lcl_appendLineData(OUString & rField,const sal_Unicode * p1,const sal_Unicode * p2)626 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
627 {
628     OSL_ENSURE( rField.getLength() + (p2 - p1) <= SAL_MAX_UINT16, "lcl_appendLineData: data overflow");
629     if (rField.getLength() + (p2 - p1) <= SAL_MAX_UINT16)
630     {
631         rField += OUString( p1, sal::static_int_cast<sal_Int32>( p2 - p1 ) );
632         return true;
633     }
634     else
635     {
636         rField += OUString( p1, SAL_MAX_UINT16 - rField.getLength() );
637         return false;
638     }
639 }
640 
// How lcl_ScanString() treats doubled quote characters within a quoted
// string.
enum class DoubledQuoteMode
{
    KEEP_ALL,   // both are taken, additionally start and end quote are included in string
    ESCAPE,     // escaped quote, one is taken, one ignored
};
646 
/** Scan a quoted string starting at p and append its content to rString.

    @param p
        Start position in a zero-terminated buffer. Unless eMode is
        KEEP_ALL the character at p is skipped as the opening quote.
    @param rString
        Receives the scanned content, appended via lcl_appendLineData().
    @param pSeps
        Zero-terminated list of field separators, passed on to
        lcl_isFieldEndQuote() in ESCAPE mode.
    @param cStr
        The quote character.
    @param eMode
        How doubled quotes are treated, see DoubledQuoteMode.
    @param rbOverflowCell
        Set to true if lcl_appendLineData() reported truncation; otherwise
        left unchanged.

    @return position at which scanning stopped.
 */
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rString,
            const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
    if (eMode != DoubledQuoteMode::KEEP_ALL)
        p++;    //! jump over opening quote
    bool bCont;
    do
    {
        // Each pass of this loop scans one segment [p0,p) and appends it.
        bCont = false;
        const sal_Unicode* p0 = p;
        for( ;; )
        {
            if( !*p )
                break;
            if( *p == cStr )
            {
                // Note: p is advanced past the quote by the test itself.
                if ( *++p != cStr )
                {
                    // break or continue for loop
                    if (eMode == DoubledQuoteMode::ESCAPE)
                    {
                        // A single quote ends the string only if it looks
                        // like a field end; otherwise keep scanning.
                        sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
                        if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
                            break;
                        else
                            continue;
                    }
                    else
                        break;
                }
                // doubled quote char
                switch ( eMode )
                {
                    case DoubledQuoteMode::KEEP_ALL :
                        p++;            // both for us (not breaking for-loop)
                    break;
                    case DoubledQuoteMode::ESCAPE :
                        p++;            // one for us (breaking for-loop)
                        bCont = true;   // and more
                    break;
                }
                if ( eMode == DoubledQuoteMode::ESCAPE )
                    break;
            }
            else
                p++;
        }
        if ( p0 < p )
        {
            // Outside KEEP_ALL mode the character before p is left out of the
            // appended segment if scanning did not stop at the end of the
            // buffer or if that character is a quote.
            if (!lcl_appendLineData( rString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p)))
                rbOverflowCell = true;
        }
    } while ( bCont );
    return p;
}
702 
lcl_UnescapeSylk(OUString & rString,SylkVersion eVersion)703 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
704 {
705     // Older versions didn't escape the semicolon.
706     // Older versions quoted the string and doubled embedded quotes, but not
707     // the semicolons, which was plain wrong.
708     if (eVersion >= SylkVersion::OOO32)
709         rString = rString.replaceAll(";;", ";");
710     else
711         rString = rString.replaceAll("\"\"", "\"");
712 
713     rString = rString.replaceAll(SYLK_LF, "\n");
714 }
715 
/** Scan a quoted SYLK string field and append its unescaped content to
    rString.

    @param p
        Position of the opening quote in a zero-terminated buffer.
    @param rString
        Receives the content between the quotes; if no closing quote is
        determined, all remaining data is taken. lcl_UnescapeSylk() is
        applied to the result.
    @param eVersion
        Selects the escaping scheme: from SylkVersion::OOO32 on a semicolon
        is escaped by doubling, before that a quote is.

    @return position at which scanning stopped: the closing quote if the
        field was ended by a following semicolon, otherwise the end of the
        buffer.
 */
static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pOpenQuote = p;
    const sal_Unicode* pCloseQuote = nullptr;
    const bool bSemicolonEscaping = (eVersion >= SylkVersion::OOO32);

    for (++p; *p; ++p)
    {
        if (*p != '"')
            continue;

        // Candidate for the closing quote, unless it turns out to be part
        // of an escape sequence.
        pCloseQuote = p;
        const sal_Unicode cNext = p[1];
        if (bSemicolonEscaping)
        {
            if (cNext != ';')
                continue;
            if (p[2] != ';')
                break;              // end field
            p += 2;                 // escaped ';'
            pCloseQuote = nullptr;
        }
        else if (cNext == '"')
        {
            ++p;                    // escaped '"'
            pCloseQuote = nullptr;
        }
        else if (cNext == ';')
            break;                  // end field
    }

    if (!pCloseQuote)
        pCloseQuote = p;  // Take all data as string.
    rString += OUString(pOpenQuote + 1, sal::static_int_cast<sal_Int32>( pCloseQuote - pOpenQuote - 1 ) );
    lcl_UnescapeSylk( rString, eVersion);
    return p;
}
757 
lcl_ScanSylkFormula(const sal_Unicode * p,OUString & rString,SylkVersion eVersion)758 static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
759         OUString& rString, SylkVersion eVersion )
760 {
761     const sal_Unicode* pStart = p;
762     if (eVersion >= SylkVersion::OOO32)
763     {
764         while (*p)
765         {
766             if (*p == ';')
767             {
768                 if (*(p+1) == ';')
769                     ++p;        // escaped ';'
770                 else
771                     break;      // end field
772             }
773             ++p;
774         }
775         rString += OUString( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
776         lcl_UnescapeSylk( rString, eVersion);
777     }
778     else
779     {
780         // Nasty. If in old versions the formula contained a semicolon, it was
781         // quoted and embedded quotes were doubled, but semicolons were not. If
782         // there was no semicolon, it could still contain quotes and doubled
783         // embedded quotes if it was something like ="a""b", which was saved as
784         // E"a""b" as is and has to be preserved, even if older versions
785         // couldn't even load it correctly. However, theoretically another
786         // field might follow and thus the line contain a semicolon again, such
787         // as ...;E"a""b";...
788         bool bQuoted = false;
789         if (*p == '"')
790         {
791             // May be a quoted expression or just a string constant expression
792             // with quotes.
793             while (*(++p))
794             {
795                 if (*p == '"')
796                 {
797                     if (*(p+1) == '"')
798                         ++p;            // escaped '"'
799                     else
800                         break;          // closing '"', had no ';' yet
801                 }
802                 else if (*p == ';')
803                 {
804                     bQuoted = true;     // ';' within quoted expression
805                     break;
806                 }
807             }
808             p = pStart;
809         }
810         if (bQuoted)
811             p = lcl_ScanSylkString( p, rString, eVersion);
812         else
813         {
814             while (*p && *p != ';')
815                 ++p;
816             rString += OUString( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
817         }
818     }
819     return p;
820 }
821 
lcl_DoubleEscapeChar(OUString & rString,sal_Unicode cStr)822 static void lcl_DoubleEscapeChar( OUString& rString, sal_Unicode cStr )
823 {
824     sal_Int32 n = 0;
825     while( ( n = rString.indexOf( cStr, n ) ) != -1 )
826     {
827         rString = rString.replaceAt( n, 0, OUString(cStr) );
828         n += 2;
829     }
830 }
831 
lcl_WriteString(SvStream & rStrm,OUString & rString,sal_Unicode cQuote,sal_Unicode cEsc)832 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
833 {
834     if (cEsc)
835         lcl_DoubleEscapeChar( rString, cEsc );
836 
837     if (cQuote)
838     {
839         rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);
840     }
841 
842     ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
843 }
844 
lcl_WriteSimpleString(SvStream & rStrm,const OUString & rString)845 static void lcl_WriteSimpleString( SvStream& rStrm, const OUString& rString )
846 {
847     ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
848 }
849 
Text2Doc(SvStream & rStrm)850 bool ScImportExport::Text2Doc( SvStream& rStrm )
851 {
852     bool bOk = true;
853 
854     sal_Unicode pSeps[2];
855     pSeps[0] = cSep;
856     pSeps[1] = 0;
857 
858     ScSetStringParam aSetStringParam;
859     aSetStringParam.mbCheckLinkFormula = true;
860 
861     SCCOL nStartCol = aRange.aStart.Col();
862     SCROW nStartRow = aRange.aStart.Row();
863     SCCOL nEndCol = aRange.aEnd.Col();
864     SCROW nEndRow = aRange.aEnd.Row();
865     sal_uLong  nOldPos = rStrm.Tell();
866     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
867     bool   bData = !bSingle;
868     if( !bSingle)
869         bOk = StartPaste();
870 
871     while( bOk )
872     {
873         OUString aLine;
874         OUString aCell;
875         SCROW nRow = nStartRow;
876         rStrm.Seek( nOldPos );
877         for( ;; )
878         {
879             rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
880             if( rStrm.eof() )
881                 break;
882             SCCOL nCol = nStartCol;
883             const sal_Unicode* p = aLine.getStr();
884             while( *p )
885             {
886                 aCell.clear();
887                 const sal_Unicode* q = p;
888                 while (*p && *p != cSep)
889                 {
890                     // Always look for a pairing quote and ignore separator in between.
891                     while (*p && *p == cStr)
892                         q = p = lcl_ScanString( p, aCell, pSeps, cStr, DoubledQuoteMode::KEEP_ALL, bOverflowCell );
893                     // All until next separator or quote.
894                     while (*p && *p != cSep && *p != cStr)
895                         ++p;
896                     if (!lcl_appendLineData( aCell, q, p))
897                         bOverflowCell = true;   // display warning on import
898                     q = p;
899                 }
900                 if (*p)
901                     ++p;
902                 if (ValidCol(nCol) && ValidRow(nRow) )
903                 {
904                     if( bSingle )
905                     {
906                         if (nCol>nEndCol) nEndCol = nCol;
907                         if (nRow>nEndRow) nEndRow = nRow;
908                     }
909                     if( bData && nCol <= nEndCol && nRow <= nEndRow )
910                         pDoc->SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
911                 }
912                 else                            // too many columns/rows
913                 {
914                     if (!ValidRow(nRow))
915                         bOverflowRow = true;    // display warning on import
916                     if (!ValidCol(nCol))
917                         bOverflowCol = true;    // display warning on import
918                 }
919                 ++nCol;
920             }
921             ++nRow;
922         }
923 
924         if( !bData )
925         {
926             aRange.aEnd.SetCol( nEndCol );
927             aRange.aEnd.SetRow( nEndRow );
928             bOk = StartPaste();
929             bData = true;
930         }
931         else
932             break;
933     }
934 
935     EndPaste();
936     if (bOk && mbImportBroadcast)
937     {
938         pDoc->BroadcastCells(aRange, SfxHintId::ScDataChanged);
939         pDocSh->PostDataChanged();
940     }
941 
942     return bOk;
943 }
944 
945 //  Extended Ascii-Import
946 
lcl_PutString(ScDocumentImport & rDocImport,bool bUseDocImport,SCCOL nCol,SCROW nRow,SCTAB nTab,const OUString & rStr,sal_uInt8 nColFormat,SvNumberFormatter * pFormatter,bool bDetectNumFormat,bool bSkipEmptyCells,const::utl::TransliterationWrapper & rTransliteration,CalendarWrapper & rCalendar,const::utl::TransliterationWrapper * pSecondTransliteration,CalendarWrapper * pSecondCalendar)947 static bool lcl_PutString(
948     ScDocumentImport& rDocImport, bool bUseDocImport,
949     SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
950     SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bSkipEmptyCells,
951     const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
952     const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
953 {
954     ScDocument* pDoc = &rDocImport.getDoc();
955     bool bMultiLine = false;
956     if ( nColFormat == SC_COL_SKIP || !ValidCol(nCol) || !ValidRow(nRow) )
957         return bMultiLine;
958     if ( rStr.isEmpty() )
959     {
960         if ( !bSkipEmptyCells )
961         {   // delete destination cell
962             if ( bUseDocImport )
963                 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
964             else
965                 pDoc->SetString( nCol, nRow, nTab, rStr );
966         }
967         return false;
968     }
969 
970     if ( nColFormat == SC_COL_TEXT )
971     {
972         double fDummy;
973         sal_uInt32 nIndex = 0;
974         if (pFormatter->IsNumberFormat(rStr, nIndex, fDummy))
975         {
976             // Set the format of this cell to Text.
977             sal_uInt32 nFormat = pFormatter->GetStandardFormat(SvNumFormatType::TEXT);
978             ScPatternAttr aNewAttrs(pDoc->GetPool());
979             SfxItemSet& rSet = aNewAttrs.GetItemSet();
980             rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
981             pDoc->ApplyPattern(nCol, nRow, nTab, aNewAttrs);
982 
983         }
984         if ( bUseDocImport )
985         {
986             if(ScStringUtil::isMultiline(rStr))
987             {
988                 ScFieldEditEngine& rEngine = pDoc->GetEditEngine();
989                 rEngine.SetText(rStr);
990                 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
991                 return true;
992             }
993             else
994             {
995                 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
996                 return false;
997             }
998         } else
999         {
1000             pDoc->SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
1001             return bMultiLine;
1002         }
1003     }
1004 
1005     if ( nColFormat == SC_COL_ENGLISH )
1006     {
1007         //! SetString with Extra-Flag ???
1008 
1009         SvNumberFormatter* pDocFormatter = pDoc->GetFormatTable();
1010         sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
1011         double fVal;
1012         if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
1013         {
1014             // Numberformat will not be set to English
1015             if ( bUseDocImport )
1016                 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
1017             else
1018                 pDoc->SetValue( nCol, nRow, nTab, fVal );
1019             return bMultiLine;
1020         }
1021         // else, continue with SetString
1022     }
1023     else if ( nColFormat != SC_COL_STANDARD ) // Datumformats
1024     {
1025         const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
1026         sal_Int32 nLen = rStr.getLength();
1027         sal_Int32 nStart[nMaxNumberParts];
1028         sal_Int32 nEnd[nMaxNumberParts];
1029 
1030         sal_uInt16 nDP, nMP, nYP;
1031         switch ( nColFormat )
1032         {
1033             case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; break;
1034             case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; break;
1035             case SC_COL_DMY:
1036             default:         nDP = 0; nMP = 1; nYP = 2; break;
1037         }
1038 
1039         sal_uInt16 nFound = 0;
1040         bool bInNum = false;
1041         for ( sal_Int32 nPos=0; nPos<nLen && (bInNum ||
1042                     nFound<nMaxNumberParts); nPos++ )
1043         {
1044             if (bInNum && nFound == 3 && nColFormat == SC_COL_YMD &&
1045                     nPos <= nStart[nFound]+2 && rStr[nPos] == 'T')
1046                 bInNum = false;     // ISO-8601: YYYY-MM-DDThh:mm...
1047             else if ((((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
1048                         && ScGlobal::pCharClass->isLetterNumeric( rStr, nPos))
1049                     || ScGlobal::pCharClass->isDigit( rStr, nPos))
1050             {
1051                 if (!bInNum)
1052                 {
1053                     bInNum = true;
1054                     nStart[nFound] = nPos;
1055                     ++nFound;
1056                 }
1057                 nEnd[nFound-1] = nPos;
1058             }
1059             else
1060                 bInNum = false;
1061         }
1062 
1063         if ( nFound == 1 )
1064         {
1065             //  try to break one number (without separators) into date fields
1066 
1067             sal_Int32 nDateStart = nStart[0];
1068             sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;
1069 
1070             if ( nDateLen >= 5 && nDateLen <= 8 &&
1071                     ScGlobal::pCharClass->isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
1072             {
1073                 //  6 digits: 2 each for day, month, year
1074                 //  8 digits: 4 for year, 2 each for day and month
1075                 //  5 or 7 digits: first field is shortened by 1
1076 
1077                 bool bLongYear = ( nDateLen >= 7 );
1078                 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );
1079 
1080                 sal_uInt16 nFieldStart = nDateStart;
1081                 for (sal_uInt16 nPos=0; nPos<3; nPos++)
1082                 {
1083                     sal_uInt16 nFieldEnd = nFieldStart + 1;     // default: 2 digits
1084                     if ( bLongYear && nPos == nYP )
1085                         nFieldEnd += 2;                     // 2 extra digits for long year
1086                     if ( bShortFirst && nPos == 0 )
1087                         --nFieldEnd;                        // first field shortened?
1088 
1089                     nStart[nPos] = nFieldStart;
1090                     nEnd[nPos]   = nFieldEnd;
1091                     nFieldStart  = nFieldEnd + 1;
1092                 }
1093                 nFound = 3;
1094             }
1095         }
1096 
1097         if ( nFound >= 3 )
1098         {
1099             using namespace ::com::sun::star;
1100             bool bSecondCal = false;
1101             sal_uInt16 nDay  = static_cast<sal_uInt16>(rStr.copy( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ).toInt32());
1102             sal_uInt16 nYear = static_cast<sal_uInt16>(rStr.copy( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ).toInt32());
1103             OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
1104             sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
1105             if (!nMonth)
1106             {
1107                 static const char aSepShortened[] = "SEP";
1108                 uno::Sequence< i18n::CalendarItem2 > xMonths;
1109                 sal_Int32 i, nMonthCount;
1110                 //  first test all month names from local international
1111                 xMonths = rCalendar.getMonths();
1112                 nMonthCount = xMonths.getLength();
1113                 for (i=0; i<nMonthCount && !nMonth; i++)
1114                 {
1115                     if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
1116                          rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
1117                         nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1118                     else if ( i == 8 && rTransliteration.isEqual( "SEPT",
1119                                 xMonths[i].AbbrevName ) &&
1120                             rTransliteration.isEqual( aMStr, aSepShortened ) )
1121                     {   // correct English abbreviation is SEPT,
1122                         // but data mostly contains SEP only
1123                         nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1124                     }
1125                 }
1126                 //  if none found, then test english month names
1127                 if ( !nMonth && pSecondCalendar && pSecondTransliteration )
1128                 {
1129                     xMonths = pSecondCalendar->getMonths();
1130                     nMonthCount = xMonths.getLength();
1131                     for (i=0; i<nMonthCount && !nMonth; i++)
1132                     {
1133                         if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
1134                              pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
1135                         {
1136                             nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1137                             bSecondCal = true;
1138                         }
1139                         else if ( i == 8 && pSecondTransliteration->isEqual(
1140                                     aMStr, aSepShortened ) )
1141                         {   // correct English abbreviation is SEPT,
1142                             // but data mostly contains SEP only
1143                             nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1144                             bSecondCal = true;
1145                         }
1146                     }
1147                 }
1148             }
1149 
1150             SvNumberFormatter* pDocFormatter = pDoc->GetFormatTable();
1151             if ( nYear < 100 )
1152                 nYear = pDocFormatter->ExpandTwoDigitYear( nYear );
1153 
1154             CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
1155             sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
1156             if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
1157             {
1158                 --nMonth;
1159                 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
1160                 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
1161                 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
1162                 sal_Int16 nHour, nMinute, nSecond;
1163                 // #i14974# The imported value should have no fractional value, so set the
1164                 // time fields to zero (ICU calendar instance defaults to current date/time)
1165                 nHour = nMinute = nSecond = 0;
1166                 if (nFound > 3)
1167                     nHour = static_cast<sal_Int16>(rStr.copy( nStart[3], nEnd[3]+1-nStart[3]).toInt32());
1168                 if (nFound > 4)
1169                     nMinute = static_cast<sal_Int16>(rStr.copy( nStart[4], nEnd[4]+1-nStart[4]).toInt32());
1170                 if (nFound > 5)
1171                     nSecond = static_cast<sal_Int16>(rStr.copy( nStart[5], nEnd[5]+1-nStart[5]).toInt32());
1172                 // do not use calendar's milliseconds, to avoid fractional part truncation
1173                 double fFrac = 0.0;
1174                 if (nFound > 6)
1175                 {
1176                     sal_Unicode cDec = '.';
1177                     OUString aT = OUStringChar(cDec) + rStr.copy( nStart[6], nEnd[6]+1-nStart[6]);
1178                     rtl_math_ConversionStatus eStatus;
1179                     double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
1180                     if (eStatus == rtl_math_ConversionStatus_Ok)
1181                         fFrac = fV / 86400.0;
1182                 }
1183                 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
1184                 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
1185                 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
1186                 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
1187                 if ( pCalendar->isValid() )
1188                 {
1189                     double fDiff = DateTime(pDocFormatter->GetNullDate()) -
1190                         pCalendar->getEpochStart();
1191                     // #i14974# must use getLocalDateTime to get the same
1192                     // date values as set above
1193                     double fDays = pCalendar->getLocalDateTime() + fFrac;
1194                     fDays -= fDiff;
1195 
1196                     LanguageType eLatin, eCjk, eCtl;
1197                     pDoc->GetLanguage( eLatin, eCjk, eCtl );
1198                     LanguageType eDocLang = eLatin;     //! which language for date formats?
1199 
1200                     SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
1201                     sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
1202                     // maybe there is a special format including seconds or milliseconds
1203                     if (nFound > 5)
1204                         nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
1205 
1206                     ScAddress aPos(nCol,nRow,nTab);
1207                     if ( bUseDocImport )
1208                         rDocImport.setNumericCell(aPos, fDays);
1209                     else
1210                         pDoc->SetValue( aPos, fDays );
1211                     pDoc->SetNumberFormat(aPos, nFormat);
1212 
1213                     return bMultiLine;     // success
1214                 }
1215             }
1216         }
1217     }
1218 
1219     // Standard or date not determined -> SetString / EditCell
1220     if( rStr.indexOf( '\n' ) == -1 )
1221     {
1222         ScSetStringParam aParam;
1223         aParam.mpNumFormatter = pFormatter;
1224         aParam.mbDetectNumberFormat = bDetectNumFormat;
1225         aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
1226         aParam.mbHandleApostrophe = false;
1227         aParam.mbCheckLinkFormula = true;
1228         if ( bUseDocImport )
1229             rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
1230         else
1231             pDoc->SetString( nCol, nRow, nTab, rStr, &aParam );
1232     }
1233     else
1234     {
1235         bMultiLine = true;
1236         ScFieldEditEngine& rEngine = pDoc->GetEditEngine();
1237         rEngine.SetText(rStr);
1238         if ( bUseDocImport )
1239             rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1240         else
1241             pDoc->SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
1242     }
1243     return bMultiLine;
1244 }
1245 
/** Extract one fixed width field from a line.

    Trailing blanks are removed. If the remaining text starts and ends with a
    double quote the quotes are removed as well and rbIsQuoted is set.

    @param rLine
        The line to extract from.
    @param nStart
        Position of the first character of the field.
    @param nNext
        Position behind the last character of the field; limited to the
        line's length.
    @param rbIsQuoted
        Set to whether the field was enclosed in double quotes. Not touched
        if the field is empty because nNext <= nStart.
    @param rbOverflowCell
        Set to true if the content is longer than SAL_MAX_UINT16 characters
        and was truncated; otherwise not touched.

    @return
        The field content, at most SAL_MAX_UINT16 characters.
 */
static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
                     bool& rbIsQuoted, bool& rbOverflowCell )
{
    sal_Int32 nLen = rLine.getLength();
    if (nNext > nLen)
        nNext = nLen;
    if ( nNext <= nStart )
        return EMPTY_OUSTRING;

    const sal_Unicode* pStr = rLine.getStr();

    // nSpace is the position behind the last non-blank character.
    sal_Int32 nSpace = nNext;
    while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
        --nSpace;

    // The second comparison is only evaluated if the field starts with a
    // quote, hence nSpace > nStart and nSpace-1 is a valid position.
    rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
    if (rbIsQuoted)
    {
        // Content without the enclosing quotes. A field consisting of one
        // single quote character yields -1 here, treat that as empty.
        const sal_Int32 nContentLen = std::max< sal_Int32 >(0, nSpace-nStart-2);
        // Check the length that is actually copied, so quoted and unquoted
        // content share the same limit.
        bool bFits = (nContentLen <= SAL_MAX_UINT16);
        OSL_ENSURE( bFits, "lcl_GetFixed: line doesn't fit into data");
        if (bFits)
            return rLine.copy(nStart+1, nContentLen);
        else
        {
            rbOverflowCell = true;
            return rLine.copy(nStart+1, SAL_MAX_UINT16);
        }
    }
    else
    {
        bool bFits = (nSpace - nStart <= SAL_MAX_UINT16);
        OSL_ENSURE( bFits, "lcl_GetFixed: line doesn't fit into data");
        if (bFits)
            return rLine.copy(nStart, nSpace-nStart);
        else
        {
            rbOverflowCell = true;
            return rLine.copy(nStart, SAL_MAX_UINT16);
        }
    }
}
1287 
ExtText2Doc(SvStream & rStrm)1288 bool ScImportExport::ExtText2Doc( SvStream& rStrm )
1289 {
1290     if (!pExtOptions)
1291         return Text2Doc( rStrm );
1292 
1293     sal_uInt64 const nOldPos = rStrm.Tell();
1294     sal_uInt64 const nRemaining = rStrm.remainingSize();
1295     std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
1296             ScResId( STR_LOAD_DOC ), nRemaining, true ));
1297     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
1298 
1299     SCCOL nStartCol = aRange.aStart.Col();
1300     SCCOL nEndCol = aRange.aEnd.Col();
1301     SCROW nStartRow = aRange.aStart.Row();
1302     SCTAB nTab = aRange.aStart.Tab();
1303 
1304     bool    bFixed              = pExtOptions->IsFixedLen();
1305     OUString aSeps              = pExtOptions->GetFieldSeps();  // Need non-const for ReadCsvLine(),
1306     const sal_Unicode* pSeps    = aSeps.getStr();               // but it will be const anyway (asserted below).
1307     bool    bMerge              = pExtOptions->IsMergeSeps();
1308     bool    bRemoveSpace        = pExtOptions->IsRemoveSpace();
1309     sal_uInt16  nInfoCount      = pExtOptions->GetInfoCount();
1310     const sal_Int32* pColStart  = pExtOptions->GetColStart();
1311     const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
1312     long nSkipLines             = pExtOptions->GetStartRow();
1313 
1314     LanguageType eDocLang = pExtOptions->GetLanguage();
1315     SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
1316     bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
1317     bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();
1318 
1319     // For date recognition
1320     ::utl::TransliterationWrapper aTransliteration(
1321         comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
1322     aTransliteration.loadModuleIfNeeded( eDocLang );
1323     CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
1324     aCalendar.loadDefaultCalendar(
1325         LanguageTag::convertToLocale( eDocLang ) );
1326     std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
1327     std::unique_ptr< CalendarWrapper > pEnglishCalendar;
1328     if ( eDocLang != LANGUAGE_ENGLISH_US )
1329     {
1330         pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
1331             comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
1332         aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
1333         pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1334         pEnglishCalendar->loadDefaultCalendar(
1335             LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
1336     }
1337 
1338     OUString aLine;
1339     OUString aCell;
1340     sal_uInt16 i;
1341     SCROW nRow = nStartRow;
1342     sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
1343 
1344     while(--nSkipLines>0)
1345     {
1346         aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
1347         if ( rStrm.eof() )
1348             break;
1349     }
1350 
1351     // Determine range for Undo.
1352     // We don't need this during import of a file to a new sheet or document...
1353     bool bDetermineRange = bUndo;
1354 
1355     // Row heights don't need to be adjusted on the fly if EndPaste() is called
1356     // afterwards, which happens only if bDetermineRange. This variable also
1357     // survives the toggle of bDetermineRange down at the end of the do{} loop.
1358     bool bRangeIsDetermined = bDetermineRange;
1359 
1360     bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();
1361 
1362     sal_uLong nOriginalStreamPos = rStrm.Tell();
1363 
1364     ScDocumentImport aDocImport(*pDoc);
1365     do
1366     {
1367         for( ;; )
1368         {
1369             aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
1370             if ( rStrm.eof() && aLine.isEmpty() )
1371                 break;
1372 
1373             assert(pSeps == aSeps.getStr());
1374 
1375             if ( nRow > pDoc->MaxRow() )
1376             {
1377                 bOverflowRow = true;    // display warning on import
1378                 break;  // for
1379             }
1380 
1381             EmbeddedNullTreatment( aLine);
1382 
1383             sal_Int32 nLineLen = aLine.getLength();
1384             SCCOL nCol = nStartCol;
1385             bool bMultiLine = false;
1386             if ( bFixed ) //  Fixed line length
1387             {
1388                 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1389                 // overflow if there is really data following to be put behind
1390                 // the last column, which doesn't happen if info is
1391                 // SC_COL_SKIP.
1392                 for ( i=0; i<nInfoCount && nCol <= pDoc->MaxCol()+1; i++ )
1393                 {
1394                     sal_uInt8 nFmt = pColFormat[i];
1395                     if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
1396                     {
1397                         if (nCol > pDoc->MaxCol())
1398                             bOverflowCol = true;    // display warning on import
1399                         else if (!bDetermineRange)
1400                         {
1401                             sal_Int32 nStart = pColStart[i];
1402                             sal_Int32 nNext = ( i+1 < nInfoCount ) ? pColStart[i+1] : nLineLen;
1403                             bool bIsQuoted = false;
1404                             aCell = lcl_GetFixed( aLine, nStart, nNext, bIsQuoted, bOverflowCell );
1405                             if (bIsQuoted && bQuotedAsText)
1406                                 nFmt = SC_COL_TEXT;
1407 
1408                             bMultiLine |= lcl_PutString(
1409                                 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1410                                 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar,
1411                                 pEnglishTransliteration.get(), pEnglishCalendar.get());
1412                         }
1413                         ++nCol;
1414                     }
1415                 }
1416             }
1417             else // Search for the separator
1418             {
1419                 SCCOL nSourceCol = 0;
1420                 sal_uInt16 nInfoStart = 0;
1421                 const sal_Unicode* p = aLine.getStr();
1422                 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1423                 // overflow if there is really data following to be put behind
1424                 // the last column, which doesn't happen if info is
1425                 // SC_COL_SKIP.
1426                 while (*p && nCol <= pDoc->MaxCol()+1)
1427                 {
1428                     bool bIsQuoted = false;
1429                     p = ScImportExport::ScanNextFieldFromString( p, aCell,
1430                             cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
1431 
1432                     sal_uInt8 nFmt = SC_COL_STANDARD;
1433                     for ( i=nInfoStart; i<nInfoCount; i++ )
1434                     {
1435                         if ( pColStart[i] == nSourceCol + 1 )       // pColStart is 1-based
1436                         {
1437                             nFmt = pColFormat[i];
1438                             nInfoStart = i + 1;     // ColInfos are in succession
1439                             break;  // for
1440                         }
1441                     }
1442                     if ( nFmt != SC_COL_SKIP )
1443                     {
1444                         if (nCol > pDoc->MaxCol())
1445                             bOverflowCol = true;    // display warning on import
1446                         else if (!bDetermineRange)
1447                         {
1448                             if (bIsQuoted && bQuotedAsText)
1449                                 nFmt = SC_COL_TEXT;
1450 
1451                             bMultiLine |= lcl_PutString(
1452                                 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1453                                 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration,
1454                                 aCalendar, pEnglishTransliteration.get(), pEnglishCalendar.get());
1455                         }
1456                         ++nCol;
1457                     }
1458 
1459                     ++nSourceCol;
1460                 }
1461             }
1462             if (nEndCol < nCol)
1463                 nEndCol = nCol;     //! points to the next free or even rDoc.MaxCol()+2
1464 
1465             if (!bDetermineRange)
1466             {
1467                 if (bMultiLine && !bRangeIsDetermined && pDocSh)
1468                     pDocSh->AdjustRowHeight( nRow, nRow, nTab);
1469                 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
1470             }
1471             ++nRow;
1472         }
1473         // so far nRow/nEndCol pointed to the next free
1474         if (nRow > nStartRow)
1475             --nRow;
1476         if (nEndCol > nStartCol)
1477             nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), pDoc->MaxCol());
1478 
1479         if (bDetermineRange)
1480         {
1481             aRange.aEnd.SetCol( nEndCol );
1482             aRange.aEnd.SetRow( nRow );
1483 
1484             if ( !mbApi && nStartCol != nEndCol &&
1485                  !pDoc->IsBlockEmpty( nTab, nStartCol + 1, nStartRow, nEndCol, nRow ) )
1486             {
1487                 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
1488                 if (aBox.run() != RET_YES)
1489                 {
1490                     return false;
1491                 }
1492             }
1493 
1494             rStrm.Seek( nOriginalStreamPos );
1495             nRow = nStartRow;
1496             if (!StartPaste())
1497             {
1498                 EndPaste(false);
1499                 return false;
1500             }
1501         }
1502 
1503         bDetermineRange = !bDetermineRange;     // toggle
1504     } while (!bDetermineRange);
1505     if ( !mbOverwriting )
1506         aDocImport.finalize();
1507 
1508     xProgress.reset();    // make room for AdjustRowHeight progress
1509     if (bRangeIsDetermined)
1510         EndPaste(false);
1511 
1512     if (mbImportBroadcast && !mbOverwriting)
1513     {
1514         pDoc->BroadcastCells(aRange, SfxHintId::ScDataChanged);
1515         pDocSh->PostDataChanged();
1516     }
1517     return true;
1518 }
1519 
EmbeddedNullTreatment(OUString & rStr)1520 void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
1521 {
1522     // A nasty workaround for data with embedded NULL characters. As long as we
1523     // can't handle them properly as cell content (things assume 0-terminated
1524     // strings at too many places) simply strip all NULL characters from raw
1525     // data. Excel does the same. See fdo#57841 for sample data.
1526 
1527     // The normal case is no embedded NULL, check first before de-/allocating
1528     // ustring stuff.
1529     sal_Unicode cNull = 0;
1530     if (rStr.indexOf( cNull) >= 0)
1531     {
1532         rStr = rStr.replaceAll( OUString( &cNull, 1), "");
1533     }
1534 }
1535 
ScanNextFieldFromString(const sal_Unicode * p,OUString & rField,sal_Unicode cStr,const sal_Unicode * pSeps,bool bMergeSeps,bool & rbIsQuoted,bool & rbOverflowCell,bool bRemoveSpace)1536 const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
1537         OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
1538         bool& rbOverflowCell, bool bRemoveSpace )
1539 {
1540     rbIsQuoted = false;
1541     rField.clear();
1542     const sal_Unicode cBlank = ' ';
1543     if (!ScGlobal::UnicodeStrChr( pSeps, cBlank))
1544     {
1545         // Cope with broken generators that put leading blanks before a quoted
1546         // field, like "field1", "field2", "..."
1547         // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
1548         const sal_Unicode* pb = p;
1549         while (*pb == cBlank)
1550             ++pb;
1551         if (*pb == cStr)
1552             p = pb;
1553     }
1554     if ( *p == cStr )           // String in quotes
1555     {
1556         rbIsQuoted = true;
1557         const sal_Unicode* p1;
1558         p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
1559         while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
1560             p++;
1561         // Append remaining unquoted and undelimited data (dirty, dirty) to
1562         // this field.
1563         if (p > p1)
1564         {
1565             const sal_Unicode* ptrim_f = p;
1566             if ( bRemoveSpace )
1567             {
1568                 while ( ptrim_f > p1  && ( *(ptrim_f - 1) == cBlank ) )
1569                     --ptrim_f;
1570             }
1571             if (!lcl_appendLineData( rField, p1, ptrim_f))
1572                 rbOverflowCell = true;
1573         }
1574         if( *p )
1575             p++;
1576     }
1577     else                        // up to delimiter
1578     {
1579         const sal_Unicode* p0 = p;
1580         while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
1581             p++;
1582         const sal_Unicode* ptrim_i = p0;
1583         const sal_Unicode* ptrim_f = p;  // [ptrim_i,ptrim_f) is cell data after trimming
1584         if ( bRemoveSpace )
1585         {
1586             while ( *ptrim_i == cBlank )
1587                 ++ptrim_i;
1588             while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) )
1589                 --ptrim_f;
1590         }
1591         if (!lcl_appendLineData( rField, ptrim_i, ptrim_f))
1592             rbOverflowCell = true;
1593         if( *p )
1594             p++;
1595     }
1596     if ( bMergeSeps )           // skip following delimiters
1597     {
1598         while ( *p && ScGlobal::UnicodeStrChr( pSeps, *p ) )
1599             p++;
1600     }
1601     return p;
1602 }
1603 
1604 namespace {
1605 
1606 /**
1607  * Check if a given string has any line break characters or separators.
1608  *
1609  * @param rStr string to inspect.
1610  * @param cSep separator character.
1611  */
hasLineBreaksOrSeps(const OUString & rStr,sal_Unicode cSep)1612 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
1613 {
1614     const sal_Unicode* p = rStr.getStr();
1615     for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
1616     {
1617         sal_Unicode c = *p;
1618         if (c == cSep)
1619             // separator found.
1620             return true;
1621 
1622         switch (c)
1623         {
1624             case '\n':
1625             case '\r':
1626                 // line break found.
1627                 return true;
1628             default:
1629                 ;
1630         }
1631     }
1632     return false;
1633 }
1634 
1635 }
1636 
Doc2Text(SvStream & rStrm)1637 bool ScImportExport::Doc2Text( SvStream& rStrm )
1638 {
1639     SCCOL nCol;
1640     SCROW nRow;
1641     SCCOL nStartCol = aRange.aStart.Col();
1642     SCROW nStartRow = aRange.aStart.Row();
1643     SCTAB nStartTab = aRange.aStart.Tab();
1644     SCCOL nEndCol = aRange.aEnd.Col();
1645     SCROW nEndRow = aRange.aEnd.Row();
1646     SCTAB nEndTab = aRange.aEnd.Tab();
1647 
1648     if (!pDoc->GetClipParam().isMultiRange() && nStartTab == nEndTab)
1649         if (!pDoc->ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
1650             return false;
1651 
1652     OUString aCellStr;
1653 
1654     bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);
1655 
1656     // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1657     std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
1658     for( SCCOL i = nStartCol; i <= nEndCol; ++i )
1659         pDoc->InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
1660     for (nRow = nStartRow; nRow <= nEndRow; nRow++)
1661     {
1662         if (bIncludeFiltered || !pDoc->RowFiltered( nRow, nStartTab ))
1663         {
1664             for (nCol = nStartCol; nCol <= nEndCol; nCol++)
1665             {
1666                 ScAddress aPos(nCol, nRow, nStartTab);
1667                 sal_uInt32 nNumFmt = pDoc->GetNumberFormat(aPos);
1668                 SvNumberFormatter* pFormatter = pDoc->GetFormatTable();
1669 
1670                 ScRefCellValue aCell(*pDoc, aPos, blockPos[ nCol - nStartCol ]);
1671                 switch (aCell.meType)
1672                 {
1673                     case CELLTYPE_FORMULA:
1674                     {
1675                         if (bFormulas)
1676                         {
1677                             aCell.mpFormula->GetFormula( aCellStr );
1678                             if( aCellStr.indexOf( cSep ) != -1 )
1679                                 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1680                             else
1681                                 lcl_WriteSimpleString( rStrm, aCellStr );
1682                         }
1683                         else
1684                         {
1685                             Color* pColor;
1686                             ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
1687 
1688                             bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1689                             if( bMultiLineText )
1690                             {
1691                                 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1692                                     aCellStr = aCellStr.replaceAll( "\n", " " );
1693                                 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1694                                     aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1695                             }
1696 
1697                             if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1698                                 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1699 
1700                             if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
1701                                 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1702                             else
1703                                 lcl_WriteSimpleString( rStrm, aCellStr );
1704                         }
1705                     }
1706                     break;
1707                     case CELLTYPE_VALUE:
1708                     {
1709                         Color* pColor;
1710                         ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
1711                         lcl_WriteSimpleString( rStrm, aCellStr );
1712                     }
1713                     break;
1714                     case CELLTYPE_NONE:
1715                     break;
1716                     default:
1717                     {
1718                         Color* pColor;
1719                         ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
1720 
1721                         bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1722                         if( bMultiLineText )
1723                         {
1724                             if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1725                                 aCellStr = aCellStr.replaceAll( "\n", " " );
1726                             else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1727                                 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1728                         }
1729 
1730                         if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1731                             aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1732 
1733                         if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
1734                             lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1735                         else
1736                             lcl_WriteSimpleString( rStrm, aCellStr );
1737                     }
1738                 }
1739                 if( nCol < nEndCol )
1740                     lcl_WriteSimpleString( rStrm, OUString(cSep) );
1741             }
1742             WriteUnicodeOrByteEndl( rStrm );
1743             if( rStrm.GetError() != ERRCODE_NONE )
1744                 break;
1745             if( nSizeLimit && rStrm.Tell() > nSizeLimit )
1746                 break;
1747         }
1748     }
1749 
1750     return rStrm.GetError() == ERRCODE_NONE;
1751 }
1752 
Sylk2Doc(SvStream & rStrm)1753 bool ScImportExport::Sylk2Doc( SvStream& rStrm )
1754 {
1755     bool bOk = true;
1756     bool bMyDoc = false;
1757     SylkVersion eVersion = SylkVersion::OTHER;
1758 
1759     // US-English separators for StringToDouble
1760     sal_Unicode const cDecSep = '.';
1761     sal_Unicode const cGrpSep = ',';
1762 
1763     SCCOL nStartCol = aRange.aStart.Col();
1764     SCROW nStartRow = aRange.aStart.Row();
1765     SCCOL nEndCol = aRange.aEnd.Col();
1766     SCROW nEndRow = aRange.aEnd.Row();
1767     sal_uLong nOldPos = rStrm.Tell();
1768     bool bData = !bSingle;
1769     ::std::vector< sal_uInt32 > aFormats;
1770 
1771     if( !bSingle)
1772         bOk = StartPaste();
1773 
1774     while( bOk )
1775     {
1776         OUString aLine;
1777         OUString aText;
1778         OString aByteLine;
1779         SCCOL nCol = nStartCol;
1780         SCROW nRow = nStartRow;
1781         SCCOL nRefCol = nCol;
1782         SCROW nRefRow = nRow;
1783         rStrm.Seek( nOldPos );
1784         for( ;; )
1785         {
1786             //! allow unicode
1787             rStrm.ReadLine( aByteLine );
1788             aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
1789             if( rStrm.eof() )
1790                 break;
1791             bool bInvalidCol = false;
1792             bool bInvalidRow = false;
1793             const sal_Unicode* p = aLine.getStr();
1794             sal_Unicode cTag = *p++;
1795             if( cTag == 'C' )       // Content
1796             {
1797                 if( *p++ != ';' )
1798                     return false;
1799 
1800                 bool bInvalidRefCol = false;
1801                 bool bInvalidRefRow = false;
1802                 while( *p )
1803                 {
1804                     sal_Unicode ch = *p++;
1805                     ch = ScGlobal::ToUpperAlpha( ch );
1806                     switch( ch )
1807                     {
1808                         case 'X':
1809                         {
1810                             bInvalidCol = false;
1811                             bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
1812                             if (bFail || nCol < 0 || pDoc->MaxCol() < nCol)
1813                             {
1814                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
1815                                 nCol = std::max<SCCOL>(0, std::min<SCCOL>(nCol, pDoc->MaxCol()));
1816                                 bInvalidCol = bOverflowCol = true;
1817                             }
1818                             break;
1819                         }
1820                         case 'Y':
1821                         {
1822                             bInvalidRow = false;
1823                             bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
1824                             if (bFail || nRow < 0 || nMaxImportRow < nRow)
1825                             {
1826                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
1827                                 nRow = std::max<SCROW>(0, std::min<SCROW>(nRow, nMaxImportRow));
1828                                 bInvalidRow = bOverflowRow = true;
1829                             }
1830                             break;
1831                         }
1832                         case 'C':
1833                         {
1834                             bInvalidRefCol = false;
1835                             bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nRefCol);
1836                             if (bFail || nRefCol < 0 || pDoc->MaxCol() < nRefCol)
1837                             {
1838                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol);
1839                                 nRefCol = std::max<SCCOL>(0, std::min<SCCOL>(nRefCol, pDoc->MaxCol()));
1840                                 bInvalidRefCol = bOverflowCol = true;
1841                             }
1842                             break;
1843                         }
1844                         case 'R':
1845                         {
1846                             bInvalidRefRow = false;
1847                             bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRefRow);
1848                             if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow)
1849                             {
1850                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow);
1851                                 nRefRow = std::max<SCROW>(0, std::min<SCROW>(nRefRow, nMaxImportRow));
1852                                 bInvalidRefRow = bOverflowRow = true;
1853                             }
1854                             break;
1855                         }
1856                         case 'K':
1857                         {
1858                             if( !bSingle &&
1859                                     ( nCol < nStartCol || nCol > nEndCol
1860                                       || nRow < nStartRow || nRow > nEndRow
1861                                       || nCol > pDoc->MaxCol() || nRow > nMaxImportRow
1862                                       || bInvalidCol || bInvalidRow ) )
1863                                 break;
1864                             if( !bData )
1865                             {
1866                                 if( nRow > nEndRow )
1867                                     nEndRow = nRow;
1868                                 if( nCol > nEndCol )
1869                                     nEndCol = nCol;
1870                                 break;
1871                             }
1872                             bool bText;
1873                             if( *p == '"' )
1874                             {
1875                                 bText = true;
1876                                 aText.clear();
1877                                 p = lcl_ScanSylkString( p, aText, eVersion);
1878                             }
1879                             else
1880                                 bText = false;
1881                             const sal_Unicode* q = p;
1882                             while( *q && *q != ';' )
1883                                 q++;
1884                             if ( !(*q == ';' && *(q+1) == 'I') && !bInvalidCol && !bInvalidRow )
1885                             {   // don't ignore value
1886                                 if( bText )
1887                                 {
1888                                     pDoc->EnsureTable(aRange.aStart.Tab());
1889                                     pDoc->SetTextCell(
1890                                         ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
1891                                 }
1892                                 else
1893                                 {
1894                                     double fVal = rtl_math_uStringToDouble( p,
1895                                             aLine.getStr() + aLine.getLength(),
1896                                             cDecSep, cGrpSep, nullptr, nullptr );
1897                                     pDoc->SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
1898                                 }
1899                             }
1900                         }
1901                         break;
1902                         case 'E':
1903                         case 'M':
1904                         {
1905                             if ( ch == 'M' )
1906                             {
1907                                 if ( nRefCol < nCol )
1908                                     nRefCol = nCol;
1909                                 if ( nRefRow < nRow )
1910                                     nRefRow = nRow;
1911                                 if ( !bData )
1912                                 {
1913                                     if( nRefRow > nEndRow )
1914                                         nEndRow = nRefRow;
1915                                     if( nRefCol > nEndCol )
1916                                         nEndCol = nRefCol;
1917                                 }
1918                             }
1919                             if( !bMyDoc || !bData )
1920                                 break;
1921                             aText = "=";
1922                             p = lcl_ScanSylkFormula( p, aText, eVersion);
1923 
1924                             if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow)))
1925                                 break;
1926 
1927                             ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
1928                             /* FIXME: do we want GRAM_ODFF_A1 instead? At the
1929                              * end it probably should be GRAM_ODFF_R1C1, since
1930                              * R1C1 is what Excel writes in SYLK, or even
1931                              * better GRAM_ENGLISH_XL_R1C1. */
1932                             const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
1933                             ScCompiler aComp( pDoc, aPos, eGrammar);
1934                             std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
1935                             pDoc->CheckLinkFormulaNeedingCheck(*xCode);
1936                             if ( ch == 'M' )
1937                             {
1938                                 ScMarkData aMark(pDoc->MaxRow(), pDoc->MaxCol());
1939                                 aMark.SelectTable( aPos.Tab(), true );
1940                                 pDoc->InsertMatrixFormula( nCol, nRow, nRefCol,
1941                                     nRefRow, aMark, EMPTY_OUSTRING, xCode.get() );
1942                             }
1943                             else
1944                             {
1945                                 ScFormulaCell* pFCell = new ScFormulaCell(
1946                                         pDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
1947                                 pDoc->SetFormulaCell(aPos, pFCell);
1948                             }
1949                         }
1950                         break;
1951                     }
1952                     while( *p && *p != ';' )
1953                         p++;
1954                     if( *p )
1955                         p++;
1956                 }
1957             }
1958             else if( cTag == 'F' )      // Format
1959             {
1960                 if( *p++ != ';' )
1961                     return false;
1962                 sal_Int32 nFormat = -1;
1963                 while( *p )
1964                 {
1965                     sal_Unicode ch = *p++;
1966                     ch = ScGlobal::ToUpperAlpha( ch );
1967                     switch( ch )
1968                     {
1969                         case 'X':
1970                         {
1971                             bInvalidCol = false;
1972                             bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
1973                             if (bFail || nCol < 0 || pDoc->MaxCol() < nCol)
1974                             {
1975                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
1976                                 nCol = std::max<SCCOL>(0, std::min<SCCOL>(nCol, pDoc->MaxCol()));
1977                                 bInvalidCol = bOverflowCol = true;
1978                             }
1979                             break;
1980                         }
1981                         case 'Y':
1982                         {
1983                             bInvalidRow = false;
1984                             bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
1985                             if (bFail || nRow < 0 || nMaxImportRow < nRow)
1986                             {
1987                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
1988                                 nRow = std::max<SCROW>(0, std::min<SCROW>(nRow, nMaxImportRow));
1989                                 bInvalidRow = bOverflowRow = true;
1990                             }
1991                             break;
1992                         }
1993                         case 'P' :
1994                             if ( bData )
1995                             {
1996                                 // F;P<n> sets format code of P;P<code> at
1997                                 // current position, or at ;X;Y if specified.
1998                                 // Note that ;X;Y may appear after ;P
1999                                 const sal_Unicode* p0 = p;
2000                                 while( *p && *p != ';' )
2001                                     p++;
2002                                 OUString aNumber(p0, p - p0);
2003                                 nFormat = aNumber.toInt32();
2004                             }
2005                             break;
2006                     }
2007                     while( *p && *p != ';' )
2008                         p++;
2009                     if( *p )
2010                         p++;
2011                 }
2012                 if ( !bData )
2013                 {
2014                     if( nRow > nEndRow )
2015                         nEndRow = nRow;
2016                     if( nCol > nEndCol )
2017                         nEndCol = nCol;
2018                 }
2019                 if ( 0 <= nFormat && nFormat < static_cast<sal_Int32>(aFormats.size()) && !bInvalidCol && !bInvalidRow )
2020                 {
2021                     sal_uInt32 nKey = aFormats[nFormat];
2022                     pDoc->ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
2023                             SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
2024                 }
2025             }
2026             else if( cTag == 'P' )
2027             {
2028                 if ( bData && *p == ';' && *(p+1) == 'P' )
2029                 {
2030                     OUString aCode( p+2 );
2031                     // unescape doubled semicolons
2032                     aCode = aCode.replaceAll(";;", ";");
2033                     // get rid of Xcl escape characters
2034                     aCode = aCode.replaceAll("\x1b", "");
2035                     sal_Int32 nCheckPos;
2036                     SvNumFormatType nType;
2037                     sal_uInt32 nKey;
2038                     pDoc->GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
2039                                                                 LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
2040                     if ( nCheckPos )
2041                         nKey = 0;
2042                     aFormats.push_back( nKey );
2043                 }
2044             }
2045             else if( cTag == 'I' && *p == 'D' )
2046             {
2047                 aLine = aLine.copy(4);
2048                 if (aLine == "CALCOOO32")
2049                     eVersion = SylkVersion::OOO32;
2050                 else if (aLine == "SCALC3")
2051                     eVersion = SylkVersion::SCALC3;
2052                 bMyDoc = (eVersion <= SylkVersion::OWN);
2053             }
2054             else if( cTag == 'E' )                      // End
2055                 break;
2056         }
2057         if( !bData )
2058         {
2059             aRange.aEnd.SetCol( nEndCol );
2060             aRange.aEnd.SetRow( nEndRow );
2061             bOk = StartPaste();
2062             bData = true;
2063         }
2064         else
2065             break;
2066     }
2067 
2068     EndPaste();
2069     return bOk;
2070 }
2071 
Doc2Sylk(SvStream & rStrm)2072 bool ScImportExport::Doc2Sylk( SvStream& rStrm )
2073 {
2074     SCCOL nCol;
2075     SCROW nRow;
2076     SCCOL nStartCol = aRange.aStart.Col();
2077     SCROW nStartRow = aRange.aStart.Row();
2078     SCCOL nEndCol = aRange.aEnd.Col();
2079     SCROW nEndRow = aRange.aEnd.Row();
2080     OUString aCellStr;
2081     OUString aValStr;
2082     lcl_WriteSimpleString( rStrm, "ID;PCALCOOO32" );
2083     WriteUnicodeOrByteEndl( rStrm );
2084 
2085     for (nRow = nStartRow; nRow <= nEndRow; nRow++)
2086     {
2087         for (nCol = nStartCol; nCol <= nEndCol; nCol++)
2088         {
2089             OUString aBufStr;
2090             double nVal;
2091             bool bForm = false;
2092             SCROW r = nRow - nStartRow + 1;
2093             SCCOL c = nCol - nStartCol + 1;
2094             ScRefCellValue aCell(*pDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));
2095             CellType eType = aCell.meType;
2096             switch( eType )
2097             {
2098                 case CELLTYPE_FORMULA:
2099                     bForm = bFormulas;
2100                     if( pDoc->HasValueData( nCol, nRow, aRange.aStart.Tab()) )
2101                         goto hasvalue;
2102                     else
2103                         goto hasstring;
2104 
2105                 case CELLTYPE_VALUE:
2106                 hasvalue:
2107                     pDoc->GetValue( nCol, nRow, aRange.aStart.Tab(), nVal );
2108 
2109                     aValStr = ::rtl::math::doubleToUString( nVal,
2110                             rtl_math_StringFormat_Automatic,
2111                             rtl_math_DecimalPlaces_Max, '.', true );
2112 
2113                     aBufStr = "C;X"
2114                             + OUString::number( c )
2115                             + ";Y"
2116                             + OUString::number( r )
2117                             + ";K"
2118                             + aValStr;
2119                     lcl_WriteSimpleString( rStrm, aBufStr );
2120                     goto checkformula;
2121 
2122                 case CELLTYPE_STRING:
2123                 case CELLTYPE_EDIT:
2124                 hasstring:
2125                     aCellStr = pDoc->GetString(nCol, nRow, aRange.aStart.Tab());
2126                     aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
2127 
2128                     aBufStr = "C;X"
2129                             + OUString::number( c )
2130                             + ";Y"
2131                             + OUString::number( r )
2132                             + ";K";
2133                     lcl_WriteSimpleString( rStrm, aBufStr );
2134                     lcl_WriteString( rStrm, aCellStr, '"', ';' );
2135 
2136                 checkformula:
2137                     if( bForm )
2138                     {
2139                         const ScFormulaCell* pFCell = aCell.mpFormula;
2140                         switch ( pFCell->GetMatrixFlag() )
2141                         {
2142                             case ScMatrixMode::Reference :
2143                                 aCellStr.clear();
2144                             break;
2145                             default:
2146                                 OUString aOUCellStr;
2147                                 pFCell->GetFormula( aOUCellStr,formula::FormulaGrammar::GRAM_PODF_A1);
2148                                 aCellStr = aOUCellStr;
2149                                 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2150                                  * the end it probably should be
2151                                  * GRAM_ODFF_R1C1, since R1C1 is what Excel
2152                                  * writes in SYLK, or even better
2153                                  * GRAM_ENGLISH_XL_R1C1. */
2154                         }
2155                         if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE &&
2156                                 aCellStr.startsWith("{") &&
2157                                 aCellStr.endsWith("}") )
2158                         {   // cut off matrix {} characters
2159                             aCellStr = aCellStr.copy(1, aCellStr.getLength()-2);
2160                         }
2161                         if ( aCellStr[0] == '=' )
2162                             aCellStr = aCellStr.copy(1);
2163                         OUString aPrefix;
2164                         switch ( pFCell->GetMatrixFlag() )
2165                         {
2166                             case ScMatrixMode::Formula :
2167                             {   // diff expression with 'M' M$-extension
2168                                 SCCOL nC;
2169                                 SCROW nR;
2170                                 pFCell->GetMatColsRows( nC, nR );
2171                                 nC += c - 1;
2172                                 nR += r - 1;
2173                                 aPrefix = ";R"
2174                                         + OUString::number( nR )
2175                                         + ";C"
2176                                         + OUString::number( nC )
2177                                         + ";M";
2178                             }
2179                             break;
2180                             case ScMatrixMode::Reference :
2181                             {   // diff expression with 'I' M$-extension
2182                                 ScAddress aPos;
2183                                 (void)pFCell->GetMatrixOrigin( aPos );
2184                                 aPrefix = ";I;R"
2185                                         + OUString::number( aPos.Row() - nStartRow + 1 )
2186                                         + ";C"
2187                                         + OUString::number( aPos.Col() - nStartCol + 1 );
2188                             }
2189                             break;
2190                             default:
2191                                 // formula Expression
2192                                 aPrefix = ";E";
2193                         }
2194                         lcl_WriteSimpleString( rStrm, aPrefix );
2195                         if ( !aCellStr.isEmpty() )
2196                             lcl_WriteString( rStrm, aCellStr, 0, ';' );
2197                     }
2198                     WriteUnicodeOrByteEndl( rStrm );
2199                     break;
2200 
2201                 default:
2202                 {
2203                     // added to avoid warnings
2204                 }
2205             }
2206         }
2207     }
2208     lcl_WriteSimpleString( rStrm, OUString( 'E' ) );
2209     WriteUnicodeOrByteEndl( rStrm );
2210     return rStrm.GetError() == ERRCODE_NONE;
2211 }
2212 
Doc2HTML(SvStream & rStrm,const OUString & rBaseURL)2213 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL )
2214 {
2215     // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2216     ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, pDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
2217         aStreamPath, aNonConvertibleChars, maFilterOptions );
2218     return rStrm.GetError() == ERRCODE_NONE;
2219 }
2220 
Doc2RTF(SvStream & rStrm)2221 bool ScImportExport::Doc2RTF( SvStream& rStrm )
2222 {
2223     //  rtl_TextEncoding is ignored in ScExportRTF
2224     ScFormatFilter::Get().ScExportRTF( rStrm, pDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
2225     return rStrm.GetError() == ERRCODE_NONE;
2226 }
2227 
Doc2Dif(SvStream & rStrm)2228 bool ScImportExport::Doc2Dif( SvStream& rStrm )
2229 {
2230     // for DIF in the clipboard, IBM_850 is always used
2231     ScFormatFilter::Get().ScExportDif( rStrm, pDoc, aRange, RTL_TEXTENCODING_IBM_850 );
2232     return true;
2233 }
2234 
Dif2Doc(SvStream & rStrm)2235 bool ScImportExport::Dif2Doc( SvStream& rStrm )
2236 {
2237     SCTAB nTab = aRange.aStart.Tab();
2238     ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
2239     pImportDoc->InitUndo( pDoc, nTab, nTab );
2240 
2241     // for DIF in the clipboard, IBM_850 is always used
2242     ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
2243 
2244     SCCOL nEndCol;
2245     SCROW nEndRow;
2246     pImportDoc->GetCellArea( nTab, nEndCol, nEndRow );
2247     // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2248     if ( nEndCol < aRange.aStart.Col() )
2249         nEndCol = aRange.aStart.Col();
2250     if ( nEndRow < aRange.aStart.Row() )
2251         nEndRow = aRange.aStart.Row();
2252     aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
2253 
2254     bool bOk = StartPaste();
2255     if (bOk)
2256     {
2257         InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2258         pDoc->DeleteAreaTab( aRange, nFlags );
2259         pImportDoc->CopyToDocument(aRange, nFlags, false, *pDoc);
2260         EndPaste();
2261     }
2262 
2263     return bOk;
2264 }
2265 
RTF2Doc(SvStream & rStrm,const OUString & rBaseURL)2266 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
2267 {
2268     std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( pDoc, aRange );
2269     if (!pImp)
2270         return false;
2271     pImp->Read( rStrm, rBaseURL );
2272     aRange = pImp->GetRange();
2273 
2274     bool bOk = StartPaste();
2275     if (bOk)
2276     {
2277         InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2278         pDoc->DeleteAreaTab( aRange, nFlags );
2279         pImp->WriteToDocument();
2280         EndPaste();
2281     }
2282     return bOk;
2283 }
2284 
HTML2Doc(SvStream & rStrm,const OUString & rBaseURL)2285 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
2286 {
2287     std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( pDoc, rBaseURL, aRange);
2288     if (!pImp)
2289         return false;
2290     pImp->Read( rStrm, rBaseURL );
2291     aRange = pImp->GetRange();
2292 
2293     bool bOk = StartPaste();
2294     if (bOk)
2295     {
2296         // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2297         // a Draw Layer but no Draw View -> create Draw Layer and View here
2298         if (pDocSh)
2299             pDocSh->MakeDrawLayer();
2300 
2301         InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2302         pDoc->DeleteAreaTab( aRange, nFlags );
2303 
2304         if (pExtOptions)
2305         {
2306             // Pick up import options if available.
2307             LanguageType eLang = pExtOptions->GetLanguage();
2308             SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
2309             bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
2310             pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber);
2311         }
2312         else
2313             // Regular import, with no options.
2314             pImp->WriteToDocument();
2315 
2316         EndPaste();
2317     }
2318     return bOk;
2319 }
2320 
2321 #ifndef DISABLE_DYNLOADING
2322 
thisModule()2323 extern "C" { static void thisModule() {} }
2324 
2325 #else
2326 
2327 extern "C" {
2328 ScFormatFilterPlugin* ScFilterCreate();
2329 }
2330 
2331 #endif
2332 
2333 typedef ScFormatFilterPlugin * (*FilterFn)();
Get()2334 ScFormatFilterPlugin &ScFormatFilter::Get()
2335 {
2336     static ScFormatFilterPlugin *plugin = [&]()
2337     {
2338 #ifndef DISABLE_DYNLOADING
2339         OUString sFilterLib(SVLIBRARY("scfilt"));
2340         static ::osl::Module aModule;
2341         bool bLoaded = aModule.is();
2342         if (!bLoaded)
2343             bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
2344         if (!bLoaded)
2345             bLoaded = aModule.load(sFilterLib);
2346         if (bLoaded)
2347         {
2348             oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
2349             if (fn != nullptr)
2350                 return reinterpret_cast<FilterFn>(fn)();
2351         }
2352         assert(false);
2353         return static_cast<ScFormatFilterPlugin*>(nullptr);
2354 #else
2355         return ScFilterCreate();
2356 #endif
2357     }();
2358 
2359     return *plugin;
2360 }
2361 
2362 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2363 // array.
lcl_UnicodeStrChr(const sal_Unicode * pStr,sal_Unicode c)2364 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
2365         sal_Unicode c )
2366 {
2367     while (*pStr)
2368     {
2369         if (*pStr == c)
2370             return pStr;
2371         ++pStr;
2372     }
2373     return nullptr;
2374 }
2375 
ScImportStringStream(const OUString & rStr)2376 ScImportStringStream::ScImportStringStream( const OUString& rStr )
2377     : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
2378             rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
2379 {
2380     SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
2381 #ifdef OSL_BIGENDIAN
2382     SetEndian(SvStreamEndian::BIG);
2383 #else
2384     SetEndian(SvStreamEndian::LITTLE);
2385 #endif
2386 }
2387 
ReadCsvLine(SvStream & rStream,bool bEmbeddedLineBreak,OUString & rFieldSeparators,sal_Unicode cFieldQuote,sal_Unicode & rcDetectSep)2388 OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
2389         OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep )
2390 {
2391     enum RetryState
2392     {
2393         FORBID,
2394         ALLOW,
2395         RETRY,
2396         RETRIED
2397     } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);
2398 
2399     sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);
2400 
2401 Label_RetryWithNewSep:
2402 
2403     if (eRetryState == RetryState::RETRY)
2404     {
2405         eRetryState = RetryState::RETRIED;
2406         rStream.Seek( nStreamPos);
2407     }
2408 
2409     OUString aStr;
2410     rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2411 
2412     if (bEmbeddedLineBreak)
2413     {
2414         const sal_Unicode* pSeps = rFieldSeparators.getStr();
2415 
2416         QuoteType eQuoteState = FIELDEND_QUOTE;
2417         bool bFieldStart = true;
2418 
2419         sal_Int32 nLastOffset = 0;
2420         sal_Int32 nQuotes = 0;
2421         while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
2422         {
2423             const sal_Unicode *p, *pStart;
2424             p = pStart = aStr.getStr();
2425             p += nLastOffset;
2426             while (*p)
2427             {
2428                 if (nQuotes)
2429                 {
2430                     if (*p == cFieldQuote)
2431                     {
2432                         if (bFieldStart)
2433                         {
2434                             ++nQuotes;
2435                             bFieldStart = false;
2436                             eQuoteState = FIELDSTART_QUOTE;
2437                         }
2438                         // Do not detect a FIELDSTART_QUOTE if not in
2439                         // bFieldStart mode, in which case for unquoted content
2440                         // we are in FIELDEND_QUOTE state.
2441                         else if (eQuoteState != FIELDEND_QUOTE)
2442                         {
2443                             eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);
2444 
2445                             if (eRetryState == RetryState::ALLOW && rcDetectSep == ' ')
2446                             {
2447                                 eRetryState = RetryState::RETRY;
2448                                 rFieldSeparators += OUString(' ');
2449                                 goto Label_RetryWithNewSep;
2450                             }
2451 
2452                             // DONTKNOW_QUOTE is an embedded unescaped quote we
2453                             // don't count for pairing.
2454                             if (eQuoteState != DONTKNOW_QUOTE)
2455                                 ++nQuotes;
2456                         }
2457                     }
2458                     else if (eQuoteState == FIELDEND_QUOTE)
2459                     {
2460                         if (bFieldStart)
2461                             // If blank is a separator it starts a field, if it
2462                             // is not and thus maybe leading before quote we
2463                             // are still at start of field regarding quotes.
2464                             bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2465                         else
2466                             bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2467                     }
2468                 }
2469                 else
2470                 {
2471                     if (*p == cFieldQuote && bFieldStart)
2472                     {
2473                         nQuotes = 1;
2474                         eQuoteState = FIELDSTART_QUOTE;
2475                         bFieldStart = false;
2476                     }
2477                     else if (eQuoteState == FIELDEND_QUOTE)
2478                     {
2479                         // This also skips leading blanks at beginning of line
2480                         // if followed by a quote. It's debatable whether we
2481                         // actually want that or not, but congruent with what
2482                         // ScanNextFieldFromString() does.
2483                         if (bFieldStart)
2484                             bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2485                         else
2486                             bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2487                     }
2488                 }
2489                 // A quote character inside a field content does not start
2490                 // a quote.
2491                 ++p;
2492             }
2493 
2494             if (nQuotes % 2 == 0)
2495                 // We still have a (theoretical?) problem here if due to
2496                 // nArbitraryLineLengthLimit we split a string right between a
2497                 // doubled quote pair.
2498                 break;
2499             else
2500             {
2501                 nLastOffset = aStr.getLength();
2502                 OUString aNext;
2503                 rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2504                 aStr += "\n" + aNext;
2505             }
2506         }
2507     }
2508     return aStr;
2509 }
2510 
2511 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2512