1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <sot/formats.hxx>
23 #include <sfx2/mieclip.hxx>
24 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
25 #include <sal/log.hxx>
26 #include <unotools/charclass.hxx>
27 #include <osl/module.hxx>
28
29 #include <global.hxx>
30 #include <docsh.hxx>
31 #include <undoblk.hxx>
32 #include <rangenam.hxx>
33 #include <tabvwsh.hxx>
34 #include <filter.hxx>
35 #include <asciiopt.hxx>
36 #include <formulacell.hxx>
37 #include <cellform.hxx>
38 #include <progress.hxx>
39 #include <scitems.hxx>
40 #include <editable.hxx>
41 #include <compiler.hxx>
42 #include <warnbox.hxx>
43 #include <clipparam.hxx>
44 #include <impex.hxx>
45 #include <editutil.hxx>
46 #include <patattr.hxx>
47 #include <docpool.hxx>
48 #include <stringutil.hxx>
49 #include <cellvalue.hxx>
50 #include <tokenarray.hxx>
51 #include <documentimport.hxx>
52 #include <refundo.hxx>
53 #include <mtvelements.hxx>
54
55 #include <globstr.hrc>
56 #include <scresid.hxx>
57 #include <o3tl/safeint.hxx>
58 #include <tools/svlibrary.h>
59 #include <unotools/configmgr.hxx>
60 #include <vcl/svapp.hxx>
61 #include <vcl/weld.hxx>
62 #include <editeng/editobj.hxx>
63
64 #include <memory>
65 #include <osl/endian.h>
66
67 // We don't want to end up with 2GB read in one line just because of malformed
68 // multiline fields, so chop it _somewhere_, which is twice supported columns
69 // times maximum cell content length, 2*1024*64K=128M, and because it's
70 // sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of luck
71 // anyway.
72 static const sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * 65536;
73
74 namespace
75 {
76 const char SYLK_LF[] = "\x1b :";
77
lcl_IsEndianSwap(const SvStream & rStrm)78 bool lcl_IsEndianSwap( const SvStream& rStrm )
79 {
80 #ifdef OSL_BIGENDIAN
81 return rStrm.GetEndian() != SvStreamEndian::BIG;
82 #else
83 return rStrm.GetEndian() != SvStreamEndian::LITTLE;
84 #endif
85 }
86 }
87
/** Producer / version of a SYLK file.

    The order of the enumerators is significant, code in this file compares
    versions with >= (e.g. eVersion >= SylkVersion::OOO32).
 */
enum class SylkVersion
{
    /// Wrote wrongly quoted strings and unescaped semicolons.
    SCALC3,
    /// Correct strings, plus multiline content.
    OOO32,
    /// Place our new versions, if any, before this value.
    OWN,
    /// Assume that aliens wrote correct strings.
    OTHER
};
95
96 // Whole document without Undo
ScImportExport(ScDocument * p)97 ScImportExport::ScImportExport( ScDocument* p )
98 : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
99 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
100 cSep( '\t' ), cStr( '"' ),
101 bFormulas( false ), bIncludeFiltered( true ),
102 bAll( true ), bSingle( true ), bUndo( false ),
103 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
104 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
105 mExportTextOptions()
106 {
107 pUndoDoc = nullptr;
108 pExtOptions = nullptr;
109 }
110
111 // Insert am current cell without range(es)
ScImportExport(ScDocument * p,const ScAddress & rPt)112 ScImportExport::ScImportExport( ScDocument* p, const ScAddress& rPt )
113 : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
114 aRange( rPt ),
115 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
116 cSep( '\t' ), cStr( '"' ),
117 bFormulas( false ), bIncludeFiltered( true ),
118 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
119 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
120 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
121 mExportTextOptions()
122 {
123 pUndoDoc = nullptr;
124 pExtOptions = nullptr;
125 }
126
127 // ctor with a range is only used for export
128 //! ctor with a string (and bSingle=true) is also used for DdeSetData
ScImportExport(ScDocument * p,const ScRange & r)129 ScImportExport::ScImportExport( ScDocument* p, const ScRange& r )
130 : pDocSh( dynamic_cast<ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
131 aRange( r ),
132 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
133 cSep( '\t' ), cStr( '"' ),
134 bFormulas( false ), bIncludeFiltered( true ),
135 bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
136 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
137 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
138 mExportTextOptions()
139 {
140 pUndoDoc = nullptr;
141 pExtOptions = nullptr;
142 // Only one sheet (table) supported
143 aRange.aEnd.SetTab( aRange.aStart.Tab() );
144 }
145
146 // Evaluate input string - either range, cell or the whole document (when error)
147 // If a View exists, the TabNo of the view will be used.
ScImportExport(ScDocument * p,const OUString & rPos)148 ScImportExport::ScImportExport( ScDocument* p, const OUString& rPos )
149 : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ),
150 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? pDoc->MaxRow() : SCROWS32K),
151 cSep( '\t' ), cStr( '"' ),
152 bFormulas( false ), bIncludeFiltered( true ),
153 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
154 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
155 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
156 mExportTextOptions()
157 {
158 pUndoDoc = nullptr;
159 pExtOptions = nullptr;
160
161 SCTAB nTab = ScDocShell::GetCurTab();
162 aRange.aStart.SetTab( nTab );
163 OUString aPos( rPos );
164 // Named range?
165 ScRangeName* pRange = pDoc->GetRangeName();
166 if (pRange)
167 {
168 const ScRangeData* pData = pRange->findByUpperName(ScGlobal::pCharClass->uppercase(aPos));
169 if (pData)
170 {
171 if( pData->HasType( ScRangeData::Type::RefArea )
172 || pData->HasType( ScRangeData::Type::AbsArea )
173 || pData->HasType( ScRangeData::Type::AbsPos ) )
174 {
175 pData->GetSymbol(aPos);
176 }
177 }
178 }
179 formula::FormulaGrammar::AddressConvention eConv = pDoc->GetAddressConvention();
180 // Range?
181 if (aRange.Parse(aPos, pDoc, eConv) & ScRefFlags::VALID)
182 bSingle = false;
183 // Cell?
184 else if (aRange.aStart.Parse(aPos, pDoc, eConv) & ScRefFlags::VALID)
185 aRange.aEnd = aRange.aStart;
186 else
187 bAll = true;
188 }
189
~ScImportExport()190 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
191 {
192 pUndoDoc.reset();
193 pExtOptions.reset();
194 }
195
SetExtOptions(const ScAsciiOptions & rOpt)196 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
197 {
198 if ( pExtOptions )
199 *pExtOptions = rOpt;
200 else
201 pExtOptions.reset(new ScAsciiOptions( rOpt ));
202
203 // "normal" Options
204
205 cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
206 cStr = rOpt.GetTextSep();
207 }
208
SetFilterOptions(const OUString & rFilterOptions)209 void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
210 {
211 maFilterOptions = rFilterOptions;
212 }
213
IsFormatSupported(SotClipboardFormatId nFormat)214 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
215 {
216 return nFormat == SotClipboardFormatId::STRING
217 || nFormat == SotClipboardFormatId::STRING_TSVC
218 || nFormat == SotClipboardFormatId::SYLK
219 || nFormat == SotClipboardFormatId::LINK
220 || nFormat == SotClipboardFormatId::HTML
221 || nFormat == SotClipboardFormatId::HTML_SIMPLE
222 || nFormat == SotClipboardFormatId::DIF;
223 }
224
225 // Prepare for Undo
StartPaste()226 bool ScImportExport::StartPaste()
227 {
228 if ( !bAll )
229 {
230 ScEditableTester aTester( pDoc, aRange );
231 if ( !aTester.IsEditable() )
232 {
233 vcl::Window* pWin = Application::GetDefDialogParent();
234 std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(pWin ? pWin->GetFrameWeld() : nullptr,
235 VclMessageType::Info, VclButtonsType::Ok,
236 ScResId(aTester.GetMessageId())));
237 xInfoBox->run();
238 return false;
239 }
240 }
241 if( bUndo && pDocSh && pDoc->IsUndoEnabled())
242 {
243 pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
244 pUndoDoc->InitUndo( pDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
245 pDoc->CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
246 }
247 return true;
248 }
249
250 // Create Undo/Redo actions, Invalidate/Repaint
EndPaste(bool bAutoRowHeight)251 void ScImportExport::EndPaste(bool bAutoRowHeight)
252 {
253 bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
254 aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
255
256 if( pUndoDoc && pDoc->IsUndoEnabled() && pDocSh )
257 {
258 ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
259 pRedoDoc->InitUndo( pDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
260 pDoc->CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
261 ScMarkData aDestMark(pRedoDoc->MaxRow(), pRedoDoc->MaxCol());
262 aDestMark.SetMarkArea(aRange);
263 pDocSh->GetUndoManager()->AddUndoAction(
264 std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
265 }
266 pUndoDoc.reset();
267 if( pDocSh )
268 {
269 if (!bHeight)
270 pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
271 pDocSh->SetDocumentModified();
272 }
273 ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
274 if ( pViewSh )
275 pViewSh->UpdateInputHandler();
276
277 }
278
ExportData(const OUString & rMimeType,css::uno::Any & rValue)279 bool ScImportExport::ExportData( const OUString& rMimeType,
280 css::uno::Any & rValue )
281 {
282 SvMemoryStream aStrm;
283 // mba: no BaseURL for data exchange
284 if( ExportStream( aStrm, OUString(),
285 SotExchange::GetFormatIdFromMimeType( rMimeType ) ))
286 {
287 aStrm.WriteUChar( 0 );
288 rValue <<= css::uno::Sequence< sal_Int8 >(
289 static_cast<sal_Int8 const *>(aStrm.GetData()),
290 aStrm.TellEnd() );
291 return true;
292 }
293 return false;
294 }
295
ImportString(const OUString & rText,SotClipboardFormatId nFmt)296 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
297 {
298 switch ( nFmt )
299 {
300 // formats supporting unicode
301 case SotClipboardFormatId::STRING :
302 case SotClipboardFormatId::STRING_TSVC :
303 {
304 ScImportStringStream aStrm( rText);
305 return ImportStream( aStrm, OUString(), nFmt );
306 // ImportStream must handle RTL_TEXTENCODING_UNICODE
307 }
308 default:
309 {
310 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
311 OString aTmp( rText.getStr(), rText.getLength(), eEnc );
312 SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(sal_Char), StreamMode::READ );
313 aStrm.SetStreamCharSet( eEnc );
314 SetNoEndianSwap( aStrm ); //! no swapping in memory
315 return ImportStream( aStrm, OUString(), nFmt );
316 }
317 }
318 }
319
ExportString(OUString & rText,SotClipboardFormatId nFmt)320 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
321 {
322 if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC )
323 {
324 SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
325 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
326 OString aTmp;
327 bool bOk = ExportByteString( aTmp, eEnc, nFmt );
328 rText = OStringToOUString( aTmp, eEnc );
329 return bOk;
330 }
331 // nSizeLimit not needed for OUString
332
333 SvMemoryStream aStrm;
334 aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
335 SetNoEndianSwap( aStrm ); //! no swapping in memory
336 // mba: no BaseURL for data exc
337 if( ExportStream( aStrm, OUString(), nFmt ) )
338 {
339 aStrm.WriteUInt16( 0 );
340 rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
341 return true;
342 }
343 rText.clear();
344 return false;
345
346 // ExportStream must handle RTL_TEXTENCODING_UNICODE
347 }
348
ExportByteString(OString & rText,rtl_TextEncoding eEnc,SotClipboardFormatId nFmt)349 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
350 {
351 OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
352 if ( eEnc == RTL_TEXTENCODING_UNICODE )
353 eEnc = osl_getThreadTextEncoding();
354
355 if (!nSizeLimit)
356 nSizeLimit = SAL_MAX_UINT16;
357
358 SvMemoryStream aStrm;
359 aStrm.SetStreamCharSet( eEnc );
360 SetNoEndianSwap( aStrm ); //! no swapping in memory
361 // mba: no BaseURL for data exchange
362 if( ExportStream( aStrm, OUString(), nFmt ) )
363 {
364 aStrm.WriteChar( 0 );
365 if( aStrm.TellEnd() <= nSizeLimit )
366 {
367 rText = static_cast<const sal_Char*>(aStrm.GetData());
368 return true;
369 }
370 }
371 rText.clear();
372 return false;
373 }
374
ImportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)375 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
376 {
377 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
378 {
379 if( ExtText2Doc( rStrm ) ) // evaluate pExtOptions
380 return true;
381 }
382 if( nFmt == SotClipboardFormatId::SYLK )
383 {
384 if( Sylk2Doc( rStrm ) )
385 return true;
386 }
387 if( nFmt == SotClipboardFormatId::DIF )
388 {
389 if( Dif2Doc( rStrm ) )
390 return true;
391 }
392 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
393 {
394 if( RTF2Doc( rStrm, rBaseURL ) )
395 return true;
396 }
397 if( nFmt == SotClipboardFormatId::LINK )
398 return true; // Link-Import?
399 if ( nFmt == SotClipboardFormatId::HTML )
400 {
401 if( HTML2Doc( rStrm, rBaseURL ) )
402 return true;
403 }
404 if ( nFmt == SotClipboardFormatId::HTML_SIMPLE )
405 {
406 MSE40HTMLClipFormatObj aMSE40ClpObj; // needed to skip the header data
407 SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
408 if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) )
409 return true;
410 }
411
412 return false;
413 }
414
ExportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)415 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
416 {
417 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
418 {
419 if( Doc2Text( rStrm ) )
420 return true;
421 }
422 if( nFmt == SotClipboardFormatId::SYLK )
423 {
424 if( Doc2Sylk( rStrm ) )
425 return true;
426 }
427 if( nFmt == SotClipboardFormatId::DIF )
428 {
429 if( Doc2Dif( rStrm ) )
430 return true;
431 }
432 if( nFmt == SotClipboardFormatId::LINK && !bAll )
433 {
434 OUString aDocName;
435 if ( pDoc->IsClipboard() )
436 aDocName = ScGlobal::GetClipDocName();
437 else
438 {
439 SfxObjectShell* pShell = pDoc->GetDocumentShell();
440 if (pShell)
441 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );
442 }
443
444 OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
445 if( !aDocName.isEmpty() )
446 {
447 // Always use Calc A1 syntax for paste link.
448 OUString aRefName;
449 ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
450 if( bSingle )
451 aRefName = aRange.aStart.Format(nFlags, pDoc, formula::FormulaGrammar::CONV_OOO);
452 else
453 {
454 if( aRange.aStart.Tab() != aRange.aEnd.Tab() )
455 nFlags |= ScRefFlags::TAB2_3D;
456 aRefName = aRange.Format(nFlags, pDoc, formula::FormulaGrammar::CONV_OOO);
457 }
458 OUString aAppName = Application::GetAppName();
459
460 // extra bits are used to tell the client to prefer external
461 // reference link.
462 OUString const aExtraBits("calc:extref");
463
464 WriteUnicodeOrByteString( rStrm, aAppName, true );
465 WriteUnicodeOrByteString( rStrm, aDocName, true );
466 WriteUnicodeOrByteString( rStrm, aRefName, true );
467 WriteUnicodeOrByteString( rStrm, aExtraBits, true );
468 if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
469 rStrm.WriteUInt16( 0 );
470 else
471 rStrm.WriteChar( 0 );
472 return rStrm.GetError() == ERRCODE_NONE;
473 }
474 }
475 if( nFmt == SotClipboardFormatId::HTML )
476 {
477 if( Doc2HTML( rStrm, rBaseURL ) )
478 return true;
479 }
480 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
481 {
482 if( Doc2RTF( rStrm ) )
483 return true;
484 }
485
486 return false;
487 }
488
WriteUnicodeOrByteString(SvStream & rStrm,const OUString & rString,bool bZero)489 void ScImportExport::WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero )
490 {
491 rtl_TextEncoding eEnc = rStrm.GetStreamCharSet();
492 if ( eEnc == RTL_TEXTENCODING_UNICODE )
493 {
494 if ( !lcl_IsEndianSwap( rStrm ) )
495 rStrm.WriteBytes(rString.getStr(), rString.getLength() * sizeof(sal_Unicode));
496 else
497 {
498 const sal_Unicode* p = rString.getStr();
499 const sal_Unicode* const pStop = p + rString.getLength();
500 while ( p < pStop )
501 {
502 rStrm.WriteUInt16( *p );
503 }
504 }
505 if ( bZero )
506 rStrm.WriteUInt16( 0 );
507 }
508 else
509 {
510 OString aByteStr(OUStringToOString(rString, eEnc));
511 rStrm.WriteOString( aByteStr );
512 if ( bZero )
513 rStrm.WriteChar( 0 );
514 }
515 }
516
517 // This function could be replaced by endlub()
WriteUnicodeOrByteEndl(SvStream & rStrm)518 void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm )
519 {
520 if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
521 { // same as endl() but unicode
522 switch ( rStrm.GetLineDelimiter() )
523 {
524 case LINEEND_CR :
525 rStrm.WriteUInt16( '\r' );
526 break;
527 case LINEEND_LF :
528 rStrm.WriteUInt16( '\n' );
529 break;
530 default:
531 rStrm.WriteUInt16( '\r' ).WriteUInt16( '\n' );
532 }
533 }
534 else
535 endl( rStrm );
536 }
537
SetNoEndianSwap(SvStream & rStrm)538 void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
539 {
540 #ifdef OSL_BIGENDIAN
541 rStrm.SetEndian( SvStreamEndian::BIG );
542 #else
543 rStrm.SetEndian( SvStreamEndian::LITTLE );
544 #endif
545 }
546
/** Classification of a quote character encountered while parsing a field,
    see lcl_isFieldEndQuote() and lcl_isEscapedOrFieldEndQuote(). */
enum QuoteType
{
    /// First quote in a field, either starting content or embedded.
    FIELDSTART_QUOTE,
    /// First of a doubled quote.
    FIRST_QUOTE,
    /// Second of a doubled quote.
    SECOND_QUOTE,
    /// Quote that ends a quoted field.
    FIELDEND_QUOTE,
    /// Unescaped quote that is not considered as end of field.
    DONTKNOW_QUOTE
};
555
556 /** Determine if *p is a quote that ends a quoted field.
557
558 Precondition: we are parsing a quoted field already and *p is a quote.
559
560 @return
561 FIELDEND_QUOTE if end of field quote
562 DONTKNOW_QUOTE anything else
563 */
lcl_isFieldEndQuote(const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode & rcDetectSep)564 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
565 {
566 // Due to broken CSV generators that don't double embedded quotes check if
567 // a field separator immediately or with trailing spaces follows the quote,
568 // only then end the field, or at end of string.
569 const sal_Unicode cBlank = ' ';
570 if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
571 return FIELDEND_QUOTE;
572 // Detect a possible blank separator if it's not already in the list (which
573 // was checked right above for p[1]==cBlank).
574 if (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank)
575 rcDetectSep = cBlank;
576 while (p[1] == cBlank)
577 ++p;
578 if (!p[1] || ScGlobal::UnicodeStrChr( pSeps, p[1]))
579 return FIELDEND_QUOTE;
580 return DONTKNOW_QUOTE;
581 }
582
583 /** Determine if *p is a quote that is escaped by being doubled or ends a
584 quoted field.
585
586 Precondition: *p is a quote.
587
588 @param nQuotes
589 Quote characters encountered so far.
590 Odd (after opening quote) means either no embedded quotes or only quote
591 pairs so far.
592 Even means either not in a quoted field or already one quote
593 encountered, the first of a pair.
594
595 @return
596 FIELDSTART_QUOTE if first quote in a field, either starting content or
597 embedded so caller should check beforehand.
598 FIRST_QUOTE if first of a doubled quote
599 SECOND_QUOTE if second of a doubled quote
600 FIELDEND_QUOTE if end of field quote
601 DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field,
602 do not increment nQuotes in caller then!
603 */
lcl_isEscapedOrFieldEndQuote(sal_Int32 nQuotes,const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode cStr,sal_Unicode & rcDetectSep)604 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
605 const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
606 {
607 if ((nQuotes % 2) == 0)
608 {
609 if (p[-1] == cStr)
610 return SECOND_QUOTE;
611 else
612 {
613 SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
614 return FIELDSTART_QUOTE;
615 }
616 }
617 if (p[1] == cStr)
618 return FIRST_QUOTE;
619 return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
620 }
621
622 /** Append characters of [p1,p2) to rField.
623
624 @returns TRUE if ok; FALSE if data overflow, truncated
625 */
lcl_appendLineData(OUString & rField,const sal_Unicode * p1,const sal_Unicode * p2)626 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
627 {
628 OSL_ENSURE( rField.getLength() + (p2 - p1) <= SAL_MAX_UINT16, "lcl_appendLineData: data overflow");
629 if (rField.getLength() + (p2 - p1) <= SAL_MAX_UINT16)
630 {
631 rField += OUString( p1, sal::static_int_cast<sal_Int32>( p2 - p1 ) );
632 return true;
633 }
634 else
635 {
636 rField += OUString( p1, SAL_MAX_UINT16 - rField.getLength() );
637 return false;
638 }
639 }
640
/** How lcl_ScanString() treats doubled quote characters. */
enum class DoubledQuoteMode
{
    /// Both are taken, additionally start and end quote are included in string.
    KEEP_ALL,
    /// Escaped quote, one is taken, one ignored.
    ESCAPE,
};
646
/** Scan a quoted string starting at the opening quote *p and append its
    content to rString.

    @param p
        Points to the opening quote.
    @param rString
        Receives the scanned content, appended via lcl_appendLineData().
    @param pSeps
        Zero-terminated list of field separators, used in ESCAPE mode to tell
        whether a single quote ends the field.
    @param cStr
        The quote character.
    @param eMode
        Treatment of doubled quotes, see DoubledQuoteMode.
    @param rbOverflowCell
        Set to true if the content had to be truncated; never reset here.

    @return pointer to where scanning stopped.
 */
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rString,
            const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
    const bool bKeepAll = (eMode == DoubledQuoteMode::KEEP_ALL);
    const bool bEscape = (eMode == DoubledQuoteMode::ESCAPE);

    if (!bKeepAll)
        ++p;    //! jump over opening quote

    bool bMore;
    do
    {
        bMore = false;
        const sal_Unicode* const pChunk = p;
        while (*p)
        {
            if (*p != cStr)
            {
                ++p;
                continue;
            }

            // *p is a quote, look at what follows.
            ++p;
            if (*p != cStr)
            {
                // Single quote: end the scan, unless in ESCAPE mode it does
                // not qualify as a field end, in which case it is content.
                if (!bEscape)
                    break;
                sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
                if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
                    break;
                continue;
            }

            // doubled quote char
            if (bKeepAll)
                ++p;            // both for us, keep on scanning this chunk
            else if (bEscape)
            {
                ++p;            // one for us, the chunk ends here
                bMore = true;   // and more to come
                break;
            }
        }

        if (pChunk < p)
        {
            // Unless everything is kept, do not take over the quote that
            // stopped the scan.
            const sal_Unicode* pChunkEnd = p;
            if (!bKeepAll && (*p || *(p-1) == cStr))
                pChunkEnd = p - 1;
            if (!lcl_appendLineData( rString, pChunk, pChunkEnd))
                rbOverflowCell = true;
        }
    } while (bMore);
    return p;
}
702
lcl_UnescapeSylk(OUString & rString,SylkVersion eVersion)703 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
704 {
705 // Older versions didn't escape the semicolon.
706 // Older versions quoted the string and doubled embedded quotes, but not
707 // the semicolons, which was plain wrong.
708 if (eVersion >= SylkVersion::OOO32)
709 rString = rString.replaceAll(";;", ";");
710 else
711 rString = rString.replaceAll("\"\"", "\"");
712
713 rString = rString.replaceAll(SYLK_LF, "\n");
714 }
715
/** Scan a quoted SYLK string starting at the opening quote *p, append the
    content between the quotes to rString and unescape rString.

    A quote followed by a single ';' ends the field. If no closing quote is
    pending when the scan stops, all data up to the stop position is taken.

    @return pointer to where scanning stopped: the closing quote, or the
            terminating zero.
 */
static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pOpenQuote = p;
    const sal_Unicode* pCloseQuote = nullptr;
    const bool bEscapedSemicolons = (eVersion >= SylkVersion::OOO32);

    for (++p; *p; ++p)
    {
        if (*p != '"')
            continue;

        pCloseQuote = p;
        const sal_Unicode cNext = p[1];
        if (bEscapedSemicolons)
        {
            if (cNext != ';')
                continue;
            if (p[2] != ';')
                break;              // end field
            p += 2;                 // escaped ';'
            pCloseQuote = nullptr;
        }
        else if (cNext == '"')
        {
            ++p;                    // escaped '"'
            pCloseQuote = nullptr;
        }
        else if (cNext == ';')
            break;                  // end field
    }

    if (!pCloseQuote)
        pCloseQuote = p;    // Take all data as string.

    rString += OUString(pOpenQuote + 1, sal::static_int_cast<sal_Int32>( pCloseQuote - pOpenQuote - 1 ) );
    lcl_UnescapeSylk( rString, eVersion);
    return p;
}
757
/** Scan a SYLK formula expression starting at p and append it to rString.

    From SylkVersion::OOO32 on the expression extends to the first ';' that is
    not doubled and is unescaped afterwards. For older versions see the
    comment within.

    @return pointer to where scanning stopped.
 */
static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pStart = p;

    if (eVersion >= SylkVersion::OOO32)
    {
        for ( ; *p; ++p)
        {
            if (*p != ';')
                continue;
            if (p[1] != ';')
                break;      // end field
            ++p;            // escaped ';'
        }
        rString += OUString( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
        lcl_UnescapeSylk( rString, eVersion);
        return p;
    }

    // Nasty. If in old versions the formula contained a semicolon, it was
    // quoted and embedded quotes were doubled, but semicolons were not. If
    // there was no semicolon, it could still contain quotes and doubled
    // embedded quotes if it was something like ="a""b", which was saved as
    // E"a""b" as is and has to be preserved, even if older versions
    // couldn't even load it correctly. However, theoretically another
    // field might follow and thus the line contain a semicolon again, such
    // as ...;E"a""b";...
    bool bQuoted = false;
    if (*pStart == '"')
    {
        // May be a quoted expression or just a string constant expression
        // with quotes. Look ahead without moving p.
        for (const sal_Unicode* pScan = pStart + 1; *pScan; ++pScan)
        {
            if (*pScan == ';')
            {
                bQuoted = true;     // ';' within quoted expression
                break;
            }
            if (*pScan != '"')
                continue;
            if (pScan[1] != '"')
                break;              // closing '"', had no ';' yet
            ++pScan;                // escaped '"'
        }
    }

    if (bQuoted)
        return lcl_ScanSylkString( p, rString, eVersion);

    while (*p && *p != ';')
        ++p;
    rString += OUString( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
    return p;
}
821
lcl_DoubleEscapeChar(OUString & rString,sal_Unicode cStr)822 static void lcl_DoubleEscapeChar( OUString& rString, sal_Unicode cStr )
823 {
824 sal_Int32 n = 0;
825 while( ( n = rString.indexOf( cStr, n ) ) != -1 )
826 {
827 rString = rString.replaceAt( n, 0, OUString(cStr) );
828 n += 2;
829 }
830 }
831
lcl_WriteString(SvStream & rStrm,OUString & rString,sal_Unicode cQuote,sal_Unicode cEsc)832 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
833 {
834 if (cEsc)
835 lcl_DoubleEscapeChar( rString, cEsc );
836
837 if (cQuote)
838 {
839 rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);
840 }
841
842 ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
843 }
844
lcl_WriteSimpleString(SvStream & rStrm,const OUString & rString)845 static void lcl_WriteSimpleString( SvStream& rStrm, const OUString& rString )
846 {
847 ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
848 }
849
Text2Doc(SvStream & rStrm)850 bool ScImportExport::Text2Doc( SvStream& rStrm )
851 {
852 bool bOk = true;
853
854 sal_Unicode pSeps[2];
855 pSeps[0] = cSep;
856 pSeps[1] = 0;
857
858 ScSetStringParam aSetStringParam;
859 aSetStringParam.mbCheckLinkFormula = true;
860
861 SCCOL nStartCol = aRange.aStart.Col();
862 SCROW nStartRow = aRange.aStart.Row();
863 SCCOL nEndCol = aRange.aEnd.Col();
864 SCROW nEndRow = aRange.aEnd.Row();
865 sal_uLong nOldPos = rStrm.Tell();
866 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
867 bool bData = !bSingle;
868 if( !bSingle)
869 bOk = StartPaste();
870
871 while( bOk )
872 {
873 OUString aLine;
874 OUString aCell;
875 SCROW nRow = nStartRow;
876 rStrm.Seek( nOldPos );
877 for( ;; )
878 {
879 rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
880 if( rStrm.eof() )
881 break;
882 SCCOL nCol = nStartCol;
883 const sal_Unicode* p = aLine.getStr();
884 while( *p )
885 {
886 aCell.clear();
887 const sal_Unicode* q = p;
888 while (*p && *p != cSep)
889 {
890 // Always look for a pairing quote and ignore separator in between.
891 while (*p && *p == cStr)
892 q = p = lcl_ScanString( p, aCell, pSeps, cStr, DoubledQuoteMode::KEEP_ALL, bOverflowCell );
893 // All until next separator or quote.
894 while (*p && *p != cSep && *p != cStr)
895 ++p;
896 if (!lcl_appendLineData( aCell, q, p))
897 bOverflowCell = true; // display warning on import
898 q = p;
899 }
900 if (*p)
901 ++p;
902 if (ValidCol(nCol) && ValidRow(nRow) )
903 {
904 if( bSingle )
905 {
906 if (nCol>nEndCol) nEndCol = nCol;
907 if (nRow>nEndRow) nEndRow = nRow;
908 }
909 if( bData && nCol <= nEndCol && nRow <= nEndRow )
910 pDoc->SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
911 }
912 else // too many columns/rows
913 {
914 if (!ValidRow(nRow))
915 bOverflowRow = true; // display warning on import
916 if (!ValidCol(nCol))
917 bOverflowCol = true; // display warning on import
918 }
919 ++nCol;
920 }
921 ++nRow;
922 }
923
924 if( !bData )
925 {
926 aRange.aEnd.SetCol( nEndCol );
927 aRange.aEnd.SetRow( nEndRow );
928 bOk = StartPaste();
929 bData = true;
930 }
931 else
932 break;
933 }
934
935 EndPaste();
936 if (bOk && mbImportBroadcast)
937 {
938 pDoc->BroadcastCells(aRange, SfxHintId::ScDataChanged);
939 pDocSh->PostDataChanged();
940 }
941
942 return bOk;
943 }
944
945 // Extended Ascii-Import
946
lcl_PutString(ScDocumentImport & rDocImport,bool bUseDocImport,SCCOL nCol,SCROW nRow,SCTAB nTab,const OUString & rStr,sal_uInt8 nColFormat,SvNumberFormatter * pFormatter,bool bDetectNumFormat,bool bSkipEmptyCells,const::utl::TransliterationWrapper & rTransliteration,CalendarWrapper & rCalendar,const::utl::TransliterationWrapper * pSecondTransliteration,CalendarWrapper * pSecondCalendar)947 static bool lcl_PutString(
948 ScDocumentImport& rDocImport, bool bUseDocImport,
949 SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
950 SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bSkipEmptyCells,
951 const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
952 const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
953 {
954 ScDocument* pDoc = &rDocImport.getDoc();
955 bool bMultiLine = false;
956 if ( nColFormat == SC_COL_SKIP || !ValidCol(nCol) || !ValidRow(nRow) )
957 return bMultiLine;
958 if ( rStr.isEmpty() )
959 {
960 if ( !bSkipEmptyCells )
961 { // delete destination cell
962 if ( bUseDocImport )
963 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
964 else
965 pDoc->SetString( nCol, nRow, nTab, rStr );
966 }
967 return false;
968 }
969
970 if ( nColFormat == SC_COL_TEXT )
971 {
972 double fDummy;
973 sal_uInt32 nIndex = 0;
974 if (pFormatter->IsNumberFormat(rStr, nIndex, fDummy))
975 {
976 // Set the format of this cell to Text.
977 sal_uInt32 nFormat = pFormatter->GetStandardFormat(SvNumFormatType::TEXT);
978 ScPatternAttr aNewAttrs(pDoc->GetPool());
979 SfxItemSet& rSet = aNewAttrs.GetItemSet();
980 rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
981 pDoc->ApplyPattern(nCol, nRow, nTab, aNewAttrs);
982
983 }
984 if ( bUseDocImport )
985 {
986 if(ScStringUtil::isMultiline(rStr))
987 {
988 ScFieldEditEngine& rEngine = pDoc->GetEditEngine();
989 rEngine.SetText(rStr);
990 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
991 return true;
992 }
993 else
994 {
995 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
996 return false;
997 }
998 } else
999 {
1000 pDoc->SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
1001 return bMultiLine;
1002 }
1003 }
1004
1005 if ( nColFormat == SC_COL_ENGLISH )
1006 {
1007 //! SetString with Extra-Flag ???
1008
1009 SvNumberFormatter* pDocFormatter = pDoc->GetFormatTable();
1010 sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
1011 double fVal;
1012 if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
1013 {
1014 // Numberformat will not be set to English
1015 if ( bUseDocImport )
1016 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
1017 else
1018 pDoc->SetValue( nCol, nRow, nTab, fVal );
1019 return bMultiLine;
1020 }
1021 // else, continue with SetString
1022 }
1023 else if ( nColFormat != SC_COL_STANDARD ) // Datumformats
1024 {
1025 const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
1026 sal_Int32 nLen = rStr.getLength();
1027 sal_Int32 nStart[nMaxNumberParts];
1028 sal_Int32 nEnd[nMaxNumberParts];
1029
1030 sal_uInt16 nDP, nMP, nYP;
1031 switch ( nColFormat )
1032 {
1033 case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; break;
1034 case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; break;
1035 case SC_COL_DMY:
1036 default: nDP = 0; nMP = 1; nYP = 2; break;
1037 }
1038
1039 sal_uInt16 nFound = 0;
1040 bool bInNum = false;
1041 for ( sal_Int32 nPos=0; nPos<nLen && (bInNum ||
1042 nFound<nMaxNumberParts); nPos++ )
1043 {
1044 if (bInNum && nFound == 3 && nColFormat == SC_COL_YMD &&
1045 nPos <= nStart[nFound]+2 && rStr[nPos] == 'T')
1046 bInNum = false; // ISO-8601: YYYY-MM-DDThh:mm...
1047 else if ((((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
1048 && ScGlobal::pCharClass->isLetterNumeric( rStr, nPos))
1049 || ScGlobal::pCharClass->isDigit( rStr, nPos))
1050 {
1051 if (!bInNum)
1052 {
1053 bInNum = true;
1054 nStart[nFound] = nPos;
1055 ++nFound;
1056 }
1057 nEnd[nFound-1] = nPos;
1058 }
1059 else
1060 bInNum = false;
1061 }
1062
1063 if ( nFound == 1 )
1064 {
1065 // try to break one number (without separators) into date fields
1066
1067 sal_Int32 nDateStart = nStart[0];
1068 sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;
1069
1070 if ( nDateLen >= 5 && nDateLen <= 8 &&
1071 ScGlobal::pCharClass->isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
1072 {
1073 // 6 digits: 2 each for day, month, year
1074 // 8 digits: 4 for year, 2 each for day and month
1075 // 5 or 7 digits: first field is shortened by 1
1076
1077 bool bLongYear = ( nDateLen >= 7 );
1078 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );
1079
1080 sal_uInt16 nFieldStart = nDateStart;
1081 for (sal_uInt16 nPos=0; nPos<3; nPos++)
1082 {
1083 sal_uInt16 nFieldEnd = nFieldStart + 1; // default: 2 digits
1084 if ( bLongYear && nPos == nYP )
1085 nFieldEnd += 2; // 2 extra digits for long year
1086 if ( bShortFirst && nPos == 0 )
1087 --nFieldEnd; // first field shortened?
1088
1089 nStart[nPos] = nFieldStart;
1090 nEnd[nPos] = nFieldEnd;
1091 nFieldStart = nFieldEnd + 1;
1092 }
1093 nFound = 3;
1094 }
1095 }
1096
1097 if ( nFound >= 3 )
1098 {
1099 using namespace ::com::sun::star;
1100 bool bSecondCal = false;
1101 sal_uInt16 nDay = static_cast<sal_uInt16>(rStr.copy( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ).toInt32());
1102 sal_uInt16 nYear = static_cast<sal_uInt16>(rStr.copy( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ).toInt32());
1103 OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
1104 sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
1105 if (!nMonth)
1106 {
1107 static const char aSepShortened[] = "SEP";
1108 uno::Sequence< i18n::CalendarItem2 > xMonths;
1109 sal_Int32 i, nMonthCount;
1110 // first test all month names from local international
1111 xMonths = rCalendar.getMonths();
1112 nMonthCount = xMonths.getLength();
1113 for (i=0; i<nMonthCount && !nMonth; i++)
1114 {
1115 if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
1116 rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
1117 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1118 else if ( i == 8 && rTransliteration.isEqual( "SEPT",
1119 xMonths[i].AbbrevName ) &&
1120 rTransliteration.isEqual( aMStr, aSepShortened ) )
1121 { // correct English abbreviation is SEPT,
1122 // but data mostly contains SEP only
1123 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1124 }
1125 }
1126 // if none found, then test english month names
1127 if ( !nMonth && pSecondCalendar && pSecondTransliteration )
1128 {
1129 xMonths = pSecondCalendar->getMonths();
1130 nMonthCount = xMonths.getLength();
1131 for (i=0; i<nMonthCount && !nMonth; i++)
1132 {
1133 if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
1134 pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
1135 {
1136 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1137 bSecondCal = true;
1138 }
1139 else if ( i == 8 && pSecondTransliteration->isEqual(
1140 aMStr, aSepShortened ) )
1141 { // correct English abbreviation is SEPT,
1142 // but data mostly contains SEP only
1143 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1144 bSecondCal = true;
1145 }
1146 }
1147 }
1148 }
1149
1150 SvNumberFormatter* pDocFormatter = pDoc->GetFormatTable();
1151 if ( nYear < 100 )
1152 nYear = pDocFormatter->ExpandTwoDigitYear( nYear );
1153
1154 CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
1155 sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
1156 if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
1157 {
1158 --nMonth;
1159 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
1160 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
1161 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
1162 sal_Int16 nHour, nMinute, nSecond;
1163 // #i14974# The imported value should have no fractional value, so set the
1164 // time fields to zero (ICU calendar instance defaults to current date/time)
1165 nHour = nMinute = nSecond = 0;
1166 if (nFound > 3)
1167 nHour = static_cast<sal_Int16>(rStr.copy( nStart[3], nEnd[3]+1-nStart[3]).toInt32());
1168 if (nFound > 4)
1169 nMinute = static_cast<sal_Int16>(rStr.copy( nStart[4], nEnd[4]+1-nStart[4]).toInt32());
1170 if (nFound > 5)
1171 nSecond = static_cast<sal_Int16>(rStr.copy( nStart[5], nEnd[5]+1-nStart[5]).toInt32());
1172 // do not use calendar's milliseconds, to avoid fractional part truncation
1173 double fFrac = 0.0;
1174 if (nFound > 6)
1175 {
1176 sal_Unicode cDec = '.';
1177 OUString aT = OUStringChar(cDec) + rStr.copy( nStart[6], nEnd[6]+1-nStart[6]);
1178 rtl_math_ConversionStatus eStatus;
1179 double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
1180 if (eStatus == rtl_math_ConversionStatus_Ok)
1181 fFrac = fV / 86400.0;
1182 }
1183 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
1184 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
1185 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
1186 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
1187 if ( pCalendar->isValid() )
1188 {
1189 double fDiff = DateTime(pDocFormatter->GetNullDate()) -
1190 pCalendar->getEpochStart();
1191 // #i14974# must use getLocalDateTime to get the same
1192 // date values as set above
1193 double fDays = pCalendar->getLocalDateTime() + fFrac;
1194 fDays -= fDiff;
1195
1196 LanguageType eLatin, eCjk, eCtl;
1197 pDoc->GetLanguage( eLatin, eCjk, eCtl );
1198 LanguageType eDocLang = eLatin; //! which language for date formats?
1199
1200 SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
1201 sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
1202 // maybe there is a special format including seconds or milliseconds
1203 if (nFound > 5)
1204 nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
1205
1206 ScAddress aPos(nCol,nRow,nTab);
1207 if ( bUseDocImport )
1208 rDocImport.setNumericCell(aPos, fDays);
1209 else
1210 pDoc->SetValue( aPos, fDays );
1211 pDoc->SetNumberFormat(aPos, nFormat);
1212
1213 return bMultiLine; // success
1214 }
1215 }
1216 }
1217 }
1218
1219 // Standard or date not determined -> SetString / EditCell
1220 if( rStr.indexOf( '\n' ) == -1 )
1221 {
1222 ScSetStringParam aParam;
1223 aParam.mpNumFormatter = pFormatter;
1224 aParam.mbDetectNumberFormat = bDetectNumFormat;
1225 aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
1226 aParam.mbHandleApostrophe = false;
1227 aParam.mbCheckLinkFormula = true;
1228 if ( bUseDocImport )
1229 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
1230 else
1231 pDoc->SetString( nCol, nRow, nTab, rStr, &aParam );
1232 }
1233 else
1234 {
1235 bMultiLine = true;
1236 ScFieldEditEngine& rEngine = pDoc->GetEditEngine();
1237 rEngine.SetText(rStr);
1238 if ( bUseDocImport )
1239 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1240 else
1241 pDoc->SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
1242 }
1243 return bMultiLine;
1244 }
1245
/**
 * Extract one fixed-width field from a line.
 *
 * The field is the range [nStart, nNext) of rLine, with nNext clamped to the
 * line length and trailing blanks removed. If the remaining text starts and
 * ends with a double quote, the quotes are stripped and rbIsQuoted is set.
 * Content longer than SAL_MAX_UINT16 characters is truncated to that length
 * and rbOverflowCell is set.
 *
 * @param rLine           the complete line.
 * @param nStart          index of the first character of the field.
 * @param nNext           index of the first character after the field.
 * @param rbIsQuoted      set to whether the field was enclosed in double
 *                        quotes; left untouched if the field range is empty.
 * @param rbOverflowCell  set to true if the content had to be truncated;
 *                        never reset to false here.
 * @return the field content, or an empty string if the range is empty.
 */
static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
                     bool& rbIsQuoted, bool& rbOverflowCell )
{
    sal_Int32 nLen = rLine.getLength();
    if (nNext > nLen)
        nNext = nLen;
    if ( nNext <= nStart )
        return EMPTY_OUSTRING;

    const sal_Unicode* pStr = rLine.getStr();

    // Strip trailing blanks; nSpace ends up one past the last non-blank.
    sal_Int32 nSpace = nNext;
    while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
        --nSpace;

    rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
    if (rbIsQuoted)
    {
        // Number of characters between the quotes; a lone '"' yields 0.
        const sal_Int32 nContentLen = std::max< sal_Int32 >(0, nSpace-nStart-2);
        // Same limit as for unquoted content and as the truncation below.
        bool bFits = (nContentLen <= SAL_MAX_UINT16);
        OSL_ENSURE( bFits, "lcl_GetFixed: line doesn't fit into data");
        if (bFits)
            return rLine.copy(nStart+1, nContentLen);
        else
        {
            rbOverflowCell = true;
            return rLine.copy(nStart+1, SAL_MAX_UINT16);
        }
    }
    else
    {
        bool bFits = (nSpace - nStart <= SAL_MAX_UINT16);
        OSL_ENSURE( bFits, "lcl_GetFixed: line doesn't fit into data");
        if (bFits)
            return rLine.copy(nStart, nSpace-nStart);
        else
        {
            rbOverflowCell = true;
            return rLine.copy(nStart, SAL_MAX_UINT16);
        }
    }
}
1287
ExtText2Doc(SvStream & rStrm)1288 bool ScImportExport::ExtText2Doc( SvStream& rStrm )
1289 {
1290 if (!pExtOptions)
1291 return Text2Doc( rStrm );
1292
1293 sal_uInt64 const nOldPos = rStrm.Tell();
1294 sal_uInt64 const nRemaining = rStrm.remainingSize();
1295 std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
1296 ScResId( STR_LOAD_DOC ), nRemaining, true ));
1297 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
1298
1299 SCCOL nStartCol = aRange.aStart.Col();
1300 SCCOL nEndCol = aRange.aEnd.Col();
1301 SCROW nStartRow = aRange.aStart.Row();
1302 SCTAB nTab = aRange.aStart.Tab();
1303
1304 bool bFixed = pExtOptions->IsFixedLen();
1305 OUString aSeps = pExtOptions->GetFieldSeps(); // Need non-const for ReadCsvLine(),
1306 const sal_Unicode* pSeps = aSeps.getStr(); // but it will be const anyway (asserted below).
1307 bool bMerge = pExtOptions->IsMergeSeps();
1308 bool bRemoveSpace = pExtOptions->IsRemoveSpace();
1309 sal_uInt16 nInfoCount = pExtOptions->GetInfoCount();
1310 const sal_Int32* pColStart = pExtOptions->GetColStart();
1311 const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
1312 long nSkipLines = pExtOptions->GetStartRow();
1313
1314 LanguageType eDocLang = pExtOptions->GetLanguage();
1315 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
1316 bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
1317 bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();
1318
1319 // For date recognition
1320 ::utl::TransliterationWrapper aTransliteration(
1321 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
1322 aTransliteration.loadModuleIfNeeded( eDocLang );
1323 CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
1324 aCalendar.loadDefaultCalendar(
1325 LanguageTag::convertToLocale( eDocLang ) );
1326 std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
1327 std::unique_ptr< CalendarWrapper > pEnglishCalendar;
1328 if ( eDocLang != LANGUAGE_ENGLISH_US )
1329 {
1330 pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
1331 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
1332 aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
1333 pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1334 pEnglishCalendar->loadDefaultCalendar(
1335 LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
1336 }
1337
1338 OUString aLine;
1339 OUString aCell;
1340 sal_uInt16 i;
1341 SCROW nRow = nStartRow;
1342 sal_Unicode cDetectSep = 0xffff; // No separator detection here.
1343
1344 while(--nSkipLines>0)
1345 {
1346 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
1347 if ( rStrm.eof() )
1348 break;
1349 }
1350
1351 // Determine range for Undo.
1352 // We don't need this during import of a file to a new sheet or document...
1353 bool bDetermineRange = bUndo;
1354
1355 // Row heights don't need to be adjusted on the fly if EndPaste() is called
1356 // afterwards, which happens only if bDetermineRange. This variable also
1357 // survives the toggle of bDetermineRange down at the end of the do{} loop.
1358 bool bRangeIsDetermined = bDetermineRange;
1359
1360 bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();
1361
1362 sal_uLong nOriginalStreamPos = rStrm.Tell();
1363
1364 ScDocumentImport aDocImport(*pDoc);
1365 do
1366 {
1367 for( ;; )
1368 {
1369 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
1370 if ( rStrm.eof() && aLine.isEmpty() )
1371 break;
1372
1373 assert(pSeps == aSeps.getStr());
1374
1375 if ( nRow > pDoc->MaxRow() )
1376 {
1377 bOverflowRow = true; // display warning on import
1378 break; // for
1379 }
1380
1381 EmbeddedNullTreatment( aLine);
1382
1383 sal_Int32 nLineLen = aLine.getLength();
1384 SCCOL nCol = nStartCol;
1385 bool bMultiLine = false;
1386 if ( bFixed ) // Fixed line length
1387 {
1388 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1389 // overflow if there is really data following to be put behind
1390 // the last column, which doesn't happen if info is
1391 // SC_COL_SKIP.
1392 for ( i=0; i<nInfoCount && nCol <= pDoc->MaxCol()+1; i++ )
1393 {
1394 sal_uInt8 nFmt = pColFormat[i];
1395 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either
1396 {
1397 if (nCol > pDoc->MaxCol())
1398 bOverflowCol = true; // display warning on import
1399 else if (!bDetermineRange)
1400 {
1401 sal_Int32 nStart = pColStart[i];
1402 sal_Int32 nNext = ( i+1 < nInfoCount ) ? pColStart[i+1] : nLineLen;
1403 bool bIsQuoted = false;
1404 aCell = lcl_GetFixed( aLine, nStart, nNext, bIsQuoted, bOverflowCell );
1405 if (bIsQuoted && bQuotedAsText)
1406 nFmt = SC_COL_TEXT;
1407
1408 bMultiLine |= lcl_PutString(
1409 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1410 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar,
1411 pEnglishTransliteration.get(), pEnglishCalendar.get());
1412 }
1413 ++nCol;
1414 }
1415 }
1416 }
1417 else // Search for the separator
1418 {
1419 SCCOL nSourceCol = 0;
1420 sal_uInt16 nInfoStart = 0;
1421 const sal_Unicode* p = aLine.getStr();
1422 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1423 // overflow if there is really data following to be put behind
1424 // the last column, which doesn't happen if info is
1425 // SC_COL_SKIP.
1426 while (*p && nCol <= pDoc->MaxCol()+1)
1427 {
1428 bool bIsQuoted = false;
1429 p = ScImportExport::ScanNextFieldFromString( p, aCell,
1430 cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
1431
1432 sal_uInt8 nFmt = SC_COL_STANDARD;
1433 for ( i=nInfoStart; i<nInfoCount; i++ )
1434 {
1435 if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based
1436 {
1437 nFmt = pColFormat[i];
1438 nInfoStart = i + 1; // ColInfos are in succession
1439 break; // for
1440 }
1441 }
1442 if ( nFmt != SC_COL_SKIP )
1443 {
1444 if (nCol > pDoc->MaxCol())
1445 bOverflowCol = true; // display warning on import
1446 else if (!bDetermineRange)
1447 {
1448 if (bIsQuoted && bQuotedAsText)
1449 nFmt = SC_COL_TEXT;
1450
1451 bMultiLine |= lcl_PutString(
1452 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1453 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration,
1454 aCalendar, pEnglishTransliteration.get(), pEnglishCalendar.get());
1455 }
1456 ++nCol;
1457 }
1458
1459 ++nSourceCol;
1460 }
1461 }
1462 if (nEndCol < nCol)
1463 nEndCol = nCol; //! points to the next free or even rDoc.MaxCol()+2
1464
1465 if (!bDetermineRange)
1466 {
1467 if (bMultiLine && !bRangeIsDetermined && pDocSh)
1468 pDocSh->AdjustRowHeight( nRow, nRow, nTab);
1469 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
1470 }
1471 ++nRow;
1472 }
1473 // so far nRow/nEndCol pointed to the next free
1474 if (nRow > nStartRow)
1475 --nRow;
1476 if (nEndCol > nStartCol)
1477 nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), pDoc->MaxCol());
1478
1479 if (bDetermineRange)
1480 {
1481 aRange.aEnd.SetCol( nEndCol );
1482 aRange.aEnd.SetRow( nRow );
1483
1484 if ( !mbApi && nStartCol != nEndCol &&
1485 !pDoc->IsBlockEmpty( nTab, nStartCol + 1, nStartRow, nEndCol, nRow ) )
1486 {
1487 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
1488 if (aBox.run() != RET_YES)
1489 {
1490 return false;
1491 }
1492 }
1493
1494 rStrm.Seek( nOriginalStreamPos );
1495 nRow = nStartRow;
1496 if (!StartPaste())
1497 {
1498 EndPaste(false);
1499 return false;
1500 }
1501 }
1502
1503 bDetermineRange = !bDetermineRange; // toggle
1504 } while (!bDetermineRange);
1505 if ( !mbOverwriting )
1506 aDocImport.finalize();
1507
1508 xProgress.reset(); // make room for AdjustRowHeight progress
1509 if (bRangeIsDetermined)
1510 EndPaste(false);
1511
1512 if (mbImportBroadcast && !mbOverwriting)
1513 {
1514 pDoc->BroadcastCells(aRange, SfxHintId::ScDataChanged);
1515 pDocSh->PostDataChanged();
1516 }
1517 return true;
1518 }
1519
EmbeddedNullTreatment(OUString & rStr)1520 void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
1521 {
1522 // A nasty workaround for data with embedded NULL characters. As long as we
1523 // can't handle them properly as cell content (things assume 0-terminated
1524 // strings at too many places) simply strip all NULL characters from raw
1525 // data. Excel does the same. See fdo#57841 for sample data.
1526
1527 // The normal case is no embedded NULL, check first before de-/allocating
1528 // ustring stuff.
1529 sal_Unicode cNull = 0;
1530 if (rStr.indexOf( cNull) >= 0)
1531 {
1532 rStr = rStr.replaceAll( OUString( &cNull, 1), "");
1533 }
1534 }
1535
/**
 * Scan the next field of a separated-values line.
 *
 * @param p               current position in a 0-terminated line.
 * @param rField          receives the field content (cleared first).
 * @param cStr            the quote (string delimiter) character.
 * @param pSeps           0-terminated list of field separator characters.
 * @param bMergeSeps      if true, a run of consecutive separators after the
 *                        field is skipped as a whole.
 * @param rbIsQuoted      set to true if the field started with cStr.
 * @param rbOverflowCell  set to true if data could not be appended to rField
 *                        completely; never reset to false here.
 * @param bRemoveSpace    if true, blanks are trimmed from both ends of an
 *                        unquoted field and from the end of unquoted data
 *                        trailing a quoted field.
 * @return the position after the field and its terminating separator(s), or
 *         the position of the terminating 0 character.
 */
const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
        OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
        bool& rbOverflowCell, bool bRemoveSpace )
{
    rbIsQuoted = false;
    rField.clear();
    const sal_Unicode cBlank = ' ';
    if (!ScGlobal::UnicodeStrChr( pSeps, cBlank))
    {
        // Cope with broken generators that put leading blanks before a quoted
        // field, like "field1", "field2", "..."
        // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
        const sal_Unicode* pb = p;
        while (*pb == cBlank)
            ++pb;
        if (*pb == cStr)
            p = pb;
    }
    if ( *p == cStr )           // String in quotes
    {
        rbIsQuoted = true;
        const sal_Unicode* p1;
        // p1 remembers where lcl_ScanString() stopped after the quoted part.
        p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
        while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
            p++;
        // Append remaining unquoted and undelimited data (dirty, dirty) to
        // this field.
        if (p > p1)
        {
            const sal_Unicode* ptrim_f = p;
            if ( bRemoveSpace )
            {
                while ( ptrim_f > p1  && ( *(ptrim_f - 1) == cBlank ) )
                    --ptrim_f;
            }
            if (!lcl_appendLineData( rField, p1, ptrim_f))
                rbOverflowCell = true;
        }
        if( *p )
            p++;
    }
    else                        // up to delimiter
    {
        const sal_Unicode* p0 = p;
        while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
            p++;
        const sal_Unicode* ptrim_i = p0;
        const sal_Unicode* ptrim_f = p;  // [ptrim_i,ptrim_f) is cell data after trimming
        if ( bRemoveSpace )
        {
            // Stay within the field: if blank is itself a separator and the
            // field is empty, *ptrim_i is a blank that belongs to the
            // separators, and skipping it would move ptrim_i past ptrim_f.
            while ( ptrim_i < ptrim_f && *ptrim_i == cBlank )
                ++ptrim_i;
            while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) )
                --ptrim_f;
        }
        if (!lcl_appendLineData( rField, ptrim_i, ptrim_f))
            rbOverflowCell = true;
        if( *p )
            p++;
    }
    if ( bMergeSeps )           // skip following delimiters
    {
        while ( *p && ScGlobal::UnicodeStrChr( pSeps, *p ) )
            p++;
    }
    return p;
}
1603
1604 namespace {
1605
1606 /**
1607 * Check if a given string has any line break characters or separators.
1608 *
1609 * @param rStr string to inspect.
1610 * @param cSep separator character.
1611 */
hasLineBreaksOrSeps(const OUString & rStr,sal_Unicode cSep)1612 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
1613 {
1614 const sal_Unicode* p = rStr.getStr();
1615 for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
1616 {
1617 sal_Unicode c = *p;
1618 if (c == cSep)
1619 // separator found.
1620 return true;
1621
1622 switch (c)
1623 {
1624 case '\n':
1625 case '\r':
1626 // line break found.
1627 return true;
1628 default:
1629 ;
1630 }
1631 }
1632 return false;
1633 }
1634
1635 }
1636
Doc2Text(SvStream & rStrm)1637 bool ScImportExport::Doc2Text( SvStream& rStrm )
1638 {
1639 SCCOL nCol;
1640 SCROW nRow;
1641 SCCOL nStartCol = aRange.aStart.Col();
1642 SCROW nStartRow = aRange.aStart.Row();
1643 SCTAB nStartTab = aRange.aStart.Tab();
1644 SCCOL nEndCol = aRange.aEnd.Col();
1645 SCROW nEndRow = aRange.aEnd.Row();
1646 SCTAB nEndTab = aRange.aEnd.Tab();
1647
1648 if (!pDoc->GetClipParam().isMultiRange() && nStartTab == nEndTab)
1649 if (!pDoc->ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
1650 return false;
1651
1652 OUString aCellStr;
1653
1654 bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);
1655
1656 // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1657 std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
1658 for( SCCOL i = nStartCol; i <= nEndCol; ++i )
1659 pDoc->InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
1660 for (nRow = nStartRow; nRow <= nEndRow; nRow++)
1661 {
1662 if (bIncludeFiltered || !pDoc->RowFiltered( nRow, nStartTab ))
1663 {
1664 for (nCol = nStartCol; nCol <= nEndCol; nCol++)
1665 {
1666 ScAddress aPos(nCol, nRow, nStartTab);
1667 sal_uInt32 nNumFmt = pDoc->GetNumberFormat(aPos);
1668 SvNumberFormatter* pFormatter = pDoc->GetFormatTable();
1669
1670 ScRefCellValue aCell(*pDoc, aPos, blockPos[ nCol - nStartCol ]);
1671 switch (aCell.meType)
1672 {
1673 case CELLTYPE_FORMULA:
1674 {
1675 if (bFormulas)
1676 {
1677 aCell.mpFormula->GetFormula( aCellStr );
1678 if( aCellStr.indexOf( cSep ) != -1 )
1679 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1680 else
1681 lcl_WriteSimpleString( rStrm, aCellStr );
1682 }
1683 else
1684 {
1685 Color* pColor;
1686 ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
1687
1688 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1689 if( bMultiLineText )
1690 {
1691 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1692 aCellStr = aCellStr.replaceAll( "\n", " " );
1693 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1694 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1695 }
1696
1697 if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1698 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1699
1700 if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
1701 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1702 else
1703 lcl_WriteSimpleString( rStrm, aCellStr );
1704 }
1705 }
1706 break;
1707 case CELLTYPE_VALUE:
1708 {
1709 Color* pColor;
1710 ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
1711 lcl_WriteSimpleString( rStrm, aCellStr );
1712 }
1713 break;
1714 case CELLTYPE_NONE:
1715 break;
1716 default:
1717 {
1718 Color* pColor;
1719 ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
1720
1721 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1722 if( bMultiLineText )
1723 {
1724 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1725 aCellStr = aCellStr.replaceAll( "\n", " " );
1726 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1727 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1728 }
1729
1730 if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1731 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1732
1733 if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
1734 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1735 else
1736 lcl_WriteSimpleString( rStrm, aCellStr );
1737 }
1738 }
1739 if( nCol < nEndCol )
1740 lcl_WriteSimpleString( rStrm, OUString(cSep) );
1741 }
1742 WriteUnicodeOrByteEndl( rStrm );
1743 if( rStrm.GetError() != ERRCODE_NONE )
1744 break;
1745 if( nSizeLimit && rStrm.Tell() > nSizeLimit )
1746 break;
1747 }
1748 }
1749
1750 return rStrm.GetError() == ERRCODE_NONE;
1751 }
1752
Sylk2Doc(SvStream & rStrm)1753 bool ScImportExport::Sylk2Doc( SvStream& rStrm )
1754 {
1755 bool bOk = true;
1756 bool bMyDoc = false;
1757 SylkVersion eVersion = SylkVersion::OTHER;
1758
1759 // US-English separators for StringToDouble
1760 sal_Unicode const cDecSep = '.';
1761 sal_Unicode const cGrpSep = ',';
1762
1763 SCCOL nStartCol = aRange.aStart.Col();
1764 SCROW nStartRow = aRange.aStart.Row();
1765 SCCOL nEndCol = aRange.aEnd.Col();
1766 SCROW nEndRow = aRange.aEnd.Row();
1767 sal_uLong nOldPos = rStrm.Tell();
1768 bool bData = !bSingle;
1769 ::std::vector< sal_uInt32 > aFormats;
1770
1771 if( !bSingle)
1772 bOk = StartPaste();
1773
1774 while( bOk )
1775 {
1776 OUString aLine;
1777 OUString aText;
1778 OString aByteLine;
1779 SCCOL nCol = nStartCol;
1780 SCROW nRow = nStartRow;
1781 SCCOL nRefCol = nCol;
1782 SCROW nRefRow = nRow;
1783 rStrm.Seek( nOldPos );
1784 for( ;; )
1785 {
1786 //! allow unicode
1787 rStrm.ReadLine( aByteLine );
1788 aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
1789 if( rStrm.eof() )
1790 break;
1791 bool bInvalidCol = false;
1792 bool bInvalidRow = false;
1793 const sal_Unicode* p = aLine.getStr();
1794 sal_Unicode cTag = *p++;
1795 if( cTag == 'C' ) // Content
1796 {
1797 if( *p++ != ';' )
1798 return false;
1799
1800 bool bInvalidRefCol = false;
1801 bool bInvalidRefRow = false;
1802 while( *p )
1803 {
1804 sal_Unicode ch = *p++;
1805 ch = ScGlobal::ToUpperAlpha( ch );
1806 switch( ch )
1807 {
1808 case 'X':
1809 {
1810 bInvalidCol = false;
1811 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
1812 if (bFail || nCol < 0 || pDoc->MaxCol() < nCol)
1813 {
1814 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
1815 nCol = std::max<SCCOL>(0, std::min<SCCOL>(nCol, pDoc->MaxCol()));
1816 bInvalidCol = bOverflowCol = true;
1817 }
1818 break;
1819 }
1820 case 'Y':
1821 {
1822 bInvalidRow = false;
1823 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
1824 if (bFail || nRow < 0 || nMaxImportRow < nRow)
1825 {
1826 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
1827 nRow = std::max<SCROW>(0, std::min<SCROW>(nRow, nMaxImportRow));
1828 bInvalidRow = bOverflowRow = true;
1829 }
1830 break;
1831 }
1832 case 'C':
1833 {
1834 bInvalidRefCol = false;
1835 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nRefCol);
1836 if (bFail || nRefCol < 0 || pDoc->MaxCol() < nRefCol)
1837 {
1838 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol);
1839 nRefCol = std::max<SCCOL>(0, std::min<SCCOL>(nRefCol, pDoc->MaxCol()));
1840 bInvalidRefCol = bOverflowCol = true;
1841 }
1842 break;
1843 }
1844 case 'R':
1845 {
1846 bInvalidRefRow = false;
1847 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRefRow);
1848 if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow)
1849 {
1850 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow);
1851 nRefRow = std::max<SCROW>(0, std::min<SCROW>(nRefRow, nMaxImportRow));
1852 bInvalidRefRow = bOverflowRow = true;
1853 }
1854 break;
1855 }
1856 case 'K':
1857 {
1858 if( !bSingle &&
1859 ( nCol < nStartCol || nCol > nEndCol
1860 || nRow < nStartRow || nRow > nEndRow
1861 || nCol > pDoc->MaxCol() || nRow > nMaxImportRow
1862 || bInvalidCol || bInvalidRow ) )
1863 break;
1864 if( !bData )
1865 {
1866 if( nRow > nEndRow )
1867 nEndRow = nRow;
1868 if( nCol > nEndCol )
1869 nEndCol = nCol;
1870 break;
1871 }
1872 bool bText;
1873 if( *p == '"' )
1874 {
1875 bText = true;
1876 aText.clear();
1877 p = lcl_ScanSylkString( p, aText, eVersion);
1878 }
1879 else
1880 bText = false;
1881 const sal_Unicode* q = p;
1882 while( *q && *q != ';' )
1883 q++;
1884 if ( !(*q == ';' && *(q+1) == 'I') && !bInvalidCol && !bInvalidRow )
1885 { // don't ignore value
1886 if( bText )
1887 {
1888 pDoc->EnsureTable(aRange.aStart.Tab());
1889 pDoc->SetTextCell(
1890 ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
1891 }
1892 else
1893 {
1894 double fVal = rtl_math_uStringToDouble( p,
1895 aLine.getStr() + aLine.getLength(),
1896 cDecSep, cGrpSep, nullptr, nullptr );
1897 pDoc->SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
1898 }
1899 }
1900 }
1901 break;
1902 case 'E':
1903 case 'M':
1904 {
1905 if ( ch == 'M' )
1906 {
1907 if ( nRefCol < nCol )
1908 nRefCol = nCol;
1909 if ( nRefRow < nRow )
1910 nRefRow = nRow;
1911 if ( !bData )
1912 {
1913 if( nRefRow > nEndRow )
1914 nEndRow = nRefRow;
1915 if( nRefCol > nEndCol )
1916 nEndCol = nRefCol;
1917 }
1918 }
1919 if( !bMyDoc || !bData )
1920 break;
1921 aText = "=";
1922 p = lcl_ScanSylkFormula( p, aText, eVersion);
1923
1924 if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow)))
1925 break;
1926
1927 ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
1928 /* FIXME: do we want GRAM_ODFF_A1 instead? At the
1929 * end it probably should be GRAM_ODFF_R1C1, since
1930 * R1C1 is what Excel writes in SYLK, or even
1931 * better GRAM_ENGLISH_XL_R1C1. */
1932 const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
1933 ScCompiler aComp( pDoc, aPos, eGrammar);
1934 std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
1935 pDoc->CheckLinkFormulaNeedingCheck(*xCode);
1936 if ( ch == 'M' )
1937 {
1938 ScMarkData aMark(pDoc->MaxRow(), pDoc->MaxCol());
1939 aMark.SelectTable( aPos.Tab(), true );
1940 pDoc->InsertMatrixFormula( nCol, nRow, nRefCol,
1941 nRefRow, aMark, EMPTY_OUSTRING, xCode.get() );
1942 }
1943 else
1944 {
1945 ScFormulaCell* pFCell = new ScFormulaCell(
1946 pDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
1947 pDoc->SetFormulaCell(aPos, pFCell);
1948 }
1949 }
1950 break;
1951 }
1952 while( *p && *p != ';' )
1953 p++;
1954 if( *p )
1955 p++;
1956 }
1957 }
1958 else if( cTag == 'F' ) // Format
1959 {
1960 if( *p++ != ';' )
1961 return false;
1962 sal_Int32 nFormat = -1;
1963 while( *p )
1964 {
1965 sal_Unicode ch = *p++;
1966 ch = ScGlobal::ToUpperAlpha( ch );
1967 switch( ch )
1968 {
1969 case 'X':
1970 {
1971 bInvalidCol = false;
1972 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
1973 if (bFail || nCol < 0 || pDoc->MaxCol() < nCol)
1974 {
1975 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
1976 nCol = std::max<SCCOL>(0, std::min<SCCOL>(nCol, pDoc->MaxCol()));
1977 bInvalidCol = bOverflowCol = true;
1978 }
1979 break;
1980 }
1981 case 'Y':
1982 {
1983 bInvalidRow = false;
1984 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
1985 if (bFail || nRow < 0 || nMaxImportRow < nRow)
1986 {
1987 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
1988 nRow = std::max<SCROW>(0, std::min<SCROW>(nRow, nMaxImportRow));
1989 bInvalidRow = bOverflowRow = true;
1990 }
1991 break;
1992 }
1993 case 'P' :
1994 if ( bData )
1995 {
1996 // F;P<n> sets format code of P;P<code> at
1997 // current position, or at ;X;Y if specified.
1998 // Note that ;X;Y may appear after ;P
1999 const sal_Unicode* p0 = p;
2000 while( *p && *p != ';' )
2001 p++;
2002 OUString aNumber(p0, p - p0);
2003 nFormat = aNumber.toInt32();
2004 }
2005 break;
2006 }
2007 while( *p && *p != ';' )
2008 p++;
2009 if( *p )
2010 p++;
2011 }
2012 if ( !bData )
2013 {
2014 if( nRow > nEndRow )
2015 nEndRow = nRow;
2016 if( nCol > nEndCol )
2017 nEndCol = nCol;
2018 }
2019 if ( 0 <= nFormat && nFormat < static_cast<sal_Int32>(aFormats.size()) && !bInvalidCol && !bInvalidRow )
2020 {
2021 sal_uInt32 nKey = aFormats[nFormat];
2022 pDoc->ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
2023 SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
2024 }
2025 }
2026 else if( cTag == 'P' )
2027 {
2028 if ( bData && *p == ';' && *(p+1) == 'P' )
2029 {
2030 OUString aCode( p+2 );
2031 // unescape doubled semicolons
2032 aCode = aCode.replaceAll(";;", ";");
2033 // get rid of Xcl escape characters
2034 aCode = aCode.replaceAll("\x1b", "");
2035 sal_Int32 nCheckPos;
2036 SvNumFormatType nType;
2037 sal_uInt32 nKey;
2038 pDoc->GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
2039 LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
2040 if ( nCheckPos )
2041 nKey = 0;
2042 aFormats.push_back( nKey );
2043 }
2044 }
2045 else if( cTag == 'I' && *p == 'D' )
2046 {
2047 aLine = aLine.copy(4);
2048 if (aLine == "CALCOOO32")
2049 eVersion = SylkVersion::OOO32;
2050 else if (aLine == "SCALC3")
2051 eVersion = SylkVersion::SCALC3;
2052 bMyDoc = (eVersion <= SylkVersion::OWN);
2053 }
2054 else if( cTag == 'E' ) // End
2055 break;
2056 }
2057 if( !bData )
2058 {
2059 aRange.aEnd.SetCol( nEndCol );
2060 aRange.aEnd.SetRow( nEndRow );
2061 bOk = StartPaste();
2062 bData = true;
2063 }
2064 else
2065 break;
2066 }
2067
2068 EndPaste();
2069 return bOk;
2070 }
2071
Doc2Sylk(SvStream & rStrm)2072 bool ScImportExport::Doc2Sylk( SvStream& rStrm )
2073 {
2074 SCCOL nCol;
2075 SCROW nRow;
2076 SCCOL nStartCol = aRange.aStart.Col();
2077 SCROW nStartRow = aRange.aStart.Row();
2078 SCCOL nEndCol = aRange.aEnd.Col();
2079 SCROW nEndRow = aRange.aEnd.Row();
2080 OUString aCellStr;
2081 OUString aValStr;
2082 lcl_WriteSimpleString( rStrm, "ID;PCALCOOO32" );
2083 WriteUnicodeOrByteEndl( rStrm );
2084
2085 for (nRow = nStartRow; nRow <= nEndRow; nRow++)
2086 {
2087 for (nCol = nStartCol; nCol <= nEndCol; nCol++)
2088 {
2089 OUString aBufStr;
2090 double nVal;
2091 bool bForm = false;
2092 SCROW r = nRow - nStartRow + 1;
2093 SCCOL c = nCol - nStartCol + 1;
2094 ScRefCellValue aCell(*pDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));
2095 CellType eType = aCell.meType;
2096 switch( eType )
2097 {
2098 case CELLTYPE_FORMULA:
2099 bForm = bFormulas;
2100 if( pDoc->HasValueData( nCol, nRow, aRange.aStart.Tab()) )
2101 goto hasvalue;
2102 else
2103 goto hasstring;
2104
2105 case CELLTYPE_VALUE:
2106 hasvalue:
2107 pDoc->GetValue( nCol, nRow, aRange.aStart.Tab(), nVal );
2108
2109 aValStr = ::rtl::math::doubleToUString( nVal,
2110 rtl_math_StringFormat_Automatic,
2111 rtl_math_DecimalPlaces_Max, '.', true );
2112
2113 aBufStr = "C;X"
2114 + OUString::number( c )
2115 + ";Y"
2116 + OUString::number( r )
2117 + ";K"
2118 + aValStr;
2119 lcl_WriteSimpleString( rStrm, aBufStr );
2120 goto checkformula;
2121
2122 case CELLTYPE_STRING:
2123 case CELLTYPE_EDIT:
2124 hasstring:
2125 aCellStr = pDoc->GetString(nCol, nRow, aRange.aStart.Tab());
2126 aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
2127
2128 aBufStr = "C;X"
2129 + OUString::number( c )
2130 + ";Y"
2131 + OUString::number( r )
2132 + ";K";
2133 lcl_WriteSimpleString( rStrm, aBufStr );
2134 lcl_WriteString( rStrm, aCellStr, '"', ';' );
2135
2136 checkformula:
2137 if( bForm )
2138 {
2139 const ScFormulaCell* pFCell = aCell.mpFormula;
2140 switch ( pFCell->GetMatrixFlag() )
2141 {
2142 case ScMatrixMode::Reference :
2143 aCellStr.clear();
2144 break;
2145 default:
2146 OUString aOUCellStr;
2147 pFCell->GetFormula( aOUCellStr,formula::FormulaGrammar::GRAM_PODF_A1);
2148 aCellStr = aOUCellStr;
2149 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2150 * the end it probably should be
2151 * GRAM_ODFF_R1C1, since R1C1 is what Excel
2152 * writes in SYLK, or even better
2153 * GRAM_ENGLISH_XL_R1C1. */
2154 }
2155 if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE &&
2156 aCellStr.startsWith("{") &&
2157 aCellStr.endsWith("}") )
2158 { // cut off matrix {} characters
2159 aCellStr = aCellStr.copy(1, aCellStr.getLength()-2);
2160 }
2161 if ( aCellStr[0] == '=' )
2162 aCellStr = aCellStr.copy(1);
2163 OUString aPrefix;
2164 switch ( pFCell->GetMatrixFlag() )
2165 {
2166 case ScMatrixMode::Formula :
2167 { // diff expression with 'M' M$-extension
2168 SCCOL nC;
2169 SCROW nR;
2170 pFCell->GetMatColsRows( nC, nR );
2171 nC += c - 1;
2172 nR += r - 1;
2173 aPrefix = ";R"
2174 + OUString::number( nR )
2175 + ";C"
2176 + OUString::number( nC )
2177 + ";M";
2178 }
2179 break;
2180 case ScMatrixMode::Reference :
2181 { // diff expression with 'I' M$-extension
2182 ScAddress aPos;
2183 (void)pFCell->GetMatrixOrigin( aPos );
2184 aPrefix = ";I;R"
2185 + OUString::number( aPos.Row() - nStartRow + 1 )
2186 + ";C"
2187 + OUString::number( aPos.Col() - nStartCol + 1 );
2188 }
2189 break;
2190 default:
2191 // formula Expression
2192 aPrefix = ";E";
2193 }
2194 lcl_WriteSimpleString( rStrm, aPrefix );
2195 if ( !aCellStr.isEmpty() )
2196 lcl_WriteString( rStrm, aCellStr, 0, ';' );
2197 }
2198 WriteUnicodeOrByteEndl( rStrm );
2199 break;
2200
2201 default:
2202 {
2203 // added to avoid warnings
2204 }
2205 }
2206 }
2207 }
2208 lcl_WriteSimpleString( rStrm, OUString( 'E' ) );
2209 WriteUnicodeOrByteEndl( rStrm );
2210 return rStrm.GetError() == ERRCODE_NONE;
2211 }
2212
Doc2HTML(SvStream & rStrm,const OUString & rBaseURL)2213 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL )
2214 {
2215 // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2216 ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, pDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
2217 aStreamPath, aNonConvertibleChars, maFilterOptions );
2218 return rStrm.GetError() == ERRCODE_NONE;
2219 }
2220
Doc2RTF(SvStream & rStrm)2221 bool ScImportExport::Doc2RTF( SvStream& rStrm )
2222 {
2223 // rtl_TextEncoding is ignored in ScExportRTF
2224 ScFormatFilter::Get().ScExportRTF( rStrm, pDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
2225 return rStrm.GetError() == ERRCODE_NONE;
2226 }
2227
Doc2Dif(SvStream & rStrm)2228 bool ScImportExport::Doc2Dif( SvStream& rStrm )
2229 {
2230 // for DIF in the clipboard, IBM_850 is always used
2231 ScFormatFilter::Get().ScExportDif( rStrm, pDoc, aRange, RTL_TEXTENCODING_IBM_850 );
2232 return true;
2233 }
2234
Dif2Doc(SvStream & rStrm)2235 bool ScImportExport::Dif2Doc( SvStream& rStrm )
2236 {
2237 SCTAB nTab = aRange.aStart.Tab();
2238 ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
2239 pImportDoc->InitUndo( pDoc, nTab, nTab );
2240
2241 // for DIF in the clipboard, IBM_850 is always used
2242 ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
2243
2244 SCCOL nEndCol;
2245 SCROW nEndRow;
2246 pImportDoc->GetCellArea( nTab, nEndCol, nEndRow );
2247 // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2248 if ( nEndCol < aRange.aStart.Col() )
2249 nEndCol = aRange.aStart.Col();
2250 if ( nEndRow < aRange.aStart.Row() )
2251 nEndRow = aRange.aStart.Row();
2252 aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
2253
2254 bool bOk = StartPaste();
2255 if (bOk)
2256 {
2257 InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2258 pDoc->DeleteAreaTab( aRange, nFlags );
2259 pImportDoc->CopyToDocument(aRange, nFlags, false, *pDoc);
2260 EndPaste();
2261 }
2262
2263 return bOk;
2264 }
2265
RTF2Doc(SvStream & rStrm,const OUString & rBaseURL)2266 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
2267 {
2268 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( pDoc, aRange );
2269 if (!pImp)
2270 return false;
2271 pImp->Read( rStrm, rBaseURL );
2272 aRange = pImp->GetRange();
2273
2274 bool bOk = StartPaste();
2275 if (bOk)
2276 {
2277 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2278 pDoc->DeleteAreaTab( aRange, nFlags );
2279 pImp->WriteToDocument();
2280 EndPaste();
2281 }
2282 return bOk;
2283 }
2284
HTML2Doc(SvStream & rStrm,const OUString & rBaseURL)2285 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
2286 {
2287 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( pDoc, rBaseURL, aRange);
2288 if (!pImp)
2289 return false;
2290 pImp->Read( rStrm, rBaseURL );
2291 aRange = pImp->GetRange();
2292
2293 bool bOk = StartPaste();
2294 if (bOk)
2295 {
2296 // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2297 // a Draw Layer but no Draw View -> create Draw Layer and View here
2298 if (pDocSh)
2299 pDocSh->MakeDrawLayer();
2300
2301 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2302 pDoc->DeleteAreaTab( aRange, nFlags );
2303
2304 if (pExtOptions)
2305 {
2306 // Pick up import options if available.
2307 LanguageType eLang = pExtOptions->GetLanguage();
2308 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
2309 bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
2310 pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber);
2311 }
2312 else
2313 // Regular import, with no options.
2314 pImp->WriteToDocument();
2315
2316 EndPaste();
2317 }
2318 return bOk;
2319 }
2320
2321 #ifndef DISABLE_DYNLOADING
2322
thisModule()2323 extern "C" { static void thisModule() {} }
2324
2325 #else
2326
2327 extern "C" {
2328 ScFormatFilterPlugin* ScFilterCreate();
2329 }
2330
2331 #endif
2332
2333 typedef ScFormatFilterPlugin * (*FilterFn)();
Get()2334 ScFormatFilterPlugin &ScFormatFilter::Get()
2335 {
2336 static ScFormatFilterPlugin *plugin = [&]()
2337 {
2338 #ifndef DISABLE_DYNLOADING
2339 OUString sFilterLib(SVLIBRARY("scfilt"));
2340 static ::osl::Module aModule;
2341 bool bLoaded = aModule.is();
2342 if (!bLoaded)
2343 bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
2344 if (!bLoaded)
2345 bLoaded = aModule.load(sFilterLib);
2346 if (bLoaded)
2347 {
2348 oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
2349 if (fn != nullptr)
2350 return reinterpret_cast<FilterFn>(fn)();
2351 }
2352 assert(false);
2353 return static_cast<ScFormatFilterPlugin*>(nullptr);
2354 #else
2355 return ScFilterCreate();
2356 #endif
2357 }();
2358
2359 return *plugin;
2360 }
2361
2362 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2363 // array.
lcl_UnicodeStrChr(const sal_Unicode * pStr,sal_Unicode c)2364 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
2365 sal_Unicode c )
2366 {
2367 while (*pStr)
2368 {
2369 if (*pStr == c)
2370 return pStr;
2371 ++pStr;
2372 }
2373 return nullptr;
2374 }
2375
ScImportStringStream(const OUString & rStr)2376 ScImportStringStream::ScImportStringStream( const OUString& rStr )
2377 : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
2378 rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
2379 {
2380 SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
2381 #ifdef OSL_BIGENDIAN
2382 SetEndian(SvStreamEndian::BIG);
2383 #else
2384 SetEndian(SvStreamEndian::LITTLE);
2385 #endif
2386 }
2387
ReadCsvLine(SvStream & rStream,bool bEmbeddedLineBreak,OUString & rFieldSeparators,sal_Unicode cFieldQuote,sal_Unicode & rcDetectSep)2388 OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
2389 OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep )
2390 {
2391 enum RetryState
2392 {
2393 FORBID,
2394 ALLOW,
2395 RETRY,
2396 RETRIED
2397 } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);
2398
2399 sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);
2400
2401 Label_RetryWithNewSep:
2402
2403 if (eRetryState == RetryState::RETRY)
2404 {
2405 eRetryState = RetryState::RETRIED;
2406 rStream.Seek( nStreamPos);
2407 }
2408
2409 OUString aStr;
2410 rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2411
2412 if (bEmbeddedLineBreak)
2413 {
2414 const sal_Unicode* pSeps = rFieldSeparators.getStr();
2415
2416 QuoteType eQuoteState = FIELDEND_QUOTE;
2417 bool bFieldStart = true;
2418
2419 sal_Int32 nLastOffset = 0;
2420 sal_Int32 nQuotes = 0;
2421 while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
2422 {
2423 const sal_Unicode *p, *pStart;
2424 p = pStart = aStr.getStr();
2425 p += nLastOffset;
2426 while (*p)
2427 {
2428 if (nQuotes)
2429 {
2430 if (*p == cFieldQuote)
2431 {
2432 if (bFieldStart)
2433 {
2434 ++nQuotes;
2435 bFieldStart = false;
2436 eQuoteState = FIELDSTART_QUOTE;
2437 }
2438 // Do not detect a FIELDSTART_QUOTE if not in
2439 // bFieldStart mode, in which case for unquoted content
2440 // we are in FIELDEND_QUOTE state.
2441 else if (eQuoteState != FIELDEND_QUOTE)
2442 {
2443 eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);
2444
2445 if (eRetryState == RetryState::ALLOW && rcDetectSep == ' ')
2446 {
2447 eRetryState = RetryState::RETRY;
2448 rFieldSeparators += OUString(' ');
2449 goto Label_RetryWithNewSep;
2450 }
2451
2452 // DONTKNOW_QUOTE is an embedded unescaped quote we
2453 // don't count for pairing.
2454 if (eQuoteState != DONTKNOW_QUOTE)
2455 ++nQuotes;
2456 }
2457 }
2458 else if (eQuoteState == FIELDEND_QUOTE)
2459 {
2460 if (bFieldStart)
2461 // If blank is a separator it starts a field, if it
2462 // is not and thus maybe leading before quote we
2463 // are still at start of field regarding quotes.
2464 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2465 else
2466 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2467 }
2468 }
2469 else
2470 {
2471 if (*p == cFieldQuote && bFieldStart)
2472 {
2473 nQuotes = 1;
2474 eQuoteState = FIELDSTART_QUOTE;
2475 bFieldStart = false;
2476 }
2477 else if (eQuoteState == FIELDEND_QUOTE)
2478 {
2479 // This also skips leading blanks at beginning of line
2480 // if followed by a quote. It's debatable whether we
2481 // actually want that or not, but congruent with what
2482 // ScanNextFieldFromString() does.
2483 if (bFieldStart)
2484 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2485 else
2486 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2487 }
2488 }
2489 // A quote character inside a field content does not start
2490 // a quote.
2491 ++p;
2492 }
2493
2494 if (nQuotes % 2 == 0)
2495 // We still have a (theoretical?) problem here if due to
2496 // nArbitraryLineLengthLimit we split a string right between a
2497 // doubled quote pair.
2498 break;
2499 else
2500 {
2501 nLastOffset = aStr.getLength();
2502 OUString aNext;
2503 rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2504 aStr += "\n" + aNext;
2505 }
2506 }
2507 }
2508 return aStr;
2509 }
2510
2511 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2512