1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <sot/formats.hxx>
24 #include <sfx2/mieclip.hxx>
25 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
26 #include <sal/log.hxx>
27 #include <unotools/charclass.hxx>
28 #include <osl/module.hxx>
29
30 #include <global.hxx>
31 #include <docsh.hxx>
32 #include <undoblk.hxx>
33 #include <rangenam.hxx>
34 #include <tabvwsh.hxx>
35 #include <filter.hxx>
36 #include <asciiopt.hxx>
37 #include <formulacell.hxx>
38 #include <cellform.hxx>
39 #include <progress.hxx>
40 #include <scitems.hxx>
41 #include <editable.hxx>
42 #include <compiler.hxx>
43 #include <warnbox.hxx>
44 #include <clipparam.hxx>
45 #include <impex.hxx>
46 #include <editutil.hxx>
47 #include <patattr.hxx>
48 #include <docpool.hxx>
49 #include <stringutil.hxx>
50 #include <cellvalue.hxx>
51 #include <tokenarray.hxx>
52 #include <documentimport.hxx>
53 #include <refundo.hxx>
54 #include <mtvelements.hxx>
55
56 #include <globstr.hrc>
57 #include <scresid.hxx>
58 #include <o3tl/safeint.hxx>
59 #include <tools/svlibrary.h>
60 #include <unotools/configmgr.hxx>
61 #include <vcl/svapp.hxx>
62 #include <vcl/weld.hxx>
63 #include <editeng/editobj.hxx>
64
65 #include <memory>
66 #include <string_view>
67
68 #include <osl/endian.h>
69
70 // We don't want to end up with 2GB read in one line just because of malformed
71 // multiline fields, so chop it _somewhere_, which is twice supported columns
72 // times arbitrary maximum cell content length, 2*1024*64K=128M, and because
73 // it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
74 // luck anyway.
75 constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
76 constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;
77
78 namespace
79 {
80 const char SYLK_LF[] = "\x1b :";
81
lcl_IsEndianSwap(const SvStream & rStrm)82 bool lcl_IsEndianSwap( const SvStream& rStrm )
83 {
84 #ifdef OSL_BIGENDIAN
85 return rStrm.GetEndian() != SvStreamEndian::BIG;
86 #else
87 return rStrm.GetEndian() != SvStreamEndian::LITTLE;
88 #endif
89 }
90 }
91
namespace {

/** Generator versions of SYLK data; the enumerator order is significant
    because callers compare with >= SylkVersion::OOO32. */
enum class SylkVersion
{
    /// Wrote wrongly quoted strings and unescaped semicolons.
    SCALC3,
    /// Correct strings, plus multiline content.
    OOO32,
    /// Place our new versions, if any, before this value.
    OWN,
    /// Assume that aliens wrote correct strings.
    OTHER
};

}
103
104 // Whole document without Undo
ScImportExport(ScDocument & r)105 ScImportExport::ScImportExport( ScDocument& r )
106 : pDocSh( dynamic_cast< ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
107 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
108 cSep( '\t' ), cStr( '"' ),
109 bFormulas( false ), bIncludeFiltered( true ),
110 bAll( true ), bSingle( true ), bUndo( false ),
111 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
112 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
113 mExportTextOptions()
114 {
115 pUndoDoc = nullptr;
116 pExtOptions = nullptr;
117 }
118
119 // Insert am current cell without range(es)
ScImportExport(ScDocument & r,const ScAddress & rPt)120 ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
121 : pDocSh( dynamic_cast< ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
122 aRange( rPt ),
123 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
124 cSep( '\t' ), cStr( '"' ),
125 bFormulas( false ), bIncludeFiltered( true ),
126 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
127 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
128 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
129 mExportTextOptions()
130 {
131 pUndoDoc = nullptr;
132 pExtOptions = nullptr;
133 }
134
135 // ctor with a range is only used for export
136 //! ctor with a string (and bSingle=true) is also used for DdeSetData
ScImportExport(ScDocument & r,const ScRange & rRange)137 ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
138 : pDocSh( dynamic_cast<ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
139 aRange( rRange ),
140 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
141 cSep( '\t' ), cStr( '"' ),
142 bFormulas( false ), bIncludeFiltered( true ),
143 bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
144 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
145 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
146 mExportTextOptions()
147 {
148 pUndoDoc = nullptr;
149 pExtOptions = nullptr;
150 // Only one sheet (table) supported
151 aRange.aEnd.SetTab( aRange.aStart.Tab() );
152 }
153
154 // Evaluate input string - either range, cell or the whole document (when error)
155 // If a View exists, the TabNo of the view will be used.
ScImportExport(ScDocument & r,const OUString & rPos)156 ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
157 : pDocSh( dynamic_cast< ScDocShell* >(r.GetDocumentShell()) ), rDoc( r ),
158 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
159 cSep( '\t' ), cStr( '"' ),
160 bFormulas( false ), bIncludeFiltered( true ),
161 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
162 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
163 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ),
164 mExportTextOptions()
165 {
166 pUndoDoc = nullptr;
167 pExtOptions = nullptr;
168
169 SCTAB nTab = ScDocShell::GetCurTab();
170 aRange.aStart.SetTab( nTab );
171 OUString aPos( rPos );
172 // Named range?
173 ScRangeName* pRange = rDoc.GetRangeName();
174 if (pRange)
175 {
176 const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClassPtr()->uppercase(aPos));
177 if (pData)
178 {
179 if( pData->HasType( ScRangeData::Type::RefArea )
180 || pData->HasType( ScRangeData::Type::AbsArea )
181 || pData->HasType( ScRangeData::Type::AbsPos ) )
182 {
183 pData->GetSymbol(aPos);
184 }
185 }
186 }
187 formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention();
188 // Range?
189 if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
190 bSingle = false;
191 // Cell?
192 else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
193 aRange.aEnd = aRange.aStart;
194 else
195 bAll = true;
196 }
197
~ScImportExport()198 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
199 {
200 pUndoDoc.reset();
201 pExtOptions.reset();
202 }
203
SetExtOptions(const ScAsciiOptions & rOpt)204 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
205 {
206 if ( pExtOptions )
207 *pExtOptions = rOpt;
208 else
209 pExtOptions.reset(new ScAsciiOptions( rOpt ));
210
211 // "normal" Options
212
213 cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
214 cStr = rOpt.GetTextSep();
215 }
216
SetFilterOptions(const OUString & rFilterOptions)217 void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
218 {
219 maFilterOptions = rFilterOptions;
220 }
221
IsFormatSupported(SotClipboardFormatId nFormat)222 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
223 {
224 return nFormat == SotClipboardFormatId::STRING
225 || nFormat == SotClipboardFormatId::STRING_TSVC
226 || nFormat == SotClipboardFormatId::SYLK
227 || nFormat == SotClipboardFormatId::LINK
228 || nFormat == SotClipboardFormatId::HTML
229 || nFormat == SotClipboardFormatId::HTML_SIMPLE
230 || nFormat == SotClipboardFormatId::DIF;
231 }
232
233 // Prepare for Undo
StartPaste()234 bool ScImportExport::StartPaste()
235 {
236 if ( !bAll )
237 {
238 ScEditableTester aTester( rDoc, aRange );
239 if ( !aTester.IsEditable() )
240 {
241 std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
242 VclMessageType::Info, VclButtonsType::Ok,
243 ScResId(aTester.GetMessageId())));
244 xInfoBox->run();
245 return false;
246 }
247 }
248 if( bUndo && pDocSh && rDoc.IsUndoEnabled())
249 {
250 pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
251 pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
252 rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
253 }
254 return true;
255 }
256
257 // Create Undo/Redo actions, Invalidate/Repaint
EndPaste(bool bAutoRowHeight)258 void ScImportExport::EndPaste(bool bAutoRowHeight)
259 {
260 bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
261 aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
262
263 if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh )
264 {
265 ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
266 pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
267 rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
268 ScMarkData aDestMark(pRedoDoc->GetSheetLimits());
269 aDestMark.SetMarkArea(aRange);
270 pDocSh->GetUndoManager()->AddUndoAction(
271 std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
272 }
273 pUndoDoc.reset();
274 if( pDocSh )
275 {
276 if (!bHeight)
277 pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
278 pDocSh->SetDocumentModified();
279 }
280 ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
281 if ( pViewSh )
282 pViewSh->UpdateInputHandler();
283
284 }
285
ExportData(const OUString & rMimeType,css::uno::Any & rValue)286 bool ScImportExport::ExportData( const OUString& rMimeType,
287 css::uno::Any & rValue )
288 {
289 SvMemoryStream aStrm;
290 // mba: no BaseURL for data exchange
291 if( ExportStream( aStrm, OUString(),
292 SotExchange::GetFormatIdFromMimeType( rMimeType ) ))
293 {
294 aStrm.WriteUChar( 0 );
295 rValue <<= css::uno::Sequence< sal_Int8 >(
296 static_cast<sal_Int8 const *>(aStrm.GetData()),
297 aStrm.TellEnd() );
298 return true;
299 }
300 return false;
301 }
302
ImportString(const OUString & rText,SotClipboardFormatId nFmt)303 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
304 {
305 switch ( nFmt )
306 {
307 // formats supporting unicode
308 case SotClipboardFormatId::STRING :
309 case SotClipboardFormatId::STRING_TSVC :
310 {
311 ScImportStringStream aStrm( rText);
312 return ImportStream( aStrm, OUString(), nFmt );
313 // ImportStream must handle RTL_TEXTENCODING_UNICODE
314 }
315 default:
316 {
317 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
318 OString aTmp( rText.getStr(), rText.getLength(), eEnc );
319 SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ );
320 aStrm.SetStreamCharSet( eEnc );
321 SetNoEndianSwap( aStrm ); //! no swapping in memory
322 return ImportStream( aStrm, OUString(), nFmt );
323 }
324 }
325 }
326
ExportString(OUString & rText,SotClipboardFormatId nFmt)327 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
328 {
329 if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC )
330 {
331 SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
332 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
333 OString aTmp;
334 bool bOk = ExportByteString( aTmp, eEnc, nFmt );
335 rText = OStringToOUString( aTmp, eEnc );
336 return bOk;
337 }
338 // nSizeLimit not needed for OUString
339
340 SvMemoryStream aStrm;
341 aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
342 SetNoEndianSwap( aStrm ); //! no swapping in memory
343 // mba: no BaseURL for data exc
344 if( ExportStream( aStrm, OUString(), nFmt ) )
345 {
346 aStrm.WriteUInt16( 0 );
347 rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
348 return true;
349 }
350 rText.clear();
351 return false;
352
353 // ExportStream must handle RTL_TEXTENCODING_UNICODE
354 }
355
ExportByteString(OString & rText,rtl_TextEncoding eEnc,SotClipboardFormatId nFmt)356 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
357 {
358 OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
359 if ( eEnc == RTL_TEXTENCODING_UNICODE )
360 eEnc = osl_getThreadTextEncoding();
361
362 if (!nSizeLimit)
363 nSizeLimit = SAL_MAX_UINT16;
364
365 SvMemoryStream aStrm;
366 aStrm.SetStreamCharSet( eEnc );
367 SetNoEndianSwap( aStrm ); //! no swapping in memory
368 // mba: no BaseURL for data exchange
369 if( ExportStream( aStrm, OUString(), nFmt ) )
370 {
371 aStrm.WriteChar( 0 );
372 if( aStrm.TellEnd() <= nSizeLimit )
373 {
374 rText = static_cast<const char*>(aStrm.GetData());
375 return true;
376 }
377 }
378 rText.clear();
379 return false;
380 }
381
ImportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)382 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
383 {
384 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
385 {
386 if( ExtText2Doc( rStrm ) ) // evaluate pExtOptions
387 return true;
388 }
389 if( nFmt == SotClipboardFormatId::SYLK )
390 {
391 if( Sylk2Doc( rStrm ) )
392 return true;
393 }
394 if( nFmt == SotClipboardFormatId::DIF )
395 {
396 if( Dif2Doc( rStrm ) )
397 return true;
398 }
399 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
400 {
401 if( RTF2Doc( rStrm, rBaseURL ) )
402 return true;
403 }
404 if( nFmt == SotClipboardFormatId::LINK )
405 return true; // Link-Import?
406 if ( nFmt == SotClipboardFormatId::HTML )
407 {
408 if( HTML2Doc( rStrm, rBaseURL ) )
409 return true;
410 }
411 if ( nFmt == SotClipboardFormatId::HTML_SIMPLE )
412 {
413 MSE40HTMLClipFormatObj aMSE40ClpObj; // needed to skip the header data
414 SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
415 if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) )
416 return true;
417 }
418
419 return false;
420 }
421
ExportStream(SvStream & rStrm,const OUString & rBaseURL,SotClipboardFormatId nFmt)422 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
423 {
424 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
425 {
426 if( Doc2Text( rStrm ) )
427 return true;
428 }
429 if( nFmt == SotClipboardFormatId::SYLK )
430 {
431 if( Doc2Sylk( rStrm ) )
432 return true;
433 }
434 if( nFmt == SotClipboardFormatId::DIF )
435 {
436 if( Doc2Dif( rStrm ) )
437 return true;
438 }
439 if( nFmt == SotClipboardFormatId::LINK && !bAll )
440 {
441 OUString aDocName;
442 if ( rDoc.IsClipboard() )
443 aDocName = ScGlobal::GetClipDocName();
444 else
445 {
446 SfxObjectShell* pShell = rDoc.GetDocumentShell();
447 if (pShell)
448 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );
449 }
450
451 OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
452 if( !aDocName.isEmpty() )
453 {
454 // Always use Calc A1 syntax for paste link.
455 OUString aRefName;
456 ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
457 if( bSingle )
458 aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO);
459 else
460 {
461 if( aRange.aStart.Tab() != aRange.aEnd.Tab() )
462 nFlags |= ScRefFlags::TAB2_3D;
463 aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO);
464 }
465 OUString aAppName = Application::GetAppName();
466
467 // extra bits are used to tell the client to prefer external
468 // reference link.
469
470 WriteUnicodeOrByteString( rStrm, aAppName, true );
471 WriteUnicodeOrByteString( rStrm, aDocName, true );
472 WriteUnicodeOrByteString( rStrm, aRefName, true );
473 WriteUnicodeOrByteString( rStrm, "calc:extref", true );
474 if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
475 rStrm.WriteUInt16( 0 );
476 else
477 rStrm.WriteChar( 0 );
478 return rStrm.GetError() == ERRCODE_NONE;
479 }
480 }
481 if( nFmt == SotClipboardFormatId::HTML )
482 {
483 if( Doc2HTML( rStrm, rBaseURL ) )
484 return true;
485 }
486 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
487 {
488 if( Doc2RTF( rStrm ) )
489 return true;
490 }
491
492 return false;
493 }
494
WriteUnicodeOrByteString(SvStream & rStrm,const OUString & rString,bool bZero)495 void ScImportExport::WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero )
496 {
497 rtl_TextEncoding eEnc = rStrm.GetStreamCharSet();
498 if ( eEnc == RTL_TEXTENCODING_UNICODE )
499 {
500 if ( !lcl_IsEndianSwap( rStrm ) )
501 rStrm.WriteBytes(rString.getStr(), rString.getLength() * sizeof(sal_Unicode));
502 else
503 {
504 const sal_Unicode* p = rString.getStr();
505 const sal_Unicode* const pStop = p + rString.getLength();
506 while ( p < pStop )
507 {
508 rStrm.WriteUInt16( *p );
509 }
510 }
511 if ( bZero )
512 rStrm.WriteUInt16( 0 );
513 }
514 else
515 {
516 OString aByteStr(OUStringToOString(rString, eEnc));
517 rStrm.WriteOString( aByteStr );
518 if ( bZero )
519 rStrm.WriteChar( 0 );
520 }
521 }
522
523 // This function could be replaced by endlub()
WriteUnicodeOrByteEndl(SvStream & rStrm)524 void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm )
525 {
526 if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE )
527 { // same as endl() but unicode
528 switch ( rStrm.GetLineDelimiter() )
529 {
530 case LINEEND_CR :
531 rStrm.WriteUInt16( '\r' );
532 break;
533 case LINEEND_LF :
534 rStrm.WriteUInt16( '\n' );
535 break;
536 default:
537 rStrm.WriteUInt16( '\r' ).WriteUInt16( '\n' );
538 }
539 }
540 else
541 endl( rStrm );
542 }
543
CountVisualWidth(const OUString & rStr,sal_Int32 & nIdx,sal_Int32 nMaxWidth)544 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
545 {
546 sal_Int32 nWidth = 0;
547 while(nIdx < rStr.getLength() && nWidth < nMaxWidth)
548 {
549 sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx);
550
551 if (unicode::isCJKIVSCharacter(nCode) || (nCode >= 0x3000 && nCode <= 0x303F))
552 nWidth += 2;
553 else if (!unicode::isIVSSelector(nCode))
554 nWidth += 1;
555 }
556
557 if (nIdx < rStr.getLength())
558 {
559 sal_Int32 nTmpIdx = nIdx;
560 sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx);
561
562 if (unicode::isIVSSelector(nCode))
563 nIdx = nTmpIdx;
564 }
565 return nWidth;
566 }
567
CountVisualWidth(const OUString & rStr)568 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr)
569 {
570 sal_Int32 nIdx = 0;
571 return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32);
572 }
573
SetNoEndianSwap(SvStream & rStrm)574 void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
575 {
576 #ifdef OSL_BIGENDIAN
577 rStrm.SetEndian( SvStreamEndian::BIG );
578 #else
579 rStrm.SetEndian( SvStreamEndian::LITTLE );
580 #endif
581 }
582
namespace {

/** Classification of a quote character met while scanning a field; see
    lcl_isEscapedOrFieldEndQuote() for the meaning of each value. */
enum QuoteType
{
    FIELDSTART_QUOTE,   // first quote in a field
    FIRST_QUOTE,        // first of a doubled quote
    SECOND_QUOTE,       // second of a doubled quote
    FIELDEND_QUOTE,     // quote that ends the field
    DONTKNOW_QUOTE      // unescaped quote not taken as end of field
};

}
595
596 /** Determine if *p is a quote that ends a quoted field.
597
598 Precondition: we are parsing a quoted field already and *p is a quote.
599
600 @return
601 FIELDEND_QUOTE if end of field quote
602 DONTKNOW_QUOTE anything else
603 */
lcl_isFieldEndQuote(const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode & rcDetectSep)604 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
605 {
606 // Due to broken CSV generators that don't double embedded quotes check if
607 // a field separator immediately or with trailing spaces follows the quote,
608 // only then end the field, or at end of string.
609 constexpr sal_Unicode cBlank = ' ';
610 if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
611 return FIELDEND_QUOTE;
612 // Detect a possible blank separator if it's not already in the list (which
613 // was checked right above for p[1]==cBlank).
614 const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank);
615 while (p[1] == cBlank)
616 ++p;
617 if (!p[1] || ScGlobal::UnicodeStrChr( pSeps, p[1]))
618 return FIELDEND_QUOTE;
619 // Extended separator detection after a closing quote (with or without
620 // blanks). Note that nQuotes is incremented *after* the call so is not yet
621 // even here, and that with separator detection we reach here only if
622 // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or
623 // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have
624 // to be checked.
625 if (!rcDetectSep)
626 {
627 constexpr sal_Unicode vSep[] = { ',', '\t', ';' };
628 for (const sal_Unicode c : vSep)
629 {
630 if (p[1] == c)
631 {
632 rcDetectSep = c;
633 return FIELDEND_QUOTE;
634 }
635 }
636 }
637 // Blank separator is least significant, after others.
638 if (bBlankSep)
639 {
640 rcDetectSep = cBlank;
641 return FIELDEND_QUOTE;
642 }
643 return DONTKNOW_QUOTE;
644 }
645
646 /** Determine if *p is a quote that is escaped by being doubled or ends a
647 quoted field.
648
649 Precondition: *p is a quote.
650
651 @param nQuotes
652 Quote characters encountered so far.
653 Odd (after opening quote) means either no embedded quotes or only quote
654 pairs so far.
655 Even means either not in a quoted field or already one quote
656 encountered, the first of a pair.
657
658 @return
659 FIELDSTART_QUOTE if first quote in a field, either starting content or
660 embedded so caller should check beforehand.
661 FIRST_QUOTE if first of a doubled quote
662 SECOND_QUOTE if second of a doubled quote
663 FIELDEND_QUOTE if end of field quote
664 DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field,
665 do not increment nQuotes in caller then!
666 */
lcl_isEscapedOrFieldEndQuote(sal_Int32 nQuotes,const sal_Unicode * p,const sal_Unicode * pSeps,sal_Unicode cStr,sal_Unicode & rcDetectSep)667 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
668 const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
669 {
670 if ((nQuotes & 1) == 0)
671 {
672 if (p[-1] == cStr)
673 return SECOND_QUOTE;
674 else
675 {
676 SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
677 return FIELDSTART_QUOTE;
678 }
679 }
680 if (p[1] == cStr)
681 return FIRST_QUOTE;
682 return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
683 }
684
685 /** Append characters of [p1,p2) to rField.
686
687 @returns TRUE if ok; FALSE if data overflow, truncated
688 */
lcl_appendLineData(OUString & rField,const sal_Unicode * p1,const sal_Unicode * p2)689 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
690 {
691 if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit)
692 {
693 rField += std::u16string_view( p1, sal::static_int_cast<sal_Int32>( p2 - p1 ) );
694 return true;
695 }
696 else
697 {
698 SAL_WARN( "sc", "lcl_appendLineData: data overflow");
699 rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() );
700 return false;
701 }
702 }
703
namespace {

/** How lcl_ScanString() treats quote characters. */
enum class DoubledQuoteMode
{
    /// Both quotes of a pair are taken, additionally start and end quote are
    /// included in the string.
    KEEP_ALL,
    /// Escaped quote: one of the pair is taken, one ignored.
    ESCAPE,
};

}
713
/** Scan a quoted string starting at the quote *p and append its content to
    rString.

    @param p               points at the opening quote in a zero-terminated
                           buffer.
    @param rString         receives the scanned characters (appended).
    @param pSeps           zero-terminated list of field separators, used in
                           ESCAPE mode to decide whether a single quote ends
                           the field.
    @param cStr            the quote character.
    @param eMode           KEEP_ALL keeps all quotes including the enclosing
                           ones; ESCAPE drops the enclosing quotes and one
                           quote of each doubled pair.
    @param rbOverflowCell  set to true if appending overflowed the cell
                           length limit; never reset here.
    @return position at which scanning stopped.
 */
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rString,
            const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
    const bool bEscapeMode = (eMode == DoubledQuoteMode::ESCAPE);
    const bool bKeepAll = (eMode == DoubledQuoteMode::KEEP_ALL);

    if (!bKeepAll)
        ++p;    //! jump over opening quote

    bool bMore;
    do
    {
        bMore = false;
        const sal_Unicode* const pChunk = p;
        while (*p)
        {
            if (*p != cStr)
            {
                ++p;
                continue;
            }

            // *p is a quote; step onto the character that follows it.
            ++p;
            if (*p != cStr)
            {
                // Single quote: outside ESCAPE mode it always ends the scan,
                // in ESCAPE mode only if it is a field end quote.
                if (!bEscapeMode)
                    break;
                sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
                if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
                    break;
                continue;
            }

            // Doubled quote char: step over the second one.
            ++p;
            if (bEscapeMode)
            {
                // One for us; flush this chunk and keep scanning for more.
                bMore = true;
                break;
            }
            // KEEP_ALL: both for us, keep going within this chunk.
        }

        if ( pChunk < p )
        {
            // Outside KEEP_ALL the trailing quote is not part of the data.
            const bool bDropLast = !bKeepAll && (*p || *(p-1) == cStr);
            if (!lcl_appendLineData( rString, pChunk, bDropLast ? p-1 : p))
                rbOverflowCell = true;
        }
    } while ( bMore );
    return p;
}
769
lcl_UnescapeSylk(OUString & rString,SylkVersion eVersion)770 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
771 {
772 // Older versions didn't escape the semicolon.
773 // Older versions quoted the string and doubled embedded quotes, but not
774 // the semicolons, which was plain wrong.
775 if (eVersion >= SylkVersion::OOO32)
776 rString = rString.replaceAll(";;", ";");
777 else
778 rString = rString.replaceAll("\"\"", "\"");
779
780 rString = rString.replaceAll(SYLK_LF, "\n");
781 }
782
/** Scan a quoted SYLK string starting at the opening quote *p, append its
    unescaped content to rString, and return where scanning stopped.

    For OOO32 and later a quote followed by a single ';' ends the field and
    a quote followed by ";;" is content. For older versions a doubled quote
    is content and a quote followed by ';' ends the field. If no closing
    quote is found, all data up to the stop position is taken as string.

    @return pointer to the closing quote or to the terminating zero.
 */
static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pOpenQuote = p;
    const sal_Unicode* pCloseQuote = nullptr;
    const bool bSemicolonEscaping = eVersion >= SylkVersion::OOO32;

    for (++p; *p; ++p)
    {
        if (*p != '"')
            continue;

        pCloseQuote = p;
        const sal_Unicode cNext = p[1];
        if (bSemicolonEscaping)
        {
            if (cNext != ';')
                continue;
            if (p[2] != ';')
                break;              // end field
            p += 2;                 // escaped ';'
            pCloseQuote = nullptr;
        }
        else if (cNext == '"')
        {
            ++p;                    // escaped '"'
            pCloseQuote = nullptr;
        }
        else if (cNext == ';')
            break;                  // end field
    }

    if (!pCloseQuote)
        pCloseQuote = p;    // Take all data as string.

    rString += std::u16string_view(pOpenQuote + 1, sal::static_int_cast<sal_Int32>( pCloseQuote - pOpenQuote - 1 ) );
    lcl_UnescapeSylk( rString, eVersion);
    return p;
}
824
/** Scan a SYLK formula expression starting at p, append it to rString, and
    return where scanning stopped (at the field-ending ';', at a closing
    quote, or at the terminating zero).

    For OOO32 and later the field ends at the first ';' that is not doubled,
    and the text is unescaped. Older versions need a look-ahead, see below.
 */
static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pStart = p;

    if (eVersion >= SylkVersion::OOO32)
    {
        for ( ; *p; ++p)
        {
            if (*p != ';')
                continue;
            if (p[1] != ';')
                break;      // end field
            ++p;            // escaped ';', skip its first half
        }
        rString += std::u16string_view( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
        lcl_UnescapeSylk( rString, eVersion);
        return p;
    }

    // Nasty. If in old versions the formula contained a semicolon, it was
    // quoted and embedded quotes were doubled, but semicolons were not. If
    // there was no semicolon, it could still contain quotes and doubled
    // embedded quotes if it was something like ="a""b", which was saved as
    // E"a""b" as is and has to be preserved, even if older versions
    // couldn't even load it correctly. However, theoretically another
    // field might follow and thus the line contain a semicolon again, such
    // as ...;E"a""b";...
    bool bQuoted = false;
    if (*p == '"')
    {
        // May be a quoted expression or just a string constant expression
        // with quotes. Look ahead, then rewind.
        for (++p; *p; ++p)
        {
            if (*p == '"')
            {
                if (p[1] != '"')
                    break;      // closing '"', had no ';' yet
                ++p;            // escaped '"'
            }
            else if (*p == ';')
            {
                bQuoted = true; // ';' within quoted expression
                break;
            }
        }
        p = pStart;
    }

    if (bQuoted)
        return lcl_ScanSylkString( p, rString, eVersion);

    while (*p && *p != ';')
        ++p;
    rString += std::u16string_view( pStart, sal::static_int_cast<sal_Int32>( p - pStart));
    return p;
}
888
lcl_DoubleEscapeChar(OUString & rString,sal_Unicode cStr)889 static void lcl_DoubleEscapeChar( OUString& rString, sal_Unicode cStr )
890 {
891 sal_Int32 n = 0;
892 while( ( n = rString.indexOf( cStr, n ) ) != -1 )
893 {
894 rString = rString.replaceAt( n, 0, OUString(cStr) );
895 n += 2;
896 }
897 }
898
lcl_WriteString(SvStream & rStrm,OUString & rString,sal_Unicode cQuote,sal_Unicode cEsc)899 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
900 {
901 if (cEsc)
902 lcl_DoubleEscapeChar( rString, cEsc );
903
904 if (cQuote)
905 {
906 rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);
907 }
908
909 ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
910 }
911
lcl_WriteSimpleString(SvStream & rStrm,const OUString & rString)912 static void lcl_WriteSimpleString( SvStream& rStrm, const OUString& rString )
913 {
914 ScImportExport::WriteUnicodeOrByteString( rStrm, rString );
915 }
916
Text2Doc(SvStream & rStrm)917 bool ScImportExport::Text2Doc( SvStream& rStrm )
918 {
919 bool bOk = true;
920
921 sal_Unicode pSeps[2];
922 pSeps[0] = cSep;
923 pSeps[1] = 0;
924
925 ScSetStringParam aSetStringParam;
926 aSetStringParam.mbCheckLinkFormula = true;
927
928 SCCOL nStartCol = aRange.aStart.Col();
929 SCROW nStartRow = aRange.aStart.Row();
930 SCCOL nEndCol = aRange.aEnd.Col();
931 SCROW nEndRow = aRange.aEnd.Row();
932 sal_uLong nOldPos = rStrm.Tell();
933 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
934 bool bData = !bSingle;
935 if( !bSingle)
936 bOk = StartPaste();
937
938 while( bOk )
939 {
940 OUString aLine;
941 OUString aCell;
942 SCROW nRow = nStartRow;
943 rStrm.Seek( nOldPos );
944 for( ;; )
945 {
946 rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
947 // tdf#125440 When inserting tab separated string, consider quotes as field markers
948 DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL;
949 if( rStrm.eof() )
950 break;
951 SCCOL nCol = nStartCol;
952 const sal_Unicode* p = aLine.getStr();
953 while( *p )
954 {
955 aCell.clear();
956 const sal_Unicode* q = p;
957 while (*p && *p != cSep)
958 {
959 // Always look for a pairing quote and ignore separator in between.
960 while (*p && *p == cStr)
961 q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell );
962 // All until next separator or quote.
963 while (*p && *p != cSep && *p != cStr)
964 ++p;
965 if (!lcl_appendLineData( aCell, q, p))
966 bOverflowCell = true; // display warning on import
967 q = p;
968 }
969 if (*p)
970 ++p;
971 if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) )
972 {
973 if( bSingle )
974 {
975 if (nCol>nEndCol) nEndCol = nCol;
976 if (nRow>nEndRow) nEndRow = nRow;
977 }
978 if( bData && nCol <= nEndCol && nRow <= nEndRow )
979 rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
980 }
981 else // too many columns/rows
982 {
983 if (!rDoc.ValidRow(nRow))
984 bOverflowRow = true; // display warning on import
985 if (!rDoc.ValidCol(nCol))
986 bOverflowCol = true; // display warning on import
987 }
988 ++nCol;
989 }
990 ++nRow;
991 }
992
993 if( !bData )
994 {
995 aRange.aEnd.SetCol( nEndCol );
996 aRange.aEnd.SetRow( nEndRow );
997 bOk = StartPaste();
998 bData = true;
999 }
1000 else
1001 break;
1002 }
1003
1004 EndPaste();
1005 if (bOk && mbImportBroadcast)
1006 {
1007 rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
1008 pDocSh->PostDataChanged();
1009 }
1010
1011 return bOk;
1012 }
1013
1014 // Extended Ascii-Import
1015
lcl_PutString(ScDocumentImport & rDocImport,bool bUseDocImport,SCCOL nCol,SCROW nRow,SCTAB nTab,const OUString & rStr,sal_uInt8 nColFormat,SvNumberFormatter * pFormatter,bool bDetectNumFormat,bool bSkipEmptyCells,const::utl::TransliterationWrapper & rTransliteration,CalendarWrapper & rCalendar,const::utl::TransliterationWrapper * pSecondTransliteration,CalendarWrapper * pSecondCalendar)1016 static bool lcl_PutString(
1017 ScDocumentImport& rDocImport, bool bUseDocImport,
1018 SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
1019 SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bSkipEmptyCells,
1020 const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
1021 const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
1022 {
1023 ScDocument& rDoc = rDocImport.getDoc();
1024 bool bMultiLine = false;
1025 if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) )
1026 return bMultiLine;
1027 if ( rStr.isEmpty() )
1028 {
1029 if ( !bSkipEmptyCells )
1030 { // delete destination cell
1031 if ( bUseDocImport )
1032 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
1033 else
1034 rDoc.SetString( nCol, nRow, nTab, rStr );
1035 }
1036 return false;
1037 }
1038
1039 if ( nColFormat == SC_COL_TEXT )
1040 {
1041 double fDummy;
1042 sal_uInt32 nIndex = 0;
1043 if (pFormatter->IsNumberFormat(rStr, nIndex, fDummy))
1044 {
1045 // Set the format of this cell to Text.
1046 sal_uInt32 nFormat = pFormatter->GetStandardFormat(SvNumFormatType::TEXT);
1047 ScPatternAttr aNewAttrs(rDoc.GetPool());
1048 SfxItemSet& rSet = aNewAttrs.GetItemSet();
1049 rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
1050 rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs);
1051 }
1052 if ( bUseDocImport )
1053 {
1054 if(ScStringUtil::isMultiline(rStr))
1055 {
1056 ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1057 rEngine.SetTextCurrentDefaults(rStr);
1058 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1059 return true;
1060 }
1061 else
1062 {
1063 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
1064 return false;
1065 }
1066 } else
1067 {
1068 rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
1069 return bMultiLine;
1070 }
1071 }
1072
1073 if ( nColFormat == SC_COL_ENGLISH )
1074 {
1075 //! SetString with Extra-Flag ???
1076
1077 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1078 sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
1079 double fVal;
1080 if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
1081 {
1082 // Numberformat will not be set to English
1083 if ( bUseDocImport )
1084 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
1085 else
1086 rDoc.SetValue( nCol, nRow, nTab, fVal );
1087 return bMultiLine;
1088 }
1089 // else, continue with SetString
1090 }
1091 else if ( nColFormat != SC_COL_STANDARD ) // Datumformats
1092 {
1093 const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
1094 const sal_Int32 nLen = rStr.getLength();
1095 sal_Int32 nStart[nMaxNumberParts];
1096 sal_Int32 nEnd[nMaxNumberParts];
1097
1098 sal_uInt16 nDP, nMP, nYP;
1099 switch ( nColFormat )
1100 {
1101 case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; break;
1102 case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; break;
1103 case SC_COL_DMY:
1104 default: nDP = 0; nMP = 1; nYP = 2; break;
1105 }
1106
1107 sal_uInt16 nFound = 0;
1108 bool bInNum = false;
1109 for ( sal_Int32 nPos=0; nPos<nLen && (bInNum ||
1110 nFound<nMaxNumberParts); nPos++ )
1111 {
1112 if (bInNum && nFound == 3 && nColFormat == SC_COL_YMD &&
1113 nPos <= nStart[nFound]+2 && rStr[nPos] == 'T')
1114 bInNum = false; // ISO-8601: YYYY-MM-DDThh:mm...
1115 else if ((((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
1116 && ScGlobal::getCharClassPtr()->isLetterNumeric( rStr, nPos))
1117 || ScGlobal::getCharClassPtr()->isDigit( rStr, nPos))
1118 {
1119 if (!bInNum)
1120 {
1121 bInNum = true;
1122 nStart[nFound] = nPos;
1123 ++nFound;
1124 }
1125 nEnd[nFound-1] = nPos;
1126 }
1127 else
1128 bInNum = false;
1129 }
1130
1131 if ( nFound == 1 )
1132 {
1133 // try to break one number (without separators) into date fields
1134
1135 sal_Int32 nDateStart = nStart[0];
1136 sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;
1137
1138 if ( nDateLen >= 5 && nDateLen <= 8 &&
1139 ScGlobal::getCharClassPtr()->isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
1140 {
1141 // 6 digits: 2 each for day, month, year
1142 // 8 digits: 4 for year, 2 each for day and month
1143 // 5 or 7 digits: first field is shortened by 1
1144
1145 bool bLongYear = ( nDateLen >= 7 );
1146 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );
1147
1148 sal_uInt16 nFieldStart = nDateStart;
1149 for (sal_uInt16 nPos=0; nPos<3; nPos++)
1150 {
1151 sal_uInt16 nFieldEnd = nFieldStart + 1; // default: 2 digits
1152 if ( bLongYear && nPos == nYP )
1153 nFieldEnd += 2; // 2 extra digits for long year
1154 if ( bShortFirst && nPos == 0 )
1155 --nFieldEnd; // first field shortened?
1156
1157 nStart[nPos] = nFieldStart;
1158 nEnd[nPos] = nFieldEnd;
1159 nFieldStart = nFieldEnd + 1;
1160 }
1161 nFound = 3;
1162 }
1163 }
1164
1165 if ( nFound >= 3 )
1166 {
1167 using namespace ::com::sun::star;
1168 bool bSecondCal = false;
1169 sal_uInt16 nDay = static_cast<sal_uInt16>(rStr.copy( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ).toInt32());
1170 sal_uInt16 nYear = static_cast<sal_uInt16>(rStr.copy( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ).toInt32());
1171 OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
1172 sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
1173 if (!nMonth)
1174 {
1175 static constexpr OUStringLiteral aSepShortened = u"SEP";
1176 uno::Sequence< i18n::CalendarItem2 > xMonths;
1177 sal_Int32 i, nMonthCount;
1178 // first test all month names from local international
1179 xMonths = rCalendar.getMonths();
1180 nMonthCount = xMonths.getLength();
1181 for (i=0; i<nMonthCount && !nMonth; i++)
1182 {
1183 if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
1184 rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
1185 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1186 else if ( i == 8 && rTransliteration.isEqual( "SEPT",
1187 xMonths[i].AbbrevName ) &&
1188 rTransliteration.isEqual( aMStr, aSepShortened ) )
1189 { // correct English abbreviation is SEPT,
1190 // but data mostly contains SEP only
1191 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1192 }
1193 }
1194 // if none found, then test english month names
1195 if ( !nMonth && pSecondCalendar && pSecondTransliteration )
1196 {
1197 xMonths = pSecondCalendar->getMonths();
1198 nMonthCount = xMonths.getLength();
1199 for (i=0; i<nMonthCount && !nMonth; i++)
1200 {
1201 if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
1202 pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
1203 {
1204 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1205 bSecondCal = true;
1206 }
1207 else if ( i == 8 && pSecondTransliteration->isEqual(
1208 aMStr, aSepShortened ) )
1209 { // correct English abbreviation is SEPT,
1210 // but data mostly contains SEP only
1211 nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1212 bSecondCal = true;
1213 }
1214 }
1215 }
1216 }
1217
1218 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1219 if ( nYear < 100 )
1220 nYear = pDocFormatter->ExpandTwoDigitYear( nYear );
1221
1222 CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
1223 sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
1224 if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
1225 {
1226 --nMonth;
1227 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
1228 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
1229 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
1230 sal_Int16 nHour, nMinute, nSecond;
1231 // #i14974# The imported value should have no fractional value, so set the
1232 // time fields to zero (ICU calendar instance defaults to current date/time)
1233 nHour = nMinute = nSecond = 0;
1234 if (nFound > 3)
1235 nHour = static_cast<sal_Int16>(rStr.copy( nStart[3], nEnd[3]+1-nStart[3]).toInt32());
1236 if (nFound > 4)
1237 nMinute = static_cast<sal_Int16>(rStr.copy( nStart[4], nEnd[4]+1-nStart[4]).toInt32());
1238 if (nFound > 5)
1239 nSecond = static_cast<sal_Int16>(rStr.copy( nStart[5], nEnd[5]+1-nStart[5]).toInt32());
1240 // do not use calendar's milliseconds, to avoid fractional part truncation
1241 double fFrac = 0.0;
1242 if (nFound > 6)
1243 {
1244 sal_Unicode cDec = '.';
1245 OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
1246 rtl_math_ConversionStatus eStatus;
1247 double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
1248 if (eStatus == rtl_math_ConversionStatus_Ok)
1249 fFrac = fV / 86400.0;
1250 }
1251 sal_Int32 nPos;
1252 if (nFound > 3 && 1 <= nHour && nHour <= 12 // nHour 0 and >=13 can't be AM/PM
1253 && (nPos = nEnd[nFound-1] + 1) < nLen)
1254 {
1255 // Dreaded AM/PM may be following.
1256 while (nPos < nLen && rStr[nPos] == ' ')
1257 ++nPos;
1258 if (nPos < nLen)
1259 {
1260 sal_Int32 nStop = nPos;
1261 while (nStop < nLen && rStr[nStop] != ' ')
1262 ++nStop;
1263 OUString aAmPm = rStr.copy( nPos, nStop - nPos);
1264 // For AM only 12 needs to be treated, whereas for PM
1265 // it must not. Check both, locale and second/English
1266 // strings.
1267 if (nHour == 12 &&
1268 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
1269 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "AM"))))
1270 {
1271 nHour = 0;
1272 }
1273 else if (nHour < 12 &&
1274 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
1275 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "PM"))))
1276 {
1277 nHour += 12;
1278 }
1279 }
1280 }
1281 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
1282 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
1283 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
1284 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
1285 if ( pCalendar->isValid() )
1286 {
1287 double fDiff = DateTime(pDocFormatter->GetNullDate()) -
1288 pCalendar->getEpochStart();
1289 // #i14974# must use getLocalDateTime to get the same
1290 // date values as set above
1291 double fDays = pCalendar->getLocalDateTime() + fFrac;
1292 fDays -= fDiff;
1293
1294 LanguageType eLatin, eCjk, eCtl;
1295 rDoc.GetLanguage( eLatin, eCjk, eCtl );
1296 LanguageType eDocLang = eLatin; //! which language for date formats?
1297
1298 SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
1299 sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
1300 // maybe there is a special format including seconds or milliseconds
1301 if (nFound > 5)
1302 nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
1303
1304 ScAddress aPos(nCol,nRow,nTab);
1305 if ( bUseDocImport )
1306 rDocImport.setNumericCell(aPos, fDays);
1307 else
1308 rDoc.SetValue( aPos, fDays );
1309 rDoc.SetNumberFormat(aPos, nFormat);
1310
1311 return bMultiLine; // success
1312 }
1313 }
1314 }
1315 }
1316
1317 // Standard or date not determined -> SetString / EditCell
1318 if( rStr.indexOf( '\n' ) == -1 )
1319 {
1320 ScSetStringParam aParam;
1321 aParam.mpNumFormatter = pFormatter;
1322 aParam.mbDetectNumberFormat = bDetectNumFormat;
1323 aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
1324 aParam.mbHandleApostrophe = false;
1325 aParam.mbCheckLinkFormula = true;
1326 if ( bUseDocImport )
1327 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
1328 else
1329 rDoc.SetString( nCol, nRow, nTab, rStr, &aParam );
1330 }
1331 else
1332 {
1333 bMultiLine = true;
1334 ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1335 rEngine.SetTextCurrentDefaults(rStr);
1336 if ( bUseDocImport )
1337 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1338 else
1339 rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
1340 }
1341 return bMultiLine;
1342 }
1343
/**
 * Extract one fixed-width field from a line.
 *
 * The field covers the index range [nStart, nNext) of rLine, with nNext
 * clamped to the line length. Trailing blanks are removed. If the remaining
 * data starts and ends with a double quote, the quotes are removed as well
 * and rbIsQuoted is set to true. Content longer than
 * nArbitraryCellLengthLimit is truncated to that length and rbOverflowCell
 * is set.
 *
 * @param rLine           the line to cut the field from.
 * @param nStart          index of the first character of the field.
 * @param nNext           index one past the last character of the field.
 * @param rbIsQuoted      [out] whether the field was enclosed in double
 *                        quotes; left untouched if the field range is empty.
 * @param rbOverflowCell  [out] set to true if the content was truncated,
 *                        never reset to false.
 * @return the field content, or an empty string if the range is empty.
 */
static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
                     bool& rbIsQuoted, bool& rbOverflowCell )
{
    const sal_Int32 nLen = rLine.getLength();
    if (nNext > nLen)
        nNext = nLen;
    if ( nNext <= nStart )
        return EMPTY_OUSTRING;

    const sal_Unicode* pStr = rLine.getStr();

    // Strip trailing blanks; nSpace is one past the last non-blank character.
    sal_Int32 nSpace = nNext;
    while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
        --nSpace;

    // If the field consists of blanks only, pStr[nStart] is a blank and the
    // second operand (which would look at nSpace-1 == nStart-1) is not
    // evaluated.
    rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
    if (rbIsQuoted)
    {
        // Length of the content between the quotes. This is -1 if the field
        // is a single quote character that serves as both opening and
        // closing quote, hence the std::max() below.
        const sal_Int32 nContentLen = nSpace - nStart - 2;
        if (nContentLen <= nArbitraryCellLengthLimit)
            return rLine.copy(nStart+1, std::max< sal_Int32 >(0, nContentLen));

        SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
        rbOverflowCell = true;
        return rLine.copy(nStart+1, nArbitraryCellLengthLimit);
    }

    const sal_Int32 nContentLen = nSpace - nStart;
    if (nContentLen <= nArbitraryCellLengthLimit)
        return rLine.copy(nStart, nContentLen);

    SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
    rbOverflowCell = true;
    return rLine.copy(nStart, nArbitraryCellLengthLimit);
}
1385
ExtText2Doc(SvStream & rStrm)1386 bool ScImportExport::ExtText2Doc( SvStream& rStrm )
1387 {
1388 if (!pExtOptions)
1389 return Text2Doc( rStrm );
1390
1391 sal_uInt64 const nOldPos = rStrm.Tell();
1392 sal_uInt64 const nRemaining = rStrm.remainingSize();
1393 std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
1394 ScResId( STR_LOAD_DOC ), nRemaining, true ));
1395 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
1396
1397 SCCOL nStartCol = aRange.aStart.Col();
1398 SCCOL nEndCol = aRange.aEnd.Col();
1399 SCROW nStartRow = aRange.aStart.Row();
1400 SCTAB nTab = aRange.aStart.Tab();
1401
1402 bool bFixed = pExtOptions->IsFixedLen();
1403 OUString aSeps = pExtOptions->GetFieldSeps(); // Need non-const for ReadCsvLine(),
1404 const sal_Unicode* pSeps = aSeps.getStr(); // but it will be const anyway (asserted below).
1405 bool bMerge = pExtOptions->IsMergeSeps();
1406 bool bRemoveSpace = pExtOptions->IsRemoveSpace();
1407 sal_uInt16 nInfoCount = pExtOptions->GetInfoCount();
1408 const sal_Int32* pColStart = pExtOptions->GetColStart();
1409 const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
1410 tools::Long nSkipLines = pExtOptions->GetStartRow();
1411
1412 LanguageType eDocLang = pExtOptions->GetLanguage();
1413 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
1414 bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
1415 bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();
1416
1417 // For date recognition
1418 ::utl::TransliterationWrapper aTransliteration(
1419 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
1420 aTransliteration.loadModuleIfNeeded( eDocLang );
1421 CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
1422 aCalendar.loadDefaultCalendar(
1423 LanguageTag::convertToLocale( eDocLang ) );
1424 std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
1425 std::unique_ptr< CalendarWrapper > pEnglishCalendar;
1426 if ( eDocLang != LANGUAGE_ENGLISH_US )
1427 {
1428 pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
1429 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
1430 aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
1431 pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1432 pEnglishCalendar->loadDefaultCalendar(
1433 LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
1434 }
1435
1436 OUString aLine;
1437 OUString aCell;
1438 sal_uInt16 i;
1439 SCROW nRow = nStartRow;
1440 sal_Unicode cDetectSep = 0xffff; // No separator detection here.
1441
1442 while(--nSkipLines>0)
1443 {
1444 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
1445 if ( rStrm.eof() )
1446 break;
1447 }
1448
1449 // Determine range for Undo.
1450 // We don't need this during import of a file to a new sheet or document...
1451 bool bDetermineRange = bUndo;
1452
1453 // Row heights don't need to be adjusted on the fly if EndPaste() is called
1454 // afterwards, which happens only if bDetermineRange. This variable also
1455 // survives the toggle of bDetermineRange down at the end of the do{} loop.
1456 bool bRangeIsDetermined = bDetermineRange;
1457
1458 bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();
1459
1460 sal_uLong nOriginalStreamPos = rStrm.Tell();
1461
1462 ScDocumentImport aDocImport(rDoc);
1463 do
1464 {
1465 for( ;; )
1466 {
1467 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
1468 if ( rStrm.eof() && aLine.isEmpty() )
1469 break;
1470
1471 assert(pSeps == aSeps.getStr());
1472
1473 if ( nRow > rDoc.MaxRow() )
1474 {
1475 bOverflowRow = true; // display warning on import
1476 break; // for
1477 }
1478
1479 EmbeddedNullTreatment( aLine);
1480
1481 sal_Int32 nLineLen = aLine.getLength();
1482 SCCOL nCol = nStartCol;
1483 bool bMultiLine = false;
1484 if ( bFixed ) // Fixed line length
1485 {
1486 sal_Int32 nStartIdx = 0;
1487 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1488 // overflow if there is really data following to be put behind
1489 // the last column, which doesn't happen if info is
1490 // SC_COL_SKIP.
1491 for ( i=0; i<nInfoCount && nCol <= rDoc.MaxCol()+1; i++ )
1492 {
1493 sal_uInt8 nFmt = pColFormat[i];
1494 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either
1495 {
1496 if (nCol > rDoc.MaxCol())
1497 bOverflowCol = true; // display warning on import
1498 else if (!bDetermineRange)
1499 {
1500 sal_Int32 nNextIdx = nStartIdx;
1501 if ( i + 1 < nInfoCount )
1502 CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
1503 else
1504 nNextIdx = nLineLen;
1505
1506 bool bIsQuoted = false;
1507 aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
1508 if (bIsQuoted && bQuotedAsText)
1509 nFmt = SC_COL_TEXT;
1510
1511 bMultiLine |= lcl_PutString(
1512 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1513 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar,
1514 pEnglishTransliteration.get(), pEnglishCalendar.get());
1515
1516 nStartIdx = nNextIdx;
1517 }
1518 ++nCol;
1519 }
1520 }
1521 }
1522 else // Search for the separator
1523 {
1524 SCCOL nSourceCol = 0;
1525 sal_uInt16 nInfoStart = 0;
1526 const sal_Unicode* p = aLine.getStr();
1527 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1528 // overflow if there is really data following to be put behind
1529 // the last column, which doesn't happen if info is
1530 // SC_COL_SKIP.
1531 while (*p && nCol <= rDoc.MaxCol()+1)
1532 {
1533 bool bIsQuoted = false;
1534 p = ScImportExport::ScanNextFieldFromString( p, aCell,
1535 cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
1536
1537 sal_uInt8 nFmt = SC_COL_STANDARD;
1538 for ( i=nInfoStart; i<nInfoCount; i++ )
1539 {
1540 if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based
1541 {
1542 nFmt = pColFormat[i];
1543 nInfoStart = i + 1; // ColInfos are in succession
1544 break; // for
1545 }
1546 }
1547 if ( nFmt != SC_COL_SKIP )
1548 {
1549 if (nCol > rDoc.MaxCol())
1550 bOverflowCol = true; // display warning on import
1551 else if (!bDetermineRange)
1552 {
1553 if (bIsQuoted && bQuotedAsText)
1554 nFmt = SC_COL_TEXT;
1555
1556 bMultiLine |= lcl_PutString(
1557 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1558 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration,
1559 aCalendar, pEnglishTransliteration.get(), pEnglishCalendar.get());
1560 }
1561 ++nCol;
1562 }
1563
1564 ++nSourceCol;
1565 }
1566 }
1567 if (nEndCol < nCol)
1568 nEndCol = nCol; //! points to the next free or even rDoc.MaxCol()+2
1569
1570 if (!bDetermineRange)
1571 {
1572 if (bMultiLine && !bRangeIsDetermined && pDocSh)
1573 pDocSh->AdjustRowHeight( nRow, nRow, nTab);
1574 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
1575 }
1576 ++nRow;
1577 }
1578 // so far nRow/nEndCol pointed to the next free
1579 if (nRow > nStartRow)
1580 --nRow;
1581 if (nEndCol > nStartCol)
1582 nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol());
1583
1584 if (bDetermineRange)
1585 {
1586 aRange.aEnd.SetCol( nEndCol );
1587 aRange.aEnd.SetRow( nRow );
1588
1589 if ( !mbApi && nStartCol != nEndCol &&
1590 !rDoc.IsBlockEmpty( nTab, nStartCol + 1, nStartRow, nEndCol, nRow ) )
1591 {
1592 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
1593 if (aBox.run() != RET_YES)
1594 {
1595 return false;
1596 }
1597 }
1598
1599 rStrm.Seek( nOriginalStreamPos );
1600 nRow = nStartRow;
1601 if (!StartPaste())
1602 {
1603 EndPaste(false);
1604 return false;
1605 }
1606 }
1607
1608 bDetermineRange = !bDetermineRange; // toggle
1609 } while (!bDetermineRange);
1610 if ( !mbOverwriting )
1611 aDocImport.finalize();
1612
1613 xProgress.reset(); // make room for AdjustRowHeight progress
1614 if (bRangeIsDetermined)
1615 EndPaste(false);
1616
1617 if (mbImportBroadcast && !mbOverwriting)
1618 {
1619 rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
1620 pDocSh->PostDataChanged();
1621 }
1622 return true;
1623 }
1624
EmbeddedNullTreatment(OUString & rStr)1625 void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
1626 {
1627 // A nasty workaround for data with embedded NULL characters. As long as we
1628 // can't handle them properly as cell content (things assume 0-terminated
1629 // strings at too many places) simply strip all NULL characters from raw
1630 // data. Excel does the same. See fdo#57841 for sample data.
1631
1632 // The normal case is no embedded NULL, check first before de-/allocating
1633 // ustring stuff.
1634 const sal_Unicode cNull = 0;
1635 sal_Int32 i;
1636 if ((i = rStr.indexOf( cNull)) >= 0)
1637 {
1638 // Do not use OUString::replaceAll(...,""), in case of repeated null
1639 // bytes that reallocates for each and for massive amounts takes
1640 // ~endless. See tdf#147421 with 3577016 trailing null-bytes.
1641 const sal_Int32 nLen = rStr.getLength();
1642 OUStringBuffer aBuf( nLen);
1643 sal_Int32 s = 0;
1644 sal_Unicode const * const p = rStr.getStr();
1645 do
1646 {
1647 // Append good substring.
1648 aBuf.append( p + s, i - s);
1649 // Skip all cNull.
1650 while (++i < nLen && *(p+i) == cNull)
1651 ;
1652 // Find next cNull after good if characters left, else end.
1653 if (i < nLen)
1654 {
1655 s = i;
1656 i = rStr.indexOf( cNull, i);
1657 }
1658 else
1659 {
1660 s = nLen;
1661 }
1662 }
1663 while (0 <= i && i < nLen);
1664 // Append good trailing substring, if any.
1665 if (s < nLen)
1666 aBuf.append( p + s, nLen - s);
1667
1668 rStr = aBuf.makeStringAndClear();
1669 }
1670 }
1671
/**
 * Scan one field of a separated line.
 *
 * @param p               start of the field within a 0-terminated line.
 * @param rField          [out] receives the field content; cleared first.
 * @param cStr            quote character, 0 for none.
 * @param pSeps           0-terminated list of field separators.
 * @param bMergeSeps      whether separators directly following the field's
 *                        own separator are skipped as well.
 * @param rbIsQuoted      [out] whether the field started with cStr.
 * @param rbOverflowCell  [out] set to true if data could not be appended to
 *                        rField; never reset to false here.
 * @param bRemoveSpace    whether blanks are trimmed from unquoted data.
 * @return position where scanning of the next field starts.
 */
const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
        OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
        bool& rbOverflowCell, bool bRemoveSpace )
{
    const sal_Unicode cBlank = ' ';

    rField.clear();
    rbIsQuoted = false;

    if (cStr && !ScGlobal::UnicodeStrChr( pSeps, cBlank))
    {
        // Cope with broken generators that put leading blanks before a quoted
        // field, like "field1", "field2", "..."
        // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
        const sal_Unicode* pPastBlanks = p;
        while (*pPastBlanks == cBlank)
            ++pPastBlanks;
        if (*pPastBlanks == cStr)
            p = pPastBlanks;
    }

    if (cStr && *p == cStr)
    {
        // Field starts with a quoted string.
        rbIsQuoted = true;
        p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );

        // Whatever follows the quoted string up to the next separator is
        // unquoted and undelimited data (dirty, dirty) that is appended to
        // this field.
        const sal_Unicode* const pRestBegin = p;
        while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
            ++p;
        if (p > pRestBegin)
        {
            const sal_Unicode* pRestEnd = p;
            if ( bRemoveSpace )
            {
                // Only trailing blanks are removed here.
                while ( pRestEnd > pRestBegin && pRestEnd[-1] == cBlank )
                    --pRestEnd;
            }
            if (!lcl_appendLineData( rField, pRestBegin, pRestEnd))
                rbOverflowCell = true;
        }
    }
    else
    {
        // Unquoted field, everything up to the next separator.
        const sal_Unicode* pDataBegin = p;
        while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) )
            ++p;
        const sal_Unicode* pDataEnd = p;
        if ( bRemoveSpace )
        {
            // Narrow [pDataBegin,pDataEnd) by leading and trailing blanks.
            while ( pDataBegin < pDataEnd && *pDataBegin == cBlank )
                ++pDataBegin;
            while ( pDataEnd > pDataBegin && pDataEnd[-1] == cBlank )
                --pDataEnd;
        }
        if (!lcl_appendLineData( rField, pDataBegin, pDataEnd))
            rbOverflowCell = true;
    }

    // Step over the separator that ended the field, if any.
    if ( *p )
        ++p;

    if ( bMergeSeps )
    {
        // Treat a run of separators as one.
        while ( *p && ScGlobal::UnicodeStrChr( pSeps, *p ) )
            ++p;
    }
    return p;
}
1739
1740 namespace {
1741
1742 /**
1743 * Check if a given string has any line break characters or separators.
1744 *
1745 * @param rStr string to inspect.
1746 * @param cSep separator character.
1747 */
hasLineBreaksOrSeps(const OUString & rStr,sal_Unicode cSep)1748 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
1749 {
1750 const sal_Unicode* p = rStr.getStr();
1751 for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
1752 {
1753 sal_Unicode c = *p;
1754 if (c == cSep)
1755 // separator found.
1756 return true;
1757
1758 switch (c)
1759 {
1760 case '\n':
1761 case '\r':
1762 // line break found.
1763 return true;
1764 default:
1765 ;
1766 }
1767 }
1768 return false;
1769 }
1770
1771 }
1772
Doc2Text(SvStream & rStrm)1773 bool ScImportExport::Doc2Text( SvStream& rStrm )
1774 {
1775 SCCOL nCol;
1776 SCROW nRow;
1777 SCCOL nStartCol = aRange.aStart.Col();
1778 SCROW nStartRow = aRange.aStart.Row();
1779 SCTAB nStartTab = aRange.aStart.Tab();
1780 SCCOL nEndCol = aRange.aEnd.Col();
1781 SCROW nEndRow = aRange.aEnd.Row();
1782 SCTAB nEndTab = aRange.aEnd.Tab();
1783
1784 if (!rDoc.GetClipParam().isMultiRange() && nStartTab == nEndTab)
1785 if (!rDoc.ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
1786 return false;
1787
1788 OUString aCellStr;
1789
1790 bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);
1791
1792 // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1793 std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
1794 for( SCCOL i = nStartCol; i <= nEndCol; ++i )
1795 rDoc.InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
1796 for (nRow = nStartRow; nRow <= nEndRow; nRow++)
1797 {
1798 if (bIncludeFiltered || !rDoc.RowFiltered( nRow, nStartTab ))
1799 {
1800 for (nCol = nStartCol; nCol <= nEndCol; nCol++)
1801 {
1802 ScAddress aPos(nCol, nRow, nStartTab);
1803 sal_uInt32 nNumFmt = rDoc.GetNumberFormat(aPos);
1804 SvNumberFormatter* pFormatter = rDoc.GetFormatTable();
1805
1806 ScRefCellValue aCell(rDoc, aPos, blockPos[ nCol - nStartCol ]);
1807 switch (aCell.meType)
1808 {
1809 case CELLTYPE_FORMULA:
1810 {
1811 if (bFormulas)
1812 {
1813 aCell.mpFormula->GetFormula( aCellStr );
1814 if( aCellStr.indexOf( cSep ) != -1 )
1815 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1816 else
1817 lcl_WriteSimpleString( rStrm, aCellStr );
1818 }
1819 else
1820 {
1821 const Color* pColor;
1822 ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, rDoc);
1823
1824 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1825 if( bMultiLineText )
1826 {
1827 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1828 aCellStr = aCellStr.replaceAll( "\n", " " );
1829 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1830 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1831 }
1832
1833 if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1834 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1835
1836 if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
1837 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1838 else
1839 lcl_WriteSimpleString( rStrm, aCellStr );
1840 }
1841 }
1842 break;
1843 case CELLTYPE_VALUE:
1844 {
1845 const Color* pColor;
1846 ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, rDoc);
1847 lcl_WriteSimpleString( rStrm, aCellStr );
1848 }
1849 break;
1850 case CELLTYPE_NONE:
1851 break;
1852 default:
1853 {
1854 const Color* pColor;
1855 ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, rDoc);
1856
1857 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1858 if( bMultiLineText )
1859 {
1860 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1861 aCellStr = aCellStr.replaceAll( "\n", " " );
1862 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1863 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1864 }
1865
1866 if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1867 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1868
1869 if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
1870 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1871 else
1872 lcl_WriteSimpleString( rStrm, aCellStr );
1873 }
1874 }
1875 if( nCol < nEndCol )
1876 lcl_WriteSimpleString( rStrm, OUString(cSep) );
1877 }
1878 WriteUnicodeOrByteEndl( rStrm );
1879 if( rStrm.GetError() != ERRCODE_NONE )
1880 break;
1881 if( nSizeLimit && rStrm.Tell() > nSizeLimit )
1882 break;
1883 }
1884 }
1885
1886 return rStrm.GetError() == ERRCODE_NONE;
1887 }
1888
Sylk2Doc(SvStream & rStrm)1889 bool ScImportExport::Sylk2Doc( SvStream& rStrm )
1890 {
1891 bool bOk = true;
1892 bool bMyDoc = false;
1893 SylkVersion eVersion = SylkVersion::OTHER;
1894
1895 // US-English separators for StringToDouble
1896 sal_Unicode const cDecSep = '.';
1897 sal_Unicode const cGrpSep = ',';
1898
1899 SCCOL nStartCol = aRange.aStart.Col();
1900 SCROW nStartRow = aRange.aStart.Row();
1901 SCCOL nEndCol = aRange.aEnd.Col();
1902 SCROW nEndRow = aRange.aEnd.Row();
1903 sal_uLong nOldPos = rStrm.Tell();
1904 bool bData = !bSingle;
1905 ::std::vector< sal_uInt32 > aFormats;
1906
1907 if( !bSingle)
1908 bOk = StartPaste();
1909
1910 while( bOk )
1911 {
1912 OUString aLine;
1913 OUString aText;
1914 OString aByteLine;
1915 SCCOL nCol = nStartCol;
1916 SCROW nRow = nStartRow;
1917 SCCOL nRefCol = nCol;
1918 SCROW nRefRow = nRow;
1919 rStrm.Seek( nOldPos );
1920 for( ;; )
1921 {
1922 //! allow unicode
1923 rStrm.ReadLine( aByteLine );
1924 aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
1925 if( rStrm.eof() )
1926 break;
1927 bool bInvalidCol = false;
1928 bool bInvalidRow = false;
1929 const sal_Unicode* p = aLine.getStr();
1930 sal_Unicode cTag = *p++;
1931 if( cTag == 'C' ) // Content
1932 {
1933 if( *p++ != ';' )
1934 return false;
1935
1936 bool bInvalidRefCol = false;
1937 bool bInvalidRefRow = false;
1938 while( *p )
1939 {
1940 sal_Unicode ch = *p++;
1941 ch = ScGlobal::ToUpperAlpha( ch );
1942 switch( ch )
1943 {
1944 case 'X':
1945 {
1946 bInvalidCol = false;
1947 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
1948 if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
1949 {
1950 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
1951 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
1952 bInvalidCol = bOverflowCol = true;
1953 }
1954 break;
1955 }
1956 case 'Y':
1957 {
1958 bInvalidRow = false;
1959 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
1960 if (bFail || nRow < 0 || nMaxImportRow < nRow)
1961 {
1962 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
1963 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
1964 bInvalidRow = bOverflowRow = true;
1965 }
1966 break;
1967 }
1968 case 'C':
1969 {
1970 bInvalidRefCol = false;
1971 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nRefCol);
1972 if (bFail || nRefCol < 0 || rDoc.MaxCol() < nRefCol)
1973 {
1974 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol);
1975 nRefCol = std::clamp<SCCOL>(nRefCol, 0, rDoc.MaxCol());
1976 bInvalidRefCol = bOverflowCol = true;
1977 }
1978 break;
1979 }
1980 case 'R':
1981 {
1982 bInvalidRefRow = false;
1983 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRefRow);
1984 if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow)
1985 {
1986 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow);
1987 nRefRow = std::clamp<SCROW>(nRefRow, 0, nMaxImportRow);
1988 bInvalidRefRow = bOverflowRow = true;
1989 }
1990 break;
1991 }
1992 case 'K':
1993 {
1994 if( !bSingle &&
1995 ( nCol < nStartCol || nCol > nEndCol
1996 || nRow < nStartRow || nRow > nEndRow
1997 || nCol > rDoc.MaxCol() || nRow > nMaxImportRow
1998 || bInvalidCol || bInvalidRow ) )
1999 break;
2000 if( !bData )
2001 {
2002 if( nRow > nEndRow )
2003 nEndRow = nRow;
2004 if( nCol > nEndCol )
2005 nEndCol = nCol;
2006 break;
2007 }
2008 bool bText;
2009 if( *p == '"' )
2010 {
2011 bText = true;
2012 aText.clear();
2013 p = lcl_ScanSylkString( p, aText, eVersion);
2014 }
2015 else
2016 bText = false;
2017 const sal_Unicode* q = p;
2018 while( *q && *q != ';' )
2019 q++;
2020 if ( (*q != ';' || *(q+1) != 'I') && !bInvalidCol && !bInvalidRow )
2021 { // don't ignore value
2022 if( bText )
2023 {
2024 rDoc.EnsureTable(aRange.aStart.Tab());
2025 rDoc.SetTextCell(
2026 ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
2027 }
2028 else
2029 {
2030 double fVal = rtl_math_uStringToDouble( p,
2031 aLine.getStr() + aLine.getLength(),
2032 cDecSep, cGrpSep, nullptr, nullptr );
2033 rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
2034 }
2035 }
2036 }
2037 break;
2038 case 'E':
2039 case 'M':
2040 {
2041 if ( ch == 'M' )
2042 {
2043 if ( nRefCol < nCol )
2044 nRefCol = nCol;
2045 if ( nRefRow < nRow )
2046 nRefRow = nRow;
2047 if ( !bData )
2048 {
2049 if( nRefRow > nEndRow )
2050 nEndRow = nRefRow;
2051 if( nRefCol > nEndCol )
2052 nEndCol = nRefCol;
2053 }
2054 }
2055 if( !bMyDoc || !bData )
2056 break;
2057 aText = "=";
2058 p = lcl_ScanSylkFormula( p, aText, eVersion);
2059
2060 if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow)))
2061 break;
2062
2063 ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
2064 /* FIXME: do we want GRAM_ODFF_A1 instead? At the
2065 * end it probably should be GRAM_ODFF_R1C1, since
2066 * R1C1 is what Excel writes in SYLK, or even
2067 * better GRAM_ENGLISH_XL_R1C1. */
2068 const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
2069 ScCompiler aComp(rDoc, aPos, eGrammar);
2070 std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
2071 rDoc.CheckLinkFormulaNeedingCheck(*xCode);
2072 if ( ch == 'M' )
2073 {
2074 ScMarkData aMark(rDoc.GetSheetLimits());
2075 aMark.SelectTable( aPos.Tab(), true );
2076 rDoc.InsertMatrixFormula( nCol, nRow, nRefCol,
2077 nRefRow, aMark, EMPTY_OUSTRING, xCode.get() );
2078 }
2079 else
2080 {
2081 ScFormulaCell* pFCell = new ScFormulaCell(
2082 rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
2083 rDoc.SetFormulaCell(aPos, pFCell);
2084 }
2085 }
2086 break;
2087 }
2088 while( *p && *p != ';' )
2089 p++;
2090 if( *p )
2091 p++;
2092 }
2093 }
2094 else if( cTag == 'F' ) // Format
2095 {
2096 if( *p++ != ';' )
2097 return false;
2098 sal_Int32 nFormat = -1;
2099 while( *p )
2100 {
2101 sal_Unicode ch = *p++;
2102 ch = ScGlobal::ToUpperAlpha( ch );
2103 switch( ch )
2104 {
2105 case 'X':
2106 {
2107 bInvalidCol = false;
2108 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol);
2109 if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
2110 {
2111 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
2112 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
2113 bInvalidCol = bOverflowCol = true;
2114 }
2115 break;
2116 }
2117 case 'Y':
2118 {
2119 bInvalidRow = false;
2120 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow);
2121 if (bFail || nRow < 0 || nMaxImportRow < nRow)
2122 {
2123 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
2124 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
2125 bInvalidRow = bOverflowRow = true;
2126 }
2127 break;
2128 }
2129 case 'P' :
2130 if ( bData )
2131 {
2132 // F;P<n> sets format code of P;P<code> at
2133 // current position, or at ;X;Y if specified.
2134 // Note that ;X;Y may appear after ;P
2135 const sal_Unicode* p0 = p;
2136 while( *p && *p != ';' )
2137 p++;
2138 OUString aNumber(p0, p - p0);
2139 nFormat = aNumber.toInt32();
2140 }
2141 break;
2142 }
2143 while( *p && *p != ';' )
2144 p++;
2145 if( *p )
2146 p++;
2147 }
2148 if ( !bData )
2149 {
2150 if( nRow > nEndRow )
2151 nEndRow = nRow;
2152 if( nCol > nEndCol )
2153 nEndCol = nCol;
2154 }
2155 if ( 0 <= nFormat && nFormat < static_cast<sal_Int32>(aFormats.size()) && !bInvalidCol && !bInvalidRow )
2156 {
2157 sal_uInt32 nKey = aFormats[nFormat];
2158 rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
2159 SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
2160 }
2161 }
2162 else if( cTag == 'P' )
2163 {
2164 if ( bData && *p == ';' && *(p+1) == 'P' )
2165 {
2166 OUString aCode( p+2 );
2167
2168 sal_uInt32 nKey;
2169 sal_Int32 nCheckPos;
2170
2171 if (aCode.getLength() > 2048 && utl::ConfigManager::IsFuzzing())
2172 {
2173 // consider an excessive length as a failure when fuzzing
2174 nCheckPos = 1;
2175 }
2176 else
2177 {
2178 // unescape doubled semicolons
2179 aCode = aCode.replaceAll(";;", ";");
2180 // get rid of Xcl escape characters
2181 aCode = aCode.replaceAll("\x1b", "");
2182 SvNumFormatType nType;
2183 rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
2184 LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
2185 }
2186
2187 if ( nCheckPos )
2188 nKey = 0;
2189
2190 aFormats.push_back( nKey );
2191 }
2192 }
2193 else if( cTag == 'I' && *p == 'D' )
2194 {
2195 aLine = aLine.copy(4);
2196 if (aLine == "CALCOOO32")
2197 eVersion = SylkVersion::OOO32;
2198 else if (aLine == "SCALC3")
2199 eVersion = SylkVersion::SCALC3;
2200 bMyDoc = (eVersion <= SylkVersion::OWN);
2201 }
2202 else if( cTag == 'E' ) // End
2203 break;
2204 }
2205 if( !bData )
2206 {
2207 aRange.aEnd.SetCol( nEndCol );
2208 aRange.aEnd.SetRow( nEndRow );
2209 bOk = StartPaste();
2210 bData = true;
2211 }
2212 else
2213 break;
2214 }
2215
2216 EndPaste();
2217 return bOk;
2218 }
2219
Doc2Sylk(SvStream & rStrm)2220 bool ScImportExport::Doc2Sylk( SvStream& rStrm )
2221 {
2222 SCCOL nCol;
2223 SCROW nRow;
2224 SCCOL nStartCol = aRange.aStart.Col();
2225 SCROW nStartRow = aRange.aStart.Row();
2226 SCCOL nEndCol = aRange.aEnd.Col();
2227 SCROW nEndRow = aRange.aEnd.Row();
2228 OUString aCellStr;
2229 OUString aValStr;
2230 lcl_WriteSimpleString( rStrm, "ID;PCALCOOO32" );
2231 WriteUnicodeOrByteEndl( rStrm );
2232
2233 for (nRow = nStartRow; nRow <= nEndRow; nRow++)
2234 {
2235 for (nCol = nStartCol; nCol <= nEndCol; nCol++)
2236 {
2237 OUString aBufStr;
2238 double nVal;
2239 bool bForm = false;
2240 SCROW r = nRow - nStartRow + 1;
2241 SCCOL c = nCol - nStartCol + 1;
2242 ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));
2243 CellType eType = aCell.meType;
2244 switch( eType )
2245 {
2246 case CELLTYPE_FORMULA:
2247 bForm = bFormulas;
2248 if( rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab()) )
2249 goto hasvalue;
2250 else
2251 goto hasstring;
2252
2253 case CELLTYPE_VALUE:
2254 hasvalue:
2255 rDoc.GetValue( nCol, nRow, aRange.aStart.Tab(), nVal );
2256
2257 aValStr = ::rtl::math::doubleToUString( nVal,
2258 rtl_math_StringFormat_Automatic,
2259 rtl_math_DecimalPlaces_Max, '.', true );
2260
2261 aBufStr = "C;X"
2262 + OUString::number( c )
2263 + ";Y"
2264 + OUString::number( r )
2265 + ";K"
2266 + aValStr;
2267 lcl_WriteSimpleString( rStrm, aBufStr );
2268 goto checkformula;
2269
2270 case CELLTYPE_STRING:
2271 case CELLTYPE_EDIT:
2272 hasstring:
2273 aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab());
2274 aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
2275
2276 aBufStr = "C;X"
2277 + OUString::number( c )
2278 + ";Y"
2279 + OUString::number( r )
2280 + ";K";
2281 lcl_WriteSimpleString( rStrm, aBufStr );
2282 lcl_WriteString( rStrm, aCellStr, '"', ';' );
2283
2284 checkformula:
2285 if( bForm )
2286 {
2287 const ScFormulaCell* pFCell = aCell.mpFormula;
2288 switch ( pFCell->GetMatrixFlag() )
2289 {
2290 case ScMatrixMode::Reference :
2291 aCellStr.clear();
2292 break;
2293 default:
2294 OUString aOUCellStr;
2295 pFCell->GetFormula( aOUCellStr,formula::FormulaGrammar::GRAM_PODF_A1);
2296 aCellStr = aOUCellStr;
2297 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2298 * the end it probably should be
2299 * GRAM_ODFF_R1C1, since R1C1 is what Excel
2300 * writes in SYLK, or even better
2301 * GRAM_ENGLISH_XL_R1C1. */
2302 }
2303 if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE &&
2304 aCellStr.startsWith("{") &&
2305 aCellStr.endsWith("}") )
2306 { // cut off matrix {} characters
2307 aCellStr = aCellStr.copy(1, aCellStr.getLength()-2);
2308 }
2309 if ( aCellStr[0] == '=' )
2310 aCellStr = aCellStr.copy(1);
2311 OUString aPrefix;
2312 switch ( pFCell->GetMatrixFlag() )
2313 {
2314 case ScMatrixMode::Formula :
2315 { // diff expression with 'M' M$-extension
2316 SCCOL nC;
2317 SCROW nR;
2318 pFCell->GetMatColsRows( nC, nR );
2319 nC += c - 1;
2320 nR += r - 1;
2321 aPrefix = ";R"
2322 + OUString::number( nR )
2323 + ";C"
2324 + OUString::number( nC )
2325 + ";M";
2326 }
2327 break;
2328 case ScMatrixMode::Reference :
2329 { // diff expression with 'I' M$-extension
2330 ScAddress aPos;
2331 (void)pFCell->GetMatrixOrigin( rDoc, aPos );
2332 aPrefix = ";I;R"
2333 + OUString::number( aPos.Row() - nStartRow + 1 )
2334 + ";C"
2335 + OUString::number( aPos.Col() - nStartCol + 1 );
2336 }
2337 break;
2338 default:
2339 // formula Expression
2340 aPrefix = ";E";
2341 }
2342 lcl_WriteSimpleString( rStrm, aPrefix );
2343 if ( !aCellStr.isEmpty() )
2344 lcl_WriteString( rStrm, aCellStr, 0, ';' );
2345 }
2346 WriteUnicodeOrByteEndl( rStrm );
2347 break;
2348
2349 default:
2350 {
2351 // added to avoid warnings
2352 }
2353 }
2354 }
2355 }
2356 lcl_WriteSimpleString( rStrm, OUString( 'E' ) );
2357 WriteUnicodeOrByteEndl( rStrm );
2358 return rStrm.GetError() == ERRCODE_NONE;
2359 }
2360
Doc2HTML(SvStream & rStrm,const OUString & rBaseURL)2361 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL )
2362 {
2363 // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2364 ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
2365 aStreamPath, aNonConvertibleChars, maFilterOptions );
2366 return rStrm.GetError() == ERRCODE_NONE;
2367 }
2368
Doc2RTF(SvStream & rStrm)2369 bool ScImportExport::Doc2RTF( SvStream& rStrm )
2370 {
2371 // rtl_TextEncoding is ignored in ScExportRTF
2372 ScFormatFilter::Get().ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
2373 return rStrm.GetError() == ERRCODE_NONE;
2374 }
2375
Doc2Dif(SvStream & rStrm)2376 bool ScImportExport::Doc2Dif( SvStream& rStrm )
2377 {
2378 // for DIF in the clipboard, IBM_850 is always used
2379 ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 );
2380 return true;
2381 }
2382
Dif2Doc(SvStream & rStrm)2383 bool ScImportExport::Dif2Doc( SvStream& rStrm )
2384 {
2385 SCTAB nTab = aRange.aStart.Tab();
2386 ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
2387 pImportDoc->InitUndo( rDoc, nTab, nTab );
2388
2389 // for DIF in the clipboard, IBM_850 is always used
2390 ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
2391
2392 SCCOL nEndCol;
2393 SCROW nEndRow;
2394 pImportDoc->GetCellArea( nTab, nEndCol, nEndRow );
2395 // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2396 if ( nEndCol < aRange.aStart.Col() )
2397 nEndCol = aRange.aStart.Col();
2398 if ( nEndRow < aRange.aStart.Row() )
2399 nEndRow = aRange.aStart.Row();
2400 aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
2401
2402 bool bOk = StartPaste();
2403 if (bOk)
2404 {
2405 InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2406 rDoc.DeleteAreaTab( aRange, nFlags );
2407 pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc);
2408 EndPaste();
2409 }
2410
2411 return bOk;
2412 }
2413
RTF2Doc(SvStream & rStrm,const OUString & rBaseURL)2414 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
2415 {
2416 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange );
2417 if (!pImp)
2418 return false;
2419 pImp->Read( rStrm, rBaseURL );
2420 aRange = pImp->GetRange();
2421
2422 bool bOk = StartPaste();
2423 if (bOk)
2424 {
2425 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2426 rDoc.DeleteAreaTab( aRange, nFlags );
2427 pImp->WriteToDocument();
2428 EndPaste();
2429 }
2430 return bOk;
2431 }
2432
HTML2Doc(SvStream & rStrm,const OUString & rBaseURL)2433 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
2434 {
2435 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange);
2436 if (!pImp)
2437 return false;
2438 pImp->Read( rStrm, rBaseURL );
2439 aRange = pImp->GetRange();
2440
2441 bool bOk = StartPaste();
2442 if (bOk)
2443 {
2444 // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2445 // a Draw Layer but no Draw View -> create Draw Layer and View here
2446 if (pDocSh)
2447 pDocSh->MakeDrawLayer();
2448
2449 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2450 rDoc.DeleteAreaTab( aRange, nFlags );
2451
2452 if (pExtOptions)
2453 {
2454 // Pick up import options if available.
2455 LanguageType eLang = pExtOptions->GetLanguage();
2456 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
2457 bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
2458 pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber);
2459 }
2460 else
2461 // Regular import, with no options.
2462 pImp->WriteToDocument();
2463
2464 EndPaste();
2465 }
2466 return bOk;
2467 }
2468
2469 #ifndef DISABLE_DYNLOADING
2470
thisModule()2471 extern "C" { static void thisModule() {} }
2472
2473 #else
2474
2475 extern "C" {
2476 ScFormatFilterPlugin* ScFilterCreate();
2477 }
2478
2479 #endif
2480
2481 typedef ScFormatFilterPlugin * (*FilterFn)();
Get()2482 ScFormatFilterPlugin &ScFormatFilter::Get()
2483 {
2484 static ScFormatFilterPlugin *plugin = [&]()
2485 {
2486 #ifndef DISABLE_DYNLOADING
2487 OUString sFilterLib(SVLIBRARY("scfilt"));
2488 static ::osl::Module aModule;
2489 bool bLoaded = aModule.is();
2490 if (!bLoaded)
2491 bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
2492 if (!bLoaded)
2493 bLoaded = aModule.load(sFilterLib);
2494 if (bLoaded)
2495 {
2496 oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
2497 if (fn != nullptr)
2498 return reinterpret_cast<FilterFn>(fn)();
2499 }
2500 assert(false);
2501 return static_cast<ScFormatFilterPlugin*>(nullptr);
2502 #else
2503 return ScFilterCreate();
2504 #endif
2505 }();
2506
2507 return *plugin;
2508 }
2509
2510 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2511 // array.
lcl_UnicodeStrChr(const sal_Unicode * pStr,sal_Unicode c)2512 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
2513 sal_Unicode c )
2514 {
2515 while (*pStr)
2516 {
2517 if (*pStr == c)
2518 return pStr;
2519 ++pStr;
2520 }
2521 return nullptr;
2522 }
2523
ScImportStringStream(const OUString & rStr)2524 ScImportStringStream::ScImportStringStream( const OUString& rStr )
2525 : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
2526 rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
2527 {
2528 SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
2529 #ifdef OSL_BIGENDIAN
2530 SetEndian(SvStreamEndian::BIG);
2531 #else
2532 SetEndian(SvStreamEndian::LITTLE);
2533 #endif
2534 }
2535
/** Reads one logical CSV line from rStream.
 *
 *  One physical line is always read. If bEmbeddedLineBreak is set and that
 *  line ends inside a quoted field (odd number of counted quotes), further
 *  physical lines are appended, joined with "\n", until the quotes are
 *  balanced, the stream is at its end, the accumulated length reaches
 *  nArbitraryLineLengthLimit or nMaxSourceLines is reached.
 *
 *  @param rStream             stream to read from.
 *  @param bEmbeddedLineBreak  whether quoted fields may contain line breaks.
 *  @param rFieldSeparators    field separator characters; a separator
 *                             detected during the call is appended.
 *  @param cFieldQuote         the field quote character.
 *  @param rcDetectSep         in/out parameter passed on to
 *                             lcl_isEscapedOrFieldEndQuote(); if it is 0 on
 *                             entry (and bEmbeddedLineBreak is set) and
 *                             non-zero after that call, the line is re-read
 *                             once with the new separator added.
 *  @param nMaxSourceLines     limit for the line counter that is incremented
 *                             each time the scanned text ends inside quotes;
 *                             0 means no limit.
 *  @return the logical line that was read.
 */
OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
        OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines )
{
    // Whether the line may be re-read after a separator was detected:
    // ALLOW -> RETRY (separator found, jump back) -> RETRIED (second read).
    enum RetryState
    {
        FORBID,
        ALLOW,
        RETRY,
        RETRIED
    } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);

    // Remember where this line starts so a retry can seek back to it.
    sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);

Label_RetryWithNewSep:

    if (eRetryState == RetryState::RETRY)
    {
        eRetryState = RetryState::RETRIED;
        rStream.Seek( nStreamPos);
    }

    OUString aStr;
    rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);

    if (bEmbeddedLineBreak)
    {
        // Counts the times the scanned text ended inside quotes.
        sal_uInt32 nLine = 0;

        const sal_Unicode* pSeps = rFieldSeparators.getStr();

        QuoteType eQuoteState = FIELDEND_QUOTE;
        bool bFieldStart = true;

        // Offset in aStr where scanning resumes after appending a line.
        sal_Int32 nLastOffset = 0;
        // Number of quotes counted so far; odd means inside a quoted field.
        sal_Int32 nQuotes = 0;
        while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
        {
            const sal_Unicode * p = aStr.getStr() + nLastOffset;
            const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength();
            while (p < pStop)
            {
                if (!*p)
                {
                    // Skip embedded null-characters. They don't change
                    // anything and are handled at a higher level.
                    ++p;
                    continue;
                }

                if (nQuotes)
                {
                    if (*p == cFieldQuote)
                    {
                        if (bFieldStart)
                        {
                            ++nQuotes;
                            bFieldStart = false;
                            eQuoteState = FIELDSTART_QUOTE;
                        }
                        // Do not detect a FIELDSTART_QUOTE if not in
                        // bFieldStart mode, in which case for unquoted content
                        // we are in FIELDEND_QUOTE state.
                        else if (eQuoteState != FIELDEND_QUOTE)
                        {
                            eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);

                            // A separator was detected for the first time:
                            // add it to the separators and re-read the line
                            // from its start with the new set.
                            if (eRetryState == RetryState::ALLOW && rcDetectSep)
                            {
                                eRetryState = RetryState::RETRY;
                                rFieldSeparators += OUStringChar(rcDetectSep);
                                pSeps = rFieldSeparators.getStr();
                                goto Label_RetryWithNewSep;
                            }

                            // DONTKNOW_QUOTE is an embedded unescaped quote we
                            // don't count for pairing.
                            if (eQuoteState != DONTKNOW_QUOTE)
                                ++nQuotes;
                        }
                    }
                    else if (eQuoteState == FIELDEND_QUOTE)
                    {
                        if (bFieldStart)
                            // If blank is a separator it starts a field, if it
                            // is not and thus maybe leading before quote we
                            // are still at start of field regarding quotes.
                            bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                        else
                            bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                    }
                }
                else
                {
                    if (*p == cFieldQuote && bFieldStart)
                    {
                        nQuotes = 1;
                        eQuoteState = FIELDSTART_QUOTE;
                        bFieldStart = false;
                    }
                    else if (eQuoteState == FIELDEND_QUOTE)
                    {
                        // This also skips leading blanks at beginning of line
                        // if followed by a quote. It's debatable whether we
                        // actually want that or not, but congruent with what
                        // ScanNextFieldFromString() does.
                        if (bFieldStart)
                            bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                        else
                            bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
                    }
                }
                // A quote character inside a field content does not start
                // a quote.
                ++p;
            }

            if ((nQuotes & 1) == 0)
                // We still have a (theoretical?) problem here if due to
                // nArbitraryLineLengthLimit (or nMaxSourceLines below) we
                // split a string right between a doubled quote pair.
                break;
            else if (++nLine >= nMaxSourceLines && nMaxSourceLines > 0)
                // Unconditionally increment nLine even if nMaxSourceLines==0
                // so it can be observed in debugger.
                break;
            else
            {
                // Still inside a quoted field: append the next physical line
                // and continue scanning where the previous text ended.
                nLastOffset = aStr.getLength();
                OUString aNext;
                rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
                aStr += "\n" + aNext;
            }
        }
    }
    return aStr;
}
2672
2673 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2674