1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <sal/config.h>
21
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50
51 #include <svtools/htmlcfg.hxx>
52 #include <sfx2/linkmgr.hxx>
53 #include <editeng/kernitem.hxx>
54 #include <editeng/boxitem.hxx>
55 #include <editeng/fhgtitem.hxx>
56 #include <editeng/formatbreakitem.hxx>
57 #include <editeng/postitem.hxx>
58 #include <editeng/wghtitem.hxx>
59 #include <editeng/crossedoutitem.hxx>
60 #include <editeng/udlnitem.hxx>
61 #include <editeng/escapementitem.hxx>
62 #include <editeng/blinkitem.hxx>
63 #include <editeng/ulspitem.hxx>
64 #include <editeng/colritem.hxx>
65 #include <editeng/fontitem.hxx>
66 #include <editeng/adjustitem.hxx>
67 #include <editeng/lrspitem.hxx>
68 #include <editeng/protitem.hxx>
69 #include <editeng/flstitem.hxx>
70 #include <svx/unobrushitemhelper.hxx>
71
72 #include <frmatr.hxx>
73 #include <charatr.hxx>
74 #include <fmtfld.hxx>
75 #include <fmtpdsc.hxx>
76 #include <fmtanchr.hxx>
77 #include <fmtsrnd.hxx>
78 #include <fmtfsize.hxx>
79 #include <fmtclds.hxx>
80 #include <fchrfmt.hxx>
81 #include <fmtinfmt.hxx>
82 #include <fmtfollowtextflow.hxx>
83 #include <fmtornt.hxx>
84 #include <doc.hxx>
85 #include <IDocumentUndoRedo.hxx>
86 #include <IDocumentSettingAccess.hxx>
87 #include <IDocumentLayoutAccess.hxx>
88 #include <IDocumentLinksAdministration.hxx>
89 #include <IDocumentRedlineAccess.hxx>
90 #include <IDocumentFieldsAccess.hxx>
91 #include <IDocumentStylePoolAccess.hxx>
92 #include <IDocumentStatistics.hxx>
93 #include <IDocumentState.hxx>
94 #include <pam.hxx>
95 #include <ndtxt.hxx>
96 #include <mdiexp.hxx>
97 #include <poolfmt.hxx>
98 #include <pagedesc.hxx>
99 #include <IMark.hxx>
100 #include <docsh.hxx>
101 #include <editsh.hxx>
102 #include <docufld.hxx>
103 #include "swcss1.hxx"
104 #include <fltini.hxx>
105 #include <htmltbl.hxx>
106 #include "htmlnum.hxx"
107 #include "swhtml.hxx"
108 #include "wrthtml.hxx"
109 #include <linkenum.hxx>
110 #include <breakit.hxx>
111 #include <SwAppletImpl.hxx>
112 #include <swdll.hxx>
113 #include <txatbase.hxx>
114
115 #include <sfx2/viewfrm.hxx>
116 #include <svx/svdobj.hxx>
117 #include <officecfg/Office/Writer.hxx>
118 #include <comphelper/sequenceashashmap.hxx>
119 #include <comphelper/sequence.hxx>
120
121 #include <swerror.h>
122 #include <ndole.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 #include <frameformats.hxx>
126
// Mask with the value 7; it is not referenced in the visible part of this file.
// NOTE(review): presumably related to the seven HTML font sizes - confirm at the use sites.
#define FONTSIZE_MASK 7

// Escapement constants for the HTML import. SUPER/SUB simply alias the
// DFLT_ESC_* defaults.
// NOTE(review): HTML_ESC_PROP looks like a proportional size in percent - confirm at the use sites.
#define HTML_ESC_PROP 80
#define HTML_ESC_SUPER DFLT_ESC_SUPER
#define HTML_ESC_SUB DFLT_ESC_SUB

// Numeric codes for the <SPACER TYPE=...> option; aHTMLSpacerTypeTable maps
// the option keywords onto these values.
#define HTML_SPTYPE_BLOCK 1
#define HTML_SPTYPE_HORI 2
#define HTML_SPTYPE_VERT 3
136
137 using editeng::SvxBorderLine;
138 using namespace ::com::sun::star;
139
140 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
// Each entry pairs an ALIGN option keyword with the SvxAdjust value it selects.
HTMLOptionEnum<SvxAdjust> const aHTMLPAlignTable[] =
{
    { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
    { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
    { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
    { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
    { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
    // "char" alignment is mapped to plain left alignment
    { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
    // nullptr keyword closes the table
    // NOTE(review): presumably the sentinel the option lookup stops at - confirm
    { nullptr, SvxAdjust(0) }
};
151
152 // <SPACER TYPE=...>
// Each entry pairs a TYPE option keyword with one of the HTML_SPTYPE_* codes.
HTMLOptionEnum<sal_uInt16> const aHTMLSpacerTypeTable[] =
{
    { OOO_STRING_SVTOOLS_HTML_SPTYPE_block, HTML_SPTYPE_BLOCK },
    { OOO_STRING_SVTOOLS_HTML_SPTYPE_horizontal, HTML_SPTYPE_HORI },
    { OOO_STRING_SVTOOLS_HTML_SPTYPE_vertical, HTML_SPTYPE_VERT },
    // nullptr keyword closes the table
    // NOTE(review): presumably the sentinel the option lookup stops at - confirm
    { nullptr, 0 }
};
160
HTMLReader()161 HTMLReader::HTMLReader()
162 {
163 m_bTemplateBrowseMode = true;
164 }
165
GetTemplateName(SwDoc & rDoc) const166 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
167 {
168 if (!rDoc.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE))
169 // HTML import into Writer, avoid loading the Writer/Web template.
170 return OUString();
171
172 static const OUStringLiteral sTemplateWithoutExt(u"internal/html");
173 SvtPathOptions aPathOpt;
174
175 // first search for OpenDocument Writer/Web template
176 // OpenDocument Writer/Web template (extension .oth)
177 OUString sTemplate( sTemplateWithoutExt + ".oth" );
178 if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
179 return sTemplate;
180
181 // no OpenDocument Writer/Web template found.
182 // search for OpenOffice.org Writer/Web template
183 sTemplate = sTemplateWithoutExt + ".stw";
184 if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
185 return sTemplate;
186
187 OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
188
189 return OUString();
190 }
191
SetStrmStgPtr()192 bool HTMLReader::SetStrmStgPtr()
193 {
194 OSL_ENSURE( m_pMedium, "Where is the medium??" );
195
196 if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
197 {
198 m_pStream = m_pMedium->GetInStream();
199 return true;
200 }
201 return false;
202
203 }
204
205 // Call for the general Reader-Interface
Read(SwDoc & rDoc,const OUString & rBaseURL,SwPaM & rPam,const OUString & rName)206 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
207 {
208 SetupFilterOptions();
209
210 if( !m_pStream )
211 {
212 OSL_ENSURE( m_pStream, "HTML-Read without stream" );
213 return ERR_SWG_READ_ERROR;
214 }
215
216 if( !m_bInsertMode )
217 {
218 Reader::ResetFrameFormats( rDoc );
219
220 // Set the HTML page style, when it isn't a HTML document,
221 // otherwise it's already set.
222 if( !rDoc.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE) && m_aNamespace != "reqif-xhtml" )
223 {
224 rDoc.getIDocumentContentOperations().InsertPoolItem( rPam, SwFormatPageDesc(
225 rDoc.getIDocumentStylePoolAccess().GetPageDescFromPool( RES_POOLPAGE_HTML, false )) );
226 }
227 }
228
229 // so nobody steals the document!
230 rtl::Reference<SwDoc> xHoldAlive(&rDoc);
231 ErrCode nRet = ERRCODE_NONE;
232 tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
233 rName, rBaseURL, !m_bInsertMode, m_pMedium,
234 IsReadUTF8(),
235 m_bIgnoreHTMLComments, m_aNamespace );
236
237 SvParserState eState = xParser->CallParser();
238
239 if( SvParserState::Pending == eState )
240 m_pStream->ResetError();
241 else if( SvParserState::Accepted != eState )
242 {
243 const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
244 + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
245
246 // use the stream as transport for error number
247 nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
248 DialogMask::ButtonsOk | DialogMask::MessageError );
249 }
250
251 return nRet;
252 }
253
SwHTMLParser(SwDoc * pD,SwPaM & rCursor,SvStream & rIn,const OUString & rPath,const OUString & rBaseURL,bool bReadNewDoc,SfxMedium * pMed,bool bReadUTF8,bool bNoHTMLComments,const OUString & rNamespace)254 SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn,
255 const OUString& rPath,
256 const OUString& rBaseURL,
257 bool bReadNewDoc,
258 SfxMedium* pMed, bool bReadUTF8,
259 bool bNoHTMLComments,
260 const OUString& rNamespace )
261 : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
262 m_aPathToFile( rPath ),
263 m_sBaseURL( rBaseURL ),
264 m_xAttrTab(std::make_shared<HTMLAttrTable>()),
265 m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266 m_xDoc( pD ),
267 m_pActionViewShell( nullptr ),
268 m_pSttNdIdx( nullptr ),
269 m_pFormImpl( nullptr ),
270 m_pMarquee( nullptr ),
271 m_pImageMap( nullptr ),
272 m_nBaseFontStMin( 0 ),
273 m_nFontStMin( 0 ),
274 m_nDefListDeep( 0 ),
275 m_nFontStHeadStart( 0 ),
276 m_nSBModuleCnt( 0 ),
277 m_nMissingImgMaps( 0 ),
278 m_nParaCnt( 5 ),
279 // #i83625#
280 m_nContextStMin( 0 ),
281 m_nContextStAttrMin( 0 ),
282 m_nSelectEntryCnt( 0 ),
283 m_nOpenParaToken( HtmlTokenId::NONE ),
284 m_eJumpTo( JumpToMarks::NONE ),
285 #ifdef DBG_UTIL
286 m_nContinue( 0 ),
287 #endif
288 m_eParaAdjust( SvxAdjust::End ),
289 m_bDocInitialized( false ),
290 m_bSetModEnabled( false ),
291 m_bInFloatingFrame( false ),
292 m_bInField( false ),
293 m_bCallNextToken( false ),
294 m_bIgnoreRawData( false ),
295 m_bLBEntrySelected ( false ),
296 m_bTAIgnoreNewPara ( false ),
297 m_bFixMarqueeWidth ( false ),
298 m_bNoParSpace( false ),
299 m_bInNoEmbed( false ),
300 m_bInTitle( false ),
301 m_bUpdateDocStat( false ),
302 m_bFixSelectWidth( false ),
303 m_bTextArea( false ),
304 m_bSelect( false ),
305 m_bInFootEndNoteAnchor( false ),
306 m_bInFootEndNoteSymbol( false ),
307 m_bIgnoreHTMLComments( bNoHTMLComments ),
308 m_bRemoveHidden( false ),
309 m_bBodySeen( false ),
310 m_bReadingHeaderOrFooter( false ),
311 m_bNotifyMacroEventRead( false ),
312 m_isInTableStructure(false),
313 m_nTableDepth( 0 ),
314 m_pTempViewFrame(nullptr)
315 {
316 // If requested explicitly, then force ignoring of comments (don't create postits for them).
317 if (!utl::ConfigManager::IsFuzzing() && officecfg::Office::Writer::Filter::Import::HTML::IgnoreComments::get())
318 m_bIgnoreHTMLComments = true;
319
320 m_nEventId = nullptr;
321 m_bUpperSpace = m_bViewCreated = m_bChkJumpMark = false;
322
323 m_eScriptLang = HTMLScriptLanguage::Unknown;
324
325 rCursor.DeleteMark();
326 m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
327 memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
328
329 // Read the font sizes 1-7 from the INI file
330 SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
331 m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
332 m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
333 m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
334 m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
335 m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
336 m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
337 m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
338
339 m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
340
341 if(bReadNewDoc)
342 {
343 //CJK has different defaults, so a different object should be used for this
344 //RES_CHARTR_CJK_FONTSIZE is a valid value
345 SvxFontHeightItem aFontHeight(m_aFontHeights[2], 100, RES_CHRATR_FONTSIZE);
346 m_xDoc->SetDefault( aFontHeight );
347 SvxFontHeightItem aFontHeightCJK(m_aFontHeights[2], 100, RES_CHRATR_CJK_FONTSIZE);
348 m_xDoc->SetDefault( aFontHeightCJK );
349 SvxFontHeightItem aFontHeightCTL(m_aFontHeights[2], 100, RES_CHRATR_CTL_FONTSIZE);
350 m_xDoc->SetDefault( aFontHeightCTL );
351
352 // #i18732# - adjust default of option 'FollowTextFlow'
353 // TODO: not sure what the appropriate default for HTML should be?
354 m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
355 }
356
357 // Change to HTML mode during the import, so that the right styles are created
358 m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
359 m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
360
361 m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
362 m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
363
364 if( bReadUTF8 )
365 {
366 SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
367 }
368 else
369 {
370 SwDocShell *pDocSh = m_xDoc->GetDocShell();
371 SvKeyValueIterator *pHeaderAttrs =
372 pDocSh->GetHeaderAttributes();
373 if( pHeaderAttrs )
374 SetEncodingByHTTPHeader( pHeaderAttrs );
375 }
376 m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
377
378 SwDocShell* pDocSh = m_xDoc->GetDocShell();
379 if( pDocSh )
380 {
381 m_bViewCreated = true; // not, load synchronous
382
383 // a jump mark is present
384
385 if( pMed )
386 {
387 m_sJmpMark = pMed->GetURLObject().GetMark();
388 if( !m_sJmpMark.isEmpty() )
389 {
390 m_eJumpTo = JumpToMarks::Mark;
391 sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
392 sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
393
394 OUString sCmp;
395 if (nPos)
396 {
397 sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
398 }
399
400 if( !sCmp.isEmpty() )
401 {
402 sCmp = sCmp.toAsciiLowerCase();
403 if( sCmp == "region" )
404 m_eJumpTo = JumpToMarks::Region;
405 else if( sCmp == "table" )
406 m_eJumpTo = JumpToMarks::Table;
407 else if( sCmp == "graphic" )
408 m_eJumpTo = JumpToMarks::Graphic;
409 else if( sCmp == "outline" ||
410 sCmp == "text" ||
411 sCmp == "frame" )
412 m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
413 else
414 // otherwise this is a normal (book)mark
415 nPos = -1;
416 }
417 else
418 nPos = -1;
419
420 if( nPos != -1 )
421 m_sJmpMark = m_sJmpMark.copy( 0, nPos );
422 if( m_sJmpMark.isEmpty() )
423 m_eJumpTo = JumpToMarks::NONE;
424 }
425 }
426 }
427
428 if (!rNamespace.isEmpty())
429 {
430 SetNamespace(rNamespace);
431 m_bXHTML = true;
432 if (rNamespace == "reqif-xhtml")
433 m_bReqIF = true;
434 }
435
436 // Extract load parameters which are specific to this filter.
437 if (!pMed)
438 {
439 return;
440 }
441
442 comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
443 auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
444 if (it == aLoadMap.end())
445 {
446 return;
447 }
448
449 uno::Sequence<OUString> aTypes;
450 it->second >>= aTypes;
451 m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
452 }
453
// Tears the parser down: pops and clears any contexts left on the stack,
// restores the document's previous HTML mode, removes a still pending user
// event, updates links / finishes loading on the document shell, and releases
// the helper objects owned by the parser.
SwHTMLParser::~SwHTMLParser()
{
#ifdef DBG_UTIL
    OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
#endif

    OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
    OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
    // drop the lower bound so that the loop below can pop every context
    m_nContextStMin = 0;
    while (!m_aContexts.empty())
    {
        std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
        ClearContext(xCntxt.get());
    }

    // remember the asynchronous-load state before resetting it; it decides
    // below whether links get updated
    bool bAsync = m_xDoc->IsInLoadAsynchron();
    m_xDoc->SetInLoadAsynchron( false );
    // undo the switch to HTML mode made in the constructor
    m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);

    if( m_xDoc->GetDocShell() && m_nEventId )
        Application::RemoveUserEvent( m_nEventId );

    // the DocumentDetected maybe can delete the DocShells, therefore fetch again
    if( m_xDoc->GetDocShell() )
    {
        // update linked sections
        sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
        if( nLinkMode != NEVER && bAsync &&
            SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
            m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );

        if ( m_xDoc->GetDocShell()->IsLoading() )
        {
            // #i59688#
            m_xDoc->GetDocShell()->LoadingFinished();
        }
    }

    delete m_pSttNdIdx;

    if( !m_aSetAttrTab.empty() )
    {
        OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
        for ( const auto& rpAttr : m_aSetAttrTab )
            delete rpAttr;
        m_aSetAttrTab.clear();
    }

    m_pCSS1Parser.reset();
    m_pNumRuleInfo.reset();
    DeleteFormImpl();
    m_pFootEndNoteImpl.reset();

    OSL_ENSURE(!m_xTable, "It exists still an open table");
    m_pImageMaps.reset();

    OSL_ENSURE( m_vPendingStack.empty(),
            "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
    m_vPendingStack.clear();

    m_xDoc.clear();

    if ( m_pTempViewFrame )
    {
        m_pTempViewFrame->DoClose();

        // the temporary view frame is hidden, so the hidden flag might need to be removed
        // NOTE(review): m_xDoc has just been cleared above, so m_xDoc.is() is
        // false here and the ClearItem call cannot be reached as written -
        // confirm whether the clear() was meant to come after this block.
        if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
            m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
    }
}
525
// User-event callback posted by CallParser(): clears the event id, flags an
// error if the import is being aborted or the parser holds the last
// reference to the document, and then triggers the asynchronous call link.
IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
{
    m_nEventId = nullptr;

    // #i47907# - If the document has already been destructed,
    // the parser should be aware of this:
    SwDocShell* const pShell = m_xDoc->GetDocShell();
    const bool bAborting = pShell && pShell->IsAbortingImport();
    if( bAborting || m_xDoc->getReferenceCount() == 1 )
    {
        // was the import aborted by SFX?
        eState = SvParserState::Error;
    }

    GetAsynchCallLink().Call(nullptr);
}
541
CallParser()542 SvParserState SwHTMLParser::CallParser()
543 {
544 // create temporary index on position 0, so it won't be moved!
545 m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
546 if( !IsNewDoc() ) // insert into existing document ?
547 {
548 const SwPosition* pPos = m_pPam->GetPoint();
549
550 m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
551
552 *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
553 m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
554
555 SwPaM aInsertionRangePam( *pPos );
556
557 m_pPam->Move( fnMoveBackward );
558
559 // split any redline over the insertion point
560 aInsertionRangePam.SetMark();
561 *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
562 aInsertionRangePam.Move( fnMoveBackward );
563 m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
564
565 m_xDoc->SetTextFormatColl( *m_pPam,
566 m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
567 }
568
569 if( GetMedium() )
570 {
571 if( !m_bViewCreated )
572 {
573 m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
574 }
575 else
576 {
577 m_bViewCreated = true;
578 m_nEventId = nullptr;
579 }
580 }
581 else // show progress bar
582 {
583 rInput.Seek(STREAM_SEEK_TO_END);
584 rInput.ResetError();
585
586 m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
587
588 rInput.Seek(STREAM_SEEK_TO_BEGIN);
589 rInput.ResetError();
590 }
591
592 StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
593
594 SvParserState eRet = HTMLParser::CallParser();
595 return eRet;
596 }
597
CanRemoveNode(sal_uLong nNodeIdx) const598 bool SwHTMLParser::CanRemoveNode(sal_uLong nNodeIdx) const
599 {
600 const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
601 return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
602 }
603
// Runs (or resumes) the import, starting with nToken unless a pending stack
// exists, in which case the token on top of that stack is used.
//
// Outline, as far as visible here:
//  - On the first call with a medium and no view yet, the state is set to
//    Pending, asynchronous loading is flagged on the document and the
//    function returns immediately.
//  - For the duration of the import SetModified on the document shell, the
//    OLE link and Undo are switched off; they are restored at the end unless
//    the parser holds the last reference to the document.
//  - Once the parser is no longer pending, open attributes, applets,
//    numberings and contexts are closed, and the node split made by
//    CallParser() at the start of an insertion is joined again.
//  - On acceptance, missing image maps are connected, a superfluous last
//    paragraph is removed (or the split at the end is undone), the autoload
//    URL is filled in and the document statistics are updated if requested.
void SwHTMLParser::Continue( HtmlTokenId nToken )
{
#ifdef DBG_UTIL
    OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
    m_nContinue++;
#endif

    // When the import (of SFX) is aborted, an error will be set but
    // we still continue, so that we clean up properly.
    OSL_ENSURE( SvParserState::Error!=eState,
            "SwHTMLParser::Continue: already set an error" );
    if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
        eState = SvParserState::Error;

    // Fetch SwViewShell from document, save it and set as current.
    SwViewShell *pInitVSh = CallStartAction();

    if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
    {
        // At first call first return, show document and wait for callback
        // time.
        // At this point in CallParser only one digit was read and
        // a SaveState(0) was called.
        eState = SvParserState::Pending;
        m_bViewCreated = true;
        m_xDoc->SetInLoadAsynchron( true );

#ifdef DBG_UTIL
        m_nContinue--;
#endif

        return;
    }

    // Remember whether SetModified was enabled and switch it off while importing.
    m_bSetModEnabled = false;
    if( m_xDoc->GetDocShell() )
    {
        m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
        if( m_bSetModEnabled )
        {
            m_xDoc->GetDocShell()->EnableSetModified( false );
        }
    }

    // during import don't call OLE-Modified
    Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
    m_xDoc->SetOle2Link( Link<bool,void>() );

    // Save the modified and undo state so both can be restored at the end.
    bool bModified = m_xDoc->getIDocumentState().IsModified();
    bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
    m_xDoc->GetIDocumentUndoRedo().DoUndo(false);

    // When the import will be aborted, don't call Continue anymore.
    // If a Pending-Stack exists make sure the stack is ended with a call
    // of NextToken.
    if( SvParserState::Error == eState )
    {
        OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
                "SwHTMLParser::Continue: Pending-Stack without Token" );
        if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
            NextToken( m_vPendingStack.back().nToken );
        OSL_ENSURE( m_vPendingStack.empty(),
                "SwHTMLParser::Continue: There is again a Pending-Stack" );
    }
    else
    {
        HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
    }

    // disable progress bar again
    m_xProgress.reset();

    // Set when StripTrailingLF() removed something; in that case the last
    // paragraph is not removed further below.
    bool bLFStripped = false;
    if( SvParserState::Pending != GetStatus() )
    {
        // set the last attributes yet
        {
            // a script source that is still buffered is inserted as a script field
            if( !m_aScriptSource.isEmpty() )
            {
                SwScriptFieldType *pType =
                    static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));

                SwScriptField aField( pType, m_aScriptType, m_aScriptSource,
                                    false );
                InsertAttr( SwFormatField( aField ), false );
            }

            // close an applet or object that is still open
            if( m_pAppletImpl )
            {
                if( m_pAppletImpl->GetApplet().is() )
                    EndApplet();
                else
                    EndObject();
            }

            // maybe remove an existing LF after the last paragraph
            if( IsNewDoc() )
                bLFStripped = StripTrailingLF() > 0;

            // close still open numbering
            while( GetNumInfo().GetNumRule() )
                EndNumberBulletList();

            OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
            // try this twice, first normally to let m_nContextStMin decrease
            // naturally and get contexts popped in desired order, and if that
            // fails force it
            for (int i = 0; i < 2; ++i)
            {
                while (m_aContexts.size() > m_nContextStMin)
                {
                    std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
                    if (xCntxt)
                        EndContext(xCntxt.get());
                }
                if (!m_nContextStMin)
                    break;
                OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
                m_nContextStMin = 0;
            }

            m_aParaAttrs.clear();

            SetAttr( false );

            // set the first delayed styles
            m_pCSS1Parser->SetDelayedStyles();
        }

        // again correct the start
        if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
        {
            SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
            SwNodeIndex aNxtIdx( *m_pSttNdIdx );
            if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
            {
                const sal_Int32 nStt = pTextNode->GetText().getLength();
                // when the cursor is still in the node, then set him at the end
                if( m_pPam->GetPoint()->nNode == aNxtIdx )
                {
                    m_pPam->GetPoint()->nNode = *m_pSttNdIdx;
                    m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
                }

#if OSL_DEBUG_LEVEL > 0
                // !!! shouldn't be possible, or ??
                OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
                        "Pam.Bound1 is still in the node" );
                OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
                        "Pam.Bound2 is still in the node" );

                if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
                {
                    const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
                    m_pPam->GetBound().nContent.Assign( pTextNode,
                                    pTextNode->GetText().getLength() + nCntPos );
                }
                if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
                {
                    const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
                    m_pPam->GetBound( false ).nContent.Assign( pTextNode,
                                    pTextNode->GetText().getLength() + nCntPos );
                }
#endif
                // Keep character attribute!
                SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
                if (pTextNode->GetText().getLength())
                    pDelNd->FormatToTextAttr( pTextNode );
                else
                    pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
                pTextNode->JoinNext();
            }
        }
    }

    if( SvParserState::Accepted == eState )
    {
        if( m_nMissingImgMaps )
        {
            // Some Image-Map relations are still missing.
            // Maybe now the Image-Maps are there?
            ConnectImageMaps();
        }

        // now remove the last useless paragraph
        SwPosition* pPos = m_pPam->GetPoint();
        if( !pPos->nContent.GetIndex() && !bLFStripped )
        {
            SwTextNode* pCurrentNd;
            sal_uLong nNodeIdx = pPos->nNode.GetIndex();

            // paragraphs carrying flys or bookmarks are kept
            bool bHasFlysOrMarks =
                HasCurrentParaFlys() || HasCurrentParaBookmarks( true );

            if( IsNewDoc() )
            {
                if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
                {
                    SwContentNode* pCNd = m_pPam->GetContentNode();
                    if( pCNd && pCNd->StartOfSectionIndex()+2 <
                        pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
                    {
                        // move a shell cursor that sits in the node out of it
                        // before the node is deleted
                        SwViewShell *pVSh = CheckActionViewShell();
                        SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
                        if( pCursorSh &&
                            pCursorSh->GetCursor()->GetPoint()
                                ->nNode.GetIndex() == nNodeIdx )
                        {
                            pCursorSh->MovePara(GoPrevPara, fnParaEnd );
                            pCursorSh->SetMark();
                            pCursorSh->ClearMark();
                        }
                        m_pPam->GetBound().nContent.Assign( nullptr, 0 );
                        m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
                        m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
                    }
                }
            }
            else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
            {
                if( pCurrentNd->CanJoinNext( &pPos->nNode ))
                {
                    SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
                    pPos->nContent.Assign( pNextNd, 0 );
                    m_pPam->SetMark(); m_pPam->DeleteMark();
                    pNextNd->JoinPrev();
                }
                else if (pCurrentNd->GetText().isEmpty())
                {
                    pPos->nContent.Assign( nullptr, 0 );
                    m_pPam->SetMark(); m_pPam->DeleteMark();
                    m_xDoc->GetNodes().Delete( pPos->nNode );
                    m_pPam->Move( fnMoveBackward );
                }
            }
        }

        // annul the SplitNode from the beginning
        else if( !IsNewDoc() )
        {
            if( pPos->nContent.GetIndex() ) // then there was no <p> at the end
                m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
            SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
            SwNodeIndex aPrvIdx( pPos->nNode );
            if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
                *m_pSttNdIdx <= aPrvIdx )
            {
                // Normally here should take place a JoinNext, but all cursors and
                // so are registered in pTextNode, so that it MUST remain.

                // Convert paragraph to character attribute, from Prev adopt
                // the paragraph attribute and the template!
                SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
                pTextNode->ChgFormatColl( pPrev->GetTextColl() );
                pTextNode->FormatToTextAttr( pPrev );
                pTextNode->ResetAllAttr();

                if( pPrev->HasSwAttrSet() )
                    pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );

                // re-register cursor bounds that still point into pPrev
                if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
                    m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
                if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
                    m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );

                pTextNode->JoinPrev();
            }
        }

        // adjust AutoLoad in DocumentProperties
        if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
        {
            SwDocShell *pDocShell(m_xDoc->GetDocShell());
            OSL_ENSURE(pDocShell, "no SwDocShell");
            if (pDocShell) {
                uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
                    pDocShell->GetModel(), uno::UNO_QUERY_THROW);
                uno::Reference<document::XDocumentProperties> xDocProps(
                    xDPS->getDocumentProperties());
                OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
                // an autoload delay without a URL gets the path of the imported file
                if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
                    (xDocProps->getAutoloadURL().isEmpty()) )
                {
                    xDocProps->setAutoloadURL(m_aPathToFile);
                }
            }
        }

        if( m_bUpdateDocStat )
        {
            m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
        }
    }

    // the start index is only needed while the import can still be resumed
    if( SvParserState::Pending != GetStatus() )
    {
        delete m_pSttNdIdx;
        m_pSttNdIdx = nullptr;
    }

    // should the parser be the last one who hold the document, then nothing
    // has to be done anymore, document will be destroyed shortly!
    if( 1 < m_xDoc->getReferenceCount() )
    {
        if( bWasUndo )
        {
            m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
            m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
        }
        else if( !pInitVSh )
        {
            // When at the beginning of Continue no Shell was available,
            // it's possible in the meantime one was created.
            // In that case the bWasUndo flag is wrong and we must
            // enable Undo.
            SwViewShell *pTmpVSh = CheckActionViewShell();
            if( pTmpVSh )
            {
                m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
            }
        }

        m_xDoc->SetOle2Link( aOLELink );
        if( !bModified )
            m_xDoc->getIDocumentState().ResetModified();
        if( m_bSetModEnabled && m_xDoc->GetDocShell() )
        {
            m_xDoc->GetDocShell()->EnableSetModified();
            m_bSetModEnabled = false; // this is unnecessary here
        }
    }

    // When the Document-SwVievShell still exists and an Action is open
    // (doesn't have to be by abort), end the Action, disconnect from Shell
    // and finally reconstruct the old Shell.
    CallEndAction( true );

#ifdef DBG_UTIL
    m_nContinue--;
#endif
}
945
Notify(const SfxHint & rHint)946 void SwHTMLParser::Notify(const SfxHint& rHint)
947 {
948 if(rHint.GetId() == SfxHintId::Dying)
949 {
950 EndListeningAll();
951 ReleaseRef();
952 }
953 }
954
DocumentDetected()955 void SwHTMLParser::DocumentDetected()
956 {
957 OSL_ENSURE( !m_bDocInitialized, "DocumentDetected called multiple times" );
958 m_bDocInitialized = true;
959 if( IsNewDoc() )
960 {
961 if( IsInHeader() )
962 FinishHeader();
963
964 CallEndAction( true );
965
966 m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
967 // For DocumentDetected in general a SwViewShell is created.
968 // But it also can be created later, in case the UI is captured.
969 CallStartAction();
970 }
971 }
972
973 // is called for every token that is recognised in CallParser
NextToken(HtmlTokenId nToken)974 void SwHTMLParser::NextToken( HtmlTokenId nToken )
975 {
976 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
977 || 1 == m_xDoc->getReferenceCount() )
978 {
979 // Was the import cancelled by SFX? If a pending stack
980 // exists, clean it.
981 eState = SvParserState::Error;
982 OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
983 "SwHTMLParser::NextToken: Pending-Stack without token" );
984 if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
985 return ;
986 }
987
988 #if OSL_DEBUG_LEVEL > 0
989 if( !m_vPendingStack.empty() )
990 {
991 switch( nToken )
992 {
993 // tables are read by recursive method calls
994 case HtmlTokenId::TABLE_ON:
995 // For CSS declarations we might have to wait
996 // for a file download to finish
997 case HtmlTokenId::LINK:
998 // For controls we might have to set the size.
999 case HtmlTokenId::INPUT:
1000 case HtmlTokenId::TEXTAREA_ON:
1001 case HtmlTokenId::SELECT_ON:
1002 case HtmlTokenId::SELECT_OFF:
1003 break;
1004 default:
1005 OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1006 break;
1007 }
1008 }
1009 #endif
1010
1011 // The following special cases have to be treated before the
1012 // filter detection, because Netscape doesn't reference the content
1013 // of the title for filter detection either.
1014 if( m_vPendingStack.empty() )
1015 {
1016 if( m_bInTitle )
1017 {
1018 switch( nToken )
1019 {
1020 case HtmlTokenId::TITLE_OFF:
1021 {
1022 OUString sTitle = m_sTitle.makeStringAndClear();
1023 if( IsNewDoc() && !sTitle.isEmpty() )
1024 {
1025 if( m_xDoc->GetDocShell() ) {
1026 uno::Reference<document::XDocumentPropertiesSupplier>
1027 xDPS(m_xDoc->GetDocShell()->GetModel(),
1028 uno::UNO_QUERY_THROW);
1029 uno::Reference<document::XDocumentProperties> xDocProps(
1030 xDPS->getDocumentProperties());
1031 OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1032 if (xDocProps.is()) {
1033 xDocProps->setTitle(sTitle);
1034 }
1035
1036 m_xDoc->GetDocShell()->SetTitle(sTitle);
1037 }
1038 }
1039 m_bInTitle = false;
1040 break;
1041 }
1042
1043 case HtmlTokenId::NONBREAKSPACE:
1044 m_sTitle.append(" ");
1045 break;
1046
1047 case HtmlTokenId::SOFTHYPH:
1048 m_sTitle.append("-");
1049 break;
1050
1051 case HtmlTokenId::TEXTTOKEN:
1052 m_sTitle.append(aToken);
1053 break;
1054
1055 default:
1056 m_sTitle.append("<");
1057 if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1058 m_sTitle.append("/");
1059 m_sTitle.append(sSaveToken);
1060 if( !aToken.isEmpty() )
1061 {
1062 m_sTitle.append(" ");
1063 m_sTitle.append(aToken);
1064 }
1065 m_sTitle.append(">");
1066 break;
1067 }
1068
1069 return;
1070 }
1071 }
1072
1073 // Find out what type of document it is if we don't know already.
1074 // For Controls this has to be finished before the control is inserted
1075 // because for inserting a View is needed.
1076 if( !m_bDocInitialized )
1077 DocumentDetected();
1078
1079 bool bGetIDOption = false, bInsertUnknown = false;
1080 bool bUpperSpaceSave = m_bUpperSpace;
1081 m_bUpperSpace = false;
1082
1083 // The following special cases may or have to be treated after the
1084 // filter detection
1085 if( m_vPendingStack.empty() )
1086 {
1087 if( m_bInFloatingFrame )
1088 {
1089 // <SCRIPT> is ignored here (from us), because it is ignored in
1090 // Applets as well
1091 if( HtmlTokenId::IFRAME_OFF == nToken )
1092 {
1093 m_bCallNextToken = false;
1094 m_bInFloatingFrame = false;
1095 }
1096
1097 return;
1098 }
1099 else if( m_bInNoEmbed )
1100 {
1101 switch( nToken )
1102 {
1103 case HtmlTokenId::NOEMBED_OFF:
1104 m_aContents = convertLineEnd(m_aContents, GetSystemLineEnd());
1105 InsertComment( m_aContents, OOO_STRING_SVTOOLS_HTML_noembed );
1106 m_aContents.clear();
1107 m_bCallNextToken = false;
1108 m_bInNoEmbed = false;
1109 break;
1110
1111 case HtmlTokenId::RAWDATA:
1112 InsertCommentText( OOO_STRING_SVTOOLS_HTML_noembed );
1113 break;
1114
1115 default:
1116 OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1117 break;
1118 }
1119
1120 return;
1121 }
1122 else if( m_pAppletImpl )
1123 {
1124 // in an applet only <PARAM> tags and the </APPLET> tag
1125 // are of interest for us (for the moment)
1126 // <SCRIPT> is ignored here (from Netscape)!
1127
1128 switch( nToken )
1129 {
1130 case HtmlTokenId::APPLET_OFF:
1131 m_bCallNextToken = false;
1132 EndApplet();
1133 break;
1134 case HtmlTokenId::OBJECT_OFF:
1135 m_bCallNextToken = false;
1136 EndObject();
1137 break;
1138 case HtmlTokenId::PARAM:
1139 InsertParam();
1140 break;
1141 default: break;
1142 }
1143
1144 return;
1145 }
1146 else if( m_bTextArea )
1147 {
1148 // in a TextArea everything up to </TEXTAREA> is inserted as text.
1149 // <SCRIPT> is ignored here (from Netscape)!
1150
1151 switch( nToken )
1152 {
1153 case HtmlTokenId::TEXTAREA_OFF:
1154 m_bCallNextToken = false;
1155 EndTextArea();
1156 break;
1157
1158 default:
1159 InsertTextAreaText( nToken );
1160 break;
1161 }
1162
1163 return;
1164 }
1165 else if( m_bSelect )
1166 {
1167 // HAS to be treated after bNoScript!
1168 switch( nToken )
1169 {
1170 case HtmlTokenId::SELECT_OFF:
1171 m_bCallNextToken = false;
1172 EndSelect();
1173 return;
1174
1175 case HtmlTokenId::OPTION:
1176 InsertSelectOption();
1177 return;
1178
1179 case HtmlTokenId::TEXTTOKEN:
1180 InsertSelectText();
1181 return;
1182
1183 case HtmlTokenId::INPUT:
1184 case HtmlTokenId::SCRIPT_ON:
1185 case HtmlTokenId::SCRIPT_OFF:
1186 case HtmlTokenId::NOSCRIPT_ON:
1187 case HtmlTokenId::NOSCRIPT_OFF:
1188 case HtmlTokenId::RAWDATA:
1189 // treat in normal switch
1190 break;
1191
1192 default:
1193 // ignore
1194 return;
1195 }
1196 }
1197 else if( m_pMarquee )
1198 {
1199 // in a TextArea everything up to </TEXTAREA> is inserted as text.
1200 // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1201 // script.
1202 switch( nToken )
1203 {
1204 case HtmlTokenId::MARQUEE_OFF:
1205 m_bCallNextToken = false;
1206 EndMarquee();
1207 break;
1208
1209 case HtmlTokenId::TEXTTOKEN:
1210 InsertMarqueeText();
1211 break;
1212 default: break;
1213 }
1214
1215 return;
1216 }
1217 else if( m_bInField )
1218 {
1219 switch( nToken )
1220 {
1221 case HtmlTokenId::SDFIELD_OFF:
1222 m_bCallNextToken = false;
1223 EndField();
1224 break;
1225
1226 case HtmlTokenId::TEXTTOKEN:
1227 InsertFieldText();
1228 break;
1229 default: break;
1230 }
1231
1232 return;
1233 }
1234 else if( m_bInFootEndNoteAnchor || m_bInFootEndNoteSymbol )
1235 {
1236 switch( nToken )
1237 {
1238 case HtmlTokenId::ANCHOR_OFF:
1239 EndAnchor();
1240 m_bCallNextToken = false;
1241 break;
1242
1243 case HtmlTokenId::TEXTTOKEN:
1244 InsertFootEndNoteText();
1245 break;
1246 default: break;
1247 }
1248 return;
1249 }
1250 else if( !m_aUnknownToken.isEmpty() )
1251 {
1252 // Paste content of unknown tags.
1253 // (but surely if we are not in the header section) fdo#36080 fdo#34666
1254 if (!aToken.isEmpty() && !IsInHeader() )
1255 {
1256 if( !m_bDocInitialized )
1257 DocumentDetected();
1258 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1259
1260 // if there are temporary paragraph attributes and the
1261 // paragraph isn't empty then the paragraph attributes
1262 // are final.
1263 m_aParaAttrs.clear();
1264
1265 SetAttr();
1266 }
1267
1268 // Unknown token in the header are only closed by a matching
1269 // end-token, </HEAD> or <BODY>. Text inside is ignored.
1270 switch( nToken )
1271 {
1272 case HtmlTokenId::UNKNOWNCONTROL_OFF:
1273 if( m_aUnknownToken != sSaveToken )
1274 return;
1275 [[fallthrough]];
1276 case HtmlTokenId::FRAMESET_ON:
1277 case HtmlTokenId::HEAD_OFF:
1278 case HtmlTokenId::BODY_ON:
1279 case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1280 m_aUnknownToken.clear();
1281 break;
1282 case HtmlTokenId::TEXTTOKEN:
1283 return;
1284 default:
1285 m_aUnknownToken.clear();
1286 break;
1287 }
1288 }
1289 }
1290
1291 switch( nToken )
1292 {
1293 case HtmlTokenId::BODY_ON:
1294 if (!m_bBodySeen)
1295 {
1296 m_bBodySeen = true;
1297 if( !m_aStyleSource.isEmpty() )
1298 {
1299 m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1300 m_aStyleSource.clear();
1301 }
1302 if( IsNewDoc() )
1303 {
1304 InsertBodyOptions();
1305 // If there is a template for the first or the right page,
1306 // it is set here.
1307 const SwPageDesc *pPageDesc = nullptr;
1308 if( m_pCSS1Parser->IsSetFirstPageDesc() )
1309 pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1310 else if( m_pCSS1Parser->IsSetRightPageDesc() )
1311 pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1312
1313 if( pPageDesc )
1314 {
1315 m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1316 }
1317 }
1318 }
1319 break;
1320
1321 case HtmlTokenId::LINK:
1322 InsertLink();
1323 break;
1324
1325 case HtmlTokenId::BASE:
1326 {
1327 const HTMLOptions& rHTMLOptions = GetOptions();
1328 for (size_t i = rHTMLOptions.size(); i; )
1329 {
1330 const HTMLOption& rOption = rHTMLOptions[--i];
1331 switch( rOption.GetToken() )
1332 {
1333 case HtmlOptionId::HREF:
1334 m_sBaseURL = rOption.GetString();
1335 break;
1336 case HtmlOptionId::TARGET:
1337 if( IsNewDoc() )
1338 {
1339 SwDocShell *pDocShell(m_xDoc->GetDocShell());
1340 OSL_ENSURE(pDocShell, "no SwDocShell");
1341 if (pDocShell) {
1342 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1343 pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1344 uno::Reference<document::XDocumentProperties>
1345 xDocProps(xDPS->getDocumentProperties());
1346 OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1347 if (xDocProps.is()) {
1348 xDocProps->setDefaultTarget(
1349 rOption.GetString());
1350 }
1351 }
1352 }
1353 break;
1354 default: break;
1355 }
1356 }
1357 }
1358 break;
1359
1360 case HtmlTokenId::META:
1361 {
1362 SvKeyValueIterator *pHTTPHeader = nullptr;
1363 if( IsNewDoc() )
1364 {
1365 SwDocShell *pDocSh = m_xDoc->GetDocShell();
1366 if( pDocSh )
1367 pHTTPHeader = pDocSh->GetHeaderAttributes();
1368 }
1369 SwDocShell *pDocShell(m_xDoc->GetDocShell());
1370 OSL_ENSURE(pDocShell, "no SwDocShell");
1371 if (pDocShell)
1372 {
1373 uno::Reference<document::XDocumentProperties> xDocProps;
1374 if (IsNewDoc())
1375 {
1376 const uno::Reference<document::XDocumentPropertiesSupplier>
1377 xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1378 xDocProps = xDPS->getDocumentProperties();
1379 OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1380 }
1381 ParseMetaOptions( xDocProps, pHTTPHeader );
1382 }
1383 }
1384 break;
1385
1386 case HtmlTokenId::TITLE_ON:
1387 m_bInTitle = true;
1388 break;
1389
1390 case HtmlTokenId::SCRIPT_ON:
1391 NewScript();
1392 break;
1393
1394 case HtmlTokenId::SCRIPT_OFF:
1395 EndScript();
1396 break;
1397
1398 case HtmlTokenId::NOSCRIPT_ON:
1399 case HtmlTokenId::NOSCRIPT_OFF:
1400 bInsertUnknown = true;
1401 break;
1402
1403 case HtmlTokenId::STYLE_ON:
1404 NewStyle();
1405 break;
1406
1407 case HtmlTokenId::STYLE_OFF:
1408 EndStyle();
1409 break;
1410
1411 case HtmlTokenId::RAWDATA:
1412 if( !m_bIgnoreRawData )
1413 {
1414 if( IsReadScript() )
1415 {
1416 AddScriptSource();
1417 }
1418 else if( IsReadStyle() )
1419 {
1420 if( !m_aStyleSource.isEmpty() )
1421 m_aStyleSource += "\n";
1422 m_aStyleSource += aToken;
1423 }
1424 }
1425 break;
1426
1427 case HtmlTokenId::OBJECT_ON:
1428 if (m_bXHTML)
1429 {
1430 if (!InsertEmbed())
1431 InsertImage();
1432 break;
1433 }
1434 #if HAVE_FEATURE_JAVA
1435 NewObject();
1436 m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1437 #endif
1438 break;
1439
1440 case HtmlTokenId::OBJECT_OFF:
1441 if (!m_aEmbeds.empty())
1442 m_aEmbeds.pop();
1443 break;
1444
1445 case HtmlTokenId::APPLET_ON:
1446 #if HAVE_FEATURE_JAVA
1447 InsertApplet();
1448 m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1449 #endif
1450 break;
1451
1452 case HtmlTokenId::IFRAME_ON:
1453 InsertFloatingFrame();
1454 m_bCallNextToken = m_bInFloatingFrame && m_xTable;
1455 break;
1456
1457 case HtmlTokenId::LINEBREAK:
1458 if( !IsReadPRE() )
1459 {
1460 InsertLineBreak();
1461 break;
1462 }
1463 else
1464 bGetIDOption = true;
1465 // <BR>s in <PRE> resemble true LFs, hence no break
1466 [[fallthrough]];
1467
1468 case HtmlTokenId::NEWPARA:
1469 // CR in PRE/LISTING/XMP
1470 {
1471 if( HtmlTokenId::NEWPARA==nToken ||
1472 m_pPam->GetPoint()->nContent.GetIndex() )
1473 {
1474 AppendTextNode(); // there is no LF at this place
1475 // therefore it will cause no problems
1476 SetTextCollAttrs();
1477 }
1478 // progress bar
1479 if (m_xProgress)
1480 m_xProgress->Update(rInput.Tell());
1481 }
1482 break;
1483
1484 case HtmlTokenId::NONBREAKSPACE:
1485 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1486 break;
1487
1488 case HtmlTokenId::SOFTHYPH:
1489 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1490 break;
1491
1492 case HtmlTokenId::LINEFEEDCHAR:
1493 if( m_pPam->GetPoint()->nContent.GetIndex() )
1494 AppendTextNode();
1495 if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1496 {
1497 NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1498 EndAttr( m_xAttrTab->pBreak, false );
1499 }
1500 break;
1501
1502 case HtmlTokenId::TEXTTOKEN:
1503 // insert string without spanning attributes at the end.
1504 if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1505 {
1506 sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1507 const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1508 if (pTextNode)
1509 {
1510 const OUString& rText = pTextNode->GetText();
1511 sal_Unicode cLast = rText[--nPos];
1512 if( ' ' == cLast || '\x0a' == cLast)
1513 aToken = aToken.copy(1);
1514 }
1515 else
1516 aToken = aToken.copy(1);
1517
1518 if( aToken.isEmpty() )
1519 {
1520 m_bUpperSpace = bUpperSpaceSave;
1521 break;
1522 }
1523 }
1524
1525 if( !aToken.isEmpty() )
1526 {
1527 if( !m_bDocInitialized )
1528 DocumentDetected();
1529
1530 if (!m_aEmbeds.empty())
1531 {
1532 // The text token is inside an OLE object, which means
1533 // alternate text.
1534 SwOLENode* pOLENode = m_aEmbeds.top();
1535 if (SwFlyFrameFormat* pFormat
1536 = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1537 {
1538 if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
1539 {
1540 pObject->SetTitle(pObject->GetTitle() + aToken);
1541 break;
1542 }
1543 }
1544 }
1545
1546 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1547
1548 // if there are temporary paragraph attributes and the
1549 // paragraph isn't empty then the paragraph attributes
1550 // are final.
1551 m_aParaAttrs.clear();
1552
1553 SetAttr();
1554 }
1555 break;
1556
1557 case HtmlTokenId::HORZRULE:
1558 InsertHorzRule();
1559 break;
1560
1561 case HtmlTokenId::IMAGE:
1562 InsertImage();
1563 // if only the parser references the doc, we can break and set
1564 // an error code
1565 if( 1 == m_xDoc->getReferenceCount() )
1566 {
1567 eState = SvParserState::Error;
1568 }
1569 break;
1570
1571 case HtmlTokenId::SPACER:
1572 InsertSpacer();
1573 break;
1574
1575 case HtmlTokenId::EMBED:
1576 InsertEmbed();
1577 break;
1578
1579 case HtmlTokenId::NOEMBED_ON:
1580 m_bInNoEmbed = true;
1581 m_bCallNextToken = bool(m_xTable);
1582 ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1583 break;
1584
1585 case HtmlTokenId::DEFLIST_ON:
1586 if( m_nOpenParaToken != HtmlTokenId::NONE )
1587 EndPara();
1588 NewDefList();
1589 break;
1590 case HtmlTokenId::DEFLIST_OFF:
1591 if( m_nOpenParaToken != HtmlTokenId::NONE )
1592 EndPara();
1593 EndDefListItem( HtmlTokenId::NONE );
1594 EndDefList();
1595 break;
1596
1597 case HtmlTokenId::DD_ON:
1598 case HtmlTokenId::DT_ON:
1599 if( m_nOpenParaToken != HtmlTokenId::NONE )
1600 EndPara();
1601 EndDefListItem();// close <DD>/<DT> and set no template
1602 NewDefListItem( nToken );
1603 break;
1604
1605 case HtmlTokenId::DD_OFF:
1606 case HtmlTokenId::DT_OFF:
1607 // c.f. HtmlTokenId::LI_OFF
1608 // Actually we should close a DD/DT now.
1609 // But neither Netscape nor Microsoft do this and so don't we.
1610 EndDefListItem( nToken );
1611 break;
1612
1613 // divisions
1614 case HtmlTokenId::DIVISION_ON:
1615 case HtmlTokenId::CENTER_ON:
1616 if (!m_isInTableStructure)
1617 {
1618 if (m_nOpenParaToken != HtmlTokenId::NONE)
1619 {
1620 if (IsReadPRE())
1621 m_nOpenParaToken = HtmlTokenId::NONE;
1622 else
1623 EndPara();
1624 }
1625 NewDivision( nToken );
1626 }
1627 break;
1628
1629 case HtmlTokenId::DIVISION_OFF:
1630 case HtmlTokenId::CENTER_OFF:
1631 if (!m_isInTableStructure)
1632 {
1633 if (m_nOpenParaToken != HtmlTokenId::NONE)
1634 {
1635 if (IsReadPRE())
1636 m_nOpenParaToken = HtmlTokenId::NONE;
1637 else
1638 EndPara();
1639 }
1640 EndDivision();
1641 }
1642 break;
1643
1644 case HtmlTokenId::MULTICOL_ON:
1645 if( m_nOpenParaToken != HtmlTokenId::NONE )
1646 EndPara();
1647 NewMultiCol();
1648 break;
1649
1650 case HtmlTokenId::MULTICOL_OFF:
1651 if( m_nOpenParaToken != HtmlTokenId::NONE )
1652 EndPara();
1653 EndTag( HtmlTokenId::MULTICOL_ON );
1654 break;
1655
1656 case HtmlTokenId::MARQUEE_ON:
1657 NewMarquee();
1658 m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1659 break;
1660
1661 case HtmlTokenId::FORM_ON:
1662 NewForm();
1663 break;
1664 case HtmlTokenId::FORM_OFF:
1665 EndForm();
1666 break;
1667
1668 // templates
1669 case HtmlTokenId::PARABREAK_ON:
1670 if( m_nOpenParaToken != HtmlTokenId::NONE )
1671 EndPara( true );
1672 NewPara();
1673 break;
1674
1675 case HtmlTokenId::PARABREAK_OFF:
1676 EndPara( true );
1677 break;
1678
1679 case HtmlTokenId::ADDRESS_ON:
1680 if( m_nOpenParaToken != HtmlTokenId::NONE )
1681 EndPara();
1682 NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1683 break;
1684
1685 case HtmlTokenId::ADDRESS_OFF:
1686 if( m_nOpenParaToken != HtmlTokenId::NONE )
1687 EndPara();
1688 EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1689 break;
1690
1691 case HtmlTokenId::BLOCKQUOTE_ON:
1692 case HtmlTokenId::BLOCKQUOTE30_ON:
1693 if( m_nOpenParaToken != HtmlTokenId::NONE )
1694 EndPara();
1695 NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1696 break;
1697
1698 case HtmlTokenId::BLOCKQUOTE_OFF:
1699 case HtmlTokenId::BLOCKQUOTE30_OFF:
1700 if( m_nOpenParaToken != HtmlTokenId::NONE )
1701 EndPara();
1702 EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1703 break;
1704
1705 case HtmlTokenId::PREFORMTXT_ON:
1706 case HtmlTokenId::LISTING_ON:
1707 case HtmlTokenId::XMP_ON:
1708 if( m_nOpenParaToken != HtmlTokenId::NONE )
1709 EndPara();
1710 NewTextFormatColl( nToken, RES_POOLCOLL_HTML_PRE );
1711 break;
1712
1713 case HtmlTokenId::PREFORMTXT_OFF:
1714 m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1715 EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1716 break;
1717
1718 case HtmlTokenId::LISTING_OFF:
1719 case HtmlTokenId::XMP_OFF:
1720 EndTextFormatColl( nToken );
1721 break;
1722
1723 case HtmlTokenId::HEAD1_ON:
1724 case HtmlTokenId::HEAD2_ON:
1725 case HtmlTokenId::HEAD3_ON:
1726 case HtmlTokenId::HEAD4_ON:
1727 case HtmlTokenId::HEAD5_ON:
1728 case HtmlTokenId::HEAD6_ON:
1729 if( m_nOpenParaToken != HtmlTokenId::NONE )
1730 {
1731 if( IsReadPRE() )
1732 m_nOpenParaToken = HtmlTokenId::NONE;
1733 else
1734 EndPara();
1735 }
1736 NewHeading( nToken );
1737 break;
1738
1739 case HtmlTokenId::HEAD1_OFF:
1740 case HtmlTokenId::HEAD2_OFF:
1741 case HtmlTokenId::HEAD3_OFF:
1742 case HtmlTokenId::HEAD4_OFF:
1743 case HtmlTokenId::HEAD5_OFF:
1744 case HtmlTokenId::HEAD6_OFF:
1745 EndHeading();
1746 break;
1747
1748 case HtmlTokenId::TABLE_ON:
1749 if( !m_vPendingStack.empty() )
1750 BuildTable( SvxAdjust::End );
1751 else
1752 {
1753 if( m_nOpenParaToken != HtmlTokenId::NONE )
1754 EndPara();
1755 OSL_ENSURE(!m_xTable, "table in table not allowed here");
1756 if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1757 (m_pPam->GetPoint()->nNode.GetIndex() >
1758 m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1759 !m_pPam->GetNode().FindFootnoteStartNode() ) )
1760 {
1761 if ( m_nParaCnt < 5 )
1762 Show(); // show what we have up to here
1763
1764 SvxAdjust eAdjust = m_xAttrTab->pAdjust
1765 ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1766 GetAdjust()
1767 : SvxAdjust::End;
1768 BuildTable( eAdjust );
1769 }
1770 else
1771 bInsertUnknown = m_bKeepUnknown;
1772 }
1773 break;
1774
1775 // lists
1776 case HtmlTokenId::DIRLIST_ON:
1777 case HtmlTokenId::MENULIST_ON:
1778 case HtmlTokenId::ORDERLIST_ON:
1779 case HtmlTokenId::UNORDERLIST_ON:
1780 if( m_nOpenParaToken != HtmlTokenId::NONE )
1781 EndPara();
1782 NewNumberBulletList( nToken );
1783 break;
1784
1785 case HtmlTokenId::DIRLIST_OFF:
1786 case HtmlTokenId::MENULIST_OFF:
1787 case HtmlTokenId::ORDERLIST_OFF:
1788 case HtmlTokenId::UNORDERLIST_OFF:
1789 if( m_nOpenParaToken != HtmlTokenId::NONE )
1790 EndPara();
1791 EndNumberBulletListItem( HtmlTokenId::NONE, true );
1792 EndNumberBulletList( nToken );
1793 break;
1794
1795 case HtmlTokenId::LI_ON:
1796 case HtmlTokenId::LISTHEADER_ON:
1797 if( m_nOpenParaToken != HtmlTokenId::NONE &&
1798 (m_pPam->GetPoint()->nContent.GetIndex()
1799 || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1800 {
1801 // only finish paragraph for <P><LI>, not for <DD><LI>
1802 EndPara();
1803 }
1804
1805 EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1806 NewNumberBulletListItem( nToken );
1807 break;
1808
1809 case HtmlTokenId::LI_OFF:
1810 case HtmlTokenId::LISTHEADER_OFF:
1811 EndNumberBulletListItem( nToken, false );
1812 break;
1813
1814 // Attribute :
1815 case HtmlTokenId::ITALIC_ON:
1816 {
1817 SvxPostureItem aPosture( ITALIC_NORMAL, RES_CHRATR_POSTURE );
1818 SvxPostureItem aPostureCJK( ITALIC_NORMAL, RES_CHRATR_CJK_POSTURE );
1819 SvxPostureItem aPostureCTL( ITALIC_NORMAL, RES_CHRATR_CTL_POSTURE );
1820 NewStdAttr( HtmlTokenId::ITALIC_ON,
1821 &m_xAttrTab->pItalic, aPosture,
1822 &m_xAttrTab->pItalicCJK, &aPostureCJK,
1823 &m_xAttrTab->pItalicCTL, &aPostureCTL );
1824 }
1825 break;
1826
1827 case HtmlTokenId::BOLD_ON:
1828 {
1829 SvxWeightItem aWeight( WEIGHT_BOLD, RES_CHRATR_WEIGHT );
1830 SvxWeightItem aWeightCJK( WEIGHT_BOLD, RES_CHRATR_CJK_WEIGHT );
1831 SvxWeightItem aWeightCTL( WEIGHT_BOLD, RES_CHRATR_CTL_WEIGHT );
1832 NewStdAttr( HtmlTokenId::BOLD_ON,
1833 &m_xAttrTab->pBold, aWeight,
1834 &m_xAttrTab->pBoldCJK, &aWeightCJK,
1835 &m_xAttrTab->pBoldCTL, &aWeightCTL );
1836 }
1837 break;
1838
1839 case HtmlTokenId::STRIKE_ON:
1840 case HtmlTokenId::STRIKETHROUGH_ON:
1841 {
1842 NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1843 SvxCrossedOutItem(STRIKEOUT_SINGLE, RES_CHRATR_CROSSEDOUT) );
1844 }
1845 break;
1846
1847 case HtmlTokenId::UNDERLINE_ON:
1848 {
1849 NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1850 SvxUnderlineItem(LINESTYLE_SINGLE, RES_CHRATR_UNDERLINE) );
1851 }
1852 break;
1853
1854 case HtmlTokenId::SUPERSCRIPT_ON:
1855 {
1856 NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1857 SvxEscapementItem(HTML_ESC_SUPER,HTML_ESC_PROP, RES_CHRATR_ESCAPEMENT) );
1858 }
1859 break;
1860
1861 case HtmlTokenId::SUBSCRIPT_ON:
1862 {
1863 NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1864 SvxEscapementItem(HTML_ESC_SUB,HTML_ESC_PROP, RES_CHRATR_ESCAPEMENT) );
1865 }
1866 break;
1867
1868 case HtmlTokenId::BLINK_ON:
1869 {
1870 NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1871 SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1872 }
1873 break;
1874
1875 case HtmlTokenId::SPAN_ON:
1876 NewStdAttr( HtmlTokenId::SPAN_ON );
1877 break;
1878
1879 case HtmlTokenId::ITALIC_OFF:
1880 case HtmlTokenId::BOLD_OFF:
1881 case HtmlTokenId::STRIKE_OFF:
1882 case HtmlTokenId::UNDERLINE_OFF:
1883 case HtmlTokenId::SUPERSCRIPT_OFF:
1884 case HtmlTokenId::SUBSCRIPT_OFF:
1885 case HtmlTokenId::BLINK_OFF:
1886 case HtmlTokenId::SPAN_OFF:
1887 EndTag( nToken );
1888 break;
1889
1890 case HtmlTokenId::STRIKETHROUGH_OFF:
1891 EndTag( HtmlTokenId::STRIKE_OFF );
1892 break;
1893
1894 case HtmlTokenId::BASEFONT_ON:
1895 NewBasefontAttr();
1896 break;
1897 case HtmlTokenId::BASEFONT_OFF:
1898 EndBasefontAttr();
1899 break;
1900 case HtmlTokenId::FONT_ON:
1901 case HtmlTokenId::BIGPRINT_ON:
1902 case HtmlTokenId::SMALLPRINT_ON:
1903 NewFontAttr( nToken );
1904 break;
1905 case HtmlTokenId::FONT_OFF:
1906 case HtmlTokenId::BIGPRINT_OFF:
1907 case HtmlTokenId::SMALLPRINT_OFF:
1908 EndFontAttr( nToken );
1909 break;
1910
1911 case HtmlTokenId::EMPHASIS_ON:
1912 case HtmlTokenId::CITATION_ON:
1913 case HtmlTokenId::STRONG_ON:
1914 case HtmlTokenId::CODE_ON:
1915 case HtmlTokenId::SAMPLE_ON:
1916 case HtmlTokenId::KEYBOARD_ON:
1917 case HtmlTokenId::VARIABLE_ON:
1918 case HtmlTokenId::DEFINSTANCE_ON:
1919 case HtmlTokenId::SHORTQUOTE_ON:
1920 case HtmlTokenId::LANGUAGE_ON:
1921 case HtmlTokenId::AUTHOR_ON:
1922 case HtmlTokenId::PERSON_ON:
1923 case HtmlTokenId::ACRONYM_ON:
1924 case HtmlTokenId::ABBREVIATION_ON:
1925 case HtmlTokenId::INSERTEDTEXT_ON:
1926 case HtmlTokenId::DELETEDTEXT_ON:
1927
1928 case HtmlTokenId::TELETYPE_ON:
1929 NewCharFormat( nToken );
1930 break;
1931
1932 case HtmlTokenId::SDFIELD_ON:
1933 NewField();
1934 m_bCallNextToken = m_bInField && m_xTable;
1935 break;
1936
1937 case HtmlTokenId::EMPHASIS_OFF:
1938 case HtmlTokenId::CITATION_OFF:
1939 case HtmlTokenId::STRONG_OFF:
1940 case HtmlTokenId::CODE_OFF:
1941 case HtmlTokenId::SAMPLE_OFF:
1942 case HtmlTokenId::KEYBOARD_OFF:
1943 case HtmlTokenId::VARIABLE_OFF:
1944 case HtmlTokenId::DEFINSTANCE_OFF:
1945 case HtmlTokenId::SHORTQUOTE_OFF:
1946 case HtmlTokenId::LANGUAGE_OFF:
1947 case HtmlTokenId::AUTHOR_OFF:
1948 case HtmlTokenId::PERSON_OFF:
1949 case HtmlTokenId::ACRONYM_OFF:
1950 case HtmlTokenId::ABBREVIATION_OFF:
1951 case HtmlTokenId::INSERTEDTEXT_OFF:
1952 case HtmlTokenId::DELETEDTEXT_OFF:
1953
1954 case HtmlTokenId::TELETYPE_OFF:
1955 EndTag( nToken );
1956 break;
1957
1958 case HtmlTokenId::HEAD_OFF:
1959 if( !m_aStyleSource.isEmpty() )
1960 {
1961 m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1962 m_aStyleSource.clear();
1963 }
1964 break;
1965
1966 case HtmlTokenId::DOCTYPE:
1967 case HtmlTokenId::BODY_OFF:
1968 case HtmlTokenId::HTML_OFF:
1969 case HtmlTokenId::HEAD_ON:
1970 case HtmlTokenId::TITLE_OFF:
1971 break; // don't evaluate further???
1972 case HtmlTokenId::HTML_ON:
1973 {
1974 const HTMLOptions& rHTMLOptions = GetOptions();
1975 for (size_t i = rHTMLOptions.size(); i; )
1976 {
1977 const HTMLOption& rOption = rHTMLOptions[--i];
1978 if( HtmlOptionId::DIR == rOption.GetToken() )
1979 {
1980 const OUString& rDir = rOption.GetString();
1981 SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1982 m_pCSS1Parser->GetWhichMap() );
1983 SvxCSS1PropertyInfo aPropInfo;
1984 OUString aDummy;
1985 ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1986 aPropInfo, nullptr, &rDir );
1987
1988 m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1989 break;
1990 }
1991 }
1992 }
1993 break;
1994
1995 case HtmlTokenId::INPUT:
1996 InsertInput();
1997 break;
1998
1999 case HtmlTokenId::TEXTAREA_ON:
2000 NewTextArea();
2001 m_bCallNextToken = m_bTextArea && m_xTable;
2002 break;
2003
2004 case HtmlTokenId::SELECT_ON:
2005 NewSelect();
2006 m_bCallNextToken = m_bSelect && m_xTable;
2007 break;
2008
2009 case HtmlTokenId::ANCHOR_ON:
2010 NewAnchor();
2011 break;
2012
2013 case HtmlTokenId::ANCHOR_OFF:
2014 EndAnchor();
2015 break;
2016
2017 case HtmlTokenId::COMMENT:
2018 if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2019 {
2020 // insert as Post-It
2021 // If there are no space characters right behind
2022 // the <!-- and on front of the -->, leave the comment untouched.
2023 if( ' ' == aToken[ 3 ] &&
2024 ' ' == aToken[ aToken.getLength()-3 ] )
2025 {
2026 OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2027 InsertComment(comphelper::string::strip(aComment, ' '));
2028 }
2029 else
2030 {
2031 OUString aComment = "<" + aToken + ">";
2032 InsertComment( aComment );
2033 }
2034 }
2035 break;
2036
2037 case HtmlTokenId::MAP_ON:
2038 // Image Maps are read asynchronously: At first only an image map is created
2039 // Areas are processed later. Nevertheless the
2040 // ImageMap is inserted into the IMap-Array, because it might be used
2041 // already.
2042 m_pImageMap = new ImageMap;
2043 if( ParseMapOptions( m_pImageMap) )
2044 {
2045 if (!m_pImageMaps)
2046 m_pImageMaps.reset( new ImageMaps );
2047 m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2048 }
2049 else
2050 {
2051 delete m_pImageMap;
2052 m_pImageMap = nullptr;
2053 }
2054 break;
2055
2056 case HtmlTokenId::MAP_OFF:
2057 // there is no ImageMap anymore (don't delete IMap, because it's
2058 // already contained in the array!)
2059 m_pImageMap = nullptr;
2060 break;
2061
2062 case HtmlTokenId::AREA:
2063 if( m_pImageMap )
2064 ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2065 SvMacroItemId::OnMouseOut );
2066 break;
2067
2068 case HtmlTokenId::FRAMESET_ON:
2069 bInsertUnknown = m_bKeepUnknown;
2070 break;
2071
2072 case HtmlTokenId::NOFRAMES_ON:
2073 if( IsInHeader() )
2074 FinishHeader();
2075 bInsertUnknown = m_bKeepUnknown;
2076 break;
2077
2078 case HtmlTokenId::UNKNOWNCONTROL_ON:
2079 // Ignore content of unknown token in the header, if the token
2080 // does not start with a '!'.
2081 // (but judging from the code, also if does not start with a '%')
2082 // (and also if we're not somewhere we consider PRE)
2083 if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2084 !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2085 '%' != sSaveToken[0] )
2086 m_aUnknownToken = sSaveToken;
2087 [[fallthrough]];
2088
2089 default:
2090 bInsertUnknown = m_bKeepUnknown;
2091 break;
2092 }
2093
2094 if( bGetIDOption )
2095 InsertIDOption();
2096
2097 if( bInsertUnknown )
2098 {
2099 OUStringBuffer aComment("HTML: <");
2100 if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2101 aComment.append("/");
2102 aComment.append(sSaveToken);
2103 if( !aToken.isEmpty() )
2104 {
2105 UnescapeToken();
2106 aComment.append(" " + aToken);
2107 }
2108 aComment.append(">");
2109 InsertComment( aComment.makeStringAndClear() );
2110 }
2111
2112 // if there are temporary paragraph attributes and the
2113 // paragraph isn't empty then the paragraph attributes are final.
2114 if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2115 m_aParaAttrs.clear();
2116 }
2117
lcl_swhtml_getItemInfo(const HTMLAttr & rAttr,bool & rScriptDependent,sal_uInt16 & rScriptType)2118 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2119 bool& rScriptDependent,
2120 sal_uInt16& rScriptType )
2121 {
2122 switch( rAttr.GetItem().Which() )
2123 {
2124 case RES_CHRATR_FONT:
2125 case RES_CHRATR_FONTSIZE:
2126 case RES_CHRATR_LANGUAGE:
2127 case RES_CHRATR_POSTURE:
2128 case RES_CHRATR_WEIGHT:
2129 rScriptType = i18n::ScriptType::LATIN;
2130 rScriptDependent = true;
2131 break;
2132 case RES_CHRATR_CJK_FONT:
2133 case RES_CHRATR_CJK_FONTSIZE:
2134 case RES_CHRATR_CJK_LANGUAGE:
2135 case RES_CHRATR_CJK_POSTURE:
2136 case RES_CHRATR_CJK_WEIGHT:
2137 rScriptType = i18n::ScriptType::ASIAN;
2138 rScriptDependent = true;
2139 break;
2140 case RES_CHRATR_CTL_FONT:
2141 case RES_CHRATR_CTL_FONTSIZE:
2142 case RES_CHRATR_CTL_LANGUAGE:
2143 case RES_CHRATR_CTL_POSTURE:
2144 case RES_CHRATR_CTL_WEIGHT:
2145 rScriptType = i18n::ScriptType::COMPLEX;
2146 rScriptDependent = true;
2147 break;
2148 default:
2149 rScriptDependent = false;
2150 break;
2151 }
2152 }
2153
// Ends the current paragraph and appends a new text node at the PaM position.
//
// In order, this
//  - strips trailing line feeds (StripTrailingLF) and, if more than one was
//    stripped, turns the "no space" modes into AM_SPACE,
//  - for AM_SPACE / AM_NOSPACE adjusts the lower paragraph spacing of the
//    current text node,
//  - appends the new text node through IDocumentContentOperations,
//  - splits all still-open character attributes of the attribute table at
//    the paragraph boundary (script dependent ones per script run),
//  - optionally updates the numbering of the new node (bUpdateNum),
//  - sets the queued attributes (SetAttr) and
//  - removes script dependent hints of the finished paragraph that are equal
//    to the value the paragraph already has.
//
// Returns the result of IDocumentContentOperations::AppendTextNode().
bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
{
    // A hard line break at the end always must be removed.
    // A second one we replace with paragraph spacing.
    sal_Int32 nLFStripped = StripTrailingLF();
    if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
        eMode = AM_SPACE;

    // the hard attributes of this paragraph will never be invalid again
    m_aParaAttrs.clear();

    // Only the AM_SPACE and AM_NOSPACE modes touch the spacing of the
    // current node; for all other modes pTextNode stays null.
    SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
        m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;

    if (pTextNode)
    {
        const SvxULSpaceItem& rULSpace =
            static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));

        // A change is needed if the current lower spacing contradicts the
        // requested mode.
        bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
                                         : rULSpace.GetLower() == 0;

        if( bChange )
        {
            const SvxULSpaceItem& rCollULSpace =
                pTextNode->GetAnyFormatColl().GetULSpace();

            // If the paragraph style already provides the wanted lower
            // spacing (and the same upper spacing), resetting the hard
            // attribute is sufficient.
            bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
                                               : rCollULSpace.GetLower() > 0;

            if( bMayReset &&
                rCollULSpace.GetUpper() == rULSpace.GetUpper() )
            {
                pTextNode->ResetAttr( RES_UL_SPACE );
            }
            else
            {
                pTextNode->SetAttr(
                    SvxULSpaceItem( rULSpace.GetUpper(),
                         AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
            }
        }
    }
    m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;

    // remember the end of the old paragraph before the PaM is moved on
    SwPosition aOldPos( *m_pPam->GetPoint() );

    bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );

    // split character attributes and maybe set none,
    // which are set for the whole paragraph
    const SwNodeIndex& rEndIdx = aOldPos.nNode;
    const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
    const SwPosition& rPos = *m_pPam->GetPoint();

    // The attribute table is walked as a plain array of HTMLAttr* list heads.
    HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
    for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
    {
        HTMLAttr *pAttr = *pHTMLAttributes;
        // only attributes with a Which id below RES_PARATR_BEGIN are split here
        if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
        {
            bool bWholePara = false;

            while( pAttr )
            {
                HTMLAttr *pNext = pAttr->GetNext();
                // The attribute started in an earlier paragraph, or it
                // started in the old paragraph before its end (and no
                // whole-paragraph attribute has been seen in this list yet).
                if( pAttr->GetStartParagraphIdx() < rEndIdx.GetIndex() ||
                    (!bWholePara &&
                     pAttr->GetStartParagraph() == rEndIdx &&
                     pAttr->GetStartContent() != nEndCnt) )
                {
                    bWholePara =
                        pAttr->GetStartParagraph() == rEndIdx &&
                        pAttr->GetStartContent() == 0;

                    sal_Int32 nStt = pAttr->m_nStartContent;
                    bool bScript = false;
                    // only read when bScript has been set to true
                    sal_uInt16 nScriptItem;
                    bool bInsert = true;
                    lcl_swhtml_getItemInfo( *pAttr, bScript,
                                            nScriptItem );
                    // set previous part
                    if( bScript )
                    {
                        const SwTextNode *pTextNd =
                            pAttr->GetStartParagraph().GetNode().GetTextNode();
                        OSL_ENSURE( pTextNd, "No text node" );
                        if( pTextNd )
                        {
                            // Walk the script runs of the text and clone the
                            // attribute for each run whose script type
                            // matches the item's script type.
                            const OUString& rText = pTextNd->GetText();
                            sal_uInt16 nScriptText =
                                g_pBreakIt->GetBreakIter()->getScriptType(
                                            rText, pAttr->GetStartContent() );
                            sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
                                    ->endOfScript( rText, nStt, nScriptText );
                            while (nScriptEnd < nEndCnt && nScriptEnd != -1)
                            {
                                if( nScriptItem == nScriptText )
                                {
                                    HTMLAttr *pSetAttr =
                                        pAttr->Clone( rEndIdx, nScriptEnd );
                                    pSetAttr->m_nStartContent = nStt;
                                    pSetAttr->ClearPrev();
                                    if( !pNext || bWholePara )
                                    {
                                        if (pSetAttr->m_bInsAtStart)
                                            m_aSetAttrTab.push_front( pSetAttr );
                                        else
                                            m_aSetAttrTab.push_back( pSetAttr );
                                    }
                                    else
                                        pNext->InsertPrev( pSetAttr );
                                }
                                nStt = nScriptEnd;
                                nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
                                                rText, nStt );
                                nScriptEnd = g_pBreakIt->GetBreakIter()
                                    ->endOfScript( rText, nStt, nScriptText );
                            }
                            // the remaining run is only set if its script
                            // type matches the item
                            bInsert = nScriptItem == nScriptText;
                        }
                    }
                    if( bInsert )
                    {
                        HTMLAttr *pSetAttr =
                            pAttr->Clone( rEndIdx, nEndCnt );
                        pSetAttr->m_nStartContent = nStt;

                        // When the attribute is for the whole paragraph, the outer
                        // attributes aren't effective anymore. Hence it may not be inserted
                        // in the Prev-List of an outer attribute, because that won't be
                        // set. That leads to shifting when fields are used.
                        if( !pNext || bWholePara )
                        {
                            if (pSetAttr->m_bInsAtStart)
                                m_aSetAttrTab.push_front( pSetAttr );
                            else
                                m_aSetAttrTab.push_back( pSetAttr );
                        }
                        else
                            pNext->InsertPrev( pSetAttr );
                    }
                    else
                    {
                        HTMLAttr *pPrev = pAttr->GetPrev();
                        if( pPrev )
                        {
                            // the previous attributes must be set anyway
                            if( !pNext || bWholePara )
                            {
                                if (pPrev->m_bInsAtStart)
                                    m_aSetAttrTab.push_front( pPrev );
                                else
                                    m_aSetAttrTab.push_back( pPrev );
                            }
                            else
                                pNext->InsertPrev( pPrev );
                        }
                    }
                    pAttr->ClearPrev();
                }

                // the still-open attribute continues in the new paragraph
                pAttr->SetStart( rPos );
                pAttr = pNext;
            }
        }
    }

    if( bUpdateNum )
    {
        if( GetNumInfo().GetDepth() )
        {
            sal_uInt8 nLvl = GetNumInfo().GetLevel();
            SetNodeNum( nLvl );
        }
        else
            m_pPam->GetNode().GetTextNode()->ResetAttr( RES_PARATR_NUMRULE );
    }

    // We must set the attribute of the paragraph before now (because of JavaScript)
    SetAttr();

    // Now it is time to get rid of all script dependent hints that are
    // equal to the settings in the style
    SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
    OSL_ENSURE( pTextNd, "There is the txt node" );
    size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
                            ? pTextNd->GetSwpHints().Count() : 0;
    if( nCntAttr )
    {
        // These are the end position of all script dependent hints.
        // If we find a hint that starts before the current end position,
        // we have to set it. If we find a hint that start behind or at
        // that position, we have to take the hint value into account.
        // If it is equal to the style, or in fact the paragraph value
        // for that hint, the hint is removed. Otherwise its end position
        // is remembered.
        // One slot per script dependent Which id; the slot index is nIdx
        // as assigned in the switch below.
        sal_Int32 aEndPos[15] =
            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
        SwpHints& rHints = pTextNd->GetSwpHints();
        for( size_t i=0; i < nCntAttr; i++ )
        {
            SwTextAttr *pHt = rHints.Get( i );
            sal_uInt16 nWhich = pHt->Which();
            sal_Int16 nIdx = 0;
            // font items are compared with swhtml_css1atr_equalFontItems
            // instead of operator==
            bool bFont = false;
            switch( nWhich )
            {
                case RES_CHRATR_FONT:
                    nIdx = 0;
                    bFont = true;
                    break;
                case RES_CHRATR_FONTSIZE:
                    nIdx = 1;
                    break;
                case RES_CHRATR_LANGUAGE:
                    nIdx = 2;
                    break;
                case RES_CHRATR_POSTURE:
                    nIdx = 3;
                    break;
                case RES_CHRATR_WEIGHT:
                    nIdx = 4;
                    break;
                case RES_CHRATR_CJK_FONT:
                    nIdx = 5;
                    bFont = true;
                    break;
                case RES_CHRATR_CJK_FONTSIZE:
                    nIdx = 6;
                    break;
                case RES_CHRATR_CJK_LANGUAGE:
                    nIdx = 7;
                    break;
                case RES_CHRATR_CJK_POSTURE:
                    nIdx = 8;
                    break;
                case RES_CHRATR_CJK_WEIGHT:
                    nIdx = 9;
                    break;
                case RES_CHRATR_CTL_FONT:
                    nIdx = 10;
                    bFont = true;
                    break;
                case RES_CHRATR_CTL_FONTSIZE:
                    nIdx = 11;
                    break;
                case RES_CHRATR_CTL_LANGUAGE:
                    nIdx = 12;
                    break;
                case RES_CHRATR_CTL_POSTURE:
                    nIdx = 13;
                    break;
                case RES_CHRATR_CTL_WEIGHT:
                    nIdx = 14;
                    break;
                default:
                    // Skip to next attribute
                    continue;
            }
            const sal_Int32 nStt = pHt->GetStart();
            if( nStt >= aEndPos[nIdx] )
            {
                const SfxPoolItem& rItem =
                    static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
                if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
                          : rItem == pHt->GetAttr() )
                {
                    // The hint is the same as set in the paragraph and
                    // therefore, it can be deleted
                    // CAUTION!!! This WILL delete the hint and it MAY
                    // also delete the SwpHints!!! To avoid any trouble
                    // we leave the loop immediately if this is the last
                    // hint.
                    pTextNd->DeleteAttribute( pHt );
                    if( 1 == nCntAttr )
                        break;
                    // Look at index i again in the next iteration
                    // (presumably the following hints moved up by one).
                    // For i == 0 the unsigned wrap-around is undone by the
                    // loop's i++.
                    i--;
                    nCntAttr--;
                }
                else
                {
                    // The hint is different. Therefore all hints within that
                    // hint have to be ignored.
                    aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
                }
            }
            else
            {
                // The hint starts before another one ends.
                // The hint in this case is not deleted
                OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
                        "hints aren't nested properly!" );
            }
        }
    }

    // Outside of tables, call Show() each time the paragraph counter
    // reaches zero.
    if (!m_xTable && !--m_nParaCnt)
        Show();

    return bRet;
}
2456
AddParSpace()2457 void SwHTMLParser::AddParSpace()
2458 {
2459 //If it already has ParSpace, return
2460 if( !m_bNoParSpace )
2461 return;
2462
2463 m_bNoParSpace = false;
2464
2465 sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2466
2467 SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2468 if( !pTextNode )
2469 return;
2470
2471 SvxULSpaceItem rULSpace =
2472 static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2473 if( rULSpace.GetLower() )
2474 return;
2475
2476 const SvxULSpaceItem& rCollULSpace =
2477 pTextNode->GetAnyFormatColl().GetULSpace();
2478 if( rCollULSpace.GetLower() &&
2479 rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2480 {
2481 pTextNode->ResetAttr( RES_UL_SPACE );
2482 }
2483 else
2484 {
2485 //What I do here, is that I examine the attributes, and if
2486 //I find out, that it's CJK/CTL, then I set the paragraph space
2487 //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2488
2489 bool bIsCJK = false;
2490 bool bIsCTL = false;
2491
2492 const size_t nCntAttr = pTextNode->GetpSwpHints()
2493 ? pTextNode->GetSwpHints().Count() : 0;
2494
2495 for(size_t i = 0; i < nCntAttr; ++i)
2496 {
2497 SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2498 sal_uInt16 const nWhich = pHt->Which();
2499 if (RES_CHRATR_CJK_FONT == nWhich ||
2500 RES_CHRATR_CJK_FONTSIZE == nWhich ||
2501 RES_CHRATR_CJK_LANGUAGE == nWhich ||
2502 RES_CHRATR_CJK_POSTURE == nWhich ||
2503 RES_CHRATR_CJK_WEIGHT == nWhich)
2504 {
2505 bIsCJK = true;
2506 break;
2507 }
2508 if (RES_CHRATR_CTL_FONT == nWhich ||
2509 RES_CHRATR_CTL_FONTSIZE == nWhich ||
2510 RES_CHRATR_CTL_LANGUAGE == nWhich ||
2511 RES_CHRATR_CTL_POSTURE == nWhich ||
2512 RES_CHRATR_CTL_WEIGHT == nWhich)
2513 {
2514 bIsCTL = true;
2515 break;
2516 }
2517 }
2518
2519 if( bIsCTL )
2520 {
2521 pTextNode->SetAttr(
2522 SvxULSpaceItem( rULSpace.GetUpper(), HTML_CTL_PARSPACE, RES_UL_SPACE ) );
2523 }
2524 else if( bIsCJK )
2525 {
2526 pTextNode->SetAttr(
2527 SvxULSpaceItem( rULSpace.GetUpper(), HTML_CJK_PARSPACE, RES_UL_SPACE ) );
2528 } else {
2529 pTextNode->SetAttr(
2530 SvxULSpaceItem( rULSpace.GetUpper(), HTML_PARSPACE, RES_UL_SPACE ) );
2531 }
2532 }
2533 }
2534
Show()2535 void SwHTMLParser::Show()
2536 {
2537 // Here
2538 // - a EndAction is called, so the document is formatted
2539 // - a Reschedule is called,
2540 // - the own View-Shell is set again
2541 // - and a StartAction is called
2542
2543 OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2544 SwViewShell *pOldVSh = CallEndAction();
2545
2546 Application::Reschedule();
2547
2548 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2549 || 1 == m_xDoc->getReferenceCount() )
2550 {
2551 // was the import aborted by SFX?
2552 eState = SvParserState::Error;
2553 }
2554
2555 // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2556 SwViewShell *pVSh = CallStartAction( pOldVSh );
2557
2558 // is the current node not visible anymore, then we use a bigger increment
2559 if( pVSh )
2560 {
2561 m_nParaCnt = (m_pPam->GetPoint()->nNode.GetNode().IsInVisibleArea(pVSh))
2562 ? 5 : 50;
2563 }
2564 }
2565
ShowStatline()2566 void SwHTMLParser::ShowStatline()
2567 {
2568 // Here
2569 // - a Reschedule is called, so it can be scrolled
2570 // - the own View-Shell is set again
2571 // - a StartAction/EndAction is called, when there was scrolling.
2572
2573 OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2574
2575 // scroll bar
2576 if (m_xProgress)
2577 {
2578 m_xProgress->Update(rInput.Tell());
2579 CheckActionViewShell();
2580 }
2581 else
2582 {
2583 Application::Reschedule();
2584
2585 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2586 || 1 == m_xDoc->getReferenceCount() )
2587 // was the import aborted by SFX?
2588 eState = SvParserState::Error;
2589
2590 SwViewShell *pVSh = CheckActionViewShell();
2591 if( pVSh && pVSh->HasInvalidRect() )
2592 {
2593 CallEndAction( false, false );
2594 CallStartAction( pVSh, false );
2595 }
2596 }
2597 }
2598
CallStartAction(SwViewShell * pVSh,bool bChkPtr)2599 SwViewShell *SwHTMLParser::CallStartAction( SwViewShell *pVSh, bool bChkPtr )
2600 {
2601 OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2602
2603 if( !pVSh || bChkPtr )
2604 {
2605 #if OSL_DEBUG_LEVEL > 0
2606 SwViewShell *pOldVSh = pVSh;
2607 #endif
2608 pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2609 #if OSL_DEBUG_LEVEL > 0
2610 OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
2611 if( pOldVSh && !pVSh )
2612 pVSh = nullptr;
2613 #endif
2614 }
2615 m_pActionViewShell = pVSh;
2616
2617 if( m_pActionViewShell )
2618 {
2619 if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2620 pEditShell->StartAction();
2621 else
2622 m_pActionViewShell->StartAction();
2623 }
2624
2625 return m_pActionViewShell;
2626 }
2627
CallEndAction(bool bChkAction,bool bChkPtr)2628 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2629 {
2630 if( bChkPtr )
2631 {
2632 SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2633 OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2634 "CallEndAction: Who swapped the SwViewShell?" );
2635 #if OSL_DEBUG_LEVEL > 0
2636 if( m_pActionViewShell && !pVSh )
2637 pVSh = nullptr;
2638 #endif
2639 if( pVSh != m_pActionViewShell )
2640 m_pActionViewShell = nullptr;
2641 }
2642
2643 if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2644 return m_pActionViewShell;
2645
2646 if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr )
2647 {
2648 // Already scrolled?, then make sure that the view doesn't move!
2649 const bool bOldLock = m_pActionViewShell->IsViewLocked();
2650 m_pActionViewShell->LockView( true );
2651 const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
2652 m_pActionViewShell->SetEndActionByVirDev( true );
2653 static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2654 m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2655 m_pActionViewShell->LockView( bOldLock );
2656
2657 // bChkJumpMark is only set when the object was also found
2658 if( m_bChkJumpMark )
2659 {
2660 const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2661 if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2662 ::JumpToSwMark( m_pActionViewShell,
2663 GetMedium()->GetURLObject().GetMark() );
2664 m_bChkJumpMark = false;
2665 }
2666 }
2667 else
2668 m_pActionViewShell->EndAction();
2669
2670 // if the parser holds the last reference to the document, then we can
2671 // abort here and set an error.
2672 if( 1 == m_xDoc->getReferenceCount() )
2673 {
2674 eState = SvParserState::Error;
2675 }
2676
2677 SwViewShell *pVSh = m_pActionViewShell;
2678 m_pActionViewShell = nullptr;
2679
2680 return pVSh;
2681 }
2682
CheckActionViewShell()2683 SwViewShell *SwHTMLParser::CheckActionViewShell()
2684 {
2685 SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2686 OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2687 "CheckActionViewShell: Who has swapped SwViewShell?" );
2688 #if OSL_DEBUG_LEVEL > 0
2689 if( m_pActionViewShell && !pVSh )
2690 pVSh = nullptr;
2691 #endif
2692 if( pVSh != m_pActionViewShell )
2693 m_pActionViewShell = nullptr;
2694
2695 return m_pActionViewShell;
2696 }
2697
SwHTMLFrameFormatListener(SwFrameFormat * pFrameFormat)2698 SwHTMLFrameFormatListener::SwHTMLFrameFormatListener(SwFrameFormat* pFrameFormat)
2699 : m_pFrameFormat(pFrameFormat)
2700 {
2701 StartListening(m_pFrameFormat->GetNotifier());
2702 }
2703
Notify(const SfxHint & rHint)2704 void SwHTMLFrameFormatListener::Notify(const SfxHint& rHint)
2705 {
2706 if (rHint.GetId() == SfxHintId::Dying)
2707 m_pFrameFormat = nullptr;
2708 }
2709
// Applies the finished attributes queued in m_aSetAttrTab to the document,
// re-anchors the fly frames queued in m_aMoveFlyFrames and finally inserts
// the field attributes collected on the way.
//
// bChkEnd       if set, only attributes/flys matching the end-position
//               conditions below (relative to the current PaM position) are
//               handled; otherwise the decision is made from the node
//               array section the positions are in.
// bBeforeTable  set when the call happens before a table is inserted;
//               attributes ending in the current node are then ended in the
//               previous node or discarded.
// pPostIts      if non-null, PostIt and Script fields are moved to the
//               front of this deque instead of being inserted here.
//
// Ownership: every HTMLAttr taken from m_aSetAttrTab (and its Prev chain) is
// either deleted here or handed to a unique_ptr in pPostIts / aFields.
void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
                             std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
{
    SwPaM aAttrPam( *m_pPam->GetPoint() );
    const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
    const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
    HTMLAttr* pAttr;
    SwContentNode* pCNd;

    // fields are collected here and inserted at the very end
    std::vector<std::unique_ptr<HTMLAttr>> aFields;

    // iterate backwards, so that erasing entry n keeps the indices of the
    // entries still to be visited valid
    for( auto n = m_aSetAttrTab.size(); n; )
    {
        pAttr = m_aSetAttrTab[ --n ];
        sal_uInt16 nWhich = pAttr->m_pItem->Which();

        sal_uLong nEndParaIdx = pAttr->GetEndParagraphIdx();
        bool bSetAttr;
        if( bChkEnd )
        {
            // Set character attribute with end early on, so set them still in
            // the current paragraph (because of JavaScript and various "chats"(?)).
            // This shouldn't be done for attributes which are used for
            // the whole paragraph, because they could be from a paragraph style
            // which can't be set. Because the attributes are inserted with
            // SETATTR_DONTREPLACE, they should be able to be set later.
            bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
                         (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
                       ( !pAttr->IsLikePara() &&
                         nEndParaIdx == rEndIdx.GetIndex() &&
                         pAttr->GetEndContent() < nEndCnt &&
                         (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
                       ( bBeforeTable &&
                         nEndParaIdx == rEndIdx.GetIndex() &&
                         !pAttr->GetEndContent() );
        }
        else
        {
            // Attributes in body nodes array section shouldn't be set if we are in a
            // special nodes array section, but vice versa it's possible.
            sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
            bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
                       rEndIdx.GetIndex() > nEndOfIcons ||
                       nEndParaIdx <= nEndOfIcons;
        }

        if( bSetAttr )
        {
            // The attribute shouldn't be in the list of temporary paragraph
            // attributes, because then it would be deleted.
            while( !m_aParaAttrs.empty() )
            {
                OSL_ENSURE( pAttr != m_aParaAttrs.back(),
                        "SetAttr: Attribute must not yet be set" );
                m_aParaAttrs.pop_back();
            }

            // then set it
            m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );

            // handle the attribute and then everything in its Prev chain
            while( pAttr )
            {
                HTMLAttr *pPrev = pAttr->GetPrev();
                if( !pAttr->m_bValid )
                {
                    // invalid attributes can be deleted
                    delete pAttr;
                    pAttr = pPrev;
                    continue;
                }

                pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
                if( !pCNd )
                {
                    // because of the awful deleting of nodes an index can also
                    // point to an end node :-(
                    if ( (pAttr->GetStartParagraph() == pAttr->GetEndParagraph()) &&
                         !isTXTATR_NOEND(nWhich) )
                    {
                        // when the end index also points to the node, we don't
                        // need to set attributes anymore, except if it's a text attribute.
                        delete pAttr;
                        pAttr = pPrev;
                        continue;
                    }
                    pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
                    if( pCNd )
                        pAttr->m_nStartContent = 0;
                    else
                    {
                        OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
                        delete pAttr;
                        pAttr = pPrev;
                        continue;
                    }
                }
                aAttrPam.GetPoint()->nNode = pAttr->m_nStartPara;

                // because of the deleting of BRs the start index can also
                // point behind the end of the text
                if( pAttr->m_nStartContent > pCNd->Len() )
                    pAttr->m_nStartContent = pCNd->Len();
                aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );

                // the mark stays at the start, the point is moved to the end
                aAttrPam.SetMark();
                if ( (pAttr->GetStartParagraph() != pAttr->GetEndParagraph()) &&
                         !isTXTATR_NOEND(nWhich) )
                {
                    pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
                    if( !pCNd )
                    {
                        pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
                        if( pCNd )
                            pAttr->m_nEndContent = pCNd->Len();
                        else
                        {
                            OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
                            aAttrPam.DeleteMark();
                            delete pAttr;
                            pAttr = pPrev;
                            continue;
                        }
                    }

                    aAttrPam.GetPoint()->nNode = pAttr->m_nEndPara;
                }
                else if( pAttr->IsLikePara() )
                {
                    // paragraph-like attributes cover the whole node
                    pAttr->m_nEndContent = pCNd->Len();
                }

                // because of the deleting of BRs the end index can also
                // point behind the end of the text
                if( pAttr->m_nEndContent > pCNd->Len() )
                    pAttr->m_nEndContent = pCNd->Len();

                aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
                if( bBeforeTable &&
                    aAttrPam.GetPoint()->nNode.GetIndex() ==
                        rEndIdx.GetIndex() )
                {
                    // If we're before inserting a table and the attribute ends
                    // in the current node, then we must end it in the previous
                    // node or discard it, if it starts in that node.
                    if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
                         !isTXTATR_NOEND(nWhich) )
                    {
                        if( aAttrPam.GetMark()->nNode.GetIndex() !=
                            rEndIdx.GetIndex() )
                        {
                            OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
                                    "Content-Position before table not 0???" );
                            aAttrPam.Move( fnMoveBackward );
                        }
                        else
                        {
                            aAttrPam.DeleteMark();
                            delete pAttr;
                            pAttr = pPrev;
                            continue;
                        }
                    }
                }

                switch( nWhich )
                {
                case RES_FLTR_BOOKMARK: // insert bookmark
                    {
                        const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
                        IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
                        IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
                        if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
                            (*ppBkmk)->GetMarkStart() == *aAttrPam.GetPoint() )
                            break; // do not generate duplicates on this position
                        aAttrPam.DeleteMark();
                        const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
                            aAttrPam,
                            sName,
                            IDocumentMarkAccess::MarkType::BOOKMARK,
                            ::sw::mark::InsertMode::New);

                        // jump to bookmark
                        if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
                        {
                            m_bChkJumpMark = true;
                            m_eJumpTo = JumpToMarks::NONE;
                        }
                    }
                    break;
                case RES_TXTATR_FIELD:
                case RES_TXTATR_ANNOTATION:
                case RES_TXTATR_INPUTFIELD:
                    {
                        // The field type is only looked at when PostIts are
                        // collected; SwFieldIds::Database is just a value
                        // that matches neither Postit nor Script.
                        SwFieldIds nFieldWhich =
                            pPostIts
                            ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
                            : SwFieldIds::Database;
                        if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
                                         SwFieldIds::Script == nFieldWhich) )
                        {
                            // ownership of pAttr passes to the deque
                            pPostIts->emplace_front( pAttr );
                        }
                        else
                        {
                            // ownership of pAttr passes to aFields
                            aFields.emplace_back( pAttr);
                        }
                    }
                    aAttrPam.DeleteMark();
                    pAttr = pPrev;
                    continue;

                case RES_LR_SPACE:
                    if( aAttrPam.GetPoint()->nNode.GetIndex() ==
                        aAttrPam.GetMark()->nNode.GetIndex())
                    {
                        // because of numbering set this attribute directly at node
                        pCNd->SetAttr( *pAttr->m_pItem );
                        break;
                    }
                    OSL_ENSURE( false,
                            "LRSpace set over multiple paragraphs!" );
                    [[fallthrough]]; // (shouldn't reach this point anyway)

                // tdf#94088 expand RES_BACKGROUND to the new fill attribute
                // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
                // This is the right place in the future if the adapted fill attributes
                // may be handled more directly in HTML import to handle them.
                case RES_BACKGROUND:
                {
                    const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
                    SfxItemSet aNewSet(m_xDoc->GetAttrPool(), svl::Items<XATTR_FILL_FIRST, XATTR_FILL_LAST>{});

                    setSvxBrushItemAsFillAttributesToTargetSet(rBrush, aNewSet);
                    m_xDoc->getIDocumentContentOperations().InsertItemSet(aAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
                    break;
                }
                default:

                    // maybe jump to a bookmark
                    if( RES_TXTATR_INETFMT == nWhich &&
                        JumpToMarks::Mark == m_eJumpTo &&
                        m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
                    {
                        m_bChkJumpMark = true;
                        m_eJumpTo = JumpToMarks::NONE;
                    }

                    m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
                }
                aAttrPam.DeleteMark();

                delete pAttr;
                pAttr = pPrev;
            }
        }
    }

    // Re-anchor the queued fly frames at the remembered character position.
    // m_aMoveFlyFrames and m_aMoveFlyCnts are parallel containers and are
    // always erased together.
    for( auto n = m_aMoveFlyFrames.size(); n; )
    {
        SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[--n]->GetFrameFormat();
        if (!pFrameFormat)
        {
            SAL_WARN("sw.html", "SwFrameFormat deleted during import");
            m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
            m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
            continue;
        }

        const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
        OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
                "Only At-Para flys need special handling" );
        const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
        sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
        bool bMoveFly;
        if( bChkEnd )
        {
            // only flys anchored before the current position are moved
            bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
                       ( nFlyParaIdx == rEndIdx.GetIndex() &&
                         m_aMoveFlyCnts[n] < nEndCnt );
        }
        else
        {
            sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
            bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
                       rEndIdx.GetIndex() > nEndOfIcons ||
                       nFlyParaIdx <= nEndOfIcons;
        }
        if( bMoveFly )
        {
            pFrameFormat->DelFrames();
            *aAttrPam.GetPoint() = *pFlyPos;
            aAttrPam.GetPoint()->nContent.Assign( aAttrPam.GetContentNode(),
                                                 m_aMoveFlyCnts[n] );
            SwFormatAnchor aAnchor( rAnchor );
            aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
            aAnchor.SetAnchor( aAttrPam.GetPoint() );
            pFrameFormat->SetFormatAttr( aAnchor );

            // LEFT / TOP oriented flys are made relative to the character
            const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
            if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
            {
                SwFormatHoriOrient aHoriOri( rHoriOri );
                aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
                pFrameFormat->SetFormatAttr( aHoriOri );
            }
            const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
            if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
            {
                SwFormatVertOrient aVertOri( rVertOri );
                aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
                pFrameFormat->SetFormatAttr( aVertOri );
            }

            pFrameFormat->MakeFrames();
            m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
            m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
        }
    }
    // finally insert the collected fields at their start positions
    for (auto & field : aFields)
    {
        pCNd = field->m_nStartPara.GetNode().GetContentNode();
        aAttrPam.GetPoint()->nNode = field->m_nStartPara;
        aAttrPam.GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );

        if( bBeforeTable &&
            aAttrPam.GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
        {
            OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
            OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
                    "Content-Position before table not 0???" );
            // !!!
            aAttrPam.Move( fnMoveBackward );
        }

        m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *field->m_pItem );

        // releases the HTMLAttr owned by this entry
        field.reset();
    }
    aFields.clear();
}
3050
NewAttr(const std::shared_ptr<HTMLAttrTable> & rAttrTable,HTMLAttr ** ppAttr,const SfxPoolItem & rItem)3051 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3052 {
3053 // Font height and font colour as well as escape attributes may not be
3054 // combined. Therefore they're saved in a list and in it the last opened
3055 // attribute is at the beginning and count is always one. For all other
3056 // attributes count is just incremented.
3057 if( *ppAttr )
3058 {
3059 HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3060 pAttr->InsertNext( *ppAttr );
3061 (*ppAttr) = pAttr;
3062 }
3063 else
3064 (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3065 }
3066
// Ends the open attribute pAttr at the current PaM position and removes it
// from its list of open attributes.
//
// pAttr      the attribute to end. It is either queued for setting
//            (m_aSetAttrTab or the Prev list of the next open attribute) or
//            deleted here; the caller must not use it afterwards if false
//            is returned.
// bChkEmpty  if set, an attribute with an empty range is not set but
//            deleted (with the exceptions listed in the condition below).
//
// Returns true if the attribute was queued, false if it was deleted.
bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
{
    bool bRet = true;

    // The list header is saved in the attribute.
    HTMLAttr **ppHead = pAttr->m_ppHead;

    OSL_ENSURE( ppHead, "No list header attribute found!" );

    // save the current position as end position
    const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
    sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();

    // Is the last started or an earlier started attribute being ended?
    HTMLAttr *pLast = nullptr;
    if( ppHead && pAttr != *ppHead )
    {
        // The last started attribute isn't being ended

        // Then we look for attribute which was started immediately afterwards,
        // which has also not yet been ended (otherwise it would no longer be
        // in the list).
        pLast = *ppHead;
        while( pLast && pLast->GetNext() != pAttr )
            pLast = pLast->GetNext();

        OSL_ENSURE( pLast, "Attribute not found in own list!" );
    }

    bool bMoveBack = false;
    sal_uInt16 nWhich = pAttr->m_pItem->Which();
    // Items from RES_PARATR_BEGIN upwards that end at the very start of a
    // later paragraph are ended one position earlier.
    if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
        *pEndIdx != pAttr->GetStartParagraph() )
    {
        // Then move back one position in the content!
        bMoveBack = m_pPam->Move( fnMoveBackward );
        nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
    }

    // now end the attribute
    HTMLAttr *pNext = pAttr->GetNext();

    bool bInsert;
    sal_uInt16 nScriptItem = 0;
    bool bScript = false;
    // does it have a non-empty range?
    if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
        RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
        *pEndIdx != pAttr->GetStartParagraph() ||
        nEndCnt != pAttr->GetStartContent() )
    {
        bInsert = true;
        // We do some optimization for script dependent attributes here.
        if( *pEndIdx == pAttr->GetStartParagraph() )
        {
            lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
        }
    }
    else
    {
        bInsert = false;
    }

    // Only a script dependent attribute that starts and ends in the same
    // paragraph is split by script runs.
    const SwTextNode *pTextNd = (bInsert && bScript) ?
        pAttr->GetStartParagraph().GetNode().GetTextNode() :
        nullptr;

    if (pTextNd)
    {
        const OUString& rText = pTextNd->GetText();
        sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
                        rText, pAttr->GetStartContent() );
        sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
                ->endOfScript( rText, pAttr->GetStartContent(), nScriptText );
        while (nScriptEnd < nEndCnt && nScriptEnd != -1)
        {
            // queue a clone for every run whose script type matches the item
            if( nScriptItem == nScriptText )
            {
                HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
                pSetAttr->ClearPrev();
                if( pNext )
                    pNext->InsertPrev( pSetAttr );
                else
                {
                    if (pSetAttr->m_bInsAtStart)
                        m_aSetAttrTab.push_front( pSetAttr );
                    else
                        m_aSetAttrTab.push_back( pSetAttr );
                }
            }
            // the attribute itself now starts at the next script run
            pAttr->m_nStartContent = nScriptEnd;
            nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
                            rText, nScriptEnd );
            nScriptEnd = g_pBreakIt->GetBreakIter()
                    ->endOfScript( rText, nScriptEnd, nScriptText );
        }
        // the last run is only set if its script type matches the item
        bInsert = nScriptItem == nScriptText;
    }
    if( bInsert )
    {
        pAttr->m_nEndPara = *pEndIdx;
        pAttr->m_nEndContent = nEndCnt;
        // INetFormat and CharFormat attributes are appended to the set
        // table, all others are inserted at its front
        pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
                             RES_TXTATR_CHARFMT != nWhich;

        if( !pNext )
        {
            // No open attributes of that type exists any longer, so all
            // can be set. Except they depend on another attribute, then
            // they're appended there.
            if (pAttr->m_bInsAtStart)
                m_aSetAttrTab.push_front( pAttr );
            else
                m_aSetAttrTab.push_back( pAttr );
        }
        else
        {
            // There are other open attributes of that type,
            // therefore the setting must be postponed.
            // Hence the current attribute is added at the end
            // of the Prev-List of the successor.
            pNext->InsertPrev( pAttr );
        }
    }
    else
    {
        // Then don't insert, but delete. Because of the "faking" of styles
        // by hard attributing there can be also other empty attributes in the
        // Prev-List, which must be set anyway.
        HTMLAttr *pPrev = pAttr->GetPrev();
        bRet = false;
        delete pAttr;

        if( pPrev )
        {
            // The previous attributes must be set anyway.
            if( pNext )
                pNext->InsertPrev( pPrev );
            else
            {
                if (pPrev->m_bInsAtStart)
                    m_aSetAttrTab.push_front( pPrev );
                else
                    m_aSetAttrTab.push_back( pPrev );
            }
        }

    }

    // If the first attribute of the list was set, then the list header
    // must be corrected as well.
    if( pLast )
        pLast->m_pNext = pNext;
    else if( ppHead )
        *ppHead = pNext;

    // undo the backward move done above
    if( bMoveBack )
        m_pPam->Move( fnMoveForward );

    return bRet;
}
3228
DeleteAttr(HTMLAttr * pAttr)3229 void SwHTMLParser::DeleteAttr( HTMLAttr* pAttr )
3230 {
3231 // preliminary paragraph attributes are not allowed here, they could
3232 // be set here and then the pointers become invalid!
3233 OSL_ENSURE(m_aParaAttrs.empty(),
3234 "Danger: there are non-final paragraph attributes");
3235 m_aParaAttrs.clear();
3236
3237 // The list header is saved in the attribute
3238 HTMLAttr **ppHead = pAttr->m_ppHead;
3239
3240 OSL_ENSURE( ppHead, "no list header attribute found!" );
3241
3242 // Is the last started or an earlier started attribute being removed?
3243 HTMLAttr *pLast = nullptr;
3244 if( ppHead && pAttr != *ppHead )
3245 {
3246 // The last started attribute isn't being ended
3247
3248 // Then we look for attribute which was started immediately afterwards,
3249 // which has also not yet been ended (otherwise it would no longer be
3250 // in the list).
3251 pLast = *ppHead;
3252 while( pLast && pLast->GetNext() != pAttr )
3253 pLast = pLast->GetNext();
3254
3255 OSL_ENSURE( pLast, "Attribute not found in own list!" );
3256 }
3257
3258 // now delete the attribute
3259 HTMLAttr *pNext = pAttr->GetNext();
3260 HTMLAttr *pPrev = pAttr->GetPrev();
3261 //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3262 std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3263 delete pAttr;
3264
3265 if( pPrev )
3266 {
3267 // The previous attributes must be set anyway.
3268 if( pNext )
3269 pNext->InsertPrev( pPrev );
3270 else
3271 {
3272 if (pPrev->m_bInsAtStart)
3273 m_aSetAttrTab.push_front( pPrev );
3274 else
3275 m_aSetAttrTab.push_back( pPrev );
3276 }
3277 }
3278
3279 // If the first attribute of the list was deleted, then the list header
3280 // must be corrected as well.
3281 if( pLast )
3282 pLast->m_pNext = pNext;
3283 else if( ppHead )
3284 *ppHead = pNext;
3285 }
3286
SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)3287 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3288 {
3289 // preliminary paragraph attributes are not allowed here, they could
3290 // be set here and then the pointers become invalid!
3291 OSL_ENSURE(m_aParaAttrs.empty(),
3292 "Danger: there are non-final paragraph attributes");
3293 m_aParaAttrs.clear();
3294
3295 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3296 HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3297
3298 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3299 {
3300 *pSaveAttributes = *pHTMLAttributes;
3301
3302 HTMLAttr *pAttr = *pSaveAttributes;
3303 while (pAttr)
3304 {
3305 pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3306 pAttr = pAttr->GetNext();
3307 }
3308
3309 *pHTMLAttributes = nullptr;
3310 }
3311 }
3312
SplitAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,bool bMoveEndBack)3313 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3314 bool bMoveEndBack )
3315 {
3316 // preliminary paragraph attributes are not allowed here, they could
3317 // be set here and then the pointers become invalid!
3318 OSL_ENSURE(m_aParaAttrs.empty(),
3319 "Danger: there are non-final paragraph attributes");
3320 m_aParaAttrs.clear();
3321
3322 const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
3323 SwNodeIndex nEndIdx( nSttIdx );
3324
3325 // close all still open attributes and re-open them after the table
3326 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3327 HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3328 bool bSetAttr = true;
3329 const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
3330 sal_Int32 nEndCnt = nSttCnt;
3331
3332 if( bMoveEndBack )
3333 {
3334 sal_uLong nOldEnd = nEndIdx.GetIndex();
3335 sal_uLong nTmpIdx;
3336 if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3337 ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3338 {
3339 nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3340 }
3341 SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3342
3343 // Don't set attributes, when the PaM was moved outside of the content area.
3344 bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
3345
3346 nEndCnt = (bSetAttr ? pCNd->Len() : 0);
3347 }
3348 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3349 {
3350 HTMLAttr *pAttr = *pHTMLAttributes;
3351 *pSaveAttributes = nullptr;
3352 while( pAttr )
3353 {
3354 HTMLAttr *pNext = pAttr->GetNext();
3355 HTMLAttr *pPrev = pAttr->GetPrev();
3356
3357 if( bSetAttr &&
3358 ( pAttr->GetStartParagraphIdx() < nEndIdx.GetIndex() ||
3359 (pAttr->GetStartParagraph() == nEndIdx &&
3360 pAttr->GetStartContent() != nEndCnt) ) )
3361 {
3362 // The attribute must be set before the list. We need the
3363 // original and therefore we clone it, because pointer to the
3364 // attribute exist in the other contexts. The Next-List is lost
3365 // in doing so, but the Previous-List is preserved.
3366 HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );
3367
3368 if( pNext )
3369 pNext->InsertPrev( pSetAttr );
3370 else
3371 {
3372 if (pSetAttr->m_bInsAtStart)
3373 m_aSetAttrTab.push_front( pSetAttr );
3374 else
3375 m_aSetAttrTab.push_back( pSetAttr );
3376 }
3377 }
3378 else if( pPrev )
3379 {
3380 // If the attribute doesn't need to be set before the table, then
3381 // the previous attributes must still be set.
3382 if( pNext )
3383 pNext->InsertPrev( pPrev );
3384 else
3385 {
3386 if (pPrev->m_bInsAtStart)
3387 m_aSetAttrTab.push_front( pPrev );
3388 else
3389 m_aSetAttrTab.push_back( pPrev );
3390 }
3391 }
3392
3393 // set the start of the attribute anew and break link
3394 pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);
3395
3396 if (*pSaveAttributes)
3397 {
3398 HTMLAttr *pSAttr = *pSaveAttributes;
3399 while( pSAttr->GetNext() )
3400 pSAttr = pSAttr->GetNext();
3401 pSAttr->InsertNext( pAttr );
3402 }
3403 else
3404 *pSaveAttributes = pAttr;
3405
3406 pAttr = pNext;
3407 }
3408
3409 *pHTMLAttributes = nullptr;
3410 }
3411 }
3412
RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)3413 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3414 {
3415 // preliminary paragraph attributes are not allowed here, they could
3416 // be set here and then the pointers become invalid!
3417 OSL_ENSURE(m_aParaAttrs.empty(),
3418 "Danger: there are non-final paragraph attributes");
3419 m_aParaAttrs.clear();
3420
3421 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3422 HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3423
3424 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3425 {
3426 OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3427
3428 *pHTMLAttributes = *pSaveAttributes;
3429
3430 HTMLAttr *pAttr = *pHTMLAttributes;
3431 while (pAttr)
3432 {
3433 OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3434 "Previous attribute has still a header" );
3435 pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3436 pAttr = pAttr->GetNext();
3437 }
3438
3439 *pSaveAttributes = nullptr;
3440 }
3441 }
3442
InsertAttr(const SfxPoolItem & rItem,bool bInsAtStart)3443 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3444 {
3445 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3446 if (bInsAtStart)
3447 m_aSetAttrTab.push_front( pTmp );
3448 else
3449 m_aSetAttrTab.push_back( pTmp );
3450 }
3451
InsertAttrs(std::deque<std::unique_ptr<HTMLAttr>> rAttrs)3452 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3453 {
3454 while( !rAttrs.empty() )
3455 {
3456 std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3457 InsertAttr( pAttr->GetItem(), false );
3458 rAttrs.pop_front();
3459 }
3460 }
3461
NewStdAttr(HtmlTokenId nToken)3462 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken )
3463 {
3464 OUString aId, aStyle, aLang, aDir;
3465 OUString aClass;
3466
3467 const HTMLOptions& rHTMLOptions = GetOptions();
3468 for (size_t i = rHTMLOptions.size(); i; )
3469 {
3470 const HTMLOption& rOption = rHTMLOptions[--i];
3471 switch( rOption.GetToken() )
3472 {
3473 case HtmlOptionId::ID:
3474 aId = rOption.GetString();
3475 break;
3476 case HtmlOptionId::STYLE:
3477 aStyle = rOption.GetString();
3478 break;
3479 case HtmlOptionId::CLASS:
3480 aClass = rOption.GetString();
3481 break;
3482 case HtmlOptionId::LANG:
3483 aLang = rOption.GetString();
3484 break;
3485 case HtmlOptionId::DIR:
3486 aDir = rOption.GetString();
3487 break;
3488 default: break;
3489 }
3490 }
3491
3492 // create a new context
3493 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3494
3495 // parse styles
3496 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3497 {
3498 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3499 SvxCSS1PropertyInfo aPropInfo;
3500
3501 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3502 {
3503 if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3504 !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3505 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3506 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3507 }
3508 }
3509
3510 // save the context
3511 PushContext(xCntxt);
3512 }
3513
NewStdAttr(HtmlTokenId nToken,HTMLAttr ** ppAttr,const SfxPoolItem & rItem,HTMLAttr ** ppAttr2,const SfxPoolItem * pItem2,HTMLAttr ** ppAttr3,const SfxPoolItem * pItem3)3514 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken,
3515 HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3516 HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3517 HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3518 {
3519 OUString aId, aStyle, aClass, aLang, aDir;
3520
3521 const HTMLOptions& rHTMLOptions = GetOptions();
3522 for (size_t i = rHTMLOptions.size(); i; )
3523 {
3524 const HTMLOption& rOption = rHTMLOptions[--i];
3525 switch( rOption.GetToken() )
3526 {
3527 case HtmlOptionId::ID:
3528 aId = rOption.GetString();
3529 break;
3530 case HtmlOptionId::STYLE:
3531 aStyle = rOption.GetString();
3532 break;
3533 case HtmlOptionId::CLASS:
3534 aClass = rOption.GetString();
3535 break;
3536 case HtmlOptionId::LANG:
3537 aLang = rOption.GetString();
3538 break;
3539 case HtmlOptionId::DIR:
3540 aDir = rOption.GetString();
3541 break;
3542 default: break;
3543 }
3544 }
3545
3546 // create a new context
3547 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3548
3549 // parse styles
3550 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3551 {
3552 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3553 SvxCSS1PropertyInfo aPropInfo;
3554
3555 aItemSet.Put( rItem );
3556 if( pItem2 )
3557 aItemSet.Put( *pItem2 );
3558 if( pItem3 )
3559 aItemSet.Put( *pItem3 );
3560
3561 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3562 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3563
3564 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3565 }
3566 else
3567 {
3568 InsertAttr( ppAttr ,rItem, xCntxt.get() );
3569 if( pItem2 )
3570 {
3571 OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3572 InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3573 }
3574 if( pItem3 )
3575 {
3576 OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3577 InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3578 }
3579 }
3580
3581 // save the context
3582 PushContext(xCntxt);
3583 }
3584
EndTag(HtmlTokenId nToken)3585 void SwHTMLParser::EndTag( HtmlTokenId nToken )
3586 {
3587 // fetch context
3588 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3589 if (xCntxt)
3590 {
3591 // and maybe end the attributes
3592 EndContext(xCntxt.get());
3593 }
3594 }
3595
NewBasefontAttr()3596 void SwHTMLParser::NewBasefontAttr()
3597 {
3598 OUString aId, aStyle, aClass, aLang, aDir;
3599 sal_uInt16 nSize = 3;
3600
3601 const HTMLOptions& rHTMLOptions = GetOptions();
3602 for (size_t i = rHTMLOptions.size(); i; )
3603 {
3604 const HTMLOption& rOption = rHTMLOptions[--i];
3605 switch( rOption.GetToken() )
3606 {
3607 case HtmlOptionId::SIZE:
3608 nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
3609 break;
3610 case HtmlOptionId::ID:
3611 aId = rOption.GetString();
3612 break;
3613 case HtmlOptionId::STYLE:
3614 aStyle = rOption.GetString();
3615 break;
3616 case HtmlOptionId::CLASS:
3617 aClass = rOption.GetString();
3618 break;
3619 case HtmlOptionId::LANG:
3620 aLang = rOption.GetString();
3621 break;
3622 case HtmlOptionId::DIR:
3623 aDir = rOption.GetString();
3624 break;
3625 default: break;
3626 }
3627 }
3628
3629 if( nSize < 1 )
3630 nSize = 1;
3631
3632 if( nSize > 7 )
3633 nSize = 7;
3634
3635 // create a new context
3636 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3637
3638 // parse styles
3639 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3640 {
3641 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3642 SvxCSS1PropertyInfo aPropInfo;
3643
3644 //CJK has different defaults
3645 SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3646 aItemSet.Put( aFontHeight );
3647 SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3648 aItemSet.Put( aFontHeightCJK );
3649 //Complex type can contain so many types of letters,
3650 //that it's not really worthy to bother, IMO.
3651 //Still, I have set a default.
3652 SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3653 aItemSet.Put( aFontHeightCTL );
3654
3655 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3656 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3657
3658 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3659 }
3660 else
3661 {
3662 SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3663 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3664 SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3665 InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3666 SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3667 InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3668 }
3669
3670 // save the context
3671 PushContext(xCntxt);
3672
3673 // save the font size
3674 m_aBaseFontStack.push_back( nSize );
3675 }
3676
EndBasefontAttr()3677 void SwHTMLParser::EndBasefontAttr()
3678 {
3679 EndTag( HtmlTokenId::BASEFONT_ON );
3680
3681 // avoid stack underflow in tables
3682 if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3683 m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3684 }
3685
NewFontAttr(HtmlTokenId nToken)3686 void SwHTMLParser::NewFontAttr( HtmlTokenId nToken )
3687 {
3688 sal_uInt16 nBaseSize =
3689 ( m_aBaseFontStack.size() > m_nBaseFontStMin
3690 ? (m_aBaseFontStack[m_aBaseFontStack.size()-1] & FONTSIZE_MASK)
3691 : 3 );
3692 sal_uInt16 nFontSize =
3693 ( m_aFontStack.size() > m_nFontStMin
3694 ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3695 : nBaseSize );
3696
3697 OUString aFace, aId, aStyle, aClass, aLang, aDir;
3698 Color aColor;
3699 sal_uLong nFontHeight = 0; // actual font height to set
3700 sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3701 bool bColor = false;
3702
3703 const HTMLOptions& rHTMLOptions = GetOptions();
3704 for (size_t i = rHTMLOptions.size(); i; )
3705 {
3706 const HTMLOption& rOption = rHTMLOptions[--i];
3707 switch( rOption.GetToken() )
3708 {
3709 case HtmlOptionId::SIZE:
3710 if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3711 {
3712 sal_Int32 nSSize;
3713 if( '+' == rOption.GetString()[0] ||
3714 '-' == rOption.GetString()[0] )
3715 nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3716 else
3717 nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3718
3719 if( nSSize < 1 )
3720 nSSize = 1;
3721 else if( nSSize > 7 )
3722 nSSize = 7;
3723
3724 nSize = o3tl::narrowing<sal_uInt16>(nSSize);
3725 nFontHeight = m_aFontHeights[nSize-1];
3726 }
3727 break;
3728 case HtmlOptionId::COLOR:
3729 if( HtmlTokenId::FONT_ON==nToken )
3730 {
3731 rOption.GetColor( aColor );
3732 bColor = true;
3733 }
3734 break;
3735 case HtmlOptionId::FACE:
3736 if( HtmlTokenId::FONT_ON==nToken )
3737 aFace = rOption.GetString();
3738 break;
3739 case HtmlOptionId::ID:
3740 aId = rOption.GetString();
3741 break;
3742 case HtmlOptionId::STYLE:
3743 aStyle = rOption.GetString();
3744 break;
3745 case HtmlOptionId::CLASS:
3746 aClass = rOption.GetString();
3747 break;
3748 case HtmlOptionId::LANG:
3749 aLang = rOption.GetString();
3750 break;
3751 case HtmlOptionId::DIR:
3752 aDir = rOption.GetString();
3753 break;
3754 default: break;
3755 }
3756 }
3757
3758 if( HtmlTokenId::FONT_ON != nToken )
3759 {
3760 // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3761
3762 // In headings the current heading sets the font height
3763 // and not BASEFONT.
3764 const SwFormatColl *pColl = GetCurrFormatColl();
3765 sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3766 if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3767 nPoolId<=RES_POOLCOLL_HEADLINE6 )
3768 {
3769 // If the font height in the heading wasn't changed yet,
3770 // then take the one from the style.
3771 if( m_nFontStHeadStart==m_aFontStack.size() )
3772 nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3773 }
3774 else
3775 nPoolId = 0;
3776
3777 if( HtmlTokenId::BIGPRINT_ON == nToken )
3778 nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3779 else
3780 nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3781
3782 // If possible in headlines we fetch the new font height
3783 // from the style.
3784 if( nPoolId && nSize>=1 && nSize <=6 )
3785 nFontHeight =
3786 m_pCSS1Parser->GetTextCollFromPool(
3787 RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3788 else
3789 nFontHeight = m_aFontHeights[nSize-1];
3790 }
3791
3792 OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3793
3794 OUString aFontName;
3795 const OUString aStyleName;
3796 FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3797 FontPitch ePitch = PITCH_DONTKNOW; // if not found
3798 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3799
3800 if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3801 {
3802 const FontList *pFList = nullptr;
3803 SwDocShell *pDocSh = m_xDoc->GetDocShell();
3804 if( pDocSh )
3805 {
3806 const SvxFontListItem *pFListItem =
3807 static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3808 if( pFListItem )
3809 pFList = pFListItem->GetFontList();
3810 }
3811
3812 bool bFound = false;
3813 sal_Int32 nStrPos = 0;
3814 while( nStrPos!= -1 )
3815 {
3816 OUString aFName = aFace.getToken( 0, ',', nStrPos );
3817 aFName = comphelper::string::strip(aFName, ' ');
3818 if( !aFName.isEmpty() )
3819 {
3820 if( !bFound && pFList )
3821 {
3822 sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3823 if( nullptr != hFont )
3824 {
3825 const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3826 if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3827 {
3828 bFound = true;
3829 if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3830 eEnc = RTL_TEXTENCODING_SYMBOL;
3831 }
3832 }
3833 }
3834 if( !aFontName.isEmpty() )
3835 aFontName += ";";
3836 aFontName += aFName;
3837 }
3838 }
3839 }
3840
3841 // create a new context
3842 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3843
3844 // parse styles
3845 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3846 {
3847 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3848 SvxCSS1PropertyInfo aPropInfo;
3849
3850 if( nFontHeight )
3851 {
3852 SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3853 aItemSet.Put( aFontHeight );
3854 SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3855 aItemSet.Put( aFontHeightCJK );
3856 SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3857 aItemSet.Put( aFontHeightCTL );
3858 }
3859 if( bColor )
3860 aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3861 if( !aFontName.isEmpty() )
3862 {
3863 SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3864 aItemSet.Put( aFont );
3865 SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3866 aItemSet.Put( aFontCJK );
3867 SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3868 aItemSet.Put( aFontCTL );
3869 }
3870
3871 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3872 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3873
3874 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3875 }
3876 else
3877 {
3878 if( nFontHeight )
3879 {
3880 SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3881 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3882 SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3883 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3884 SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3885 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3886 }
3887 if( bColor )
3888 InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3889 if( !aFontName.isEmpty() )
3890 {
3891 SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3892 InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3893 SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3894 InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3895 SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3896 InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3897 }
3898 }
3899
3900 // save the context
3901 PushContext(xCntxt);
3902
3903 m_aFontStack.push_back( nSize );
3904 }
3905
EndFontAttr(HtmlTokenId nToken)3906 void SwHTMLParser::EndFontAttr( HtmlTokenId nToken )
3907 {
3908 EndTag( nToken );
3909
3910 // avoid stack underflow in tables
3911 if( m_aFontStack.size() > m_nFontStMin )
3912 m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3913 }
3914
NewPara()3915 void SwHTMLParser::NewPara()
3916 {
3917 if( m_pPam->GetPoint()->nContent.GetIndex() )
3918 AppendTextNode( AM_SPACE );
3919 else
3920 AddParSpace();
3921
3922 m_eParaAdjust = SvxAdjust::End;
3923 OUString aId, aStyle, aClass, aLang, aDir;
3924
3925 const HTMLOptions& rHTMLOptions = GetOptions();
3926 for (size_t i = rHTMLOptions.size(); i; )
3927 {
3928 const HTMLOption& rOption = rHTMLOptions[--i];
3929 switch( rOption.GetToken() )
3930 {
3931 case HtmlOptionId::ID:
3932 aId = rOption.GetString();
3933 break;
3934 case HtmlOptionId::ALIGN:
3935 m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3936 break;
3937 case HtmlOptionId::STYLE:
3938 aStyle = rOption.GetString();
3939 break;
3940 case HtmlOptionId::CLASS:
3941 aClass = rOption.GetString();
3942 break;
3943 case HtmlOptionId::LANG:
3944 aLang = rOption.GetString();
3945 break;
3946 case HtmlOptionId::DIR:
3947 aDir = rOption.GetString();
3948 break;
3949 default: break;
3950 }
3951 }
3952
3953 // create a new context
3954 std::unique_ptr<HTMLAttrContext> xCntxt(
3955 !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3956 RES_POOLCOLL_TEXT, aClass )
3957 : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3958
3959 // parse styles (Don't consider class. This is only possible as long as none of
3960 // the CSS1 properties of the class must be formatted hard!!!)
3961 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
3962 {
3963 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3964 SvxCSS1PropertyInfo aPropInfo;
3965
3966 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3967 {
3968 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3969 "Class is not considered" );
3970 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3971 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3972 }
3973 }
3974
3975 if( SvxAdjust::End != m_eParaAdjust )
3976 InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3977
3978 // and push on stack
3979 PushContext( xCntxt );
3980
3981 // set the current style or its attributes
3982 SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3983
3984 // progress bar
3985 ShowStatline();
3986
3987 OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3988 m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3989 }
3990
EndPara(bool bReal)3991 void SwHTMLParser::EndPara( bool bReal )
3992 {
3993 if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3994 {
3995 #if OSL_DEBUG_LEVEL > 0
3996 const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3997 OSL_ENSURE( pNumRule, "Where is the NumRule" );
3998 #endif
3999 }
4000
4001 // Netscape skips empty paragraphs, we do the same.
4002 if( bReal )
4003 {
4004 if( m_pPam->GetPoint()->nContent.GetIndex() )
4005 AppendTextNode( AM_SPACE );
4006 else
4007 AddParSpace();
4008 }
4009
4010 // If a DD or DT was open, it's an implied definition list,
4011 // which must be closed now.
4012 if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
4013 m_nDefListDeep)
4014 {
4015 m_nDefListDeep--;
4016 }
4017
4018 // Pop the context of the stack. It can also be from an
4019 // implied opened definition list.
4020 std::unique_ptr<HTMLAttrContext> xCntxt(
4021 PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4022
4023 // close attribute
4024 if (xCntxt)
4025 {
4026 EndContext(xCntxt.get());
4027 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4028 xCntxt.reset();
4029 }
4030
4031 // reset the existing style
4032 if( bReal )
4033 SetTextCollAttrs();
4034
4035 m_nOpenParaToken = HtmlTokenId::NONE;
4036 }
4037
NewHeading(HtmlTokenId nToken)4038 void SwHTMLParser::NewHeading( HtmlTokenId nToken )
4039 {
4040 m_eParaAdjust = SvxAdjust::End;
4041
4042 OUString aId, aStyle, aClass, aLang, aDir;
4043
4044 const HTMLOptions& rHTMLOptions = GetOptions();
4045 for (size_t i = rHTMLOptions.size(); i; )
4046 {
4047 const HTMLOption& rOption = rHTMLOptions[--i];
4048 switch( rOption.GetToken() )
4049 {
4050 case HtmlOptionId::ID:
4051 aId = rOption.GetString();
4052 break;
4053 case HtmlOptionId::ALIGN:
4054 m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4055 break;
4056 case HtmlOptionId::STYLE:
4057 aStyle = rOption.GetString();
4058 break;
4059 case HtmlOptionId::CLASS:
4060 aClass = rOption.GetString();
4061 break;
4062 case HtmlOptionId::LANG:
4063 aLang = rOption.GetString();
4064 break;
4065 case HtmlOptionId::DIR:
4066 aDir = rOption.GetString();
4067 break;
4068 default: break;
4069 }
4070 }
4071
4072 // open a new paragraph
4073 if( m_pPam->GetPoint()->nContent.GetIndex() )
4074 AppendTextNode( AM_SPACE );
4075 else
4076 AddParSpace();
4077
4078 // search for the matching style
4079 sal_uInt16 nTextColl;
4080 switch( nToken )
4081 {
4082 case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4083 case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4084 case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4085 case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4086 case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4087 case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4088 default: nTextColl = RES_POOLCOLL_STANDARD; break;
4089 }
4090
4091 // create the context
4092 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4093
4094 // parse styles (regarding class see also NewPara)
4095 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4096 {
4097 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4098 SvxCSS1PropertyInfo aPropInfo;
4099
4100 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4101 {
4102 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4103 "Class is not considered" );
4104 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4105 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4106 }
4107 }
4108
4109 if( SvxAdjust::End != m_eParaAdjust )
4110 InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4111
4112 // and push on stack
4113 PushContext(xCntxt);
4114
4115 // set the current style or its attributes
4116 SetTextCollAttrs(m_aContexts.back().get());
4117
4118 m_nFontStHeadStart = m_aFontStack.size();
4119
4120 // progress bar
4121 ShowStatline();
4122 }
4123
EndHeading()4124 void SwHTMLParser::EndHeading()
4125 {
4126 // open a new paragraph
4127 if( m_pPam->GetPoint()->nContent.GetIndex() )
4128 AppendTextNode( AM_SPACE );
4129 else
4130 AddParSpace();
4131
4132 // search context matching the token and fetch it from stack
4133 std::unique_ptr<HTMLAttrContext> xCntxt;
4134 auto nPos = m_aContexts.size();
4135 while( !xCntxt && nPos>m_nContextStMin )
4136 {
4137 switch( m_aContexts[--nPos]->GetToken() )
4138 {
4139 case HtmlTokenId::HEAD1_ON:
4140 case HtmlTokenId::HEAD2_ON:
4141 case HtmlTokenId::HEAD3_ON:
4142 case HtmlTokenId::HEAD4_ON:
4143 case HtmlTokenId::HEAD5_ON:
4144 case HtmlTokenId::HEAD6_ON:
4145 xCntxt = std::move(m_aContexts[nPos]);
4146 m_aContexts.erase( m_aContexts.begin() + nPos );
4147 break;
4148 default: break;
4149 }
4150 }
4151
4152 // and now end attributes
4153 if (xCntxt)
4154 {
4155 EndContext(xCntxt.get());
4156 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4157 xCntxt.reset();
4158 }
4159
4160 // reset existing style
4161 SetTextCollAttrs();
4162
4163 m_nFontStHeadStart = m_nFontStMin;
4164 }
4165
NewTextFormatColl(HtmlTokenId nToken,sal_uInt16 nColl)4166 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4167 {
4168 OUString aId, aStyle, aClass, aLang, aDir;
4169
4170 const HTMLOptions& rHTMLOptions = GetOptions();
4171 for (size_t i = rHTMLOptions.size(); i; )
4172 {
4173 const HTMLOption& rOption = rHTMLOptions[--i];
4174 switch( rOption.GetToken() )
4175 {
4176 case HtmlOptionId::ID:
4177 aId = rOption.GetString();
4178 break;
4179 case HtmlOptionId::STYLE:
4180 aStyle = rOption.GetString();
4181 break;
4182 case HtmlOptionId::CLASS:
4183 aClass = rOption.GetString();
4184 break;
4185 case HtmlOptionId::LANG:
4186 aLang = rOption.GetString();
4187 break;
4188 case HtmlOptionId::DIR:
4189 aDir = rOption.GetString();
4190 break;
4191 default: break;
4192 }
4193 }
4194
4195 // open a new paragraph
4196 SwHTMLAppendMode eMode = AM_NORMAL;
4197 switch( nToken )
4198 {
4199 case HtmlTokenId::LISTING_ON:
4200 case HtmlTokenId::XMP_ON:
4201 // These both tags will be mapped to the PRE style. For the case that a
4202 // a CLASS exists we will delete it so that we don't get the CLASS of
4203 // the PRE style.
4204 aClass.clear();
4205 [[fallthrough]];
4206 case HtmlTokenId::BLOCKQUOTE_ON:
4207 case HtmlTokenId::BLOCKQUOTE30_ON:
4208 case HtmlTokenId::PREFORMTXT_ON:
4209 eMode = AM_SPACE;
4210 break;
4211 case HtmlTokenId::ADDRESS_ON:
4212 eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4213 break;
4214 case HtmlTokenId::DT_ON:
4215 case HtmlTokenId::DD_ON:
4216 eMode = AM_SOFTNOSPACE;
4217 break;
4218 default:
4219 OSL_ENSURE( false, "unknown style" );
4220 break;
4221 }
4222 if( m_pPam->GetPoint()->nContent.GetIndex() )
4223 AppendTextNode( eMode );
4224 else if( AM_SPACE==eMode )
4225 AddParSpace();
4226
4227 // ... and save in a context
4228 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4229
4230 // parse styles (regarding class see also NewPara)
4231 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4232 {
4233 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4234 SvxCSS1PropertyInfo aPropInfo;
4235
4236 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4237 {
4238 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4239 "Class is not considered" );
4240 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4241 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4242 }
4243 }
4244
4245 PushContext(xCntxt);
4246
4247 // set the new style
4248 SetTextCollAttrs(m_aContexts.back().get());
4249
4250 // update progress bar
4251 ShowStatline();
4252 }
4253
EndTextFormatColl(HtmlTokenId nToken)4254 void SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken )
4255 {
4256 SwHTMLAppendMode eMode = AM_NORMAL;
4257 switch( getOnToken(nToken) )
4258 {
4259 case HtmlTokenId::BLOCKQUOTE_ON:
4260 case HtmlTokenId::BLOCKQUOTE30_ON:
4261 case HtmlTokenId::PREFORMTXT_ON:
4262 case HtmlTokenId::LISTING_ON:
4263 case HtmlTokenId::XMP_ON:
4264 eMode = AM_SPACE;
4265 break;
4266 case HtmlTokenId::ADDRESS_ON:
4267 case HtmlTokenId::DT_ON:
4268 case HtmlTokenId::DD_ON:
4269 eMode = AM_SOFTNOSPACE;
4270 break;
4271 default:
4272 OSL_ENSURE( false, "unknown style" );
4273 break;
4274 }
4275 if( m_pPam->GetPoint()->nContent.GetIndex() )
4276 AppendTextNode( eMode );
4277 else if( AM_SPACE==eMode )
4278 AddParSpace();
4279
4280 // pop current context of stack
4281 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4282
4283 // and now end attributes
4284 if (xCntxt)
4285 {
4286 EndContext(xCntxt.get());
4287 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4288 xCntxt.reset();
4289 }
4290
4291 // reset existing style
4292 SetTextCollAttrs();
4293 }
4294
NewDefList()4295 void SwHTMLParser::NewDefList()
4296 {
4297 OUString aId, aStyle, aClass, aLang, aDir;
4298
4299 const HTMLOptions& rHTMLOptions = GetOptions();
4300 for (size_t i = rHTMLOptions.size(); i; )
4301 {
4302 const HTMLOption& rOption = rHTMLOptions[--i];
4303 switch( rOption.GetToken() )
4304 {
4305 case HtmlOptionId::ID:
4306 aId = rOption.GetString();
4307 break;
4308 case HtmlOptionId::STYLE:
4309 aStyle = rOption.GetString();
4310 break;
4311 case HtmlOptionId::CLASS:
4312 aClass = rOption.GetString();
4313 break;
4314 case HtmlOptionId::LANG:
4315 aLang = rOption.GetString();
4316 break;
4317 case HtmlOptionId::DIR:
4318 aDir = rOption.GetString();
4319 break;
4320 default: break;
4321 }
4322 }
4323
4324 // open a new paragraph
4325 bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4326 if( m_pPam->GetPoint()->nContent.GetIndex() )
4327 AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4328 else if( bSpace )
4329 AddParSpace();
4330
4331 // one level more
4332 m_nDefListDeep++;
4333
4334 bool bInDD = false, bNotInDD = false;
4335 auto nPos = m_aContexts.size();
4336 while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4337 {
4338 HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4339 switch( nCntxtToken )
4340 {
4341 case HtmlTokenId::DEFLIST_ON:
4342 case HtmlTokenId::DIRLIST_ON:
4343 case HtmlTokenId::MENULIST_ON:
4344 case HtmlTokenId::ORDERLIST_ON:
4345 case HtmlTokenId::UNORDERLIST_ON:
4346 bNotInDD = true;
4347 break;
4348 case HtmlTokenId::DD_ON:
4349 bInDD = true;
4350 break;
4351 default: break;
4352 }
4353 }
4354
4355 // ... and save in a context
4356 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4357
4358 // in it save also the margins
4359 sal_uInt16 nLeft=0, nRight=0;
4360 short nIndent=0;
4361 GetMarginsFromContext( nLeft, nRight, nIndent );
4362
4363 // The indentation, which already results from a DL, correlates with a DT
4364 // on the current level and this correlates to a DD from the previous level.
4365 // For a level >=2 we must add DD distance.
4366 if( !bInDD && m_nDefListDeep > 1 )
4367 {
4368
4369 // and the one of the DT-style of the current level
4370 SvxLRSpaceItem rLRSpace =
4371 m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4372 ->GetLRSpace();
4373 nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4374 }
4375
4376 xCntxt->SetMargins( nLeft, nRight, nIndent );
4377
4378 // parse styles
4379 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4380 {
4381 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4382 SvxCSS1PropertyInfo aPropInfo;
4383
4384 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4385 {
4386 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4387 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4388 }
4389 }
4390
4391 PushContext(xCntxt);
4392
4393 // set the attributes of the new style
4394 if( m_nDefListDeep > 1 )
4395 SetTextCollAttrs(m_aContexts.back().get());
4396 }
4397
EndDefList()4398 void SwHTMLParser::EndDefList()
4399 {
4400 bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4401 if( m_pPam->GetPoint()->nContent.GetIndex() )
4402 AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4403 else if( bSpace )
4404 AddParSpace();
4405
4406 // one level less
4407 if( m_nDefListDeep > 0 )
4408 m_nDefListDeep--;
4409
4410 // pop current context of stack
4411 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4412
4413 // and now end attributes
4414 if (xCntxt)
4415 {
4416 EndContext(xCntxt.get());
4417 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4418 xCntxt.reset();
4419 }
4420
4421 // and set style
4422 SetTextCollAttrs();
4423 }
4424
NewDefListItem(HtmlTokenId nToken)4425 void SwHTMLParser::NewDefListItem( HtmlTokenId nToken )
4426 {
4427 // determine if the DD/DT exist in a DL
4428 bool bInDefList = false, bNotInDefList = false;
4429 auto nPos = m_aContexts.size();
4430 while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4431 {
4432 HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4433 switch( nCntxtToken )
4434 {
4435 case HtmlTokenId::DEFLIST_ON:
4436 bInDefList = true;
4437 break;
4438 case HtmlTokenId::DIRLIST_ON:
4439 case HtmlTokenId::MENULIST_ON:
4440 case HtmlTokenId::ORDERLIST_ON:
4441 case HtmlTokenId::UNORDERLIST_ON:
4442 bNotInDefList = true;
4443 break;
4444 default: break;
4445 }
4446 }
4447
4448 // if not, then implicitly open a new DL
4449 if( !bInDefList )
4450 {
4451 m_nDefListDeep++;
4452 OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4453 "Now an open paragraph element will be lost." );
4454 m_nOpenParaToken = nToken;
4455 }
4456
4457 NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4458 : RES_POOLCOLL_HTML_DT) );
4459 }
4460
EndDefListItem(HtmlTokenId nToken)4461 void SwHTMLParser::EndDefListItem( HtmlTokenId nToken )
4462 {
4463 // open a new paragraph
4464 if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4465 AppendTextNode( AM_SOFTNOSPACE );
4466
4467 // search context matching the token and fetch it from stack
4468 nToken = getOnToken(nToken);
4469 std::unique_ptr<HTMLAttrContext> xCntxt;
4470 auto nPos = m_aContexts.size();
4471 while( !xCntxt && nPos>m_nContextStMin )
4472 {
4473 HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4474 switch( nCntxtToken )
4475 {
4476 case HtmlTokenId::DD_ON:
4477 case HtmlTokenId::DT_ON:
4478 if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4479 {
4480 xCntxt = std::move(m_aContexts[nPos]);
4481 m_aContexts.erase( m_aContexts.begin() + nPos );
4482 }
4483 break;
4484 case HtmlTokenId::DEFLIST_ON:
4485 // don't look at DD/DT outside the current DefList
4486 case HtmlTokenId::DIRLIST_ON:
4487 case HtmlTokenId::MENULIST_ON:
4488 case HtmlTokenId::ORDERLIST_ON:
4489 case HtmlTokenId::UNORDERLIST_ON:
4490 // and also not outside another list
4491 nPos = m_nContextStMin;
4492 break;
4493 default: break;
4494 }
4495 }
4496
4497 // and now end attributes
4498 if (xCntxt)
4499 {
4500 EndContext(xCntxt.get());
4501 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4502 }
4503 }
4504
4505 /**
4506 *
4507 * @param bNoSurroundOnly The paragraph contains at least one frame
4508 * without wrapping.
4509 * @param bSurroundOnly The paragraph contains at least one frame
4510 * with wrapping, but none without wrapping.
4511 *
4512 * Otherwise the paragraph contains any frame.
4513 */
HasCurrentParaFlys(bool bNoSurroundOnly,bool bSurroundOnly) const4514 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4515 bool bSurroundOnly ) const
4516 {
4517 SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4518
4519 const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4520
4521 bool bFound = false;
4522 for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4523 {
4524 const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4525 SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4526 // A frame was found, when
4527 // - it is paragraph-bound, and
4528 // - is anchored in current paragraph, and
4529 // - every paragraph-bound frame counts, or
4530 // - (only frames without wrapping count and) the frame doesn't have
4531 // a wrapping
4532 SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4533 if (pAPos &&
4534 ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4535 (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4536 pAPos->nNode == rNodeIdx )
4537 {
4538 if( !(bNoSurroundOnly || bSurroundOnly) )
4539 {
4540 bFound = true;
4541 break;
4542 }
4543 else
4544 {
4545 // When looking for frames with wrapping, also disregard
4546 // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4547 // and you don't want to evade those when positioning.
4548 css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4549 if( bNoSurroundOnly )
4550 {
4551 if( css::text::WrapTextMode_NONE==eSurround )
4552 {
4553 bFound = true;
4554 break;
4555 }
4556 }
4557 if( bSurroundOnly )
4558 {
4559 if( css::text::WrapTextMode_NONE==eSurround )
4560 {
4561 bFound = false;
4562 break;
4563 }
4564 else if( css::text::WrapTextMode_THROUGH!=eSurround )
4565 {
4566 bFound = true;
4567 // Continue searching: It's possible that some without
4568 // wrapping will follow...
4569 }
4570 }
4571 }
4572 }
4573 }
4574
4575 return bFound;
4576 }
4577
4578 // the special methods for inserting of objects
4579
GetCurrFormatColl() const4580 const SwFormatColl *SwHTMLParser::GetCurrFormatColl() const
4581 {
4582 const SwContentNode* pCNd = m_pPam->GetContentNode();
4583 return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4584 }
4585
SetTextCollAttrs(HTMLAttrContext * pContext)4586 void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext )
4587 {
4588 SwTextFormatColl *pCollToSet = nullptr; // the style to set
4589 SfxItemSet *pItemSet = nullptr; // set of hard attributes
4590 sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4591 const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4592 sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4593
4594 bool bInPRE=false; // some context info
4595
4596 sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4597 short nFirstLineIndent = 0; // indentations
4598
4599 for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4600 {
4601 const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4602
4603 sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4604 if( nColl )
4605 {
4606 // There is a style to set. Then at first we must decide,
4607 // if the style can be set.
4608 bool bSetThis = true;
4609 switch( nColl )
4610 {
4611 case RES_POOLCOLL_HTML_PRE:
4612 bInPRE = true;
4613 break;
4614 case RES_POOLCOLL_TEXT:
4615 // <TD><P CLASS=xxx> must become TD.xxx
4616 if( nDfltColl==RES_POOLCOLL_TABLE ||
4617 nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4618 nColl = nDfltColl;
4619 break;
4620 case RES_POOLCOLL_HTML_HR:
4621 // also <HR> in <PRE> set as style, otherwise it can't
4622 // be exported anymore
4623 break;
4624 default:
4625 if( bInPRE )
4626 bSetThis = false;
4627 break;
4628 }
4629
4630 SwTextFormatColl *pNewColl =
4631 m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4632
4633 if( bSetThis )
4634 {
4635 // If now a different style should be set as previously, the
4636 // previous style must be replaced by hard attribution.
4637
4638 if( pCollToSet )
4639 {
4640 // insert the attributes hard, which previous style sets
4641 if( !pItemSet )
4642 pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4643 else
4644 {
4645 const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4646 SfxItemSet aItemSet( *rCollSet.GetPool(),
4647 rCollSet.GetRanges() );
4648 aItemSet.Set( rCollSet );
4649 pItemSet->Put( aItemSet );
4650 }
4651 // but remove the attributes, which the current style sets,
4652 // because otherwise they will be overwritten later
4653 pItemSet->Differentiate( pNewColl->GetAttrSet() );
4654 }
4655
4656 pCollToSet = pNewColl;
4657 }
4658 else
4659 {
4660 // hard attribution
4661 if( !pItemSet )
4662 pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4663 else
4664 {
4665 const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4666 SfxItemSet aItemSet( *rCollSet.GetPool(),
4667 rCollSet.GetRanges() );
4668 aItemSet.Set( rCollSet );
4669 pItemSet->Put( aItemSet );
4670 }
4671 }
4672 }
4673 else
4674 {
4675 // Maybe a default style exists?
4676 nColl = pCntxt->GetDefaultTextFormatColl();
4677 if( nColl )
4678 nDfltColl = nColl;
4679 }
4680
4681 // if applicable fetch new paragraph indents
4682 if( pCntxt->IsLRSpaceChanged() )
4683 {
4684 sal_uInt16 nLeft=0, nRight=0;
4685
4686 pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4687 nLeftMargin = nLeft;
4688 nRightMargin = nRight;
4689 }
4690 }
4691
4692 // If in current context a new style should be set,
4693 // its paragraph margins must be inserted in the context.
4694 if( pContext && nTopColl )
4695 {
4696 // <TD><P CLASS=xxx> must become TD.xxx
4697 if( nTopColl==RES_POOLCOLL_TEXT &&
4698 (nDfltColl==RES_POOLCOLL_TABLE ||
4699 nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4700 nTopColl = nDfltColl;
4701
4702 const SwTextFormatColl *pTopColl =
4703 m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4704 const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4705 const SfxPoolItem *pItem;
4706 if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4707 {
4708 const SvxLRSpaceItem *pLRItem =
4709 static_cast<const SvxLRSpaceItem *>(pItem);
4710
4711 sal_Int32 nLeft = pLRItem->GetTextLeft();
4712 sal_Int32 nRight = pLRItem->GetRight();
4713 nFirstLineIndent = pLRItem->GetTextFirstLineOffset();
4714
4715 // In Definition lists the margins also contain the margins from the previous levels
4716 if( RES_POOLCOLL_HTML_DD == nTopColl )
4717 {
4718 const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4719 ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4720 ->GetLRSpace();
4721 nLeft -= rDTLRSpace.GetTextLeft();
4722 nRight -= rDTLRSpace.GetRight();
4723 }
4724 else if( RES_POOLCOLL_HTML_DT == nTopColl )
4725 {
4726 nLeft = 0;
4727 nRight = 0;
4728 }
4729
4730 // the paragraph margins add up
4731 nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4732 nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4733
4734 pContext->SetMargins( nLeftMargin, nRightMargin,
4735 nFirstLineIndent );
4736 }
4737 if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4738 {
4739 const SvxULSpaceItem *pULItem =
4740 static_cast<const SvxULSpaceItem *>(pItem);
4741 pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4742 }
4743 }
4744
4745 // If no style is set in the context use the text body.
4746 if( !pCollToSet )
4747 {
4748 pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4749 const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4750 if( !nLeftMargin )
4751 nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4752 if( !nRightMargin )
4753 nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4754 if( !nFirstLineIndent )
4755 nFirstLineIndent = rLRItem.GetTextFirstLineOffset();
4756 }
4757
4758 // remove previous hard attribution of paragraph
4759 for( auto pParaAttr : m_aParaAttrs )
4760 pParaAttr->Invalidate();
4761 m_aParaAttrs.clear();
4762
4763 // set the style
4764 m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4765
4766 // if applicable correct the paragraph indent
4767 const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4768 bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4769 nFirstLineIndent != rLRItem.GetTextFirstLineOffset() ||
4770 nRightMargin != rLRItem.GetRight();
4771
4772 if( bSetLRSpace )
4773 {
4774 SvxLRSpaceItem aLRItem( rLRItem );
4775 aLRItem.SetTextLeft( nLeftMargin );
4776 aLRItem.SetRight( nRightMargin );
4777 aLRItem.SetTextFirstLineOffset( nFirstLineIndent );
4778 if( pItemSet )
4779 pItemSet->Put( aLRItem );
4780 else
4781 {
4782 NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4783 m_xAttrTab->pLRSpace->SetLikePara();
4784 m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4785 EndAttr( m_xAttrTab->pLRSpace, false );
4786 }
4787 }
4788
4789 // and now set the attributes
4790 if( pItemSet )
4791 {
4792 InsertParaAttrs( *pItemSet );
4793 delete pItemSet;
4794 }
4795 }
4796
NewCharFormat(HtmlTokenId nToken)4797 void SwHTMLParser::NewCharFormat( HtmlTokenId nToken )
4798 {
4799 OUString aId, aStyle, aLang, aDir;
4800 OUString aClass;
4801
4802 const HTMLOptions& rHTMLOptions = GetOptions();
4803 for (size_t i = rHTMLOptions.size(); i; )
4804 {
4805 const HTMLOption& rOption = rHTMLOptions[--i];
4806 switch( rOption.GetToken() )
4807 {
4808 case HtmlOptionId::ID:
4809 aId = rOption.GetString();
4810 break;
4811 case HtmlOptionId::STYLE:
4812 aStyle = rOption.GetString();
4813 break;
4814 case HtmlOptionId::CLASS:
4815 aClass = rOption.GetString();
4816 break;
4817 case HtmlOptionId::LANG:
4818 aLang = rOption.GetString();
4819 break;
4820 case HtmlOptionId::DIR:
4821 aDir = rOption.GetString();
4822 break;
4823 default: break;
4824 }
4825 }
4826
4827 // create a new context
4828 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4829
4830 // set the style and save it in the context
4831 SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4832 OSL_ENSURE( pCFormat, "No character format found for token" );
4833
4834 // parse styles (regarding class see also NewPara)
4835 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4836 {
4837 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4838 SvxCSS1PropertyInfo aPropInfo;
4839
4840 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4841 {
4842 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4843 "Class is not considered" );
4844 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4845 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4846 }
4847 }
4848
4849 // Character formats are stored in their own stack and can never be inserted
4850 // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4851 if( pCFormat )
4852 InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4853
4854 // save the context
4855 PushContext(xCntxt);
4856 }
4857
InsertSpacer()4858 void SwHTMLParser::InsertSpacer()
4859 {
4860 // and if applicable change it via the options
4861 sal_Int16 eVertOri = text::VertOrientation::TOP;
4862 sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4863 Size aSize( 0, 0);
4864 tools::Long nSize = 0;
4865 bool bPercentWidth = false;
4866 bool bPercentHeight = false;
4867 sal_uInt16 nType = HTML_SPTYPE_HORI;
4868
4869 const HTMLOptions& rHTMLOptions = GetOptions();
4870 for (size_t i = rHTMLOptions.size(); i; )
4871 {
4872 const HTMLOption& rOption = rHTMLOptions[--i];
4873 switch( rOption.GetToken() )
4874 {
4875 case HtmlOptionId::TYPE:
4876 rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4877 break;
4878 case HtmlOptionId::ALIGN:
4879 eVertOri =
4880 rOption.GetEnum( aHTMLImgVAlignTable,
4881 eVertOri );
4882 eHoriOri =
4883 rOption.GetEnum( aHTMLImgHAlignTable,
4884 eHoriOri );
4885 break;
4886 case HtmlOptionId::WIDTH:
4887 // First only save as pixel value!
4888 bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4889 aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
4890 break;
4891 case HtmlOptionId::HEIGHT:
4892 // First only save as pixel value!
4893 bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4894 aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
4895 break;
4896 case HtmlOptionId::SIZE:
4897 // First only save as pixel value!
4898 nSize = rOption.GetNumber();
4899 break;
4900 default: break;
4901 }
4902 }
4903
4904 switch( nType )
4905 {
4906 case HTML_SPTYPE_BLOCK:
4907 {
4908 // create an empty text frame
4909
4910 // fetch the ItemSet
4911 SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
4912 svl::Items<RES_FRMATR_BEGIN, RES_FRMATR_END-1>{} );
4913 if( !IsNewDoc() )
4914 Reader::ResetFrameFormatAttrs( aFrameSet );
4915
4916 // set the anchor and the adjustment
4917 SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4918
4919 // and the size of the frame
4920 Size aDfltSz( MINFLY, MINFLY );
4921 Size aSpace( 0, 0 );
4922 SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4923 m_pCSS1Parser->GetWhichMap() );
4924 SvxCSS1PropertyInfo aDummyPropInfo;
4925
4926 SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4927 aDummyPropInfo, aFrameSet );
4928 SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4929
4930 // protect the content
4931 SvxProtectItem aProtectItem( RES_PROTECT) ;
4932 aProtectItem.SetContentProtect( true );
4933 aFrameSet.Put( aProtectItem );
4934
4935 // create the frame
4936 RndStdIds eAnchorId =
4937 aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4938 SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4939 m_pPam->GetPoint(), &aFrameSet );
4940 // Possibly create frames and register auto-bound frames.
4941 RegisterFlyFrame( pFlyFormat );
4942 }
4943 break;
4944 case HTML_SPTYPE_VERT:
4945 if( nSize > 0 )
4946 {
4947 if (Application::GetDefaultDevice())
4948 {
4949 nSize = Application::GetDefaultDevice()
4950 ->PixelToLogic( Size(0,nSize),
4951 MapMode(MapUnit::MapTwip) ).Height();
4952 }
4953
4954 // set a paragraph margin
4955 SwTextNode *pTextNode = nullptr;
4956 if( !m_pPam->GetPoint()->nContent.GetIndex() )
4957 {
4958 // if possible change the bottom paragraph margin
4959 // of previous node
4960
4961 SetAttr(); // set still open paragraph attributes
4962
4963 pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
4964 ->GetTextNode();
4965
4966 // If the previous paragraph isn't a text node, then now an
4967 // empty paragraph is created, which already generates a single
4968 // line of spacing.
4969 if( !pTextNode )
4970 nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
4971 }
4972
4973 if( pTextNode )
4974 {
4975 SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
4976 ->SwContentNode::GetAttr( RES_UL_SPACE )) );
4977 aULSpace.SetLower( aULSpace.GetLower() + o3tl::narrowing<sal_uInt16>(nSize) );
4978 pTextNode->SetAttr( aULSpace );
4979 }
4980 else
4981 {
4982 NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, o3tl::narrowing<sal_uInt16>(nSize), RES_UL_SPACE));
4983 EndAttr( m_xAttrTab->pULSpace, false );
4984
4985 AppendTextNode(); // Don't change spacing!
4986 }
4987 }
4988 break;
4989 case HTML_SPTYPE_HORI:
4990 if( nSize > 0 )
4991 {
4992 // If the paragraph is still empty, set first line
4993 // indentation, otherwise apply letter spacing over a space.
4994
4995 if (Application::GetDefaultDevice())
4996 {
4997 nSize = Application::GetDefaultDevice()
4998 ->PixelToLogic( Size(nSize,0),
4999 MapMode(MapUnit::MapTwip) ).Width();
5000 }
5001
5002 if( !m_pPam->GetPoint()->nContent.GetIndex() )
5003 {
5004 sal_uInt16 nLeft=0, nRight=0;
5005 short nIndent = 0;
5006
5007 GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
5008 nIndent = nIndent + static_cast<short>(nSize);
5009
5010 SvxLRSpaceItem aLRItem( RES_LR_SPACE );
5011 aLRItem.SetTextLeft( nLeft );
5012 aLRItem.SetRight( nRight );
5013 aLRItem.SetTextFirstLineOffset( nIndent );
5014
5015 NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
5016 EndAttr( m_xAttrTab->pLRSpace, false );
5017 }
5018 else
5019 {
5020 NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5021 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
5022 EndAttr( m_xAttrTab->pKerning );
5023 }
5024 }
5025 }
5026 }
5027
ToTwips(sal_uInt16 nPixel)5028 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5029 {
5030 if( nPixel && Application::GetDefaultDevice() )
5031 {
5032 SwTwips nTwips = Application::GetDefaultDevice()->PixelToLogic(
5033 Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
5034 return o3tl::narrowing<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5035 }
5036 else
5037 return nPixel;
5038 }
5039
GetCurrentBrowseWidth()5040 SwTwips SwHTMLParser::GetCurrentBrowseWidth()
5041 {
5042 const SwTwips nWidth = SwHTMLTableLayout::GetBrowseWidth( *m_xDoc );
5043 if( nWidth )
5044 return nWidth;
5045
5046 if( !m_aHTMLPageSize.Width() )
5047 {
5048 const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5049
5050 const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5051 const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5052 const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5053 const SwFormatCol& rCol = rPgFormat.GetCol();
5054
5055 m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5056 m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5057
5058 if( 1 < rCol.GetNumCols() )
5059 m_aHTMLPageSize.setWidth( m_aHTMLPageSize.Width() / ( rCol.GetNumCols()) );
5060 }
5061
5062 return m_aHTMLPageSize.Width();
5063 }
5064
InsertIDOption()5065 void SwHTMLParser::InsertIDOption()
5066 {
5067 OUString aId;
5068 const HTMLOptions& rHTMLOptions = GetOptions();
5069 for (size_t i = rHTMLOptions.size(); i; )
5070 {
5071 const HTMLOption& rOption = rHTMLOptions[--i];
5072 if( HtmlOptionId::ID==rOption.GetToken() )
5073 {
5074 aId = rOption.GetString();
5075 break;
5076 }
5077 }
5078
5079 if( !aId.isEmpty() )
5080 InsertBookmark( aId );
5081 }
5082
InsertLineBreak()5083 void SwHTMLParser::InsertLineBreak()
5084 {
5085 // <BR CLEAR=xxx> is handled as:
5086 // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
5087 // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
5088 // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
5089 // changed as following:
5090 // 3.) If the paragraph contains no text, then the frames don't get a wrapping
5091 // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
5092 // and a right aligned frame gets a left "only anchor" wrapping.
5093 // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
5094 // then a new paragraph is opened
5095 // 6.) If no wrappings of frames are changed, a hard line break is inserted.
5096
5097 OUString aId, aStyle, aClass; // the id of bookmark
5098 bool bClearLeft = false, bClearRight = false;
5099 bool bCleared = false; // Was a CLEAR executed?
5100
5101 // then we fetch the options
5102 const HTMLOptions& rHTMLOptions = GetOptions();
5103 for (size_t i = rHTMLOptions.size(); i; )
5104 {
5105 const HTMLOption& rOption = rHTMLOptions[--i];
5106 switch( rOption.GetToken() )
5107 {
5108 case HtmlOptionId::CLEAR:
5109 {
5110 const OUString &rClear = rOption.GetString();
5111 if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5112 {
5113 bClearLeft = true;
5114 bClearRight = true;
5115 }
5116 else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5117 bClearLeft = true;
5118 else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5119 bClearRight = true;
5120 }
5121 break;
5122 case HtmlOptionId::ID:
5123 aId = rOption.GetString();
5124 break;
5125 case HtmlOptionId::STYLE:
5126 aStyle = rOption.GetString();
5127 break;
5128 case HtmlOptionId::CLASS:
5129 aClass = rOption.GetString();
5130 break;
5131 default: break;
5132 }
5133 }
5134
5135 // CLEAR is only supported for the current paragraph
5136 if( bClearLeft || bClearRight )
5137 {
5138 SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
5139 SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
5140 if( pTextNd )
5141 {
5142 const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
5143
5144 for( size_t i=0; i<rFrameFormatTable.size(); i++ )
5145 {
5146 SwFrameFormat *const pFormat = rFrameFormatTable[i];
5147 SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
5148 SwPosition const*const pAPos = pAnchor->GetContentAnchor();
5149 if (pAPos &&
5150 ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
5151 (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
5152 pAPos->nNode == rNodeIdx &&
5153 pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
5154 {
5155 sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
5156 ? text::HoriOrientation::LEFT
5157 : pFormat->GetHoriOrient().GetHoriOrient();
5158
5159 css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
5160 if( m_pPam->GetPoint()->nContent.GetIndex() )
5161 {
5162 if( bClearLeft && text::HoriOrientation::LEFT==eHori )
5163 eSurround = css::text::WrapTextMode_RIGHT;
5164 else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
5165 eSurround = css::text::WrapTextMode_LEFT;
5166 }
5167 else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
5168 (bClearRight && text::HoriOrientation::RIGHT==eHori) )
5169 {
5170 eSurround = css::text::WrapTextMode_NONE;
5171 }
5172
5173 if( css::text::WrapTextMode_PARALLEL != eSurround )
5174 {
5175 SwFormatSurround aSurround( eSurround );
5176 if( css::text::WrapTextMode_NONE != eSurround )
5177 aSurround.SetAnchorOnly( true );
5178 pFormat->SetFormatAttr( aSurround );
5179 bCleared = true;
5180 }
5181 }
5182 }
5183 }
5184 }
5185
5186 // parse styles
5187 std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5188 bool bBreakItem = false;
5189 if( HasStyleOptions( aStyle, aId, aClass ) )
5190 {
5191 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5192 SvxCSS1PropertyInfo aPropInfo;
5193
5194 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5195 {
5196 if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5197 {
5198 aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5199 bBreakItem = true;
5200 }
5201 if( !aPropInfo.m_aId.isEmpty() )
5202 InsertBookmark( aPropInfo.m_aId );
5203 }
5204 }
5205
5206 if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5207 {
5208 NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5209 EndAttr( m_xAttrTab->pBreak, false );
5210 }
5211
5212 if( !bCleared && !bBreakItem )
5213 {
5214 // If no CLEAR could or should be executed, a line break will be inserted
5215 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, "\x0A" );
5216 }
5217 else if( m_pPam->GetPoint()->nContent.GetIndex() )
5218 {
5219 // If a CLEAR is executed in a non-empty paragraph, then after it
5220 // a new paragraph has to be opened.
5221 // MIB 21.02.97: Here actually we should change the bottom paragraph
5222 // margin to zero. This will fail for something like this <BR ..><P>
5223 // (>Netscape). That's why we don't do it.
5224 AppendTextNode( AM_NOSPACE );
5225 }
5226 if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5227 {
5228 NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5229 EndAttr( m_xAttrTab->pBreak, false );
5230 }
5231 }
5232
InsertHorzRule()5233 void SwHTMLParser::InsertHorzRule()
5234 {
5235 sal_uInt16 nSize = 0;
5236 sal_uInt16 nWidth = 0;
5237
5238 SvxAdjust eAdjust = SvxAdjust::End;
5239
5240 bool bPercentWidth = false;
5241 bool bNoShade = false;
5242 bool bColor = false;
5243
5244 Color aColor;
5245 OUString aId;
5246
5247 // let's fetch the options
5248 const HTMLOptions& rHTMLOptions = GetOptions();
5249 for (size_t i = rHTMLOptions.size(); i; )
5250 {
5251 const HTMLOption& rOption = rHTMLOptions[--i];
5252 switch( rOption.GetToken() )
5253 {
5254 case HtmlOptionId::ID:
5255 aId = rOption.GetString();
5256 break;
5257 case HtmlOptionId::SIZE:
5258 nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5259 break;
5260 case HtmlOptionId::WIDTH:
5261 bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5262 nWidth = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5263 if( bPercentWidth && nWidth>=100 )
5264 {
5265 // the default case are 100% lines (no attributes necessary)
5266 nWidth = 0;
5267 bPercentWidth = false;
5268 }
5269 break;
5270 case HtmlOptionId::ALIGN:
5271 eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5272 break;
5273 case HtmlOptionId::NOSHADE:
5274 bNoShade = true;
5275 break;
5276 case HtmlOptionId::COLOR:
5277 rOption.GetColor( aColor );
5278 bColor = true;
5279 break;
5280 default: break;
5281 }
5282 }
5283
5284 if( m_pPam->GetPoint()->nContent.GetIndex() )
5285 AppendTextNode( AM_NOSPACE );
5286 if( m_nOpenParaToken != HtmlTokenId::NONE )
5287 EndPara();
5288 AppendTextNode();
5289 m_pPam->Move( fnMoveBackward );
5290
5291 // ...and save in a context
5292 std::unique_ptr<HTMLAttrContext> xCntxt(
5293 new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5294
5295 PushContext(xCntxt);
5296
5297 // set the new style
5298 SetTextCollAttrs(m_aContexts.back().get());
5299
5300 // the hard attributes of the current paragraph will never become invalid
5301 m_aParaAttrs.clear();
5302
5303 if( nSize>0 || bColor || bNoShade )
5304 {
5305 // set line colour and/or width
5306 if( !bColor )
5307 aColor = COL_GRAY;
5308
5309 SvxBorderLine aBorderLine( &aColor );
5310 if( nSize )
5311 {
5312 tools::Long nPWidth = 0;
5313 tools::Long nPHeight = static_cast<tools::Long>(nSize);
5314 SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5315 if ( !bNoShade )
5316 {
5317 aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5318 }
5319 aBorderLine.SetWidth( nPHeight );
5320 }
5321 else if( bNoShade )
5322 {
5323 aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5324 }
5325 else
5326 {
5327 aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5328 aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5329 }
5330
5331 SvxBoxItem aBoxItem(RES_BOX);
5332 aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5333 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5334 m_aSetAttrTab.push_back( pTmp );
5335 }
5336 if( nWidth )
5337 {
5338 // If we aren't in a table, then the width value will be "faked" with
5339 // paragraph indents. That makes little sense in a table. In order to
5340 // avoid that the line is considered during the width calculation, it
5341 // still gets an appropriate LRSpace-Item.
5342 if (!m_xTable)
5343 {
5344 // fake length and alignment of line above paragraph indents
5345 tools::Long nBrowseWidth = GetCurrentBrowseWidth();
5346 nWidth = bPercentWidth ? o3tl::narrowing<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5347 : ToTwips( o3tl::narrowing<sal_uInt16>(nBrowseWidth) );
5348 if( nWidth < MINLAY )
5349 nWidth = MINLAY;
5350
5351 const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5352 if (pColl)
5353 {
5354 SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5355 tools::Long nDist = nBrowseWidth - nWidth;
5356
5357 switch( eAdjust )
5358 {
5359 case SvxAdjust::Right:
5360 aLRItem.SetTextLeft( o3tl::narrowing<sal_uInt16>(nDist) );
5361 break;
5362 case SvxAdjust::Left:
5363 aLRItem.SetRight( o3tl::narrowing<sal_uInt16>(nDist) );
5364 break;
5365 case SvxAdjust::Center:
5366 default:
5367 nDist /= 2;
5368 aLRItem.SetTextLeft( o3tl::narrowing<sal_uInt16>(nDist) );
5369 aLRItem.SetRight( o3tl::narrowing<sal_uInt16>(nDist) );
5370 break;
5371 }
5372
5373 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5374 m_aSetAttrTab.push_back( pTmp );
5375 }
5376 }
5377 }
5378
5379 // it's not possible to insert bookmarks in links
5380 if( !aId.isEmpty() )
5381 InsertBookmark( aId );
5382
5383 // pop current context of stack
5384 std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5385 xPoppedContext.reset();
5386
5387 m_pPam->Move( fnMoveForward );
5388
5389 // and set the current style in the next paragraph
5390 SetTextCollAttrs();
5391 }
5392
ParseMoreMetaOptions()5393 void SwHTMLParser::ParseMoreMetaOptions()
5394 {
5395 OUString aName, aContent;
5396 bool bHTTPEquiv = false;
5397
5398 const HTMLOptions& rHTMLOptions = GetOptions();
5399 for (size_t i = rHTMLOptions.size(); i; )
5400 {
5401 const HTMLOption& rOption = rHTMLOptions[--i];
5402 switch( rOption.GetToken() )
5403 {
5404 case HtmlOptionId::NAME:
5405 aName = rOption.GetString();
5406 bHTTPEquiv = false;
5407 break;
5408 case HtmlOptionId::HTTPEQUIV:
5409 aName = rOption.GetString();
5410 bHTTPEquiv = true;
5411 break;
5412 case HtmlOptionId::CONTENT:
5413 aContent = rOption.GetString();
5414 break;
5415 default: break;
5416 }
5417 }
5418
5419 // Here things get a little tricky: We know for sure, that the Doc-Info
5420 // wasn't changed. Therefore it's enough to query for Generator and Refresh
5421 // to find a not processed Token. These are the only ones which won't change
5422 // the Doc-Info.
5423 if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5424 aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5425 aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5426 aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5427 return;
5428
5429 aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5430
5431 if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5432 {
5433 FillEndNoteInfo( aContent );
5434 return;
5435 }
5436
5437 if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote ) )
5438 {
5439 FillFootNoteInfo( aContent );
5440 return;
5441 }
5442
5443 OUStringBuffer sText;
5444 sText.append("HTML: <");
5445 sText.append(OOO_STRING_SVTOOLS_HTML_meta);
5446 sText.append(' ');
5447 if( bHTTPEquiv )
5448 sText.append(OOO_STRING_SVTOOLS_HTML_O_httpequiv);
5449 else
5450 sText.append(OOO_STRING_SVTOOLS_HTML_O_name);
5451 sText.append("=\"");
5452 sText.append(aName);
5453 sText.append("\" ");
5454 sText.append(OOO_STRING_SVTOOLS_HTML_O_content);
5455 sText.append("=\"");
5456 sText.append(aContent);
5457 sText.append("\">");
5458
5459 SwPostItField aPostItField(
5460 static_cast<SwPostItFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Postit )),
5461 OUString(), sText.makeStringAndClear(), OUString(), OUString(), DateTime(DateTime::SYSTEM));
5462 SwFormatField aFormatField( aPostItField );
5463 InsertAttr( aFormatField, false );
5464 }
5465
HTMLAttr(const SwPosition & rPos,const SfxPoolItem & rItem,HTMLAttr ** ppHd,const std::shared_ptr<HTMLAttrTable> & rAttrTab)5466 HTMLAttr::HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem,
5467 HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab ) :
5468 m_nStartPara( rPos.nNode ),
5469 m_nEndPara( rPos.nNode ),
5470 m_nStartContent( rPos.nContent.GetIndex() ),
5471 m_nEndContent(rPos.nContent.GetIndex() ),
5472 m_bInsAtStart( true ),
5473 m_bLikePara( false ),
5474 m_bValid( true ),
5475 m_pItem( rItem.Clone() ),
5476 m_xAttrTab( rAttrTab ),
5477 m_pNext( nullptr ),
5478 m_pPrev( nullptr ),
5479 m_ppHead( ppHd )
5480 {
5481 }
5482
// Creates an attribute from rAttr: start position, flags and a clone of the
// item are taken from rAttr, the end position is rEndPara / nEndCnt. ppHd
// and rAttrTab are stored as given; the next and previous pointers of rAttr
// are not copied.
HTMLAttr::HTMLAttr(const HTMLAttr& rAttr,
                   const SwNodeIndex& rEndPara,
                   sal_Int32 nEndCnt,
                   HTMLAttr** ppHd,
                   const std::shared_ptr<HTMLAttrTable>& rAttrTab)
    : m_nStartPara(rAttr.m_nStartPara)
    , m_nEndPara(rEndPara)
    , m_nStartContent(rAttr.m_nStartContent)
    , m_nEndContent(nEndCnt)
    , m_bInsAtStart(rAttr.m_bInsAtStart)
    , m_bLikePara(rAttr.m_bLikePara)
    , m_bValid(rAttr.m_bValid)
    , m_pItem(rAttr.m_pItem->Clone())
    , m_xAttrTab(rAttrTab)
    , m_pNext(nullptr)
    , m_pPrev(nullptr)
    , m_ppHead(ppHd)
{
}
5499
~HTMLAttr()5500 HTMLAttr::~HTMLAttr()
5501 {
5502 }
5503
Clone(const SwNodeIndex & rEndPara,sal_Int32 nEndCnt) const5504 HTMLAttr *HTMLAttr::Clone(const SwNodeIndex& rEndPara, sal_Int32 nEndCnt) const
5505 {
5506 // create the attribute anew with old start position
5507 HTMLAttr *pNew = new HTMLAttr( *this, rEndPara, nEndCnt, m_ppHead, m_xAttrTab );
5508
5509 // The Previous-List must be taken over, the Next-List not!
5510 pNew->m_pPrev = m_pPrev;
5511
5512 return pNew;
5513 }
5514
Reset(const SwNodeIndex & rSttPara,sal_Int32 nSttCnt,HTMLAttr ** ppHd,const std::shared_ptr<HTMLAttrTable> & rAttrTab)5515 void HTMLAttr::Reset(const SwNodeIndex& rSttPara, sal_Int32 nSttCnt,
5516 HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab)
5517 {
5518 // reset the start (and the end)
5519 m_nStartPara = rSttPara;
5520 m_nStartContent = nSttCnt;
5521 m_nEndPara = rSttPara;
5522 m_nEndContent = nSttCnt;
5523
5524 // correct the head and nullify link
5525 m_pNext = nullptr;
5526 m_pPrev = nullptr;
5527 m_ppHead = ppHd;
5528 m_xAttrTab = rAttrTab;
5529 }
5530
InsertPrev(HTMLAttr * pPrv)5531 void HTMLAttr::InsertPrev( HTMLAttr *pPrv )
5532 {
5533 OSL_ENSURE( !pPrv->m_pNext || pPrv->m_pNext == this,
5534 "HTMLAttr::InsertPrev: pNext wrong" );
5535 pPrv->m_pNext = nullptr;
5536
5537 OSL_ENSURE( nullptr == pPrv->m_ppHead || m_ppHead == pPrv->m_ppHead,
5538 "HTMLAttr::InsertPrev: ppHead wrong" );
5539 pPrv->m_ppHead = nullptr;
5540
5541 HTMLAttr *pAttr = this;
5542 while( pAttr->GetPrev() )
5543 pAttr = pAttr->GetPrev();
5544
5545 pAttr->m_pPrev = pPrv;
5546 }
5547
ParseMetaOptions(const uno::Reference<document::XDocumentProperties> & i_xDocProps,SvKeyValueIterator * i_pHeader)5548 bool SwHTMLParser::ParseMetaOptions(
5549 const uno::Reference<document::XDocumentProperties> & i_xDocProps,
5550 SvKeyValueIterator *i_pHeader )
5551 {
5552 // always call base ParseMetaOptions, it sets the encoding (#i96700#)
5553 bool ret( HTMLParser::ParseMetaOptions(i_xDocProps, i_pHeader) );
5554 if (!ret && IsNewDoc())
5555 {
5556 ParseMoreMetaOptions();
5557 }
5558 return ret;
5559 }
5560
5561 // override so we can parse DOCINFO field subtypes INFO[1-4]
AddMetaUserDefined(OUString const & i_rMetaName)5562 void SwHTMLParser::AddMetaUserDefined( OUString const & i_rMetaName )
5563 {
5564 // unless we already have 4 names, append the argument to m_InfoNames
5565 OUString* pName // the first empty string in m_InfoNames
5566 (m_InfoNames[0].isEmpty() ? &m_InfoNames[0] :
5567 (m_InfoNames[1].isEmpty() ? &m_InfoNames[1] :
5568 (m_InfoNames[2].isEmpty() ? &m_InfoNames[2] :
5569 (m_InfoNames[3].isEmpty() ? &m_InfoNames[3] : nullptr ))));
5570 if (pName)
5571 {
5572 (*pName) = i_rMetaName;
5573 }
5574 }
5575
SetupFilterOptions()5576 void HTMLReader::SetupFilterOptions()
5577 {
5578 // Reset state from previous Read() invocation.
5579 m_aNamespace.clear();
5580
5581 if (!m_pMedium)
5582 return;
5583
5584 const SfxItemSet* pItemSet = m_pMedium->GetItemSet();
5585 if (!pItemSet)
5586 return;
5587
5588 auto pItem = pItemSet->GetItem<SfxStringItem>(SID_FILE_FILTEROPTIONS);
5589 if (!pItem)
5590 return;
5591
5592 OUString aFilterOptions = pItem->GetValue();
5593 static const OUStringLiteral aXhtmlNsKey(u"xhtmlns=");
5594 if (aFilterOptions.startsWith(aXhtmlNsKey))
5595 {
5596 OUString aNamespace = aFilterOptions.copy(aXhtmlNsKey.getLength());
5597 m_aNamespace = aNamespace;
5598 }
5599 }
5600
5601 namespace
5602 {
5603 class FontCacheGuard
5604 {
5605 public:
~FontCacheGuard()5606 ~FontCacheGuard()
5607 {
5608 FlushFontCache();
5609 }
5610 };
5611 }
5612
TestImportHTML(SvStream & rStream)5613 bool TestImportHTML(SvStream &rStream)
5614 {
5615 FontCacheGuard aFontCacheGuard;
5616 HTMLReader aReader;
5617 aReader.m_pStream = &rStream;
5618
5619 SwGlobals::ensure();
5620
5621 SfxObjectShellLock xDocSh(new SwDocShell(SfxObjectCreateMode::INTERNAL));
5622 xDocSh->DoInitNew();
5623 SwDoc *pD = static_cast<SwDocShell*>((&xDocSh))->GetDoc();
5624
5625 SwNodeIndex aIdx(pD->GetNodes().GetEndOfContent(), -1);
5626 SwPaM aPaM(aIdx);
5627 pD->SetInReading(true);
5628 bool bRet = false;
5629 try
5630 {
5631 bRet = aReader.Read(*pD, OUString(), aPaM, OUString()) == ERRCODE_NONE;
5632 }
5633 catch (const std::runtime_error&)
5634 {
5635 }
5636 catch (const std::out_of_range&)
5637 {
5638 }
5639 pD->SetInReading(false);
5640
5641 return bRet;
5642 }
5643
5644 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
5645