1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
25 
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
38 
39 #include <vcl/errinf.hxx>
40 #include <svl/stritem.hxx>
41 #include <vcl/imap.hxx>
42 #include <svtools/htmltokn.h>
43 #include <svtools/htmlkywd.hxx>
44 #include <svtools/ctrltool.hxx>
45 #include <unotools/configmgr.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
50 
51 #include <svtools/htmlcfg.hxx>
52 #include <sfx2/linkmgr.hxx>
53 #include <editeng/kernitem.hxx>
54 #include <editeng/boxitem.hxx>
55 #include <editeng/fhgtitem.hxx>
56 #include <editeng/formatbreakitem.hxx>
57 #include <editeng/postitem.hxx>
58 #include <editeng/wghtitem.hxx>
59 #include <editeng/crossedoutitem.hxx>
60 #include <editeng/udlnitem.hxx>
61 #include <editeng/escapementitem.hxx>
62 #include <editeng/blinkitem.hxx>
63 #include <editeng/ulspitem.hxx>
64 #include <editeng/colritem.hxx>
65 #include <editeng/fontitem.hxx>
66 #include <editeng/adjustitem.hxx>
67 #include <editeng/lrspitem.hxx>
68 #include <editeng/protitem.hxx>
69 #include <editeng/flstitem.hxx>
70 #include <svx/unobrushitemhelper.hxx>
71 
72 #include <frmatr.hxx>
73 #include <charatr.hxx>
74 #include <fmtfld.hxx>
75 #include <fmtpdsc.hxx>
76 #include <fmtanchr.hxx>
77 #include <fmtsrnd.hxx>
78 #include <fmtfsize.hxx>
79 #include <fmtclds.hxx>
80 #include <fchrfmt.hxx>
81 #include <fmtinfmt.hxx>
82 #include <fmtfollowtextflow.hxx>
83 #include <fmtornt.hxx>
84 #include <doc.hxx>
85 #include <IDocumentUndoRedo.hxx>
86 #include <IDocumentSettingAccess.hxx>
87 #include <IDocumentLayoutAccess.hxx>
88 #include <IDocumentLinksAdministration.hxx>
89 #include <IDocumentRedlineAccess.hxx>
90 #include <IDocumentFieldsAccess.hxx>
91 #include <IDocumentStylePoolAccess.hxx>
92 #include <IDocumentStatistics.hxx>
93 #include <IDocumentState.hxx>
94 #include <pam.hxx>
95 #include <ndtxt.hxx>
96 #include <mdiexp.hxx>
97 #include <poolfmt.hxx>
98 #include <pagedesc.hxx>
99 #include <IMark.hxx>
100 #include <docsh.hxx>
101 #include <editsh.hxx>
102 #include <docufld.hxx>
103 #include "swcss1.hxx"
104 #include <fltini.hxx>
105 #include <htmltbl.hxx>
106 #include "htmlnum.hxx"
107 #include "swhtml.hxx"
108 #include "wrthtml.hxx"
109 #include <linkenum.hxx>
110 #include <breakit.hxx>
111 #include <SwAppletImpl.hxx>
112 #include <swdll.hxx>
113 #include <txatbase.hxx>
114 
115 #include <sfx2/viewfrm.hxx>
116 #include <svx/svdobj.hxx>
117 #include <officecfg/Office/Writer.hxx>
118 #include <comphelper/sequenceashashmap.hxx>
119 #include <comphelper/sequence.hxx>
120 
121 #include <swerror.h>
122 #include <ndole.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 #include <frameformats.hxx>
126 
// NOTE(review): presumably a mask/limit tied to the seven HTML font sizes
// (1-7) that the parser reads into m_aFontHeights - confirm at the use sites.
#define FONTSIZE_MASK           7

// Escapement settings for the HTML import. The super-/subscript values
// simply reuse the editeng defaults.
// NOTE(review): HTML_ESC_PROP looks like a proportional size in percent - confirm.
#define HTML_ESC_PROP 80
#define HTML_ESC_SUPER DFLT_ESC_SUPER
#define HTML_ESC_SUB DFLT_ESC_SUB

// Values of the TYPE option of <SPACER>; they are the targets of the
// aHTMLSpacerTypeTable mapping below.
#define HTML_SPTYPE_BLOCK 1
#define HTML_SPTYPE_HORI 2
#define HTML_SPTYPE_VERT 3
136 
137 using editeng::SvxBorderLine;
138 using namespace ::com::sun::star;
139 
// <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
// Maps the textual ALIGN option value to the paragraph adjustment.
// "middle" is accepted as a synonym for "center", and "char" is mapped to
// left alignment. The table is terminated by a nullptr entry.
HTMLOptionEnum<SvxAdjust> const aHTMLPAlignTable[] =
{
    { OOO_STRING_SVTOOLS_HTML_AL_left,    SvxAdjust::Left     },
    { OOO_STRING_SVTOOLS_HTML_AL_center,  SvxAdjust::Center   },
    { OOO_STRING_SVTOOLS_HTML_AL_middle,  SvxAdjust::Center   }, // Netscape
    { OOO_STRING_SVTOOLS_HTML_AL_right,   SvxAdjust::Right    },
    { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block    },
    { OOO_STRING_SVTOOLS_HTML_AL_char,    SvxAdjust::Left     },
    { nullptr,                            SvxAdjust(0)        }
};
151 
// <SPACER TYPE=...>
// Maps the textual TYPE option value to one of the HTML_SPTYPE_* constants.
// The table is terminated by a nullptr entry.
HTMLOptionEnum<sal_uInt16> const aHTMLSpacerTypeTable[] =
{
    { OOO_STRING_SVTOOLS_HTML_SPTYPE_block,      HTML_SPTYPE_BLOCK       },
    { OOO_STRING_SVTOOLS_HTML_SPTYPE_horizontal, HTML_SPTYPE_HORI        },
    { OOO_STRING_SVTOOLS_HTML_SPTYPE_vertical,   HTML_SPTYPE_VERT        },
    { nullptr,                                   0                       }
};
160 
HTMLReader()161 HTMLReader::HTMLReader()
162 {
163     m_bTemplateBrowseMode = true;
164 }
165 
GetTemplateName(SwDoc & rDoc) const166 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
167 {
168     if (!rDoc.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE))
169         // HTML import into Writer, avoid loading the Writer/Web template.
170         return OUString();
171 
172     static const OUStringLiteral sTemplateWithoutExt(u"internal/html");
173     SvtPathOptions aPathOpt;
174 
175     // first search for OpenDocument Writer/Web template
176     // OpenDocument Writer/Web template (extension .oth)
177     OUString sTemplate( sTemplateWithoutExt + ".oth" );
178     if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
179         return sTemplate;
180 
181         // no OpenDocument Writer/Web template found.
182         // search for OpenOffice.org Writer/Web template
183     sTemplate = sTemplateWithoutExt + ".stw";
184     if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
185         return sTemplate;
186 
187     OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
188 
189     return OUString();
190 }
191 
SetStrmStgPtr()192 bool HTMLReader::SetStrmStgPtr()
193 {
194     OSL_ENSURE( m_pMedium, "Where is the medium??" );
195 
196     if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
197     {
198         m_pStream = m_pMedium->GetInStream();
199         return true;
200     }
201     return false;
202 
203 }
204 
205 // Call for the general Reader-Interface
Read(SwDoc & rDoc,const OUString & rBaseURL,SwPaM & rPam,const OUString & rName)206 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
207 {
208     SetupFilterOptions();
209 
210     if( !m_pStream )
211     {
212         OSL_ENSURE( m_pStream, "HTML-Read without stream" );
213         return ERR_SWG_READ_ERROR;
214     }
215 
216     if( !m_bInsertMode )
217     {
218         Reader::ResetFrameFormats( rDoc );
219 
220         // Set the HTML page style, when it isn't a HTML document,
221         // otherwise it's already set.
222         if( !rDoc.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE) && m_aNamespace != "reqif-xhtml" )
223         {
224             rDoc.getIDocumentContentOperations().InsertPoolItem( rPam, SwFormatPageDesc(
225                 rDoc.getIDocumentStylePoolAccess().GetPageDescFromPool( RES_POOLPAGE_HTML, false )) );
226         }
227     }
228 
229     // so nobody steals the document!
230     rtl::Reference<SwDoc> xHoldAlive(&rDoc);
231     ErrCode nRet = ERRCODE_NONE;
232     tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
233                                             rName, rBaseURL, !m_bInsertMode, m_pMedium,
234                                             IsReadUTF8(),
235                                             m_bIgnoreHTMLComments, m_aNamespace );
236 
237     SvParserState eState = xParser->CallParser();
238 
239     if( SvParserState::Pending == eState )
240         m_pStream->ResetError();
241     else if( SvParserState::Accepted != eState )
242     {
243         const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
244             + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
245 
246         // use the stream as transport for error number
247         nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
248                                     DialogMask::ButtonsOk | DialogMask::MessageError );
249     }
250 
251     return nRet;
252 }
253 
SwHTMLParser(SwDoc * pD,SwPaM & rCursor,SvStream & rIn,const OUString & rPath,const OUString & rBaseURL,bool bReadNewDoc,SfxMedium * pMed,bool bReadUTF8,bool bNoHTMLComments,const OUString & rNamespace)254 SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn,
255                             const OUString& rPath,
256                             const OUString& rBaseURL,
257                             bool bReadNewDoc,
258                             SfxMedium* pMed, bool bReadUTF8,
259                             bool bNoHTMLComments,
260                             const OUString& rNamespace )
261     : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
262     m_aPathToFile( rPath ),
263     m_sBaseURL( rBaseURL ),
264     m_xAttrTab(std::make_shared<HTMLAttrTable>()),
265     m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266     m_xDoc( pD ),
267     m_pActionViewShell( nullptr ),
268     m_pSttNdIdx( nullptr ),
269     m_pFormImpl( nullptr ),
270     m_pMarquee( nullptr ),
271     m_pImageMap( nullptr ),
272     m_nBaseFontStMin( 0 ),
273     m_nFontStMin( 0 ),
274     m_nDefListDeep( 0 ),
275     m_nFontStHeadStart( 0 ),
276     m_nSBModuleCnt( 0 ),
277     m_nMissingImgMaps( 0 ),
278     m_nParaCnt( 5 ),
279     // #i83625#
280     m_nContextStMin( 0 ),
281     m_nContextStAttrMin( 0 ),
282     m_nSelectEntryCnt( 0 ),
283     m_nOpenParaToken( HtmlTokenId::NONE ),
284     m_eJumpTo( JumpToMarks::NONE ),
285 #ifdef DBG_UTIL
286     m_nContinue( 0 ),
287 #endif
288     m_eParaAdjust( SvxAdjust::End ),
289     m_bDocInitialized( false ),
290     m_bSetModEnabled( false ),
291     m_bInFloatingFrame( false ),
292     m_bInField( false ),
293     m_bCallNextToken( false ),
294     m_bIgnoreRawData( false ),
295     m_bLBEntrySelected ( false ),
296     m_bTAIgnoreNewPara ( false ),
297     m_bFixMarqueeWidth ( false ),
298     m_bNoParSpace( false ),
299     m_bInNoEmbed( false ),
300     m_bInTitle( false ),
301     m_bUpdateDocStat( false ),
302     m_bFixSelectWidth( false ),
303     m_bTextArea( false ),
304     m_bSelect( false ),
305     m_bInFootEndNoteAnchor( false ),
306     m_bInFootEndNoteSymbol( false ),
307     m_bIgnoreHTMLComments( bNoHTMLComments ),
308     m_bRemoveHidden( false ),
309     m_bBodySeen( false ),
310     m_bReadingHeaderOrFooter( false ),
311     m_bNotifyMacroEventRead( false ),
312     m_isInTableStructure(false),
313     m_nTableDepth( 0 ),
314     m_pTempViewFrame(nullptr)
315 {
316     // If requested explicitly, then force ignoring of comments (don't create postits for them).
317     if (!utl::ConfigManager::IsFuzzing() && officecfg::Office::Writer::Filter::Import::HTML::IgnoreComments::get())
318         m_bIgnoreHTMLComments = true;
319 
320     m_nEventId = nullptr;
321     m_bUpperSpace = m_bViewCreated = m_bChkJumpMark = false;
322 
323     m_eScriptLang = HTMLScriptLanguage::Unknown;
324 
325     rCursor.DeleteMark();
326     m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert
327     memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
328 
329     // Read the font sizes 1-7 from the INI file
330     SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
331     m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20;
332     m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20;
333     m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20;
334     m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20;
335     m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20;
336     m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20;
337     m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20;
338 
339     m_bKeepUnknown = rHtmlOptions.IsImportUnknown();
340 
341     if(bReadNewDoc)
342     {
343         //CJK has different defaults, so a different object should be used for this
344         //RES_CHARTR_CJK_FONTSIZE is a valid value
345         SvxFontHeightItem aFontHeight(m_aFontHeights[2], 100, RES_CHRATR_FONTSIZE);
346         m_xDoc->SetDefault( aFontHeight );
347         SvxFontHeightItem aFontHeightCJK(m_aFontHeights[2], 100, RES_CHRATR_CJK_FONTSIZE);
348         m_xDoc->SetDefault( aFontHeightCJK );
349         SvxFontHeightItem aFontHeightCTL(m_aFontHeights[2], 100, RES_CHRATR_CTL_FONTSIZE);
350         m_xDoc->SetDefault( aFontHeightCTL );
351 
352         // #i18732# - adjust default of option 'FollowTextFlow'
353         // TODO: not sure what the appropriate default for HTML should be?
354         m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
355     }
356 
357     // Change to HTML mode during the import, so that the right styles are created
358     m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
359     m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
360 
361     m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
362     m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() );
363 
364     if( bReadUTF8 )
365     {
366         SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
367     }
368     else
369     {
370         SwDocShell *pDocSh = m_xDoc->GetDocShell();
371         SvKeyValueIterator *pHeaderAttrs =
372             pDocSh->GetHeaderAttributes();
373         if( pHeaderAttrs )
374             SetEncodingByHTTPHeader( pHeaderAttrs );
375     }
376     m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
377 
378     SwDocShell* pDocSh = m_xDoc->GetDocShell();
379     if( pDocSh )
380     {
381         m_bViewCreated = true;  // not, load synchronous
382 
383         // a jump mark is present
384 
385         if( pMed )
386         {
387             m_sJmpMark = pMed->GetURLObject().GetMark();
388             if( !m_sJmpMark.isEmpty() )
389             {
390                 m_eJumpTo = JumpToMarks::Mark;
391                 sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
392                 sal_Int32 nPos =  nLastPos != -1 ? nLastPos : 0;
393 
394                 OUString sCmp;
395                 if (nPos)
396                 {
397                     sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
398                 }
399 
400                 if( !sCmp.isEmpty() )
401                 {
402                     sCmp = sCmp.toAsciiLowerCase();
403                     if( sCmp == "region" )
404                         m_eJumpTo = JumpToMarks::Region;
405                     else if( sCmp == "table" )
406                         m_eJumpTo = JumpToMarks::Table;
407                     else if( sCmp == "graphic" )
408                         m_eJumpTo = JumpToMarks::Graphic;
409                     else if( sCmp == "outline" ||
410                             sCmp == "text" ||
411                             sCmp == "frame" )
412                         m_eJumpTo = JumpToMarks::NONE;  // this is nothing valid!
413                     else
414                         // otherwise this is a normal (book)mark
415                         nPos = -1;
416                 }
417                 else
418                     nPos = -1;
419 
420                 if( nPos != -1 )
421                     m_sJmpMark = m_sJmpMark.copy( 0, nPos );
422                 if( m_sJmpMark.isEmpty() )
423                     m_eJumpTo = JumpToMarks::NONE;
424             }
425         }
426     }
427 
428     if (!rNamespace.isEmpty())
429     {
430         SetNamespace(rNamespace);
431         m_bXHTML = true;
432         if (rNamespace == "reqif-xhtml")
433             m_bReqIF = true;
434     }
435 
436     // Extract load parameters which are specific to this filter.
437     if (!pMed)
438     {
439         return;
440     }
441 
442     comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
443     auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
444     if (it == aLoadMap.end())
445     {
446         return;
447     }
448 
449     uno::Sequence<OUString> aTypes;
450     it->second >>= aTypes;
451     m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
452 }
453 
/**
 * Tears the parser down: pops leftover contexts, restores the document's
 * HTML mode, removes a pending user event, updates links / finishes loading
 * on the DocShell, and releases all helper objects and the document.
 */
SwHTMLParser::~SwHTMLParser()
{
#ifdef DBG_UTIL
    OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
#endif

    OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
    OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
    // Drop any protection so that every remaining context can be popped.
    m_nContextStMin = 0;
    while (!m_aContexts.empty())
    {
        std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
        ClearContext(xCntxt.get());
    }

    // Remember whether loading was asynchronous before resetting the flag,
    // and restore the HTML mode that was active before the import.
    bool bAsync = m_xDoc->IsInLoadAsynchron();
    m_xDoc->SetInLoadAsynchron( false );
    m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);

    // A still pending user event must not fire once this object is gone.
    if( m_xDoc->GetDocShell() && m_nEventId )
        Application::RemoveUserEvent( m_nEventId );

    // the DocumentDetected maybe can delete the DocShells, therefore fetch again
    if( m_xDoc->GetDocShell() )
    {
        // update linked sections
        sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
        if( nLinkMode != NEVER && bAsync &&
            SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
            m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );

        if ( m_xDoc->GetDocShell()->IsLoading() )
        {
            // #i59688#
            m_xDoc->GetDocShell()->LoadingFinished();
        }
    }

    delete m_pSttNdIdx;

    // Attributes that were never set are owned by this list; free them.
    if( !m_aSetAttrTab.empty() )
    {
        OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
        for ( const auto& rpAttr : m_aSetAttrTab )
            delete rpAttr;
        m_aSetAttrTab.clear();
    }

    m_pCSS1Parser.reset();
    m_pNumRuleInfo.reset();
    DeleteFormImpl();
    m_pFootEndNoteImpl.reset();

    OSL_ENSURE(!m_xTable, "It exists still an open table");
    m_pImageMaps.reset();

    OSL_ENSURE( m_vPendingStack.empty(),
            "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
    m_vPendingStack.clear();

    // Release the document reference held by the parser.
    m_xDoc.clear();

    if ( m_pTempViewFrame )
    {
        m_pTempViewFrame->DoClose();

        // the temporary view frame is hidden, so the hidden flag might need to be removed
        // NOTE(review): m_xDoc was cleared just above, so m_xDoc.is() looks
        // like it can never be true here and the SID_HIDDEN item would never
        // be cleared - confirm whether this ordering is intended.
        if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
            m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
    }
}
525 
/// Handler of the user event posted by CallParser(): flags an aborted import
/// as an error and then invokes the parser's asynchronous call link.
IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
{
    // The event has been delivered, so there is nothing left to remove.
    m_nEventId = nullptr;

    // #i47907# - If the document has already been destructed,
    // the parser should be aware of this:
    SwDocShell* pShell = m_xDoc->GetDocShell();
    const bool bImportAborted = pShell && pShell->IsAbortingImport();
    if( bImportAborted || m_xDoc->getReferenceCount() == 1 )
    {
        // was the import aborted by SFX?
        eState = SvParserState::Error;
    }

    GetAsynchCallLink().Call(nullptr);
}
541 
CallParser()542 SvParserState SwHTMLParser::CallParser()
543 {
544     // create temporary index on position 0, so it won't be moved!
545     m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
546     if( !IsNewDoc() )       // insert into existing document ?
547     {
548         const SwPosition* pPos = m_pPam->GetPoint();
549 
550         m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
551 
552         *m_pSttNdIdx = pPos->nNode.GetIndex()-1;
553         m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
554 
555         SwPaM aInsertionRangePam( *pPos );
556 
557         m_pPam->Move( fnMoveBackward );
558 
559         // split any redline over the insertion point
560         aInsertionRangePam.SetMark();
561         *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
562         aInsertionRangePam.Move( fnMoveBackward );
563         m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
564 
565         m_xDoc->SetTextFormatColl( *m_pPam,
566                 m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
567     }
568 
569     if( GetMedium() )
570     {
571         if( !m_bViewCreated )
572         {
573             m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
574         }
575         else
576         {
577             m_bViewCreated = true;
578             m_nEventId = nullptr;
579         }
580     }
581     else // show progress bar
582     {
583         rInput.Seek(STREAM_SEEK_TO_END);
584         rInput.ResetError();
585 
586         m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
587 
588         rInput.Seek(STREAM_SEEK_TO_BEGIN);
589         rInput.ResetError();
590     }
591 
592     StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
593 
594     SvParserState eRet = HTMLParser::CallParser();
595     return eRet;
596 }
597 
CanRemoveNode(sal_uLong nNodeIdx) const598 bool SwHTMLParser::CanRemoveNode(sal_uLong nNodeIdx) const
599 {
600     const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
601     return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
602 }
603 
Continue(HtmlTokenId nToken)604 void SwHTMLParser::Continue( HtmlTokenId nToken )
605 {
606 #ifdef DBG_UTIL
607     OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
608     m_nContinue++;
609 #endif
610 
611     // When the import (of SFX) is aborted, an error will be set but
612     // we still continue, so that we clean up properly.
613     OSL_ENSURE( SvParserState::Error!=eState,
614             "SwHTMLParser::Continue: already set an error" );
615     if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
616         eState = SvParserState::Error;
617 
618     // Fetch SwViewShell from document, save it and set as current.
619     SwViewShell *pInitVSh = CallStartAction();
620 
621     if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
622     {
623         // At first call first return, show document and wait for callback
624         // time.
625         // At this point in CallParser only one digit was read and
626         // a SaveState(0) was called.
627         eState = SvParserState::Pending;
628         m_bViewCreated = true;
629         m_xDoc->SetInLoadAsynchron( true );
630 
631 #ifdef DBG_UTIL
632         m_nContinue--;
633 #endif
634 
635         return;
636     }
637 
638     m_bSetModEnabled = false;
639     if( m_xDoc->GetDocShell() )
640     {
641         m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
642         if( m_bSetModEnabled )
643         {
644             m_xDoc->GetDocShell()->EnableSetModified( false );
645         }
646     }
647 
648     // during import don't call OLE-Modified
649     Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
650     m_xDoc->SetOle2Link( Link<bool,void>() );
651 
652     bool bModified = m_xDoc->getIDocumentState().IsModified();
653     bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
654     m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
655 
656     // When the import will be aborted, don't call Continue anymore.
657     // If a Pending-Stack exists make sure the stack is ended with a call
658     // of NextToken.
659     if( SvParserState::Error == eState )
660     {
661         OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
662                 "SwHTMLParser::Continue: Pending-Stack without Token" );
663         if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
664             NextToken( m_vPendingStack.back().nToken );
665         OSL_ENSURE( m_vPendingStack.empty(),
666                 "SwHTMLParser::Continue: There is again a Pending-Stack" );
667     }
668     else
669     {
670         HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
671     }
672 
673     // disable progress bar again
674     m_xProgress.reset();
675 
676     bool bLFStripped = false;
677     if( SvParserState::Pending != GetStatus() )
678     {
679         // set the last attributes yet
680         {
681             if( !m_aScriptSource.isEmpty() )
682             {
683                 SwScriptFieldType *pType =
684                     static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
685 
686                 SwScriptField aField( pType, m_aScriptType, m_aScriptSource,
687                                     false );
688                 InsertAttr( SwFormatField( aField ), false );
689             }
690 
691             if( m_pAppletImpl )
692             {
693                 if( m_pAppletImpl->GetApplet().is() )
694                     EndApplet();
695                 else
696                     EndObject();
697             }
698 
699             // maybe remove an existing LF after the last paragraph
700             if( IsNewDoc() )
701                 bLFStripped = StripTrailingLF() > 0;
702 
703             // close still open numbering
704             while( GetNumInfo().GetNumRule() )
705                 EndNumberBulletList();
706 
707             OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
708             // try this twice, first normally to let m_nContextStMin decrease
709             // naturally and get contexts popped in desired order, and if that
710             // fails force it
711             for (int i = 0; i < 2; ++i)
712             {
713                 while (m_aContexts.size() > m_nContextStMin)
714                 {
715                     std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
716                     if (xCntxt)
717                         EndContext(xCntxt.get());
718                 }
719                 if (!m_nContextStMin)
720                     break;
721                 OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
722                 m_nContextStMin = 0;
723             }
724 
725             m_aParaAttrs.clear();
726 
727             SetAttr( false );
728 
729             // set the first delayed styles
730             m_pCSS1Parser->SetDelayedStyles();
731         }
732 
733         // again correct the start
734         if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
735         {
736             SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
737             SwNodeIndex aNxtIdx( *m_pSttNdIdx );
738             if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
739             {
740                 const sal_Int32 nStt = pTextNode->GetText().getLength();
741                 // when the cursor is still in the node, then set him at the end
742                 if( m_pPam->GetPoint()->nNode == aNxtIdx )
743                 {
744                     m_pPam->GetPoint()->nNode = *m_pSttNdIdx;
745                     m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt );
746                 }
747 
748 #if OSL_DEBUG_LEVEL > 0
749 // !!! shouldn't be possible, or ??
750                 OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(),
751                     "Pam.Bound1 is still in the node" );
752                 OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(),
753                     "Pam.Bound2 is still in the node" );
754 
755                 if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() )
756                 {
757                     const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex();
758                     m_pPam->GetBound().nContent.Assign( pTextNode,
759                                     pTextNode->GetText().getLength() + nCntPos );
760                 }
761                 if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() )
762                 {
763                     const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex();
764                     m_pPam->GetBound( false ).nContent.Assign( pTextNode,
765                                     pTextNode->GetText().getLength() + nCntPos );
766                 }
767 #endif
768                 // Keep character attribute!
769                 SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
770                 if (pTextNode->GetText().getLength())
771                     pDelNd->FormatToTextAttr( pTextNode );
772                 else
773                     pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
774                 pTextNode->JoinNext();
775             }
776         }
777     }
778 
779     if( SvParserState::Accepted == eState )
780     {
781         if( m_nMissingImgMaps )
782         {
783             // Some Image-Map relations are still missing.
784             // Maybe now the Image-Maps are there?
785             ConnectImageMaps();
786         }
787 
788         // now remove the last useless paragraph
789         SwPosition* pPos = m_pPam->GetPoint();
790         if( !pPos->nContent.GetIndex() && !bLFStripped )
791         {
792             SwTextNode* pCurrentNd;
793             sal_uLong nNodeIdx = pPos->nNode.GetIndex();
794 
795             bool bHasFlysOrMarks =
796                 HasCurrentParaFlys() || HasCurrentParaBookmarks( true );
797 
798             if( IsNewDoc() )
799             {
800                 if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx))
801                 {
802                     SwContentNode* pCNd = m_pPam->GetContentNode();
803                     if( pCNd && pCNd->StartOfSectionIndex()+2 <
804                         pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
805                     {
806                         SwViewShell *pVSh = CheckActionViewShell();
807                         SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
808                         if( pCursorSh &&
809                             pCursorSh->GetCursor()->GetPoint()
810                                    ->nNode.GetIndex() == nNodeIdx )
811                         {
812                             pCursorSh->MovePara(GoPrevPara, fnParaEnd );
813                             pCursorSh->SetMark();
814                             pCursorSh->ClearMark();
815                         }
816                         m_pPam->GetBound().nContent.Assign( nullptr, 0 );
817                         m_pPam->GetBound(false).nContent.Assign( nullptr, 0 );
818                         m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode );
819                     }
820                 }
821             }
822             else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
823             {
824                 if( pCurrentNd->CanJoinNext( &pPos->nNode ))
825                 {
826                     SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode();
827                     pPos->nContent.Assign( pNextNd, 0 );
828                     m_pPam->SetMark(); m_pPam->DeleteMark();
829                     pNextNd->JoinPrev();
830                 }
831                 else if (pCurrentNd->GetText().isEmpty())
832                 {
833                     pPos->nContent.Assign( nullptr, 0 );
834                     m_pPam->SetMark(); m_pPam->DeleteMark();
835                     m_xDoc->GetNodes().Delete( pPos->nNode );
836                     m_pPam->Move( fnMoveBackward );
837                 }
838             }
839         }
840 
841         // annul the SplitNode from the beginning
842         else if( !IsNewDoc() )
843         {
844             if( pPos->nContent.GetIndex() )                 // then there was no <p> at the end
845                 m_pPam->Move( fnMoveForward, GoInNode );    // therefore to the next
846             SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode();
847             SwNodeIndex aPrvIdx( pPos->nNode );
848             if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
849                 *m_pSttNdIdx <= aPrvIdx )
850             {
851                 // Normally here should take place a JoinNext, but all cursors and
852                 // so are registered in pTextNode, so that it MUST remain.
853 
854                 // Convert paragraph to character attribute, from Prev adopt
855                 // the paragraph attribute and the template!
856                 SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
857                 pTextNode->ChgFormatColl( pPrev->GetTextColl() );
858                 pTextNode->FormatToTextAttr( pPrev );
859                 pTextNode->ResetAllAttr();
860 
861                 if( pPrev->HasSwAttrSet() )
862                     pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
863 
864                 if( &m_pPam->GetBound().nNode.GetNode() == pPrev )
865                     m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
866                 if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev )
867                     m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
868 
869                 pTextNode->JoinPrev();
870             }
871         }
872 
873         // adjust AutoLoad in DocumentProperties
874         if (!utl::ConfigManager::IsFuzzing() && IsNewDoc())
875         {
876             SwDocShell *pDocShell(m_xDoc->GetDocShell());
877             OSL_ENSURE(pDocShell, "no SwDocShell");
878             if (pDocShell) {
879                 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
880                     pDocShell->GetModel(), uno::UNO_QUERY_THROW);
881                 uno::Reference<document::XDocumentProperties> xDocProps(
882                     xDPS->getDocumentProperties());
883                 OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
884                 if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
885                      (xDocProps->getAutoloadURL().isEmpty()) )
886                 {
887                     xDocProps->setAutoloadURL(m_aPathToFile);
888                 }
889             }
890         }
891 
892         if( m_bUpdateDocStat )
893         {
894             m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
895         }
896     }
897 
898     if( SvParserState::Pending != GetStatus() )
899     {
900         delete m_pSttNdIdx;
901         m_pSttNdIdx = nullptr;
902     }
903 
904     // should the parser be the last one who hold the document, then nothing
905     // has to be done anymore, document will be destroyed shortly!
906     if( 1 < m_xDoc->getReferenceCount() )
907     {
908         if( bWasUndo )
909         {
910             m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
911             m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
912         }
913         else if( !pInitVSh )
914         {
915             // When at the beginning of Continue no Shell was available,
916             // it's possible in the meantime one was created.
917             // In that case the bWasUndo flag is wrong and we must
918             // enable Undo.
919             SwViewShell *pTmpVSh = CheckActionViewShell();
920             if( pTmpVSh )
921             {
922                 m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
923             }
924         }
925 
926         m_xDoc->SetOle2Link( aOLELink );
927         if( !bModified )
928             m_xDoc->getIDocumentState().ResetModified();
929         if( m_bSetModEnabled && m_xDoc->GetDocShell() )
930         {
931             m_xDoc->GetDocShell()->EnableSetModified();
932             m_bSetModEnabled = false; // this is unnecessary here
933         }
934     }
935 
936     // When the Document-SwVievShell still exists and an Action is open
937     // (doesn't have to be by abort), end the Action, disconnect from Shell
938     // and finally reconstruct the old Shell.
939     CallEndAction( true );
940 
941 #ifdef DBG_UTIL
942     m_nContinue--;
943 #endif
944 }
945 
Notify(const SfxHint & rHint)946 void SwHTMLParser::Notify(const SfxHint& rHint)
947 {
948     if(rHint.GetId() == SfxHintId::Dying)
949     {
950         EndListeningAll();
951         ReleaseRef();
952     }
953 }
954 
DocumentDetected()955 void SwHTMLParser::DocumentDetected()
956 {
957     OSL_ENSURE( !m_bDocInitialized, "DocumentDetected called multiple times" );
958     m_bDocInitialized = true;
959     if( IsNewDoc() )
960     {
961         if( IsInHeader() )
962             FinishHeader();
963 
964         CallEndAction( true );
965 
966         m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
967         // For DocumentDetected in general a SwViewShell is created.
968         // But it also can be created later, in case the UI is captured.
969         CallStartAction();
970     }
971 }
972 
973 // is called for every token that is recognised in CallParser
NextToken(HtmlTokenId nToken)974 void SwHTMLParser::NextToken( HtmlTokenId nToken )
975 {
976     if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
977         || 1 == m_xDoc->getReferenceCount() )
978     {
979         // Was the import cancelled by SFX? If a pending stack
980         // exists, clean it.
981         eState = SvParserState::Error;
982         OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
983                 "SwHTMLParser::NextToken: Pending-Stack without token" );
984         if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
985             return ;
986     }
987 
988 #if OSL_DEBUG_LEVEL > 0
989     if( !m_vPendingStack.empty() )
990     {
991         switch( nToken )
992         {
993             // tables are read by recursive method calls
994         case HtmlTokenId::TABLE_ON:
995             // For CSS declarations we might have to wait
996             // for a file download to finish
997         case HtmlTokenId::LINK:
998             // For controls we might have to set the size.
999         case HtmlTokenId::INPUT:
1000         case HtmlTokenId::TEXTAREA_ON:
1001         case HtmlTokenId::SELECT_ON:
1002         case HtmlTokenId::SELECT_OFF:
1003             break;
1004         default:
1005             OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1006             break;
1007         }
1008     }
1009 #endif
1010 
1011     // The following special cases have to be treated before the
1012     // filter detection, because Netscape doesn't reference the content
1013     // of the title for filter detection either.
1014     if( m_vPendingStack.empty() )
1015     {
1016         if( m_bInTitle )
1017         {
1018             switch( nToken )
1019             {
1020             case HtmlTokenId::TITLE_OFF:
1021             {
1022                 OUString sTitle = m_sTitle.makeStringAndClear();
1023                 if( IsNewDoc() && !sTitle.isEmpty() )
1024                 {
1025                     if( m_xDoc->GetDocShell() ) {
1026                         uno::Reference<document::XDocumentPropertiesSupplier>
1027                             xDPS(m_xDoc->GetDocShell()->GetModel(),
1028                             uno::UNO_QUERY_THROW);
1029                         uno::Reference<document::XDocumentProperties> xDocProps(
1030                             xDPS->getDocumentProperties());
1031                         OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1032                         if (xDocProps.is()) {
1033                             xDocProps->setTitle(sTitle);
1034                         }
1035 
1036                         m_xDoc->GetDocShell()->SetTitle(sTitle);
1037                     }
1038                 }
1039                 m_bInTitle = false;
1040                 break;
1041             }
1042 
1043             case HtmlTokenId::NONBREAKSPACE:
1044                 m_sTitle.append(" ");
1045                 break;
1046 
1047             case HtmlTokenId::SOFTHYPH:
1048                 m_sTitle.append("-");
1049                 break;
1050 
1051             case HtmlTokenId::TEXTTOKEN:
1052                 m_sTitle.append(aToken);
1053                 break;
1054 
1055             default:
1056                 m_sTitle.append("<");
1057                 if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1058                     m_sTitle.append("/");
1059                 m_sTitle.append(sSaveToken);
1060                 if( !aToken.isEmpty() )
1061                 {
1062                     m_sTitle.append(" ");
1063                     m_sTitle.append(aToken);
1064                 }
1065                 m_sTitle.append(">");
1066                 break;
1067             }
1068 
1069             return;
1070         }
1071     }
1072 
1073     // Find out what type of document it is if we don't know already.
1074     // For Controls this has to be finished before the control is inserted
1075     // because for inserting a View is needed.
1076     if( !m_bDocInitialized )
1077         DocumentDetected();
1078 
1079     bool bGetIDOption = false, bInsertUnknown = false;
1080     bool bUpperSpaceSave = m_bUpperSpace;
1081     m_bUpperSpace = false;
1082 
1083     // The following special cases may or have to be treated after the
1084     // filter detection
1085     if( m_vPendingStack.empty() )
1086     {
1087         if( m_bInFloatingFrame )
1088         {
1089             // <SCRIPT> is ignored here (from us), because it is ignored in
1090             // Applets as well
1091             if( HtmlTokenId::IFRAME_OFF == nToken )
1092             {
1093                 m_bCallNextToken = false;
1094                 m_bInFloatingFrame = false;
1095             }
1096 
1097             return;
1098         }
1099         else if( m_bInNoEmbed )
1100         {
1101             switch( nToken )
1102             {
1103             case HtmlTokenId::NOEMBED_OFF:
1104                 m_aContents = convertLineEnd(m_aContents, GetSystemLineEnd());
1105                 InsertComment( m_aContents, OOO_STRING_SVTOOLS_HTML_noembed );
1106                 m_aContents.clear();
1107                 m_bCallNextToken = false;
1108                 m_bInNoEmbed = false;
1109                 break;
1110 
1111             case HtmlTokenId::RAWDATA:
1112                 InsertCommentText( OOO_STRING_SVTOOLS_HTML_noembed );
1113                 break;
1114 
1115             default:
1116                 OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1117                 break;
1118             }
1119 
1120             return;
1121         }
1122         else if( m_pAppletImpl )
1123         {
1124             // in an applet only <PARAM> tags and the </APPLET> tag
1125             // are of interest for us (for the moment)
1126             // <SCRIPT> is ignored here (from Netscape)!
1127 
1128             switch( nToken )
1129             {
1130             case HtmlTokenId::APPLET_OFF:
1131                 m_bCallNextToken = false;
1132                 EndApplet();
1133                 break;
1134             case HtmlTokenId::OBJECT_OFF:
1135                 m_bCallNextToken = false;
1136                 EndObject();
1137                 break;
1138             case HtmlTokenId::PARAM:
1139                 InsertParam();
1140                 break;
1141             default: break;
1142             }
1143 
1144             return;
1145         }
1146         else if( m_bTextArea )
1147         {
1148             // in a TextArea everything up to </TEXTAREA> is inserted as text.
1149             // <SCRIPT> is ignored here (from Netscape)!
1150 
1151             switch( nToken )
1152             {
1153             case HtmlTokenId::TEXTAREA_OFF:
1154                 m_bCallNextToken = false;
1155                 EndTextArea();
1156                 break;
1157 
1158             default:
1159                 InsertTextAreaText( nToken );
1160                 break;
1161             }
1162 
1163             return;
1164         }
1165         else if( m_bSelect )
1166         {
1167             // HAS to be treated after bNoScript!
1168             switch( nToken )
1169             {
1170             case HtmlTokenId::SELECT_OFF:
1171                 m_bCallNextToken = false;
1172                 EndSelect();
1173                 return;
1174 
1175             case HtmlTokenId::OPTION:
1176                 InsertSelectOption();
1177                 return;
1178 
1179             case HtmlTokenId::TEXTTOKEN:
1180                 InsertSelectText();
1181                 return;
1182 
1183             case HtmlTokenId::INPUT:
1184             case HtmlTokenId::SCRIPT_ON:
1185             case HtmlTokenId::SCRIPT_OFF:
1186             case HtmlTokenId::NOSCRIPT_ON:
1187             case HtmlTokenId::NOSCRIPT_OFF:
1188             case HtmlTokenId::RAWDATA:
1189                 // treat in normal switch
1190                 break;
1191 
1192             default:
1193                 // ignore
1194                 return;
1195             }
1196         }
1197         else if( m_pMarquee )
1198         {
1199             // in a TextArea everything up to </TEXTAREA> is inserted as text.
1200             // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1201             // script.
1202             switch( nToken )
1203             {
1204             case HtmlTokenId::MARQUEE_OFF:
1205                 m_bCallNextToken = false;
1206                 EndMarquee();
1207                 break;
1208 
1209             case HtmlTokenId::TEXTTOKEN:
1210                 InsertMarqueeText();
1211                 break;
1212             default: break;
1213             }
1214 
1215             return;
1216         }
1217         else if( m_bInField )
1218         {
1219             switch( nToken )
1220             {
1221             case HtmlTokenId::SDFIELD_OFF:
1222                 m_bCallNextToken = false;
1223                 EndField();
1224                 break;
1225 
1226             case HtmlTokenId::TEXTTOKEN:
1227                 InsertFieldText();
1228                 break;
1229             default: break;
1230             }
1231 
1232             return;
1233         }
1234         else if( m_bInFootEndNoteAnchor || m_bInFootEndNoteSymbol )
1235         {
1236             switch( nToken )
1237             {
1238             case HtmlTokenId::ANCHOR_OFF:
1239                 EndAnchor();
1240                 m_bCallNextToken = false;
1241                 break;
1242 
1243             case HtmlTokenId::TEXTTOKEN:
1244                 InsertFootEndNoteText();
1245                 break;
1246             default: break;
1247             }
1248             return;
1249         }
1250         else if( !m_aUnknownToken.isEmpty() )
1251         {
1252             // Paste content of unknown tags.
1253             // (but surely if we are not in the header section) fdo#36080 fdo#34666
1254             if (!aToken.isEmpty() && !IsInHeader() )
1255             {
1256                 if( !m_bDocInitialized )
1257                     DocumentDetected();
1258                 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1259 
1260                 // if there are temporary paragraph attributes and the
1261                 // paragraph isn't empty then the paragraph attributes
1262                 // are final.
1263                 m_aParaAttrs.clear();
1264 
1265                 SetAttr();
1266             }
1267 
1268             // Unknown token in the header are only closed by a matching
1269             // end-token, </HEAD> or <BODY>. Text inside is ignored.
1270             switch( nToken )
1271             {
1272             case HtmlTokenId::UNKNOWNCONTROL_OFF:
1273                 if( m_aUnknownToken != sSaveToken )
1274                     return;
1275                 [[fallthrough]];
1276             case HtmlTokenId::FRAMESET_ON:
1277             case HtmlTokenId::HEAD_OFF:
1278             case HtmlTokenId::BODY_ON:
1279             case HtmlTokenId::IMAGE:        // Don't know why Netscape acts this way.
1280                 m_aUnknownToken.clear();
1281                 break;
1282             case HtmlTokenId::TEXTTOKEN:
1283                 return;
1284             default:
1285                 m_aUnknownToken.clear();
1286                 break;
1287             }
1288         }
1289     }
1290 
1291     switch( nToken )
1292     {
1293     case HtmlTokenId::BODY_ON:
1294         if (!m_bBodySeen)
1295         {
1296             m_bBodySeen = true;
1297             if( !m_aStyleSource.isEmpty() )
1298             {
1299                 m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1300                 m_aStyleSource.clear();
1301             }
1302             if( IsNewDoc() )
1303             {
1304                 InsertBodyOptions();
1305                 // If there is a template for the first or the right page,
1306                 // it is set here.
1307                 const SwPageDesc *pPageDesc = nullptr;
1308                 if( m_pCSS1Parser->IsSetFirstPageDesc() )
1309                     pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1310                 else if( m_pCSS1Parser->IsSetRightPageDesc() )
1311                     pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1312 
1313                 if( pPageDesc )
1314                 {
1315                     m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1316                 }
1317             }
1318         }
1319         break;
1320 
1321     case HtmlTokenId::LINK:
1322         InsertLink();
1323         break;
1324 
1325     case HtmlTokenId::BASE:
1326         {
1327             const HTMLOptions& rHTMLOptions = GetOptions();
1328             for (size_t i = rHTMLOptions.size(); i; )
1329             {
1330                 const HTMLOption& rOption = rHTMLOptions[--i];
1331                 switch( rOption.GetToken() )
1332                 {
1333                 case HtmlOptionId::HREF:
1334                     m_sBaseURL = rOption.GetString();
1335                     break;
1336                 case HtmlOptionId::TARGET:
1337                     if( IsNewDoc() )
1338                     {
1339                         SwDocShell *pDocShell(m_xDoc->GetDocShell());
1340                         OSL_ENSURE(pDocShell, "no SwDocShell");
1341                         if (pDocShell) {
1342                             uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1343                                 pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1344                             uno::Reference<document::XDocumentProperties>
1345                                 xDocProps(xDPS->getDocumentProperties());
1346                             OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1347                             if (xDocProps.is()) {
1348                                 xDocProps->setDefaultTarget(
1349                                     rOption.GetString());
1350                             }
1351                         }
1352                     }
1353                     break;
1354                 default: break;
1355                 }
1356             }
1357         }
1358         break;
1359 
1360     case HtmlTokenId::META:
1361         {
1362             SvKeyValueIterator *pHTTPHeader = nullptr;
1363             if( IsNewDoc() )
1364             {
1365                 SwDocShell *pDocSh = m_xDoc->GetDocShell();
1366                 if( pDocSh )
1367                     pHTTPHeader = pDocSh->GetHeaderAttributes();
1368             }
1369             SwDocShell *pDocShell(m_xDoc->GetDocShell());
1370             OSL_ENSURE(pDocShell, "no SwDocShell");
1371             if (pDocShell)
1372             {
1373                 uno::Reference<document::XDocumentProperties> xDocProps;
1374                 if (IsNewDoc())
1375                 {
1376                     const uno::Reference<document::XDocumentPropertiesSupplier>
1377                         xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1378                     xDocProps = xDPS->getDocumentProperties();
1379                     OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1380                 }
1381                 ParseMetaOptions( xDocProps, pHTTPHeader );
1382             }
1383         }
1384         break;
1385 
1386     case HtmlTokenId::TITLE_ON:
1387         m_bInTitle = true;
1388         break;
1389 
1390     case HtmlTokenId::SCRIPT_ON:
1391         NewScript();
1392         break;
1393 
1394     case HtmlTokenId::SCRIPT_OFF:
1395         EndScript();
1396         break;
1397 
1398     case HtmlTokenId::NOSCRIPT_ON:
1399     case HtmlTokenId::NOSCRIPT_OFF:
1400         bInsertUnknown = true;
1401         break;
1402 
1403     case HtmlTokenId::STYLE_ON:
1404         NewStyle();
1405         break;
1406 
1407     case HtmlTokenId::STYLE_OFF:
1408         EndStyle();
1409         break;
1410 
1411     case HtmlTokenId::RAWDATA:
1412         if( !m_bIgnoreRawData )
1413         {
1414             if( IsReadScript() )
1415             {
1416                 AddScriptSource();
1417             }
1418             else if( IsReadStyle() )
1419             {
1420                 if( !m_aStyleSource.isEmpty() )
1421                     m_aStyleSource += "\n";
1422                 m_aStyleSource += aToken;
1423             }
1424         }
1425         break;
1426 
1427     case HtmlTokenId::OBJECT_ON:
1428         if (m_bXHTML)
1429         {
1430             if (!InsertEmbed())
1431                 InsertImage();
1432             break;
1433         }
1434 #if HAVE_FEATURE_JAVA
1435         NewObject();
1436         m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1437 #endif
1438         break;
1439 
1440     case HtmlTokenId::OBJECT_OFF:
1441         if (!m_aEmbeds.empty())
1442             m_aEmbeds.pop();
1443         break;
1444 
1445     case HtmlTokenId::APPLET_ON:
1446 #if HAVE_FEATURE_JAVA
1447         InsertApplet();
1448         m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1449 #endif
1450         break;
1451 
1452     case HtmlTokenId::IFRAME_ON:
1453         InsertFloatingFrame();
1454         m_bCallNextToken = m_bInFloatingFrame && m_xTable;
1455         break;
1456 
1457     case HtmlTokenId::LINEBREAK:
1458         if( !IsReadPRE() )
1459         {
1460             InsertLineBreak();
1461             break;
1462         }
1463         else
1464             bGetIDOption = true;
1465             // <BR>s in <PRE> resemble true LFs, hence no break
1466         [[fallthrough]];
1467 
1468     case HtmlTokenId::NEWPARA:
1469         // CR in PRE/LISTING/XMP
1470         {
1471             if( HtmlTokenId::NEWPARA==nToken ||
1472                 m_pPam->GetPoint()->nContent.GetIndex() )
1473             {
1474                 AppendTextNode(); // there is no LF at this place
1475                                  // therefore it will cause no problems
1476                 SetTextCollAttrs();
1477             }
1478             // progress bar
1479             if (m_xProgress)
1480                 m_xProgress->Update(rInput.Tell());
1481         }
1482         break;
1483 
1484     case HtmlTokenId::NONBREAKSPACE:
1485         m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1486         break;
1487 
1488     case HtmlTokenId::SOFTHYPH:
1489         m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1490         break;
1491 
1492     case HtmlTokenId::LINEFEEDCHAR:
1493         if( m_pPam->GetPoint()->nContent.GetIndex() )
1494             AppendTextNode();
1495         if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode))
1496         {
1497             NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1498             EndAttr( m_xAttrTab->pBreak, false );
1499         }
1500         break;
1501 
1502     case HtmlTokenId::TEXTTOKEN:
1503         // insert string without spanning attributes at the end.
1504         if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1505         {
1506             sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex();
1507             const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
1508             if (pTextNode)
1509             {
1510                 const OUString& rText = pTextNode->GetText();
1511                 sal_Unicode cLast = rText[--nPos];
1512                 if( ' ' == cLast || '\x0a' == cLast)
1513                     aToken = aToken.copy(1);
1514             }
1515             else
1516                 aToken = aToken.copy(1);
1517 
1518             if( aToken.isEmpty() )
1519             {
1520                 m_bUpperSpace = bUpperSpaceSave;
1521                 break;
1522             }
1523         }
1524 
1525         if( !aToken.isEmpty() )
1526         {
1527             if( !m_bDocInitialized )
1528                 DocumentDetected();
1529 
1530             if (!m_aEmbeds.empty())
1531             {
1532                 // The text token is inside an OLE object, which means
1533                 // alternate text.
1534                 SwOLENode* pOLENode = m_aEmbeds.top();
1535                 if (SwFlyFrameFormat* pFormat
1536                     = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1537                 {
1538                     if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
1539                     {
1540                         pObject->SetTitle(pObject->GetTitle() + aToken);
1541                         break;
1542                     }
1543                 }
1544             }
1545 
1546             m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
1547 
1548             // if there are temporary paragraph attributes and the
1549             // paragraph isn't empty then the paragraph attributes
1550             // are final.
1551             m_aParaAttrs.clear();
1552 
1553             SetAttr();
1554         }
1555         break;
1556 
1557     case HtmlTokenId::HORZRULE:
1558         InsertHorzRule();
1559         break;
1560 
1561     case HtmlTokenId::IMAGE:
1562         InsertImage();
1563         // if only the parser references the doc, we can break and set
1564         // an error code
1565         if( 1 == m_xDoc->getReferenceCount() )
1566         {
1567             eState = SvParserState::Error;
1568         }
1569         break;
1570 
1571     case HtmlTokenId::SPACER:
1572         InsertSpacer();
1573         break;
1574 
1575     case HtmlTokenId::EMBED:
1576         InsertEmbed();
1577         break;
1578 
1579     case HtmlTokenId::NOEMBED_ON:
1580         m_bInNoEmbed = true;
1581         m_bCallNextToken = bool(m_xTable);
1582         ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1583         break;
1584 
1585     case HtmlTokenId::DEFLIST_ON:
1586         if( m_nOpenParaToken != HtmlTokenId::NONE )
1587             EndPara();
1588         NewDefList();
1589         break;
1590     case HtmlTokenId::DEFLIST_OFF:
1591         if( m_nOpenParaToken != HtmlTokenId::NONE )
1592             EndPara();
1593         EndDefListItem( HtmlTokenId::NONE );
1594         EndDefList();
1595         break;
1596 
1597     case HtmlTokenId::DD_ON:
1598     case HtmlTokenId::DT_ON:
1599         if( m_nOpenParaToken != HtmlTokenId::NONE )
1600             EndPara();
1601         EndDefListItem();// close <DD>/<DT> and set no template
1602         NewDefListItem( nToken );
1603         break;
1604 
1605     case HtmlTokenId::DD_OFF:
1606     case HtmlTokenId::DT_OFF:
1607         // c.f. HtmlTokenId::LI_OFF
1608         // Actually we should close a DD/DT now.
1609         // But neither Netscape nor Microsoft do this and so don't we.
1610         EndDefListItem( nToken );
1611         break;
1612 
1613     // divisions
1614     case HtmlTokenId::DIVISION_ON:
1615     case HtmlTokenId::CENTER_ON:
1616         if (!m_isInTableStructure)
1617         {
1618             if (m_nOpenParaToken != HtmlTokenId::NONE)
1619             {
1620                 if (IsReadPRE())
1621                     m_nOpenParaToken = HtmlTokenId::NONE;
1622                 else
1623                     EndPara();
1624             }
1625             NewDivision( nToken );
1626         }
1627         break;
1628 
1629     case HtmlTokenId::DIVISION_OFF:
1630     case HtmlTokenId::CENTER_OFF:
1631         if (!m_isInTableStructure)
1632         {
1633             if (m_nOpenParaToken != HtmlTokenId::NONE)
1634             {
1635                 if (IsReadPRE())
1636                     m_nOpenParaToken = HtmlTokenId::NONE;
1637                 else
1638                     EndPara();
1639             }
1640             EndDivision();
1641         }
1642         break;
1643 
1644     case HtmlTokenId::MULTICOL_ON:
1645         if( m_nOpenParaToken != HtmlTokenId::NONE )
1646             EndPara();
1647         NewMultiCol();
1648         break;
1649 
1650     case HtmlTokenId::MULTICOL_OFF:
1651         if( m_nOpenParaToken != HtmlTokenId::NONE )
1652             EndPara();
1653         EndTag( HtmlTokenId::MULTICOL_ON );
1654         break;
1655 
1656     case HtmlTokenId::MARQUEE_ON:
1657         NewMarquee();
1658         m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1659         break;
1660 
1661     case HtmlTokenId::FORM_ON:
1662         NewForm();
1663         break;
1664     case HtmlTokenId::FORM_OFF:
1665         EndForm();
1666         break;
1667 
1668     // templates
1669     case HtmlTokenId::PARABREAK_ON:
1670         if( m_nOpenParaToken != HtmlTokenId::NONE )
1671             EndPara( true );
1672         NewPara();
1673         break;
1674 
1675     case HtmlTokenId::PARABREAK_OFF:
1676         EndPara( true );
1677         break;
1678 
1679     case HtmlTokenId::ADDRESS_ON:
1680         if( m_nOpenParaToken != HtmlTokenId::NONE )
1681             EndPara();
1682         NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1683         break;
1684 
1685     case HtmlTokenId::ADDRESS_OFF:
1686         if( m_nOpenParaToken != HtmlTokenId::NONE )
1687             EndPara();
1688         EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1689         break;
1690 
1691     case HtmlTokenId::BLOCKQUOTE_ON:
1692     case HtmlTokenId::BLOCKQUOTE30_ON:
1693         if( m_nOpenParaToken != HtmlTokenId::NONE )
1694             EndPara();
1695         NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1696         break;
1697 
1698     case HtmlTokenId::BLOCKQUOTE_OFF:
1699     case HtmlTokenId::BLOCKQUOTE30_OFF:
1700         if( m_nOpenParaToken != HtmlTokenId::NONE )
1701             EndPara();
1702         EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1703         break;
1704 
1705     case HtmlTokenId::PREFORMTXT_ON:
1706     case HtmlTokenId::LISTING_ON:
1707     case HtmlTokenId::XMP_ON:
1708         if( m_nOpenParaToken != HtmlTokenId::NONE )
1709             EndPara();
1710         NewTextFormatColl( nToken, RES_POOLCOLL_HTML_PRE );
1711         break;
1712 
1713     case HtmlTokenId::PREFORMTXT_OFF:
1714         m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1715         EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1716         break;
1717 
1718     case HtmlTokenId::LISTING_OFF:
1719     case HtmlTokenId::XMP_OFF:
1720         EndTextFormatColl( nToken );
1721         break;
1722 
1723     case HtmlTokenId::HEAD1_ON:
1724     case HtmlTokenId::HEAD2_ON:
1725     case HtmlTokenId::HEAD3_ON:
1726     case HtmlTokenId::HEAD4_ON:
1727     case HtmlTokenId::HEAD5_ON:
1728     case HtmlTokenId::HEAD6_ON:
1729         if( m_nOpenParaToken != HtmlTokenId::NONE )
1730         {
1731             if( IsReadPRE() )
1732                 m_nOpenParaToken = HtmlTokenId::NONE;
1733             else
1734                 EndPara();
1735         }
1736         NewHeading( nToken );
1737         break;
1738 
1739     case HtmlTokenId::HEAD1_OFF:
1740     case HtmlTokenId::HEAD2_OFF:
1741     case HtmlTokenId::HEAD3_OFF:
1742     case HtmlTokenId::HEAD4_OFF:
1743     case HtmlTokenId::HEAD5_OFF:
1744     case HtmlTokenId::HEAD6_OFF:
1745         EndHeading();
1746         break;
1747 
1748     case HtmlTokenId::TABLE_ON:
1749         if( !m_vPendingStack.empty() )
1750             BuildTable( SvxAdjust::End );
1751         else
1752         {
1753             if( m_nOpenParaToken != HtmlTokenId::NONE )
1754                 EndPara();
1755             OSL_ENSURE(!m_xTable, "table in table not allowed here");
1756             if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) &&
1757                 (m_pPam->GetPoint()->nNode.GetIndex() >
1758                             m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1759                 !m_pPam->GetNode().FindFootnoteStartNode() ) )
1760             {
1761                 if ( m_nParaCnt < 5 )
1762                     Show();     // show what we have up to here
1763 
1764                 SvxAdjust eAdjust = m_xAttrTab->pAdjust
1765                     ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1766                                              GetAdjust()
1767                     : SvxAdjust::End;
1768                 BuildTable( eAdjust );
1769             }
1770             else
1771                 bInsertUnknown = m_bKeepUnknown;
1772         }
1773         break;
1774 
1775     // lists
1776     case HtmlTokenId::DIRLIST_ON:
1777     case HtmlTokenId::MENULIST_ON:
1778     case HtmlTokenId::ORDERLIST_ON:
1779     case HtmlTokenId::UNORDERLIST_ON:
1780         if( m_nOpenParaToken != HtmlTokenId::NONE )
1781             EndPara();
1782         NewNumberBulletList( nToken );
1783         break;
1784 
1785     case HtmlTokenId::DIRLIST_OFF:
1786     case HtmlTokenId::MENULIST_OFF:
1787     case HtmlTokenId::ORDERLIST_OFF:
1788     case HtmlTokenId::UNORDERLIST_OFF:
1789         if( m_nOpenParaToken != HtmlTokenId::NONE )
1790             EndPara();
1791         EndNumberBulletListItem( HtmlTokenId::NONE, true );
1792         EndNumberBulletList( nToken );
1793         break;
1794 
1795     case HtmlTokenId::LI_ON:
1796     case HtmlTokenId::LISTHEADER_ON:
1797         if( m_nOpenParaToken != HtmlTokenId::NONE &&
1798             (m_pPam->GetPoint()->nContent.GetIndex()
1799             || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1800         {
1801             // only finish paragraph for <P><LI>, not for <DD><LI>
1802             EndPara();
1803         }
1804 
1805         EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1806         NewNumberBulletListItem( nToken );
1807         break;
1808 
1809     case HtmlTokenId::LI_OFF:
1810     case HtmlTokenId::LISTHEADER_OFF:
1811         EndNumberBulletListItem( nToken, false );
1812         break;
1813 
1814     // Attribute :
1815     case HtmlTokenId::ITALIC_ON:
1816         {
1817             SvxPostureItem aPosture( ITALIC_NORMAL, RES_CHRATR_POSTURE );
1818             SvxPostureItem aPostureCJK( ITALIC_NORMAL, RES_CHRATR_CJK_POSTURE );
1819             SvxPostureItem aPostureCTL( ITALIC_NORMAL, RES_CHRATR_CTL_POSTURE );
1820             NewStdAttr( HtmlTokenId::ITALIC_ON,
1821                            &m_xAttrTab->pItalic, aPosture,
1822                            &m_xAttrTab->pItalicCJK, &aPostureCJK,
1823                            &m_xAttrTab->pItalicCTL, &aPostureCTL );
1824         }
1825         break;
1826 
1827     case HtmlTokenId::BOLD_ON:
1828         {
1829             SvxWeightItem aWeight( WEIGHT_BOLD, RES_CHRATR_WEIGHT );
1830             SvxWeightItem aWeightCJK( WEIGHT_BOLD, RES_CHRATR_CJK_WEIGHT );
1831             SvxWeightItem aWeightCTL( WEIGHT_BOLD, RES_CHRATR_CTL_WEIGHT );
1832             NewStdAttr( HtmlTokenId::BOLD_ON,
1833                         &m_xAttrTab->pBold, aWeight,
1834                         &m_xAttrTab->pBoldCJK, &aWeightCJK,
1835                         &m_xAttrTab->pBoldCTL, &aWeightCTL );
1836         }
1837         break;
1838 
1839     case HtmlTokenId::STRIKE_ON:
1840     case HtmlTokenId::STRIKETHROUGH_ON:
1841         {
1842             NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1843                         SvxCrossedOutItem(STRIKEOUT_SINGLE, RES_CHRATR_CROSSEDOUT) );
1844         }
1845         break;
1846 
1847     case HtmlTokenId::UNDERLINE_ON:
1848         {
1849             NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1850                         SvxUnderlineItem(LINESTYLE_SINGLE, RES_CHRATR_UNDERLINE) );
1851         }
1852         break;
1853 
1854     case HtmlTokenId::SUPERSCRIPT_ON:
1855         {
1856             NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1857                         SvxEscapementItem(HTML_ESC_SUPER,HTML_ESC_PROP, RES_CHRATR_ESCAPEMENT) );
1858         }
1859         break;
1860 
1861     case HtmlTokenId::SUBSCRIPT_ON:
1862         {
1863             NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1864                         SvxEscapementItem(HTML_ESC_SUB,HTML_ESC_PROP, RES_CHRATR_ESCAPEMENT) );
1865         }
1866         break;
1867 
1868     case HtmlTokenId::BLINK_ON:
1869         {
1870             NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1871                         SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1872         }
1873         break;
1874 
1875     case HtmlTokenId::SPAN_ON:
1876         NewStdAttr( HtmlTokenId::SPAN_ON );
1877         break;
1878 
1879     case HtmlTokenId::ITALIC_OFF:
1880     case HtmlTokenId::BOLD_OFF:
1881     case HtmlTokenId::STRIKE_OFF:
1882     case HtmlTokenId::UNDERLINE_OFF:
1883     case HtmlTokenId::SUPERSCRIPT_OFF:
1884     case HtmlTokenId::SUBSCRIPT_OFF:
1885     case HtmlTokenId::BLINK_OFF:
1886     case HtmlTokenId::SPAN_OFF:
1887         EndTag( nToken );
1888         break;
1889 
1890     case HtmlTokenId::STRIKETHROUGH_OFF:
1891         EndTag( HtmlTokenId::STRIKE_OFF );
1892         break;
1893 
1894     case HtmlTokenId::BASEFONT_ON:
1895         NewBasefontAttr();
1896         break;
1897     case HtmlTokenId::BASEFONT_OFF:
1898         EndBasefontAttr();
1899         break;
1900     case HtmlTokenId::FONT_ON:
1901     case HtmlTokenId::BIGPRINT_ON:
1902     case HtmlTokenId::SMALLPRINT_ON:
1903         NewFontAttr( nToken );
1904         break;
1905     case HtmlTokenId::FONT_OFF:
1906     case HtmlTokenId::BIGPRINT_OFF:
1907     case HtmlTokenId::SMALLPRINT_OFF:
1908         EndFontAttr( nToken );
1909         break;
1910 
1911     case HtmlTokenId::EMPHASIS_ON:
1912     case HtmlTokenId::CITATION_ON:
1913     case HtmlTokenId::STRONG_ON:
1914     case HtmlTokenId::CODE_ON:
1915     case HtmlTokenId::SAMPLE_ON:
1916     case HtmlTokenId::KEYBOARD_ON:
1917     case HtmlTokenId::VARIABLE_ON:
1918     case HtmlTokenId::DEFINSTANCE_ON:
1919     case HtmlTokenId::SHORTQUOTE_ON:
1920     case HtmlTokenId::LANGUAGE_ON:
1921     case HtmlTokenId::AUTHOR_ON:
1922     case HtmlTokenId::PERSON_ON:
1923     case HtmlTokenId::ACRONYM_ON:
1924     case HtmlTokenId::ABBREVIATION_ON:
1925     case HtmlTokenId::INSERTEDTEXT_ON:
1926     case HtmlTokenId::DELETEDTEXT_ON:
1927 
1928     case HtmlTokenId::TELETYPE_ON:
1929         NewCharFormat( nToken );
1930         break;
1931 
1932     case HtmlTokenId::SDFIELD_ON:
1933         NewField();
1934         m_bCallNextToken = m_bInField && m_xTable;
1935         break;
1936 
1937     case HtmlTokenId::EMPHASIS_OFF:
1938     case HtmlTokenId::CITATION_OFF:
1939     case HtmlTokenId::STRONG_OFF:
1940     case HtmlTokenId::CODE_OFF:
1941     case HtmlTokenId::SAMPLE_OFF:
1942     case HtmlTokenId::KEYBOARD_OFF:
1943     case HtmlTokenId::VARIABLE_OFF:
1944     case HtmlTokenId::DEFINSTANCE_OFF:
1945     case HtmlTokenId::SHORTQUOTE_OFF:
1946     case HtmlTokenId::LANGUAGE_OFF:
1947     case HtmlTokenId::AUTHOR_OFF:
1948     case HtmlTokenId::PERSON_OFF:
1949     case HtmlTokenId::ACRONYM_OFF:
1950     case HtmlTokenId::ABBREVIATION_OFF:
1951     case HtmlTokenId::INSERTEDTEXT_OFF:
1952     case HtmlTokenId::DELETEDTEXT_OFF:
1953 
1954     case HtmlTokenId::TELETYPE_OFF:
1955         EndTag( nToken );
1956         break;
1957 
1958     case HtmlTokenId::HEAD_OFF:
1959         if( !m_aStyleSource.isEmpty() )
1960         {
1961             m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1962             m_aStyleSource.clear();
1963         }
1964         break;
1965 
1966     case HtmlTokenId::DOCTYPE:
1967     case HtmlTokenId::BODY_OFF:
1968     case HtmlTokenId::HTML_OFF:
1969     case HtmlTokenId::HEAD_ON:
1970     case HtmlTokenId::TITLE_OFF:
1971         break;      // don't evaluate further???
1972     case HtmlTokenId::HTML_ON:
1973         {
1974             const HTMLOptions& rHTMLOptions = GetOptions();
1975             for (size_t i = rHTMLOptions.size(); i; )
1976             {
1977                 const HTMLOption& rOption = rHTMLOptions[--i];
1978                 if( HtmlOptionId::DIR == rOption.GetToken() )
1979                 {
1980                     const OUString& rDir = rOption.GetString();
1981                     SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
1982                                          m_pCSS1Parser->GetWhichMap() );
1983                     SvxCSS1PropertyInfo aPropInfo;
1984                     OUString aDummy;
1985                     ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
1986                                        aPropInfo, nullptr, &rDir );
1987 
1988                     m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
1989                     break;
1990                 }
1991             }
1992         }
1993         break;
1994 
1995     case HtmlTokenId::INPUT:
1996         InsertInput();
1997         break;
1998 
1999     case HtmlTokenId::TEXTAREA_ON:
2000         NewTextArea();
2001         m_bCallNextToken = m_bTextArea && m_xTable;
2002         break;
2003 
2004     case HtmlTokenId::SELECT_ON:
2005         NewSelect();
2006         m_bCallNextToken = m_bSelect && m_xTable;
2007         break;
2008 
2009     case HtmlTokenId::ANCHOR_ON:
2010         NewAnchor();
2011         break;
2012 
2013     case HtmlTokenId::ANCHOR_OFF:
2014         EndAnchor();
2015         break;
2016 
2017     case HtmlTokenId::COMMENT:
2018         if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2019         {
2020             // insert as Post-It
2021             // If there are no space characters right behind
2022             // the <!-- and on front of the -->, leave the comment untouched.
2023             if( ' ' == aToken[ 3 ] &&
2024                 ' ' == aToken[ aToken.getLength()-3 ] )
2025             {
2026                 OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
2027                 InsertComment(comphelper::string::strip(aComment, ' '));
2028             }
2029             else
2030             {
2031                 OUString aComment = "<" + aToken + ">";
2032                 InsertComment( aComment );
2033             }
2034         }
2035         break;
2036 
2037     case HtmlTokenId::MAP_ON:
2038         // Image Maps are read asynchronously: At first only an image map is created
2039         // Areas are processed later. Nevertheless the
2040         // ImageMap is inserted into the IMap-Array, because it might be used
2041         // already.
2042         m_pImageMap = new ImageMap;
2043         if( ParseMapOptions( m_pImageMap) )
2044         {
2045             if (!m_pImageMaps)
2046                 m_pImageMaps.reset( new ImageMaps );
2047             m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2048         }
2049         else
2050         {
2051             delete m_pImageMap;
2052             m_pImageMap = nullptr;
2053         }
2054         break;
2055 
2056     case HtmlTokenId::MAP_OFF:
2057         // there is no ImageMap anymore (don't delete IMap, because it's
2058         // already contained in the array!)
2059         m_pImageMap = nullptr;
2060         break;
2061 
2062     case HtmlTokenId::AREA:
2063         if( m_pImageMap )
2064             ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2065                                          SvMacroItemId::OnMouseOut );
2066         break;
2067 
2068     case HtmlTokenId::FRAMESET_ON:
2069         bInsertUnknown = m_bKeepUnknown;
2070         break;
2071 
2072     case HtmlTokenId::NOFRAMES_ON:
2073         if( IsInHeader() )
2074             FinishHeader();
2075         bInsertUnknown = m_bKeepUnknown;
2076         break;
2077 
2078     case HtmlTokenId::UNKNOWNCONTROL_ON:
2079         // Ignore content of unknown token in the header, if the token
2080         // does not start with a '!'.
2081         // (but judging from the code, also if does not start with a '%')
2082         // (and also if we're not somewhere we consider PRE)
2083         if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2084             !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2085             '%' != sSaveToken[0] )
2086             m_aUnknownToken = sSaveToken;
2087         [[fallthrough]];
2088 
2089     default:
2090         bInsertUnknown = m_bKeepUnknown;
2091         break;
2092     }
2093 
2094     if( bGetIDOption )
2095         InsertIDOption();
2096 
2097     if( bInsertUnknown )
2098     {
2099         OUStringBuffer aComment("HTML: <");
2100         if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2101             aComment.append("/");
2102         aComment.append(sSaveToken);
2103         if( !aToken.isEmpty() )
2104         {
2105             UnescapeToken();
2106             aComment.append(" " + aToken);
2107         }
2108         aComment.append(">");
2109         InsertComment( aComment.makeStringAndClear() );
2110     }
2111 
2112     // if there are temporary paragraph attributes and the
2113     // paragraph isn't empty then the paragraph attributes are final.
2114     if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() )
2115         m_aParaAttrs.clear();
2116 }
2117 
lcl_swhtml_getItemInfo(const HTMLAttr & rAttr,bool & rScriptDependent,sal_uInt16 & rScriptType)2118 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2119                                  bool& rScriptDependent,
2120                                  sal_uInt16& rScriptType )
2121 {
2122     switch( rAttr.GetItem().Which() )
2123     {
2124     case RES_CHRATR_FONT:
2125     case RES_CHRATR_FONTSIZE:
2126     case RES_CHRATR_LANGUAGE:
2127     case RES_CHRATR_POSTURE:
2128     case RES_CHRATR_WEIGHT:
2129         rScriptType = i18n::ScriptType::LATIN;
2130         rScriptDependent = true;
2131         break;
2132     case RES_CHRATR_CJK_FONT:
2133     case RES_CHRATR_CJK_FONTSIZE:
2134     case RES_CHRATR_CJK_LANGUAGE:
2135     case RES_CHRATR_CJK_POSTURE:
2136     case RES_CHRATR_CJK_WEIGHT:
2137         rScriptType = i18n::ScriptType::ASIAN;
2138         rScriptDependent = true;
2139         break;
2140     case RES_CHRATR_CTL_FONT:
2141     case RES_CHRATR_CTL_FONTSIZE:
2142     case RES_CHRATR_CTL_LANGUAGE:
2143     case RES_CHRATR_CTL_POSTURE:
2144     case RES_CHRATR_CTL_WEIGHT:
2145         rScriptType = i18n::ScriptType::COMPLEX;
2146         rScriptDependent = true;
2147         break;
2148     default:
2149         rScriptDependent = false;
2150         break;
2151     }
2152 }
2153 
AppendTextNode(SwHTMLAppendMode eMode,bool bUpdateNum)2154 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2155 {
2156     // A hard line break at the end always must be removed.
2157     // A second one we replace with paragraph spacing.
2158     sal_Int32 nLFStripped = StripTrailingLF();
2159     if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2160         eMode = AM_SPACE;
2161 
2162     // the hard attributes of this paragraph will never be invalid again
2163     m_aParaAttrs.clear();
2164 
2165     SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2166         m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr;
2167 
2168     if (pTextNode)
2169     {
2170         const SvxULSpaceItem& rULSpace =
2171             static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2172 
2173         bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2174                                          : rULSpace.GetLower() == 0;
2175 
2176         if( bChange )
2177         {
2178             const SvxULSpaceItem& rCollULSpace =
2179                 pTextNode->GetAnyFormatColl().GetULSpace();
2180 
2181             bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2182                                                : rCollULSpace.GetLower() > 0;
2183 
2184             if( bMayReset &&
2185                 rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2186             {
2187                 pTextNode->ResetAttr( RES_UL_SPACE );
2188             }
2189             else
2190             {
2191                 pTextNode->SetAttr(
2192                     SvxULSpaceItem( rULSpace.GetUpper(),
2193                          AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2194             }
2195         }
2196     }
2197     m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2198 
2199     SwPosition aOldPos( *m_pPam->GetPoint() );
2200 
2201     bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2202 
2203     // split character attributes and maybe set none,
2204     // which are set for the whole paragraph
2205     const SwNodeIndex& rEndIdx = aOldPos.nNode;
2206     const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex();
2207     const SwPosition& rPos = *m_pPam->GetPoint();
2208 
2209     HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2210     for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2211     {
2212         HTMLAttr *pAttr = *pHTMLAttributes;
2213         if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2214         {
2215             bool bWholePara = false;
2216 
2217             while( pAttr )
2218             {
2219                 HTMLAttr *pNext = pAttr->GetNext();
2220                 if( pAttr->GetStartParagraphIdx() < rEndIdx.GetIndex() ||
2221                     (!bWholePara &&
2222                      pAttr->GetStartParagraph() == rEndIdx &&
2223                      pAttr->GetStartContent() != nEndCnt) )
2224                 {
2225                     bWholePara =
2226                         pAttr->GetStartParagraph() == rEndIdx &&
2227                         pAttr->GetStartContent() == 0;
2228 
2229                     sal_Int32 nStt = pAttr->m_nStartContent;
2230                     bool bScript = false;
2231                     sal_uInt16 nScriptItem;
2232                     bool bInsert = true;
2233                     lcl_swhtml_getItemInfo( *pAttr, bScript,
2234                                             nScriptItem );
2235                         // set previous part
2236                     if( bScript )
2237                     {
2238                         const SwTextNode *pTextNd =
2239                             pAttr->GetStartParagraph().GetNode().GetTextNode();
2240                         OSL_ENSURE( pTextNd, "No text node" );
2241                         if( pTextNd )
2242                         {
2243                             const OUString& rText = pTextNd->GetText();
2244                             sal_uInt16 nScriptText =
2245                                 g_pBreakIt->GetBreakIter()->getScriptType(
2246                                             rText, pAttr->GetStartContent() );
2247                             sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2248                                     ->endOfScript( rText, nStt, nScriptText );
2249                             while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2250                             {
2251                                 if( nScriptItem == nScriptText )
2252                                 {
2253                                     HTMLAttr *pSetAttr =
2254                                         pAttr->Clone( rEndIdx, nScriptEnd );
2255                                     pSetAttr->m_nStartContent = nStt;
2256                                     pSetAttr->ClearPrev();
2257                                     if( !pNext || bWholePara )
2258                                     {
2259                                         if (pSetAttr->m_bInsAtStart)
2260                                             m_aSetAttrTab.push_front( pSetAttr );
2261                                         else
2262                                             m_aSetAttrTab.push_back( pSetAttr );
2263                                     }
2264                                     else
2265                                         pNext->InsertPrev( pSetAttr );
2266                                 }
2267                                 nStt = nScriptEnd;
2268                                 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2269                                                 rText, nStt );
2270                                 nScriptEnd = g_pBreakIt->GetBreakIter()
2271                                     ->endOfScript( rText, nStt, nScriptText );
2272                             }
2273                             bInsert = nScriptItem == nScriptText;
2274                         }
2275                     }
2276                     if( bInsert )
2277                     {
2278                         HTMLAttr *pSetAttr =
2279                             pAttr->Clone( rEndIdx, nEndCnt );
2280                         pSetAttr->m_nStartContent = nStt;
2281 
2282                         // When the attribute is for the whole paragraph, the outer
2283                         // attributes aren't effective anymore. Hence it may not be inserted
2284                         // in the Prev-List of an outer attribute, because that won't be
2285                         // set. That leads to shifting when fields are used.
2286                         if( !pNext || bWholePara )
2287                         {
2288                             if (pSetAttr->m_bInsAtStart)
2289                                 m_aSetAttrTab.push_front( pSetAttr );
2290                             else
2291                                 m_aSetAttrTab.push_back( pSetAttr );
2292                         }
2293                         else
2294                             pNext->InsertPrev( pSetAttr );
2295                     }
2296                     else
2297                     {
2298                         HTMLAttr *pPrev = pAttr->GetPrev();
2299                         if( pPrev )
2300                         {
2301                             // the previous attributes must be set anyway
2302                             if( !pNext || bWholePara )
2303                             {
2304                                 if (pPrev->m_bInsAtStart)
2305                                     m_aSetAttrTab.push_front( pPrev );
2306                                 else
2307                                     m_aSetAttrTab.push_back( pPrev );
2308                             }
2309                             else
2310                                 pNext->InsertPrev( pPrev );
2311                         }
2312                     }
2313                     pAttr->ClearPrev();
2314                 }
2315 
2316                 pAttr->SetStart( rPos );
2317                 pAttr = pNext;
2318             }
2319         }
2320     }
2321 
2322     if( bUpdateNum )
2323     {
2324         if( GetNumInfo().GetDepth() )
2325         {
2326             sal_uInt8 nLvl = GetNumInfo().GetLevel();
2327             SetNodeNum( nLvl );
2328         }
2329         else
2330             m_pPam->GetNode().GetTextNode()->ResetAttr( RES_PARATR_NUMRULE );
2331     }
2332 
2333     // We must set the attribute of the paragraph before now (because of JavaScript)
2334     SetAttr();
2335 
2336     // Now it is time to get rid of all script dependent hints that are
2337     // equal to the settings in the style
2338     SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode();
2339     OSL_ENSURE( pTextNd, "There is the txt node" );
2340     size_t nCntAttr = (pTextNd  && pTextNd->GetpSwpHints())
2341                             ? pTextNd->GetSwpHints().Count() : 0;
2342     if( nCntAttr )
2343     {
2344         // These are the end position of all script dependent hints.
2345         // If we find a hint that starts before the current end position,
2346         // we have to set it. If we find a hint that start behind or at
2347         // that position, we have to take the hint value into account.
2348         // If it is equal to the style, or in fact the paragraph value
2349         // for that hint, the hint is removed. Otherwise its end position
2350         // is remembered.
2351         sal_Int32 aEndPos[15] =
2352             { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2353         SwpHints& rHints = pTextNd->GetSwpHints();
2354         for( size_t i=0; i < nCntAttr; i++ )
2355         {
2356             SwTextAttr *pHt = rHints.Get( i );
2357             sal_uInt16 nWhich = pHt->Which();
2358             sal_Int16 nIdx = 0;
2359             bool bFont = false;
2360             switch( nWhich )
2361             {
2362                 case RES_CHRATR_FONT:
2363                     nIdx = 0;
2364                     bFont = true;
2365                     break;
2366                 case RES_CHRATR_FONTSIZE:
2367                     nIdx = 1;
2368                     break;
2369                 case RES_CHRATR_LANGUAGE:
2370                     nIdx = 2;
2371                     break;
2372                 case RES_CHRATR_POSTURE:
2373                     nIdx = 3;
2374                     break;
2375                 case RES_CHRATR_WEIGHT:
2376                     nIdx = 4;
2377                     break;
2378                 case RES_CHRATR_CJK_FONT:
2379                     nIdx = 5;
2380                     bFont = true;
2381                     break;
2382                 case RES_CHRATR_CJK_FONTSIZE:
2383                     nIdx = 6;
2384                     break;
2385                 case RES_CHRATR_CJK_LANGUAGE:
2386                     nIdx = 7;
2387                     break;
2388                 case RES_CHRATR_CJK_POSTURE:
2389                     nIdx = 8;
2390                     break;
2391                 case RES_CHRATR_CJK_WEIGHT:
2392                     nIdx = 9;
2393                     break;
2394                 case RES_CHRATR_CTL_FONT:
2395                     nIdx = 10;
2396                     bFont = true;
2397                     break;
2398                 case RES_CHRATR_CTL_FONTSIZE:
2399                     nIdx = 11;
2400                     break;
2401                 case RES_CHRATR_CTL_LANGUAGE:
2402                     nIdx = 12;
2403                     break;
2404                 case RES_CHRATR_CTL_POSTURE:
2405                     nIdx = 13;
2406                     break;
2407                 case RES_CHRATR_CTL_WEIGHT:
2408                     nIdx = 14;
2409                     break;
2410                 default:
2411                     // Skip to next attribute
2412                     continue;
2413             }
2414             const sal_Int32 nStt = pHt->GetStart();
2415             if( nStt >= aEndPos[nIdx] )
2416             {
2417                 const SfxPoolItem& rItem =
2418                     static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2419                 if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2420                             : rItem == pHt->GetAttr() )
2421                 {
2422                     // The hint is the same as set in the paragraph and
2423                     // therefore, it can be deleted
2424                     // CAUTION!!! This WILL delete the hint and it MAY
2425                     // also delete the SwpHints!!! To avoid any trouble
2426                     // we leave the loop immediately if this is the last
2427                     // hint.
2428                     pTextNd->DeleteAttribute( pHt );
2429                     if( 1 == nCntAttr )
2430                         break;
2431                     i--;
2432                     nCntAttr--;
2433                 }
2434                 else
2435                 {
2436                     // The hint is different. Therefore all hints within that
2437                     // hint have to be ignored.
2438                     aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2439                 }
2440             }
2441             else
2442             {
2443                 // The hint starts before another one ends.
2444                 // The hint in this case is not deleted
2445                 OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2446                         "hints aren't nested properly!" );
2447             }
2448         }
2449     }
2450 
2451     if (!m_xTable && !--m_nParaCnt)
2452         Show();
2453 
2454     return bRet;
2455 }
2456 
AddParSpace()2457 void SwHTMLParser::AddParSpace()
2458 {
2459     //If it already has ParSpace, return
2460     if( !m_bNoParSpace )
2461         return;
2462 
2463     m_bNoParSpace = false;
2464 
2465     sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1;
2466 
2467     SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2468     if( !pTextNode )
2469         return;
2470 
2471     SvxULSpaceItem rULSpace =
2472         static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ));
2473     if( rULSpace.GetLower() )
2474         return;
2475 
2476     const SvxULSpaceItem& rCollULSpace =
2477         pTextNode->GetAnyFormatColl().GetULSpace();
2478     if( rCollULSpace.GetLower() &&
2479         rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2480     {
2481         pTextNode->ResetAttr( RES_UL_SPACE );
2482     }
2483     else
2484     {
2485         //What I do here, is that I examine the attributes, and if
2486         //I find out, that it's CJK/CTL, then I set the paragraph space
2487         //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2488 
2489         bool bIsCJK = false;
2490         bool bIsCTL = false;
2491 
2492         const size_t nCntAttr = pTextNode->GetpSwpHints()
2493                         ? pTextNode->GetSwpHints().Count() : 0;
2494 
2495         for(size_t i = 0; i < nCntAttr; ++i)
2496         {
2497             SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2498             sal_uInt16 const nWhich = pHt->Which();
2499             if (RES_CHRATR_CJK_FONT == nWhich ||
2500                 RES_CHRATR_CJK_FONTSIZE == nWhich ||
2501                 RES_CHRATR_CJK_LANGUAGE == nWhich ||
2502                 RES_CHRATR_CJK_POSTURE == nWhich ||
2503                 RES_CHRATR_CJK_WEIGHT == nWhich)
2504             {
2505                 bIsCJK = true;
2506                 break;
2507             }
2508             if (RES_CHRATR_CTL_FONT == nWhich ||
2509                 RES_CHRATR_CTL_FONTSIZE == nWhich ||
2510                 RES_CHRATR_CTL_LANGUAGE == nWhich ||
2511                 RES_CHRATR_CTL_POSTURE == nWhich ||
2512                 RES_CHRATR_CTL_WEIGHT == nWhich)
2513             {
2514                 bIsCTL = true;
2515                 break;
2516             }
2517         }
2518 
2519         if( bIsCTL )
2520         {
2521             pTextNode->SetAttr(
2522                 SvxULSpaceItem( rULSpace.GetUpper(), HTML_CTL_PARSPACE, RES_UL_SPACE )  );
2523         }
2524         else if( bIsCJK )
2525         {
2526             pTextNode->SetAttr(
2527                 SvxULSpaceItem( rULSpace.GetUpper(), HTML_CJK_PARSPACE, RES_UL_SPACE )  );
2528         } else {
2529             pTextNode->SetAttr(
2530                 SvxULSpaceItem( rULSpace.GetUpper(), HTML_PARSPACE, RES_UL_SPACE )  );
2531         }
2532     }
2533 }
2534 
Show()2535 void SwHTMLParser::Show()
2536 {
2537     // Here
2538     // - a EndAction is called, so the document is formatted
2539     // - a Reschedule is called,
2540     // - the own View-Shell is set again
2541     // - and a StartAction is called
2542 
2543     OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2544     SwViewShell *pOldVSh = CallEndAction();
2545 
2546     Application::Reschedule();
2547 
2548     if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2549         || 1 == m_xDoc->getReferenceCount() )
2550     {
2551         // was the import aborted by SFX?
2552         eState = SvParserState::Error;
2553     }
2554 
2555     // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2556     SwViewShell *pVSh = CallStartAction( pOldVSh );
2557 
2558     // is the current node not visible anymore, then we use a bigger increment
2559     if( pVSh )
2560     {
2561         m_nParaCnt = (m_pPam->GetPoint()->nNode.GetNode().IsInVisibleArea(pVSh))
2562             ? 5 : 50;
2563     }
2564 }
2565 
ShowStatline()2566 void SwHTMLParser::ShowStatline()
2567 {
2568     // Here
2569     // - a Reschedule is called, so it can be scrolled
2570     // - the own View-Shell is set again
2571     // - a StartAction/EndAction is called, when there was scrolling.
2572 
2573     OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2574 
2575     // scroll bar
2576     if (m_xProgress)
2577     {
2578         m_xProgress->Update(rInput.Tell());
2579         CheckActionViewShell();
2580     }
2581     else
2582     {
2583         Application::Reschedule();
2584 
2585         if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2586             || 1 == m_xDoc->getReferenceCount() )
2587             // was the import aborted by SFX?
2588             eState = SvParserState::Error;
2589 
2590         SwViewShell *pVSh = CheckActionViewShell();
2591         if( pVSh && pVSh->HasInvalidRect() )
2592         {
2593             CallEndAction( false, false );
2594             CallStartAction( pVSh, false );
2595         }
2596     }
2597 }
2598 
CallStartAction(SwViewShell * pVSh,bool bChkPtr)2599 SwViewShell *SwHTMLParser::CallStartAction( SwViewShell *pVSh, bool bChkPtr )
2600 {
2601     OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2602 
2603     if( !pVSh || bChkPtr )
2604     {
2605 #if OSL_DEBUG_LEVEL > 0
2606         SwViewShell *pOldVSh = pVSh;
2607 #endif
2608         pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2609 #if OSL_DEBUG_LEVEL > 0
2610         OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
2611         if( pOldVSh && !pVSh )
2612             pVSh = nullptr;
2613 #endif
2614     }
2615     m_pActionViewShell = pVSh;
2616 
2617     if( m_pActionViewShell )
2618     {
2619         if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2620             pEditShell->StartAction();
2621         else
2622             m_pActionViewShell->StartAction();
2623     }
2624 
2625     return m_pActionViewShell;
2626 }
2627 
CallEndAction(bool bChkAction,bool bChkPtr)2628 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2629 {
2630     if( bChkPtr )
2631     {
2632         SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2633         OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2634                 "CallEndAction: Who swapped the SwViewShell?" );
2635 #if OSL_DEBUG_LEVEL > 0
2636         if( m_pActionViewShell && !pVSh )
2637             pVSh = nullptr;
2638 #endif
2639         if( pVSh != m_pActionViewShell )
2640             m_pActionViewShell = nullptr;
2641     }
2642 
2643     if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2644         return m_pActionViewShell;
2645 
2646     if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) !=  nullptr )
2647     {
2648         // Already scrolled?, then make sure that the view doesn't move!
2649         const bool bOldLock = m_pActionViewShell->IsViewLocked();
2650         m_pActionViewShell->LockView( true );
2651         const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev();
2652         m_pActionViewShell->SetEndActionByVirDev( true );
2653         static_cast<SwEditShell*>(m_pActionViewShell)->EndAction();
2654         m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev );
2655         m_pActionViewShell->LockView( bOldLock );
2656 
2657         // bChkJumpMark is only set when the object was also found
2658         if( m_bChkJumpMark )
2659         {
2660             const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2661             if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2662                 ::JumpToSwMark( m_pActionViewShell,
2663                                 GetMedium()->GetURLObject().GetMark() );
2664             m_bChkJumpMark = false;
2665         }
2666     }
2667     else
2668         m_pActionViewShell->EndAction();
2669 
2670     // if the parser holds the last reference to the document, then we can
2671     // abort here and set an error.
2672     if( 1 == m_xDoc->getReferenceCount() )
2673     {
2674         eState = SvParserState::Error;
2675     }
2676 
2677     SwViewShell *pVSh = m_pActionViewShell;
2678     m_pActionViewShell = nullptr;
2679 
2680     return pVSh;
2681 }
2682 
CheckActionViewShell()2683 SwViewShell *SwHTMLParser::CheckActionViewShell()
2684 {
2685     SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2686     OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2687             "CheckActionViewShell: Who has swapped SwViewShell?" );
2688 #if OSL_DEBUG_LEVEL > 0
2689     if( m_pActionViewShell && !pVSh )
2690         pVSh = nullptr;
2691 #endif
2692     if( pVSh != m_pActionViewShell )
2693         m_pActionViewShell = nullptr;
2694 
2695     return m_pActionViewShell;
2696 }
2697 
SwHTMLFrameFormatListener(SwFrameFormat * pFrameFormat)2698 SwHTMLFrameFormatListener::SwHTMLFrameFormatListener(SwFrameFormat* pFrameFormat)
2699     : m_pFrameFormat(pFrameFormat)
2700 {
2701     StartListening(m_pFrameFormat->GetNotifier());
2702 }
2703 
Notify(const SfxHint & rHint)2704 void SwHTMLFrameFormatListener::Notify(const SfxHint& rHint)
2705 {
2706     if (rHint.GetId() == SfxHintId::Dying)
2707         m_pFrameFormat = nullptr;
2708 }
2709 
SetAttr_(bool bChkEnd,bool bBeforeTable,std::deque<std::unique_ptr<HTMLAttr>> * pPostIts)2710 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2711                              std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2712 {
2713     SwPaM aAttrPam( *m_pPam->GetPoint() );
2714     const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode;
2715     const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
2716     HTMLAttr* pAttr;
2717     SwContentNode* pCNd;
2718 
2719     std::vector<std::unique_ptr<HTMLAttr>> aFields;
2720 
2721     for( auto n = m_aSetAttrTab.size(); n; )
2722     {
2723         pAttr = m_aSetAttrTab[ --n ];
2724         sal_uInt16 nWhich = pAttr->m_pItem->Which();
2725 
2726         sal_uLong nEndParaIdx = pAttr->GetEndParagraphIdx();
2727         bool bSetAttr;
2728         if( bChkEnd )
2729         {
2730             // Set character attribute with end early on, so set them still in
2731             // the current paragraph (because of JavaScript and various "chats"(?)).
2732             // This shouldn't be done for attributes which are used for
2733             // the whole paragraph, because they could be from a paragraph style
2734             // which can't be set. Because the attributes are inserted with
2735             // SETATTR_DONTREPLACE, they should be able to be set later.
2736             bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() &&
2737                          (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) ||
2738                        ( !pAttr->IsLikePara() &&
2739                          nEndParaIdx == rEndIdx.GetIndex() &&
2740                          pAttr->GetEndContent() < nEndCnt &&
2741                          (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2742                        ( bBeforeTable &&
2743                          nEndParaIdx == rEndIdx.GetIndex() &&
2744                          !pAttr->GetEndContent() );
2745         }
2746         else
2747         {
2748             // Attributes in body nodes array section shouldn't be set if we are in a
2749             // special nodes array section, but vice versa it's possible.
2750             sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2751             bSetAttr = nEndParaIdx < rEndIdx.GetIndex() ||
2752                        rEndIdx.GetIndex() > nEndOfIcons ||
2753                        nEndParaIdx <= nEndOfIcons;
2754         }
2755 
2756         if( bSetAttr )
2757         {
2758             // The attribute shouldn't be in the list of temporary paragraph
2759             // attributes, because then it would be deleted.
2760             while( !m_aParaAttrs.empty() )
2761             {
2762                 OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2763                         "SetAttr: Attribute must not yet be set" );
2764                 m_aParaAttrs.pop_back();
2765             }
2766 
2767             // then set it
2768             m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
2769 
2770             while( pAttr )
2771             {
2772                 HTMLAttr *pPrev = pAttr->GetPrev();
2773                 if( !pAttr->m_bValid )
2774                 {
2775                     // invalid attributes can be deleted
2776                     delete pAttr;
2777                     pAttr = pPrev;
2778                     continue;
2779                 }
2780 
2781                 pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2782                 if( !pCNd )
2783                 {
2784                     // because of the awful deleting of nodes an index can also
2785                     // point to an end node :-(
2786                     if ( (pAttr->GetStartParagraph() == pAttr->GetEndParagraph()) &&
2787                          !isTXTATR_NOEND(nWhich) )
2788                     {
2789                         // when the end index also points to the node, we don't
2790                         // need to set attributes anymore, except if it's a text attribute.
2791                         delete pAttr;
2792                         pAttr = pPrev;
2793                         continue;
2794                     }
2795                     pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2796                     if( pCNd )
2797                         pAttr->m_nStartContent = 0;
2798                     else
2799                     {
2800                         OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2801                         delete pAttr;
2802                         pAttr = pPrev;
2803                         continue;
2804                     }
2805                 }
2806                 aAttrPam.GetPoint()->nNode = pAttr->m_nStartPara;
2807 
2808                 // because of the deleting of BRs the start index can also
2809                 // point behind the end the text
2810                 if( pAttr->m_nStartContent > pCNd->Len() )
2811                     pAttr->m_nStartContent = pCNd->Len();
2812                 aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent );
2813 
2814                 aAttrPam.SetMark();
2815                 if ( (pAttr->GetStartParagraph() != pAttr->GetEndParagraph()) &&
2816                          !isTXTATR_NOEND(nWhich) )
2817                 {
2818                     pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2819                     if( !pCNd )
2820                     {
2821                         pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2822                         if( pCNd )
2823                             pAttr->m_nEndContent = pCNd->Len();
2824                         else
2825                         {
2826                             OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2827                             aAttrPam.DeleteMark();
2828                             delete pAttr;
2829                             pAttr = pPrev;
2830                             continue;
2831                         }
2832                     }
2833 
2834                     aAttrPam.GetPoint()->nNode = pAttr->m_nEndPara;
2835                 }
2836                 else if( pAttr->IsLikePara() )
2837                 {
2838                     pAttr->m_nEndContent = pCNd->Len();
2839                 }
2840 
2841                 // because of the deleting of BRs the start index can also
2842                 // point behind the end the text
2843                 if( pAttr->m_nEndContent > pCNd->Len() )
2844                     pAttr->m_nEndContent = pCNd->Len();
2845 
2846                 aAttrPam.GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent );
2847                 if( bBeforeTable &&
2848                     aAttrPam.GetPoint()->nNode.GetIndex() ==
2849                         rEndIdx.GetIndex() )
2850                 {
2851                     // If we're before inserting a table and the attribute ends
2852                     // in the current node, then we must end it in the previous
2853                     // node or discard it, if it starts in that node.
2854                     if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2855                          !isTXTATR_NOEND(nWhich) )
2856                     {
2857                         if( aAttrPam.GetMark()->nNode.GetIndex() !=
2858                             rEndIdx.GetIndex() )
2859                         {
2860                             OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
2861                                     "Content-Position before table not 0???" );
2862                             aAttrPam.Move( fnMoveBackward );
2863                         }
2864                         else
2865                         {
2866                             aAttrPam.DeleteMark();
2867                             delete pAttr;
2868                             pAttr = pPrev;
2869                             continue;
2870                         }
2871                     }
2872                 }
2873 
2874                 switch( nWhich )
2875                 {
2876                 case RES_FLTR_BOOKMARK: // insert bookmark
2877                     {
2878                         const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2879                         IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2880                         IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2881                         if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2882                             (*ppBkmk)->GetMarkStart() == *aAttrPam.GetPoint() )
2883                             break; // do not generate duplicates on this position
2884                         aAttrPam.DeleteMark();
2885                         const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2886                             aAttrPam,
2887                             sName,
2888                             IDocumentMarkAccess::MarkType::BOOKMARK,
2889                             ::sw::mark::InsertMode::New);
2890 
2891                         // jump to bookmark
2892                         if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2893                         {
2894                             m_bChkJumpMark = true;
2895                             m_eJumpTo = JumpToMarks::NONE;
2896                         }
2897                     }
2898                     break;
2899                 case RES_TXTATR_FIELD:
2900                 case RES_TXTATR_ANNOTATION:
2901                 case RES_TXTATR_INPUTFIELD:
2902                     {
2903                         SwFieldIds nFieldWhich =
2904                             pPostIts
2905                             ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
2906                             : SwFieldIds::Database;
2907                         if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2908                                          SwFieldIds::Script == nFieldWhich) )
2909                         {
2910                             pPostIts->emplace_front( pAttr );
2911                         }
2912                         else
2913                         {
2914                             aFields.emplace_back( pAttr);
2915                         }
2916                     }
2917                     aAttrPam.DeleteMark();
2918                     pAttr = pPrev;
2919                     continue;
2920 
2921                 case RES_LR_SPACE:
2922                     if( aAttrPam.GetPoint()->nNode.GetIndex() ==
2923                         aAttrPam.GetMark()->nNode.GetIndex())
2924                     {
2925                         // because of numbering set this attribute directly at node
2926                         pCNd->SetAttr( *pAttr->m_pItem );
2927                         break;
2928                     }
2929                     OSL_ENSURE( false,
2930                             "LRSpace set over multiple paragraphs!" );
2931                     [[fallthrough]]; // (shouldn't reach this point anyway)
2932 
2933                 // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2934                 // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2935                 // This is the right place in the future if the adapted fill attributes
2936                 // may be handled more directly in HTML import to handle them.
2937                 case RES_BACKGROUND:
2938                 {
2939                     const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
2940                     SfxItemSet aNewSet(m_xDoc->GetAttrPool(), svl::Items<XATTR_FILL_FIRST, XATTR_FILL_LAST>{});
2941 
2942                     setSvxBrushItemAsFillAttributesToTargetSet(rBrush, aNewSet);
2943                     m_xDoc->getIDocumentContentOperations().InsertItemSet(aAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2944                     break;
2945                 }
2946                 default:
2947 
2948                     // maybe jump to a bookmark
2949                     if( RES_TXTATR_INETFMT == nWhich &&
2950                         JumpToMarks::Mark == m_eJumpTo &&
2951                         m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2952                     {
2953                         m_bChkJumpMark = true;
2954                         m_eJumpTo = JumpToMarks::NONE;
2955                     }
2956 
2957                     m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
2958                 }
2959                 aAttrPam.DeleteMark();
2960 
2961                 delete pAttr;
2962                 pAttr = pPrev;
2963             }
2964         }
2965     }
2966 
2967     for( auto n = m_aMoveFlyFrames.size(); n; )
2968     {
2969         SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[--n]->GetFrameFormat();
2970         if (!pFrameFormat)
2971         {
2972             SAL_WARN("sw.html", "SwFrameFormat deleted during import");
2973             m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
2974             m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
2975             continue;
2976         }
2977 
2978         const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
2979         OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
2980                 "Only At-Para flys need special handling" );
2981         const SwPosition *pFlyPos = rAnchor.GetContentAnchor();
2982         sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex();
2983         bool bMoveFly;
2984         if( bChkEnd )
2985         {
2986             bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2987                        ( nFlyParaIdx == rEndIdx.GetIndex() &&
2988                          m_aMoveFlyCnts[n] < nEndCnt );
2989         }
2990         else
2991         {
2992             sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2993             bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() ||
2994                        rEndIdx.GetIndex() > nEndOfIcons ||
2995                        nFlyParaIdx <= nEndOfIcons;
2996         }
2997         if( bMoveFly )
2998         {
2999             pFrameFormat->DelFrames();
3000             *aAttrPam.GetPoint() = *pFlyPos;
3001             aAttrPam.GetPoint()->nContent.Assign( aAttrPam.GetContentNode(),
3002                                                    m_aMoveFlyCnts[n] );
3003             SwFormatAnchor aAnchor( rAnchor );
3004             aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
3005             aAnchor.SetAnchor( aAttrPam.GetPoint() );
3006             pFrameFormat->SetFormatAttr( aAnchor );
3007 
3008             const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
3009             if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
3010             {
3011                 SwFormatHoriOrient aHoriOri( rHoriOri );
3012                 aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
3013                 pFrameFormat->SetFormatAttr( aHoriOri );
3014             }
3015             const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
3016             if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
3017             {
3018                 SwFormatVertOrient aVertOri( rVertOri );
3019                 aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3020                 pFrameFormat->SetFormatAttr( aVertOri );
3021             }
3022 
3023             pFrameFormat->MakeFrames();
3024             m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3025             m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3026         }
3027     }
3028     for (auto & field : aFields)
3029     {
3030         pCNd = field->m_nStartPara.GetNode().GetContentNode();
3031         aAttrPam.GetPoint()->nNode = field->m_nStartPara;
3032         aAttrPam.GetPoint()->nContent.Assign( pCNd, field->m_nStartContent );
3033 
3034         if( bBeforeTable &&
3035             aAttrPam.GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() )
3036         {
3037             OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3038             OSL_ENSURE( !aAttrPam.GetPoint()->nContent.GetIndex(),
3039                     "Content-Position before table not 0???" );
3040             // !!!
3041             aAttrPam.Move( fnMoveBackward );
3042         }
3043 
3044         m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *field->m_pItem );
3045 
3046         field.reset();
3047     }
3048     aFields.clear();
3049 }
3050 
NewAttr(const std::shared_ptr<HTMLAttrTable> & rAttrTable,HTMLAttr ** ppAttr,const SfxPoolItem & rItem)3051 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3052 {
3053     // Font height and font colour as well as escape attributes may not be
3054     // combined. Therefore they're saved in a list and in it the last opened
3055     // attribute is at the beginning and count is always one. For all other
3056     // attributes count is just incremented.
3057     if( *ppAttr )
3058     {
3059         HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3060         pAttr->InsertNext( *ppAttr );
3061         (*ppAttr) = pAttr;
3062     }
3063     else
3064         (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3065 }
3066 
EndAttr(HTMLAttr * pAttr,bool bChkEmpty)3067 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3068 {
3069     bool bRet = true;
3070 
3071     // The list header is saved in the attribute.
3072     HTMLAttr **ppHead = pAttr->m_ppHead;
3073 
3074     OSL_ENSURE( ppHead, "No list header attribute found!" );
3075 
3076     // save the current position as end position
3077     const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode;
3078     sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3079 
3080     // Is the last started or an earlier started attribute being ended?
3081     HTMLAttr *pLast = nullptr;
3082     if( ppHead && pAttr != *ppHead )
3083     {
3084         // The last started attribute isn't being ended
3085 
3086         // Then we look for attribute which was started immediately afterwards,
3087         // which has also not yet been ended (otherwise it would no longer be
3088         // in the list).
3089         pLast = *ppHead;
3090         while( pLast && pLast->GetNext() != pAttr )
3091             pLast = pLast->GetNext();
3092 
3093         OSL_ENSURE( pLast, "Attribute not found in own list!" );
3094     }
3095 
3096     bool bMoveBack = false;
3097     sal_uInt16 nWhich = pAttr->m_pItem->Which();
3098     if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3099         *pEndIdx != pAttr->GetStartParagraph() )
3100     {
3101         // Then move back one position in the content!
3102         bMoveBack = m_pPam->Move( fnMoveBackward );
3103         nEndCnt = m_pPam->GetPoint()->nContent.GetIndex();
3104     }
3105 
3106     // now end the attribute
3107     HTMLAttr *pNext = pAttr->GetNext();
3108 
3109     bool bInsert;
3110     sal_uInt16 nScriptItem = 0;
3111     bool bScript = false;
3112     // does it have a non-empty range?
3113     if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3114         RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3115         *pEndIdx != pAttr->GetStartParagraph() ||
3116         nEndCnt != pAttr->GetStartContent() )
3117     {
3118         bInsert = true;
3119         // We do some optimization for script dependent attributes here.
3120         if( *pEndIdx == pAttr->GetStartParagraph() )
3121         {
3122             lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3123         }
3124     }
3125     else
3126     {
3127         bInsert = false;
3128     }
3129 
3130     const SwTextNode *pTextNd = (bInsert && bScript) ?
3131         pAttr->GetStartParagraph().GetNode().GetTextNode() :
3132         nullptr;
3133 
3134     if (pTextNd)
3135     {
3136         const OUString& rText = pTextNd->GetText();
3137         sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3138                         rText, pAttr->GetStartContent() );
3139         sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3140                     ->endOfScript( rText, pAttr->GetStartContent(), nScriptText );
3141         while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3142         {
3143             if( nScriptItem == nScriptText )
3144             {
3145                 HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd );
3146                 pSetAttr->ClearPrev();
3147                 if( pNext )
3148                     pNext->InsertPrev( pSetAttr );
3149                 else
3150                 {
3151                     if (pSetAttr->m_bInsAtStart)
3152                         m_aSetAttrTab.push_front( pSetAttr );
3153                     else
3154                         m_aSetAttrTab.push_back( pSetAttr );
3155                 }
3156             }
3157             pAttr->m_nStartContent = nScriptEnd;
3158             nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3159                             rText, nScriptEnd );
3160             nScriptEnd = g_pBreakIt->GetBreakIter()
3161                     ->endOfScript( rText, nScriptEnd, nScriptText );
3162         }
3163         bInsert = nScriptItem == nScriptText;
3164     }
3165     if( bInsert )
3166     {
3167         pAttr->m_nEndPara = *pEndIdx;
3168         pAttr->m_nEndContent = nEndCnt;
3169         pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3170                              RES_TXTATR_CHARFMT != nWhich;
3171 
3172         if( !pNext )
3173         {
3174             // No open attributes of that type exists any longer, so all
3175             // can be set. Except they depend on another attribute, then
3176             // they're appended there.
3177             if (pAttr->m_bInsAtStart)
3178                 m_aSetAttrTab.push_front( pAttr );
3179             else
3180                 m_aSetAttrTab.push_back( pAttr );
3181         }
3182         else
3183         {
3184             // There are other open attributes of that type,
3185             // therefore the setting must be postponed.
3186             // Hence the current attribute is added at the end
3187             // of the Prev-List of the successor.
3188             pNext->InsertPrev( pAttr );
3189         }
3190     }
3191     else
3192     {
3193         // Then don't insert, but delete. Because of the "faking" of styles
3194         // by hard attributing there can be also other empty attributes in the
3195         // Prev-List, which must be set anyway.
3196         HTMLAttr *pPrev = pAttr->GetPrev();
3197         bRet = false;
3198         delete pAttr;
3199 
3200         if( pPrev )
3201         {
3202             // The previous attributes must be set anyway.
3203             if( pNext )
3204                 pNext->InsertPrev( pPrev );
3205             else
3206             {
3207                 if (pPrev->m_bInsAtStart)
3208                     m_aSetAttrTab.push_front( pPrev );
3209                 else
3210                     m_aSetAttrTab.push_back( pPrev );
3211             }
3212         }
3213 
3214     }
3215 
3216     // If the first attribute of the list was set, then the list header
3217     // must be corrected as well.
3218     if( pLast )
3219         pLast->m_pNext = pNext;
3220     else if( ppHead )
3221         *ppHead = pNext;
3222 
3223     if( bMoveBack )
3224         m_pPam->Move( fnMoveForward );
3225 
3226     return bRet;
3227 }
3228 
DeleteAttr(HTMLAttr * pAttr)3229 void SwHTMLParser::DeleteAttr( HTMLAttr* pAttr )
3230 {
3231     // preliminary paragraph attributes are not allowed here, they could
3232     // be set here and then the pointers become invalid!
3233     OSL_ENSURE(m_aParaAttrs.empty(),
3234         "Danger: there are non-final paragraph attributes");
3235     m_aParaAttrs.clear();
3236 
3237     // The list header is saved in the attribute
3238     HTMLAttr **ppHead = pAttr->m_ppHead;
3239 
3240     OSL_ENSURE( ppHead, "no list header attribute found!" );
3241 
3242     // Is the last started or an earlier started attribute being removed?
3243     HTMLAttr *pLast = nullptr;
3244     if( ppHead && pAttr != *ppHead )
3245     {
3246         // The last started attribute isn't being ended
3247 
3248         // Then we look for attribute which was started immediately afterwards,
3249         // which has also not yet been ended (otherwise it would no longer be
3250         // in the list).
3251         pLast = *ppHead;
3252         while( pLast && pLast->GetNext() != pAttr )
3253             pLast = pLast->GetNext();
3254 
3255         OSL_ENSURE( pLast, "Attribute not found in own list!" );
3256     }
3257 
3258     // now delete the attribute
3259     HTMLAttr *pNext = pAttr->GetNext();
3260     HTMLAttr *pPrev = pAttr->GetPrev();
3261     //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3262     std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3263     delete pAttr;
3264 
3265     if( pPrev )
3266     {
3267         // The previous attributes must be set anyway.
3268         if( pNext )
3269             pNext->InsertPrev( pPrev );
3270         else
3271         {
3272             if (pPrev->m_bInsAtStart)
3273                 m_aSetAttrTab.push_front( pPrev );
3274             else
3275                 m_aSetAttrTab.push_back( pPrev );
3276         }
3277     }
3278 
3279     // If the first attribute of the list was deleted, then the list header
3280     // must be corrected as well.
3281     if( pLast )
3282         pLast->m_pNext = pNext;
3283     else if( ppHead )
3284         *ppHead = pNext;
3285 }
3286 
SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)3287 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3288 {
3289     // preliminary paragraph attributes are not allowed here, they could
3290     // be set here and then the pointers become invalid!
3291     OSL_ENSURE(m_aParaAttrs.empty(),
3292             "Danger: there are non-final paragraph attributes");
3293     m_aParaAttrs.clear();
3294 
3295     HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3296     HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3297 
3298     for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3299     {
3300         *pSaveAttributes = *pHTMLAttributes;
3301 
3302         HTMLAttr *pAttr = *pSaveAttributes;
3303         while (pAttr)
3304         {
3305             pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3306             pAttr = pAttr->GetNext();
3307         }
3308 
3309         *pHTMLAttributes = nullptr;
3310     }
3311 }
3312 
// Splits all currently open attributes at the current PaM position: where
// necessary a clone ending at the split position is queued for setting, and
// the original attribute is reset to start at the current position and moved
// into rNewAttrTab. Afterwards the current attribute table is empty.
//
// rNewAttrTab   receives the re-started attributes
// bMoveEndBack  if true, the end position used for the clones is the end of
//               the content node preceding the current node instead of the
//               current position
void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
                                 bool bMoveEndBack )
{
    // preliminary paragraph attributes are not allowed here, they could
    // be set here and then the pointers become invalid!
    OSL_ENSURE(m_aParaAttrs.empty(),
            "Danger: there are non-final paragraph attributes");
    m_aParaAttrs.clear();

    // nSttIdx/nSttCnt: position at which the attributes are re-started;
    // nEndIdx/nEndCnt: position at which the clones end (initially the same).
    const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode;
    SwNodeIndex nEndIdx( nSttIdx );

    // close all still open attributes and re-open them after the table
    // (both tables are walked as flat arrays of list-head pointers)
    HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
    HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
    bool bSetAttr = true;
    const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex();
    sal_Int32 nEndCnt = nSttCnt;

    if( bMoveEndBack )
    {
        sal_uLong nOldEnd = nEndIdx.GetIndex();
        sal_uLong nTmpIdx;
        // nTmpIdx becomes the end-of-inserts index if the end-of-extras or
        // the end-of-autotext index is >= nOldEnd; otherwise it keeps the
        // end-of-autotext index assigned inside the condition.
        if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
            ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
        {
            nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
        }
        // move the end index to the previous content node (if there is one)
        SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);

        // Don't set attributes, when the PaM was moved outside of the content area.
        bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();

        // the clones end at the end of that node
        nEndCnt = (bSetAttr ? pCNd->Len() : 0);
    }
    for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
    {
        HTMLAttr *pAttr = *pHTMLAttributes;
        *pSaveAttributes = nullptr;
        while( pAttr )
        {
            // fetch the neighbours first, Reset() below breaks the links
            HTMLAttr *pNext = pAttr->GetNext();
            HTMLAttr *pPrev = pAttr->GetPrev();

            // The attribute is only set if it starts in an earlier paragraph
            // than the end position, or in the same paragraph at a content
            // position different from nEndCnt.
            if( bSetAttr &&
                ( pAttr->GetStartParagraphIdx() < nEndIdx.GetIndex() ||
                  (pAttr->GetStartParagraph() == nEndIdx &&
                   pAttr->GetStartContent() != nEndCnt) ) )
            {
                // The attribute must be set before the list. We need the
                // original and therefore we clone it, because pointer to the
                // attribute exist in the other contexts. The Next-List is lost
                // in doing so, but the Previous-List is preserved.
                HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt );

                // postpone behind the next list entry, or queue for setting
                if( pNext )
                    pNext->InsertPrev( pSetAttr );
                else
                {
                    if (pSetAttr->m_bInsAtStart)
                        m_aSetAttrTab.push_front( pSetAttr );
                    else
                        m_aSetAttrTab.push_back( pSetAttr );
                }
            }
            else if( pPrev )
            {
                // If the attribute doesn't need to be set before the table, then
                // the previous attributes must still be set.
                if( pNext )
                    pNext->InsertPrev( pPrev );
                else
                {
                    if (pPrev->m_bInsAtStart)
                        m_aSetAttrTab.push_front( pPrev );
                    else
                        m_aSetAttrTab.push_back( pPrev );
                }
            }

            // set the start of the attribute anew and break link
            pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab);

            // append the attribute to the end of the list in the new table,
            // or make it the list head if that list is still empty
            if (*pSaveAttributes)
            {
                HTMLAttr *pSAttr = *pSaveAttributes;
                while( pSAttr->GetNext() )
                    pSAttr = pSAttr->GetNext();
                pSAttr->InsertNext( pAttr );
            }
            else
                *pSaveAttributes = pAttr;

            pAttr = pNext;
        }

        // the slot in the current table is empty now
        *pHTMLAttributes = nullptr;
    }
}
3412 
RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)3413 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3414 {
3415     // preliminary paragraph attributes are not allowed here, they could
3416     // be set here and then the pointers become invalid!
3417     OSL_ENSURE(m_aParaAttrs.empty(),
3418             "Danger: there are non-final paragraph attributes");
3419     m_aParaAttrs.clear();
3420 
3421     HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3422     HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3423 
3424     for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3425     {
3426         OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3427 
3428         *pHTMLAttributes = *pSaveAttributes;
3429 
3430         HTMLAttr *pAttr = *pHTMLAttributes;
3431         while (pAttr)
3432         {
3433             OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3434                     "Previous attribute has still a header" );
3435             pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3436             pAttr = pAttr->GetNext();
3437         }
3438 
3439         *pSaveAttributes = nullptr;
3440     }
3441 }
3442 
InsertAttr(const SfxPoolItem & rItem,bool bInsAtStart)3443 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3444 {
3445     HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3446     if (bInsAtStart)
3447         m_aSetAttrTab.push_front( pTmp );
3448     else
3449         m_aSetAttrTab.push_back( pTmp );
3450 }
3451 
InsertAttrs(std::deque<std::unique_ptr<HTMLAttr>> rAttrs)3452 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3453 {
3454     while( !rAttrs.empty() )
3455     {
3456         std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3457         InsertAttr( pAttr->GetItem(), false );
3458         rAttrs.pop_front();
3459     }
3460 }
3461 
NewStdAttr(HtmlTokenId nToken)3462 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken )
3463 {
3464     OUString aId, aStyle, aLang, aDir;
3465     OUString aClass;
3466 
3467     const HTMLOptions& rHTMLOptions = GetOptions();
3468     for (size_t i = rHTMLOptions.size(); i; )
3469     {
3470         const HTMLOption& rOption = rHTMLOptions[--i];
3471         switch( rOption.GetToken() )
3472         {
3473         case HtmlOptionId::ID:
3474             aId = rOption.GetString();
3475             break;
3476         case HtmlOptionId::STYLE:
3477             aStyle = rOption.GetString();
3478             break;
3479         case HtmlOptionId::CLASS:
3480             aClass = rOption.GetString();
3481             break;
3482         case HtmlOptionId::LANG:
3483             aLang = rOption.GetString();
3484             break;
3485         case HtmlOptionId::DIR:
3486             aDir = rOption.GetString();
3487             break;
3488         default: break;
3489         }
3490     }
3491 
3492     // create a new context
3493     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3494 
3495     // parse styles
3496     if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3497     {
3498         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3499         SvxCSS1PropertyInfo aPropInfo;
3500 
3501         if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3502         {
3503             if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3504                 !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3505                 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3506             InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3507         }
3508     }
3509 
3510     // save the context
3511     PushContext(xCntxt);
3512 }
3513 
NewStdAttr(HtmlTokenId nToken,HTMLAttr ** ppAttr,const SfxPoolItem & rItem,HTMLAttr ** ppAttr2,const SfxPoolItem * pItem2,HTMLAttr ** ppAttr3,const SfxPoolItem * pItem3)3514 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken,
3515                                HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3516                                HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3517                                HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3518 {
3519     OUString aId, aStyle, aClass, aLang, aDir;
3520 
3521     const HTMLOptions& rHTMLOptions = GetOptions();
3522     for (size_t i = rHTMLOptions.size(); i; )
3523     {
3524         const HTMLOption& rOption = rHTMLOptions[--i];
3525         switch( rOption.GetToken() )
3526         {
3527         case HtmlOptionId::ID:
3528             aId = rOption.GetString();
3529             break;
3530         case HtmlOptionId::STYLE:
3531             aStyle = rOption.GetString();
3532             break;
3533         case HtmlOptionId::CLASS:
3534             aClass = rOption.GetString();
3535             break;
3536         case HtmlOptionId::LANG:
3537             aLang = rOption.GetString();
3538             break;
3539         case HtmlOptionId::DIR:
3540             aDir = rOption.GetString();
3541             break;
3542         default: break;
3543         }
3544     }
3545 
3546     // create a new context
3547     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3548 
3549     // parse styles
3550     if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3551     {
3552         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3553         SvxCSS1PropertyInfo aPropInfo;
3554 
3555         aItemSet.Put( rItem );
3556         if( pItem2 )
3557             aItemSet.Put( *pItem2 );
3558         if( pItem3 )
3559             aItemSet.Put( *pItem3 );
3560 
3561         if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3562             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3563 
3564         InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3565     }
3566     else
3567     {
3568         InsertAttr( ppAttr ,rItem, xCntxt.get() );
3569         if( pItem2 )
3570         {
3571             OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3572             InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3573         }
3574         if( pItem3 )
3575         {
3576             OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3577             InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3578         }
3579     }
3580 
3581     // save the context
3582     PushContext(xCntxt);
3583 }
3584 
EndTag(HtmlTokenId nToken)3585 void SwHTMLParser::EndTag( HtmlTokenId nToken )
3586 {
3587     // fetch context
3588     std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3589     if (xCntxt)
3590     {
3591         // and maybe end the attributes
3592         EndContext(xCntxt.get());
3593     }
3594 }
3595 
NewBasefontAttr()3596 void SwHTMLParser::NewBasefontAttr()
3597 {
3598     OUString aId, aStyle, aClass, aLang, aDir;
3599     sal_uInt16 nSize = 3;
3600 
3601     const HTMLOptions& rHTMLOptions = GetOptions();
3602     for (size_t i = rHTMLOptions.size(); i; )
3603     {
3604         const HTMLOption& rOption = rHTMLOptions[--i];
3605         switch( rOption.GetToken() )
3606         {
3607         case HtmlOptionId::SIZE:
3608             nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
3609             break;
3610         case HtmlOptionId::ID:
3611             aId = rOption.GetString();
3612             break;
3613         case HtmlOptionId::STYLE:
3614             aStyle = rOption.GetString();
3615             break;
3616         case HtmlOptionId::CLASS:
3617             aClass = rOption.GetString();
3618             break;
3619         case HtmlOptionId::LANG:
3620             aLang = rOption.GetString();
3621             break;
3622         case HtmlOptionId::DIR:
3623             aDir = rOption.GetString();
3624             break;
3625         default: break;
3626         }
3627     }
3628 
3629     if( nSize < 1 )
3630         nSize = 1;
3631 
3632     if( nSize > 7 )
3633         nSize = 7;
3634 
3635     // create a new context
3636     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3637 
3638     // parse styles
3639     if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3640     {
3641         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3642         SvxCSS1PropertyInfo aPropInfo;
3643 
3644         //CJK has different defaults
3645         SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3646         aItemSet.Put( aFontHeight );
3647         SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3648         aItemSet.Put( aFontHeightCJK );
3649         //Complex type can contain so many types of letters,
3650         //that it's not really worthy to bother, IMO.
3651         //Still, I have set a default.
3652         SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3653         aItemSet.Put( aFontHeightCTL );
3654 
3655         if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3656             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3657 
3658         InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3659     }
3660     else
3661     {
3662         SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3663         InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3664         SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3665         InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3666         SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3667         InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3668     }
3669 
3670     // save the context
3671     PushContext(xCntxt);
3672 
3673     // save the font size
3674     m_aBaseFontStack.push_back( nSize );
3675 }
3676 
EndBasefontAttr()3677 void SwHTMLParser::EndBasefontAttr()
3678 {
3679     EndTag( HtmlTokenId::BASEFONT_ON );
3680 
3681     // avoid stack underflow in tables
3682     if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3683         m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3684 }
3685 
NewFontAttr(HtmlTokenId nToken)3686 void SwHTMLParser::NewFontAttr( HtmlTokenId nToken )
3687 {
3688     sal_uInt16 nBaseSize =
3689         ( m_aBaseFontStack.size() > m_nBaseFontStMin
3690             ? (m_aBaseFontStack[m_aBaseFontStack.size()-1] & FONTSIZE_MASK)
3691             : 3 );
3692     sal_uInt16 nFontSize =
3693         ( m_aFontStack.size() > m_nFontStMin
3694             ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3695             : nBaseSize );
3696 
3697     OUString aFace, aId, aStyle, aClass, aLang, aDir;
3698     Color aColor;
3699     sal_uLong nFontHeight = 0;  // actual font height to set
3700     sal_uInt16 nSize = 0;       // font height in Netscape notation (1-7)
3701     bool bColor = false;
3702 
3703     const HTMLOptions& rHTMLOptions = GetOptions();
3704     for (size_t i = rHTMLOptions.size(); i; )
3705     {
3706         const HTMLOption& rOption = rHTMLOptions[--i];
3707         switch( rOption.GetToken() )
3708         {
3709         case HtmlOptionId::SIZE:
3710             if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3711             {
3712                 sal_Int32 nSSize;
3713                 if( '+' == rOption.GetString()[0] ||
3714                     '-' == rOption.GetString()[0] )
3715                     nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3716                 else
3717                     nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3718 
3719                 if( nSSize < 1 )
3720                     nSSize = 1;
3721                 else if( nSSize > 7 )
3722                     nSSize = 7;
3723 
3724                 nSize = o3tl::narrowing<sal_uInt16>(nSSize);
3725                 nFontHeight = m_aFontHeights[nSize-1];
3726             }
3727             break;
3728         case HtmlOptionId::COLOR:
3729             if( HtmlTokenId::FONT_ON==nToken )
3730             {
3731                 rOption.GetColor( aColor );
3732                 bColor = true;
3733             }
3734             break;
3735         case HtmlOptionId::FACE:
3736             if( HtmlTokenId::FONT_ON==nToken )
3737                 aFace = rOption.GetString();
3738             break;
3739         case HtmlOptionId::ID:
3740             aId = rOption.GetString();
3741             break;
3742         case HtmlOptionId::STYLE:
3743             aStyle = rOption.GetString();
3744             break;
3745         case HtmlOptionId::CLASS:
3746             aClass = rOption.GetString();
3747             break;
3748         case HtmlOptionId::LANG:
3749             aLang = rOption.GetString();
3750             break;
3751         case HtmlOptionId::DIR:
3752             aDir = rOption.GetString();
3753             break;
3754         default: break;
3755         }
3756     }
3757 
3758     if( HtmlTokenId::FONT_ON != nToken )
3759     {
3760         // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3761 
3762         // In headings the current heading sets the font height
3763         // and not BASEFONT.
3764         const SwFormatColl *pColl = GetCurrFormatColl();
3765         sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3766         if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3767             nPoolId<=RES_POOLCOLL_HEADLINE6 )
3768         {
3769             // If the font height in the heading wasn't changed yet,
3770             // then take the one from the style.
3771             if( m_nFontStHeadStart==m_aFontStack.size() )
3772                 nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3773         }
3774         else
3775             nPoolId = 0;
3776 
3777         if( HtmlTokenId::BIGPRINT_ON == nToken )
3778             nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3779         else
3780             nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3781 
3782         // If possible in headlines we fetch the new font height
3783         // from the style.
3784         if( nPoolId && nSize>=1 && nSize <=6 )
3785             nFontHeight =
3786                 m_pCSS1Parser->GetTextCollFromPool(
3787                     RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3788         else
3789             nFontHeight = m_aFontHeights[nSize-1];
3790     }
3791 
3792     OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3793 
3794     OUString aFontName;
3795     const OUString aStyleName;
3796     FontFamily eFamily = FAMILY_DONTKNOW;   // family and pitch,
3797     FontPitch ePitch = PITCH_DONTKNOW;      // if not found
3798     rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3799 
3800     if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3801     {
3802         const FontList *pFList = nullptr;
3803         SwDocShell *pDocSh = m_xDoc->GetDocShell();
3804         if( pDocSh )
3805         {
3806             const SvxFontListItem *pFListItem =
3807                static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3808             if( pFListItem )
3809                 pFList = pFListItem->GetFontList();
3810         }
3811 
3812         bool bFound = false;
3813         sal_Int32 nStrPos = 0;
3814         while( nStrPos!= -1 )
3815         {
3816             OUString aFName = aFace.getToken( 0, ',', nStrPos );
3817             aFName = comphelper::string::strip(aFName, ' ');
3818             if( !aFName.isEmpty() )
3819             {
3820                 if( !bFound && pFList )
3821                 {
3822                     sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3823                     if( nullptr != hFont )
3824                     {
3825                         const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3826                         if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3827                         {
3828                             bFound = true;
3829                             if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3830                                 eEnc = RTL_TEXTENCODING_SYMBOL;
3831                         }
3832                     }
3833                 }
3834                 if( !aFontName.isEmpty() )
3835                     aFontName += ";";
3836                 aFontName += aFName;
3837             }
3838         }
3839     }
3840 
3841     // create a new context
3842     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3843 
3844     // parse styles
3845     if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3846     {
3847         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3848         SvxCSS1PropertyInfo aPropInfo;
3849 
3850         if( nFontHeight )
3851         {
3852             SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3853             aItemSet.Put( aFontHeight );
3854             SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3855             aItemSet.Put( aFontHeightCJK );
3856             SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3857             aItemSet.Put( aFontHeightCTL );
3858         }
3859         if( bColor )
3860             aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3861         if( !aFontName.isEmpty() )
3862         {
3863             SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3864             aItemSet.Put( aFont );
3865             SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3866             aItemSet.Put( aFontCJK );
3867             SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3868             aItemSet.Put( aFontCTL );
3869         }
3870 
3871         if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3872             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3873 
3874         InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3875     }
3876     else
3877     {
3878         if( nFontHeight )
3879         {
3880             SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3881             InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3882             SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3883             InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3884             SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3885             InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3886         }
3887         if( bColor )
3888             InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3889         if( !aFontName.isEmpty() )
3890         {
3891             SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3892             InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3893             SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3894             InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3895             SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3896             InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3897         }
3898     }
3899 
3900     // save the context
3901     PushContext(xCntxt);
3902 
3903     m_aFontStack.push_back( nSize );
3904 }
3905 
EndFontAttr(HtmlTokenId nToken)3906 void SwHTMLParser::EndFontAttr( HtmlTokenId nToken )
3907 {
3908     EndTag( nToken );
3909 
3910     // avoid stack underflow in tables
3911     if( m_aFontStack.size() > m_nFontStMin )
3912         m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3913 }
3914 
NewPara()3915 void SwHTMLParser::NewPara()
3916 {
3917     if( m_pPam->GetPoint()->nContent.GetIndex() )
3918         AppendTextNode( AM_SPACE );
3919     else
3920         AddParSpace();
3921 
3922     m_eParaAdjust = SvxAdjust::End;
3923     OUString aId, aStyle, aClass, aLang, aDir;
3924 
3925     const HTMLOptions& rHTMLOptions = GetOptions();
3926     for (size_t i = rHTMLOptions.size(); i; )
3927     {
3928         const HTMLOption& rOption = rHTMLOptions[--i];
3929         switch( rOption.GetToken() )
3930         {
3931             case HtmlOptionId::ID:
3932                 aId = rOption.GetString();
3933                 break;
3934             case HtmlOptionId::ALIGN:
3935                 m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3936                 break;
3937             case HtmlOptionId::STYLE:
3938                 aStyle = rOption.GetString();
3939                 break;
3940             case HtmlOptionId::CLASS:
3941                 aClass = rOption.GetString();
3942                 break;
3943             case HtmlOptionId::LANG:
3944                 aLang = rOption.GetString();
3945                 break;
3946             case HtmlOptionId::DIR:
3947                 aDir = rOption.GetString();
3948                 break;
3949             default: break;
3950         }
3951     }
3952 
3953     // create a new context
3954     std::unique_ptr<HTMLAttrContext> xCntxt(
3955         !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3956                                              RES_POOLCOLL_TEXT, aClass )
3957                      : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3958 
3959     // parse styles (Don't consider class. This is only possible as long as none of
3960     // the CSS1 properties of the class must be formatted hard!!!)
3961     if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
3962     {
3963         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3964         SvxCSS1PropertyInfo aPropInfo;
3965 
3966         if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3967         {
3968             OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
3969                     "Class is not considered" );
3970             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3971             InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
3972         }
3973     }
3974 
3975     if( SvxAdjust::End != m_eParaAdjust )
3976         InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
3977 
3978     // and push on stack
3979     PushContext( xCntxt );
3980 
3981     // set the current style or its attributes
3982     SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
3983 
3984     // progress bar
3985     ShowStatline();
3986 
3987     OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
3988     m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
3989 }
3990 
EndPara(bool bReal)3991 void SwHTMLParser::EndPara( bool bReal )
3992 {
3993     if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
3994     {
3995 #if OSL_DEBUG_LEVEL > 0
3996         const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule();
3997         OSL_ENSURE( pNumRule, "Where is the NumRule" );
3998 #endif
3999     }
4000 
4001     // Netscape skips empty paragraphs, we do the same.
4002     if( bReal )
4003     {
4004         if( m_pPam->GetPoint()->nContent.GetIndex() )
4005             AppendTextNode( AM_SPACE );
4006         else
4007             AddParSpace();
4008     }
4009 
4010     // If a DD or DT was open, it's an implied definition list,
4011     // which must be closed now.
4012     if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
4013         m_nDefListDeep)
4014     {
4015         m_nDefListDeep--;
4016     }
4017 
4018     // Pop the context of the stack. It can also be from an
4019     // implied opened definition list.
4020     std::unique_ptr<HTMLAttrContext> xCntxt(
4021         PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4022 
4023     // close attribute
4024     if (xCntxt)
4025     {
4026         EndContext(xCntxt.get());
4027         SetAttr();  // because of JavaScript set paragraph attributes as fast as possible
4028         xCntxt.reset();
4029     }
4030 
4031     // reset the existing style
4032     if( bReal )
4033         SetTextCollAttrs();
4034 
4035     m_nOpenParaToken = HtmlTokenId::NONE;
4036 }
4037 
NewHeading(HtmlTokenId nToken)4038 void SwHTMLParser::NewHeading( HtmlTokenId nToken )
4039 {
4040     m_eParaAdjust = SvxAdjust::End;
4041 
4042     OUString aId, aStyle, aClass, aLang, aDir;
4043 
4044     const HTMLOptions& rHTMLOptions = GetOptions();
4045     for (size_t i = rHTMLOptions.size(); i; )
4046     {
4047         const HTMLOption& rOption = rHTMLOptions[--i];
4048         switch( rOption.GetToken() )
4049         {
4050             case HtmlOptionId::ID:
4051                 aId = rOption.GetString();
4052                 break;
4053             case HtmlOptionId::ALIGN:
4054                 m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4055                 break;
4056             case HtmlOptionId::STYLE:
4057                 aStyle = rOption.GetString();
4058                 break;
4059             case HtmlOptionId::CLASS:
4060                 aClass = rOption.GetString();
4061                 break;
4062             case HtmlOptionId::LANG:
4063                 aLang = rOption.GetString();
4064                 break;
4065             case HtmlOptionId::DIR:
4066                 aDir = rOption.GetString();
4067                 break;
4068             default: break;
4069         }
4070     }
4071 
4072     // open a new paragraph
4073     if( m_pPam->GetPoint()->nContent.GetIndex() )
4074         AppendTextNode( AM_SPACE );
4075     else
4076         AddParSpace();
4077 
4078     // search for the matching style
4079     sal_uInt16 nTextColl;
4080     switch( nToken )
4081     {
4082     case HtmlTokenId::HEAD1_ON:         nTextColl = RES_POOLCOLL_HEADLINE1;  break;
4083     case HtmlTokenId::HEAD2_ON:         nTextColl = RES_POOLCOLL_HEADLINE2;  break;
4084     case HtmlTokenId::HEAD3_ON:         nTextColl = RES_POOLCOLL_HEADLINE3;  break;
4085     case HtmlTokenId::HEAD4_ON:         nTextColl = RES_POOLCOLL_HEADLINE4;  break;
4086     case HtmlTokenId::HEAD5_ON:         nTextColl = RES_POOLCOLL_HEADLINE5;  break;
4087     case HtmlTokenId::HEAD6_ON:         nTextColl = RES_POOLCOLL_HEADLINE6;  break;
4088     default:                    nTextColl = RES_POOLCOLL_STANDARD;   break;
4089     }
4090 
4091     // create the context
4092     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4093 
4094     // parse styles (regarding class see also NewPara)
4095     if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4096     {
4097         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4098         SvxCSS1PropertyInfo aPropInfo;
4099 
4100         if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4101         {
4102             OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4103                     "Class is not considered" );
4104             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4105             InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4106         }
4107     }
4108 
4109     if( SvxAdjust::End != m_eParaAdjust )
4110         InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4111 
4112     // and push on stack
4113     PushContext(xCntxt);
4114 
4115     // set the current style or its attributes
4116     SetTextCollAttrs(m_aContexts.back().get());
4117 
4118     m_nFontStHeadStart = m_aFontStack.size();
4119 
4120     // progress bar
4121     ShowStatline();
4122 }
4123 
EndHeading()4124 void SwHTMLParser::EndHeading()
4125 {
4126     // open a new paragraph
4127     if( m_pPam->GetPoint()->nContent.GetIndex() )
4128         AppendTextNode( AM_SPACE );
4129     else
4130         AddParSpace();
4131 
4132     // search context matching the token and fetch it from stack
4133     std::unique_ptr<HTMLAttrContext> xCntxt;
4134     auto nPos = m_aContexts.size();
4135     while( !xCntxt && nPos>m_nContextStMin )
4136     {
4137         switch( m_aContexts[--nPos]->GetToken() )
4138         {
4139         case HtmlTokenId::HEAD1_ON:
4140         case HtmlTokenId::HEAD2_ON:
4141         case HtmlTokenId::HEAD3_ON:
4142         case HtmlTokenId::HEAD4_ON:
4143         case HtmlTokenId::HEAD5_ON:
4144         case HtmlTokenId::HEAD6_ON:
4145             xCntxt = std::move(m_aContexts[nPos]);
4146             m_aContexts.erase( m_aContexts.begin() + nPos );
4147             break;
4148         default: break;
4149         }
4150     }
4151 
4152     // and now end attributes
4153     if (xCntxt)
4154     {
4155         EndContext(xCntxt.get());
4156         SetAttr();  // because of JavaScript set paragraph attributes as fast as possible
4157         xCntxt.reset();
4158     }
4159 
4160     // reset existing style
4161     SetTextCollAttrs();
4162 
4163     m_nFontStHeadStart = m_nFontStMin;
4164 }
4165 
NewTextFormatColl(HtmlTokenId nToken,sal_uInt16 nColl)4166 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4167 {
4168     OUString aId, aStyle, aClass, aLang, aDir;
4169 
4170     const HTMLOptions& rHTMLOptions = GetOptions();
4171     for (size_t i = rHTMLOptions.size(); i; )
4172     {
4173         const HTMLOption& rOption = rHTMLOptions[--i];
4174         switch( rOption.GetToken() )
4175         {
4176             case HtmlOptionId::ID:
4177                 aId = rOption.GetString();
4178                 break;
4179             case HtmlOptionId::STYLE:
4180                 aStyle = rOption.GetString();
4181                 break;
4182             case HtmlOptionId::CLASS:
4183                 aClass = rOption.GetString();
4184                 break;
4185             case HtmlOptionId::LANG:
4186                 aLang = rOption.GetString();
4187                 break;
4188             case HtmlOptionId::DIR:
4189                 aDir = rOption.GetString();
4190                 break;
4191             default: break;
4192         }
4193     }
4194 
4195     // open a new paragraph
4196     SwHTMLAppendMode eMode = AM_NORMAL;
4197     switch( nToken )
4198     {
4199     case HtmlTokenId::LISTING_ON:
4200     case HtmlTokenId::XMP_ON:
4201         // These both tags will be mapped to the PRE style. For the case that a
4202         // a CLASS exists we will delete it so that we don't get the CLASS of
4203         // the PRE style.
4204         aClass.clear();
4205         [[fallthrough]];
4206     case HtmlTokenId::BLOCKQUOTE_ON:
4207     case HtmlTokenId::BLOCKQUOTE30_ON:
4208     case HtmlTokenId::PREFORMTXT_ON:
4209         eMode = AM_SPACE;
4210         break;
4211     case HtmlTokenId::ADDRESS_ON:
4212         eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4213         break;
4214     case HtmlTokenId::DT_ON:
4215     case HtmlTokenId::DD_ON:
4216         eMode = AM_SOFTNOSPACE;
4217         break;
4218     default:
4219         OSL_ENSURE( false, "unknown style" );
4220         break;
4221     }
4222     if( m_pPam->GetPoint()->nContent.GetIndex() )
4223         AppendTextNode( eMode );
4224     else if( AM_SPACE==eMode )
4225         AddParSpace();
4226 
4227     // ... and save in a context
4228     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4229 
4230     // parse styles (regarding class see also NewPara)
4231     if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4232     {
4233         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4234         SvxCSS1PropertyInfo aPropInfo;
4235 
4236         if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4237         {
4238             OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4239                     "Class is not considered" );
4240             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4241             InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4242         }
4243     }
4244 
4245     PushContext(xCntxt);
4246 
4247     // set the new style
4248     SetTextCollAttrs(m_aContexts.back().get());
4249 
4250     // update progress bar
4251     ShowStatline();
4252 }
4253 
EndTextFormatColl(HtmlTokenId nToken)4254 void SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken )
4255 {
4256     SwHTMLAppendMode eMode = AM_NORMAL;
4257     switch( getOnToken(nToken) )
4258     {
4259     case HtmlTokenId::BLOCKQUOTE_ON:
4260     case HtmlTokenId::BLOCKQUOTE30_ON:
4261     case HtmlTokenId::PREFORMTXT_ON:
4262     case HtmlTokenId::LISTING_ON:
4263     case HtmlTokenId::XMP_ON:
4264         eMode = AM_SPACE;
4265         break;
4266     case HtmlTokenId::ADDRESS_ON:
4267     case HtmlTokenId::DT_ON:
4268     case HtmlTokenId::DD_ON:
4269         eMode = AM_SOFTNOSPACE;
4270         break;
4271     default:
4272         OSL_ENSURE( false, "unknown style" );
4273         break;
4274     }
4275     if( m_pPam->GetPoint()->nContent.GetIndex() )
4276         AppendTextNode( eMode );
4277     else if( AM_SPACE==eMode )
4278         AddParSpace();
4279 
4280     // pop current context of stack
4281     std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4282 
4283     // and now end attributes
4284     if (xCntxt)
4285     {
4286         EndContext(xCntxt.get());
4287         SetAttr();  // because of JavaScript set paragraph attributes as fast as possible
4288         xCntxt.reset();
4289     }
4290 
4291     // reset existing style
4292     SetTextCollAttrs();
4293 }
4294 
NewDefList()4295 void SwHTMLParser::NewDefList()
4296 {
4297     OUString aId, aStyle, aClass, aLang, aDir;
4298 
4299     const HTMLOptions& rHTMLOptions = GetOptions();
4300     for (size_t i = rHTMLOptions.size(); i; )
4301     {
4302         const HTMLOption& rOption = rHTMLOptions[--i];
4303         switch( rOption.GetToken() )
4304         {
4305             case HtmlOptionId::ID:
4306                 aId = rOption.GetString();
4307                 break;
4308             case HtmlOptionId::STYLE:
4309                 aStyle = rOption.GetString();
4310                 break;
4311             case HtmlOptionId::CLASS:
4312                 aClass = rOption.GetString();
4313                 break;
4314             case HtmlOptionId::LANG:
4315                 aLang = rOption.GetString();
4316                 break;
4317             case HtmlOptionId::DIR:
4318                 aDir = rOption.GetString();
4319                 break;
4320             default: break;
4321         }
4322     }
4323 
4324     // open a new paragraph
4325     bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4326     if( m_pPam->GetPoint()->nContent.GetIndex() )
4327         AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4328     else if( bSpace )
4329         AddParSpace();
4330 
4331     // one level more
4332     m_nDefListDeep++;
4333 
4334     bool bInDD = false, bNotInDD = false;
4335     auto nPos = m_aContexts.size();
4336     while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4337     {
4338         HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4339         switch( nCntxtToken )
4340         {
4341         case HtmlTokenId::DEFLIST_ON:
4342         case HtmlTokenId::DIRLIST_ON:
4343         case HtmlTokenId::MENULIST_ON:
4344         case HtmlTokenId::ORDERLIST_ON:
4345         case HtmlTokenId::UNORDERLIST_ON:
4346             bNotInDD = true;
4347             break;
4348         case HtmlTokenId::DD_ON:
4349             bInDD = true;
4350             break;
4351         default: break;
4352         }
4353     }
4354 
4355     // ... and save in a context
4356     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4357 
4358     // in it save also the margins
4359     sal_uInt16 nLeft=0, nRight=0;
4360     short nIndent=0;
4361     GetMarginsFromContext( nLeft, nRight, nIndent );
4362 
4363     // The indentation, which already results from a DL, correlates with a DT
4364     // on the current level and this correlates to a DD from the previous level.
4365     // For a level >=2 we must add DD distance.
4366     if( !bInDD && m_nDefListDeep > 1 )
4367     {
4368 
4369         // and the one of the DT-style of the current level
4370         SvxLRSpaceItem rLRSpace =
4371             m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4372                        ->GetLRSpace();
4373         nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft());
4374     }
4375 
4376     xCntxt->SetMargins( nLeft, nRight, nIndent );
4377 
4378     // parse styles
4379     if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4380     {
4381         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4382         SvxCSS1PropertyInfo aPropInfo;
4383 
4384         if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4385         {
4386             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4387             InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4388         }
4389     }
4390 
4391     PushContext(xCntxt);
4392 
4393     // set the attributes of the new style
4394     if( m_nDefListDeep > 1 )
4395         SetTextCollAttrs(m_aContexts.back().get());
4396 }
4397 
EndDefList()4398 void SwHTMLParser::EndDefList()
4399 {
4400     bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4401     if( m_pPam->GetPoint()->nContent.GetIndex() )
4402         AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4403     else if( bSpace )
4404         AddParSpace();
4405 
4406     // one level less
4407     if( m_nDefListDeep > 0 )
4408         m_nDefListDeep--;
4409 
4410     // pop current context of stack
4411     std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4412 
4413     // and now end attributes
4414     if (xCntxt)
4415     {
4416         EndContext(xCntxt.get());
4417         SetAttr();  // because of JavaScript set paragraph attributes as fast as possible
4418         xCntxt.reset();
4419     }
4420 
4421     // and set style
4422     SetTextCollAttrs();
4423 }
4424 
NewDefListItem(HtmlTokenId nToken)4425 void SwHTMLParser::NewDefListItem( HtmlTokenId nToken )
4426 {
4427     // determine if the DD/DT exist in a DL
4428     bool bInDefList = false, bNotInDefList = false;
4429     auto nPos = m_aContexts.size();
4430     while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4431     {
4432         HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4433         switch( nCntxtToken )
4434         {
4435         case HtmlTokenId::DEFLIST_ON:
4436             bInDefList = true;
4437             break;
4438         case HtmlTokenId::DIRLIST_ON:
4439         case HtmlTokenId::MENULIST_ON:
4440         case HtmlTokenId::ORDERLIST_ON:
4441         case HtmlTokenId::UNORDERLIST_ON:
4442             bNotInDefList = true;
4443             break;
4444         default: break;
4445         }
4446     }
4447 
4448     // if not, then implicitly open a new DL
4449     if( !bInDefList )
4450     {
4451         m_nDefListDeep++;
4452         OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4453                 "Now an open paragraph element will be lost." );
4454         m_nOpenParaToken = nToken;
4455     }
4456 
4457     NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4458                                               : RES_POOLCOLL_HTML_DT) );
4459 }
4460 
EndDefListItem(HtmlTokenId nToken)4461 void SwHTMLParser::EndDefListItem( HtmlTokenId nToken )
4462 {
4463     // open a new paragraph
4464     if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() )
4465         AppendTextNode( AM_SOFTNOSPACE );
4466 
4467     // search context matching the token and fetch it from stack
4468     nToken = getOnToken(nToken);
4469     std::unique_ptr<HTMLAttrContext> xCntxt;
4470     auto nPos = m_aContexts.size();
4471     while( !xCntxt && nPos>m_nContextStMin )
4472     {
4473         HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4474         switch( nCntxtToken )
4475         {
4476         case HtmlTokenId::DD_ON:
4477         case HtmlTokenId::DT_ON:
4478             if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken  )
4479             {
4480                 xCntxt = std::move(m_aContexts[nPos]);
4481                 m_aContexts.erase( m_aContexts.begin() + nPos );
4482             }
4483             break;
4484         case HtmlTokenId::DEFLIST_ON:
4485             // don't look at DD/DT outside the current DefList
4486         case HtmlTokenId::DIRLIST_ON:
4487         case HtmlTokenId::MENULIST_ON:
4488         case HtmlTokenId::ORDERLIST_ON:
4489         case HtmlTokenId::UNORDERLIST_ON:
4490             // and also not outside another list
4491             nPos = m_nContextStMin;
4492             break;
4493         default: break;
4494         }
4495     }
4496 
4497     // and now end attributes
4498     if (xCntxt)
4499     {
4500         EndContext(xCntxt.get());
4501         SetAttr();  // because of JavaScript set paragraph attributes as fast as possible
4502     }
4503 }
4504 
4505 /**
4506  *
4507  * @param bNoSurroundOnly   The paragraph contains at least one frame
4508  *                          without wrapping.
4509  * @param bSurroundOnly     The paragraph contains at least one frame
4510  *                          with wrapping, but none without wrapping.
4511  *
4512  *                          Otherwise the paragraph contains any frame.
4513  */
HasCurrentParaFlys(bool bNoSurroundOnly,bool bSurroundOnly) const4514 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4515                                        bool bSurroundOnly ) const
4516 {
4517     SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
4518 
4519     const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();
4520 
4521     bool bFound = false;
4522     for ( size_t i=0; i<rFrameFormatTable.size(); i++ )
4523     {
4524         const SwFrameFormat *const pFormat = rFrameFormatTable[i];
4525         SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4526         // A frame was found, when
4527         // - it is paragraph-bound, and
4528         // - is anchored in current paragraph, and
4529         //   - every paragraph-bound frame counts, or
4530         //   - (only frames without wrapping count and) the frame doesn't have
4531         //     a wrapping
4532         SwPosition const*const pAPos = pAnchor->GetContentAnchor();
4533         if (pAPos &&
4534             ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4535              (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4536             pAPos->nNode == rNodeIdx )
4537         {
4538             if( !(bNoSurroundOnly || bSurroundOnly) )
4539             {
4540                 bFound = true;
4541                 break;
4542             }
4543             else
4544             {
4545                 // When looking for frames with wrapping, also disregard
4546                 // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4547                 // and you don't want to evade those when positioning.
4548                 css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4549                 if( bNoSurroundOnly )
4550                 {
4551                     if( css::text::WrapTextMode_NONE==eSurround )
4552                     {
4553                         bFound = true;
4554                         break;
4555                     }
4556                 }
4557                 if( bSurroundOnly )
4558                 {
4559                     if( css::text::WrapTextMode_NONE==eSurround )
4560                     {
4561                         bFound = false;
4562                         break;
4563                     }
4564                     else if( css::text::WrapTextMode_THROUGH!=eSurround )
4565                     {
4566                         bFound = true;
4567                         // Continue searching: It's possible that some without
4568                         // wrapping will follow...
4569                     }
4570                 }
4571             }
4572         }
4573     }
4574 
4575     return bFound;
4576 }
4577 
4578 // the special methods for inserting of objects
4579 
GetCurrFormatColl() const4580 const SwFormatColl *SwHTMLParser::GetCurrFormatColl() const
4581 {
4582     const SwContentNode* pCNd = m_pPam->GetContentNode();
4583     return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4584 }
4585 
SetTextCollAttrs(HTMLAttrContext * pContext)4586 void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext )
4587 {
4588     SwTextFormatColl *pCollToSet = nullptr; // the style to set
4589     SfxItemSet *pItemSet = nullptr;         // set of hard attributes
4590     sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4591     const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4592     sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4593 
4594     bool bInPRE=false;                          // some context info
4595 
4596     sal_uInt16 nLeftMargin = 0, nRightMargin = 0;   // the margins and
4597     short nFirstLineIndent = 0;                     // indentations
4598 
4599     for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i )
4600     {
4601         const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4602 
4603         sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4604         if( nColl )
4605         {
4606             // There is a style to set. Then at first we must decide,
4607             // if the style can be set.
4608             bool bSetThis = true;
4609             switch( nColl )
4610             {
4611             case RES_POOLCOLL_HTML_PRE:
4612                 bInPRE = true;
4613                 break;
4614             case RES_POOLCOLL_TEXT:
4615                 // <TD><P CLASS=xxx> must become TD.xxx
4616                 if( nDfltColl==RES_POOLCOLL_TABLE ||
4617                     nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4618                     nColl = nDfltColl;
4619                 break;
4620             case RES_POOLCOLL_HTML_HR:
4621                 // also <HR> in <PRE> set as style, otherwise it can't
4622                 // be exported anymore
4623                 break;
4624             default:
4625                 if( bInPRE )
4626                     bSetThis = false;
4627                 break;
4628             }
4629 
4630             SwTextFormatColl *pNewColl =
4631                 m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4632 
4633             if( bSetThis )
4634             {
4635                 // If now a different style should be set as previously, the
4636                 // previous style must be replaced by hard attribution.
4637 
4638                 if( pCollToSet )
4639                 {
4640                     // insert the attributes hard, which previous style sets
4641                     if( !pItemSet )
4642                         pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4643                     else
4644                     {
4645                         const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4646                         SfxItemSet aItemSet( *rCollSet.GetPool(),
4647                                              rCollSet.GetRanges() );
4648                         aItemSet.Set( rCollSet );
4649                         pItemSet->Put( aItemSet );
4650                     }
4651                     // but remove the attributes, which the current style sets,
4652                     // because otherwise they will be overwritten later
4653                     pItemSet->Differentiate( pNewColl->GetAttrSet() );
4654                 }
4655 
4656                 pCollToSet = pNewColl;
4657             }
4658             else
4659             {
4660                 // hard attribution
4661                 if( !pItemSet )
4662                     pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4663                 else
4664                 {
4665                     const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4666                     SfxItemSet aItemSet( *rCollSet.GetPool(),
4667                                          rCollSet.GetRanges() );
4668                     aItemSet.Set( rCollSet );
4669                     pItemSet->Put( aItemSet );
4670                 }
4671             }
4672         }
4673         else
4674         {
4675             // Maybe a default style exists?
4676             nColl = pCntxt->GetDefaultTextFormatColl();
4677             if( nColl )
4678                 nDfltColl = nColl;
4679         }
4680 
4681         // if applicable fetch new paragraph indents
4682         if( pCntxt->IsLRSpaceChanged() )
4683         {
4684             sal_uInt16 nLeft=0, nRight=0;
4685 
4686             pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4687             nLeftMargin = nLeft;
4688             nRightMargin = nRight;
4689         }
4690     }
4691 
4692     // If in current context a new style should be set,
4693     // its paragraph margins must be inserted in the context.
4694     if( pContext && nTopColl )
4695     {
4696         // <TD><P CLASS=xxx> must become TD.xxx
4697         if( nTopColl==RES_POOLCOLL_TEXT &&
4698             (nDfltColl==RES_POOLCOLL_TABLE ||
4699              nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4700             nTopColl = nDfltColl;
4701 
4702         const SwTextFormatColl *pTopColl =
4703             m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4704         const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4705         const SfxPoolItem *pItem;
4706         if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) )
4707         {
4708             const SvxLRSpaceItem *pLRItem =
4709                 static_cast<const SvxLRSpaceItem *>(pItem);
4710 
4711             sal_Int32 nLeft = pLRItem->GetTextLeft();
4712             sal_Int32 nRight = pLRItem->GetRight();
4713             nFirstLineIndent = pLRItem->GetTextFirstLineOffset();
4714 
4715             // In Definition lists the margins also contain the margins from the previous levels
4716             if( RES_POOLCOLL_HTML_DD == nTopColl )
4717             {
4718                 const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser
4719                     ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())
4720                     ->GetLRSpace();
4721                 nLeft -= rDTLRSpace.GetTextLeft();
4722                 nRight -= rDTLRSpace.GetRight();
4723             }
4724             else if( RES_POOLCOLL_HTML_DT == nTopColl )
4725             {
4726                 nLeft = 0;
4727                 nRight = 0;
4728             }
4729 
4730             // the paragraph margins add up
4731             nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4732             nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4733 
4734             pContext->SetMargins( nLeftMargin, nRightMargin,
4735                                   nFirstLineIndent );
4736         }
4737         if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) )
4738         {
4739             const SvxULSpaceItem *pULItem =
4740                 static_cast<const SvxULSpaceItem *>(pItem);
4741             pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4742         }
4743     }
4744 
4745     // If no style is set in the context use the text body.
4746     if( !pCollToSet )
4747     {
4748         pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4749         const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4750         if( !nLeftMargin )
4751             nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft());
4752         if( !nRightMargin )
4753             nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight());
4754         if( !nFirstLineIndent )
4755             nFirstLineIndent = rLRItem.GetTextFirstLineOffset();
4756     }
4757 
4758     // remove previous hard attribution of paragraph
4759     for( auto pParaAttr : m_aParaAttrs )
4760         pParaAttr->Invalidate();
4761     m_aParaAttrs.clear();
4762 
4763     // set the style
4764     m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4765 
4766     // if applicable correct the paragraph indent
4767     const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace();
4768     bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() ||
4769                       nFirstLineIndent != rLRItem.GetTextFirstLineOffset() ||
4770                       nRightMargin != rLRItem.GetRight();
4771 
4772     if( bSetLRSpace )
4773     {
4774         SvxLRSpaceItem aLRItem( rLRItem );
4775         aLRItem.SetTextLeft( nLeftMargin );
4776         aLRItem.SetRight( nRightMargin );
4777         aLRItem.SetTextFirstLineOffset( nFirstLineIndent );
4778         if( pItemSet )
4779             pItemSet->Put( aLRItem );
4780         else
4781         {
4782             NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
4783             m_xAttrTab->pLRSpace->SetLikePara();
4784             m_aParaAttrs.push_back( m_xAttrTab->pLRSpace );
4785             EndAttr( m_xAttrTab->pLRSpace, false );
4786         }
4787     }
4788 
4789     // and now set the attributes
4790     if( pItemSet )
4791     {
4792         InsertParaAttrs( *pItemSet );
4793         delete pItemSet;
4794     }
4795 }
4796 
NewCharFormat(HtmlTokenId nToken)4797 void SwHTMLParser::NewCharFormat( HtmlTokenId nToken )
4798 {
4799     OUString aId, aStyle, aLang, aDir;
4800     OUString aClass;
4801 
4802     const HTMLOptions& rHTMLOptions = GetOptions();
4803     for (size_t i = rHTMLOptions.size(); i; )
4804     {
4805         const HTMLOption& rOption = rHTMLOptions[--i];
4806         switch( rOption.GetToken() )
4807         {
4808         case HtmlOptionId::ID:
4809             aId = rOption.GetString();
4810             break;
4811         case HtmlOptionId::STYLE:
4812             aStyle = rOption.GetString();
4813             break;
4814         case HtmlOptionId::CLASS:
4815             aClass = rOption.GetString();
4816             break;
4817         case HtmlOptionId::LANG:
4818             aLang = rOption.GetString();
4819             break;
4820         case HtmlOptionId::DIR:
4821             aDir = rOption.GetString();
4822             break;
4823         default: break;
4824         }
4825     }
4826 
4827     // create a new context
4828     std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4829 
4830     // set the style and save it in the context
4831     SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4832     OSL_ENSURE( pCFormat, "No character format found for token" );
4833 
4834     // parse styles (regarding class see also NewPara)
4835     if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4836     {
4837         SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4838         SvxCSS1PropertyInfo aPropInfo;
4839 
4840         if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4841         {
4842             OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4843                     "Class is not considered" );
4844             DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4845             InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4846         }
4847     }
4848 
4849     // Character formats are stored in their own stack and can never be inserted
4850     // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4851     if( pCFormat )
4852         InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4853 
4854     // save the context
4855     PushContext(xCntxt);
4856 }
4857 
/**
 * Inserts spacing for a spacer element, depending on its TYPE option.
 *
 * The options TYPE, ALIGN, WIDTH, HEIGHT and SIZE of the current tag are
 * evaluated; without a TYPE option the spacer is treated as horizontal.
 *
 *  - block:      an empty, content-protected fly frame with the requested
 *                size and alignment is created.
 *  - vertical:   SIZE is added as lower paragraph spacing, either to the
 *                preceding text node or via a new paragraph.
 *  - horizontal: SIZE becomes additional first-line indent when the
 *                paragraph is still empty, otherwise a space character with
 *                that much kerning is inserted.
 */
void SwHTMLParser::InsertSpacer()
{
    // defaults; changed via the options where applicable
    sal_Int16 eVertOri = text::VertOrientation::TOP;
    sal_Int16 eHoriOri = text::HoriOrientation::NONE;
    Size aSize( 0, 0);
    tools::Long nSize = 0;
    bool bPercentWidth = false;
    bool bPercentHeight = false;
    sal_uInt16 nType = HTML_SPTYPE_HORI;

    // the options are visited from the last one to the first one
    const HTMLOptions& rHTMLOptions = GetOptions();
    for (size_t i = rHTMLOptions.size(); i; )
    {
        const HTMLOption& rOption = rHTMLOptions[--i];
        switch( rOption.GetToken() )
        {
        case HtmlOptionId::TYPE:
            rOption.GetEnum( nType, aHTMLSpacerTypeTable );
            break;
        case HtmlOptionId::ALIGN:
            // the same option value is looked up in both alignment tables;
            // the current value is passed along to each lookup
            eVertOri =
                rOption.GetEnum( aHTMLImgVAlignTable,
                                  eVertOri );
            eHoriOri =
                rOption.GetEnum( aHTMLImgHAlignTable,
                                  eHoriOri );
            break;
        case HtmlOptionId::WIDTH:
            // First only save as pixel value!
            bPercentWidth = (rOption.GetString().indexOf('%') != -1);
            aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
            break;
        case HtmlOptionId::HEIGHT:
            // First only save as pixel value!
            bPercentHeight = (rOption.GetString().indexOf('%') != -1);
            aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
            break;
        case HtmlOptionId::SIZE:
            // First only save as pixel value!
            nSize = rOption.GetNumber();
            break;
        default: break;
        }
    }

    switch( nType )
    {
    case HTML_SPTYPE_BLOCK:
        {
            // create an empty text frame

            // fetch the ItemSet
            SfxItemSet aFrameSet( m_xDoc->GetAttrPool(),
                                svl::Items<RES_FRMATR_BEGIN, RES_FRMATR_END-1>{} );
            if( !IsNewDoc() )
                Reader::ResetFrameFormatAttrs( aFrameSet );

            // set the anchor and the adjustment
            SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );

            // and the size of the frame; the item set and property info
            // passed here are empty placeholders
            Size aDfltSz( MINFLY, MINFLY );
            Size aSpace( 0, 0 );
            SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
                                 m_pCSS1Parser->GetWhichMap() );
            SvxCSS1PropertyInfo aDummyPropInfo;

            SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
                        aDummyPropInfo, aFrameSet );
            SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );

            // protect the content
            SvxProtectItem aProtectItem( RES_PROTECT) ;
            aProtectItem.SetContentProtect( true );
            aFrameSet.Put( aProtectItem );

            // create the frame
            RndStdIds eAnchorId =
                aFrameSet.Get(RES_ANCHOR).GetAnchorId();
            SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
                                            m_pPam->GetPoint(), &aFrameSet );
            // Possibly create frames and register auto-bound frames.
            RegisterFlyFrame( pFlyFormat );
        }
        break;
    case HTML_SPTYPE_VERT:
        if( nSize > 0 )
        {
            // convert the pixel value to twips (only possible with a
            // default output device)
            if (Application::GetDefaultDevice())
            {
                nSize = Application::GetDefaultDevice()
                            ->PixelToLogic( Size(0,nSize),
                                            MapMode(MapUnit::MapTwip) ).Height();
            }

            // set a paragraph margin
            SwTextNode *pTextNode = nullptr;
            if( !m_pPam->GetPoint()->nContent.GetIndex() )
            {
                // if possible change the bottom paragraph margin
                // of previous node

                SetAttr();  // set still open paragraph attributes

                pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1]
                               ->GetTextNode();

                // If the previous paragraph isn't a text node, then now an
                // empty paragraph is created, which already generates a single
                // line of spacing.
                if( !pTextNode )
                    nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
            }

            if( pTextNode )
            {
                // enlarge the lower spacing of the preceding text node
                SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode
                    ->SwContentNode::GetAttr( RES_UL_SPACE )) );
                aULSpace.SetLower( aULSpace.GetLower() + o3tl::narrowing<sal_uInt16>(nSize) );
                pTextNode->SetAttr( aULSpace );
            }
            else
            {
                // no usable preceding text node: give the current paragraph
                // the lower spacing and start a new one
                NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, o3tl::narrowing<sal_uInt16>(nSize), RES_UL_SPACE));
                EndAttr( m_xAttrTab->pULSpace, false );

                AppendTextNode();    // Don't change spacing!
            }
        }
        break;
    case HTML_SPTYPE_HORI:
        if( nSize > 0 )
        {
            // If the paragraph is still empty, set first line
            // indentation, otherwise apply letter spacing over a space.

            // convert the pixel value to twips (only possible with a
            // default output device)
            if (Application::GetDefaultDevice())
            {
                nSize = Application::GetDefaultDevice()
                            ->PixelToLogic( Size(nSize,0),
                                            MapMode(MapUnit::MapTwip) ).Width();
            }

            if( !m_pPam->GetPoint()->nContent.GetIndex() )
            {
                sal_uInt16 nLeft=0, nRight=0;
                short nIndent = 0;

                // start from the margins of the current context and add the
                // spacer size to the first-line indent
                GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
                nIndent = nIndent + static_cast<short>(nSize);

                SvxLRSpaceItem aLRItem( RES_LR_SPACE );
                aLRItem.SetTextLeft( nLeft );
                aLRItem.SetRight( nRight );
                aLRItem.SetTextFirstLineOffset( nIndent );

                NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem);
                EndAttr( m_xAttrTab->pLRSpace, false );
            }
            else
            {
                // the kerning attribute spans exactly the inserted space
                NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
                m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
                EndAttr( m_xAttrTab->pKerning );
            }
        }
    }
}
5027 
ToTwips(sal_uInt16 nPixel)5028 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5029 {
5030     if( nPixel && Application::GetDefaultDevice() )
5031     {
5032         SwTwips nTwips = Application::GetDefaultDevice()->PixelToLogic(
5033                     Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width();
5034         return o3tl::narrowing<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16)));
5035     }
5036     else
5037         return nPixel;
5038 }
5039 
GetCurrentBrowseWidth()5040 SwTwips SwHTMLParser::GetCurrentBrowseWidth()
5041 {
5042     const SwTwips nWidth = SwHTMLTableLayout::GetBrowseWidth( *m_xDoc );
5043     if( nWidth )
5044         return nWidth;
5045 
5046     if( !m_aHTMLPageSize.Width() )
5047     {
5048         const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5049 
5050         const SwFormatFrameSize& rSz   = rPgFormat.GetFrameSize();
5051         const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5052         const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5053         const SwFormatCol& rCol = rPgFormat.GetCol();
5054 
5055         m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5056         m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5057 
5058         if( 1 < rCol.GetNumCols() )
5059             m_aHTMLPageSize.setWidth( m_aHTMLPageSize.Width() / ( rCol.GetNumCols()) );
5060     }
5061 
5062     return m_aHTMLPageSize.Width();
5063 }
5064 
InsertIDOption()5065 void SwHTMLParser::InsertIDOption()
5066 {
5067     OUString aId;
5068     const HTMLOptions& rHTMLOptions = GetOptions();
5069     for (size_t i = rHTMLOptions.size(); i; )
5070     {
5071         const HTMLOption& rOption = rHTMLOptions[--i];
5072         if( HtmlOptionId::ID==rOption.GetToken() )
5073         {
5074             aId = rOption.GetString();
5075             break;
5076         }
5077     }
5078 
5079     if( !aId.isEmpty() )
5080         InsertBookmark( aId );
5081 }
5082 
/**
 * Handles a line break tag, including its CLEAR option and page breaks
 * requested through its style options.
 *
 * Depending on the options this either inserts a hard line break, changes
 * the wrapping of frames anchored in the current paragraph (CLEAR), and/or
 * sets a page break attribute before or after a newly appended paragraph.
 */
void SwHTMLParser::InsertLineBreak()
{
    // <BR CLEAR=xxx> is handled as:
    // 1.) Only regard the paragraph-bound frames anchored in current paragraph.
    // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right-
    //     justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is
    //     changed as following:
    // 3.) If the paragraph contains no text, then the frames don't get a wrapping
    // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping
    //     and a right aligned frame gets a left "only anchor" wrapping.
    // 5.) if in a non-empty paragraph the wrapping of a frame is changed,
    //     then a new paragraph is opened
    // 6.) If no wrappings of frames are changed, a hard line break is inserted.

    OUString aId, aStyle, aClass;             // values of the ID, STYLE and CLASS options
    bool bClearLeft = false, bClearRight = false;
    bool bCleared = false;  // Was a CLEAR executed?

    // then we fetch the options (visited from the last one to the first one)
    const HTMLOptions& rHTMLOptions = GetOptions();
    for (size_t i = rHTMLOptions.size(); i; )
    {
        const HTMLOption& rOption = rHTMLOptions[--i];
        switch( rOption.GetToken() )
        {
            case HtmlOptionId::CLEAR:
                {
                    // values other than all/left/right are ignored
                    const OUString &rClear = rOption.GetString();
                    if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
                    {
                        bClearLeft = true;
                        bClearRight = true;
                    }
                    else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
                        bClearLeft = true;
                    else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
                        bClearRight = true;
                }
                break;
            case HtmlOptionId::ID:
                aId = rOption.GetString();
                break;
            case HtmlOptionId::STYLE:
                aStyle = rOption.GetString();
                break;
            case HtmlOptionId::CLASS:
                aClass = rOption.GetString();
                break;
            default: break;
        }
    }

    // CLEAR is only supported for the current paragraph
    if( bClearLeft || bClearRight )
    {
        SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode;
        SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode();
        if( pTextNd )
        {
            const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats();

            for( size_t i=0; i<rFrameFormatTable.size(); i++ )
            {
                SwFrameFormat *const pFormat = rFrameFormatTable[i];
                SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
                SwPosition const*const pAPos = pAnchor->GetContentAnchor();
                // only frames anchored at/in the current paragraph that
                // currently have some kind of wrapping are of interest
                if (pAPos &&
                    ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
                     (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
                    pAPos->nNode == rNodeIdx &&
                    pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE )
                {
                    // drawing formats are treated as left aligned
                    sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which()
                        ? text::HoriOrientation::LEFT
                        : pFormat->GetHoriOrient().GetHoriOrient();

                    // WrapTextMode_PARALLEL serves as "leave unchanged" marker
                    css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL;
                    if( m_pPam->GetPoint()->nContent.GetIndex() )
                    {
                        // non-empty paragraph: wrap only on the opposite side
                        if( bClearLeft && text::HoriOrientation::LEFT==eHori )
                            eSurround = css::text::WrapTextMode_RIGHT;
                        else if( bClearRight && text::HoriOrientation::RIGHT==eHori )
                            eSurround = css::text::WrapTextMode_LEFT;
                    }
                    else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) ||
                             (bClearRight && text::HoriOrientation::RIGHT==eHori) )
                    {
                        // empty paragraph: no wrapping at all
                        eSurround = css::text::WrapTextMode_NONE;
                    }

                    if( css::text::WrapTextMode_PARALLEL != eSurround )
                    {
                        SwFormatSurround aSurround( eSurround );
                        if( css::text::WrapTextMode_NONE != eSurround )
                            aSurround.SetAnchorOnly( true );
                        pFormat->SetFormatAttr( aSurround );
                        bCleared = true;
                    }
                }
            }
        }
    }

    // parse styles
    std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
    bool bBreakItem = false;
    if( HasStyleOptions( aStyle, aId, aClass ) )
    {
        SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
        SvxCSS1PropertyInfo aPropInfo;

        if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
        {
            if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
            {
                // remember a copy of the break item of the item set
                aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
                bBreakItem = true;
            }
            if( !aPropInfo.m_aId.isEmpty() )
                InsertBookmark( aPropInfo.m_aId );
        }
    }

    // a "page after" break is set before a new paragraph is appended below
    if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
    {
        NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
        EndAttr( m_xAttrTab->pBreak, false );
    }

    if( !bCleared && !bBreakItem )
    {
        // If no CLEAR could or should be executed, a line break will be inserted
        m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, "\x0A" );
    }
    else if( m_pPam->GetPoint()->nContent.GetIndex() )
    {
        // If a CLEAR is executed in a non-empty paragraph, then after it
        // a new paragraph has to be opened.
        // MIB 21.02.97: Here actually we should change the bottom paragraph
        // margin to zero. This will fail for something like this <BR ..><P>
        // (>Netscape). That's why we don't do it.
        AppendTextNode( AM_NOSPACE );
    }
    // a "page before" break is set after the paragraph handling above
    if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
    {
        NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
        EndAttr( m_xAttrTab->pBreak, false );
    }
}
5232 
InsertHorzRule()5233 void SwHTMLParser::InsertHorzRule()
5234 {
5235     sal_uInt16 nSize = 0;
5236     sal_uInt16 nWidth = 0;
5237 
5238     SvxAdjust eAdjust = SvxAdjust::End;
5239 
5240     bool bPercentWidth = false;
5241     bool bNoShade = false;
5242     bool bColor = false;
5243 
5244     Color aColor;
5245     OUString aId;
5246 
5247     // let's fetch the options
5248     const HTMLOptions& rHTMLOptions = GetOptions();
5249     for (size_t i = rHTMLOptions.size(); i; )
5250     {
5251         const HTMLOption& rOption = rHTMLOptions[--i];
5252         switch( rOption.GetToken() )
5253         {
5254         case HtmlOptionId::ID:
5255             aId = rOption.GetString();
5256             break;
5257         case HtmlOptionId::SIZE:
5258             nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5259             break;
5260         case HtmlOptionId::WIDTH:
5261             bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5262             nWidth = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5263             if( bPercentWidth && nWidth>=100 )
5264             {
5265                 // the default case are 100% lines (no attributes necessary)
5266                 nWidth = 0;
5267                 bPercentWidth = false;
5268             }
5269             break;
5270         case HtmlOptionId::ALIGN:
5271             eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5272             break;
5273         case HtmlOptionId::NOSHADE:
5274             bNoShade = true;
5275             break;
5276         case HtmlOptionId::COLOR:
5277             rOption.GetColor( aColor );
5278             bColor = true;
5279             break;
5280         default: break;
5281         }
5282     }
5283 
5284     if( m_pPam->GetPoint()->nContent.GetIndex() )
5285         AppendTextNode( AM_NOSPACE );
5286     if( m_nOpenParaToken != HtmlTokenId::NONE )
5287         EndPara();
5288     AppendTextNode();
5289     m_pPam->Move( fnMoveBackward );
5290 
5291     // ...and save in a context
5292     std::unique_ptr<HTMLAttrContext> xCntxt(
5293         new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5294 
5295     PushContext(xCntxt);
5296 
5297     // set the new style
5298     SetTextCollAttrs(m_aContexts.back().get());
5299 
5300     // the hard attributes of the current paragraph will never become invalid
5301     m_aParaAttrs.clear();
5302 
5303     if( nSize>0 || bColor || bNoShade )
5304     {
5305         // set line colour and/or width
5306         if( !bColor )
5307             aColor = COL_GRAY;
5308 
5309         SvxBorderLine aBorderLine( &aColor );
5310         if( nSize )
5311         {
5312             tools::Long nPWidth = 0;
5313             tools::Long nPHeight = static_cast<tools::Long>(nSize);
5314             SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5315             if ( !bNoShade )
5316             {
5317                 aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5318             }
5319             aBorderLine.SetWidth( nPHeight );
5320         }
5321         else if( bNoShade )
5322         {
5323             aBorderLine.SetWidth( DEF_LINE_WIDTH_2 );
5324         }
5325         else
5326         {
5327             aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5328             aBorderLine.SetWidth( DEF_LINE_WIDTH_0 );
5329         }
5330 
5331         SvxBoxItem aBoxItem(RES_BOX);
5332         aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5333         HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5334         m_aSetAttrTab.push_back( pTmp );
5335     }
5336     if( nWidth )
5337     {
5338         // If we aren't in a table, then the width value will be "faked" with
5339         // paragraph indents. That makes little sense in a table. In order to
5340         // avoid that the line is considered during the width calculation, it
5341         // still gets an appropriate LRSpace-Item.
5342         if (!m_xTable)
5343         {
5344             // fake length and alignment of line above paragraph indents
5345             tools::Long nBrowseWidth = GetCurrentBrowseWidth();
5346             nWidth = bPercentWidth ? o3tl::narrowing<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5347                                : ToTwips( o3tl::narrowing<sal_uInt16>(nBrowseWidth) );
5348             if( nWidth < MINLAY )
5349                 nWidth = MINLAY;
5350 
5351             const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5352             if (pColl)
5353             {
5354                 SvxLRSpaceItem aLRItem( pColl->GetLRSpace() );
5355                 tools::Long nDist = nBrowseWidth - nWidth;
5356 
5357                 switch( eAdjust )
5358                 {
5359                 case SvxAdjust::Right:
5360                     aLRItem.SetTextLeft( o3tl::narrowing<sal_uInt16>(nDist) );
5361                     break;
5362                 case SvxAdjust::Left:
5363                     aLRItem.SetRight( o3tl::narrowing<sal_uInt16>(nDist) );
5364                     break;
5365                 case SvxAdjust::Center:
5366                 default:
5367                     nDist /= 2;
5368                     aLRItem.SetTextLeft( o3tl::narrowing<sal_uInt16>(nDist) );
5369                     aLRItem.SetRight( o3tl::narrowing<sal_uInt16>(nDist) );
5370                     break;
5371                 }
5372 
5373                 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5374                 m_aSetAttrTab.push_back( pTmp );
5375             }
5376         }
5377     }
5378 
5379     // it's not possible to insert bookmarks in links
5380     if( !aId.isEmpty() )
5381         InsertBookmark( aId );
5382 
5383     // pop current context of stack
5384     std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5385     xPoppedContext.reset();
5386 
5387     m_pPam->Move( fnMoveForward );
5388 
5389     // and set the current style in the next paragraph
5390     SetTextCollAttrs();
5391 }
5392 
ParseMoreMetaOptions()5393 void SwHTMLParser::ParseMoreMetaOptions()
5394 {
5395     OUString aName, aContent;
5396     bool bHTTPEquiv = false;
5397 
5398     const HTMLOptions& rHTMLOptions = GetOptions();
5399     for (size_t i = rHTMLOptions.size(); i; )
5400     {
5401         const HTMLOption& rOption = rHTMLOptions[--i];
5402         switch( rOption.GetToken() )
5403         {
5404         case HtmlOptionId::NAME:
5405             aName = rOption.GetString();
5406             bHTTPEquiv = false;
5407             break;
5408         case HtmlOptionId::HTTPEQUIV:
5409             aName = rOption.GetString();
5410             bHTTPEquiv = true;
5411             break;
5412         case HtmlOptionId::CONTENT:
5413             aContent = rOption.GetString();
5414             break;
5415         default: break;
5416         }
5417     }
5418 
5419     // Here things get a little tricky: We know for sure, that the Doc-Info
5420     // wasn't changed. Therefore it's enough to query for Generator and Refresh
5421     // to find a not processed Token. These are the only ones which won't change
5422     // the Doc-Info.
5423     if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5424         aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5425         aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5426         aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5427         return;
5428 
5429     aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5430 
5431     if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5432     {
5433         FillEndNoteInfo( aContent );
5434         return;
5435     }
5436 
5437     if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote ) )
5438     {
5439         FillFootNoteInfo( aContent );
5440         return;
5441     }
5442 
5443     OUStringBuffer sText;
5444     sText.append("HTML: <");
5445     sText.append(OOO_STRING_SVTOOLS_HTML_meta);
5446     sText.append(' ');
5447     if( bHTTPEquiv  )
5448         sText.append(OOO_STRING_SVTOOLS_HTML_O_httpequiv);
5449     else
5450         sText.append(OOO_STRING_SVTOOLS_HTML_O_name);
5451     sText.append("=\"");
5452     sText.append(aName);
5453     sText.append("\" ");
5454     sText.append(OOO_STRING_SVTOOLS_HTML_O_content);
5455     sText.append("=\"");
5456     sText.append(aContent);
5457     sText.append("\">");
5458 
5459     SwPostItField aPostItField(
5460         static_cast<SwPostItFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Postit )),
5461         OUString(), sText.makeStringAndClear(), OUString(), OUString(), DateTime(DateTime::SYSTEM));
5462     SwFormatField aFormatField( aPostItField );
5463     InsertAttr( aFormatField,  false );
5464 }
5465 
/**
 * Creates an attribute that starts and ends at @p rPos.
 *
 * The attribute stores its own clone of @p rItem, is marked valid, is not
 * linked to any previous/next attribute, and remembers @p ppHd and
 * @p rAttrTab.
 *
 * @param rPos      position used as both start and end of the attribute
 * @param rItem     item to clone
 * @param ppHd      stored as the attribute's head pointer; may be nullptr
 * @param rAttrTab  attribute table stored with the attribute
 */
HTMLAttr::HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem,
                      HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab ) :
    m_nStartPara( rPos.nNode ),
    m_nEndPara( rPos.nNode ),
    m_nStartContent( rPos.nContent.GetIndex() ),
    m_nEndContent(rPos.nContent.GetIndex() ),
    m_bInsAtStart( true ),
    m_bLikePara( false ),
    m_bValid( true ),
    m_pItem( rItem.Clone() ),
    m_xAttrTab( rAttrTab ),
    m_pNext( nullptr ),
    m_pPrev( nullptr ),
    m_ppHead( ppHd )
{
}
5482 
/**
 * Creates a copy of @p rAttr that keeps its start but gets a new end.
 *
 * The start position, the flags and a clone of the item are taken from
 * @p rAttr. The links to previous/next attributes are not copied; the new
 * attribute starts out unlinked.
 *
 * @param rAttr     attribute to copy from
 * @param rEndPara  end paragraph of the new attribute
 * @param nEndCnt   end content index of the new attribute
 * @param ppHd      stored as the attribute's head pointer
 * @param rAttrTab  attribute table stored with the attribute
 */
HTMLAttr::HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara,
                      sal_Int32 nEndCnt, HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab ) :
    m_nStartPara( rAttr.m_nStartPara ),
    m_nEndPara( rEndPara ),
    m_nStartContent( rAttr.m_nStartContent ),
    m_nEndContent( nEndCnt ),
    m_bInsAtStart( rAttr.m_bInsAtStart ),
    m_bLikePara( rAttr.m_bLikePara ),
    m_bValid( rAttr.m_bValid ),
    m_pItem( rAttr.m_pItem->Clone() ),
    m_xAttrTab( rAttrTab ),
    m_pNext( nullptr ),
    m_pPrev( nullptr ),
    m_ppHead( ppHd )
{
}
5499 
~HTMLAttr()5500 HTMLAttr::~HTMLAttr()
5501 {
5502 }
5503 
Clone(const SwNodeIndex & rEndPara,sal_Int32 nEndCnt) const5504 HTMLAttr *HTMLAttr::Clone(const SwNodeIndex& rEndPara, sal_Int32 nEndCnt) const
5505 {
5506     // create the attribute anew with old start position
5507     HTMLAttr *pNew = new HTMLAttr( *this, rEndPara, nEndCnt, m_ppHead, m_xAttrTab );
5508 
5509     // The Previous-List must be taken over, the Next-List not!
5510     pNew->m_pPrev = m_pPrev;
5511 
5512     return pNew;
5513 }
5514 
Reset(const SwNodeIndex & rSttPara,sal_Int32 nSttCnt,HTMLAttr ** ppHd,const std::shared_ptr<HTMLAttrTable> & rAttrTab)5515 void HTMLAttr::Reset(const SwNodeIndex& rSttPara, sal_Int32 nSttCnt,
5516                      HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab)
5517 {
5518     // reset the start (and the end)
5519     m_nStartPara = rSttPara;
5520     m_nStartContent = nSttCnt;
5521     m_nEndPara = rSttPara;
5522     m_nEndContent = nSttCnt;
5523 
5524     // correct the head and nullify link
5525     m_pNext = nullptr;
5526     m_pPrev = nullptr;
5527     m_ppHead = ppHd;
5528     m_xAttrTab = rAttrTab;
5529 }
5530 
InsertPrev(HTMLAttr * pPrv)5531 void HTMLAttr::InsertPrev( HTMLAttr *pPrv )
5532 {
5533     OSL_ENSURE( !pPrv->m_pNext || pPrv->m_pNext == this,
5534             "HTMLAttr::InsertPrev: pNext wrong" );
5535     pPrv->m_pNext = nullptr;
5536 
5537     OSL_ENSURE( nullptr == pPrv->m_ppHead || m_ppHead == pPrv->m_ppHead,
5538             "HTMLAttr::InsertPrev: ppHead wrong" );
5539     pPrv->m_ppHead = nullptr;
5540 
5541     HTMLAttr *pAttr = this;
5542     while( pAttr->GetPrev() )
5543         pAttr = pAttr->GetPrev();
5544 
5545     pAttr->m_pPrev = pPrv;
5546 }
5547 
ParseMetaOptions(const uno::Reference<document::XDocumentProperties> & i_xDocProps,SvKeyValueIterator * i_pHeader)5548 bool SwHTMLParser::ParseMetaOptions(
5549         const uno::Reference<document::XDocumentProperties> & i_xDocProps,
5550         SvKeyValueIterator *i_pHeader )
5551 {
5552     // always call base ParseMetaOptions, it sets the encoding (#i96700#)
5553     bool ret( HTMLParser::ParseMetaOptions(i_xDocProps, i_pHeader) );
5554     if (!ret && IsNewDoc())
5555     {
5556         ParseMoreMetaOptions();
5557     }
5558     return ret;
5559 }
5560 
5561 // override so we can parse DOCINFO field subtypes INFO[1-4]
AddMetaUserDefined(OUString const & i_rMetaName)5562 void SwHTMLParser::AddMetaUserDefined( OUString const & i_rMetaName )
5563 {
5564     // unless we already have 4 names, append the argument to m_InfoNames
5565     OUString* pName // the first empty string in m_InfoNames
5566          (m_InfoNames[0].isEmpty() ? &m_InfoNames[0] :
5567          (m_InfoNames[1].isEmpty() ? &m_InfoNames[1] :
5568          (m_InfoNames[2].isEmpty() ? &m_InfoNames[2] :
5569          (m_InfoNames[3].isEmpty() ? &m_InfoNames[3] : nullptr ))));
5570     if (pName)
5571     {
5572         (*pName) = i_rMetaName;
5573     }
5574 }
5575 
SetupFilterOptions()5576 void HTMLReader::SetupFilterOptions()
5577 {
5578     // Reset state from previous Read() invocation.
5579     m_aNamespace.clear();
5580 
5581     if (!m_pMedium)
5582         return;
5583 
5584     const SfxItemSet* pItemSet = m_pMedium->GetItemSet();
5585     if (!pItemSet)
5586         return;
5587 
5588     auto pItem = pItemSet->GetItem<SfxStringItem>(SID_FILE_FILTEROPTIONS);
5589     if (!pItem)
5590         return;
5591 
5592     OUString aFilterOptions = pItem->GetValue();
5593     static const OUStringLiteral aXhtmlNsKey(u"xhtmlns=");
5594     if (aFilterOptions.startsWith(aXhtmlNsKey))
5595     {
5596         OUString aNamespace = aFilterOptions.copy(aXhtmlNsKey.getLength());
5597         m_aNamespace = aNamespace;
5598     }
5599 }
5600 
namespace
{
    // Helper whose only purpose is to call FlushFontCache() when an
    // instance goes out of scope.
    class FontCacheGuard
    {
    public:
        ~FontCacheGuard()
        {
            FlushFontCache();
        }
    };
}
5612 
TestImportHTML(SvStream & rStream)5613 bool TestImportHTML(SvStream &rStream)
5614 {
5615     FontCacheGuard aFontCacheGuard;
5616     HTMLReader aReader;
5617     aReader.m_pStream = &rStream;
5618 
5619     SwGlobals::ensure();
5620 
5621     SfxObjectShellLock xDocSh(new SwDocShell(SfxObjectCreateMode::INTERNAL));
5622     xDocSh->DoInitNew();
5623     SwDoc *pD =  static_cast<SwDocShell*>((&xDocSh))->GetDoc();
5624 
5625     SwNodeIndex aIdx(pD->GetNodes().GetEndOfContent(), -1);
5626     SwPaM aPaM(aIdx);
5627     pD->SetInReading(true);
5628     bool bRet = false;
5629     try
5630     {
5631         bRet = aReader.Read(*pD, OUString(), aPaM, OUString()) == ERRCODE_NONE;
5632     }
5633     catch (const std::runtime_error&)
5634     {
5635     }
5636     catch (const std::out_of_range&)
5637     {
5638     }
5639     pD->SetInReading(false);
5640 
5641     return bRet;
5642 }
5643 
5644 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
5645