1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 #ifndef INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX
20 #define INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX
21 
22 #include <config_java.h>
23 
24 #include <sfx2/sfxhtml.hxx>
25 #include <svl/listener.hxx>
26 #include <svl/macitem.hxx>
27 #include <svtools/htmltokn.h>
28 #include <editeng/svxenum.hxx>
29 #include <rtl/ref.hxx>
30 #include <fltshell.hxx>
31 #include <com/sun/star/drawing/XShape.hpp>
32 #include <com/sun/star/form/XFormComponent.hpp>
33 
34 #include <memory>
35 #include <vector>
36 #include <deque>
37 #include <stack>
38 
39 class SfxMedium;
40 class SfxViewFrame;
41 class SdrObject;
42 class SvxMacroTableDtor;
43 class SwDoc;
44 class SwPaM;
45 class SwViewShell;
46 class SwStartNode;
47 class SwFormatColl;
48 class SwField;
49 class SwHTMLForm_Impl;
50 class SwApplet_Impl;
51 struct SwHTMLFootEndNote_Impl;
52 class HTMLTableCnts;
53 struct SwPending;
54 class SvxCSS1PropertyInfo;
55 struct ImplSVEvent;
56 
// Paragraph spacing constants, all derived from MM50. The CJK and CTL
// variants are half of HTML_PARSPACE.
#define HTML_PARSPACE        (MM50)
#define HTML_CJK_PARSPACE    (MM50 / 2)
#define HTML_CTL_PARSPACE    (MM50 / 2)

// Default image extent: four times MM50 wide, twice MM50 high.
// NOTE(review): presumably applied when an image carries no usable size --
// confirm against the users of these macros.
#define HTML_DFLT_IMG_WIDTH  (MM50 * 4)
#define HTML_DFLT_IMG_HEIGHT (MM50 * 2)
63 
64 // some things you often need
65 extern HTMLOptionEnum<SvxAdjust> const aHTMLPAlignTable[];
66 extern HTMLOptionEnum<sal_Int16> const aHTMLImgHAlignTable[];
67 extern HTMLOptionEnum<sal_Int16> const aHTMLImgVAlignTable[];
68 
// Attribute stack: a double-ended queue of pointers to HTMLAttr objects.

class HTMLAttr;
using HTMLAttrs = std::deque<HTMLAttr*>;
73 
74 // Table of attributes: The order here is important: The attributes in the
75 // beginning of the table will set first in EndAllAttrs.
76 struct HTMLAttrTable
77 {
78     HTMLAttr
79                 *pKeep,         // frame attributes
80                 *pBox,
81                 *pBrush,
82                 *pBreak,
83                 *pPageDesc,
84 
85                 *pLRSpace,      // paragraph attributes
86                 *pULSpace,
87                 *pLineSpacing,
88                 *pAdjust,
89                 *pDropCap,
90                 *pSplit,
91                 *pWidows,
92                 *pOrphans,
93                 *pDirection,
94 
95                 *pCharFormats,     // text attributes
96                 *pINetFormat,
97 
98                 *pBold,         // character attributes
99                 *pBoldCJK,
100                 *pBoldCTL,
101                 *pItalic,
102                 *pItalicCJK,
103                 *pItalicCTL,
104                 *pStrike,
105                 *pUnderline,
106                 *pBlink,
107                 *pFont,
108                 *pFontCJK,
109                 *pFontCTL,
110                 *pFontHeight,
111                 *pFontHeightCJK,
112                 *pFontHeightCTL,
113                 *pFontColor,
114                 *pEscapement,
115                 *pCaseMap,
116                 *pKerning,      // (only for SPACER)
117                 *pCharBrush,    // character background
118                 *pLanguage,
119                 *pLanguageCJK,
120                 *pLanguageCTL,
121                 *pCharBox
122                 ;
123 };
124 
125 class HTMLAttr
126 {
127     friend class SwHTMLParser;
128     friend class CellSaveStruct;
129 
130     SwNodeIndex m_nStartPara, m_nEndPara;
131     sal_Int32 m_nStartContent, m_nEndContent;
132     bool m_bInsAtStart : 1;
133     bool m_bLikePara : 1; // set attribute above the whole paragraph
134     bool m_bValid : 1;    // is the attribute valid?
135 
136     std::unique_ptr<SfxPoolItem> m_pItem;
137     std::shared_ptr<HTMLAttrTable> m_xAttrTab;
138     HTMLAttr *m_pNext;   // still to close attributes with different values
139     HTMLAttr *m_pPrev;   // already closed but not set attributes
140     HTMLAttr **m_ppHead; // list head
141 
142     HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem,
143                HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
144 
145     HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara,
146                sal_Int32 nEndCnt, HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
147 
148 public:
149 
150     ~HTMLAttr();
151 
152     HTMLAttr *Clone( const SwNodeIndex& rEndPara, sal_Int32 nEndCnt ) const;
153     void Reset( const SwNodeIndex& rSttPara, sal_Int32 nSttCnt,
154                 HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
155     inline void SetStart( const SwPosition& rPos );
156 
GetSttParaIdx() const157     sal_uInt32 GetSttParaIdx() const { return m_nStartPara.GetIndex(); }
GetEndParaIdx() const158     sal_uInt32 GetEndParaIdx() const { return m_nEndPara.GetIndex(); }
159 
GetSttPara() const160     const SwNodeIndex& GetSttPara() const { return m_nStartPara; }
GetEndPara() const161     const SwNodeIndex& GetEndPara() const { return m_nEndPara; }
162 
GetSttCnt() const163     sal_Int32 GetSttCnt() const { return m_nStartContent; }
GetEndCnt() const164     sal_Int32 GetEndCnt() const { return m_nEndContent; }
165 
IsLikePara() const166     bool IsLikePara() const { return m_bLikePara; }
SetLikePara()167     void SetLikePara() { m_bLikePara = true; }
168 
GetItem()169           SfxPoolItem& GetItem()        { return *m_pItem; }
GetItem() const170     const SfxPoolItem& GetItem() const  { return *m_pItem; }
171 
GetNext() const172     HTMLAttr *GetNext() const { return m_pNext; }
InsertNext(HTMLAttr * pNxt)173     void InsertNext( HTMLAttr *pNxt ) { m_pNext = pNxt; }
174 
GetPrev() const175     HTMLAttr *GetPrev() const { return m_pPrev; }
176     void InsertPrev( HTMLAttr *pPrv );
ClearPrev()177     void ClearPrev() { m_pPrev = nullptr; }
178 
SetHead(HTMLAttr ** ppHd,const std::shared_ptr<HTMLAttrTable> & rAttrTab)179     void SetHead(HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab)
180     {
181         m_ppHead = ppHd;
182         m_xAttrTab = rAttrTab;
183     }
184 
185     // During setting attributes from styles it can happen that these
186     // shouldn't be set anymore. To delete them would be very expensive, because
187     // you don't know all the places where they are linked in. Therefore they're
188     // made invalid and deleted at the next call of SetAttr_().
Invalidate()189     void Invalidate() { m_bValid = false; }
190 };
191 
192 class HTMLAttrContext_SaveDoc;
193 
// Paragraph-spacing handling requested when a paragraph is appended.
enum SwHTMLAppendMode
{
    AM_NORMAL      = 0, // no paragraph spacing handling
    AM_NOSPACE     = 1, // set spacing hard to 0cm
    AM_SPACE       = 2, // set spacing hard to 0.5cm
    AM_SOFTNOSPACE = 3, // don't set spacing, but save 0cm
    AM_NONE        = 4  // no append
};
201 
202 class HTMLAttrContext
203 {
204     HTMLAttrs m_aAttrs;      // the attributes created in the context
205 
206     OUString const    m_aClass;          // context class
207 
208     std::unique_ptr<HTMLAttrContext_SaveDoc> m_pSaveDocContext;
209     std::unique_ptr<SfxItemSet> m_pFrameItemSet;
210 
211     HtmlTokenId const m_nToken;         // the token of the context
212 
213     sal_uInt16 const  m_nTextFormatColl;    // a style created in the context or zero
214 
215     sal_uInt16  m_nLeftMargin;        // a changed left border
216     sal_uInt16  m_nRightMargin;       // a changed right border
217     sal_uInt16  m_nFirstLineIndent;   // a changed first line indent
218 
219     sal_uInt16  m_nUpperSpace;
220     sal_uInt16  m_nLowerSpace;
221 
222     SwHTMLAppendMode m_eAppend;
223 
224     bool    m_bLRSpaceChanged : 1;    // left/right border, changed indent?
225     bool    m_bULSpaceChanged : 1;    // top/bottom border changed?
226     bool const    m_bDefaultTextFormatColl : 1;// nTextFormatColl is only default
227     bool    m_bSpansSection : 1;      // the context opens a SwSection
228     bool    m_bPopStack : 1;          // delete above stack elements
229     bool    m_bFinishPREListingXMP : 1;
230     bool    m_bRestartPRE : 1;
231     bool    m_bRestartXMP : 1;
232     bool    m_bRestartListing : 1;
233     bool    m_bHeaderOrFooter : 1;
234 
235 public:
236     void ClearSaveDocContext();
237 
238     HTMLAttrContext( HtmlTokenId nTokn, sal_uInt16 nPoolId, const OUString& rClass,
239                       bool bDfltColl=false );
240     explicit HTMLAttrContext( HtmlTokenId nTokn );
241     ~HTMLAttrContext();
242 
GetToken() const243     HtmlTokenId GetToken() const { return m_nToken; }
244 
GetTextFormatColl() const245     sal_uInt16 GetTextFormatColl() const { return m_bDefaultTextFormatColl ? 0 : m_nTextFormatColl; }
GetDfltTextFormatColl() const246     sal_uInt16 GetDfltTextFormatColl() const { return m_bDefaultTextFormatColl ? m_nTextFormatColl : 0; }
247 
GetClass() const248     const OUString& GetClass() const { return m_aClass; }
249 
250     inline void SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight, short nIndent );
251 
IsLRSpaceChanged() const252     bool IsLRSpaceChanged() const { return m_bLRSpaceChanged; }
253     inline void GetMargins( sal_uInt16& nLeft, sal_uInt16& nRight,
254                             short &nIndent ) const;
255 
256     inline void SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower );
IsULSpaceChanged() const257     bool IsULSpaceChanged() const { return m_bULSpaceChanged; }
258     inline void GetULSpace( sal_uInt16& rUpper, sal_uInt16& rLower ) const;
259 
HasAttrs() const260     bool HasAttrs() const { return !m_aAttrs.empty(); }
GetAttrs() const261     const HTMLAttrs& GetAttrs() const { return m_aAttrs; }
GetAttrs()262     HTMLAttrs& GetAttrs() { return m_aAttrs; }
263 
SetSpansSection(bool bSet)264     void SetSpansSection( bool bSet ) { m_bSpansSection = bSet; }
GetSpansSection() const265     bool GetSpansSection() const { return m_bSpansSection; }
266 
SetPopStack(bool bSet)267     void SetPopStack( bool bSet ) { m_bPopStack = bSet; }
GetPopStack() const268     bool GetPopStack() const { return m_bPopStack; }
269 
HasSaveDocContext() const270     bool HasSaveDocContext() const { return m_pSaveDocContext!=nullptr; }
271     HTMLAttrContext_SaveDoc *GetSaveDocContext( bool bCreate=false );
272 
GetFrameItemSet() const273     const SfxItemSet *GetFrameItemSet() const { return m_pFrameItemSet.get(); }
274     SfxItemSet *GetFrameItemSet( SwDoc *pCreateDoc );
275 
SetFinishPREListingXMP(bool bSet)276     void SetFinishPREListingXMP( bool bSet ) { m_bFinishPREListingXMP = bSet; }
IsFinishPREListingXMP() const277     bool IsFinishPREListingXMP() const { return m_bFinishPREListingXMP; }
278 
SetRestartPRE(bool bSet)279     void SetRestartPRE( bool bSet ) { m_bRestartPRE = bSet; }
IsRestartPRE() const280     bool IsRestartPRE() const { return m_bRestartPRE; }
281 
SetRestartXMP(bool bSet)282     void SetRestartXMP( bool bSet ) { m_bRestartXMP = bSet; }
IsRestartXMP() const283     bool IsRestartXMP() const { return m_bRestartXMP; }
284 
SetRestartListing(bool bSet)285     void SetRestartListing( bool bSet ) { m_bRestartListing = bSet; }
IsRestartListing() const286     bool IsRestartListing() const { return m_bRestartListing; }
287 
SetHeaderOrFooter(bool bSet)288     void SetHeaderOrFooter( bool bSet ) { m_bHeaderOrFooter = bSet; }
IsHeaderOrFooter() const289     bool IsHeaderOrFooter() const { return m_bHeaderOrFooter; }
290 
SetAppendMode(SwHTMLAppendMode eMode)291     void SetAppendMode( SwHTMLAppendMode eMode ) { m_eAppend = eMode; }
GetAppendMode() const292     SwHTMLAppendMode GetAppendMode() const { return m_eAppend; }
293 };
294 
295 typedef std::vector<std::unique_ptr<HTMLAttrContext>> HTMLAttrContexts;
296 
297 class HTMLTable;
298 class SwCSS1Parser;
299 class SwHTMLNumRuleInfo;
300 
301 typedef std::vector<std::unique_ptr<ImageMap>> ImageMaps;
302 
// Bit flags passed to SaveDocContext(). Each of the first six enumerators
// occupies its own bit; MultiColMask is a combination of three of them.
enum class HtmlContextFlags
{
    ProtectStack = 1 << 0,
    StripPara    = 1 << 1,
    KeepNumrule  = 1 << 2,
    HeaderDist   = 1 << 3,
    FooterDist   = 1 << 4,
    KeepAttrs    = 1 << 5,
    // for headers, footers or footnotes
    MultiColMask = StripPara | KeepNumrule | KeepAttrs
};
312 namespace o3tl
313 {
314     template<> struct typed_flags<HtmlContextFlags> : is_typed_flags<HtmlContextFlags, 0x03f> {};
315 }
316 
// Bit flags passed to SetFrameFormatAttrs(); one bit per enumerator.
enum class HtmlFrameFormatFlags
{
    Box        = 1 << 0,
    Background = 1 << 1,
    Padding    = 1 << 2,
    Direction  = 1 << 3,
};
323 namespace o3tl
324 {
325     template<> struct typed_flags<HtmlFrameFormatFlags> : is_typed_flags<HtmlFrameFormatFlags, 0x0f> {};
326 }
327 
328 class SwHTMLParser : public SfxHTMLParser, public SvtListener
329 {
330     friend class SectionSaveStruct;
331     friend class CellSaveStruct;
332     friend class CaptionSaveStruct;
333 
334     /*
335      Progress bar
336      */
337     std::unique_ptr<ImportProgress> m_xProgress;
338 
339     OUString const      m_aPathToFile;
340     OUString      m_sBaseURL;
341     OUString      m_aBasicLib;
342     OUString      m_aBasicModule;
343     OUString      m_aScriptSource;  // content of the current script block
344     OUString      m_aScriptType;    // type of read script (StarBasic/VB/JAVA)
345     OUString      m_aScriptURL;     // script URL
346     OUString      m_aStyleSource;   // content of current style sheet
347     OUString      m_aContents;      // text of current marquee, field and so
348     OUStringBuffer m_sTitle;
349     OUString      m_aUnknownToken;  // a started unknown token
350     OUString      m_aBulletGrfs[MAXLEVEL];
351     OUString      m_sJmpMark;
352 
353     std::vector<sal_uInt16>   m_aBaseFontStack; // stack for <BASEFONT>
354                                 // Bit 0-2: font size (1-7)
355     std::vector<sal_uInt16>   m_aFontStack;     // stack for <FONT>, <BIG>, <SMALL>
356                                 // Bit 0-2: font size (1-7)
357                                 // Bit 15:  font colour was set
358 
359     HTMLAttrs      m_aSetAttrTab;// "closed", not set attributes
360     HTMLAttrs      m_aParaAttrs; // temporary paragraph attributes
361     std::shared_ptr<HTMLAttrTable>  m_xAttrTab;   // "open" attributes
362     HTMLAttrContexts m_aContexts;// the current context of attribute/token
363     std::vector<SwFrameFormat *> m_aMoveFlyFrames;// Fly-Frames, the anchor is moved
364     std::deque<sal_Int32> m_aMoveFlyCnts;// and the Content-Positions
365     //stray SwTableBoxes which need to be deleted to avoid leaking, but hold
366     //onto them until parsing is done
367     std::vector<std::unique_ptr<SwTableBox>> m_aOrphanedTableBoxes;
368 
369     std::unique_ptr<SwApplet_Impl> m_pAppletImpl; // current applet
370 
371     std::unique_ptr<SwCSS1Parser> m_pCSS1Parser;   // Style-Sheet-Parser
372     std::unique_ptr<SwHTMLNumRuleInfo> m_pNumRuleInfo;
373     std::vector<SwPending>  m_vPendingStack;
374 
375     rtl::Reference<SwDoc> m_xDoc;
376     SwPaM           *m_pPam;      // SwPosition should be enough, or ??
377     SwViewShell       *m_pActionViewShell;  // SwViewShell, where StartAction was called
378     SwNodeIndex     *m_pSttNdIdx;
379 
380     std::vector<HTMLTable*> m_aTables;
381     std::shared_ptr<HTMLTable> m_xTable; // current "outermost" table
382     SwHTMLForm_Impl* m_pFormImpl;   // current form
383     SdrObject       *m_pMarquee;    // current marquee
384     std::unique_ptr<SwField> m_xField; // current field
385     ImageMap        *m_pImageMap;   // current image map
386     std::unique_ptr<ImageMaps> m_pImageMaps;  ///< all Image-Maps that have been read
387     std::unique_ptr<SwHTMLFootEndNote_Impl> m_pFootEndNoteImpl;
388 
389     Size    m_aHTMLPageSize;      // page size of HTML template
390 
391     sal_uInt32  m_aFontHeights[7];    // font heights 1-7
392     ImplSVEvent * m_nEventId;
393 
394     sal_uInt16  m_nBaseFontStMin;
395     sal_uInt16  m_nFontStMin;
396     sal_uInt16  m_nDefListDeep;
397     sal_uInt16  m_nFontStHeadStart;   // elements in font stack at <Hn>
398     sal_uInt16  m_nSBModuleCnt;       // counter for basic modules
399     sal_uInt16  m_nMissingImgMaps;    // How many image maps are still missing?
400     size_t m_nParaCnt;
401     size_t m_nContextStMin;           // lower limit of PopContext
402     size_t m_nContextStAttrMin;       // lower limit of attributes
403     sal_uInt16  m_nSelectEntryCnt;    // Number of entries in the actual listbox
404     HtmlTokenId m_nOpenParaToken;     // opened paragraph element
405 
406     enum class JumpToMarks { NONE, Mark, Table, Region, Graphic };
407     JumpToMarks m_eJumpTo;
408 
409 #ifdef DBG_UTIL
410     sal_uInt16  m_nContinue;        // depth of Continue calls
411 #endif
412 
413     SvxAdjust   m_eParaAdjust;    // adjustment of current paragraph
414     HTMLScriptLanguage m_eScriptLang; // current script language
415 
416     bool m_bOldIsHTMLMode : 1;    // Was it a HTML document?
417 
418     bool m_bDocInitalized : 1;    // document resp. shell was initialize
419                                   // flag to prevent double init via recursion
420     bool m_bViewCreated : 1;      // the view was already created (asynchronous)
421     bool m_bSetModEnabled : 1;
422 
423     bool m_bInFloatingFrame : 1;  // We are in a floating frame
424     bool m_bInField : 1;
425     bool m_bKeepUnknown : 1;      // handle unknown/not supported tokens
426     // 8
427     bool m_bCallNextToken : 1;    // In tables: call NextToken in any case
428     bool m_bIgnoreRawData : 1;    // ignore content of script/style
429     bool m_bLBEntrySelected : 1;  // Is the current option selected?
430     bool m_bTAIgnoreNewPara : 1;  // ignore next LF in text area?
431     bool m_bFixMarqueeWidth : 1;  // Change size of marquee?
432 
433     bool m_bUpperSpace : 1;       // top paragraph spacing is needed
434     bool m_bNoParSpace : 1;
435     // 16
436 
437     bool m_bInNoEmbed : 1;        // we are in a NOEMBED area
438 
439     bool m_bInTitle : 1;          // we are in title
440 
441     bool m_bChkJumpMark : 1;      // maybe jump to predetermined mark
442     bool m_bUpdateDocStat : 1;
443     bool m_bFixSelectWidth : 1;   // Set new width of select?
444     bool m_bTextArea : 1;
445     // 24
446     bool m_bSelect : 1;
447     bool m_bInFootEndNoteAnchor : 1;
448     bool m_bInFootEndNoteSymbol : 1;
449     bool m_bIgnoreHTMLComments : 1;
450     bool m_bRemoveHidden : 1; // the filter implementation might set the hidden flag
451 
452     bool m_bBodySeen : 1;
453     bool m_bReadingHeaderOrFooter : 1;
454     bool m_bNotifyMacroEventRead : 1;
455     bool m_isInTableStructure;
456 
457     sal_Int32 m_nTableDepth;
458 
459     /// the names corresponding to the DOCINFO field subtypes INFO[1-4]
460     OUString m_InfoNames[4];
461 
462     SfxViewFrame* m_pTempViewFrame;
463 
464     bool m_bXHTML = false;
465     bool m_bReqIF = false;
466 
467     /**
468      * Non-owning pointers to already inserted OLE nodes, matching opened
469      * <object> XHTML elements.
470      */
471     std::stack<SwOLENode*> m_aEmbeds;
472 
473     void DeleteFormImpl();
474 
475     void DocumentDetected();
476     void Show();
477     void ShowStatline();
478     SwViewShell *CallStartAction( SwViewShell *pVSh = nullptr, bool bChkPtr = true );
479     SwViewShell *CallEndAction( bool bChkAction = false, bool bChkPtr = true );
480     SwViewShell *CheckActionViewShell();
481 
482     DECL_LINK( AsyncCallback, void*, void );
483 
484     // set attribute on document
485     void SetAttr_( bool bChkEnd, bool bBeforeTable, std::deque<std::unique_ptr<HTMLAttr>> *pPostIts );
SetAttr(bool bChkEnd=true,bool bBeforeTable=false,std::deque<std::unique_ptr<HTMLAttr>> * pPostIts=nullptr)486     void SetAttr( bool bChkEnd = true, bool bBeforeTable = false,
487                          std::deque<std::unique_ptr<HTMLAttr>> *pPostIts = nullptr )
488     {
489         if( !m_aSetAttrTab.empty() || !m_aMoveFlyFrames.empty() )
490             SetAttr_( bChkEnd, bBeforeTable, pPostIts );
491     }
492 
493     HTMLAttr **GetAttrTabEntry( sal_uInt16 nWhich );
494 
495     // create a new text node on PaM position
496     bool AppendTextNode( SwHTMLAppendMode eMode=AM_NORMAL, bool bUpdateNum=true );
497     void AddParSpace();
498 
499     // start/end an attribute
500     // ppDepAttr indicated an attribute table entry, which attribute has to be
501     // set, before the attribute is closed
502     void NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTab, HTMLAttr **ppAttr, const SfxPoolItem& rItem);
503     bool EndAttr( HTMLAttr *pAttr, bool bChkEmpty=true );
504     void DeleteAttr( HTMLAttr* pAttr );
505 
506     void EndContextAttrs( HTMLAttrContext *pContext );
507     void SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab);
508     void SplitAttrTab( const SwPosition& rNewPos );
509     void SplitAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab, bool bMoveEndBack);
510     void RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab);
511     void InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart );
512     void InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs );
513 
514     bool DoPositioning( SfxItemSet &rItemSet,
515                         SvxCSS1PropertyInfo &rPropInfo,
516                         HTMLAttrContext *pContext );
517     bool CreateContainer( const OUString& rClass, SfxItemSet &rItemSet,
518                           SvxCSS1PropertyInfo &rPropInfo,
519                           HTMLAttrContext *pContext );
520     bool EndSection( bool bLFStripped=false );
521 
522     void InsertAttrs( SfxItemSet &rItemSet, SvxCSS1PropertyInfo const &rPropInfo,
523                       HTMLAttrContext *pContext, bool bCharLvl=false );
524     void InsertAttr( HTMLAttr **ppAttr, const SfxPoolItem & rItem,
525                      HTMLAttrContext *pCntxt );
526     void SplitPREListingXMP( HTMLAttrContext *pCntxt );
527     void FixHeaderFooterDistance( bool bHeader, const SwPosition *pOldPos );
528 
529     void EndContext( HTMLAttrContext *pContext );
530     void ClearContext( HTMLAttrContext *pContext );
531 
532     const SwFormatColl *GetCurrFormatColl() const;
533 
534     SwTwips GetCurrentBrowseWidth();
535 
GetNumInfo()536     SwHTMLNumRuleInfo& GetNumInfo() { return *m_pNumRuleInfo; }
537     // add parameter <bCountedInList>
538     void SetNodeNum( sal_uInt8 nLevel );
539 
540     // Manage paragraph styles
541 
542     // set the style resp. its attributes on the stack
543     void SetTextCollAttrs( HTMLAttrContext *pContext = nullptr );
544 
545     void InsertParaAttrs( const SfxItemSet& rItemSet );
546 
547     // Manage attribute context
548 
549     // save current context
PushContext(std::unique_ptr<HTMLAttrContext> & rCntxt)550     void PushContext(std::unique_ptr<HTMLAttrContext>& rCntxt)
551     {
552         m_aContexts.push_back(std::move(rCntxt));
553     }
554 
555     // Fetch top/specified context but not outside the context with token
556     // nLimit. If bRemove set then remove it.
557     std::unique_ptr<HTMLAttrContext> PopContext(HtmlTokenId nToken = HtmlTokenId::NONE);
558 
559     void GetMarginsFromContext( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent,
560                                 bool bIgnoreCurrent=false ) const;
561     void GetMarginsFromContextWithNumBul( sal_uInt16 &nLeft, sal_uInt16 &nRight,
562                                           short& nIndent ) const;
563     void GetULSpaceFromContext( sal_uInt16 &rUpper, sal_uInt16 &rLower ) const;
564 
565     void MovePageDescAttrs( SwNode *pSrcNd, sal_uLong nDestIdx, bool bFormatBreak );
566 
567     // Handling of tags at paragraph level
568 
569     // <P> and <H1> to <H6>
570     void NewPara();
571     void EndPara( bool bReal = false );
572     void NewHeading( HtmlTokenId nToken );
573     void EndHeading();
574 
575     // <ADDRESS>, <BLOCKQUOTE> and <PRE>
576     void NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nPoolId );
577     void EndTextFormatColl( HtmlTokenId nToken );
578 
579     // <DIV> and <CENTER>
580     void NewDivision( HtmlTokenId nToken );
581     void EndDivision();
582 
583     // insert/close Fly-Frames
584     void InsertFlyFrame( const SfxItemSet& rItemSet, HTMLAttrContext *pCntxt,
585                          const OUString& rId );
586 
587     void SaveDocContext( HTMLAttrContext *pCntxt, HtmlContextFlags nFlags,
588                        const SwPosition *pNewPos );
589     void RestoreDocContext( HTMLAttrContext *pCntxt );
590 
591     // end all opened <DIV> areas
592     bool EndSections( bool bLFStripped );
593 
594     // <MULTICOL>
595     void NewMultiCol( sal_uInt16 columnsFromCss=0 );
596 
597     // <MARQUEE>
598     void NewMarquee( HTMLTable *pCurTable=nullptr );
599     void EndMarquee();
600     void InsertMarqueeText();
601 
602     // Handling of lists
603 
604     // order list <OL> and unordered list <UL> with <LI>
605     void NewNumBulList( HtmlTokenId nToken );
606     void EndNumBulList( HtmlTokenId nToken = HtmlTokenId::NONE );
607     void NewNumBulListItem( HtmlTokenId nToken );
608     void EndNumBulListItem( HtmlTokenId nToken, bool bSetColl);
609 
610     // definitions lists <DL> with <DD>, <DT>
611     void NewDefList();
612     void EndDefList();
613     void NewDefListItem( HtmlTokenId nToken );
614     void EndDefListItem( HtmlTokenId nToken = HtmlTokenId::NONE );
615 
616     // Handling of tags on character level
617 
618     // handle tags like <B>, <I> and so, which enable/disable a certain
619     // attribute or like <SPAN> get attributes from styles
620     void NewStdAttr( HtmlTokenId nToken );
621     void NewStdAttr( HtmlTokenId nToken,
622                      HTMLAttr **ppAttr, const SfxPoolItem & rItem,
623                      HTMLAttr **ppAttr2=nullptr, const SfxPoolItem *pItem2=nullptr,
624                      HTMLAttr **ppAttr3=nullptr, const SfxPoolItem *pItem3=nullptr );
625     void EndTag( HtmlTokenId nToken );
626 
627     // handle font attributes
628     void NewBasefontAttr();             // for <BASEFONT>
629     void EndBasefontAttr();
630     void NewFontAttr( HtmlTokenId nToken ); // for <FONT>, <BIG> and <SMALL>
631     void EndFontAttr( HtmlTokenId nToken );
632 
633     // tags realized via character styles
634     void NewCharFormat( HtmlTokenId nToken );
635 
636     void ClearFootnotesMarksInRange(const SwNodeIndex& rSttIdx, const SwNodeIndex& rEndIdx);
637 
638     void DeleteSection(SwStartNode* pSttNd);
639 
640     // <SDFIELD>
641 public:
642     static SvxNumType GetNumType( const OUString& rStr, SvxNumType eDfltType );
643 private:
644     void NewField();
645     void EndField();
646     void InsertFieldText();
647 
648     // <SPACER>
649     void InsertSpacer();
650 
651     // Inserting graphics, plug-ins and applets
652 
653     // search image maps and link with graphic nodes
654     ImageMap *FindImageMap( const OUString& rURL ) const;
655     void ConnectImageMaps();
656 
657     // find anchor of Fly-Frames and set corresponding attributes
658     // in Attrset (htmlgrin.cxx)
659     void SetAnchorAndAdjustment( sal_Int16 eVertOri,
660                                  sal_Int16 eHoriOri,
661                                  const SvxCSS1PropertyInfo &rPropInfo,
662                                  SfxItemSet& rFrameSet );
663     void SetAnchorAndAdjustment( sal_Int16 eVertOri,
664                                  sal_Int16 eHoriOri,
665                                  SfxItemSet& rFrameSet,
666                                  bool bDontAppend=false );
667     void SetAnchorAndAdjustment( const SvxCSS1PropertyInfo &rPropInfo,
668                                  SfxItemSet &rFrameItemSet );
669 
670     static void SetFrameFormatAttrs( SfxItemSet &rItemSet,
671                          HtmlFrameFormatFlags nFlags, SfxItemSet &rFrameItemSet );
672 
673     // create frames and register auto bound frames
674     void RegisterFlyFrame( SwFrameFormat *pFlyFrame );
675 
676     // Adjust the size of the Fly-Frames to requirements and conditions
677     // (not for graphics, therefore htmlplug.cxx)
678     static void SetFixSize( const Size& rPixSize, const Size& rTwipDfltSize,
679                      bool bPrcWidth, bool bPrcHeight,
680                      SvxCSS1PropertyInfo const &rPropInfo,
681                      SfxItemSet& rFlyItemSet );
682     static void SetVarSize( SvxCSS1PropertyInfo const &rPropInfo,
683                      SfxItemSet& rFlyItemSet, SwTwips nDfltWidth=MINLAY,
684                      sal_uInt8 nDltPrcWidth=0 );
685     static void SetSpace( const Size& rPixSpace, SfxItemSet &rItemSet,
686                    SvxCSS1PropertyInfo &rPropInfo, SfxItemSet& rFlyItemSet );
687 
688     sal_uInt16 IncGrfsThatResizeTable();
689 
690     void GetDefaultScriptType( ScriptType& rType,
691                                OUString& rTypeStr ) const;
692 
693     // the actual insert methods for <IMG>, <EMBED>, <APPLET> and <PARAM>
694     void InsertImage();     // htmlgrin.cxx
695     bool InsertEmbed();     // htmlplug.cxx
696 
697 #if HAVE_FEATURE_JAVA
698     void NewObject();   // htmlplug.cxx
699 #endif
700     void EndObject();       // link CommandLine with applet (htmlplug.cxx)
701 #if HAVE_FEATURE_JAVA
702     void InsertApplet();    // htmlplug.cxx
703 #endif
704     void EndApplet();       // link CommandLine with applet (htmlplug.cxx)
705     void InsertParam();     // htmlplug.cxx
706 
707     void InsertFloatingFrame();
708 
709     // parse <BODY>-tag: set background graphic and background colour (htmlgrin.cxx)
710     void InsertBodyOptions();
711 
712     // Inserting links and bookmarks (htmlgrin.cxx)
713 
714     // parse <A>-tag: insert a link resp. bookmark
715     void NewAnchor();
716     void EndAnchor();
717 
718     // insert bookmark
719     void InsertBookmark( const OUString& rName );
720 
721     void InsertCommentText( const sal_Char *pTag );
722     void InsertComment( const OUString& rName, const sal_Char *pTag = nullptr );
723 
724     // Has the current paragraph bookmarks?
725     bool HasCurrentParaBookmarks( bool bIgnoreStack=false ) const;
726 
727     // Inserting script/basic elements
728 
729     // parse the last read basic module (htmlbas.cxx)
730     void NewScript();
731     void EndScript();
732 
733     void AddScriptSource();
734 
735     // insert event in SFX configuration (htmlbas.cxx)
736     void InsertBasicDocEvent( const OUString& aEventName, const OUString& rName,
737                               ScriptType eScrType, const OUString& rScrType );
738 
739     // Inserting styles
740 
741     // <STYLE>
742     void NewStyle();
743     void EndStyle();
744 
745     static inline bool HasStyleOptions( const OUString &rStyle, const OUString &rId,
746                                  const OUString &rClass, const OUString *pLang=nullptr,
747                                  const OUString *pDir=nullptr );
748     bool ParseStyleOptions( const OUString &rStyle, const OUString &rId,
749                             const OUString &rClass, SfxItemSet &rItemSet,
750                             SvxCSS1PropertyInfo &rPropInfo,
751                             const OUString *pLang=nullptr, const OUString *pDir=nullptr );
752 
753     // Inserting Controls and Forms (htmlform.cxx)
754 
755     // Insert draw object into document
756     void InsertDrawObject( SdrObject* pNewDrawObj, const Size& rSpace,
757                            sal_Int16 eVertOri,
758                            sal_Int16 eHoriOri,
759                            SfxItemSet& rCSS1ItemSet,
760                            SvxCSS1PropertyInfo& rCSS1PropInfo );
761     css::uno::Reference< css::drawing::XShape >  InsertControl(
762                         const css::uno::Reference< css::form::XFormComponent > & rFormComp,
763                         const css::uno::Reference< css::beans::XPropertySet > & rFCompPropSet,
764                         const Size& rSize,
765                         sal_Int16 eVertOri,
766                         sal_Int16 eHoriOri,
767                         SfxItemSet& rCSS1ItemSet,
768                         SvxCSS1PropertyInfo& rCSS1PropInfo,
769                         const SvxMacroTableDtor& rMacroTable,
770                         const std::vector<OUString>& rUnoMacroTable,
771                         const std::vector<OUString>& rUnoMacroParamTable,
772                         bool bSetPropSet = true,
773                         bool bHidden = false );
774     void SetControlSize( const css::uno::Reference< css::drawing::XShape > & rShape, const Size& rTextSz,
775                          bool bMinWidth, bool bMinHeight );
776 
777 public:
778     static void ResizeDrawObject( SdrObject* pObj, SwTwips nWidth );
779 private:
780     static void RegisterDrawObjectToTable( HTMLTable *pCurTable, SdrObject* pObj,
781                                     sal_uInt8 nWidth );
782 
783     void NewForm( bool bAppend=true );
784     void EndForm( bool bAppend=true );
785 
786     // Insert methods for <INPUT>, <TEXTAREA> and <SELECT>
787     void InsertInput();
788 
789     void NewTextArea();
790     void InsertTextAreaText( HtmlTokenId nToken );
791     void EndTextArea();
792 
793     void NewSelect();
794     void InsertSelectOption();
795     void InsertSelectText();
796     void EndSelect();
797 
798     // Inserting tables (htmltab.cxx)
799 public:
800 
801     // Insert box content after the given node
802     const SwStartNode *InsertTableSection( const SwStartNode *pPrevStNd );
803 
804     // Insert box content at the end of the table containing the PaM
805     // and move the PaM into the cell
806     const SwStartNode *InsertTableSection( sal_uInt16 nPoolId );
807 
808     // Insert methods for various table tags
809     std::unique_ptr<HTMLTableCnts> InsertTableContents( bool bHead );
810 
811 private:
812     // Create a section for the temporary storage of the table caption
813     SwStartNode *InsertTempTableCaptionSection();
814 
815     void BuildTableCell( HTMLTable *pTable, bool bReadOptions, bool bHead );
816     void BuildTableRow( HTMLTable *pTable, bool bReadOptions,
817                         SvxAdjust eGrpAdjust, sal_Int16 eVertOri );
818     void BuildTableSection( HTMLTable *pTable, bool bReadOptions, bool bHead );
819     void BuildTableColGroup( HTMLTable *pTable, bool bReadOptions );
820     void BuildTableCaption( HTMLTable *pTable );
821     std::shared_ptr<HTMLTable> BuildTable(SvxAdjust eCellAdjust,
822                                           bool bIsParentHead = false,
823                                           bool bHasParentSection=true,
824                                           bool bHasToFlow = false);
825 
826     // misc ...
827 
828     void ParseMoreMetaOptions();
829 
830     bool FileDownload( const OUString& rURL, OUString& rStr );
831     void InsertLink();
832 
833     void InsertIDOption();
834     void InsertLineBreak();
835     void InsertHorzRule();
836 
837     void FillEndNoteInfo( const OUString& rContent );
838     void FillFootNoteInfo( const OUString& rContent );
839     void InsertFootEndNote( const OUString& rName, bool bEndNote, bool bFixed );
840     void FinishFootEndNote();
841     void InsertFootEndNoteText();
842     SwNodeIndex *GetFootEndNoteSection( const OUString& rName );
843     void DeleteFootEndNoteImpl();
844 
845     sal_Int32 StripTrailingLF();
846 
847     // Remove empty paragraph at the PaM position
848     void StripTrailingPara();
849     // If removing an empty node would corrupt the document
850     bool CanRemoveNode(sal_uLong nNodeIdx) const;
851 
852     // Are there fly frames in the current paragraph?
853     bool HasCurrentParaFlys( bool bNoSurroundOnly = false,
854                              bool bSurroundOnly = false ) const;
855 
856     bool PendingObjectsInPaM(SwPaM& rPam) const;
857 
858     class TableDepthGuard
859     {
860     private:
861         SwHTMLParser& m_rParser;
862     public:
TableDepthGuard(SwHTMLParser & rParser)863         TableDepthGuard(SwHTMLParser& rParser)
864             : m_rParser(rParser)
865         {
866             ++m_rParser.m_nTableDepth;
867         }
TooDeep() const868         bool TooDeep() const { return m_rParser.m_nTableDepth > 1024; }
~TableDepthGuard()869         ~TableDepthGuard()
870         {
871             --m_rParser.m_nTableDepth;
872         }
873     };
874 
875 public:         // used in tables
876 
877     // Create brush item (with new) or 0
878     SvxBrushItem* CreateBrushItem( const Color *pColor,
879                                    const OUString &rImageURL,
880                                    const OUString &rStyle,
881                                    const OUString &rId,
882                                    const OUString &rClass );
883 
884 protected:
885     // Executed for each token recognized by CallParser
886     virtual void NextToken( HtmlTokenId nToken ) override;
887     virtual ~SwHTMLParser() override;
888 
889     // If the document is removed, remove the parser as well
890     virtual void Notify(const SfxHint&) override;
891 
892     virtual void AddMetaUserDefined( OUString const & i_rMetaName ) override;
893 
894 public:
895 
896     SwHTMLParser( SwDoc* pD, SwPaM & rCursor, SvStream& rIn,
897                     const OUString& rFileName,
898                     const OUString& rBaseURL,
899                     bool bReadNewDoc,
900                     SfxMedium* pMed, bool bReadUTF8,
901                     bool bIgnoreHTMLComments,
902                     const OUString& rNamespace);
903 
904     virtual SvParserState CallParser() override;
905 
906     static sal_uInt16 ToTwips( sal_uInt16 nPixel );
907 
908     // for reading asynchronously from SvStream
909     virtual void Continue( HtmlTokenId nToken ) override;
910 
911     virtual bool ParseMetaOptions( const css::uno::Reference<css::document::XDocumentProperties>&,
912             SvKeyValueIterator* ) override;
913 
914 
RegisterHTMLTable(HTMLTable * pNew)915     void RegisterHTMLTable(HTMLTable* pNew)
916     {
917         m_aTables.push_back(pNew);
918     }
919 
920     void DeregisterHTMLTable(HTMLTable* pOld);
921 
922     SwDoc* GetDoc() const;
923 
924     bool IsReqIF() const;
925 
IsReadingHeaderOrFooter() const926     bool IsReadingHeaderOrFooter() const { return m_bReadingHeaderOrFooter; }
927 
928     void NotifyMacroEventRead();
929 
930     /// Strips query and fragment from a URL path if base URL is a file:// one.
931     static OUString StripQueryFromPath(const OUString& rBase, const OUString& rPath);
932 };
933 
// Polymorphic base for the payload held in SwPending::pData. The virtual
// destructor allows derived payloads to be destroyed through a base pointer.
struct SwPendingData
{
    virtual ~SwPendingData() = default;
};
938 
939 struct SwPending
940 {
941     HtmlTokenId const nToken;
942     std::unique_ptr<SwPendingData> pData;
943 
SwPendingSwPending944     SwPending( HtmlTokenId nTkn )
945         : nToken( nTkn )
946         {}
947 };
948 
SetStart(const SwPosition & rPos)949 inline void HTMLAttr::SetStart( const SwPosition& rPos )
950 {
951     m_nStartPara = rPos.nNode;
952     m_nStartContent = rPos.nContent.GetIndex();
953     m_nEndPara = m_nStartPara;
954     m_nEndContent = m_nStartContent;
955 }
956 
SetMargins(sal_uInt16 nLeft,sal_uInt16 nRight,short nIndent)957 inline void HTMLAttrContext::SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight,
958                                           short nIndent )
959 {
960     m_nLeftMargin = nLeft;
961     m_nRightMargin = nRight;
962     m_nFirstLineIndent = nIndent;
963     m_bLRSpaceChanged = true;
964 }
965 
GetMargins(sal_uInt16 & nLeft,sal_uInt16 & nRight,short & nIndent) const966 inline void HTMLAttrContext::GetMargins( sal_uInt16& nLeft,
967                                           sal_uInt16& nRight,
968                                           short& nIndent ) const
969 {
970     if( m_bLRSpaceChanged )
971     {
972         nLeft = m_nLeftMargin;
973         nRight = m_nRightMargin;
974         nIndent = m_nFirstLineIndent;
975     }
976 }
977 
SetULSpace(sal_uInt16 nUpper,sal_uInt16 nLower)978 inline void HTMLAttrContext::SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower )
979 {
980     m_nUpperSpace = nUpper;
981     m_nLowerSpace = nLower;
982     m_bULSpaceChanged = true;
983 }
984 
GetULSpace(sal_uInt16 & rUpper,sal_uInt16 & rLower) const985 inline void HTMLAttrContext::GetULSpace( sal_uInt16& rUpper,
986                                           sal_uInt16& rLower ) const
987 {
988     if( m_bULSpaceChanged )
989     {
990         rUpper = m_nUpperSpace;
991         rLower = m_nLowerSpace;
992     }
993 }
994 
HasStyleOptions(const OUString & rStyle,const OUString & rId,const OUString & rClass,const OUString * pLang,const OUString * pDir)995 inline bool SwHTMLParser::HasStyleOptions( const OUString &rStyle,
996                                             const OUString &rId,
997                                             const OUString &rClass,
998                                             const OUString *pLang,
999                                                const OUString *pDir )
1000 {
1001     return !rStyle.isEmpty() || !rId.isEmpty() || !rClass.isEmpty() ||
1002            (pLang && !pLang->isEmpty()) || (pDir && !pDir->isEmpty());
1003 }
1004 
1005 class SwTextFootnote;
1006 
1007 struct SwHTMLTextFootnote
1008 {
1009     OUString sName;
1010     SwTextFootnote* pTextFootnote;
SwHTMLTextFootnoteSwHTMLTextFootnote1011     SwHTMLTextFootnote(const OUString &rName, SwTextFootnote* pInTextFootnote)
1012         : sName(rName)
1013         , pTextFootnote(pInTextFootnote)
1014     {
1015     }
1016 };
1017 
1018 struct SwHTMLFootEndNote_Impl
1019 {
1020     std::vector<SwHTMLTextFootnote> aTextFootnotes;
1021 
1022     OUString sName;
1023     OUString sContent;            // information for the last footnote
1024     bool bEndNote;
1025     bool bFixed;
1026 };
1027 
1028 #endif
1029 
1030 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1031