1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19 #ifndef INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX
20 #define INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX
21
22 #include <config_java.h>
23
24 #include <sfx2/sfxhtml.hxx>
25 #include <svl/listener.hxx>
26 #include <svl/macitem.hxx>
27 #include <svtools/htmltokn.h>
28 #include <editeng/svxenum.hxx>
29 #include <rtl/ref.hxx>
30 #include <fltshell.hxx>
31 #include <com/sun/star/drawing/XShape.hpp>
32 #include <com/sun/star/form/XFormComponent.hpp>
33
34 #include <memory>
35 #include <vector>
36 #include <deque>
37 #include <stack>
38
39 class SfxMedium;
40 class SfxViewFrame;
41 class SdrObject;
42 class SvxMacroTableDtor;
43 class SwDoc;
44 class SwPaM;
45 class SwViewShell;
46 class SwStartNode;
47 class SwFormatColl;
48 class SwField;
49 class SwHTMLForm_Impl;
50 class SwApplet_Impl;
51 struct SwHTMLFootEndNote_Impl;
52 class HTMLTableCnts;
53 struct SwPending;
54 class SvxCSS1PropertyInfo;
55 struct ImplSVEvent;
56
// Paragraph spacing constants, expressed in terms of MM50 (MM50 itself is
// defined outside this header). The CJK and CTL variants are half of the
// default value.
#define HTML_PARSPACE           (MM50)
#define HTML_CJK_PARSPACE       (MM50/2)
#define HTML_CTL_PARSPACE       (MM50/2)

// Default image extent: four times MM50 wide, two times MM50 high.
#define HTML_DFLT_IMG_WIDTH     (MM50*4)
#define HTML_DFLT_IMG_HEIGHT    (MM50*2)
63
64 // some things you often need
65 extern HTMLOptionEnum<SvxAdjust> const aHTMLPAlignTable[];
66 extern HTMLOptionEnum<sal_Int16> const aHTMLImgHAlignTable[];
67 extern HTMLOptionEnum<sal_Int16> const aHTMLImgVAlignTable[];
68
69 // attribute stack:
70
class HTMLAttr;

/// Double-ended queue of HTMLAttr pointers (raw pointers; the container
/// itself does not delete anything).
using HTMLAttrs = std::deque<HTMLAttr *>;
73
// Table of attributes: The order here is important: the attributes at the
// beginning of the table are set first in EndAllAttrs.
76 struct HTMLAttrTable
77 {
78 HTMLAttr
79 *pKeep, // frame attributes
80 *pBox,
81 *pBrush,
82 *pBreak,
83 *pPageDesc,
84
85 *pLRSpace, // paragraph attributes
86 *pULSpace,
87 *pLineSpacing,
88 *pAdjust,
89 *pDropCap,
90 *pSplit,
91 *pWidows,
92 *pOrphans,
93 *pDirection,
94
95 *pCharFormats, // text attributes
96 *pINetFormat,
97
98 *pBold, // character attributes
99 *pBoldCJK,
100 *pBoldCTL,
101 *pItalic,
102 *pItalicCJK,
103 *pItalicCTL,
104 *pStrike,
105 *pUnderline,
106 *pBlink,
107 *pFont,
108 *pFontCJK,
109 *pFontCTL,
110 *pFontHeight,
111 *pFontHeightCJK,
112 *pFontHeightCTL,
113 *pFontColor,
114 *pEscapement,
115 *pCaseMap,
116 *pKerning, // (only for SPACER)
117 *pCharBrush, // character background
118 *pLanguage,
119 *pLanguageCJK,
120 *pLanguageCTL,
121 *pCharBox
122 ;
123 };
124
125 class HTMLAttr
126 {
127 friend class SwHTMLParser;
128 friend class CellSaveStruct;
129
130 SwNodeIndex m_nStartPara, m_nEndPara;
131 sal_Int32 m_nStartContent, m_nEndContent;
132 bool m_bInsAtStart : 1;
133 bool m_bLikePara : 1; // set attribute above the whole paragraph
134 bool m_bValid : 1; // is the attribute valid?
135
136 std::unique_ptr<SfxPoolItem> m_pItem;
137 std::shared_ptr<HTMLAttrTable> m_xAttrTab;
138 HTMLAttr *m_pNext; // still to close attributes with different values
139 HTMLAttr *m_pPrev; // already closed but not set attributes
140 HTMLAttr **m_ppHead; // list head
141
142 HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem,
143 HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
144
145 HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara,
146 sal_Int32 nEndCnt, HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
147
148 public:
149
150 ~HTMLAttr();
151
152 HTMLAttr *Clone( const SwNodeIndex& rEndPara, sal_Int32 nEndCnt ) const;
153 void Reset( const SwNodeIndex& rSttPara, sal_Int32 nSttCnt,
154 HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
155 inline void SetStart( const SwPosition& rPos );
156
GetSttParaIdx() const157 sal_uInt32 GetSttParaIdx() const { return m_nStartPara.GetIndex(); }
GetEndParaIdx() const158 sal_uInt32 GetEndParaIdx() const { return m_nEndPara.GetIndex(); }
159
GetSttPara() const160 const SwNodeIndex& GetSttPara() const { return m_nStartPara; }
GetEndPara() const161 const SwNodeIndex& GetEndPara() const { return m_nEndPara; }
162
GetSttCnt() const163 sal_Int32 GetSttCnt() const { return m_nStartContent; }
GetEndCnt() const164 sal_Int32 GetEndCnt() const { return m_nEndContent; }
165
IsLikePara() const166 bool IsLikePara() const { return m_bLikePara; }
SetLikePara()167 void SetLikePara() { m_bLikePara = true; }
168
GetItem()169 SfxPoolItem& GetItem() { return *m_pItem; }
GetItem() const170 const SfxPoolItem& GetItem() const { return *m_pItem; }
171
GetNext() const172 HTMLAttr *GetNext() const { return m_pNext; }
InsertNext(HTMLAttr * pNxt)173 void InsertNext( HTMLAttr *pNxt ) { m_pNext = pNxt; }
174
GetPrev() const175 HTMLAttr *GetPrev() const { return m_pPrev; }
176 void InsertPrev( HTMLAttr *pPrv );
ClearPrev()177 void ClearPrev() { m_pPrev = nullptr; }
178
SetHead(HTMLAttr ** ppHd,const std::shared_ptr<HTMLAttrTable> & rAttrTab)179 void SetHead(HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab)
180 {
181 m_ppHead = ppHd;
182 m_xAttrTab = rAttrTab;
183 }
184
185 // During setting attributes from styles it can happen that these
186 // shouldn't be set anymore. To delete them would be very expensive, because
187 // you don't know all the places where they are linked in. Therefore they're
188 // made invalid and deleted at the next call of SetAttr_().
Invalidate()189 void Invalidate() { m_bValid = false; }
190 };
191
192 class HTMLAttrContext_SaveDoc;
193
/// How paragraph spacing is treated when a new paragraph is appended.
enum SwHTMLAppendMode
{
    AM_NORMAL      = 0, // no paragraph spacing handling
    AM_NOSPACE     = 1, // set spacing hard to 0cm
    AM_SPACE       = 2, // set spacing hard to 0.5cm
    AM_SOFTNOSPACE = 3, // don't set spacing, but save 0cm
    AM_NONE        = 4  // no append
};
201
202 class HTMLAttrContext
203 {
204 HTMLAttrs m_aAttrs; // the attributes created in the context
205
206 OUString const m_aClass; // context class
207
208 std::unique_ptr<HTMLAttrContext_SaveDoc> m_pSaveDocContext;
209 std::unique_ptr<SfxItemSet> m_pFrameItemSet;
210
211 HtmlTokenId const m_nToken; // the token of the context
212
213 sal_uInt16 const m_nTextFormatColl; // a style created in the context or zero
214
215 sal_uInt16 m_nLeftMargin; // a changed left border
216 sal_uInt16 m_nRightMargin; // a changed right border
217 sal_uInt16 m_nFirstLineIndent; // a changed first line indent
218
219 sal_uInt16 m_nUpperSpace;
220 sal_uInt16 m_nLowerSpace;
221
222 SwHTMLAppendMode m_eAppend;
223
224 bool m_bLRSpaceChanged : 1; // left/right border, changed indent?
225 bool m_bULSpaceChanged : 1; // top/bottom border changed?
226 bool const m_bDefaultTextFormatColl : 1;// nTextFormatColl is only default
227 bool m_bSpansSection : 1; // the context opens a SwSection
228 bool m_bPopStack : 1; // delete above stack elements
229 bool m_bFinishPREListingXMP : 1;
230 bool m_bRestartPRE : 1;
231 bool m_bRestartXMP : 1;
232 bool m_bRestartListing : 1;
233 bool m_bHeaderOrFooter : 1;
234
235 public:
236 void ClearSaveDocContext();
237
238 HTMLAttrContext( HtmlTokenId nTokn, sal_uInt16 nPoolId, const OUString& rClass,
239 bool bDfltColl=false );
240 explicit HTMLAttrContext( HtmlTokenId nTokn );
241 ~HTMLAttrContext();
242
GetToken() const243 HtmlTokenId GetToken() const { return m_nToken; }
244
GetTextFormatColl() const245 sal_uInt16 GetTextFormatColl() const { return m_bDefaultTextFormatColl ? 0 : m_nTextFormatColl; }
GetDfltTextFormatColl() const246 sal_uInt16 GetDfltTextFormatColl() const { return m_bDefaultTextFormatColl ? m_nTextFormatColl : 0; }
247
GetClass() const248 const OUString& GetClass() const { return m_aClass; }
249
250 inline void SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight, short nIndent );
251
IsLRSpaceChanged() const252 bool IsLRSpaceChanged() const { return m_bLRSpaceChanged; }
253 inline void GetMargins( sal_uInt16& nLeft, sal_uInt16& nRight,
254 short &nIndent ) const;
255
256 inline void SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower );
IsULSpaceChanged() const257 bool IsULSpaceChanged() const { return m_bULSpaceChanged; }
258 inline void GetULSpace( sal_uInt16& rUpper, sal_uInt16& rLower ) const;
259
HasAttrs() const260 bool HasAttrs() const { return !m_aAttrs.empty(); }
GetAttrs() const261 const HTMLAttrs& GetAttrs() const { return m_aAttrs; }
GetAttrs()262 HTMLAttrs& GetAttrs() { return m_aAttrs; }
263
SetSpansSection(bool bSet)264 void SetSpansSection( bool bSet ) { m_bSpansSection = bSet; }
GetSpansSection() const265 bool GetSpansSection() const { return m_bSpansSection; }
266
SetPopStack(bool bSet)267 void SetPopStack( bool bSet ) { m_bPopStack = bSet; }
GetPopStack() const268 bool GetPopStack() const { return m_bPopStack; }
269
HasSaveDocContext() const270 bool HasSaveDocContext() const { return m_pSaveDocContext!=nullptr; }
271 HTMLAttrContext_SaveDoc *GetSaveDocContext( bool bCreate=false );
272
GetFrameItemSet() const273 const SfxItemSet *GetFrameItemSet() const { return m_pFrameItemSet.get(); }
274 SfxItemSet *GetFrameItemSet( SwDoc *pCreateDoc );
275
SetFinishPREListingXMP(bool bSet)276 void SetFinishPREListingXMP( bool bSet ) { m_bFinishPREListingXMP = bSet; }
IsFinishPREListingXMP() const277 bool IsFinishPREListingXMP() const { return m_bFinishPREListingXMP; }
278
SetRestartPRE(bool bSet)279 void SetRestartPRE( bool bSet ) { m_bRestartPRE = bSet; }
IsRestartPRE() const280 bool IsRestartPRE() const { return m_bRestartPRE; }
281
SetRestartXMP(bool bSet)282 void SetRestartXMP( bool bSet ) { m_bRestartXMP = bSet; }
IsRestartXMP() const283 bool IsRestartXMP() const { return m_bRestartXMP; }
284
SetRestartListing(bool bSet)285 void SetRestartListing( bool bSet ) { m_bRestartListing = bSet; }
IsRestartListing() const286 bool IsRestartListing() const { return m_bRestartListing; }
287
SetHeaderOrFooter(bool bSet)288 void SetHeaderOrFooter( bool bSet ) { m_bHeaderOrFooter = bSet; }
IsHeaderOrFooter() const289 bool IsHeaderOrFooter() const { return m_bHeaderOrFooter; }
290
SetAppendMode(SwHTMLAppendMode eMode)291 void SetAppendMode( SwHTMLAppendMode eMode ) { m_eAppend = eMode; }
GetAppendMode() const292 SwHTMLAppendMode GetAppendMode() const { return m_eAppend; }
293 };
294
295 typedef std::vector<std::unique_ptr<HTMLAttrContext>> HTMLAttrContexts;
296
297 class HTMLTable;
298 class SwCSS1Parser;
299 class SwHTMLNumRuleInfo;
300
301 typedef std::vector<std::unique_ptr<ImageMap>> ImageMaps;
302
/// Bit flags passed to SwHTMLParser::SaveDocContext(); each plain enumerator
/// occupies one bit.
enum class HtmlContextFlags
{
    ProtectStack = 0x01,
    StripPara    = 0x02,
    KeepNumrule  = 0x04,
    HeaderDist   = 0x08,
    FooterDist   = 0x10,
    KeepAttrs    = 0x20,
    // combined value, for headers, footers or footnotes
    MultiColMask = StripPara | KeepNumrule | KeepAttrs
};
312 namespace o3tl
313 {
314 template<> struct typed_flags<HtmlContextFlags> : is_typed_flags<HtmlContextFlags, 0x03f> {};
315 }
316
/// Bit flags passed to SwHTMLParser::SetFrameFormatAttrs(); each enumerator
/// occupies one bit.
enum class HtmlFrameFormatFlags
{
    Box        = 0x01,
    Background = 0x02,
    Padding    = 0x04,
    Direction  = 0x08,
};
323 namespace o3tl
324 {
325 template<> struct typed_flags<HtmlFrameFormatFlags> : is_typed_flags<HtmlFrameFormatFlags, 0x0f> {};
326 }
327
328 class SwHTMLParser : public SfxHTMLParser, public SvtListener
329 {
330 friend class SectionSaveStruct;
331 friend class CellSaveStruct;
332 friend class CaptionSaveStruct;
333
334 /*
335 Progress bar
336 */
337 std::unique_ptr<ImportProgress> m_xProgress;
338
339 OUString const m_aPathToFile;
340 OUString m_sBaseURL;
341 OUString m_aBasicLib;
342 OUString m_aBasicModule;
343 OUString m_aScriptSource; // content of the current script block
344 OUString m_aScriptType; // type of read script (StarBasic/VB/JAVA)
345 OUString m_aScriptURL; // script URL
346 OUString m_aStyleSource; // content of current style sheet
347 OUString m_aContents; // text of current marquee, field and so
348 OUStringBuffer m_sTitle;
349 OUString m_aUnknownToken; // a started unknown token
350 OUString m_aBulletGrfs[MAXLEVEL];
351 OUString m_sJmpMark;
352
353 std::vector<sal_uInt16> m_aBaseFontStack; // stack for <BASEFONT>
354 // Bit 0-2: font size (1-7)
355 std::vector<sal_uInt16> m_aFontStack; // stack for <FONT>, <BIG>, <SMALL>
356 // Bit 0-2: font size (1-7)
357 // Bit 15: font colour was set
358
359 HTMLAttrs m_aSetAttrTab;// "closed", not set attributes
360 HTMLAttrs m_aParaAttrs; // temporary paragraph attributes
361 std::shared_ptr<HTMLAttrTable> m_xAttrTab; // "open" attributes
362 HTMLAttrContexts m_aContexts;// the current context of attribute/token
363 std::vector<SwFrameFormat *> m_aMoveFlyFrames;// Fly-Frames, the anchor is moved
364 std::deque<sal_Int32> m_aMoveFlyCnts;// and the Content-Positions
365 //stray SwTableBoxes which need to be deleted to avoid leaking, but hold
366 //onto them until parsing is done
367 std::vector<std::unique_ptr<SwTableBox>> m_aOrphanedTableBoxes;
368
369 std::unique_ptr<SwApplet_Impl> m_pAppletImpl; // current applet
370
371 std::unique_ptr<SwCSS1Parser> m_pCSS1Parser; // Style-Sheet-Parser
372 std::unique_ptr<SwHTMLNumRuleInfo> m_pNumRuleInfo;
373 std::vector<SwPending> m_vPendingStack;
374
375 rtl::Reference<SwDoc> m_xDoc;
376 SwPaM *m_pPam; // SwPosition should be enough, or ??
377 SwViewShell *m_pActionViewShell; // SwViewShell, where StartAction was called
378 SwNodeIndex *m_pSttNdIdx;
379
380 std::vector<HTMLTable*> m_aTables;
381 std::shared_ptr<HTMLTable> m_xTable; // current "outermost" table
382 SwHTMLForm_Impl* m_pFormImpl; // current form
383 SdrObject *m_pMarquee; // current marquee
384 std::unique_ptr<SwField> m_xField; // current field
385 ImageMap *m_pImageMap; // current image map
386 std::unique_ptr<ImageMaps> m_pImageMaps; ///< all Image-Maps that have been read
387 std::unique_ptr<SwHTMLFootEndNote_Impl> m_pFootEndNoteImpl;
388
389 Size m_aHTMLPageSize; // page size of HTML template
390
391 sal_uInt32 m_aFontHeights[7]; // font heights 1-7
392 ImplSVEvent * m_nEventId;
393
394 sal_uInt16 m_nBaseFontStMin;
395 sal_uInt16 m_nFontStMin;
396 sal_uInt16 m_nDefListDeep;
397 sal_uInt16 m_nFontStHeadStart; // elements in font stack at <Hn>
398 sal_uInt16 m_nSBModuleCnt; // counter for basic modules
399 sal_uInt16 m_nMissingImgMaps; // How many image maps are still missing?
400 size_t m_nParaCnt;
401 size_t m_nContextStMin; // lower limit of PopContext
402 size_t m_nContextStAttrMin; // lower limit of attributes
403 sal_uInt16 m_nSelectEntryCnt; // Number of entries in the actual listbox
404 HtmlTokenId m_nOpenParaToken; // opened paragraph element
405
406 enum class JumpToMarks { NONE, Mark, Table, Region, Graphic };
407 JumpToMarks m_eJumpTo;
408
409 #ifdef DBG_UTIL
410 sal_uInt16 m_nContinue; // depth of Continue calls
411 #endif
412
413 SvxAdjust m_eParaAdjust; // adjustment of current paragraph
414 HTMLScriptLanguage m_eScriptLang; // current script language
415
416 bool m_bOldIsHTMLMode : 1; // Was it a HTML document?
417
418 bool m_bDocInitalized : 1; // document resp. shell was initialize
419 // flag to prevent double init via recursion
420 bool m_bViewCreated : 1; // the view was already created (asynchronous)
421 bool m_bSetModEnabled : 1;
422
423 bool m_bInFloatingFrame : 1; // We are in a floating frame
424 bool m_bInField : 1;
425 bool m_bKeepUnknown : 1; // handle unknown/not supported tokens
426 // 8
427 bool m_bCallNextToken : 1; // In tables: call NextToken in any case
428 bool m_bIgnoreRawData : 1; // ignore content of script/style
429 bool m_bLBEntrySelected : 1; // Is the current option selected?
430 bool m_bTAIgnoreNewPara : 1; // ignore next LF in text area?
431 bool m_bFixMarqueeWidth : 1; // Change size of marquee?
432
433 bool m_bUpperSpace : 1; // top paragraph spacing is needed
434 bool m_bNoParSpace : 1;
435 // 16
436
437 bool m_bInNoEmbed : 1; // we are in a NOEMBED area
438
439 bool m_bInTitle : 1; // we are in title
440
441 bool m_bChkJumpMark : 1; // maybe jump to predetermined mark
442 bool m_bUpdateDocStat : 1;
443 bool m_bFixSelectWidth : 1; // Set new width of select?
444 bool m_bTextArea : 1;
445 // 24
446 bool m_bSelect : 1;
447 bool m_bInFootEndNoteAnchor : 1;
448 bool m_bInFootEndNoteSymbol : 1;
449 bool m_bIgnoreHTMLComments : 1;
450 bool m_bRemoveHidden : 1; // the filter implementation might set the hidden flag
451
452 bool m_bBodySeen : 1;
453 bool m_bReadingHeaderOrFooter : 1;
454 bool m_bNotifyMacroEventRead : 1;
455 bool m_isInTableStructure;
456
457 sal_Int32 m_nTableDepth;
458
459 /// the names corresponding to the DOCINFO field subtypes INFO[1-4]
460 OUString m_InfoNames[4];
461
462 SfxViewFrame* m_pTempViewFrame;
463
464 bool m_bXHTML = false;
465 bool m_bReqIF = false;
466
467 /**
468 * Non-owning pointers to already inserted OLE nodes, matching opened
469 * <object> XHTML elements.
470 */
471 std::stack<SwOLENode*> m_aEmbeds;
472
473 void DeleteFormImpl();
474
475 void DocumentDetected();
476 void Show();
477 void ShowStatline();
478 SwViewShell *CallStartAction( SwViewShell *pVSh = nullptr, bool bChkPtr = true );
479 SwViewShell *CallEndAction( bool bChkAction = false, bool bChkPtr = true );
480 SwViewShell *CheckActionViewShell();
481
482 DECL_LINK( AsyncCallback, void*, void );
483
484 // set attribute on document
485 void SetAttr_( bool bChkEnd, bool bBeforeTable, std::deque<std::unique_ptr<HTMLAttr>> *pPostIts );
SetAttr(bool bChkEnd=true,bool bBeforeTable=false,std::deque<std::unique_ptr<HTMLAttr>> * pPostIts=nullptr)486 void SetAttr( bool bChkEnd = true, bool bBeforeTable = false,
487 std::deque<std::unique_ptr<HTMLAttr>> *pPostIts = nullptr )
488 {
489 if( !m_aSetAttrTab.empty() || !m_aMoveFlyFrames.empty() )
490 SetAttr_( bChkEnd, bBeforeTable, pPostIts );
491 }
492
493 HTMLAttr **GetAttrTabEntry( sal_uInt16 nWhich );
494
495 // create a new text node on PaM position
496 bool AppendTextNode( SwHTMLAppendMode eMode=AM_NORMAL, bool bUpdateNum=true );
497 void AddParSpace();
498
499 // start/end an attribute
500 // ppDepAttr indicated an attribute table entry, which attribute has to be
501 // set, before the attribute is closed
502 void NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTab, HTMLAttr **ppAttr, const SfxPoolItem& rItem);
503 bool EndAttr( HTMLAttr *pAttr, bool bChkEmpty=true );
504 void DeleteAttr( HTMLAttr* pAttr );
505
506 void EndContextAttrs( HTMLAttrContext *pContext );
507 void SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab);
508 void SplitAttrTab( const SwPosition& rNewPos );
509 void SplitAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab, bool bMoveEndBack);
510 void RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab);
511 void InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart );
512 void InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs );
513
514 bool DoPositioning( SfxItemSet &rItemSet,
515 SvxCSS1PropertyInfo &rPropInfo,
516 HTMLAttrContext *pContext );
517 bool CreateContainer( const OUString& rClass, SfxItemSet &rItemSet,
518 SvxCSS1PropertyInfo &rPropInfo,
519 HTMLAttrContext *pContext );
520 bool EndSection( bool bLFStripped=false );
521
522 void InsertAttrs( SfxItemSet &rItemSet, SvxCSS1PropertyInfo const &rPropInfo,
523 HTMLAttrContext *pContext, bool bCharLvl=false );
524 void InsertAttr( HTMLAttr **ppAttr, const SfxPoolItem & rItem,
525 HTMLAttrContext *pCntxt );
526 void SplitPREListingXMP( HTMLAttrContext *pCntxt );
527 void FixHeaderFooterDistance( bool bHeader, const SwPosition *pOldPos );
528
529 void EndContext( HTMLAttrContext *pContext );
530 void ClearContext( HTMLAttrContext *pContext );
531
532 const SwFormatColl *GetCurrFormatColl() const;
533
534 SwTwips GetCurrentBrowseWidth();
535
GetNumInfo()536 SwHTMLNumRuleInfo& GetNumInfo() { return *m_pNumRuleInfo; }
537 // add parameter <bCountedInList>
538 void SetNodeNum( sal_uInt8 nLevel );
539
540 // Manage paragraph styles
541
542 // set the style resp. its attributes on the stack
543 void SetTextCollAttrs( HTMLAttrContext *pContext = nullptr );
544
545 void InsertParaAttrs( const SfxItemSet& rItemSet );
546
547 // Manage attribute context
548
549 // save current context
PushContext(std::unique_ptr<HTMLAttrContext> & rCntxt)550 void PushContext(std::unique_ptr<HTMLAttrContext>& rCntxt)
551 {
552 m_aContexts.push_back(std::move(rCntxt));
553 }
554
555 // Fetch top/specified context but not outside the context with token
556 // nLimit. If bRemove set then remove it.
557 std::unique_ptr<HTMLAttrContext> PopContext(HtmlTokenId nToken = HtmlTokenId::NONE);
558
559 void GetMarginsFromContext( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent,
560 bool bIgnoreCurrent=false ) const;
561 void GetMarginsFromContextWithNumBul( sal_uInt16 &nLeft, sal_uInt16 &nRight,
562 short& nIndent ) const;
563 void GetULSpaceFromContext( sal_uInt16 &rUpper, sal_uInt16 &rLower ) const;
564
565 void MovePageDescAttrs( SwNode *pSrcNd, sal_uLong nDestIdx, bool bFormatBreak );
566
567 // Handling of tags at paragraph level
568
569 // <P> and <H1> to <H6>
570 void NewPara();
571 void EndPara( bool bReal = false );
572 void NewHeading( HtmlTokenId nToken );
573 void EndHeading();
574
575 // <ADDRESS>, <BLOCKQUOTE> and <PRE>
576 void NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nPoolId );
577 void EndTextFormatColl( HtmlTokenId nToken );
578
579 // <DIV> and <CENTER>
580 void NewDivision( HtmlTokenId nToken );
581 void EndDivision();
582
583 // insert/close Fly-Frames
584 void InsertFlyFrame( const SfxItemSet& rItemSet, HTMLAttrContext *pCntxt,
585 const OUString& rId );
586
587 void SaveDocContext( HTMLAttrContext *pCntxt, HtmlContextFlags nFlags,
588 const SwPosition *pNewPos );
589 void RestoreDocContext( HTMLAttrContext *pCntxt );
590
591 // end all opened <DIV> areas
592 bool EndSections( bool bLFStripped );
593
594 // <MULTICOL>
595 void NewMultiCol( sal_uInt16 columnsFromCss=0 );
596
597 // <MARQUEE>
598 void NewMarquee( HTMLTable *pCurTable=nullptr );
599 void EndMarquee();
600 void InsertMarqueeText();
601
602 // Handling of lists
603
604 // order list <OL> and unordered list <UL> with <LI>
605 void NewNumBulList( HtmlTokenId nToken );
606 void EndNumBulList( HtmlTokenId nToken = HtmlTokenId::NONE );
607 void NewNumBulListItem( HtmlTokenId nToken );
608 void EndNumBulListItem( HtmlTokenId nToken, bool bSetColl);
609
610 // definitions lists <DL> with <DD>, <DT>
611 void NewDefList();
612 void EndDefList();
613 void NewDefListItem( HtmlTokenId nToken );
614 void EndDefListItem( HtmlTokenId nToken = HtmlTokenId::NONE );
615
616 // Handling of tags on character level
617
618 // handle tags like <B>, <I> and so, which enable/disable a certain
619 // attribute or like <SPAN> get attributes from styles
620 void NewStdAttr( HtmlTokenId nToken );
621 void NewStdAttr( HtmlTokenId nToken,
622 HTMLAttr **ppAttr, const SfxPoolItem & rItem,
623 HTMLAttr **ppAttr2=nullptr, const SfxPoolItem *pItem2=nullptr,
624 HTMLAttr **ppAttr3=nullptr, const SfxPoolItem *pItem3=nullptr );
625 void EndTag( HtmlTokenId nToken );
626
627 // handle font attributes
628 void NewBasefontAttr(); // for <BASEFONT>
629 void EndBasefontAttr();
630 void NewFontAttr( HtmlTokenId nToken ); // for <FONT>, <BIG> and <SMALL>
631 void EndFontAttr( HtmlTokenId nToken );
632
633 // tags realized via character styles
634 void NewCharFormat( HtmlTokenId nToken );
635
636 void ClearFootnotesMarksInRange(const SwNodeIndex& rSttIdx, const SwNodeIndex& rEndIdx);
637
638 void DeleteSection(SwStartNode* pSttNd);
639
640 // <SDFIELD>
641 public:
642 static SvxNumType GetNumType( const OUString& rStr, SvxNumType eDfltType );
643 private:
644 void NewField();
645 void EndField();
646 void InsertFieldText();
647
648 // <SPACER>
649 void InsertSpacer();
650
651 // Inserting graphics, plug-ins and applets
652
653 // search image maps and link with graphic nodes
654 ImageMap *FindImageMap( const OUString& rURL ) const;
655 void ConnectImageMaps();
656
657 // find anchor of Fly-Frames and set corresponding attributes
658 // in Attrset (htmlgrin.cxx)
659 void SetAnchorAndAdjustment( sal_Int16 eVertOri,
660 sal_Int16 eHoriOri,
661 const SvxCSS1PropertyInfo &rPropInfo,
662 SfxItemSet& rFrameSet );
663 void SetAnchorAndAdjustment( sal_Int16 eVertOri,
664 sal_Int16 eHoriOri,
665 SfxItemSet& rFrameSet,
666 bool bDontAppend=false );
667 void SetAnchorAndAdjustment( const SvxCSS1PropertyInfo &rPropInfo,
668 SfxItemSet &rFrameItemSet );
669
670 static void SetFrameFormatAttrs( SfxItemSet &rItemSet,
671 HtmlFrameFormatFlags nFlags, SfxItemSet &rFrameItemSet );
672
673 // create frames and register auto bound frames
674 void RegisterFlyFrame( SwFrameFormat *pFlyFrame );
675
676 // Adjust the size of the Fly-Frames to requirements and conditions
677 // (not for graphics, therefore htmlplug.cxx)
678 static void SetFixSize( const Size& rPixSize, const Size& rTwipDfltSize,
679 bool bPrcWidth, bool bPrcHeight,
680 SvxCSS1PropertyInfo const &rPropInfo,
681 SfxItemSet& rFlyItemSet );
682 static void SetVarSize( SvxCSS1PropertyInfo const &rPropInfo,
683 SfxItemSet& rFlyItemSet, SwTwips nDfltWidth=MINLAY,
684 sal_uInt8 nDltPrcWidth=0 );
685 static void SetSpace( const Size& rPixSpace, SfxItemSet &rItemSet,
686 SvxCSS1PropertyInfo &rPropInfo, SfxItemSet& rFlyItemSet );
687
688 sal_uInt16 IncGrfsThatResizeTable();
689
690 void GetDefaultScriptType( ScriptType& rType,
691 OUString& rTypeStr ) const;
692
693 // the actual insert methods for <IMG>, <EMBED>, <APPLET> and <PARAM>
694 void InsertImage(); // htmlgrin.cxx
695 bool InsertEmbed(); // htmlplug.cxx
696
697 #if HAVE_FEATURE_JAVA
698 void NewObject(); // htmlplug.cxx
699 #endif
700 void EndObject(); // link CommandLine with applet (htmlplug.cxx)
701 #if HAVE_FEATURE_JAVA
702 void InsertApplet(); // htmlplug.cxx
703 #endif
704 void EndApplet(); // link CommandLine with applet (htmlplug.cxx)
705 void InsertParam(); // htmlplug.cxx
706
707 void InsertFloatingFrame();
708
709 // parse <BODY>-tag: set background graphic and background colour (htmlgrin.cxx)
710 void InsertBodyOptions();
711
712 // Inserting links and bookmarks (htmlgrin.cxx)
713
714 // parse <A>-tag: insert a link resp. bookmark
715 void NewAnchor();
716 void EndAnchor();
717
718 // insert bookmark
719 void InsertBookmark( const OUString& rName );
720
721 void InsertCommentText( const sal_Char *pTag );
722 void InsertComment( const OUString& rName, const sal_Char *pTag = nullptr );
723
724 // Has the current paragraph bookmarks?
725 bool HasCurrentParaBookmarks( bool bIgnoreStack=false ) const;
726
727 // Inserting script/basic elements
728
729 // parse the last read basic module (htmlbas.cxx)
730 void NewScript();
731 void EndScript();
732
733 void AddScriptSource();
734
735 // insert event in SFX configuration (htmlbas.cxx)
736 void InsertBasicDocEvent( const OUString& aEventName, const OUString& rName,
737 ScriptType eScrType, const OUString& rScrType );
738
739 // Inserting styles
740
741 // <STYLE>
742 void NewStyle();
743 void EndStyle();
744
745 static inline bool HasStyleOptions( const OUString &rStyle, const OUString &rId,
746 const OUString &rClass, const OUString *pLang=nullptr,
747 const OUString *pDir=nullptr );
748 bool ParseStyleOptions( const OUString &rStyle, const OUString &rId,
749 const OUString &rClass, SfxItemSet &rItemSet,
750 SvxCSS1PropertyInfo &rPropInfo,
751 const OUString *pLang=nullptr, const OUString *pDir=nullptr );
752
753 // Inserting Controls and Forms (htmlform.cxx)
754
755 // Insert draw object into document
756 void InsertDrawObject( SdrObject* pNewDrawObj, const Size& rSpace,
757 sal_Int16 eVertOri,
758 sal_Int16 eHoriOri,
759 SfxItemSet& rCSS1ItemSet,
760 SvxCSS1PropertyInfo& rCSS1PropInfo );
761 css::uno::Reference< css::drawing::XShape > InsertControl(
762 const css::uno::Reference< css::form::XFormComponent > & rFormComp,
763 const css::uno::Reference< css::beans::XPropertySet > & rFCompPropSet,
764 const Size& rSize,
765 sal_Int16 eVertOri,
766 sal_Int16 eHoriOri,
767 SfxItemSet& rCSS1ItemSet,
768 SvxCSS1PropertyInfo& rCSS1PropInfo,
769 const SvxMacroTableDtor& rMacroTable,
770 const std::vector<OUString>& rUnoMacroTable,
771 const std::vector<OUString>& rUnoMacroParamTable,
772 bool bSetPropSet = true,
773 bool bHidden = false );
774 void SetControlSize( const css::uno::Reference< css::drawing::XShape > & rShape, const Size& rTextSz,
775 bool bMinWidth, bool bMinHeight );
776
777 public:
778 static void ResizeDrawObject( SdrObject* pObj, SwTwips nWidth );
779 private:
780 static void RegisterDrawObjectToTable( HTMLTable *pCurTable, SdrObject* pObj,
781 sal_uInt8 nWidth );
782
783 void NewForm( bool bAppend=true );
784 void EndForm( bool bAppend=true );
785
786 // Insert methods for <INPUT>, <TEXTAREA> and <SELECT>
787 void InsertInput();
788
789 void NewTextArea();
790 void InsertTextAreaText( HtmlTokenId nToken );
791 void EndTextArea();
792
793 void NewSelect();
794 void InsertSelectOption();
795 void InsertSelectText();
796 void EndSelect();
797
798 // Inserting tables (htmltab.cxx)
799 public:
800
801 // Insert box content after the given node
802 const SwStartNode *InsertTableSection( const SwStartNode *pPrevStNd );
803
804 // Insert box content at the end of the table containing the PaM
805 // and move the PaM into the cell
806 const SwStartNode *InsertTableSection( sal_uInt16 nPoolId );
807
808 // Insert methods for various table tags
809 std::unique_ptr<HTMLTableCnts> InsertTableContents( bool bHead );
810
811 private:
812 // Create a section for the temporary storage of the table caption
813 SwStartNode *InsertTempTableCaptionSection();
814
815 void BuildTableCell( HTMLTable *pTable, bool bReadOptions, bool bHead );
816 void BuildTableRow( HTMLTable *pTable, bool bReadOptions,
817 SvxAdjust eGrpAdjust, sal_Int16 eVertOri );
818 void BuildTableSection( HTMLTable *pTable, bool bReadOptions, bool bHead );
819 void BuildTableColGroup( HTMLTable *pTable, bool bReadOptions );
820 void BuildTableCaption( HTMLTable *pTable );
821 std::shared_ptr<HTMLTable> BuildTable(SvxAdjust eCellAdjust,
822 bool bIsParentHead = false,
823 bool bHasParentSection=true,
824 bool bHasToFlow = false);
825
826 // misc ...
827
828 void ParseMoreMetaOptions();
829
830 bool FileDownload( const OUString& rURL, OUString& rStr );
831 void InsertLink();
832
833 void InsertIDOption();
834 void InsertLineBreak();
835 void InsertHorzRule();
836
837 void FillEndNoteInfo( const OUString& rContent );
838 void FillFootNoteInfo( const OUString& rContent );
839 void InsertFootEndNote( const OUString& rName, bool bEndNote, bool bFixed );
840 void FinishFootEndNote();
841 void InsertFootEndNoteText();
842 SwNodeIndex *GetFootEndNoteSection( const OUString& rName );
843 void DeleteFootEndNoteImpl();
844
845 sal_Int32 StripTrailingLF();
846
847 // Remove empty paragraph at the PaM position
848 void StripTrailingPara();
849 // If removing an empty node would corrupt the document
850 bool CanRemoveNode(sal_uLong nNodeIdx) const;
851
852 // Are there fly frames in the current paragraph?
853 bool HasCurrentParaFlys( bool bNoSurroundOnly = false,
854 bool bSurroundOnly = false ) const;
855
856 bool PendingObjectsInPaM(SwPaM& rPam) const;
857
858 class TableDepthGuard
859 {
860 private:
861 SwHTMLParser& m_rParser;
862 public:
TableDepthGuard(SwHTMLParser & rParser)863 TableDepthGuard(SwHTMLParser& rParser)
864 : m_rParser(rParser)
865 {
866 ++m_rParser.m_nTableDepth;
867 }
TooDeep() const868 bool TooDeep() const { return m_rParser.m_nTableDepth > 1024; }
~TableDepthGuard()869 ~TableDepthGuard()
870 {
871 --m_rParser.m_nTableDepth;
872 }
873 };
874
875 public: // used in tables
876
877 // Create brush item (with new) or 0
878 SvxBrushItem* CreateBrushItem( const Color *pColor,
879 const OUString &rImageURL,
880 const OUString &rStyle,
881 const OUString &rId,
882 const OUString &rClass );
883
884 protected:
885 // Executed for each token recognized by CallParser
886 virtual void NextToken( HtmlTokenId nToken ) override;
887 virtual ~SwHTMLParser() override;
888
889 // If the document is removed, remove the parser as well
890 virtual void Notify(const SfxHint&) override;
891
892 virtual void AddMetaUserDefined( OUString const & i_rMetaName ) override;
893
894 public:
895
896 SwHTMLParser( SwDoc* pD, SwPaM & rCursor, SvStream& rIn,
897 const OUString& rFileName,
898 const OUString& rBaseURL,
899 bool bReadNewDoc,
900 SfxMedium* pMed, bool bReadUTF8,
901 bool bIgnoreHTMLComments,
902 const OUString& rNamespace);
903
904 virtual SvParserState CallParser() override;
905
906 static sal_uInt16 ToTwips( sal_uInt16 nPixel );
907
908 // for reading asynchronously from SvStream
909 virtual void Continue( HtmlTokenId nToken ) override;
910
911 virtual bool ParseMetaOptions( const css::uno::Reference<css::document::XDocumentProperties>&,
912 SvKeyValueIterator* ) override;
913
914
RegisterHTMLTable(HTMLTable * pNew)915 void RegisterHTMLTable(HTMLTable* pNew)
916 {
917 m_aTables.push_back(pNew);
918 }
919
920 void DeregisterHTMLTable(HTMLTable* pOld);
921
922 SwDoc* GetDoc() const;
923
924 bool IsReqIF() const;
925
IsReadingHeaderOrFooter() const926 bool IsReadingHeaderOrFooter() const { return m_bReadingHeaderOrFooter; }
927
928 void NotifyMacroEventRead();
929
930 /// Strips query and fragment from a URL path if base URL is a file:// one.
931 static OUString StripQueryFromPath(const OUString& rBase, const OUString& rPath);
932 };
933
/// Empty base type with a virtual destructor, so that objects of derived
/// types can be destroyed through a SwPendingData pointer (SwPending keeps
/// its payload in a std::unique_ptr<SwPendingData>).
struct SwPendingData
{
    // Defaulted instead of an empty user-written body.
    virtual ~SwPendingData() = default;
};
938
939 struct SwPending
940 {
941 HtmlTokenId const nToken;
942 std::unique_ptr<SwPendingData> pData;
943
SwPendingSwPending944 SwPending( HtmlTokenId nTkn )
945 : nToken( nTkn )
946 {}
947 };
948
SetStart(const SwPosition & rPos)949 inline void HTMLAttr::SetStart( const SwPosition& rPos )
950 {
951 m_nStartPara = rPos.nNode;
952 m_nStartContent = rPos.nContent.GetIndex();
953 m_nEndPara = m_nStartPara;
954 m_nEndContent = m_nStartContent;
955 }
956
SetMargins(sal_uInt16 nLeft,sal_uInt16 nRight,short nIndent)957 inline void HTMLAttrContext::SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight,
958 short nIndent )
959 {
960 m_nLeftMargin = nLeft;
961 m_nRightMargin = nRight;
962 m_nFirstLineIndent = nIndent;
963 m_bLRSpaceChanged = true;
964 }
965
GetMargins(sal_uInt16 & nLeft,sal_uInt16 & nRight,short & nIndent) const966 inline void HTMLAttrContext::GetMargins( sal_uInt16& nLeft,
967 sal_uInt16& nRight,
968 short& nIndent ) const
969 {
970 if( m_bLRSpaceChanged )
971 {
972 nLeft = m_nLeftMargin;
973 nRight = m_nRightMargin;
974 nIndent = m_nFirstLineIndent;
975 }
976 }
977
SetULSpace(sal_uInt16 nUpper,sal_uInt16 nLower)978 inline void HTMLAttrContext::SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower )
979 {
980 m_nUpperSpace = nUpper;
981 m_nLowerSpace = nLower;
982 m_bULSpaceChanged = true;
983 }
984
GetULSpace(sal_uInt16 & rUpper,sal_uInt16 & rLower) const985 inline void HTMLAttrContext::GetULSpace( sal_uInt16& rUpper,
986 sal_uInt16& rLower ) const
987 {
988 if( m_bULSpaceChanged )
989 {
990 rUpper = m_nUpperSpace;
991 rLower = m_nLowerSpace;
992 }
993 }
994
HasStyleOptions(const OUString & rStyle,const OUString & rId,const OUString & rClass,const OUString * pLang,const OUString * pDir)995 inline bool SwHTMLParser::HasStyleOptions( const OUString &rStyle,
996 const OUString &rId,
997 const OUString &rClass,
998 const OUString *pLang,
999 const OUString *pDir )
1000 {
1001 return !rStyle.isEmpty() || !rId.isEmpty() || !rClass.isEmpty() ||
1002 (pLang && !pLang->isEmpty()) || (pDir && !pDir->isEmpty());
1003 }
1004
1005 class SwTextFootnote;
1006
1007 struct SwHTMLTextFootnote
1008 {
1009 OUString sName;
1010 SwTextFootnote* pTextFootnote;
SwHTMLTextFootnoteSwHTMLTextFootnote1011 SwHTMLTextFootnote(const OUString &rName, SwTextFootnote* pInTextFootnote)
1012 : sName(rName)
1013 , pTextFootnote(pInTextFootnote)
1014 {
1015 }
1016 };
1017
1018 struct SwHTMLFootEndNote_Impl
1019 {
1020 std::vector<SwHTMLTextFootnote> aTextFootnotes;
1021
1022 OUString sName;
1023 OUString sContent; // information for the last footnote
1024 bool bEndNote;
1025 bool bFixed;
1026 };
1027
1028 #endif
1029
1030 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1031