1 /*
2 For general Scribus (>=1.3.2) copyright and licensing information please refer
3 to the COPYING file provided with the program. Following this notice may exist
4 a copyright and/or license notice that predates the release of Scribus 1.3.2
5 for which a new license (GPL+exception) is in place.
6 */
7 /***************************************************************************
8  *   Copyright (C) 2004 by Riku Leino                                      *
9  *   tsoots@gmail.com                                                      *
10  *                                                                         *
11  *   This program is free software; you can redistribute it and/or modify  *
12  *   it under the terms of the GNU General Public License as published by  *
13  *   the Free Software Foundation; either version 2 of the License, or     *
14  *   (at your option) any later version.                                   *
15  *                                                                         *
16  *   This program is distributed in the hope that it will be useful,       *
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
19  *   GNU General Public License for more details.                          *
20  *                                                                         *
21  *   You should have received a copy of the GNU General Public License     *
22  *   along with this program; if not, write to the                         *
23  *   Free Software Foundation, Inc.,                                       *
24  *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.             *
25  ***************************************************************************/
26 #ifndef HTMLREADER_H
27 #define HTMLREADER_H
28 
29 #include "scconfig.h"
30 
31 #include <vector>
32 #include <libxml/HTMLparser.h>
33 
34 #include <QMap>
35 #include <QString>
36 
37 #include <gtparagraphstyle.h>
38 #include <gtwriter.h>
39 
40 typedef QMap<QString, QString> HTMLAttributesMap;
41 
42 /*! \brief Parse and import a HTML file.
43 Supported tags: P, CENTER, BR, A, UL, OL, LI, H1, H2, H3, H4,
44 B, STRONG, I, EM, CODE, BODY, PRE, IMG, SUB, SUP, DEL, INS, U,
45 DIV.
46 */
47 class HTMLReader
48 {
49 public:
50 	HTMLReader(gtParagraphStyle *ps, gtWriter *w, bool textOnly);
51 	~HTMLReader();
52 
53 	void parse(const QString& filename);
54 	static void startElement(void *user_data, const xmlChar * fullname, const xmlChar ** atts);
55 	static void endElement(void *user_data, const xmlChar * name);
56 	static void characters(void *user_data, const xmlChar * ch, int len);
57 	bool startElement(const QString &name, const HTMLAttributesMap &attrs);
58 	bool endElement(const QString &name);
59 	bool characters(const QString &ch);
60 
61 private:
62 	QString currentDir;
63 	QString currentFile;
64 	QString defaultColor;
65 	QString defaultWeight;
66 	QString defaultSlant;
67 	QString templateCategory;
68 	QString href;
69 	QString extLinks;
70 	int extIndex { 1 };
71 	int listLevel { -1 };
72 	std::vector<gtParagraphStyle*> listStyles;
73 	std::vector<int> nextItemNumbers;
74 	gtWriter *writer { nullptr };
75 	gtParagraphStyle *pstyle { nullptr };
76 	gtParagraphStyle *pstylec { nullptr };
77 	gtParagraphStyle *pstyleh1 { nullptr };
78 	gtParagraphStyle *pstyleh2 { nullptr };
79 	gtParagraphStyle *pstyleh3 { nullptr };
80 	gtParagraphStyle *pstyleh4 { nullptr };
81 	gtParagraphStyle *pstyleh5 { nullptr };
82 	gtParagraphStyle *pstyleh6 { nullptr };
83 	gtParagraphStyle *pstylecode { nullptr };
84 	gtParagraphStyle *pstylep { nullptr };
85 	gtParagraphStyle *pstylepre { nullptr };
86 	bool inOL { false };
87 	bool wasInOL { false };
88 	bool inUL { false };
89 	bool wasInUL { false };
90 	bool inLI { false };
91 	bool addedLI { false };
92 	bool inH1 { false };
93 	bool inH2 { false };
94 	bool inH3 { false };
95 	bool inH4 { false };
96 	bool inH5 { false };
97 	bool inH6 { false };
98 	bool inA { false };
99 	bool inCenter { false };
100 	bool inCode { false };
101 	bool inBody { false };
102 	bool inPre { false };
103 	bool inP { false };
104 
105 	static bool elemJustStarted;
106 	static bool elemJustFinished;
107 
108 	bool lastCharWasSpace { false };
109 	bool noFormatting { false };
110 
111 	static HTMLReader* hreader;
112 
113 	void initPStyles();
114 	void toggleEffect(FontEffect e);
115 	void setItalicFont();
116 	void unsetItalicFont();
117 	void setBlueFont();
118 	void setDefaultColor();
119 	void setBoldFont();
120 	void unSetBoldFont();
121 	void createListStyle();
122 };
123 
124 #endif
125