1 /************************************************************************
2 **
3 **  Copyright (C) 2015-2020 Kevin B. Hendricks, Stratford, Ontario Canada
4 **  Copyright (C) 2009-2011 Strahinja Markovic  <strahinja.markovic@gmail.com>
5 **
6 **  This file is part of Sigil.
7 **
8 **  Sigil is free software: you can redistribute it and/or modify
9 **  it under the terms of the GNU General Public License as published by
10 **  the Free Software Foundation, either version 3 of the License, or
11 **  (at your option) any later version.
12 **
13 **  Sigil is distributed in the hope that it will be useful,
14 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 **  GNU General Public License for more details.
17 **
18 **  You should have received a copy of the GNU General Public License
19 **  along with Sigil.  If not, see <http://www.gnu.org/licenses/>.
20 **
21 *************************************************************************/
22 
23 #pragma once
24 #ifndef IMPORTHTML_H
25 #define IMPORTHTML_H
26 
27 #include "Importers/Importer.h"
28 #include "BookManipulation/XhtmlDoc.h"
29 
// Forward declarations (kept in alphabetical order).
// HTMLResource is used in this header only through pointers, so an
// incomplete type is sufficient; CSSResource and QDomDocument are not
// referenced by the declarations visible in this header.
class CSSResource;
class HTMLResource;
class QDomDocument;
33 
34 
35 class ImportHTML : public Importer
36 {
37 
38 public:
39 
40     // Constructor;
41     // The parameter is the file to be imported
42     ImportHTML(const QString &fullfilepath);
43 
44     // Needed so that we can use an existing Book
45     // in which to load HTML files (and their dependencies).
46     void SetBook(QSharedPointer<Book> book, bool ignore_duplicates);
47 
48     virtual XhtmlDoc::WellFormedError CheckValidToLoad();
49 
50     // Reads and parses the file
51     // and returns the created Book.
52     virtual QSharedPointer<Book> GetBook(bool extract_metadata=true);
53 
54     const QStringList& GetAddedBookPaths();
55 
56 private:
57 
58     // Loads the source code into the Book
59     QString LoadSource();
60 
61     // Searches for meta information in the HTML file
62     // and tries to convert it to Dublin Core
63     void LoadMetadata(const QString &source);
64 
65     HTMLResource *CreateHTMLResource();
66 
67     void UpdateFiles(HTMLResource *html_resource,
68                      QString &source,
69                      const QHash<QString, QString> &updates);
70 
71     // Loads the referenced files into the main folder of the book;
72     // as the files get a new name, the references are updated
73     QHash<QString, QString> LoadFolderStructure(const QString & source);
74 
75     // Returns a hash with keys being old references (URLs) to resources,
76     // and values being the new references to those resources.
77     QHash<QString, QString> LoadMediaFiles(const QStringList & file_paths);
78 
79     QHash<QString, QString> LoadStyleFiles(const QStringList & file_paths);
80 
81 
82     ///////////////////////////////
83     // PRIVATE MEMBER VARIABLES
84     ///////////////////////////////
85 
86     bool m_IgnoreDuplicates;
87 
88     QString m_CachedSource;
89 
90     QString m_EpubVersion;
91 
92     QStringList m_AddedBookPaths;
93 };
94 
95 #endif // IMPORTHTML_H
96