1 /************************************************************************ 2 ** 3 ** Copyright (C) 2018-2019 Kevin B. Hendricks, Stratford Ontario Canada 4 ** Copyright (C) 2012 John Schember <john@nachtimwald.com> 5 ** Copyright (C) 2009-2011 Strahinja Markovic <strahinja.markovic@gmail.com> 6 ** 7 ** This file is part of Sigil. 8 ** 9 ** Sigil is free software: you can redistribute it and/or modify 10 ** it under the terms of the GNU General Public License as published by 11 ** the Free Software Foundation, either version 3 of the License, or 12 ** (at your option) any later version. 13 ** 14 ** Sigil is distributed in the hope that it will be useful, 15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 ** GNU General Public License for more details. 18 ** 19 ** You should have received a copy of the GNU General Public License 20 ** along with Sigil. If not, see <http://www.gnu.org/licenses/>. 21 ** 22 *************************************************************************/ 23 24 #pragma once 25 #ifndef IMPORTEPUB_H 26 #define IMPORTEPUB_H 27 28 #include <QCoreApplication> 29 #include <QtCore/QHash> 30 #include <QtCore/QSet> 31 #include <QtCore/QStringList> 32 33 #include "Importers/Importer.h" 34 #include "Misc/TempFolder.h" 35 36 class HTMLResource; 37 class CSSResource; 38 class QXmlStreamReader; 39 40 class ImportEPUB : public Importer 41 { 42 Q_DECLARE_TR_FUNCTIONS(ImportEPUB) 43 44 public: 45 // The parameter is the file to be imported 46 ImportEPUB(const QString &fullfilepath); 47 48 // Reads and parses the file 49 // and returns the created Book 50 virtual QSharedPointer<Book> GetBook(bool extract_metaata=true); 51 52 private: 53 /** 54 * Extracts the EPUB file to a temporary folder. 55 * The path to the the temp folder with the extracted files 56 * is stored in m_ExtractedFolderPath. 57 */ 58 void ExtractContainer(); 59 60 /** 61 * Locates the OPF file in the extracted folder. 62 * The path to the OPF is then stored in m_OPFFilePath. 63 */ 64 void LocateOPF(); 65 66 /** 67 * Parses the OPF file and stores the parsed information 68 * inside m_MetaElements, m_Files and m_ReadingOrderIds 69 */ 70 void ReadOPF(); 71 72 /** 73 * Reads an <identifier> element. 74 * 75 * @param opf_reader The OPF reader positioned to read 76 * the required element type. 77 */ 78 void ReadIdentifierElement(QXmlStreamReader *opf_reader); 79 80 /** 81 * Reads a metadata <link> element. 82 * 83 * @param opf_reader The OPF reader positioned to read 84 * the required element type. 85 */ 86 void ReadMetadataLinkElement(QXmlStreamReader *opf_reader); 87 88 /** 89 * Reads a manifest <item> element. 90 * 91 * @param opf_reader The OPF reader positioned to read 92 * the required element type. 93 */ 94 void ReadManifestItemElement(QXmlStreamReader *opf_reader); 95 96 /** 97 * Locate or create an NCX file if missing or not correctly specified. 98 * 99 * @param ncx_id_on_spine - The toc attribute value from the <spine> 100 */ 101 void LocateOrCreateNCX(const QString &ncx_id_on_spine); 102 103 /** 104 * Loads the book's infrastructure files, like 105 * the NCX and the OPF. 106 */ 107 void LoadInfrastructureFiles(); 108 109 /** 110 * Loads the referenced files into the main folder of the book. 111 * 112 * @return success 113 */ 114 bool LoadFolderStructure(); 115 116 /** 117 * Loads a single file. 118 * 119 * @param path A full path to the file to load. 120 * @param mimetype The mimetype of the file to load. 121 * @return A tuple where the first member is the old path to the file, 122 * and the new member is the new, OEBPS-relative path to it. 123 */ 124 std::tuple<QString, QString> LoadOneFile(const QString &path, 125 const QString &mimetype = QString()); 126 127 /** 128 * Performs the necessary modifications to the OPF 129 * source so that it can be read. 130 * 131 * @param source The XML source of the OPF. 132 * @return The prepared source. 133 */ 134 QString PrepareOPFForReading(const QString &source); 135 136 /** 137 * Parses the "encryption.xml" file in the META-INF folder. 138 * We return the list of file paths and the algorithms used 139 * to encrypt them. 140 * 141 * @return The list of encrypted fsiles. The keys are the 142 * absolute paths to the files and the values are the 143 * encryption algorithm IDs. 144 */ 145 QHash<QString, QString> ParseEncryptionXml(); 146 147 bool BookContentEncrypted(const QHash<QString, QString> &encrypted_files); 148 149 void AddObfuscatedButUndeclaredFonts(const QHash<QString, QString> &encrypted_files); 150 151 /** 152 * Another workaround function to handle com.apple.ibooks.display-options.xml 153 * and any future non-standard Apple xml. If additional files need to 154 * be excluded from being handled as proper ePub content, you will also 155 * need to alter FILE_EXCEPTIONS at the top of FolderKeeper.cpp so the regex 156 * will detect them. 157 * This is not added to the manifest, but epubcheck uses a similar exception 158 * and accepts ePubs containing an unmanifested file of this name. 159 */ 160 void AddNonStandardAppleXML(); 161 162 void ProcessFontFiles(const QList<Resource *> &resources, 163 const QHash<QString, QString> &encrypted_files); 164 165 /** 166 * The main temp folder where files are stored. 167 */ 168 TempFolder m_TempFolder; 169 170 /** 171 * The full path to the folder where the 172 * EPUB was extracted to. 173 */ 174 QString m_ExtractedFolderPath; 175 176 /** 177 * The full path to the OPF file 178 * of the publication. 179 */ 180 QString m_OPFFilePath; 181 182 /** 183 * The full path to the NCX file 184 * of the publication. 185 */ 186 QString m_NCXFilePath; 187 188 /** 189 * The map of all the files in the publication's 190 * manifest; The keys are the element ID's, 191 * the values are stored paths to the files. 192 */ 193 QMap<QString, QString> m_Files; 194 195 /** 196 * The map of all files in the publication's manifest; 197 * The keys are the element ID's, the vaules are the 198 * mimetype of the file. 199 */ 200 QMap<QString, QString> m_FileMimetypes; 201 202 /** 203 * InDesign likes listing several files multiple times in the manifest, 204 * even though that's explicitly forbidden by the spec. So we use this 205 * to make sure we don't load such files multiple times. 206 */ 207 QStringList m_ManifestFilePaths; 208 QStringList m_ManifestMediaTypes; 209 210 QSet<QString> m_ZipFilePaths; 211 212 QDir m_opfDir; 213 214 /** 215 * The identifier of the book's unique identifier. 216 */ 217 QString m_UniqueIdentifierId; 218 219 /** 220 * The value of the book's unique identifier. 221 */ 222 QString m_UniqueIdentifierValue; 223 224 /** 225 * The value of the book's first UUID-based identifier. 226 */ 227 QString m_UuidIdentifierValue; 228 229 /** 230 * It's theoretically possible (although unlikely) that an epub 231 * will have more than one file listed in the OPF manifest with 232 * an NCX mimetype. Only one of them will be the actual NCX though. 233 * This hash stores all the candidates, as an ID-to-href mapping. 234 */ 235 QHash<QString, QString> m_NcxCandidates; 236 237 bool m_HasSpineItems; 238 bool m_NCXNotInManifest; 239 QString m_NCXId; 240 QString m_NavId; 241 QString m_NavHref; 242 Resource * m_NavResource; 243 244 /** 245 * The value of the opf package version tag 246 */ 247 QString m_PackageVersion; 248 249 }; 250 251 #endif // IMPORTEPUB_H 252