1 /************************************************************************
2 **
3 ** Copyright (C) 2016 Kevin B. Hendricks Stratford, ON, Canada
4 **
5 ** This file is part of Sigil.
6 **
7 ** Sigil is free software: you can redistribute it and/or modify
8 ** it under the terms of the GNU General Public License as published by
9 ** the Free Software Foundation, either version 3 of the License, or
10 ** (at your option) any later version.
11 **
12 ** Sigil is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU General Public License for more details.
16 **
17 ** You should have received a copy of the GNU General Public License
18 ** along with Sigil. If not, see <http://www.gnu.org/licenses/>.
19 **
20 *************************************************************************/
21
22 #include <QMutex>
23 #include <QString>
24 #include <QStringList>
25
26 #include "BookManipulation/HTMLMetadata.h"
27
28 static const QStringList EVENT_LIST = QStringList() << "creation" << "publication" << "modification";
29 static const QStringList MODIFICATION_ALIASES = QStringList() << "modified" << "modification";
30 static const QStringList CREATION_ALIASES = QStringList() << "created" << "creation";
31 static const QStringList PUBLICATION_ALIASES = QStringList() << "issued" << "published" << "publication";
32 static const QStringList SCHEME_LIST = QStringList() << "ISBN" << "ISSN" << "DOI";
33
34 QMutex HTMLMetadata::s_AccessMutex;
35 HTMLMetadata *HTMLMetadata::m_Instance = NULL;
36
Instance()37 HTMLMetadata *HTMLMetadata::Instance()
38 {
39 // We use a static local variable
40 // to hold our singleton instance; using a pointer member
41 // variable creates problems with object destruction;
42 QMutexLocker locker(&s_AccessMutex);
43
44 if (m_Instance == 0) {
45 m_Instance = new HTMLMetadata();
46 }
47
48 return m_Instance;
49 }
50
51
52
53 // Processes metadata from inside xhtml files for the gui
54 // try to extract whatever dc metadata possible
MapHTMLToOPFMetadata(GumboNode * node,GumboInterface & gi)55 MetaEntry HTMLMetadata::MapHTMLToOPFMetadata(GumboNode* node, GumboInterface & gi)
56 {
57 MetaEntry meta;
58 if (node->v.element.tag == GUMBO_TAG_META) {
59 QString name;
60 QString value;
61 QHash<QString,QString> matts;
62
63 GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "name");
64 if (attr) name = QString::fromUtf8(attr->value);
65
66 attr = gumbo_get_attribute(&node->v.element.attributes, "content");
67 if (attr) value = QString::fromUtf8(attr->value);
68
69 attr = gumbo_get_attribute(&node->v.element.attributes, "scheme");
70 if (attr) matts[QString("scheme")] = QString::fromUtf8(attr->value);
71
72 attr = gumbo_get_attribute(&node->v.element.attributes, "opf:scheme");
73 if (attr) matts[QString("opf:scheme")] = QString::fromUtf8(attr->value);
74
75 attr = gumbo_get_attribute(&node->v.element.attributes, "id");
76 if (attr) matts[QString("id")] = QString::fromUtf8(attr->value);
77
78 if ((!name.isEmpty()) && (!value.isEmpty())) {
79 meta = FixupHTMLMetadata(name, value, matts);
80 }
81 }
82 return meta;
83 }
84
85
86 // Maps Dublic Core metadata to internal book meta format
FixupHTMLMetadata(QString name,QString value,const QHash<QString,QString> & matts)87 MetaEntry HTMLMetadata::FixupHTMLMetadata(QString name, QString value, const QHash<QString,QString> & matts)
88 {
89 QString lowname = name.toLower();
90 MetaEntry me;
91
92 if (!lowname.startsWith("dc.") &&
93 !lowname.startsWith("dcterms.")) {
94 if (lowname == "copyright") {
95 me.m_name = "dc:rights";
96 me.m_content = value;
97 } else if (lowname == "author") {
98 me.m_name = "dc:creator";
99 me.m_content = value;
100 } else if (lowname == "publisher") {
101 me.m_name = "dc:publisher";
102 me.m_content = value;
103 } else if (lowname == "source") {
104 me.m_name = "dc:source";
105 me.m_content = value;
106 } else if (lowname == "description") {
107 me.m_name = "dc:description";
108 me.m_content = value;
109 } else if (lowname == "date" || lowname == "published") {
110 me.m_name = "dc:date";
111 me.m_content = value;
112 } else if (lowname == "eisbn") {
113 me.m_name = "dc:identifier";
114 me.m_content = "urn:isbn:" + value;
115 } else if (lowname == "issn") {
116 me.m_name = "dc:identifier";
117 me.m_content = "urn:issn:" + value;
118 } else if (lowname == "doi") {
119 me.m_name = "dc:identifier";
120 me.m_content = "urn:doi:" + value;
121 }
122 return me;
123 }
124 return HtmlToOpfDC(name, value, matts);
125 }
126
127
128
129 // Converts HTML sourced Dublin Core metadata to OPF style metadata
130 //
131 // Sample of HTML Based DC Metadata:
132 // <meta name="DC.Title" content="The Title"/>
133 // <meta name="DC.Language" content="en"/>
134 // <meta name="DC.Creator" content=""/>
135 // <meta name="DC.Publisher" content="Publisher Name"/>
136 // <meta name="DC.Date" content="2016-03-01"/>
137 // <meta name="DC.Identifier" content="978-0-00000-000-0" scheme="ISBN"/>
138 // <meta name="DC.Relation" content="978-0-00000-000-0" scheme="ISBN"/>
139
HtmlToOpfDC(QString mname,QString mvalue,const QHash<QString,QString> & matts)140 MetaEntry HTMLMetadata::HtmlToOpfDC(QString mname, QString mvalue, const QHash<QString,QString> & matts)
141 {
142 // Dublin Core from html file with the original 15 element namespace or
143 // expanded DCTerms namespace. Allows qualifiers as refinements
144 // prefix.name[.refinement]
145 QStringList fields = QString(mname.toLower() + "..").split(".");
146 QString name = fields[ 1 ];
147 QString refinement = fields[ 2 ];
148 QString dc_event;
149
150 if (MODIFICATION_ALIASES.contains(name) || MODIFICATION_ALIASES.contains(refinement)) {
151 name = "dc:date";
152 dc_event = "modification";
153 } else if (CREATION_ALIASES.contains(name) || CREATION_ALIASES.contains(refinement)) {
154 name = "dc:date";
155 dc_event = "creation";
156 } else if (PUBLICATION_ALIASES.contains(name) || PUBLICATION_ALIASES.contains(refinement)) {
157 name = "dc:date";
158 dc_event = "publication";
159 }
160
161 QString role = (name == "creator") || (name == "contributor") ? refinement : QString();
162
163 QString scheme = matts.value("scheme");
164 if ((name == "identifier") && (scheme.isEmpty())) {
165 scheme = refinement;
166 }
167 if (!scheme.isEmpty()) {
168 if (SCHEME_LIST.contains(scheme, Qt::CaseInsensitive)) {
169 scheme = SCHEME_LIST.filter(scheme, Qt::CaseInsensitive)[ 0 ];
170 }
171 }
172
173 MetaEntry me;
174 me.m_name = "dc:" + name;
175 me.m_content = mvalue;
176 if (!scheme.isEmpty()) {
177 me.m_atts[ "opf:scheme" ] = scheme;
178 }
179 if (!dc_event.isEmpty()) {
180 me.m_atts[ "opf:event" ] = dc_event;
181 }
182 if (!role.isEmpty()) {
183 me.m_atts[ "opf:role" ] = role;
184 }
185 return me;
186 }
187