1 /***************************************************************************
2 Copyright (C) 2012 Robby Stephenson <robby@periapsis.org>
3 ***************************************************************************/
4
5 /***************************************************************************
6 * *
7 * This program is free software; you can redistribute it and/or *
8 * modify it under the terms of the GNU General Public License as *
9 * published by the Free Software Foundation; either version 2 of *
10 * the License or (at your option) version 3 or any later version *
11 * accepted by the membership of KDE e.V. (or its successor approved *
12 * by the membership of KDE e.V.), which shall act as a proxy *
13 * defined in Section 14 of version 3 of the license. *
14 * *
15 * This program is distributed in the hope that it will be useful, *
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
18 * GNU General Public License for more details. *
19 * *
20 * You should have received a copy of the GNU General Public License *
21 * along with this program. If not, see <http://www.gnu.org/licenses/>. *
22 * *
23 ***************************************************************************/
24
25 #include "hathitrustfetcher.h"
26 #include "../translators/xslthandler.h"
27 #include "../translators/tellicoimporter.h"
28 #include "../utils/isbnvalidator.h"
29 #include "../utils/lccnvalidator.h"
30 #include "../utils/guiproxy.h"
31 #include "../utils/string_utils.h"
32 #include "../utils/datafileregistry.h"
33 #include "../tellico_debug.h"
34
35 #include <KLocalizedString>
36 #include <KIO/Job>
37 #include <KIO/JobUiDelegate>
38 #include <KJobWidgets>
39
40 #include <QLabel>
41 #include <QFile>
42 #include <QTextStream>
43 #include <QGridLayout>
44 #include <QTextCodec>
45 #include <QJsonDocument>
46 #include <QJsonObject>
47 #include <QDomDocument>
48
namespace {
  // Base URL of the HathiTrust bibliographic API (full records, JSON output).
  // The search identifiers are appended to the path by doSearch().
  // Both the characters and the pointer are const so the URL can't be reseated by accident.
  const char* const HATHITRUST_QUERY_URL = "http://catalog.hathitrust.org/api/volumes/full/json/";
}
52
53 using namespace Tellico;
54 using Tellico::Fetch::HathiTrustFetcher;
55
HathiTrustFetcher(QObject * parent_)56 HathiTrustFetcher::HathiTrustFetcher(QObject* parent_)
57 : Fetcher(parent_), m_started(false), m_MARC21XMLHandler(nullptr), m_MODSHandler(nullptr) {
58 }
59
~HathiTrustFetcher()60 HathiTrustFetcher::~HathiTrustFetcher() {
61 delete m_MARC21XMLHandler;
62 m_MARC21XMLHandler = nullptr;
63 delete m_MODSHandler;
64 m_MODSHandler = nullptr;
65 }
66
source() const67 QString HathiTrustFetcher::source() const {
68 return m_name.isEmpty() ? defaultName() : m_name;
69 }
70
canSearch(Fetch::FetchKey k) const71 bool HathiTrustFetcher::canSearch(Fetch::FetchKey k) const {
72 return k == ISBN || k == LCCN;
73 }
74
canFetch(int type) const75 bool HathiTrustFetcher::canFetch(int type) const {
76 return type == Data::Collection::Book || type == Data::Collection::Bibtex;
77 }
78
readConfigHook(const KConfigGroup &)79 void HathiTrustFetcher::readConfigHook(const KConfigGroup&) {
80 }
81
search()82 void HathiTrustFetcher::search() {
83 m_started = true;
84 doSearch();
85 }
86
doSearch()87 void HathiTrustFetcher::doSearch() {
88 if(request().key() != ISBN && request().key() != LCCN) {
89 stop();
90 return;
91 }
92
93 QUrl u(QString::fromLatin1(HATHITRUST_QUERY_URL));
94
95 QStringList searchValues;
96 // we split ISBN and LCCN values, which are the only ones we accept
97 const QStringList searchTerms = FieldFormat::splitValue(request().value());
98 foreach(const QString& searchTerm, searchTerms) {
99 if(request().key() == ISBN) {
100 searchValues += QStringLiteral("isbn:%1").arg(ISBNValidator::cleanValue(searchTerm));
101 } else {
102 searchValues += QStringLiteral("lccn:%1").arg(LCCNValidator::formalize(searchTerm));
103 }
104 }
105 u.setPath(u.path() + searchValues.join(QLatin1String("|")));
106
107 // myDebug() << u;
108
109 m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
110 KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
111 connect(m_job.data(), &KJob::result, this, &HathiTrustFetcher::slotComplete);
112 }
113
stop()114 void HathiTrustFetcher::stop() {
115 if(!m_started) {
116 return;
117 }
118 if(m_job) {
119 m_job->kill();
120 }
121 m_started = false;
122 emit signalDone(this);
123 }
124
initMARC21Handler()125 bool HathiTrustFetcher::initMARC21Handler() {
126 if(m_MARC21XMLHandler) {
127 return true;
128 }
129
130 QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("MARC21slim2MODS3.xsl"));
131 if(xsltfile.isEmpty()) {
132 myWarning() << "can not locate MARC21slim2MODS3.xsl.";
133 return false;
134 }
135
136 QUrl u = QUrl::fromLocalFile(xsltfile);
137
138 m_MARC21XMLHandler = new XSLTHandler(u);
139 if(!m_MARC21XMLHandler->isValid()) {
140 myWarning() << "error in MARC21slim2MODS3.xsl.";
141 delete m_MARC21XMLHandler;
142 m_MARC21XMLHandler = nullptr;
143 return false;
144 }
145 return true;
146 }
147
initMODSHandler()148 bool HathiTrustFetcher::initMODSHandler() {
149 if(m_MODSHandler) {
150 return true;
151 }
152
153 QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("mods2tellico.xsl"));
154 if(xsltfile.isEmpty()) {
155 myWarning() << "can not locate mods2tellico.xsl.";
156 return false;
157 }
158
159 QUrl u = QUrl::fromLocalFile(xsltfile);
160
161 m_MODSHandler = new XSLTHandler(u);
162 if(!m_MODSHandler->isValid()) {
163 myWarning() << "error in mods2tellico.xsl.";
164 delete m_MODSHandler;
165 m_MODSHandler = nullptr;
166 // no use in keeping the MARC handlers now
167 delete m_MARC21XMLHandler;
168 m_MARC21XMLHandler = nullptr;
169 return false;
170 }
171 return true;
172 }
173
fetchEntryHook(uint uid_)174 Tellico::Data::EntryPtr HathiTrustFetcher::fetchEntryHook(uint uid_) {
175 return m_entries.value(uid_);
176 }
177
updateRequest(Data::EntryPtr entry_)178 Tellico::Fetch::FetchRequest HathiTrustFetcher::updateRequest(Data::EntryPtr entry_) {
179 const QString isbn = entry_->field(QStringLiteral("isbn"));
180 if(!isbn.isEmpty()) {
181 return FetchRequest(ISBN, isbn);
182 }
183 const QString lccn = entry_->field(QStringLiteral("lccn"));
184 if(!lccn.isEmpty()) {
185 return FetchRequest(LCCN, lccn);
186 }
187 return FetchRequest();
188 }
189
slotComplete(KJob * job_)190 void HathiTrustFetcher::slotComplete(KJob* job_) {
191 KIO::StoredTransferJob* job = static_cast<KIO::StoredTransferJob*>(job_);
192
193 if(!initMARC21Handler() || !initMODSHandler()) {
194 // debug messages are taken care of in the specific methods
195 stop();
196 return;
197 }
198
199 if(job->error()) {
200 job->uiDelegate()->showErrorMessage();
201 stop();
202 return;
203 }
204
205 QByteArray data = job->data();
206 if(data.isEmpty()) {
207 myDebug() << "no data";
208 stop();
209 return;
210 }
211 // see bug 319662. If fetcher is cancelled, job is killed
212 // if the pointer is retained, it gets double-deleted
213 m_job = nullptr;
214
215 #if 0
216 myWarning() << "Remove debug from hathitrustfetcher.cpp";
217 QFile f(QString::fromLatin1("/tmp/test.json"));
218 if(f.open(QIODevice::WriteOnly)) {
219 QTextStream t(&f);
220 t.setCodec("UTF-8");
221 t << data;
222 }
223 f.close();
224 #endif
225
226 QJsonDocument doc = QJsonDocument::fromJson(data);
227 QVariantMap resultMap = doc.object().toVariantMap();
228 if(resultMap.isEmpty()) {
229 myDebug() << "no results";
230 stop();
231 return;
232 }
233
234 QVariantMap::const_iterator i = resultMap.constBegin();
235 for( ; i != resultMap.constEnd(); ++i) {
236 const QVariantMap recordMap = i.value().toMap().value(QStringLiteral("records")).toMap();
237 if(recordMap.isEmpty()) {
238 myDebug() << "empty result map";
239 continue;
240 }
241 // we know there's a record, so no need to check for existence of first iterator in map
242 QVariantMap::const_iterator ri = recordMap.constBegin();
243 if(ri == recordMap.constEnd()) {
244 myWarning() << "no iterator in record";
245 continue;
246 }
247 QString marcxml = ri.value().toMap().value(QStringLiteral("marc-xml")).toString();
248 // HathiTrust doesn't always include the XML NS in the JSON results. Assume it's always
249 // MARC XML and check that
250 QDomDocument dom;
251 if(dom.setContent(marcxml, true /* namespace processing */) && dom.documentElement().namespaceURI().isEmpty()) {
252 const QString rootName = dom.documentElement().tagName();
253 myDebug() << "no namespace, attempting to set on" << rootName << "element";
254 QRegularExpression rootRx(QLatin1Char('<') + rootName + QLatin1Char('>'));
255 QString newRoot = QLatin1Char('<') + rootName + QLatin1String(" xmlns=\"http://www.loc.gov/MARC21/slim\">");
256 marcxml.replace(rootRx, newRoot);
257 }
258 const QString modsxml = m_MARC21XMLHandler->applyStylesheet(marcxml);
259
260 Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(modsxml));
261 imp.setOptions(imp.options() ^ Import::ImportProgress); // no progress needed
262 Data::CollPtr coll = imp.collection();
263 if(!coll) {
264 myWarning() << "no coll pointer";
265 continue;
266 }
267
268 // since the Dewey and LoC field titles have a context in their i18n call here
269 // but not in the mods2tellico.xsl stylesheet where the field is actually created
270 // update the field titles here
271 QHashIterator<QString, QString> i(allOptionalFields());
272 while(i.hasNext()) {
273 i.next();
274 Data::FieldPtr field = coll->fieldByName(i.key());
275 if(field) {
276 field->setTitle(i.value());
277 coll->modifyField(field);
278 }
279 }
280
281 foreach(Data::EntryPtr entry, coll->entries()) {
282 FetchResult* r = new FetchResult(this, entry);
283 m_entries.insert(r->uid, entry);
284 emit signalResultFound(r);
285 }
286 }
287
288 m_hasMoreResults = false; // for now, no continued searches
289 stop();
290 }
291
configWidget(QWidget * parent_) const292 Tellico::Fetch::ConfigWidget* HathiTrustFetcher::configWidget(QWidget* parent_) const {
293 return new HathiTrustFetcher::ConfigWidget(parent_, this);
294 }
295
defaultName()296 QString HathiTrustFetcher::defaultName() {
297 return QStringLiteral("HathiTrust"); // no translation
298 }
299
defaultIcon()300 QString HathiTrustFetcher::defaultIcon() {
301 return favIcon("http://www.hathitrust.org");
302 }
303
allOptionalFields()304 Tellico::StringHash HathiTrustFetcher::allOptionalFields() {
305 // same ones as z3950fetcher
306 StringHash hash;
307 hash[QStringLiteral("address")] = i18n("Address");
308 hash[QStringLiteral("abstract")] = i18n("Abstract");
309 hash[QStringLiteral("illustrator")] = i18n("Illustrator");
310 hash[QStringLiteral("dewey")] = i18nc("Dewey Decimal classification system", "Dewey Decimal");
311 hash[QStringLiteral("lcc")] = i18nc("Library of Congress classification system", "LoC Classification");
312 return hash;
313 }
314
ConfigWidget(QWidget * parent_,const HathiTrustFetcher * fetcher_)315 HathiTrustFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const HathiTrustFetcher* fetcher_)
316 : Fetch::ConfigWidget(parent_) {
317 QVBoxLayout* l = new QVBoxLayout(optionsWidget());
318 l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
319 l->addStretch();
320
321 // now add additional fields widget
322 addFieldsWidget(HathiTrustFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
323 }
324
saveConfigHook(KConfigGroup &)325 void HathiTrustFetcher::ConfigWidget::saveConfigHook(KConfigGroup&) {
326 }
327
preferredName() const328 QString HathiTrustFetcher::ConfigWidget::preferredName() const {
329 return HathiTrustFetcher::defaultName();
330 }
331