1 /***************************************************************************
2     Copyright (C) 2012 Robby Stephenson <robby@periapsis.org>
3  ***************************************************************************/
4 
5 /***************************************************************************
6  *                                                                         *
7  *   This program is free software; you can redistribute it and/or         *
8  *   modify it under the terms of the GNU General Public License as        *
9  *   published by the Free Software Foundation; either version 2 of        *
10  *   the License or (at your option) version 3 or any later version        *
11  *   accepted by the membership of KDE e.V. (or its successor approved     *
12  *   by the membership of KDE e.V.), which shall act as a proxy            *
13  *   defined in Section 14 of version 3 of the license.                    *
14  *                                                                         *
15  *   This program is distributed in the hope that it will be useful,       *
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
18  *   GNU General Public License for more details.                          *
19  *                                                                         *
20  *   You should have received a copy of the GNU General Public License     *
21  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
22  *                                                                         *
23  ***************************************************************************/
24 
25 #include "hathitrustfetcher.h"
26 #include "../translators/xslthandler.h"
27 #include "../translators/tellicoimporter.h"
28 #include "../utils/isbnvalidator.h"
29 #include "../utils/lccnvalidator.h"
30 #include "../utils/guiproxy.h"
31 #include "../utils/string_utils.h"
32 #include "../utils/datafileregistry.h"
33 #include "../tellico_debug.h"
34 
35 #include <KLocalizedString>
36 #include <KIO/Job>
37 #include <KIO/JobUiDelegate>
38 #include <KJobWidgets>
39 
40 #include <QLabel>
41 #include <QFile>
42 #include <QTextStream>
43 #include <QGridLayout>
44 #include <QTextCodec>
45 #include <QJsonDocument>
46 #include <QJsonObject>
47 #include <QDomDocument>
48 
namespace {
  // Base URL of the HathiTrust "full" JSON volumes API. The search
  // identifiers are appended to its path when a query is built.
  // The anonymous namespace already gives internal linkage, so no 'static' is
  // needed; constexpr keeps the pointer itself from being reassigned.
  constexpr const char* HATHITRUST_QUERY_URL = "http://catalog.hathitrust.org/api/volumes/full/json/";
}
52 
53 using namespace Tellico;
54 using Tellico::Fetch::HathiTrustFetcher;
55 
HathiTrustFetcher(QObject * parent_)56 HathiTrustFetcher::HathiTrustFetcher(QObject* parent_)
57     : Fetcher(parent_), m_started(false), m_MARC21XMLHandler(nullptr), m_MODSHandler(nullptr) {
58 }
59 
~HathiTrustFetcher()60 HathiTrustFetcher::~HathiTrustFetcher() {
61   delete m_MARC21XMLHandler;
62   m_MARC21XMLHandler = nullptr;
63   delete m_MODSHandler;
64   m_MODSHandler = nullptr;
65 }
66 
source() const67 QString HathiTrustFetcher::source() const {
68   return m_name.isEmpty() ? defaultName() : m_name;
69 }
70 
canSearch(Fetch::FetchKey k) const71 bool HathiTrustFetcher::canSearch(Fetch::FetchKey k) const {
72   return k == ISBN || k == LCCN;
73 }
74 
canFetch(int type) const75 bool HathiTrustFetcher::canFetch(int type) const {
76   return type == Data::Collection::Book || type == Data::Collection::Bibtex;
77 }
78 
readConfigHook(const KConfigGroup &)79 void HathiTrustFetcher::readConfigHook(const KConfigGroup&) {
80 }
81 
search()82 void HathiTrustFetcher::search() {
83   m_started = true;
84   doSearch();
85 }
86 
doSearch()87 void HathiTrustFetcher::doSearch() {
88   if(request().key() != ISBN && request().key() != LCCN) {
89     stop();
90     return;
91   }
92 
93   QUrl u(QString::fromLatin1(HATHITRUST_QUERY_URL));
94 
95   QStringList searchValues;
96   // we split ISBN and LCCN values, which are the only ones we accept
97   const QStringList searchTerms = FieldFormat::splitValue(request().value());
98   foreach(const QString& searchTerm, searchTerms) {
99     if(request().key() == ISBN) {
100       searchValues += QStringLiteral("isbn:%1").arg(ISBNValidator::cleanValue(searchTerm));
101     } else {
102       searchValues += QStringLiteral("lccn:%1").arg(LCCNValidator::formalize(searchTerm));
103     }
104   }
105   u.setPath(u.path() + searchValues.join(QLatin1String("|")));
106 
107 //  myDebug() << u;
108 
109   m_job = KIO::storedGet(u, KIO::NoReload, KIO::HideProgressInfo);
110   KJobWidgets::setWindow(m_job, GUI::Proxy::widget());
111   connect(m_job.data(), &KJob::result, this, &HathiTrustFetcher::slotComplete);
112 }
113 
stop()114 void HathiTrustFetcher::stop() {
115   if(!m_started) {
116     return;
117   }
118   if(m_job) {
119     m_job->kill();
120   }
121   m_started = false;
122   emit signalDone(this);
123 }
124 
initMARC21Handler()125 bool HathiTrustFetcher::initMARC21Handler() {
126   if(m_MARC21XMLHandler) {
127     return true;
128   }
129 
130   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("MARC21slim2MODS3.xsl"));
131   if(xsltfile.isEmpty()) {
132     myWarning() << "can not locate MARC21slim2MODS3.xsl.";
133     return false;
134   }
135 
136   QUrl u = QUrl::fromLocalFile(xsltfile);
137 
138   m_MARC21XMLHandler = new XSLTHandler(u);
139   if(!m_MARC21XMLHandler->isValid()) {
140     myWarning() << "error in MARC21slim2MODS3.xsl.";
141     delete m_MARC21XMLHandler;
142     m_MARC21XMLHandler = nullptr;
143     return false;
144   }
145   return true;
146 }
147 
initMODSHandler()148 bool HathiTrustFetcher::initMODSHandler() {
149   if(m_MODSHandler) {
150     return true;
151   }
152 
153   QString xsltfile = DataFileRegistry::self()->locate(QStringLiteral("mods2tellico.xsl"));
154   if(xsltfile.isEmpty()) {
155     myWarning() << "can not locate mods2tellico.xsl.";
156     return false;
157   }
158 
159   QUrl u = QUrl::fromLocalFile(xsltfile);
160 
161   m_MODSHandler = new XSLTHandler(u);
162   if(!m_MODSHandler->isValid()) {
163     myWarning() << "error in mods2tellico.xsl.";
164     delete m_MODSHandler;
165     m_MODSHandler = nullptr;
166     // no use in keeping the MARC handlers now
167     delete m_MARC21XMLHandler;
168     m_MARC21XMLHandler = nullptr;
169     return false;
170   }
171   return true;
172 }
173 
fetchEntryHook(uint uid_)174 Tellico::Data::EntryPtr HathiTrustFetcher::fetchEntryHook(uint uid_) {
175   return m_entries.value(uid_);
176 }
177 
updateRequest(Data::EntryPtr entry_)178 Tellico::Fetch::FetchRequest HathiTrustFetcher::updateRequest(Data::EntryPtr entry_) {
179   const QString isbn = entry_->field(QStringLiteral("isbn"));
180   if(!isbn.isEmpty()) {
181     return FetchRequest(ISBN, isbn);
182   }
183   const QString lccn = entry_->field(QStringLiteral("lccn"));
184   if(!lccn.isEmpty()) {
185     return FetchRequest(LCCN, lccn);
186   }
187   return FetchRequest();
188 }
189 
slotComplete(KJob * job_)190 void HathiTrustFetcher::slotComplete(KJob* job_) {
191   KIO::StoredTransferJob* job = static_cast<KIO::StoredTransferJob*>(job_);
192 
193   if(!initMARC21Handler() || !initMODSHandler()) {
194     // debug messages are taken care of in the specific methods
195     stop();
196     return;
197   }
198 
199   if(job->error()) {
200     job->uiDelegate()->showErrorMessage();
201     stop();
202     return;
203   }
204 
205   QByteArray data = job->data();
206   if(data.isEmpty()) {
207     myDebug() << "no data";
208     stop();
209     return;
210   }
211   // see bug 319662. If fetcher is cancelled, job is killed
212   // if the pointer is retained, it gets double-deleted
213   m_job = nullptr;
214 
215 #if 0
216   myWarning() << "Remove debug from hathitrustfetcher.cpp";
217   QFile f(QString::fromLatin1("/tmp/test.json"));
218   if(f.open(QIODevice::WriteOnly)) {
219     QTextStream t(&f);
220     t.setCodec("UTF-8");
221     t << data;
222   }
223   f.close();
224 #endif
225 
226   QJsonDocument doc = QJsonDocument::fromJson(data);
227   QVariantMap resultMap = doc.object().toVariantMap();
228   if(resultMap.isEmpty()) {
229     myDebug() << "no results";
230     stop();
231     return;
232   }
233 
234   QVariantMap::const_iterator i = resultMap.constBegin();
235   for( ; i != resultMap.constEnd(); ++i) {
236     const QVariantMap recordMap = i.value().toMap().value(QStringLiteral("records")).toMap();
237     if(recordMap.isEmpty()) {
238       myDebug() << "empty result map";
239       continue;
240     }
241     // we know there's a record, so no need to check for existence of first iterator in map
242     QVariantMap::const_iterator ri = recordMap.constBegin();
243     if(ri == recordMap.constEnd()) {
244       myWarning() << "no iterator in record";
245       continue;
246     }
247     QString marcxml = ri.value().toMap().value(QStringLiteral("marc-xml")).toString();
248     // HathiTrust doesn't always include the XML NS in the JSON results. Assume it's always
249     // MARC XML and check that
250     QDomDocument dom;
251     if(dom.setContent(marcxml, true /* namespace processing */) && dom.documentElement().namespaceURI().isEmpty()) {
252       const QString rootName = dom.documentElement().tagName();
253       myDebug() << "no namespace, attempting to set on" << rootName << "element";
254       QRegularExpression rootRx(QLatin1Char('<') + rootName + QLatin1Char('>'));
255       QString newRoot = QLatin1Char('<') + rootName + QLatin1String(" xmlns=\"http://www.loc.gov/MARC21/slim\">");
256       marcxml.replace(rootRx, newRoot);
257     }
258     const QString modsxml = m_MARC21XMLHandler->applyStylesheet(marcxml);
259 
260     Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(modsxml));
261     imp.setOptions(imp.options() ^ Import::ImportProgress); // no progress needed
262     Data::CollPtr coll = imp.collection();
263     if(!coll) {
264       myWarning() << "no coll pointer";
265       continue;
266     }
267 
268     // since the Dewey and LoC field titles have a context in their i18n call here
269     // but not in the mods2tellico.xsl stylesheet where the field is actually created
270     // update the field titles here
271     QHashIterator<QString, QString> i(allOptionalFields());
272     while(i.hasNext()) {
273       i.next();
274       Data::FieldPtr field = coll->fieldByName(i.key());
275       if(field) {
276         field->setTitle(i.value());
277         coll->modifyField(field);
278       }
279     }
280 
281     foreach(Data::EntryPtr entry, coll->entries()) {
282       FetchResult* r = new FetchResult(this, entry);
283       m_entries.insert(r->uid, entry);
284       emit signalResultFound(r);
285     }
286   }
287 
288   m_hasMoreResults = false; // for now, no continued searches
289   stop();
290 }
291 
configWidget(QWidget * parent_) const292 Tellico::Fetch::ConfigWidget* HathiTrustFetcher::configWidget(QWidget* parent_) const {
293   return new HathiTrustFetcher::ConfigWidget(parent_, this);
294 }
295 
defaultName()296 QString HathiTrustFetcher::defaultName() {
297   return QStringLiteral("HathiTrust"); // no translation
298 }
299 
defaultIcon()300 QString HathiTrustFetcher::defaultIcon() {
301   return favIcon("http://www.hathitrust.org");
302 }
303 
allOptionalFields()304 Tellico::StringHash HathiTrustFetcher::allOptionalFields() {
305   // same ones as z3950fetcher
306   StringHash hash;
307   hash[QStringLiteral("address")]  = i18n("Address");
308   hash[QStringLiteral("abstract")] = i18n("Abstract");
309   hash[QStringLiteral("illustrator")] = i18n("Illustrator");
310   hash[QStringLiteral("dewey")] = i18nc("Dewey Decimal classification system", "Dewey Decimal");
311   hash[QStringLiteral("lcc")] = i18nc("Library of Congress classification system", "LoC Classification");
312   return hash;
313 }
314 
ConfigWidget(QWidget * parent_,const HathiTrustFetcher * fetcher_)315 HathiTrustFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const HathiTrustFetcher* fetcher_)
316     : Fetch::ConfigWidget(parent_) {
317   QVBoxLayout* l = new QVBoxLayout(optionsWidget());
318   l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
319   l->addStretch();
320 
321   // now add additional fields widget
322   addFieldsWidget(HathiTrustFetcher::allOptionalFields(), fetcher_ ? fetcher_->optionalFields() : QStringList());
323 }
324 
saveConfigHook(KConfigGroup &)325 void HathiTrustFetcher::ConfigWidget::saveConfigHook(KConfigGroup&) {
326 }
327 
preferredName() const328 QString HathiTrustFetcher::ConfigWidget::preferredName() const {
329   return HathiTrustFetcher::defaultName();
330 }
331