1 /***************************************************************************
2     Copyright (C) 2003-2009 Robby Stephenson <robby@periapsis.org>
3  ***************************************************************************/
4 
5 /***************************************************************************
6  *                                                                         *
7  *   This program is free software; you can redistribute it and/or         *
8  *   modify it under the terms of the GNU General Public License as        *
9  *   published by the Free Software Foundation; either version 2 of        *
10  *   the License or (at your option) version 3 or any later version        *
11  *   accepted by the membership of KDE e.V. (or its successor approved     *
12  *   by the membership of KDE e.V.), which shall act as a proxy            *
13  *   defined in Section 14 of version 3 of the license.                    *
14  *                                                                         *
15  *   This program is distributed in the hope that it will be useful,       *
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
18  *   GNU General Public License for more details.                          *
19  *                                                                         *
20  *   You should have received a copy of the GNU General Public License     *
21  *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
22  *                                                                         *
23  ***************************************************************************/
24 
25 #include "bibtexcollection.h"
26 #include "../entrycomparison.h"
27 #include "../utils/bibtexhandler.h"
28 #include "../fieldformat.h"
29 #include "../tellico_debug.h"
30 
31 #include <KLocalizedString>
32 #include <KStringHandler>
33 
34 using namespace Tellico;
35 using Tellico::Data::BibtexCollection;
36 
37 namespace {
38   static const char* bibtex_general = I18N_NOOP("General");
39   static const char* bibtex_publishing = I18N_NOOP("Publishing");
40   static const char* bibtex_misc = I18N_NOOP("Miscellaneous");
41 }
42 
BibtexCollection(bool addDefaultFields_,const QString & title_)43 BibtexCollection::BibtexCollection(bool addDefaultFields_, const QString& title_)
44    : Collection(title_.isEmpty() ? i18n("Bibliography") : title_) {
45   setDefaultGroupField(QStringLiteral("author"));
46   if(addDefaultFields_) {
47     addFields(defaultFields());
48   }
49 
50   // Bibtex has some default macros for the months
51   addMacro(QStringLiteral("jan"), QString());
52   addMacro(QStringLiteral("feb"), QString());
53   addMacro(QStringLiteral("mar"), QString());
54   addMacro(QStringLiteral("apr"), QString());
55   addMacro(QStringLiteral("may"), QString());
56   addMacro(QStringLiteral("jun"), QString());
57   addMacro(QStringLiteral("jul"), QString());
58   addMacro(QStringLiteral("aug"), QString());
59   addMacro(QStringLiteral("sep"), QString());
60   addMacro(QStringLiteral("oct"), QString());
61   addMacro(QStringLiteral("nov"), QString());
62   addMacro(QStringLiteral("dec"), QString());
63 }
64 
defaultFields()65 Tellico::Data::FieldList BibtexCollection::defaultFields() {
66   FieldList list;
67   FieldPtr field;
68 
69   const QString bibtex = QStringLiteral("bibtex");
70 
71 /******************* General ****************************/
72 
73   field = Field::createDefaultField(Field::TitleField);
74   field->setProperty(bibtex, QStringLiteral("title"));
75   list.append(field);
76 
77   QStringList types;
78   types << QStringLiteral("article") << QStringLiteral("book")
79         << QStringLiteral("booklet") << QStringLiteral("inbook")
80         << QStringLiteral("incollection") << QStringLiteral("inproceedings")
81         << QStringLiteral("manual") << QStringLiteral("mastersthesis")
82         << QStringLiteral("misc") << QStringLiteral("phdthesis")
83         << QStringLiteral("proceedings") << QStringLiteral("techreport")
84         << QStringLiteral("unpublished") << QStringLiteral("periodical")
85         << QStringLiteral("conference");
86   field = new Field(QStringLiteral("entry-type"), i18n("Entry Type"), types);
87   field->setProperty(bibtex, QStringLiteral("entry-type"));
88   field->setCategory(i18n(bibtex_general));
89   field->setFlags(Field::AllowGrouped | Field::NoDelete);
90   field->setDescription(i18n("These entry types are specific to bibtex. See the bibtex documentation."));
91   list.append(field);
92 
93   field = new Field(QStringLiteral("author"), i18n("Author"));
94   field->setProperty(bibtex, QStringLiteral("author"));
95   field->setCategory(i18n(bibtex_general));
96   field->setFlags(Field::AllowCompletion | Field::AllowMultiple | Field::AllowGrouped);
97   field->setFormatType(FieldFormat::FormatName);
98   list.append(field);
99 
100   field = new Field(QStringLiteral("bibtex-key"), i18n("Bibtex Key"));
101   field->setProperty(bibtex, QStringLiteral("key"));
102   field->setCategory(i18n("General"));
103   field->setFlags(Field::NoDelete);
104   list.append(field);
105 
106   field = new Field(QStringLiteral("booktitle"), i18n("Book Title"));
107   field->setProperty(bibtex, QStringLiteral("booktitle"));
108   field->setCategory(i18n(bibtex_general));
109   field->setFormatType(FieldFormat::FormatTitle);
110   list.append(field);
111 
112   field = new Field(QStringLiteral("editor"), i18n("Editor"));
113   field->setProperty(bibtex, QStringLiteral("editor"));
114   field->setCategory(i18n(bibtex_general));
115   field->setFlags(Field::AllowCompletion | Field::AllowMultiple | Field::AllowGrouped);
116   field->setFormatType(FieldFormat::FormatName);
117   list.append(field);
118 
119   field = new Field(QStringLiteral("organization"), i18n("Organization"));
120   field->setProperty(bibtex, QStringLiteral("organization"));
121   field->setCategory(i18n(bibtex_general));
122   field->setFlags(Field::AllowCompletion | Field::AllowGrouped);
123   field->setFormatType(FieldFormat::FormatPlain);
124   list.append(field);
125 
126 //  field = new Field(QLatin1String("institution"), i18n("Institution"));
127 //  field->setProperty(QLatin1String("bibtex"), QLatin1String("institution"));
128 //  field->setCategory(i18n(bibtex_general));
129 //  field->setFlags(Field::AllowDelete);
130 //  field->setFormatType(FieldFormat::FormatTitle);
131 //  list.append(field);
132 
133 /******************* Publishing ****************************/
134   field = new Field(QStringLiteral("publisher"), i18n("Publisher"));
135   field->setProperty(bibtex, QStringLiteral("publisher"));
136   field->setCategory(i18n(bibtex_publishing));
137   field->setFlags(Field::AllowCompletion | Field::AllowGrouped);
138   field->setFormatType(FieldFormat::FormatPlain);
139   list.append(field);
140 
141   field = new Field(QStringLiteral("address"), i18n("Address"));
142   field->setProperty(bibtex, QStringLiteral("address"));
143   field->setCategory(i18n(bibtex_publishing));
144   field->setFlags(Field::AllowCompletion | Field::AllowGrouped);
145   list.append(field);
146 
147   field = new Field(QStringLiteral("edition"), i18n("Edition"));
148   field->setProperty(bibtex, QStringLiteral("edition"));
149   field->setCategory(i18n(bibtex_publishing));
150   field->setFlags(Field::AllowCompletion);
151   list.append(field);
152 
153   // don't make it a number, it could have latex processing commands in it
154   field = new Field(QStringLiteral("pages"), i18n("Pages"));
155   field->setProperty(bibtex, QStringLiteral("pages"));
156   field->setCategory(i18n(bibtex_publishing));
157   list.append(field);
158 
159   field = new Field(QStringLiteral("year"), i18n("Year"), Field::Number);
160   field->setProperty(bibtex, QStringLiteral("year"));
161   field->setCategory(i18n(bibtex_publishing));
162   field->setFlags(Field::AllowGrouped);
163   list.append(field);
164 
165   field = Field::createDefaultField(Field::IsbnField);
166   field->setProperty(bibtex, QStringLiteral("isbn"));
167   field->setCategory(i18n(bibtex_publishing));
168   list.append(field);
169 
170   field = new Field(QStringLiteral("journal"), i18n("Journal"));
171   field->setProperty(bibtex, QStringLiteral("journal"));
172   field->setCategory(i18n(bibtex_publishing));
173   field->setFlags(Field::AllowCompletion | Field::AllowGrouped);
174   field->setFormatType(FieldFormat::FormatPlain);
175   list.append(field);
176 
177   field = new Field(QStringLiteral("doi"), i18n("DOI"));
178   field->setProperty(bibtex, QStringLiteral("doi"));
179   field->setCategory(i18n(bibtex_publishing));
180   field->setDescription(i18n("Digital Object Identifier"));
181   list.append(field);
182 
183   // could make this a string list, but since bibtex import could have funky values
184   // keep it an editbox
185   field = new Field(QStringLiteral("month"), i18n("Month"));
186   field->setProperty(bibtex, QStringLiteral("month"));
187   field->setCategory(i18n(bibtex_publishing));
188   field->setFlags(Field::AllowCompletion);
189   list.append(field);
190 
191   field = new Field(QStringLiteral("number"), i18n("Number"), Field::Number);
192   field->setProperty(bibtex, QStringLiteral("number"));
193   field->setCategory(i18n(bibtex_publishing));
194   list.append(field);
195 
196   field = new Field(QStringLiteral("howpublished"), i18n("How Published"));
197   field->setProperty(bibtex, QStringLiteral("howpublished"));
198   field->setCategory(i18n(bibtex_publishing));
199   list.append(field);
200 
201 //  field = new Field(QLatin1String("school"), i18n("School"));
202 //  field->setProperty(QLatin1String("bibtex"), QLatin1String("school"));
203 //  field->setCategory(i18n(bibtex_publishing));
204 //  field->setFlags(Field::AllowCompletion | Field::AllowGrouped);
205 //  list.append(field);
206 
207 /******************* Classification ****************************/
208   field = new Field(QStringLiteral("chapter"), i18n("Chapter"), Field::Number);
209   field->setProperty(bibtex, QStringLiteral("chapter"));
210   field->setCategory(i18n(bibtex_misc));
211   list.append(field);
212 
213   field = new Field(QStringLiteral("series"), i18n("Series"));
214   field->setProperty(bibtex, QStringLiteral("series"));
215   field->setCategory(i18n(bibtex_misc));
216   field->setFlags(Field::AllowCompletion | Field::AllowGrouped);
217   field->setFormatType(FieldFormat::FormatTitle);
218   list.append(field);
219 
220   field = new Field(QStringLiteral("volume"), i18nc("A number field in a bibliography", "Volume"), Field::Number);
221   field->setProperty(bibtex, QStringLiteral("volume"));
222   field->setCategory(i18n(bibtex_misc));
223   list.append(field);
224 
225   field = new Field(QStringLiteral("crossref"), i18n("Cross-Reference"));
226   field->setProperty(bibtex, QStringLiteral("crossref"));
227   field->setCategory(i18n(bibtex_misc));
228   list.append(field);
229 
230 //  field = new Field(QLatin1String("annote"), i18n("Annotation"));
231 //  field->setProperty(QLatin1String("bibtex"), QLatin1String("annote"));
232 //  field->setCategory(i18n(bibtex_misc));
233 //  list.append(field);
234 
235   field = new Field(QStringLiteral("keyword"), i18n("Keywords"));
236   field->setProperty(bibtex, QStringLiteral("keywords"));
237   field->setCategory(i18n(bibtex_misc));
238   field->setFlags(Field::AllowCompletion | Field::AllowMultiple | Field::AllowGrouped);
239   list.append(field);
240 
241   field = new Field(QStringLiteral("url"), i18n("URL"), Field::URL);
242   field->setProperty(bibtex, QStringLiteral("url"));
243   field->setCategory(i18n(bibtex_misc));
244   list.append(field);
245 
246   field = new Field(QStringLiteral("abstract"), i18n("Abstract"), Field::Para);
247   field->setProperty(bibtex, QStringLiteral("abstract"));
248   list.append(field);
249 
250   field = new Field(QStringLiteral("note"), i18n("Notes"), Field::Para);
251   field->setProperty(bibtex, QStringLiteral("note"));
252   list.append(field);
253 
254   field = Field::createDefaultField(Field::IDField);
255   field->setCategory(i18n(bibtex_misc));
256   list.append(field);
257 
258   field = Field::createDefaultField(Field::CreatedDateField);
259   field->setCategory(i18n(bibtex_misc));
260   list.append(field);
261 
262   field = Field::createDefaultField(Field::ModifiedDateField);
263   field->setCategory(i18n(bibtex_misc));
264   list.append(field);
265 
266   return list;
267 }
268 
addField(Tellico::Data::FieldPtr field_)269 bool BibtexCollection::addField(Tellico::Data::FieldPtr field_) {
270   if(!field_) {
271     return false;
272   }
273   bool success = Collection::addField(field_);
274   if(success) {
275     const QString bibtex = field_->property(QStringLiteral("bibtex"));
276     if(!bibtex.isEmpty()) {
277       m_bibtexFieldDict.insert(bibtex, field_.data());
278     }
279   }
280   return success;
281 }
282 
modifyField(Tellico::Data::FieldPtr newField_)283 bool BibtexCollection::modifyField(Tellico::Data::FieldPtr newField_) {
284   if(!newField_) {
285     return false;
286   }
287 //  myDebug();
288   const QString bibtex = QStringLiteral("bibtex");
289   bool success = Collection::modifyField(newField_);
290   FieldPtr oldField = fieldByName(newField_->name());
291   const QString oldBibtex = oldField->property(bibtex);
292   const QString newBibtex = newField_->property(bibtex);
293   if(!oldBibtex.isEmpty()) {
294     success &= (m_bibtexFieldDict.remove(oldBibtex) != 0);
295   }
296   if(!newBibtex.isEmpty()) {
297     oldField->setProperty(bibtex, newBibtex);
298     m_bibtexFieldDict.insert(newBibtex, oldField.data());
299   }
300   return success;
301 }
302 
removeField(Tellico::Data::FieldPtr field_,bool force_)303 bool BibtexCollection::removeField(Tellico::Data::FieldPtr field_, bool force_) {
304   if(!field_) {
305     return false;
306   }
307 //  myDebug();
308   bool success = true;
309   const QString bibtex = field_->property(QStringLiteral("bibtex"));
310   if(!bibtex.isEmpty()) {
311     success &= (m_bibtexFieldDict.remove(bibtex) != 0);
312   }
313   return success && Collection::removeField(field_, force_);
314 }
315 
removeField(const QString & name_,bool force_)316 bool BibtexCollection::removeField(const QString& name_, bool force_) {
317   return removeField(fieldByName(name_), force_);
318 }
319 
fieldByBibtexName(const QString & bibtex_) const320 Tellico::Data::FieldPtr BibtexCollection::fieldByBibtexName(const QString& bibtex_) const {
321   return FieldPtr(m_bibtexFieldDict.contains(bibtex_) ? m_bibtexFieldDict.value(bibtex_) : nullptr);
322 }
323 
entryByBibtexKey(const QString & key_) const324 Tellico::Data::EntryPtr BibtexCollection::entryByBibtexKey(const QString& key_) const {
325   EntryPtr entry;
326   // we do assume unique keys
327   foreach(EntryPtr e, entries()) {
328     if(e->field(QStringLiteral("bibtex-key")) == key_) {
329       entry = e;
330       break;
331     }
332   }
333   return entry;
334 }
335 
/**
 * Returns a copy of @p text_ after running it through
 * BibtexHandler::cleanText(); the argument itself is left untouched.
 */
QString BibtexCollection::prepareText(const QString& text_) const {
  QString cleaned(text_);
  BibtexHandler::cleanText(cleaned);
  return cleaned;
}
341 
342 // same as BookCollection::sameEntry()
sameEntry(Tellico::Data::EntryPtr entry1_,Tellico::Data::EntryPtr entry2_) const343 int BibtexCollection::sameEntry(Tellico::Data::EntryPtr entry1_, Tellico::Data::EntryPtr entry2_) const {
344   // equal identifiers are easy, give it a weight of 100
345   if(EntryComparison::score(entry1_, entry2_, QStringLiteral("isbn"),  this) > 0 ||
346      EntryComparison::score(entry1_, entry2_, QStringLiteral("lccn"),  this) > 0 ||
347      EntryComparison::score(entry1_, entry2_, QStringLiteral("doi"),   this) > 0 ||
348      EntryComparison::score(entry1_, entry2_, QStringLiteral("pmid"),  this) > 0 ||
349      EntryComparison::score(entry1_, entry2_, QStringLiteral("arxiv"), this) > 0) {
350     return 100; // good match
351   }
352   int res = 3*EntryComparison::score(entry1_, entry2_, QStringLiteral("title"), this);
353 //  if(res == 0) {
354 //    myDebug() << "different titles for " << entry1_->title() << " vs. "
355 //              << entry2_->title();
356 //  }
357   res += EntryComparison::score(entry1_, entry2_, QStringLiteral("author"),   this);
358   res += EntryComparison::score(entry1_, entry2_, QStringLiteral("cr_year"),  this);
359   res += EntryComparison::score(entry1_, entry2_, QStringLiteral("pub_year"), this);
360   res += EntryComparison::score(entry1_, entry2_, QStringLiteral("binding"),  this);
361   return res;
362 }
363 
364 // static
convertBookCollection(Tellico::Data::CollPtr coll_)365 Tellico::Data::CollPtr BibtexCollection::convertBookCollection(Tellico::Data::CollPtr coll_) {
366   const QString bibtex = QStringLiteral("bibtex");
367   BibtexCollection* coll = new BibtexCollection(false, coll_->title());
368   CollPtr collPtr(coll);
369   FieldList fields = coll_->fields();
370   foreach(FieldPtr fIt, fields) {
371     FieldPtr field(new Field(*fIt));
372 
373     // if it already has a bibtex property, skip it
374     if(!field->property(bibtex).isEmpty()) {
375       coll->addField(field);
376       continue;
377     }
378 
379     // be sure to set bibtex property before adding it though
380     QString name = field->name();
381     // this first group has bibtex field names the same as their own field name
382     if(name == QLatin1String("title")
383        || name == QLatin1String("author")
384        || name == QLatin1String("editor")
385        || name == QLatin1String("edition")
386        || name == QLatin1String("publisher")
387        || name == QLatin1String("isbn")
388        || name == QLatin1String("lccn")
389        || name == QLatin1String("url")
390        || name == QLatin1String("language")
391        || name == QLatin1String("pages")
392        || name == QLatin1String("series")) {
393       field->setProperty(bibtex, name);
394     } else if(name == QLatin1String("series_num")) {
395       field->setProperty(bibtex, QStringLiteral("number"));
396     } else if(name == QLatin1String("pur_price")) {
397       field->setProperty(bibtex, QStringLiteral("price"));
398     } else if(name == QLatin1String("cr_year")) {
399       field->setProperty(bibtex, QStringLiteral("year"));
400     } else if(name == QLatin1String("bibtex-id")) {
401       field->setProperty(bibtex, QStringLiteral("key"));
402     } else if(name == QLatin1String("keyword")) {
403       field->setProperty(bibtex, QStringLiteral("keywords"));
404     } else if(name == QLatin1String("comments")) {
405       field->setProperty(bibtex, QStringLiteral("note"));
406     }
407     coll->addField(field);
408   }
409 
410   // also need to add required fields, those with NoDelete set
411   foreach(FieldPtr defaultField, coll->defaultFields()) {
412     if(!coll->hasField(defaultField->name()) && defaultField->hasFlag(Field::NoDelete)) {
413       // but don't add a Bibtex Key if there's already a bibtex-id
414       if(defaultField->property(bibtex) != QLatin1String("key")
415          || !coll->hasField(QStringLiteral("bibtex-id"))) {
416         coll->addField(defaultField);
417       }
418     }
419   }
420 
421   // set the entry-type to book
422   FieldPtr field = coll->fieldByBibtexName(QStringLiteral("entry-type"));
423   QString entryTypeName;
424   if(field) {
425     entryTypeName = field->name();
426   } else {
427     myWarning() << "there must be an entry type field";
428   }
429 
430   EntryList newEntries;
431   foreach(EntryPtr entry, coll_->entries()) {
432     EntryPtr newEntry(new Entry(*entry));
433     newEntry->setCollection(collPtr);
434     if(!entryTypeName.isEmpty()) {
435       newEntry->setField(entryTypeName, QStringLiteral("book"));
436     }
437     newEntries.append(newEntry);
438   }
439   coll->addEntries(newEntries);
440 
441   return collPtr;
442 }
443 
setFieldValue(Data::EntryPtr entry_,const QString & bibtexField_,const QString & value_,Data::CollPtr existingColl_)444 bool BibtexCollection::setFieldValue(Data::EntryPtr entry_, const QString& bibtexField_, const QString& value_, Data::CollPtr existingColl_) {
445   Q_ASSERT(entry_->collection()->type() == Collection::Bibtex);
446   BibtexCollection* c = static_cast<BibtexCollection*>(entry_->collection().data());
447   FieldPtr field = c->fieldByBibtexName(bibtexField_);
448   // special-case: "keyword" and "keywords" should be the same field.
449   if(!field && bibtexField_ == QLatin1String("keyword")) {
450     field = c->fieldByBibtexName(QStringLiteral("keywords"));
451   }
452   if(!field) {
453     // it was the case that the default bibliography did not have a bibtex property for keywords
454     // so a "keywords" field would get created in the imported collection
455     // but the existing collection had a field "keyword" so the values would not get imported
456     // here, check to see if the current collection has a field with the same bibtex name and
457     // use it instead of creating a new one
458     BibtexCollection* existingColl = dynamic_cast<BibtexCollection*>(existingColl_.data());
459     FieldPtr existingField;
460     if(existingColl && existingColl->type() == Collection::Bibtex) {
461       existingField = existingColl->fieldByBibtexName(bibtexField_);
462     }
463     if(existingField) {
464       field = new Field(*existingField);
465     } else if(value_.length() < 100) {
466       // arbitrarily say if the value has more than 100 chars, then it's a paragraph
467       QString vlower = value_.toLower();
468       // special case, try to detect URLs
469       if(bibtexField_ == QLatin1String("url")
470          || vlower.startsWith(QLatin1String("http")) // may also be https
471          || vlower.startsWith(QLatin1String("ftp:/"))
472          || vlower.startsWith(QLatin1String("file:/"))
473          || vlower.startsWith(QLatin1String("/"))) { // assume this indicates a local path
474         myDebug() << "creating a URL field for" << bibtexField_;
475         field = new Field(bibtexField_, KStringHandler::capwords(bibtexField_), Field::URL);
476       } else {
477         myDebug() << "creating a LINE field for" << bibtexField_;
478         field = new Field(bibtexField_, KStringHandler::capwords(bibtexField_), Field::Line);
479       }
480       field->setCategory(i18n("Unknown"));
481     } else {
482       myDebug() << "creating a PARA field for" << bibtexField_;
483       field = new Field(bibtexField_, KStringHandler::capwords(bibtexField_), Field::Para);
484     }
485     field->setProperty(QStringLiteral("bibtex"), bibtexField_);
486     c->addField(field);
487   }
488   // special case keywords, replace commas with semi-colons so they get separated
489   QString value = value_;
490   Q_ASSERT(field);
491   if(bibtexField_.startsWith(QLatin1String("keyword"))) {
492     value.replace(QRegularExpression(QLatin1String("\\s*,\\s*")), FieldFormat::delimiterString());
493     // special case refbase bibtex export, with multiple keywords fields
494     QString oValue = entry_->field(field);
495     if(!oValue.isEmpty()) {
496       value = oValue + FieldFormat::delimiterString() + value;
497     }
498   // special case for tilde, since it's a non-breaking space in LateX
499   // replace it EXCEPT for URL or DOI fields
500   } else if(bibtexField_ != QLatin1String("doi") && field->type() != Field::URL) {
501     value.replace(QLatin1Char('~'), QChar(0xA0));
502   } else if(field->type() == Field::URL || bibtexField_ == QLatin1String("url")) {
503     // special case for url package
504     if(value.startsWith(QLatin1String("\\url{")) && value.endsWith(QLatin1Char('}'))) {
505       value.remove(0, 5).chop(1);
506     }
507   }
508   return entry_->setField(field, value);
509 }
510 
duplicateBibtexKeys() const511 Tellico::Data::EntryList BibtexCollection::duplicateBibtexKeys() const {
512   QSet<EntryPtr> dupes;
513   QHash<QString, EntryPtr> keyHash;
514 
515   const QString keyField = QStringLiteral("bibtex-key");
516   QString keyValue;
517   foreach(EntryPtr entry, entries()) {
518     keyValue = entry->field(keyField);
519     if(keyHash.contains(keyValue)) {
520       dupes << keyHash.value(keyValue) << entry;
521      } else {
522        keyHash.insert(keyValue, entry);
523      }
524   }
525   return dupes.values();
526 }
527