1 /*
2 * BibTeX Converter
3 * Copyright (C) 2010-2021 by Thomas Dreibholz
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * Contact: dreibh@iem.uni-due.de
19 */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <math.h>
25
26 #include <algorithm>
27 #include <string>
28 #include <set>
29 #include <vector>
30
31 #include "publicationset.h"
32
33
34 // ###### Constructor #######################################################
PublicationSet(const size_t maxSize)35 PublicationSet::PublicationSet(const size_t maxSize)
36 {
37 maxEntries = maxSize;
38 publicationArray = new Node*[maxEntries];
39 assert(publicationArray != NULL);
40 clearAll();
41 }
42
43
44 // ###### Destructor ########################################################
~PublicationSet()45 PublicationSet::~PublicationSet()
46 {
47 delete [] publicationArray;
48 maxEntries = 0;
49 entries = 0;
50 }
51
52
53 // ###### Clear complete set ################################################
clearAll()54 void PublicationSet::clearAll()
55 {
56 entries = 0;
57 for(size_t i = 0;i < maxEntries; i++) {
58 publicationArray[i] = NULL;
59 }
60 }
61
62
63 // ###### Add a single node #################################################
add(Node * publication)64 bool PublicationSet::add(Node* publication)
65 {
66 assert(entries + 1 <= maxEntries);
67 for(size_t i = 0; i < entries; i++) {
68 if(publicationArray[entries] == publication) {
69 return(false);
70 }
71 }
72 publicationArray[entries] = publication;
73 entries++;
74 return(true);
75 }
76
77
78 // ###### Add all nodes from collection #####################################
addAll(Node * publication)79 void PublicationSet::addAll(Node* publication)
80 {
81 while(publication != NULL) {
82 if(add(publication)) {
83 publication->anchor = publication->keyword;
84 }
85 publication = publication->next;
86 }
87 }
88
89
// NOTE: PublicationSet::sort() will *NOT* be thread-safe!
// qsort() cannot pass extra arguments to its comparison callback, so
// PublicationSet::sort() stores its parameters in the following file-local
// variables for the duration of the qsort() call.

// Array of child-node keywords to compare by, one per sort level.
static const std::string* gSortKey = NULL;
// Array with one flag per sort level: false negates the comparison result.
static const bool* gSortAscending = NULL;
// Number of sort levels, i.e. number of usable elements in the two arrays.
static size_t gMaxSortLevels = 0;
94
95 // ###### Node comparison function for qsort() ##############################
publicationNodeComparisonFunction(const void * ptr1,const void * ptr2)96 static int publicationNodeComparisonFunction(const void* ptr1, const void* ptr2)
97 {
98 const Node* node1 = *((const Node**)ptr1);
99 const Node* node2 = *((const Node**)ptr2);
100
101 for(size_t i = 0; i < gMaxSortLevels; i++) {
102 const Node* child1 = findChildNode((Node*)node1, gSortKey[i].c_str());
103 const Node* child2 = findChildNode((Node*)node2, gSortKey[i].c_str());
104 int result = 0;
105 if( (child1 == NULL) && (child2 != NULL) ) {
106 result = 1;
107 }
108 if( (child1 != NULL) && (child2 == NULL) ) {
109 result = -1;
110 }
111 else if( (child1 != NULL) && (child2 != NULL) ) {
112 if(child1->value < child2->value) {
113 result = -1;
114 }
115 else if(child1->value > child2->value) {
116 result = 1;
117 }
118 }
119
120 if(!gSortAscending[i]) {
121 result *= -1;
122 }
123
124 if(result != 0) {
125 return(result);
126 }
127 }
128 return(0);
129 }
130
131 // ###### Sort publications #################################################
sort(const std::string * sortKey,const bool * sortAscending,const size_t maxSortLevels)132 void PublicationSet::sort(const std::string* sortKey,
133 const bool* sortAscending,
134 const size_t maxSortLevels)
135 {
136 gMaxSortLevels = maxSortLevels;
137 gSortKey = sortKey;
138 gSortAscending = sortAscending;
139
140 qsort(publicationArray, entries, sizeof(Node*), publicationNodeComparisonFunction);
141
142 gSortKey = NULL;
143 gSortAscending = NULL;
144 }
145
146
147
148 // ###### Generate name for file download ###################################
makeDownloadFileName(const char * downloadDirectory,const std::string & anchor,const std::string & mimeString)149 std::string PublicationSet::makeDownloadFileName(const char* downloadDirectory,
150 const std::string& anchor,
151 const std::string& mimeString)
152 {
153 std::string extension = "data";
154 if(mimeString == "application/pdf") {
155 extension = ".pdf";
156 }
157 else if(mimeString == "application/xml") {
158 extension = ".xml";
159 }
160 else if(mimeString == "text/html") {
161 extension = ".html";
162 }
163 else if(mimeString == "text/plain") {
164 extension = ".txt";
165 }
166
167 if( (downloadDirectory != NULL) && (strlen(downloadDirectory) != 0) ) {
168 return((std::string)downloadDirectory + "/" + anchor + extension);
169 }
170 return(anchor + extension);
171 }
172
173
174 // ###### Export to BibTeX ##################################################
exportPublicationSetToBibTeX(PublicationSet * publicationSet,const char * fileNamePrefix,const bool separateFiles,const bool skipNotesWithISBNandISSN,const bool addNotesWithISBNandISSN,const bool addUrlCommand)175 bool PublicationSet::exportPublicationSetToBibTeX(PublicationSet* publicationSet,
176 const char* fileNamePrefix,
177 const bool separateFiles,
178 const bool skipNotesWithISBNandISSN,
179 const bool addNotesWithISBNandISSN,
180 const bool addUrlCommand)
181 {
182 FILE* fh = NULL;
183 if(!separateFiles) {
184 fh = fopen(fileNamePrefix, "w");
185 if(fh == NULL) {
186 fprintf(stderr, "ERROR: Unable to create BibTeX file %s!\n", fileNamePrefix);
187 return(false);
188 }
189 }
190
191 for(size_t index = 0; index < publicationSet->size(); index++) {
192 const Node* publication = publicationSet->get(index);
193 if(publication->value == "Comment") {
194 if(fh != NULL) {
195 fprintf(fh, "%%%s\n\n", publication->keyword.c_str());
196 }
197 }
198 else {
199 if(separateFiles) {
200 char fileName[1024];
201 snprintf((char*)&fileName, sizeof(fileName), "%s%s.bib", fileNamePrefix, publication->keyword.c_str());
202 fh = fopen(fileName, "w");
203 if(fh == NULL) {
204 fprintf(stderr, "ERROR: Unable to create XML file %s!\n", fileName);
205 return(false);
206 }
207 }
208
209 fprintf(fh, "@%s{ %s,\n", publication->value.c_str(),
210 publication->keyword.c_str());
211
212 bool empty = true;
213 Node* child = publication->child;
214 const Node* issn = NULL;
215 const Node* isbn = NULL;
216 const char* separator = "";
217 while(child != NULL) {
218 if(!empty) {
219 separator = ",\n";
220 }
221 empty = false;
222
223 if( (child->keyword == "title") ||
224 (child->keyword == "booktitle") ||
225 (child->keyword == "series") ||
226 (child->keyword == "journal") ||
227 (child->keyword == "abstract") ) {
228 fprintf(fh, "%s\t%s = \"{%s}\"", separator, child->keyword.c_str(), child->value.c_str());
229 }
230 else if( (child->keyword == "day") ||
231 (child->keyword == "year") ) {
232 fprintf(fh, "%s\t%s = \"%u\"", separator, child->keyword.c_str(), child->number);
233 }
234 else if( (child->keyword == "month") ) {
235 static const char* bibtexMonthNames[12] = {"jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"};
236 if((child->number >= 1) && (child->number <= 12)) {
237 fprintf(fh, "%s\t%s = %s", separator, child->keyword.c_str(), bibtexMonthNames[child->number - 1]);
238 }
239 }
240 else if( (child->keyword == "url") ) {
241 if(addUrlCommand) {
242 fprintf(fh, "%s\t%s = \"\\url{%s}\"", separator, child->keyword.c_str(), urlToLaTeX(child->value).c_str());
243 }
244 else {
245 fprintf(fh, "%s\t%s = \"%s\"", separator, child->keyword.c_str(), urlToLaTeX(child->value).c_str());
246 }
247 }
248 else if( (child->keyword == "doi") ) {
249 fprintf(fh, "%s\t%s = \"%s\"", separator, child->keyword.c_str(), urlToLaTeX(child->value).c_str());
250 }
251 else if( (child->keyword == "note") ) {
252 if( (skipNotesWithISBNandISSN == false) ||
253 ((strncmp(child->value.c_str(), "ISBN", 4) != 0) &&
254 (strncmp(child->value.c_str(), "ISSN", 4) != 0) &&
255 (strncmp(child->value.c_str(), "{ISBN}", 6) != 0) &&
256 (strncmp(child->value.c_str(), "{ISSN}", 6) != 0)) ) {
257 fprintf(fh, "%s\t%s = \"%s\"", separator, child->keyword.c_str(), child->value.c_str());
258 }
259 }
260 else if( (child->keyword == "removeme") ) {
261 // Skip this entry. Useful for combining BibTeXConv with "sed" filtering.
262 }
263 else {
264 if(child->keyword == "isbn") {
265 isbn = child;
266 }
267 else if(child->keyword == "issn") {
268 issn = child;
269 }
270 fprintf(fh, "%s\t%s = \"%s\"", separator, child->keyword.c_str(), child->value.c_str());
271 }
272 child = child->next;
273 }
274
275 if( (addNotesWithISBNandISSN) &&
276 ((isbn != NULL) || (issn != NULL)) ) {
277 if(isbn) {
278 fprintf(fh, "%s\tnote = \"{ISBN} %s\"", separator, isbn->value.c_str());
279 }
280 else if(issn) {
281 fprintf(fh, "%s\tnote = \"{ISSN} %s\"", separator, issn->value.c_str());
282 }
283 }
284
285 fputs("\n}\n\n", fh);
286 }
287
288 if( (separateFiles) && (fh != NULL)) {
289 fclose(fh);
290 fh = NULL;
291 }
292 }
293
294 if(!separateFiles) {
295 fclose(fh);
296 }
297 return(true);
298 }
299
300
301 // ###### Export to XML #####################################################
exportPublicationSetToXML(PublicationSet * publicationSet,const char * fileNamePrefix,const bool separateFiles)302 bool PublicationSet::exportPublicationSetToXML(PublicationSet* publicationSet,
303 const char* fileNamePrefix,
304 const bool separateFiles)
305 {
306 FILE* fh = NULL;
307 if(!separateFiles) {
308 fh = fopen(fileNamePrefix, "w");
309 if(fh == NULL) {
310 fprintf(stderr, "ERROR: Unable to create XML file %s!\n", fileNamePrefix);
311 return(false);
312 }
313 fputs("<?xml version='1.0' encoding='UTF-8'?>\n", fh);
314 fputs("<!DOCTYPE rfc PUBLIC '-//IETF//DTD RFC 2629//EN' 'http://xml.resource.org/authoring/rfc2629.dtd'>\n", fh);
315 }
316
317 for(size_t index = 0; index < publicationSet->size(); index++) {
318 Node* publication = publicationSet->get(index);
319
320 if(publication->value == "Comment") {
321 if(fh != NULL) {
322 fprintf(fh, "<!-- %s -->\n\n", publication->keyword.c_str());
323 }
324 }
325 else {
326 if(separateFiles) {
327 char fileName[1024];
328 snprintf((char*)&fileName, sizeof(fileName), "%s%s.xml", fileNamePrefix, publication->keyword.c_str());
329 fh = fopen(fileName, "w");
330 if(fh == NULL) {
331 fprintf(stderr, "ERROR: Unable to create XML file %s!\n", fileName);
332 return(false);
333 }
334 fputs("<?xml version='1.0' encoding='UTF-8'?>\n", fh);
335 fputs("<!DOCTYPE rfc PUBLIC '-//IETF//DTD RFC 2629//EN' 'http://xml.resource.org/authoring/rfc2629.dtd'>\n", fh);
336 }
337
338 const Node* title = findChildNode(publication, "title");
339 const Node* author = findChildNode(publication, "author");
340 const Node* year = findChildNode(publication, "year");
341 const Node* month = findChildNode(publication, "month");
342 const Node* day = findChildNode(publication, "day");
343 const Node* url = findChildNode(publication, "url");
344 const Node* urlMime = findChildNode(publication, "url.mime");
345 const Node* urlSize = findChildNode(publication, "url.size");
346 const Node* type = findChildNode(publication, "type");
347 const Node* howpublished = findChildNode(publication, "howpublished");
348 const Node* booktitle = findChildNode(publication, "booktitle");
349 const Node* journal = findChildNode(publication, "journal");
350 const Node* volume = findChildNode(publication, "volume");
351 const Node* number = findChildNode(publication, "number");
352 const Node* pages = findChildNode(publication, "pages");
353 const Node* isbn = findChildNode(publication, "isbn");
354 const Node* issn = findChildNode(publication, "issn");
355 const Node* doi = findChildNode(publication, "doi");
356
357 if(url == NULL) {
358 fprintf(fh, "<reference anchor=\"%s\">\n",
359 labelToXMLLabel(publication->keyword).c_str());
360 }
361 else {
362 fprintf(fh, "<reference anchor=\"%s\" target=\"%s\">\n",
363 labelToXMLLabel(publication->keyword).c_str(),
364 url->value.c_str());
365 }
366 fputs("\t<front>\n", fh);
367 if(title) {
368 fprintf(fh, "\t\t<title>%s</title>\n", string2xml(title->value).c_str());
369 }
370 if(author) {
371 for(size_t authorIndex = 0; authorIndex < author->arguments.size(); authorIndex += 3) {
372 std::string familyName = author->arguments[authorIndex + 0];
373 std::string givenName = author->arguments[authorIndex + 1];
374 std::string initials = author->arguments[authorIndex + 2];
375 removeBrackets(familyName);
376 removeBrackets(givenName);
377 removeBrackets(initials);
378 fprintf(fh,
379 "\t\t<author initials=\"%s\" surname=\"%s\" fullname=\"%s\" />\n",
380 string2xml(initials).c_str(), string2xml(familyName).c_str(),
381 string2xml(givenName +
382 ((givenName != "") ? "~" : "") +
383 familyName).c_str());
384 }
385 }
386 if(year || month || day) {
387 fputs("\t\t<date ", fh);
388 if(day) {
389 fprintf(fh, "day=\"%u\" ", day->number);
390 }
391 if(month) {
392 static const char* xmlMonthNames[12] = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"};
393 if((month->number >= 1) && (month->number <= 12)) {
394 fprintf(fh, "month=\"%s\" ", xmlMonthNames[month->number - 1]);
395 }
396 }
397 if(year) {
398 fprintf(fh, "year=\"%u\" ", year->number);
399 }
400 fputs("/>\n", fh);
401 }
402 fputs("\t</front>\n", fh);
403
404 std::string seriesName = "";
405 std::string seriesValue = "";
406 if(howpublished) {
407 seriesName = howpublished->value;
408 }
409 if(booktitle) {
410 seriesName = booktitle->value;
411 }
412 if(journal) {
413 seriesName = journal->value;
414 }
415 if(type) {
416 seriesName = type->value;
417 if(number) {
418 if(seriesValue != "") {
419 seriesValue += ", ";
420 }
421 seriesValue += number->value;
422 number = NULL;
423 }
424 }
425 if(number) {
426 if(seriesValue != "") {
427 seriesValue += ", ";
428 }
429 seriesValue += "Number " + number->value;
430 }
431 if(volume) {
432 if(seriesValue != "") {
433 seriesValue += ", ";
434 }
435 seriesValue += "Volume " + volume->value;
436 }
437 if(pages) {
438 if(seriesValue != "") {
439 seriesValue += ", ";
440 }
441 seriesValue += "Pages " + pages->value;
442 }
443 if(issn) {
444 if(seriesValue != "") {
445 seriesValue += ", ";
446 }
447 seriesValue += "ISSN~" + issn->value;
448 }
449 if(isbn) {
450 if(seriesValue != "") {
451 seriesValue += ", ";
452 }
453 seriesValue += "ISBN~" + isbn->value;
454 }
455 if(doi) {
456 if(seriesValue != "") {
457 seriesValue += ", ";
458 }
459 seriesValue += "DOI~" + doi->value;
460 }
461 if((seriesName != "") || (seriesValue != "")) {
462 if(seriesValue == "") {
463 // This would produce an ugly space.
464 seriesValue = seriesName;
465 seriesName = "";
466 }
467 fprintf(fh, "\t<seriesInfo name=\"%s\" value=\"%s\" />\n",
468 string2xml(seriesName).c_str(),
469 string2xml(seriesValue).c_str());
470 }
471
472 if(url) {
473 std::string type = "";
474 if(urlMime) {
475 const size_t slash = urlMime->value.find("/");
476 if(slash != std::string::npos) {
477 type = urlMime->value.substr(slash + 1, urlMime->value.size() - slash);
478 std::transform(type.begin(), type.end(), type.begin(),
479 (int(*)(int))std::toupper);
480 if(type == "PLAIN") {
481 type = "TXT";
482 }
483 }
484 }
485 type = " type=\"" + type + "\"";
486
487 std::string octets = "";
488 if(urlSize) {
489 octets = format(" octets=\"%u\"", atol(urlSize->value.c_str()));
490 }
491
492 fprintf(fh, "\t<format%s%s target=\"%s\" />\n",
493 type.c_str(), octets.c_str(),
494 url->value.c_str());
495 }
496 fputs("</reference>\n\n", fh);
497 }
498
499 if( (separateFiles) && (fh != NULL)) {
500 fclose(fh);
501 fh = NULL;
502 }
503 }
504
505 if(!separateFiles) {
506 fclose(fh);
507 }
508 return(true);
509 }
510
511
// Element of the stack that PublicationSet::applyTemplate() keeps while it
// processes a printing template.
struct StackEntry {
   // Offset into the result string: applyTemplate() takes the text written
   // from this offset on as the string to test and erases it afterwards.
   size_t pos;
   // NOTE(review): presumably the "skip" state saved together with the
   // position -- its use is not visible in this part of the file; confirm.
   bool skip;
};
516
// NOTE: PublicationSet::applyTemplate() will *NOT* be thread-safe!
// Both counters are incremented on every applyTemplate() call. The naming
// template prints gNumber for %n and gTotalNumber for %N.
// NOTE(review): presumably gNumber is reset somewhere outside this part of
// the file while gTotalNumber keeps counting -- confirm.
static unsigned int gNumber = 0;
static unsigned int gTotalNumber = 0;
520
521
522 // ###### Get next action ##################################################
getNextAction(const char * inputString,size_t & counter)523 inline static std::string getNextAction(const char* inputString, size_t& counter)
524 {
525 std::string result;
526 if(inputString[0] == '{') {
527 std::string input((const char*)&inputString[1]);
528 result = extractToken(input, std::string("}"));
529 std::transform(result.begin(), result.end(), result.begin(), ::tolower);
530 counter += result.size() + 1;
531 }
532 else {
533 char str[2];
534 str[0] = inputString[0];
535 str[1] = 0x00;
536 result = std::string(str);
537 }
538 // fprintf(stderr,"R=<%s>\n",result.c_str());
539 return(result);
540 }
541
542
543 // ###### Apply printing template to publication ############################
applyTemplate(Node * publication,Node * prevPublication,Node * nextPublication,const char * namingTemplate,const std::string & printingTemplate,const std::vector<std::string> & monthNames,const std::string & nbsp,const std::string & lineBreak,const bool xmlStyle,const char * downloadDirectory,FILE * fh)544 std::string PublicationSet::applyTemplate(Node* publication,
545 Node* prevPublication,
546 Node* nextPublication,
547 const char* namingTemplate,
548 const std::string& printingTemplate,
549 const std::vector<std::string>& monthNames,
550 const std::string& nbsp,
551 const std::string& lineBreak,
552 const bool xmlStyle,
553 const char* downloadDirectory,
554 FILE* fh)
555 {
556 std::string result;
557 std::vector<StackEntry> stack;
558 Node* child;
559 Node* author = NULL;
560 size_t authorIndex = 0;
561 size_t authorBegin = std::string::npos;
562 bool skip = false;
563 const size_t printingTemplateSize = printingTemplate.size();
564 std::string type = std::string("");
565
566 gNumber++;
567 gTotalNumber++;
568
569 for(size_t i = 0; i < printingTemplateSize; i++) {
570 if( (printingTemplate[i] == '%') && (i + 1 < printingTemplateSize) ) {
571 const std::string action = getNextAction((const char*)&printingTemplate[i + 1], i);
572 if( (action == "L") || (action == "label") ) { // Original BibTeX label
573 result += string2utf8(publication->keyword, nbsp, lineBreak, xmlStyle);
574 }
575 else if(action == "html-label") { // Original BibTeX label
576 result += labelToHTMLLabel(string2utf8(publication->keyword, nbsp, lineBreak, xmlStyle));
577 }
578 else if( (action == "C") || (action == "anchor") ) { // Anchor
579 result += string2utf8(publication->anchor, nbsp, lineBreak, xmlStyle);
580 }
581 else if( (action == "c") || (action == "class") ) { // Class (e.g. TechReport, InProceedings, etc.)
582 result += string2utf8(publication->value, nbsp, lineBreak, xmlStyle);
583 }
584 else if( (action == "Z") || (action == "name") ) { // Name based on naming template
585 size_t p;
586 size_t begin = 0;
587 size_t len = strlen(namingTemplate);
588 bool inTemplate = false;
589 for(p = 0; p < len; p++) {
590 if(inTemplate == false) {
591 if(namingTemplate[p] == '%') {
592 inTemplate = true;
593 char str[p + 1];
594 if(p > begin) {
595 memcpy((char*)&str, &namingTemplate[begin], p - begin);
596 }
597 str[p - begin] = 0x00;
598 result += string2utf8(str, nbsp, lineBreak, xmlStyle);
599 begin = p + 1;
600 }
601 }
602 else {
603 if(namingTemplate[p] == '%') {
604 result += "%";
605 inTemplate = false;
606 begin = p + 1;
607 }
608 else if(isdigit(namingTemplate[p])) {
609 // Number
610 }
611 else if( (namingTemplate[p] == 'n') ||
612 (namingTemplate[p] == 'N') ) {
613 char str[p + 3];
614 str[0] = '%';
615 if(p > begin) {
616 memcpy((char*)&str[1], &namingTemplate[begin], p - begin);
617 }
618 str[p - begin + 1] = 'u';
619 str[p - begin + 2] = 0x00;
620 if(namingTemplate[p] == 'n') {
621 result += format(str, gNumber);
622 }
623 else if(namingTemplate[p] == 'N') {
624 result += format(str, gTotalNumber);
625 }
626 inTemplate = false;
627 begin = p + 1;
628 }
629 else {
630 fprintf(stderr, "ERROR: Bad naming template \"%s\"!\n", namingTemplate);
631 exit(1);
632 }
633 }
634 }
635 if(begin < p) {
636 result += string2utf8(&namingTemplate[begin], nbsp, lineBreak, xmlStyle);
637 }
638 }
639 else if( (action == "#") || (action == "download-file-name") ) { // Download file name
640 child = findChildNode(publication, "url.mime");
641 result += makeDownloadFileName(downloadDirectory, publication->keyword,
642 (child != NULL) ? child->value : "");
643 }
644 else if( (action == "a") || (action == "begin-author-loop") ) { // Author LOOP BEGIN
645 if(authorBegin != std::string::npos) {
646 fputs("ERROR: Unexpected author loop begin %a -> an author loop is still open!\n", stderr);
647 exit(1);
648 }
649 author = findChildNode(publication, "author");
650 authorIndex = 0;
651 authorBegin = i;
652 }
653 else if( (action == "g") || (action == "author-initials") ) { // Current author given name initials
654 if(author) {
655 std::string initials = author->arguments[authorIndex + 2];
656 removeBrackets(initials);
657 if(initials != "") {
658 result += string2utf8(initials, nbsp, lineBreak, xmlStyle);
659 }
660 else {
661 skip = true;
662 }
663 }
664 }
665 else if( (action == "G") || (action == "author-give-name") ) { // Current author given name
666 if(author) {
667 std::string givenName = author->arguments[authorIndex + 1];
668 removeBrackets(givenName);
669 if(givenName != "") {
670 result += string2utf8(givenName, nbsp, lineBreak, xmlStyle);
671 }
672 else {
673 skip = true;
674 }
675 }
676 }
677 else if( (action == "F") || (action == "author-family-name") ) { // Current author family name
678 if(author) {
679 std::string familyName = author->arguments[authorIndex + 0];
680 removeBrackets(familyName);
681 result += string2utf8(familyName, nbsp, lineBreak, xmlStyle);
682 }
683 }
684 else if( (action.substr(0, 3) == "is?") ||
685 (action.substr(0, 7) == "is-not?") ||
686 (action.substr(0, 13) == "is-less-than?") ||
687 (action.substr(0, 22) == "is-less-than-or-equal?") ||
688 (action.substr(0, 16) == "is-greater-than?") ||
689 (action.substr(0, 25) == "is-greater-than-or-equal?") ) { // IS string
690 if(i + 1 < printingTemplateSize) {
691 StackEntry entry = stack.back();
692 const std::string writtenString = result.substr(entry.pos);
693
694 if(skip == true) {
695 // Text will already be skipped ...
696 }
697 else if(action.substr(0, 3) == "is?") {
698 const std::string comparisonString = action.substr(3);
699 skip = ! (writtenString == comparisonString);
700 }
701 else if(action.substr(0, 7) == "is-not?") {
702 const std::string comparisonString = action.substr(7);
703 skip = ! (writtenString != comparisonString);
704 }
705 else if(action.substr(0, 13) < "is-less-than?") {
706 const std::string comparisonString = action.substr(13);
707 skip = ! (writtenString == comparisonString);
708 }
709 else if(action.substr(0, 22) <= "is-less-than-or-equal?") {
710 const std::string comparisonString = action.substr(22);
711 skip = ! (writtenString == comparisonString);
712 }
713 else if(action.substr(0, 16) > "is-greater-than?") {
714 const std::string comparisonString = action.substr(16);
715 skip = ! (writtenString == comparisonString);
716 }
717 else if(action.substr(0, 25) >= "is-greater-than-or-equal?") {
718 const std::string comparisonString = action.substr(25);
719 skip = ! (writtenString == comparisonString);
720 }
721
722 result.erase(entry.pos); // Remove the written "test" string.
723 }
724 }
725 else if( (action == "f") || (action == "is-first-author?") ) { // IS first author
726 if(skip == false) {
727 skip = ! (authorIndex == 0);
728 }
729 }
730 else if( (action == "n") || (action == "is-not-first-author?") ) { // IS not first author
731 if(skip == false) {
732 skip = ! ((author != NULL) && (authorIndex > 0));
733 }
734 }
735 else if( (action == "l") || (action == "is-last-author?") ) { // IS last author
736 if(skip == false) {
737 skip = ! ((author != NULL) && (authorIndex + 3 >= author->arguments.size()));
738 }
739 }
740 else if( (action == "A") || (action == "end-author-loop") ) { // Author LOOP EBD
741 if(authorBegin == std::string::npos) {
742 fputs("ERROR: Unexpected author loop end %A -> %a author loop begin needed first!\n", stderr);
743 exit(1);
744 }
745 authorIndex += 3;
746 if( (author != NULL) && (authorIndex < author->arguments.size()) ) {
747 i = authorBegin;
748 }
749 else {
750 author = NULL;
751 authorIndex = 0;
752 }
753 }
754 else if( (action == "T") || (action == "title") ) { // Title
755 child = findChildNode(publication, "title");
756 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
757 }
758 else if( (action == "H") || (action == "how-published") ) { // HowPublished
759 child = findChildNode(publication, "howpublished");
760 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
761 }
762 else if( (action == "B") || (action == "booktitle") ) { // Booktitle
763 child = findChildNode(publication, "booktitle");
764 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
765 }
766 else if( (action == "r") || (action == "series") ) { // Series
767 child = findChildNode(publication, "series");
768 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
769 }
770 else if( (action == "J") || (action == "journal") ) { // Journal
771 child = findChildNode(publication, "journal");
772 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
773 }
774 else if( (action == "E") || (action == "edition") ) { // Edition
775 child = findChildNode(publication, "edition");
776 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
777 }
778 else if( (action == "V") || (action == "volume") ) { // Volume
779 child = findChildNode(publication, "volume");
780 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
781 }
782 else if( (action == "t") || (action == "type") ) { // Type
783 child = findChildNode(publication, "type");
784 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
785 }
786 else if( (action == "N") || (action == "number") ) { // Number
787 child = findChildNode(publication, "number");
788 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
789 }
790 else if( (action == "P") || (action == "pages") ) { // Pages
791 child = findChildNode(publication, "pages");
792 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
793 }
794 else if(action == "numpages") { // Number of pages
795 child = findChildNode(publication, "numpages");
796 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
797 }
798 else if( (action == "@") || (action == "address") ) { // Address
799 child = findChildNode(publication, "address");
800 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
801 }
802 else if( (action == "Y") || (action == "year") ) { // Year
803 child = findChildNode(publication, "year");
804 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
805 }
806 else if( (action == "M") || (action == "month-name") ) { // Month as name
807 child = findChildNode(publication, "month");
808 if(child) {
809 if( (child->number >= 1) && (child->number <= 12) ) {
810 result += string2utf8(monthNames[child->number - 1], nbsp, lineBreak, xmlStyle);
811 } else { skip = true; }
812 } else { skip = true; }
813 }
814 else if( (action == "m") || (action == "month-number") ) { // Month as number
815 child = findChildNode(publication, "month");
816 if(child) {
817 char month[16];
818 snprintf((char*)&month, sizeof(month), "%d", child->number);
819 result += string2utf8(month, nbsp, lineBreak, xmlStyle);
820 } else { skip = true; }
821 }
822 else if( (action == "D") || (action == "day") ) { // Day
823 child = findChildNode(publication, "day");
824 if(child) {
825 char day[16];
826 snprintf((char*)&day, sizeof(day), "%d", child->number);
827 result += string2utf8(day, nbsp, lineBreak, xmlStyle);
828 } else { skip = true; }
829 }
830 else if( (action == "$") || (action == "publisher") ) { // Publisher
831 child = findChildNode(publication, "publisher");
832 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
833 }
834 else if( (action == "S") || (action == "school") ) { // School
835 child = findChildNode(publication, "school");
836 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
837 }
838 else if( (action == "?") || (action == "institution") ) { // Institution
839 child = findChildNode(publication, "institution");
840 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
841 }
842 else if( (action == "I") || (action == "isbn") ) { // ISBN
843 child = findChildNode(publication, "isbn");
844 if(child) { result += string2utf8("ISBN~" + child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
845 }
846 else if( (action == "i") || (action == "issn") ) { // ISSN
847 child = findChildNode(publication, "issn");
848 if(child) { result += string2utf8("ISSN~" + child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
849 }
850 else if( (action == "x") || (action == "language") ) { // Language
851 child = findChildNode(publication, "language");
852 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
853 }
854 else if( (action == "O") || (action == "content-language") ) { // Content Language
855 child = findChildNode(publication, "content-language");
856 if(child == NULL) { // No content language -> try same as "language" instead:
857 child = findChildNode(publication, "language");
858 }
859 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
860 }
861 else if( (action == "x") || (action == "xml-language") ) { // Language
862 child = findChildNode(publication, "language");
863 if(child) {
864 const char* language = getXMLLanguageFromLaTeX(child->value.c_str());
865 if(language != NULL) {
866 result += std::string(language);
867 } else { skip = true; }
868 } else { skip = true; }
869 }
870 else if( (action == "U") || (action == "url") ) { // URL
871 child = findChildNode(publication, "url");
872 if(child) { result += string2utf8(child->value, "", "", xmlStyle); } else { skip = true; }
873 }
874 else if( (action == "d") || (action == "doi") ) { // DOI
875 child = findChildNode(publication, "doi");
876 if(child) { result += string2utf8(child->value, "", "", xmlStyle); } else { skip = true; }
877 }
878 else if( (action == "q") || (action == "urn") ) { // URN
879 child = findChildNode(publication, "urn");
880 if(child) { result += string2utf8(child->value, "", "", xmlStyle); } else { skip = true; }
881 }
882 else if(action == "keywords") { // Keywords
883 child = findChildNode(publication, "keywords");
884 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
885 }
886 else if(action == "abstract") { // Abstract
887 child = findChildNode(publication, "abstract");
888 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
889 }
890 else if(action == "url-md5") { // MD5
891 child = findChildNode(publication, "url.md5");
892 if(child) { result += string2utf8(child->value, nbsp, lineBreak, xmlStyle); } else { skip = true; }
893 }
894 else if( (action == "z") || (action == "url-mime") ) { // URL mime type
895 child = findChildNode(publication, "url.mime");
896 if(child) { result += string2utf8(child->value, "", "", xmlStyle); } else { skip = true; }
897 }
898 else if( (action == "y") || (action == "url-type") ) { // URL type
899 child = findChildNode(publication, "url.mime");
900 if(child) {
901 if(child->value == "application/pdf") {
902 result += "PDF";
903 }
904 else if(child->value == "application/xml") {
905 result += "XML";
906 }
907 else if(child->value == "text/html") {
908 result += "HTML";
909 }
910 else if(child->value == "text/plain") {
911 result += "TXT";
912 }
913 else if(child->value == "image/svg+xml") {
914 result += "SVG";
915 }
916 else {
917 result += child->value;
918 }
919 } else { skip = true; }
920 }
921 else if( (action == "s") || (hasPrefix(action, "url-size-", type)) ) { // URL size
922 if((action.size() == 1) && (i + 2 < printingTemplateSize)) {
923 switch(printingTemplate[i + 2]) {
924 case 'K': // KiB
925 type = "kib";
926 break;
927 case 'M': // MiB
928 type = "mib";
929 break;
930 default: // Bytes
931 type = "";
932 break;
933 }
934 i++;
935 }
936 child = findChildNode(publication, "url.size");
937 if( (child) && (atoll(child->value.c_str()) != 0) ) {
938 double divisor;
939 if(type == "kib") {
940 divisor = 1024.0;
941 }
942 else if(type == "mib") {
943 divisor = 1024.0 * 1024.0;
944 }
945 else {
946 divisor = 1.0;
947 }
948 result += string2utf8(format("%1.0f", ceil(atoll(child->value.c_str()) / divisor)), nbsp, lineBreak, xmlStyle);
949 }
950 else { skip = true; }
951 }
952 else if( (action == "X") || (action == "note") ) { // Note
953 child = findChildNode(publication, "note");
954 if(child) {
955 if( (strncmp(child->value.c_str(), "ISBN", 4) == 0) ||
956 (strncmp(child->value.c_str(), "ISSN", 4) == 0) ||
957 (strncmp(child->value.c_str(), "{ISBN}", 6) == 0) ||
958 (strncmp(child->value.c_str(), "{ISSN}", 6) == 0) ) {
959 skip = true;
960 }
961 else {
962 result += string2utf8(child->value, nbsp, lineBreak, xmlStyle);
963 }
964 } else { skip = true; }
965 }
966 else if(action == "%") { // %
967 result += '%';
968 }
969 else if( (action == "b") || (hasPrefix(action, "begin-subdivision-", type)) ||
970 (action == "w") || (hasPrefix(action, "within-subdivision-", type)) ||
971 (action == "e") || (hasPrefix(action, "end-subdivision-", type)) ) { // Begin/Within/End of subdivision
972 if(i + 2 < printingTemplateSize) {
973 if ((action.size() == 1) && (i + 2 < printingTemplateSize) ) {
974 switch(printingTemplate[i + 2]) {
975 case 'D':
976 type = "day";
977 break;
978 case 'm':
979 case 'M':
980 type = "month";
981 break;
982 case 'Y':
983 type = "year";
984 break;
985 default:
986 fprintf(stderr, "ERROR: Unexpected %% placeholder '%c' in subdivision part of custom printing template!\n",
987 printingTemplate[i + 2]);
988 exit(1);
989 }
990 i++;
991 }
992 else {
993 if( (type != "day") && (type != "month") && (type != "year") ) {
994 fprintf(stderr, "ERROR: Unexpected %% placeholder '%s' in subdivision part of custom printing template!\n",
995 action.c_str());
996 exit(1);
997 }
998 }
999 const Node* prevChild = (prevPublication != NULL) ? findChildNode(prevPublication, type.c_str()) : NULL;
1000 child = findChildNode(publication, type.c_str());
1001 const Node* nextChild = (nextPublication != NULL) ? findChildNode(nextPublication, type.c_str()) : NULL;
1002
1003 bool begin = (prevChild == NULL) ||
1004 ( (prevChild != NULL) && (child != NULL) && (prevChild->value != child->value) );
1005 bool end = (nextChild == NULL) ||
1006 ( (child != NULL) && (nextChild != NULL) && (child->value != nextChild->value) );
1007 switch(action[0]) {
1008 case 'b':
1009 skip = ! begin;
1010 break;
1011 case 'w':
1012 skip = (begin || end);
1013 break;
1014 case 'e':
1015 skip = ! end;
1016 break;
1017 }
1018 }
1019 }
1020 else if( (action == "1") || (action == "custom-1") ||
1021 (action == "2") || (action == "custom-2") ||
1022 (action == "3") || (action == "custom-3") ||
1023 (action == "4") || (action == "custom-4") ||
1024 (action == "5") || (action == "custom-5") ||
1025 (action == "6") || (action == "custom-6") ||
1026 (action == "7") || (action == "custom-7") ||
1027 (action == "8") || (action == "custom-8") ||
1028 (action == "9") || (action == "custom-9") ) { // Custom #1..9
1029 const unsigned int id = action[action.size() - 1] - '1';
1030 if(publication->custom[id] != "") {
1031 result += string2utf8(publication->custom[id], nbsp, lineBreak, xmlStyle);
1032 }
1033 else {
1034 skip = true;
1035 }
1036 }
1037 else if( (action == "custom-1-as-is") ||
1038 (action == "custom-2-as-is") ||
1039 (action == "custom-3-as-is") ||
1040 (action == "custom-4-as-is") ||
1041 (action == "custom-5-as-is") ||
1042 (action == "custom-6-as-is") ||
1043 (action == "custom-7-as-is") ||
1044 (action == "custom-8-as-is") ||
1045 (action == "custom-9-as-is") ) {
1046 const unsigned int id = action[7] - '1';
1047 if(publication->custom[id] != "") {
1048 result += publication->custom[id];
1049 }
1050 else {
1051 skip = true;
1052 }
1053 }
1054 else if(action == "exec") { // Execute command and pipe in the result
1055 if(i + 1 < printingTemplateSize) {
1056 StackEntry entry = stack.back();
1057 const std::string call = result.substr(entry.pos);
1058
1059 if(skip != true) {
1060 // Text will already be skipped ...
1061 result.erase(entry.pos); // Remove the written "exec" string.
1062
1063 FILE* pipe = popen(call.c_str(), "r");
1064 if(pipe == NULL) {
1065 fprintf(stderr, "Unable to run %s!\n", call.c_str());
1066 exit(1);
1067 }
1068
1069 skip = true;
1070 char buffer[16384];
1071 ssize_t inputBytes;
1072 while( (inputBytes = fread((char*)&buffer, 1, sizeof(buffer) - 1, pipe)) > 0 ) {
1073 if(inputBytes > 0) {
1074 buffer[inputBytes] = 0x00;
1075 result += buffer;
1076 skip = false;
1077 }
1078 else {
1079 fprintf(stderr, "Reading from run of %s failed!\n", call.c_str());
1080 exit(1);
1081 }
1082 }
1083
1084 const int returnCode = pclose(pipe);
1085 if(returnCode != 0) {
1086 fprintf(stderr, "Run of %s failed with code %d!\n", call.c_str(), returnCode);
1087 exit(1);
1088 }
1089 }
1090 }
1091 }
1092 else {
1093 fprintf(stderr, "ERROR: Unexpected %% placeholder '%s' in custom printing template!\n",
1094 action.c_str());
1095 exit(1);
1096 }
1097 i++;
1098 }
1099 else if( (printingTemplate[i] == '\\') && (i + 1 < printingTemplateSize) ) {
1100 switch(printingTemplate[i + 1]) {
1101 case 'n':
1102 result += '\n';
1103 break;
1104 case 't':
1105 result += '\t';
1106 break;
1107 default:
1108 result += printingTemplate[i + 1];
1109 break;
1110 }
1111 i++;
1112 }
1113 else if(printingTemplate[i] == '[') {
1114 if(stack.empty()) {
1115 skip = false; // Up to now, everything will be accepted
1116 }
1117 struct StackEntry entry = { result.size(), skip };
1118 stack.push_back(entry);
1119 }
1120 else if(printingTemplate[i] == ']') {
1121 if(!stack.empty()) {
1122 StackEntry entry = stack.back();
1123 stack.pop_back();
1124 if(skip == true) {
1125 result.erase(entry.pos);
1126 skip = entry.skip;
1127 }
1128 }
1129 else {
1130 fputs("ERROR: Unexpected ']' in custom printing template!\n", stderr);
1131 exit(1);
1132 }
1133 }
1134 else if(printingTemplate[i] == '|') {
1135 if(!stack.empty()) {
1136 StackEntry entry = stack.back();
1137 stack.pop_back();
1138 // ====== Failed => try alternative ==========================
1139 if(skip == true) {
1140 result.erase(entry.pos);
1141 skip = entry.skip;
1142 stack.push_back(entry);
1143 }
1144 // ====== Successful => skip alternative(s) ==================
1145 else {
1146 skip = entry.skip;
1147 int levels = 1;
1148 for( ; i < printingTemplateSize; i++) {
1149 if(printingTemplate[i] == '\\') {
1150 i++;
1151 }
1152 else {
1153 if(printingTemplate[i] == '[') {
1154 levels++;
1155 }
1156 if(printingTemplate[i] == ']') {
1157 levels--;
1158 if(levels == 0) {
1159 break;
1160 }
1161 }
1162 }
1163 }
1164 }
1165 }
1166 else {
1167 fputs("ERROR: Unexpected '|' in custom printing template!\n", stderr);
1168 exit(1);
1169 }
1170 }
1171 else {
1172 std::string character = "";
1173
1174 #ifdef USE_UTF8
1175 if( ( (((unsigned char)printingTemplate[i]) & 0xE0) == 0xC0 ) &&
1176 (i + 1 < printingTemplateSize) ) {
1177 // Two-byte UTF-8 character
1178 character += printingTemplate[i];
1179 character += printingTemplate[++i];
1180 }
1181 else if( ( (((unsigned char)printingTemplate[i]) & 0xF0) == 0xE0 ) &&
1182 (i + 2 < printingTemplateSize) ) {
1183 // Three-byte UTF-8 character
1184 character += printingTemplate[i];
1185 character += printingTemplate[++i];
1186 character += printingTemplate[++i];
1187 }
1188 else if( ( (((unsigned char)printingTemplate[i]) & 0xF8) == 0xF0 ) &&
1189 (i + 3 < printingTemplateSize) ) {
1190 // Four-byte UTF-8 character
1191 character += printingTemplate[i];
1192 character += printingTemplate[++i];
1193 character += printingTemplate[++i];
1194 character += printingTemplate[++i];
1195 }
1196 else if( (((unsigned char)printingTemplate[i]) & 0x80) == 0 ) {
1197 // Regular 1-byte character
1198 #endif
1199 character += printingTemplate[i];
1200 #ifdef USE_UTF8
1201 }
1202 else {
1203 // Invalid!
1204 }
1205 #endif
1206
1207 // Add current character. We may *not* use XML style encoding here,
1208 // since the character may be itself part of XML tags!
1209 result += string2utf8(character, nbsp);
1210 }
1211 }
1212 return(result);
1213 }
1214
1215
1216 // ###### Export to custom ##################################################
exportPublicationSetToCustom(PublicationSet * publicationSet,const char * namingTemplate,const std::string & customPrintingHeader,const std::string & customPrintingTrailer,const std::string & printingTemplate,const std::vector<std::string> & monthNames,const std::string & nbsp,const std::string & lineBreak,const bool xmlStyle,const char * downloadDirectory,FILE * fh)1217 bool PublicationSet::exportPublicationSetToCustom(PublicationSet* publicationSet,
1218 const char* namingTemplate,
1219 const std::string& customPrintingHeader,
1220 const std::string& customPrintingTrailer,
1221 const std::string& printingTemplate,
1222 const std::vector<std::string>& monthNames,
1223 const std::string& nbsp,
1224 const std::string& lineBreak,
1225 const bool xmlStyle,
1226 const char* downloadDirectory,
1227 FILE* fh)
1228 {
1229 Node* publication = NULL;
1230 gNumber = 0;
1231 for(size_t index = 0; index < publicationSet->size(); index++) {
1232 // ====== Get prev, current and next publications =====================
1233 if(publicationSet->get(index)->value == "Comment") {
1234 continue;
1235 }
1236 Node* prevPublication = publication;
1237 publication = publicationSet->get(index);
1238 size_t nextPublicationIndex = 1;
1239 Node* nextPublication = (index + nextPublicationIndex< publicationSet->size()) ? publicationSet->get(index + nextPublicationIndex) : NULL;
1240 while( (nextPublication != NULL) && (nextPublication->value == "Comment")) {
1241 nextPublicationIndex++;
1242 nextPublication = (index + nextPublicationIndex< publicationSet->size()) ? publicationSet->get(index + nextPublicationIndex) : NULL;
1243 }
1244
1245 const std::string result = applyTemplate(publication, prevPublication, nextPublication,
1246 namingTemplate,
1247 printingTemplate,
1248 monthNames, nbsp, lineBreak, xmlStyle,
1249 downloadDirectory,
1250 fh);
1251
1252 fputs(string2utf8(processBackslash(customPrintingHeader), nbsp).c_str(), stdout);
1253 fputs(result.c_str(), stdout);
1254 fputs(string2utf8(processBackslash(customPrintingTrailer), nbsp).c_str(), stdout);
1255 }
1256
1257 return(true);
1258 }
1259